kokkos: async malloc (#46464)
This commit is contained in:
parent
ea1aa0714b
commit
360dbe41f7
@ -227,6 +227,10 @@ class Kokkos(CMakePackage, CudaPackage, ROCmPackage):
|
|||||||
conflicts("+cuda", when="cxxstd=17 ^cuda@:10")
|
conflicts("+cuda", when="cxxstd=17 ^cuda@:10")
|
||||||
conflicts("+cuda", when="cxxstd=20 ^cuda@:11")
|
conflicts("+cuda", when="cxxstd=20 ^cuda@:11")
|
||||||
|
|
||||||
|
# Expose a way to disable cudaMallocAsync, which can cause problems
|
||||||
|
# with some MPI implementations such as cray-mpich
|
||||||
|
variant("alloc_async", default=False, description="Use CudaMallocAsync", when="@4.2: +cuda")
|
||||||
|
|
||||||
# SYCL and OpenMPTarget require C++17 or higher
|
# SYCL and OpenMPTarget require C++17 or higher
|
||||||
for cxxstdver in cxxstds[: cxxstds.index("17")]:
|
for cxxstdver in cxxstds[: cxxstds.index("17")]:
|
||||||
conflicts(
|
conflicts(
|
||||||
@ -371,12 +375,9 @@ def cmake_args(self):
|
|||||||
if self.spec.satisfies("%oneapi") or self.spec.satisfies("%intel"):
|
if self.spec.satisfies("%oneapi") or self.spec.satisfies("%intel"):
|
||||||
options.append(self.define("CMAKE_CXX_FLAGS", "-fp-model=precise"))
|
options.append(self.define("CMAKE_CXX_FLAGS", "-fp-model=precise"))
|
||||||
|
|
||||||
# Kokkos 4.2.00+ changed the default to Kokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC=on
|
options.append(
|
||||||
# which breaks GPU-aware MPI with Cray-MPICH
|
self.define_from_variant("Kokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC", "alloc_async")
|
||||||
# See https://github.com/kokkos/kokkos/pull/6402
|
)
|
||||||
# TODO: disable this once Cray-MPICH is fixed
|
|
||||||
if self.spec.satisfies("@4.2.00:") and self.spec.satisfies("^[virtuals=mpi] cray-mpich"):
|
|
||||||
options.append(self.define("Kokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC", False))
|
|
||||||
|
|
||||||
# Remove duplicate options
|
# Remove duplicate options
|
||||||
return lang.dedupe(options)
|
return lang.dedupe(options)
|
||||||
|
Loading…
Reference in New Issue
Block a user