diff --git a/var/spack/repos/builtin/packages/hypre/package.py b/var/spack/repos/builtin/packages/hypre/package.py index a7d8a8d35c6..929127bbe24 100644 --- a/var/spack/repos/builtin/packages/hypre/package.py +++ b/var/spack/repos/builtin/packages/hypre/package.py @@ -77,7 +77,8 @@ class Hypre(AutotoolsPackage, CudaPackage, ROCmPackage): variant("int64", default=False, description="Use 64bit integers") variant("mixedint", default=False, description="Use 64bit integers while reducing memory use") variant("complex", default=False, description="Use complex values") - variant("gpu-aware-mpi", default=False, description="Use gpu-aware mpi") + variant("gpu-aware-mpi", default=False, description="Enable GPU-aware MPI support") + variant("gpu-profiling", default=False, description="Enable GPU profiling markers support") variant("mpi", default=True, description="Enable MPI support") variant("openmp", default=False, description="Enable OpenMP support") variant("debug", default=False, description="Build debug instead of optimized version") @@ -88,8 +89,6 @@ class Hypre(AutotoolsPackage, CudaPackage, ROCmPackage): variant("sycl", default=False, description="Enable SYCL support") variant("magma", default=False, description="Enable MAGMA interface") variant("caliper", default=False, description="Enable Caliper support") - variant("rocblas", default=False, description="Enable rocBLAS") - variant("cublas", default=False, description="Enable cuBLAS") variant( "precision", default="double", @@ -131,6 +130,8 @@ def patch(self): # fix sequential compilation in 'src/seq_mv' depends_on("rocthrust", when="+rocm") depends_on("rocrand", when="+rocm") depends_on("rocprim", when="+rocm") + depends_on("rocsolver", when="@2.29.0: +rocm") + depends_on("rocblas", when="@2.29.0: +rocm") depends_on("hipblas", when="+rocm +superlu-dist") depends_on("umpire", when="+umpire") depends_on("umpire+rocm", when="+umpire+rocm") @@ -172,8 +173,13 @@ def patch(self): # fix sequential compilation in 'src/seq_mv' # Option added in v2.16.0 conflicts("+mixedint", when="@:2.15") - # Option added in v2.21.0 + # Options added in v2.18.0 + conflicts("+gpu-aware-mpi", when="@:2.17") + conflicts("+gpu-profiling+cuda", when="@:2.17") + + # Options added in v2.21.0 conflicts("+umpire", when="@:2.20") + conflicts("+gpu-profiling+rocm", when="@:2.20") # Option added in v2.24.0 conflicts("+sycl", when="@:2.23") @@ -181,8 +187,11 @@ def patch(self): # fix sequential compilation in 'src/seq_mv' # Option added in v2.29.0 conflicts("+magma", when="@:2.28") - conflicts("+cublas", when="~cuda", msg="cuBLAS requires CUDA to be enabled") - conflicts("+rocblas", when="~rocm", msg="rocBLAS requires ROCm to be enabled") + # GPU checks + conflicts("+cuda", when="+rocm", msg="CUDA and ROCm are mutually exclusive") + conflicts("+cuda", when="+sycl", msg="CUDA and SYCL are mutually exclusive") + conflicts("+rocm", when="+sycl", msg="ROCm and SYCL are mutually exclusive") + conflicts("+gpu-profiling", when="~cuda~rocm", msg="GPU profiling requires CUDA or ROCm") configure_directory = "src" @@ -275,6 +284,7 @@ def configure_args(self): configure_args.extend(self.enable_or_disable("debug")) if spec.satisfies("+cuda"): + configure_args.append(f"--with-cuda-home={spec['cuda'].prefix}") configure_args.extend(["--with-cuda", "--enable-curand", "--enable-cusparse"]) cuda_arch_vals = spec.variants["cuda_arch"].value if cuda_arch_vals: @@ -282,42 +292,40 @@ def configure_args(self): cuda_arch = cuda_arch_sorted[0] configure_args.append(f"--with-gpu-arch={cuda_arch}") # New in 2.21.0: replaces --enable-cub - if spec.satisfies("@2.21.0:"): + if spec.satisfies("@2.21.0: ~umpire"): configure_args.append("--enable-device-memory-pool") - configure_args.append(f"--with-cuda-home={spec['cuda'].prefix}") - else: + elif spec.satisfies("@:2.20.99"): configure_args.append("--enable-cub") - if spec.satisfies("+cublas"): - configure_args.append("--enable-cublas") + if spec.satisfies("@2.29.0:"): + configure_args.extend(["--enable-cublas", "--enable-cusolver"]) else: configure_args.extend(["--without-cuda", "--disable-curand", "--disable-cusparse"]) if spec.satisfies("@:2.20.99"): configure_args.append("--disable-cub") + if spec.satisfies("@2.29:"): + configure_args.append("--disable-cusolver") if spec.satisfies("+rocm"): - rocm_pkgs = ["rocsparse", "rocthrust", "rocprim", "rocrand"] + configure_args.append("--with-hip") + rocm_pkgs = ["rocthrust", "rocprim", "rocrand", "rocsparse"] if spec.satisfies("+superlu-dist"): rocm_pkgs.append("hipblas") - rocm_inc = "" - for pkg in rocm_pkgs: - rocm_inc += spec[pkg].headers.include_flags + " " + if spec.satisfies("@2.29.0:"): + rocm_pkgs.extend(["rocblas", "rocsolver"]) + configure_args.extend(["--enable-rocblas", "--enable-rocsolver"]) + rocm_inc = " ".join(set(spec[pkg].headers.include_flags for pkg in rocm_pkgs)) configure_args.extend( - [ - "--with-hip", - "--enable-rocrand", - "--enable-rocsparse", - f"--with-extra-CUFLAGS={rocm_inc}", - ] + ["--enable-rocrand", "--enable-rocsparse", f"--with-extra-CUFLAGS={rocm_inc}"] ) rocm_arch_vals = spec.variants["amdgpu_target"].value if rocm_arch_vals: rocm_arch_sorted = list(sorted(rocm_arch_vals, reverse=True)) rocm_arch = rocm_arch_sorted[0] configure_args.append(f"--with-gpu-arch={rocm_arch}") - if spec.satisfies("+rocblas"): - configure_args.append("--enable-rocblas") else: configure_args.extend(["--without-hip", "--disable-rocrand", "--disable-rocsparse"]) + if spec.satisfies("@2.29.0:"): + configure_args.extend(["--disable-rocblas", "--disable-rocsolver"]) if spec.satisfies("+sycl"): configure_args.append("--with-sycl") @@ -338,6 +346,9 @@ def configure_args(self): if spec.satisfies("+gpu-aware-mpi"): configure_args.append("--enable-gpu-aware-mpi") + if spec.satisfies("+gpu-profiling"): + configure_args.append("--enable-gpu-profiling") + configure_args.extend(self.enable_or_disable("fortran")) return configure_args @@ -403,7 +414,7 @@ def _cached_tests_work_dir(self): def test_bigint(self): """build and run bigint tests""" - if "+mpi" not in self.spec: + if self.spec.satisfies("~mpi"): raise SkipTest("Package must be installed with +mpi") # build and run cached examples