CUDA: support Grace Hopper 9.0a compute capability (#45540)

* CUDA: support Grace Hopper 9.0a compute capability

* Fix other packages

* Add type annotations

* Support ancient Python versions

* isort

* spec -> self.spec

Co-authored-by: Andrew W Elble <aweits@rit.edu>

* [@spackbot] updating style on behalf of adamjstewart

---------

Co-authored-by: Andrew W Elble <aweits@rit.edu>
Co-authored-by: adamjstewart <adamjstewart@users.noreply.github.com>
commit 5fa8890bd3 (parent 122c3c2dbb)
Adam J. Stewart authored 2024-09-11 17:43:20 +02:00; committed by GitHub
7 changed files with 49 additions and 27 deletions

File: lib/spack/spack/build_systems/cuda.py

@@ -3,6 +3,9 @@
 #
 # SPDX-License-Identifier: (Apache-2.0 OR MIT)
 
+import re
+from typing import Iterable, List
+
 import spack.variant
 from spack.directives import conflicts, depends_on, variant
 from spack.multimethod import when
@@ -44,6 +47,7 @@ class CudaPackage(PackageBase):
         "87",
         "89",
         "90",
+        "90a",
     )
 
     # FIXME: keep cuda and cuda_arch separate to make usage easier until
@@ -70,6 +74,27 @@ def cuda_flags(arch_list):
             for s in arch_list
         ]
 
+    @staticmethod
+    def compute_capabilities(arch_list: Iterable[str]) -> List[str]:
+        """Adds a decimal place to each CUDA arch.
+
+        >>> compute_capabilities(['90', '90a'])
+        ['9.0', '9.0a']
+
+        Args:
+            arch_list: A list of integer strings, optionally followed by a suffix.
+
+        Returns:
+            A list of float strings, optionally followed by a suffix
+        """
+        pattern = re.compile(r"(\d+)")
+        capabilities = []
+        for arch in arch_list:
+            _, number, letter = re.split(pattern, arch)
+            number = "{0:.1f}".format(float(number) / 10.0)
+            capabilities.append(number + letter)
+        return capabilities
+
     depends_on("cuda", when="+cuda")
 
     # CUDA version vs Architecture
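The helper leans on a documented `re.split` behavior: when the pattern contains a capturing group, the matched separator is kept in the result, so each arch splits into an empty prefix, its digits, and an optional letter suffix. A standalone sketch of the same logic, with the helper inlined for illustration:

```python
import re

def compute_capabilities(arch_list):
    # Inlined copy of the new CudaPackage helper, for illustration only.
    capabilities = []
    for arch in arch_list:
        # A capturing group makes re.split keep the separator:
        #   re.split(r"(\d+)", "90a") -> ["", "90", "a"]
        #   re.split(r"(\d+)", "90")  -> ["", "90", ""]
        _, number, letter = re.split(r"(\d+)", arch)
        capabilities.append("{0:.1f}".format(float(number) / 10.0) + letter)
    return capabilities

assert compute_capabilities(["70", "90", "90a"]) == ["7.0", "9.0", "9.0a"]
```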

File: var/spack/repos/builtin/packages/amrex/package.py

@ -156,9 +156,7 @@ def cmake_args(self):
args.append(define("HDF5_IS_PARALLEL", spec.satisfies("+mpi"))) args.append(define("HDF5_IS_PARALLEL", spec.satisfies("+mpi")))
if spec.satisfies("+cuda"): if spec.satisfies("+cuda"):
amrex_arch = [ amrex_arch = CudaPackage.compute_capabilities(spec.variants["cuda_arch"].value)
"{0:.1f}".format(float(i) / 10.0) for i in spec.variants["cuda_arch"].value
]
if amrex_arch: if amrex_arch:
args.append(define("AMReX_CUDA_ARCH", amrex_arch)) args.append(define("AMReX_CUDA_ARCH", amrex_arch))

File: (path not shown)

@@ -343,7 +343,6 @@ def cmake_args(self):
             args.append("-DCMAKE_CUDA_COMPILER=" + cuda_comp)
             args.append("-DTARGET=NVIDIA")
             cuda_arch_list = self.spec.variants["cuda_arch"].value
-            int_cuda_arch = int(cuda_arch_list[0])
             cuda_arch = "sm_" + cuda_arch_list[0]
             args.append("-DCUDA_ARCH=" + cuda_arch)
 
@@ -393,20 +392,20 @@ def cmake_args(self):
         if "cuda" in self.spec.variants["backend"].value:
             args.append("-DKokkos_ENABLE_CUDA=ON")
             cuda_arch_list = self.spec.variants["cuda_arch"].value
-            int_cuda_arch = int(cuda_arch_list[0])
+            cuda_arch = cuda_arch_list[0]
             # architecture kepler optimisations
-            if int_cuda_arch in (30, 32, 35, 37):
-                args.append("-D" + "Kokkos_ARCH_KEPLER" + str(int_cuda_arch) + "=ON")
+            if cuda_arch in ("30", "32", "35", "37"):
+                args.append("-D" + "Kokkos_ARCH_KEPLER" + cuda_arch + "=ON")
             # architecture maxwell optimisations
-            if int_cuda_arch in (50, 52, 53):
-                args.append("-D" + "Kokkos_ARCH_MAXWELL" + str(int_cuda_arch) + "=ON")
+            if cuda_arch in ("50", "52", "53"):
+                args.append("-D" + "Kokkos_ARCH_MAXWELL" + cuda_arch + "=ON")
             # architecture pascal optimisations
-            if int_cuda_arch in (60, 61):
-                args.append("-D" + "Kokkos_ARCH_PASCAL" + str(int_cuda_arch) + "=ON")
+            if cuda_arch in ("60", "61"):
+                args.append("-D" + "Kokkos_ARCH_PASCAL" + cuda_arch + "=ON")
             # architecture volta optimisations
-            if int_cuda_arch in (70, 72):
-                args.append("-D" + "Kokkos_ARCH_VOLTA" + str(int_cuda_arch) + "=ON")
+            if cuda_arch in ("70", "72"):
+                args.append("-D" + "Kokkos_ARCH_VOLTA" + cuda_arch + "=ON")
-            if int_cuda_arch == 75:
+            if cuda_arch == "75":
                 args.append("-DKokkos_ARCH_TURING75=ON")
         if "omp" in self.spec.variants["backend"].value:
             args.append("-DKokkos_ENABLE_OPENMP=ON")

File: var/spack/repos/builtin/packages/paraview/package.py

@@ -5,6 +5,7 @@
 import itertools
 import os
+import re
 import sys
 from subprocess import Popen
@@ -182,7 +183,10 @@ class Paraview(CMakePackage, CudaPackage, ROCmPackage):
     # Starting from cmake@3.18, CUDA architecture management can be delegated to CMake.
     # Hence, it is possible to rely on it instead of relying on custom logic updates from VTK-m for
     # newer architectures (wrt mapping).
-    for _arch in [arch for arch in CudaPackage.cuda_arch_values if int(arch) > 86]:
-        conflicts("cmake@:3.17", when=f"cuda_arch={_arch}")
+    pattern = re.compile(r"\d+")
+    for _arch in CudaPackage.cuda_arch_values:
+        _number = re.match(pattern, _arch).group()
+        if int(_number) > 86:
+            conflicts("cmake@:3.17", when=f"cuda_arch={_arch}")
 
     # We only support one single Architecture
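ParaView's conflict loop used `int(arch)` over every entry of `cuda_arch_values`, which now contains "90a". Matching the leading digits first keeps the `> 86` cutoff working for suffixed arches; in isolation:

```python
import re

pattern = re.compile(r"\d+")
for arch in ["86", "89", "90", "90a"]:
    number = re.match(pattern, arch).group()  # leading digits: "90a" -> "90"
    if int(number) > 86:
        print(f"cuda_arch={arch}: conflicts with cmake@:3.17")
```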

File: var/spack/repos/builtin/packages/py-jaxlib/package.py

@@ -149,10 +149,8 @@ def install(self, spec, prefix):
         args.append("--enable_cuda")
         args.append("--cuda_path={0}".format(self.spec["cuda"].prefix))
         args.append("--cudnn_path={0}".format(self.spec["cudnn"].prefix))
-        capabilities = ",".join(
-            "{0:.1f}".format(float(i) / 10.0) for i in spec.variants["cuda_arch"].value
-        )
-        args.append("--cuda_compute_capabilities={0}".format(capabilities))
+        capabilities = CudaPackage.compute_capabilities(spec.variants["cuda_arch"].value)
+        args.append("--cuda_compute_capabilities={0}".format(",".join(capabilities)))
         args.append(
             "--bazel_startup_options="
             "--output_user_root={0}".format(self.wrapped_package_object.buildtmp)

File: var/spack/repos/builtin/packages/py-tensorflow/package.py

@@ -630,10 +630,8 @@ def setup_build_environment(self, env):
             # Please note that each additional compute capability significantly
             # increases your build time and binary size, and that TensorFlow
             # only supports compute capabilities >= 3.5
-            capabilities = ",".join(
-                "{0:.1f}".format(float(i) / 10.0) for i in spec.variants["cuda_arch"].value
-            )
-            env.set("TF_CUDA_COMPUTE_CAPABILITIES", capabilities)
+            capabilities = CudaPackage.compute_capabilities(spec.variants["cuda_arch"].value)
+            env.set("TF_CUDA_COMPUTE_CAPABILITIES", ",".join(capabilities))
         else:
             env.set("TF_NEED_CUDA", "0")

File: var/spack/repos/builtin/packages/py-torch/package.py

@@ -481,10 +481,10 @@ def patch(self):
     def torch_cuda_arch_list(self, env):
         if "+cuda" in self.spec:
-            torch_cuda_arch = ";".join(
-                "{0:.1f}".format(float(i) / 10.0) for i in self.spec.variants["cuda_arch"].value
-            )
-            env.set("TORCH_CUDA_ARCH_LIST", torch_cuda_arch)
+            torch_cuda_arch = CudaPackage.compute_capabilities(
+                self.spec.variants["cuda_arch"].value
+            )
+            env.set("TORCH_CUDA_ARCH_LIST", ";".join(torch_cuda_arch))
 
     def setup_build_environment(self, env):
         """Set environment variables used to control the build.