CUDA: support Grace Hopper 9.0a compute capability (#45540)
* CUDA: support Grace Hopper 9.0a compute capability
* Fix other packages
* Add type annotations
* Support ancient Python versions
* isort
* spec -> self.spec

  Co-authored-by: Andrew W Elble <aweits@rit.edu>
* [@spackbot] updating style on behalf of adamjstewart

---------

Co-authored-by: Andrew W Elble <aweits@rit.edu>
Co-authored-by: adamjstewart <adamjstewart@users.noreply.github.com>
This commit is contained in: parent 122c3c2dbb · commit 5fa8890bd3
@@ -3,6 +3,9 @@
 #
 # SPDX-License-Identifier: (Apache-2.0 OR MIT)
 
+import re
+from typing import Iterable, List
+
 import spack.variant
 from spack.directives import conflicts, depends_on, variant
 from spack.multimethod import when
@@ -44,6 +47,7 @@ class CudaPackage(PackageBase):
         "87",
         "89",
         "90",
+        "90a",
     )
 
     # FIXME: keep cuda and cuda_arch separate to make usage easier until
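Note: each entry of cuda_arch_values is ultimately expanded into nvcc flags by CudaPackage.cuda_flags (whose body is only partially visible in the next hunk). A minimal sketch of that expansion for the new "90a" value; the exact --generate-code template is an assumption based on Spack's usual output, not part of this diff.

# Sketch only: assumed flag template for illustrating what cuda_arch=90a turns into.
def cuda_flags(arch_list):
    return [
        (
            "--generate-code arch=compute_{0},code=sm_{0} "
            "--generate-code arch=compute_{0},code=compute_{0}"
        ).format(s)
        for s in arch_list
    ]

print(cuda_flags(["90a"])[0])
# --generate-code arch=compute_90a,code=sm_90a --generate-code arch=compute_90a,code=compute_90a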
@@ -70,6 +74,27 @@ def cuda_flags(arch_list):
             for s in arch_list
         ]
 
+    @staticmethod
+    def compute_capabilities(arch_list: Iterable[str]) -> List[str]:
+        """Adds a decimal place to each CUDA arch.
+
+        >>> compute_capabilities(['90', '90a'])
+        ['9.0', '9.0a']
+
+        Args:
+            arch_list: A list of integer strings, optionally followed by a suffix.
+
+        Returns:
+            A list of float strings, optionally followed by a suffix
+        """
+        pattern = re.compile(r"(\d+)")
+        capabilities = []
+        for arch in arch_list:
+            _, number, letter = re.split(pattern, arch)
+            number = "{0:.1f}".format(float(number) / 10.0)
+            capabilities.append(number + letter)
+        return capabilities
+
     depends_on("cuda", when="+cuda")
 
     # CUDA version vs Architecture
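Note: the new helper can be exercised outside of Spack; a minimal standalone sketch that reproduces the body above and shows its behavior on both plain and suffixed arch strings.

import re
from typing import Iterable, List

def compute_capabilities(arch_list: Iterable[str]) -> List[str]:
    # Same logic as the new CudaPackage.compute_capabilities above.
    pattern = re.compile(r"(\d+)")
    capabilities = []
    for arch in arch_list:
        _, number, letter = re.split(pattern, arch)
        number = "{0:.1f}".format(float(number) / 10.0)
        capabilities.append(number + letter)
    return capabilities

print(compute_capabilities(["86", "90", "90a"]))  # ['8.6', '9.0', '9.0a']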
@@ -156,9 +156,7 @@ def cmake_args(self):
         args.append(define("HDF5_IS_PARALLEL", spec.satisfies("+mpi")))
 
         if spec.satisfies("+cuda"):
-            amrex_arch = [
-                "{0:.1f}".format(float(i) / 10.0) for i in spec.variants["cuda_arch"].value
-            ]
+            amrex_arch = CudaPackage.compute_capabilities(spec.variants["cuda_arch"].value)
             if amrex_arch:
                 args.append(define("AMReX_CUDA_ARCH", amrex_arch))
 
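Note: compute_capabilities returns a list rather than a pre-joined string, so it slots straight into the define() helper here. Assuming define() renders a Python list as a semicolon-separated CMake list (an assumption about the CMake build-system helper, not shown in this diff), the resulting flag would look like the sketch below.

# Hypothetical illustration for cuda_arch=90,90a.
amrex_arch = ["9.0", "9.0a"]  # CudaPackage.compute_capabilities(["90", "90a"])
print("-DAMReX_CUDA_ARCH=" + ";".join(amrex_arch))  # -DAMReX_CUDA_ARCH=9.0;9.0a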
@@ -343,7 +343,6 @@ def cmake_args(self):
             args.append("-DCMAKE_CUDA_COMPILER=" + cuda_comp)
             args.append("-DTARGET=NVIDIA")
             cuda_arch_list = self.spec.variants["cuda_arch"].value
-            int_cuda_arch = int(cuda_arch_list[0])
             cuda_arch = "sm_" + cuda_arch_list[0]
             args.append("-DCUDA_ARCH=" + cuda_arch)
 
@@ -393,20 +392,20 @@ def cmake_args(self):
         if "cuda" in self.spec.variants["backend"].value:
             args.append("-DKokkos_ENABLE_CUDA=ON")
             cuda_arch_list = self.spec.variants["cuda_arch"].value
-            int_cuda_arch = int(cuda_arch_list[0])
+            cuda_arch = cuda_arch_list[0]
             # arhitecture kepler optimisations
-            if int_cuda_arch in (30, 32, 35, 37):
-                args.append("-D" + "Kokkos_ARCH_KEPLER" + str(int_cuda_arch) + "=ON")
+            if cuda_arch in ("30", "32", "35", "37"):
+                args.append("-D" + "Kokkos_ARCH_KEPLER" + cuda_arch + "=ON")
             # arhitecture maxwell optimisations
-            if int_cuda_arch in (50, 52, 53):
-                args.append("-D" + "Kokkos_ARCH_MAXWELL" + str(int_cuda_arch) + "=ON")
+            if cuda_arch in ("50", "52", "53"):
+                args.append("-D" + "Kokkos_ARCH_MAXWELL" + cuda_arch + "=ON")
             # arhitecture pascal optimisations
-            if int_cuda_arch in (60, 61):
-                args.append("-D" + "Kokkos_ARCH_PASCAL" + str(int_cuda_arch) + "=ON")
+            if cuda_arch in ("60", "61"):
+                args.append("-D" + "Kokkos_ARCH_PASCAL" + cuda_arch + "=ON")
             # architecture volta optimisations
-            if int_cuda_arch in (70, 72):
-                args.append("-D" + "Kokkos_ARCH_VOLTA" + str(int_cuda_arch) + "=ON")
+            if cuda_arch in ("70", "72"):
+                args.append("-D" + "Kokkos_ARCH_VOLTA" + cuda_arch + "=ON")
-            if int_cuda_arch == 75:
+            if cuda_arch == "75":
                 args.append("-DKokkos_ARCH_TURING75=ON")
         if "omp" in self.spec.variants["backend"].value:
             args.append("-DKokkos_ENABLE_OPENMP=ON")
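Note: the switch from integer to string comparison in this hunk matters because the new suffixed value cannot be parsed by int(); a minimal illustration of the failure the old code would have hit once "90a" appears in cuda_arch.

# Why the int()-based comparison had to go: "90a" is not an integer literal.
try:
    int("90a")
except ValueError as exc:
    print(exc)  # invalid literal for int() with base 10: '90a'

# String membership tests keep working for every value, suffixed or not:
print("90a" in ("30", "32", "35", "37"))  # False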
@@ -5,6 +5,7 @@
 
 import itertools
 import os
+import re
 import sys
 from subprocess import Popen
 
@@ -182,8 +183,11 @@ class Paraview(CMakePackage, CudaPackage, ROCmPackage):
     # Starting from cmake@3.18, CUDA architecture managament can be delegated to CMake.
     # Hence, it is possible to rely on it instead of relying on custom logic updates from VTK-m for
     # newer architectures (wrt mapping).
-    for _arch in [arch for arch in CudaPackage.cuda_arch_values if int(arch) > 86]:
-        conflicts("cmake@:3.17", when=f"cuda_arch={_arch}")
+    pattern = re.compile(r"\d+")
+    for _arch in CudaPackage.cuda_arch_values:
+        _number = re.match(pattern, _arch).group()
+        if int(_number) > 86:
+            conflicts("cmake@:3.17", when=f"cuda_arch={_arch}")
 
     # We only support one single Architecture
     for _arch, _other_arch in itertools.permutations(CudaPackage.cuda_arch_values, 2):
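Note: unlike the Kokkos hunk, the numeric comparison is still needed here (arches newer than 8.6 require cmake@3.18), so the new code strips any letter suffix with a regex before calling int(); a small standalone sketch of that extraction.

import re

# Mirrors the new logic above: take the leading digits, ignore any suffix.
pattern = re.compile(r"\d+")
for _arch in ("86", "89", "90", "90a"):
    _number = re.match(pattern, _arch).group()
    print(_arch, int(_number) > 86)
# 86 False / 89 True / 90 True / 90a True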
@@ -149,10 +149,8 @@ def install(self, spec, prefix):
             args.append("--enable_cuda")
             args.append("--cuda_path={0}".format(self.spec["cuda"].prefix))
             args.append("--cudnn_path={0}".format(self.spec["cudnn"].prefix))
-            capabilities = ",".join(
-                "{0:.1f}".format(float(i) / 10.0) for i in spec.variants["cuda_arch"].value
-            )
-            args.append("--cuda_compute_capabilities={0}".format(capabilities))
+            capabilities = CudaPackage.compute_capabilities(spec.variants["cuda_arch"].value)
+            args.append("--cuda_compute_capabilities={0}".format(",".join(capabilities)))
         args.append(
             "--bazel_startup_options="
             "--output_user_root={0}".format(self.wrapped_package_object.buildtmp)
@@ -630,10 +630,8 @@ def setup_build_environment(self, env):
             # Please note that each additional compute capability significantly
             # increases your build time and binary size, and that TensorFlow
             # only supports compute capabilities >= 3.5
-            capabilities = ",".join(
-                "{0:.1f}".format(float(i) / 10.0) for i in spec.variants["cuda_arch"].value
-            )
-            env.set("TF_CUDA_COMPUTE_CAPABILITIES", capabilities)
+            capabilities = CudaPackage.compute_capabilities(spec.variants["cuda_arch"].value)
+            env.set("TF_CUDA_COMPUTE_CAPABILITIES", ",".join(capabilities))
         else:
             env.set("TF_NEED_CUDA", "0")
 
@@ -481,10 +481,10 @@ def patch(self):
 
     def torch_cuda_arch_list(self, env):
         if "+cuda" in self.spec:
-            torch_cuda_arch = ";".join(
-                "{0:.1f}".format(float(i) / 10.0) for i in self.spec.variants["cuda_arch"].value
-            )
-            env.set("TORCH_CUDA_ARCH_LIST", torch_cuda_arch)
+            torch_cuda_arch = CudaPackage.compute_capabilities(
+                self.spec.variants["cuda_arch"].value
+            )
+            env.set("TORCH_CUDA_ARCH_LIST", ";".join(torch_cuda_arch))
 
     def setup_build_environment(self, env):
         """Set environment variables used to control the build.
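Note: across the last three hunks the helper's list output is joined with each consumer's own separator. A sketch of the resulting values for cuda_arch=90,90a; the flag and variable names come from the hunks above, the concrete values are illustrative.

capabilities = ["9.0", "9.0a"]  # CudaPackage.compute_capabilities(["90", "90a"])

print("--cuda_compute_capabilities=" + ",".join(capabilities))   # configure flag, comma-joined
print("TF_CUDA_COMPUTE_CAPABILITIES=" + ",".join(capabilities))  # TensorFlow build env, comma-joined
print("TORCH_CUDA_ARCH_LIST=" + ";".join(capabilities))          # Torch build env, semicolon-joined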