spack/var/spack/repos/builtin/packages/py-torch-nvidia-apex/package.py
Auriane R. d4a892f200
py-torch-nvidia-apex: Add 24.04.01 and variants from the readme (#45019)
Co-authored-by: Bernhard Kaindl <bernhardkaindl7@gmail.com>
2024-08-18 21:16:58 +02:00

184 lines
7.2 KiB
Python

# Copyright 2013-2024 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
from spack.package import *
class PyTorchNvidiaApex(PythonPackage, CudaPackage):
    """A PyTorch Extension: Tools for easy mixed precision and
    distributed training in Pytorch"""

    homepage = "https://github.com/nvidia/apex/"
    git = "https://github.com/nvidia/apex/"
    url = "https://github.com/NVIDIA/apex/archive/refs/tags/24.04.01.tar.gz"

    license("BSD-3-Clause")

    version("master", branch="master")
    version("24.04.01", sha256="065bc5c0146ee579d5db2b38ca3949da4dc799b871961a2c9eb19e18892166ce")
    version("23.08", tag="23.08")
    version("23.07", tag="23.07")
    version("23.06", tag="23.06")
    version("23.05", tag="23.05")
    version("22.03", tag="22.03")
    version("2020-10-19", commit="8a1ed9e8d35dfad26fb973996319965e4224dcdd")

    depends_on("cxx", type="build")

    variant("cuda", default=True, description="Build with CUDA")
    # Optional extension modules, based on the table in the upstream README.
    variant(
        "permutation_search_cuda", default=False, description="Build permutation search module"
    )
    variant("bnp", default=False, description="Build batch norm module")
    variant("xentropy", default=False, description="Build cross entropy module")
    variant("focal_loss_cuda", default=False, description="Build focal loss module")
    variant("fused_index_mul_2d", default=False, description="Build fused_index_mul_2d module")
    variant("fast_layer_norm", default=False, description="Build fast layer norm module")
    variant("fmhalib", default=False, description="Build fmha module")
    variant(
        "fast_multihead_attn", default=False, description="Build fast multihead attention module"
    )
    variant("transducer", default=False, description="Build transducer module")
    variant("cudnn_gbn_lib", default=False, description="Build cudnn gbn module")
    variant("peer_memory_cuda", default=False, description="Build peer memory module")
    variant("nccl_p2p_cuda", default=False, description="Build with nccl p2p")
    variant("fast_bottleneck", default=False, description="Build fast_bottleneck module")
    variant("fused_conv_bias_relu", default=False, description="Build fused_conv_bias_relu module")

    requires(
        "+peer_memory_cuda+nccl_p2p_cuda",
        when="+fast_bottleneck",
        msg="+fast_bottleneck requires both +peer_memory_cuda and +nccl_p2p_cuda to be enabled.",
    )
    requires("^cudnn@8.5:", when="+cudnn_gbn_lib")
    requires("^cudnn@8.4:", when="+fused_conv_bias_relu")
    requires("^nccl@2.10:", when="+nccl_p2p_cuda")

    with default_args(type=("build")):
        depends_on("py-setuptools")
        depends_on("py-packaging")
        depends_on("py-pip")
    with default_args(type=("build", "run")):
        depends_on("python@3:")
        depends_on("py-torch@0.4:")
        # py-torch must be built with CUDA support for the same arch(s) as apex.
        for _arch in CudaPackage.cuda_arch_values:
            depends_on(f"py-torch+cuda cuda_arch={_arch}", when=f"+cuda cuda_arch={_arch}")

    depends_on("py-pybind11", type=("build", "link", "run"))
    depends_on("cuda@9:", when="+cuda")

    # https://github.com/NVIDIA/apex/issues/1498
    # https://github.com/NVIDIA/apex/pull/1499
    patch("1499.patch", when="@2020-10-19")

    conflicts(
        "cuda_arch=none",
        when="+cuda",
        msg="Must specify CUDA compute capabilities of your GPU, see "
        "https://developer.nvidia.com/cuda-gpus",
    )

    # Variant name -> flag understood by apex's setup.py, in README order.
    # Shared by global_options (pip <=23.0) and config_settings (pip >=23.1)
    # so the two code paths cannot drift apart.
    _ext_variant_to_flag = (
        ("cuda", "--cuda_ext"),
        ("permutation_search_cuda", "--permutation_search"),
        ("bnp", "--bnp"),
        ("xentropy", "--xentropy"),
        ("focal_loss_cuda", "--focal_loss"),
        ("fused_index_mul_2d", "--index_mul_2d"),
        ("fast_layer_norm", "--fast_layer_norm"),
        ("fmhalib", "--fmha"),
        ("fast_multihead_attn", "--fast_multihead_attn"),
        ("transducer", "--transducer"),
        ("cudnn_gbn_lib", "--cudnn_gbn"),
        ("peer_memory_cuda", "--peer_memory"),
        ("nccl_p2p_cuda", "--nccl_p2p"),
        ("fast_bottleneck", "--fast_bottleneck"),
        ("fused_conv_bias_relu", "--fused_conv_bias_relu"),
    )

    def _setup_py_flags(self, spec):
        """Return the list of setup.py extension flags selected by ``spec``."""
        flags = []
        # The generic C++ extension is only requested on py-torch >= 1.0.
        if spec.satisfies("^py-torch@1.0:"):
            flags.append("--cpp_ext")
        flags += [
            flag for name, flag in self._ext_variant_to_flag if spec.satisfies(f"+{name}")
        ]
        return flags

    def torch_cuda_arch_list(self, env):
        """Set TORCH_CUDA_ARCH_LIST (e.g. "7.0;8.0") from the cuda_arch variant."""
        if self.spec.satisfies("+cuda"):
            torch_cuda_arch = ";".join(
                "{0:.1f}".format(float(i) / 10.0) for i in self.spec.variants["cuda_arch"].value
            )
            env.set("TORCH_CUDA_ARCH_LIST", torch_cuda_arch)

    def setup_build_environment(self, env):
        """Point the build at Spack's CUDA toolkit, or hide any ambient one."""
        if self.spec.satisfies("+cuda"):
            env.set("CUDA_HOME", self.spec["cuda"].prefix)
            self.torch_cuda_arch_list(env)
        else:
            # Prevent a CUDA toolkit from the ambient environment from being
            # picked up by a ~cuda build.
            env.unset("CUDA_HOME")

    def setup_run_environment(self, env):
        self.torch_cuda_arch_list(env)

    @when("^py-pip@:23.0")
    def global_options(self, spec, prefix):
        # Older pip forwards these options straight to setup.py.
        return self._setup_py_flags(spec)

    @when("^py-pip@23.1:")
    def config_settings(self, spec, prefix):
        # Newer pip only accepts PEP 517 config settings; the flags are joined
        # into a single --global-option string.
        return {
            "builddir": "build",
            "compile-args": f"-j{make_jobs}",
            "--global-option": " ".join(self._setup_py_flags(spec)),
        }