Support ROCm backing in DiHydrogen (#33563)
* Added support for building the DiHydrogen package and the LBANN extensions to DiHydrogen with ROCm libraries. Fixed a bug on Cray systems where CMake did not search hard enough for an MPI-compatible compiler wrapper; it is now told to keep looking (via `-DMPI_ASSUME_NO_BUILTIN_MPI=ON`). Added support for the roctracer package when using ROCm libraries. * Fixed how ROCm support is defined for pre-v0.3 versions.
This commit is contained in:
parent
4be67facdc
commit
6408b51def
@ -83,9 +83,9 @@ class Dihydrogen(CMakePackage, CudaPackage, ROCmPackage):
|
||||
for val in ROCmPackage.amdgpu_targets:
|
||||
depends_on("aluminum amdgpu_target=%s" % val, when="amdgpu_target=%s" % val)
|
||||
|
||||
for when in ["+cuda", "+distconv"]:
|
||||
depends_on("cuda", when=when)
|
||||
depends_on("cudnn", when=when)
|
||||
depends_on("roctracer-dev", when="+rocm +distconv")
|
||||
|
||||
depends_on("cudnn", when="+cuda +distconv")
|
||||
depends_on("cub", when="^cuda@:10")
|
||||
|
||||
# Note that #1712 forces us to enumerate the different blas variants
|
||||
@ -108,8 +108,8 @@ class Dihydrogen(CMakePackage, CudaPackage, ROCmPackage):
|
||||
depends_on("cray-libsci", when="blas=libsci")
|
||||
depends_on("cray-libsci +openmp", when="blas=libsci +openmp_blas")
|
||||
|
||||
# Distconv builds require cuda
|
||||
conflicts("~cuda", when="+distconv")
|
||||
# Distconv builds require cuda or rocm
|
||||
conflicts("+distconv", when="~cuda ~rocm")
|
||||
|
||||
conflicts("+distconv", when="+half")
|
||||
conflicts("+rocm", when="+half")
|
||||
@ -120,6 +120,8 @@ class Dihydrogen(CMakePackage, CudaPackage, ROCmPackage):
|
||||
depends_on("ninja", type="build")
|
||||
depends_on("cmake@3.17.0:", type="build")
|
||||
|
||||
depends_on("spdlog", when="@:0.1,0.2:")
|
||||
|
||||
depends_on("llvm-openmp", when="%apple-clang +openmp")
|
||||
|
||||
# TODO: Debug linker errors when NVSHMEM is built with UCX
|
||||
@ -155,10 +157,14 @@ def cmake_args(self):
|
||||
"-DH2_ENABLE_DISTCONV_LEGACY=%s" % ("+distconv" in spec),
|
||||
"-DH2_ENABLE_OPENMP=%s" % ("+openmp" in spec),
|
||||
"-DH2_ENABLE_FP16=%s" % ("+half" in spec),
|
||||
"-DH2_ENABLE_HIP_ROCM=%s" % ("+rocm" in spec),
|
||||
"-DH2_DEVELOPER_BUILD=%s" % ("+developer" in spec),
|
||||
]
|
||||
|
||||
if spec.version < Version("0.3"):
|
||||
args.append("-DH2_ENABLE_HIP_ROCM=%s" % ("+rocm" in spec))
|
||||
else:
|
||||
args.append("-DH2_ENABLE_ROCM=%s" % ("+rocm" in spec))
|
||||
|
||||
if not spec.satisfies("^cmake@3.23.0"):
|
||||
# There is a bug with using Ninja generator in this version
|
||||
# of CMake
|
||||
@ -181,7 +187,7 @@ def cmake_args(self):
|
||||
if spec.satisfies("%cce") and spec.satisfies("^cuda+allow-unsupported-compilers"):
|
||||
args.append("-DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler")
|
||||
|
||||
if "+cuda" in spec or "+distconv" in spec:
|
||||
if "+cuda" in spec:
|
||||
args.append("-DcuDNN_DIR={0}".format(spec["cudnn"].prefix))
|
||||
|
||||
if spec.satisfies("^cuda@:10"):
|
||||
@ -209,6 +215,12 @@ def cmake_args(self):
|
||||
"-DHIP_CXX_COMPILER={0}".format(self.spec["hip"].hipcc),
|
||||
]
|
||||
)
|
||||
if "platform=cray" in spec:
|
||||
args.extend(
|
||||
[
|
||||
"-DMPI_ASSUME_NO_BUILTIN_MPI=ON",
|
||||
]
|
||||
)
|
||||
archs = self.spec.variants["amdgpu_target"].value
|
||||
if archs != "none":
|
||||
arch_str = ",".join(archs)
|
||||
|
@ -167,7 +167,8 @@ class Lbann(CMakePackage, CudaPackage, ROCmPackage):
|
||||
depends_on("dihydrogen +cuda", when="+dihydrogen +cuda")
|
||||
depends_on("dihydrogen ~al", when="+dihydrogen ~al")
|
||||
depends_on("dihydrogen +al", when="+dihydrogen +al")
|
||||
depends_on("dihydrogen +distconv +cuda", when="+distconv")
|
||||
depends_on("dihydrogen +distconv +cuda", when="+distconv +cuda")
|
||||
depends_on("dihydrogen +distconv +rocm", when="+distconv +rocm")
|
||||
depends_on("dihydrogen ~half", when="+dihydrogen ~half")
|
||||
depends_on("dihydrogen +half", when="+dihydrogen +half")
|
||||
depends_on("dihydrogen ~nvshmem", when="+dihydrogen ~nvshmem")
|
||||
@ -191,6 +192,8 @@ class Lbann(CMakePackage, CudaPackage, ROCmPackage):
|
||||
depends_on("aluminum amdgpu_target=%s" % val, when="+al amdgpu_target=%s" % val)
|
||||
depends_on("dihydrogen amdgpu_target=%s" % val, when="+dihydrogen amdgpu_target=%s" % val)
|
||||
|
||||
depends_on("roctracer-dev", when="+rocm +distconv")
|
||||
|
||||
depends_on("cudnn", when="@0.90:0.100 +cuda")
|
||||
depends_on("cudnn@8.0.2:", when="@:0.90,0.101: +cuda")
|
||||
depends_on("cub", when="@0.94:0.98.2 +cuda ^cuda@:10")
|
||||
@ -334,6 +337,7 @@ def cmake_args(self):
|
||||
"-DLBANN_WITH_ONNX:BOOL=%s" % ("+onnx" in spec),
|
||||
"-DLBANN_WITH_EMBEDDED_PYTHON:BOOL=%s" % ("+python" in spec),
|
||||
"-DLBANN_WITH_PYTHON_FRONTEND:BOOL=%s" % ("+pfe" in spec),
|
||||
"-DLBANN_WITH_ROCTRACER:BOOL=%s" % ("+rocm +distconv" in spec),
|
||||
"-DLBANN_WITH_TBINF=OFF",
|
||||
"-DLBANN_WITH_UNIT_TESTING:BOOL=%s" % ("+unit_tests" in spec),
|
||||
"-DLBANN_WITH_VISION:BOOL=%s" % ("+vision" in spec),
|
||||
@ -424,6 +428,12 @@ def cmake_args(self):
|
||||
"-DHIP_CXX_COMPILER={0}".format(self.spec["hip"].hipcc),
|
||||
]
|
||||
)
|
||||
if "platform=cray" in spec:
|
||||
args.extend(
|
||||
[
|
||||
"-DMPI_ASSUME_NO_BUILTIN_MPI=ON",
|
||||
]
|
||||
)
|
||||
archs = self.spec.variants["amdgpu_target"].value
|
||||
if archs != "none":
|
||||
arch_str = ",".join(archs)
|
||||
|
Loading…
Reference in New Issue
Block a user