DiHydrogen, Hydrogen, and Aluminum CachedCMakePackage (#39714)
This commit is contained in:
		@@ -34,6 +34,11 @@ def cmake_cache_option(name, boolean_value, comment="", force=False):
 | 
			
		||||
    return 'set({0} {1} CACHE BOOL "{2}"{3})\n'.format(name, value, comment, force_str)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def cmake_cache_filepath(name, value, comment=""):
    """Return one ``set(...)`` line declaring a CMake cache entry of type FILEPATH.

    Args:
        name: name of the cache variable.
        value: value of the variable; it is emitted inside double quotes.
        comment: text placed in the docstring slot of the ``set`` command.

    Returns:
        The formatted ``set(<name> "<value>" CACHE FILEPATH "<comment>")``
        command, terminated by a newline.
    """
    template = 'set({0} "{1}" CACHE FILEPATH "{2}")\n'
    cache_line = template.format(name, value, comment)
    return cache_line
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CachedCMakeBuilder(CMakeBuilder):
 | 
			
		||||
    #: Phases of a Cached CMake package
 | 
			
		||||
    #: Note: the initconfig phase is used for developer builds as a final phase to stop on
 | 
			
		||||
@@ -257,6 +262,15 @@ def initconfig_hardware_entries(self):
 | 
			
		||||
            entries.append(
 | 
			
		||||
                cmake_cache_path("HIP_CXX_COMPILER", "{0}".format(self.spec["hip"].hipcc))
 | 
			
		||||
            )
 | 
			
		||||
            llvm_bin = spec["llvm-amdgpu"].prefix.bin
 | 
			
		||||
            llvm_prefix = spec["llvm-amdgpu"].prefix
 | 
			
		||||
            # Some ROCm systems seem to point to /<path>/rocm-<ver>/ and
 | 
			
		||||
            # others point to /<path>/rocm-<ver>/llvm
 | 
			
		||||
            if os.path.basename(os.path.normpath(llvm_prefix)) != "llvm":
 | 
			
		||||
                llvm_bin = os.path.join(llvm_prefix, "llvm/bin/")
 | 
			
		||||
            entries.append(
 | 
			
		||||
                cmake_cache_filepath("CMAKE_HIP_COMPILER", os.path.join(llvm_bin, "clang++"))
 | 
			
		||||
            )
 | 
			
		||||
            archs = self.spec.variants["amdgpu_target"].value
 | 
			
		||||
            if archs[0] != "none":
 | 
			
		||||
                arch_str = ";".join(archs)
 | 
			
		||||
@@ -277,7 +291,7 @@ def std_initconfig_entries(self):
 | 
			
		||||
            "#------------------{0}".format("-" * 60),
 | 
			
		||||
            "# CMake executable path: {0}".format(self.pkg.spec["cmake"].command.path),
 | 
			
		||||
            "#------------------{0}\n".format("-" * 60),
 | 
			
		||||
            cmake_cache_path("CMAKE_PREFIX_PATH", cmake_prefix_path),
 | 
			
		||||
            cmake_cache_string("CMAKE_PREFIX_PATH", cmake_prefix_path),
 | 
			
		||||
            self.define_cmake_cache_from_variant("CMAKE_BUILD_TYPE", "build_type"),
 | 
			
		||||
        ]
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -32,6 +32,7 @@
 | 
			
		||||
from spack.build_systems.bundle import BundlePackage
 | 
			
		||||
from spack.build_systems.cached_cmake import (
 | 
			
		||||
    CachedCMakePackage,
 | 
			
		||||
    cmake_cache_filepath,
 | 
			
		||||
    cmake_cache_option,
 | 
			
		||||
    cmake_cache_path,
 | 
			
		||||
    cmake_cache_string,
 | 
			
		||||
 
 | 
			
		||||
@@ -9,7 +9,7 @@
 | 
			
		||||
from spack.package import *
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Aluminum(CMakePackage, CudaPackage, ROCmPackage):
 | 
			
		||||
class Aluminum(CachedCMakePackage, CudaPackage, ROCmPackage):
 | 
			
		||||
    """Aluminum provides a generic interface to high-performance
 | 
			
		||||
    communication libraries, with a focus on allreduce
 | 
			
		||||
    algorithms. Blocking and non-blocking algorithms and GPU-aware
 | 
			
		||||
@@ -22,208 +22,207 @@ class Aluminum(CMakePackage, CudaPackage, ROCmPackage):
 | 
			
		||||
    git = "https://github.com/LLNL/Aluminum.git"
 | 
			
		||||
    tags = ["ecp", "radiuss"]
 | 
			
		||||
 | 
			
		||||
    maintainers("bvanessen")
 | 
			
		||||
    maintainers("benson31", "bvanessen")
 | 
			
		||||
 | 
			
		||||
    version("master", branch="master")
 | 
			
		||||
    version("1.4.1", sha256="d130a67fef1cb7a9cb3bbec1d0de426f020fe68c9df6e172c83ba42281cd90e3")
 | 
			
		||||
    version("1.4.0", sha256="ac54de058f38cead895ec8163f7b1fa7674e4dc5aacba683a660a61babbfe0c6")
 | 
			
		||||
    version("1.3.1", sha256="28ce0af6c6f29f97b7f19c5e45184bd2f8a0b1428f1e898b027d96d47cb74b0b")
 | 
			
		||||
    version("1.3.0", sha256="d0442efbebfdfb89eec793ae65eceb8f1ba65afa9f2e48df009f81985a4c27e3")
 | 
			
		||||
    version("1.2.3", sha256="9b214bdf30f9b7e8e017f83e6615db6be2631f5be3dd186205dbe3aa62f4018a")
 | 
			
		||||
    version(
 | 
			
		||||
        "1.2.2",
 | 
			
		||||
        sha256="c01d9dd98be4cab9b944bae99b403abe76d65e9e1750e7f23bf0105636ad5485",
 | 
			
		||||
        deprecated=True,
 | 
			
		||||
    )
 | 
			
		||||
    version(
 | 
			
		||||
        "1.2.1",
 | 
			
		||||
        sha256="869402708c8a102a67667b83527b4057644a32b8cdf4990bcd1a5c4e5f0e30af",
 | 
			
		||||
        deprecated=True,
 | 
			
		||||
    )
 | 
			
		||||
    version(
 | 
			
		||||
        "1.2.0",
 | 
			
		||||
        sha256="2f3725147f4dbc045b945af68d3d747f5dffbe2b8e928deed64136785210bc9a",
 | 
			
		||||
        deprecated=True,
 | 
			
		||||
    )
 | 
			
		||||
    version(
 | 
			
		||||
        "1.1.0",
 | 
			
		||||
        sha256="78b03e36e5422e8651f400feb4d8a527f87302db025d77aa37e223be6b9bdfc9",
 | 
			
		||||
        deprecated=True,
 | 
			
		||||
    )
 | 
			
		||||
    version("1.0.0-lbann", tag="v1.0.0-lbann", commit="40a062b1f63e84e074489c0f926f36b806c6b8f3")
 | 
			
		||||
    version("1.0.0", sha256="028d12e271817214db5c07c77b0528f88862139c3e442e1b12f58717290f414a")
 | 
			
		||||
    version(
 | 
			
		||||
        "0.7.0",
 | 
			
		||||
        sha256="bbb73d2847c56efbe6f99e46b41d837763938483f2e2d1982ccf8350d1148caa",
 | 
			
		||||
        deprecated=True,
 | 
			
		||||
    )
 | 
			
		||||
    version(
 | 
			
		||||
        "0.6.0",
 | 
			
		||||
        sha256="6ca329951f4c7ea52670e46e5020e7e7879d9b56fed5ff8c5df6e624b313e925",
 | 
			
		||||
        deprecated=True,
 | 
			
		||||
    )
 | 
			
		||||
    version(
 | 
			
		||||
        "0.5.0",
 | 
			
		||||
        sha256="dc365a5849eaba925355a8efb27005c5f22bcd1dca94aaed8d0d29c265c064c1",
 | 
			
		||||
        deprecated=True,
 | 
			
		||||
    )
 | 
			
		||||
    version(
 | 
			
		||||
        "0.4.0",
 | 
			
		||||
        sha256="4d6fab5481cc7c994b32fb23a37e9ee44041a9f91acf78f981a97cb8ef57bb7d",
 | 
			
		||||
        deprecated=True,
 | 
			
		||||
    )
 | 
			
		||||
    version(
 | 
			
		||||
        "0.3.3",
 | 
			
		||||
        sha256="26e7f263f53c6c6ee0fe216e981a558dfdd7ec997d0dd2a24285a609a6c68f3b",
 | 
			
		||||
        deprecated=True,
 | 
			
		||||
    )
 | 
			
		||||
    version(
 | 
			
		||||
        "0.3.2",
 | 
			
		||||
        sha256="09b6d1bcc02ac54ba269b1123eee7be20f0104b93596956c014b794ba96b037f",
 | 
			
		||||
        deprecated=True,
 | 
			
		||||
    )
 | 
			
		||||
    version(
 | 
			
		||||
        "0.2.1-1",
 | 
			
		||||
        sha256="066b750e9d1134871709a3e2414b96b166e0e24773efc7d512df2f1d96ee8eef",
 | 
			
		||||
        deprecated=True,
 | 
			
		||||
    )
 | 
			
		||||
    version(
 | 
			
		||||
        "0.2.1",
 | 
			
		||||
        sha256="3d5d15853cccc718f60df68205e56a2831de65be4d96e7f7e8497097e7905f89",
 | 
			
		||||
        deprecated=True,
 | 
			
		||||
    )
 | 
			
		||||
    version(
 | 
			
		||||
        "0.2",
 | 
			
		||||
        sha256="fc8f06c6d8faab17a2aedd408d3fe924043bf857da1094d5553f35c4d2af893b",
 | 
			
		||||
        deprecated=True,
 | 
			
		||||
    )
 | 
			
		||||
    version(
 | 
			
		||||
        "0.1",
 | 
			
		||||
        sha256="3880b736866e439dd94e6a61eeeb5bb2abccebbac82b82d52033bc6c94950bdb",
 | 
			
		||||
        deprecated=True,
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    variant("nccl", default=False, description="Builds with support for NCCL communication lib")
 | 
			
		||||
    # Library capabilities
 | 
			
		||||
    variant(
 | 
			
		||||
        "cuda_rma",
 | 
			
		||||
        default=False,
 | 
			
		||||
        when="+cuda",
 | 
			
		||||
        description="Builds with support for CUDA intra-node "
 | 
			
		||||
        " Put/Get and IPC RMA functionality",
 | 
			
		||||
    )
 | 
			
		||||
    variant(
 | 
			
		||||
        "ht",
 | 
			
		||||
        default=False,
 | 
			
		||||
        description="Builds with support for host-enabled MPI"
 | 
			
		||||
        " communication of accelerator data",
 | 
			
		||||
    )
 | 
			
		||||
    variant("nccl", default=False, description="Builds with support for NCCL communication lib")
 | 
			
		||||
    variant("shared", default=True, description="Build Aluminum as a shared library")
 | 
			
		||||
 | 
			
		||||
    # Debugging features
 | 
			
		||||
    variant("hang_check", default=False, description="Enable hang checking")
 | 
			
		||||
    variant("trace", default=False, description="Enable runtime tracing")
 | 
			
		||||
 | 
			
		||||
    # Profiler support
 | 
			
		||||
    variant("nvtx", default=False, when="+cuda", description="Enable profiling via nvprof/NVTX")
 | 
			
		||||
    variant(
 | 
			
		||||
        "cuda_rma",
 | 
			
		||||
        "roctracer", default=False, when="+rocm", description="Enable profiling via rocprof/roctx"
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    # Advanced options
 | 
			
		||||
    variant("mpi_serialize", default=False, description="Serialize MPI operations")
 | 
			
		||||
    variant("stream_mem_ops", default=False, description="Enable stream memory operations")
 | 
			
		||||
    variant(
 | 
			
		||||
        "thread_multiple",
 | 
			
		||||
        default=False,
 | 
			
		||||
        description="Builds with support for CUDA intra-node "
 | 
			
		||||
        " Put/Get and IPC RMA functionality",
 | 
			
		||||
    )
 | 
			
		||||
    variant("rccl", default=False, description="Builds with support for RCCL communication lib")
 | 
			
		||||
    variant(
 | 
			
		||||
        "ofi_libfabric_plugin",
 | 
			
		||||
        default=spack.platforms.cray.slingshot_network(),
 | 
			
		||||
        when="+rccl",
 | 
			
		||||
        sticky=True,
 | 
			
		||||
        description="Builds with support for OFI libfabric enhanced RCCL/NCCL communication lib",
 | 
			
		||||
    )
 | 
			
		||||
    variant(
 | 
			
		||||
        "ofi_libfabric_plugin",
 | 
			
		||||
        default=spack.platforms.cray.slingshot_network(),
 | 
			
		||||
        when="+nccl",
 | 
			
		||||
        sticky=True,
 | 
			
		||||
        description="Builds with support for OFI libfabric enhanced RCCL/NCCL communication lib",
 | 
			
		||||
        description="Allow multiple threads to call Aluminum concurrently",
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    depends_on("cmake@3.21.0:", type="build", when="@1.0.1:")
 | 
			
		||||
    depends_on("cmake@3.17.0:", type="build", when="@:1.0.0")
 | 
			
		||||
    depends_on("mpi")
 | 
			
		||||
    depends_on("nccl@2.7.0-0:", when="+nccl")
 | 
			
		||||
    depends_on("hwloc@1.11:")
 | 
			
		||||
    depends_on("hwloc +cuda +nvml", when="+cuda")
 | 
			
		||||
    depends_on("hwloc@2.3.0:", when="+rocm")
 | 
			
		||||
    depends_on("cub", when="@:0.1,0.6.0: +cuda ^cuda@:10")
 | 
			
		||||
    depends_on("hipcub", when="@:0.1,0.6.0: +rocm")
 | 
			
		||||
    # Benchmark/testing support
 | 
			
		||||
    variant(
 | 
			
		||||
        "benchmarks",
 | 
			
		||||
        default=False,
 | 
			
		||||
        description="Build the Aluminum benchmarking drivers "
 | 
			
		||||
        "(warning: may significantly increase build time!)",
 | 
			
		||||
    )
 | 
			
		||||
    variant(
 | 
			
		||||
        "tests",
 | 
			
		||||
        default=False,
 | 
			
		||||
        description="Build the Aluminum test drivers "
 | 
			
		||||
        "(warning: may moderately increase build time!)",
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    depends_on("rccl", when="+rccl")
 | 
			
		||||
    depends_on("aws-ofi-rccl", when="+rccl +ofi_libfabric_plugin")
 | 
			
		||||
    depends_on("aws-ofi-nccl", when="+nccl +ofi_libfabric_plugin")
 | 
			
		||||
    # FIXME: Do we want to expose tuning parameters to the Spack
 | 
			
		||||
    # recipe? Some are numeric values, some are on/off switches.
 | 
			
		||||
 | 
			
		||||
    conflicts("~cuda", when="+cuda_rma", msg="CUDA RMA support requires CUDA")
 | 
			
		||||
    conflicts("+cuda", when="+rocm", msg="CUDA and ROCm support are mutually exclusive")
 | 
			
		||||
    conflicts("+nccl", when="+rccl", msg="NCCL and RCCL support are mutually exclusive")
 | 
			
		||||
 | 
			
		||||
    generator("ninja")
 | 
			
		||||
    depends_on("mpi")
 | 
			
		||||
 | 
			
		||||
    depends_on("cmake@3.21.0:", type="build", when="@1.0.1:")
 | 
			
		||||
    depends_on("hwloc@1.11:")
 | 
			
		||||
 | 
			
		||||
    with when("+cuda"):
 | 
			
		||||
        depends_on("cub", when="^cuda@:10")
 | 
			
		||||
        depends_on("hwloc +cuda +nvml")
 | 
			
		||||
        with when("+nccl"):
 | 
			
		||||
            depends_on("nccl@2.7.0-0:")
 | 
			
		||||
            for arch in CudaPackage.cuda_arch_values:
 | 
			
		||||
                depends_on(
 | 
			
		||||
                    "nccl +cuda cuda_arch={0}".format(arch),
 | 
			
		||||
                    when="+cuda cuda_arch={0}".format(arch),
 | 
			
		||||
                )
 | 
			
		||||
            if spack.platforms.cray.slingshot_network():
 | 
			
		||||
                depends_on("aws-ofi-nccl")  # Note: NOT a CudaPackage
 | 
			
		||||
 | 
			
		||||
    with when("+rocm"):
 | 
			
		||||
        for val in ROCmPackage.amdgpu_targets:
 | 
			
		||||
            depends_on(
 | 
			
		||||
                "hipcub +rocm amdgpu_target={0}".format(val), when="amdgpu_target={0}".format(val)
 | 
			
		||||
            )
 | 
			
		||||
            depends_on(
 | 
			
		||||
                "hwloc@2.3.0: +rocm amdgpu_target={0}".format(val),
 | 
			
		||||
                when="amdgpu_target={0}".format(val),
 | 
			
		||||
            )
 | 
			
		||||
            # RCCL is *NOT* implemented as a ROCmPackage
 | 
			
		||||
            depends_on(
 | 
			
		||||
                "rccl amdgpu_target={0}".format(val), when="+nccl amdgpu_target={0}".format(val)
 | 
			
		||||
            )
 | 
			
		||||
            depends_on(
 | 
			
		||||
                "roctracer-dev +rocm amdgpu_target={0}".format(val),
 | 
			
		||||
                when="+roctracer amdgpu_target={0}".format(val),
 | 
			
		||||
            )
 | 
			
		||||
        if spack.platforms.cray.slingshot_network():
 | 
			
		||||
            depends_on("aws-ofi-rccl", when="+nccl")
 | 
			
		||||
 | 
			
		||||
    def cmake_args(self):
 | 
			
		||||
        spec = self.spec
 | 
			
		||||
        args = [
 | 
			
		||||
            "-DCMAKE_CXX_STANDARD:STRING=17",
 | 
			
		||||
            "-DALUMINUM_ENABLE_CUDA:BOOL=%s" % ("+cuda" in spec),
 | 
			
		||||
            "-DALUMINUM_ENABLE_NCCL:BOOL=%s" % ("+nccl" in spec or "+rccl" in spec),
 | 
			
		||||
            "-DALUMINUM_ENABLE_ROCM:BOOL=%s" % ("+rocm" in spec),
 | 
			
		||||
        ]
 | 
			
		||||
 | 
			
		||||
        if not spec.satisfies("^cmake@3.23.0"):
 | 
			
		||||
            # There is a bug with using Ninja generator in this version
 | 
			
		||||
            # of CMake
 | 
			
		||||
            args.append("-DCMAKE_EXPORT_COMPILE_COMMANDS=ON")
 | 
			
		||||
 | 
			
		||||
        if "+cuda" in spec:
 | 
			
		||||
            if self.spec.satisfies("%clang"):
 | 
			
		||||
                for flag in self.spec.compiler_flags["cxxflags"]:
 | 
			
		||||
                    if "gcc-toolchain" in flag:
 | 
			
		||||
                        args.append("-DCMAKE_CUDA_FLAGS=-Xcompiler={0}".format(flag))
 | 
			
		||||
            if spec.satisfies("^cuda@11.0:"):
 | 
			
		||||
                args.append("-DCMAKE_CUDA_STANDARD=17")
 | 
			
		||||
            else:
 | 
			
		||||
                args.append("-DCMAKE_CUDA_STANDARD=14")
 | 
			
		||||
            archs = spec.variants["cuda_arch"].value
 | 
			
		||||
            if archs != "none":
 | 
			
		||||
                arch_str = ";".join(archs)
 | 
			
		||||
                args.append("-DCMAKE_CUDA_ARCHITECTURES=%s" % arch_str)
 | 
			
		||||
 | 
			
		||||
            if spec.satisfies("%cce") and spec.satisfies("^cuda+allow-unsupported-compilers"):
 | 
			
		||||
                args.append("-DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler")
 | 
			
		||||
 | 
			
		||||
        if spec.satisfies("@0.5:"):
 | 
			
		||||
            args.extend(
 | 
			
		||||
                [
 | 
			
		||||
                    "-DALUMINUM_ENABLE_HOST_TRANSFER:BOOL=%s" % ("+ht" in spec),
 | 
			
		||||
                    "-DALUMINUM_ENABLE_MPI_CUDA:BOOL=%s" % ("+cuda_rma" in spec),
 | 
			
		||||
                    "-DALUMINUM_ENABLE_MPI_CUDA_RMA:BOOL=%s" % ("+cuda_rma" in spec),
 | 
			
		||||
                ]
 | 
			
		||||
            )
 | 
			
		||||
        else:
 | 
			
		||||
            args.append("-DALUMINUM_ENABLE_MPI_CUDA:BOOL=%s" % ("+ht" in spec))
 | 
			
		||||
 | 
			
		||||
        if spec.satisfies("@:0.1,0.6.0: +cuda ^cuda@:10"):
 | 
			
		||||
            args.append("-DCUB_DIR:FILEPATH=%s" % spec["cub"].prefix)
 | 
			
		||||
 | 
			
		||||
        # Add support for OS X to find OpenMP (LLVM installed via brew)
 | 
			
		||||
        if self.spec.satisfies("%clang platform=darwin"):
 | 
			
		||||
            clang = self.compiler.cc
 | 
			
		||||
            clang_bin = os.path.dirname(clang)
 | 
			
		||||
            clang_root = os.path.dirname(clang_bin)
 | 
			
		||||
            args.extend(["-DOpenMP_DIR={0}".format(clang_root)])
 | 
			
		||||
 | 
			
		||||
        if "+rocm" in spec:
 | 
			
		||||
            args.extend(
 | 
			
		||||
                [
 | 
			
		||||
                    "-DHIP_ROOT_DIR={0}".format(spec["hip"].prefix),
 | 
			
		||||
                    "-DHIP_CXX_COMPILER={0}".format(self.spec["hip"].hipcc),
 | 
			
		||||
                    "-DCMAKE_CXX_FLAGS=-std=c++17",
 | 
			
		||||
                ]
 | 
			
		||||
            )
 | 
			
		||||
            archs = self.spec.variants["amdgpu_target"].value
 | 
			
		||||
            if archs != "none":
 | 
			
		||||
                arch_str = ",".join(archs)
 | 
			
		||||
                if spec.satisfies("%rocmcc@:5"):
 | 
			
		||||
                    args.append(
 | 
			
		||||
                        "-DHIP_HIPCC_FLAGS=--amdgpu-target={0}"
 | 
			
		||||
                        " -g -fsized-deallocation -fPIC -std=c++17".format(arch_str)
 | 
			
		||||
                    )
 | 
			
		||||
                args.extend(
 | 
			
		||||
                    [
 | 
			
		||||
                        "-DCMAKE_HIP_ARCHITECTURES=%s" % arch_str,
 | 
			
		||||
                        "-DAMDGPU_TARGETS=%s" % arch_str,
 | 
			
		||||
                        "-DGPU_TARGETS=%s" % arch_str,
 | 
			
		||||
                    ]
 | 
			
		||||
                )
 | 
			
		||||
 | 
			
		||||
        args = []
 | 
			
		||||
        return args
 | 
			
		||||
 | 
			
		||||
    def get_cuda_flags(self):
        """Collect the extra flags to pass to the CUDA compiler.

        Returns:
            A list of strings. It starts with ``-allow-unsupported-compiler``
            when the spec satisfies ``^cuda+allow-unsupported-compilers``.
            When the spec satisfies ``%clang``, it then holds one
            ``-Xcompiler=<flag>`` item for every ``cxxflags`` compiler flag
            that contains ``gcc-toolchain``. The list may be empty.
        """
        pkg_spec = self.spec

        if pkg_spec.satisfies("^cuda+allow-unsupported-compilers"):
            cuda_flags = ["-allow-unsupported-compiler"]
        else:
            cuda_flags = []

        # Only clang builds forward their gcc-toolchain flag(s) to the host compiler.
        if not pkg_spec.satisfies("%clang"):
            return cuda_flags

        cuda_flags.extend(
            "-Xcompiler={0}".format(cxxflag)
            for cxxflag in pkg_spec.compiler_flags["cxxflags"]
            if "gcc-toolchain" in cxxflag
        )
        return cuda_flags
 | 
			
		||||
 | 
			
		||||
    def std_initconfig_entries(self):
        """Return the standard initconfig lines with CMAKE_PREFIX_PATH re-emitted as a STRING.

        Every inherited entry that mentions ``CMAKE_PREFIX_PATH`` is dropped,
        and a single replacement entry is appended. The replacement is built
        from the ``CMAKE_PREFIX_PATH`` environment variable with ``:``
        separators turned into ``;``.

        Raises:
            KeyError: if ``CMAKE_PREFIX_PATH`` is not set in the environment.
        """
        inherited = super(Aluminum, self).std_initconfig_entries()

        # CMAKE_PREFIX_PATH, in CMake types, is a "STRING", not a "PATH". :/
        result = []
        for line in inherited:
            if "CMAKE_PREFIX_PATH" in line:
                continue
            result.append(line)

        semicolon_paths = ";".join(os.environ["CMAKE_PREFIX_PATH"].split(":"))
        result.append(cmake_cache_string("CMAKE_PREFIX_PATH", semicolon_paths))
        return result
 | 
			
		||||
 | 
			
		||||
    def initconfig_compiler_entries(self):
        """Extend the inherited compiler cache entries with Aluminum's build settings.

        Appends, in order: the C++ standard (17), ``BUILD_SHARED_LIBS`` taken
        from the ``shared`` variant, ``CMAKE_EXPORT_COMPILE_COMMANDS`` (on) and
        ``MPI_ASSUME_NO_BUILTIN_MPI`` (on).

        Returns:
            The list returned by the parent implementation, with the four
            entries above added at the end.
        """
        pkg_spec = self.spec
        cache = super(Aluminum, self).initconfig_compiler_entries()

        cache.extend(
            [
                # FIXME: Enforce this better in the actual CMake.
                cmake_cache_string("CMAKE_CXX_STANDARD", "17"),
                cmake_cache_option("BUILD_SHARED_LIBS", "+shared" in pkg_spec),
                cmake_cache_option("CMAKE_EXPORT_COMPILE_COMMANDS", True),
                cmake_cache_option("MPI_ASSUME_NO_BUILTIN_MPI", True),
            ]
        )
        return cache
 | 
			
		||||
 | 
			
		||||
    def initconfig_hardware_entries(self):
        """Extend the inherited hardware cache entries with CUDA and ROCm settings.

        ``ALUMINUM_ENABLE_CUDA`` and ``ALUMINUM_ENABLE_ROCM`` are always
        written. For a ``+cuda`` spec the CUDA standard (17), the requested
        architectures (unless ``cuda_arch=none``) and any flags from
        ``get_cuda_flags`` are added. For a ``+rocm`` spec the HIP standard
        (17), the GPU targets (unless ``amdgpu_target=none``) and
        ``HIP_ROOT_DIR`` are added.

        Returns:
            The list returned by the parent implementation, with the entries
            above appended.
        """
        pkg_spec = self.spec
        cache = super(Aluminum, self).initconfig_hardware_entries()

        # --- CUDA ---
        cache.append(cmake_cache_option("ALUMINUM_ENABLE_CUDA", "+cuda" in pkg_spec))
        if pkg_spec.satisfies("+cuda"):
            cache.append(cmake_cache_string("CMAKE_CUDA_STANDARD", "17"))
            if not pkg_spec.satisfies("cuda_arch=none"):
                cuda_arch_list = ";".join(pkg_spec.variants["cuda_arch"].value)
                cache.append(cmake_cache_string("CMAKE_CUDA_ARCHITECTURES", cuda_arch_list))

            # FIXME: Should this use the "cuda_flags" function of the
            # CudaPackage class or something? There might be other
            # flags in play, and we need to be sure to get them all.
            extra_cuda_flags = self.get_cuda_flags()
            if extra_cuda_flags:
                cache.append(cmake_cache_string("CMAKE_CUDA_FLAGS", " ".join(extra_cuda_flags)))

        # --- ROCm ---
        cache.append(cmake_cache_option("ALUMINUM_ENABLE_ROCM", "+rocm" in pkg_spec))
        if pkg_spec.satisfies("+rocm"):
            cache.append(cmake_cache_string("CMAKE_HIP_STANDARD", "17"))
            if not pkg_spec.satisfies("amdgpu_target=none"):
                gpu_target_list = ";".join(self.spec.variants["amdgpu_target"].value)
                # The same target list is written under all three variable names.
                for var_name in ("CMAKE_HIP_ARCHITECTURES", "AMDGPU_TARGETS", "GPU_TARGETS"):
                    cache.append(cmake_cache_string(var_name, gpu_target_list))
            cache.append(cmake_cache_path("HIP_ROOT_DIR", pkg_spec["hip"].prefix))

        return cache
 | 
			
		||||
 | 
			
		||||
    def initconfig_package_entries(self):
        """Extend the inherited package cache entries with Aluminum's feature switches.

        Each Aluminum CMake option is written as a BOOL cache entry whose value
        is whether the matching variant is enabled in the spec.

        Returns:
            The list returned by the parent implementation, with one option
            entry per row of the table below appended in table order.
        """
        pkg_spec = self.spec
        cache = super(Aluminum, self).initconfig_package_entries()

        # (CMake option, variant query) pairs; order matches the emitted file.
        option_table = (
            # Library capabilities
            ("ALUMINUM_ENABLE_MPI_CUDA", "+cuda_rma"),
            ("ALUMINUM_ENABLE_MPI_CUDA_RMA", "+cuda_rma"),
            ("ALUMINUM_ENABLE_HOST_TRANSFER", "+ht"),
            ("ALUMINUM_ENABLE_NCCL", "+nccl"),
            # Debugging features
            ("ALUMINUM_DEBUG_HANG_CHECK", "+hang_check"),
            ("ALUMINUM_ENABLE_TRACE", "+trace"),
            # Profiler support
            ("ALUMINUM_ENABLE_NVPROF", "+nvtx"),
            ("ALUMINUM_ENABLE_ROCTRACER", "+roctracer"),
            # Advanced options
            ("ALUMINUM_MPI_SERIALIZE", "+mpi_serialize"),
            ("ALUMINUM_ENABLE_STREAM_MEM_OPS", "+stream_mem_ops"),
            ("ALUMINUM_ENABLE_THREAD_MULTIPLE", "+thread_multiple"),
            # Benchmark/testing support
            ("ALUMINUM_ENABLE_BENCHMARKS", "+benchmarks"),
            ("ALUMINUM_ENABLE_TESTS", "+tests"),
        )
        for option_name, variant_query in option_table:
            cache.append(cmake_cache_option(option_name, variant_query in pkg_spec))

        return cache
 | 
			
		||||
 
 | 
			
		||||
@@ -8,7 +8,39 @@
 | 
			
		||||
from spack.package import *
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Dihydrogen(CMakePackage, CudaPackage, ROCmPackage):
 | 
			
		||||
# This is a hack to get around some deficiencies in Hydrogen.
 | 
			
		||||
def get_blas_entries(inspec):
    """Build the CMake cache entries that select the BLAS/LAPACK backend.

    The choice is read from the ``hydrogen`` dependency of ``inspec``. The
    first matching case wins:

    * ``blas=openblas``: enable ``DiHydrogen_USE_OpenBLAS``.
    * ``blas=mkl`` or an ``intel-mkl`` dependency: enable ``DiHydrogen_USE_MKL``.
    * ``blas=essl`` or an ``essl`` dependency: set ``BLA_VENDOR`` to
      ``IBMESSL`` and spell out ``LAPACK_LIBRARIES`` / ``BLAS_LIBRARIES``.
    * ``blas=accelerate``: enable ``DiHydrogen_USE_ACCELERATE``.
    * a ``netlib-lapack`` dependency: set ``BLA_VENDOR`` to ``Generic``.

    Args:
        inspec: a spec that can be indexed with ``"hydrogen"``.

    Returns:
        A list of cache-entry strings; empty when no case matches.
    """
    entries = []
    spec = inspec["hydrogen"]
    if "blas=openblas" in spec:
        entries.append(cmake_cache_option("DiHydrogen_USE_OpenBLAS", True))
    elif "blas=mkl" in spec or spec.satisfies("^intel-mkl"):
        entries.append(cmake_cache_option("DiHydrogen_USE_MKL", True))
    elif "blas=essl" in spec or spec.satisfies("^essl"):
        entries.append(cmake_cache_string("BLA_VENDOR", "IBMESSL"))
        # If IBM ESSL is used it needs help finding the proper LAPACK libraries.
        # This is a free function, so there is no ``self`` to read the spec
        # from; use the spec that the branch condition above just tested.
        essl_link_flags = ";".join("-l{0}".format(lib) for lib in spec["essl"].libs.names)
        entries.append(
            cmake_cache_string("LAPACK_LIBRARIES", "%s;-llapack;-lblas" % essl_link_flags)
        )
        entries.append(cmake_cache_string("BLAS_LIBRARIES", "%s;-lblas" % essl_link_flags))
    elif "blas=accelerate" in spec:
        entries.append(cmake_cache_option("DiHydrogen_USE_ACCELERATE", True))
    elif spec.satisfies("^netlib-lapack"):
        entries.append(cmake_cache_string("BLA_VENDOR", "Generic"))
    return entries
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Dihydrogen(CachedCMakePackage, CudaPackage, ROCmPackage):
 | 
			
		||||
    """DiHydrogen is the second version of the Hydrogen fork of the
 | 
			
		||||
    well-known distributed linear algebra library,
 | 
			
		||||
    Elemental. DiHydrogen aims to be a basic distributed
 | 
			
		||||
@@ -20,117 +52,179 @@ class Dihydrogen(CMakePackage, CudaPackage, ROCmPackage):
 | 
			
		||||
    git = "https://github.com/LLNL/DiHydrogen.git"
 | 
			
		||||
    tags = ["ecp", "radiuss"]
 | 
			
		||||
 | 
			
		||||
    maintainers("bvanessen")
 | 
			
		||||
    maintainers("benson31", "bvanessen")
 | 
			
		||||
 | 
			
		||||
    version("develop", branch="develop")
 | 
			
		||||
    version("master", branch="master")
 | 
			
		||||
 | 
			
		||||
    version("0.2.1", sha256="11e2c0f8a94ffa22e816deff0357dde6f82cc8eac21b587c800a346afb5c49ac")
 | 
			
		||||
    version("0.2.0", sha256="e1f597e80f93cf49a0cb2dbc079a1f348641178c49558b28438963bd4a0bdaa4")
 | 
			
		||||
    version("0.1", sha256="171d4b8adda1e501c38177ec966e6f11f8980bf71345e5f6d87d0a988fef4c4e")
 | 
			
		||||
    version("0.3.0", sha256="8dd143441a28e0c7662cd92694e9a4894b61fd48508ac1d77435f342bc226dcf")
 | 
			
		||||
 | 
			
		||||
    # Primary features
 | 
			
		||||
 | 
			
		||||
    variant("dace", default=False, sticky=True, description="Enable DaCe backend.")
 | 
			
		||||
 | 
			
		||||
    variant(
 | 
			
		||||
        "distconv",
 | 
			
		||||
        default=False,
 | 
			
		||||
        sticky=True,
 | 
			
		||||
        description="Enable (legacy) Distributed Convolution support.",
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    variant(
 | 
			
		||||
        "nvshmem",
 | 
			
		||||
        default=False,
 | 
			
		||||
        sticky=True,
 | 
			
		||||
        description="Enable support for NVSHMEM-based halo exchanges.",
 | 
			
		||||
        when="+distconv",
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    variant(
 | 
			
		||||
        "shared", default=True, sticky=True, description="Enables the build of shared libraries"
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    # Some features of developer interest
 | 
			
		||||
 | 
			
		||||
    variant("al", default=True, description="Builds with Aluminum communication library")
 | 
			
		||||
    variant(
 | 
			
		||||
        "developer",
 | 
			
		||||
        default=False,
 | 
			
		||||
        description="Enable extra warnings and force tests to be enabled.",
 | 
			
		||||
    )
 | 
			
		||||
    variant("half", default=False, description="Enable FP16 support on the CPU.")
 | 
			
		||||
 | 
			
		||||
    variant("ci", default=False, description="Use default options for CI builds")
 | 
			
		||||
 | 
			
		||||
    variant(
 | 
			
		||||
        "distconv",
 | 
			
		||||
        "coverage",
 | 
			
		||||
        default=False,
 | 
			
		||||
        description="Support distributed convolutions: spatial, channel, " "filter.",
 | 
			
		||||
        description="Decorate build with code coverage instrumentation options",
 | 
			
		||||
        when="%gcc",
 | 
			
		||||
    )
 | 
			
		||||
    variant("nvshmem", default=False, description="Builds with support for NVSHMEM")
 | 
			
		||||
    variant("openmp", default=False, description="Enable CPU acceleration with OpenMP threads.")
 | 
			
		||||
    variant("rocm", default=False, description="Enable ROCm/HIP language features.")
 | 
			
		||||
    variant("shared", default=True, description="Enables the build of shared libraries")
 | 
			
		||||
 | 
			
		||||
    # Variants related to BLAS
 | 
			
		||||
    variant(
 | 
			
		||||
        "openmp_blas", default=False, description="Use OpenMP for threading in the BLAS library"
 | 
			
		||||
        "coverage",
 | 
			
		||||
        default=False,
 | 
			
		||||
        description="Decorate build with code coverage instrumentation options",
 | 
			
		||||
        when="%clang",
 | 
			
		||||
    )
 | 
			
		||||
    variant("int64_blas", default=False, description="Use 64bit integers for BLAS.")
 | 
			
		||||
    variant(
 | 
			
		||||
        "blas",
 | 
			
		||||
        default="openblas",
 | 
			
		||||
        values=("openblas", "mkl", "accelerate", "essl", "libsci"),
 | 
			
		||||
        description="Enable the use of OpenBlas/MKL/Accelerate/ESSL/LibSci",
 | 
			
		||||
        "coverage",
 | 
			
		||||
        default=False,
 | 
			
		||||
        description="Decorate build with code coverage instrumentation options",
 | 
			
		||||
        when="%rocmcc",
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    conflicts("~cuda", when="+nvshmem")
 | 
			
		||||
    # Package conflicts and requirements
 | 
			
		||||
 | 
			
		||||
    depends_on("mpi")
 | 
			
		||||
    depends_on("catch2", type="test")
 | 
			
		||||
    conflicts("+nvshmem", when="~cuda", msg="NVSHMEM requires CUDA support.")
 | 
			
		||||
 | 
			
		||||
    # Specify the correct version of Aluminum
 | 
			
		||||
    depends_on("aluminum@0.4.0:0.4", when="@0.1 +al")
 | 
			
		||||
    depends_on("aluminum@0.5.0:0.5", when="@0.2.0 +al")
 | 
			
		||||
    depends_on("aluminum@0.7.0:0.7", when="@0.2.1 +al")
 | 
			
		||||
    depends_on("aluminum@0.7.0:", when="@:0.0,0.2.1: +al")
 | 
			
		||||
    conflicts("+cuda", when="+rocm", msg="CUDA and ROCm are mutually exclusive.")
 | 
			
		||||
 | 
			
		||||
    # Add Aluminum variants
 | 
			
		||||
    depends_on("aluminum +cuda +nccl +cuda_rma", when="+al +cuda")
 | 
			
		||||
    depends_on("aluminum +rocm +rccl", when="+al +rocm")
 | 
			
		||||
    depends_on("aluminum +ht", when="+al +distconv")
 | 
			
		||||
    requires(
 | 
			
		||||
        "+cuda",
 | 
			
		||||
        "+rocm",
 | 
			
		||||
        when="+distconv",
 | 
			
		||||
        policy="any_of",
 | 
			
		||||
        msg="DistConv support requires CUDA or ROCm.",
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    for arch in CudaPackage.cuda_arch_values:
 | 
			
		||||
        depends_on("aluminum cuda_arch=%s" % arch, when="+al +cuda cuda_arch=%s" % arch)
 | 
			
		||||
        depends_on("nvshmem cuda_arch=%s" % arch, when="+nvshmem +cuda cuda_arch=%s" % arch)
 | 
			
		||||
    # Dependencies
 | 
			
		||||
 | 
			
		||||
    # variants +rocm and amdgpu_targets are not automatically passed to
 | 
			
		||||
    # dependencies, so do it manually.
 | 
			
		||||
    for val in ROCmPackage.amdgpu_targets:
 | 
			
		||||
        depends_on("aluminum amdgpu_target=%s" % val, when="amdgpu_target=%s" % val)
 | 
			
		||||
    depends_on("catch2@3.0.1:", type=("build", "test"), when="+developer")
 | 
			
		||||
    depends_on("cmake@3.21.0:", type="build")
 | 
			
		||||
    depends_on("cuda@11.0:", when="+cuda")
 | 
			
		||||
    depends_on("spdlog@1.11.0", when="@:0.1,0.2:")
 | 
			
		||||
 | 
			
		||||
    depends_on("roctracer-dev", when="+rocm +distconv")
 | 
			
		||||
    with when("@0.3.0:"):
 | 
			
		||||
        depends_on("hydrogen +al")
 | 
			
		||||
        for arch in CudaPackage.cuda_arch_values:
 | 
			
		||||
            depends_on(
 | 
			
		||||
                "hydrogen +cuda cuda_arch={0}".format(arch),
 | 
			
		||||
                when="+cuda cuda_arch={0}".format(arch),
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
    depends_on("cudnn", when="+cuda")
 | 
			
		||||
    depends_on("cub", when="^cuda@:10")
 | 
			
		||||
        for val in ROCmPackage.amdgpu_targets:
 | 
			
		||||
            depends_on(
 | 
			
		||||
                "hydrogen amdgpu_target={0}".format(val),
 | 
			
		||||
                when="+rocm amdgpu_target={0}".format(val),
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
    # Note that #1712 forces us to enumerate the different blas variants
 | 
			
		||||
    depends_on("openblas", when="blas=openblas")
 | 
			
		||||
    depends_on("openblas +ilp64", when="blas=openblas +int64_blas")
 | 
			
		||||
    depends_on("openblas threads=openmp", when="blas=openblas +openmp_blas")
 | 
			
		||||
    with when("+distconv"):
 | 
			
		||||
        depends_on("mpi")
 | 
			
		||||
 | 
			
		||||
    depends_on("intel-mkl", when="blas=mkl")
 | 
			
		||||
    depends_on("intel-mkl +ilp64", when="blas=mkl +int64_blas")
 | 
			
		||||
    depends_on("intel-mkl threads=openmp", when="blas=mkl +openmp_blas")
 | 
			
		||||
        # All this nonsense for one silly little package.
 | 
			
		||||
        depends_on("aluminum@1.4.1:")
 | 
			
		||||
 | 
			
		||||
    depends_on("veclibfort", when="blas=accelerate")
 | 
			
		||||
    conflicts("blas=accelerate +openmp_blas")
 | 
			
		||||
        # Add Aluminum variants
 | 
			
		||||
        depends_on("aluminum +cuda +nccl", when="+distconv +cuda")
 | 
			
		||||
        depends_on("aluminum +rocm +nccl", when="+distconv +rocm")
 | 
			
		||||
 | 
			
		||||
    depends_on("essl", when="blas=essl")
 | 
			
		||||
    depends_on("essl +ilp64", when="blas=essl +int64_blas")
 | 
			
		||||
    depends_on("essl threads=openmp", when="blas=essl +openmp_blas")
 | 
			
		||||
    depends_on("netlib-lapack +external-blas", when="blas=essl")
 | 
			
		||||
        # TODO: Debug linker errors when NVSHMEM is built with UCX
 | 
			
		||||
        depends_on("nvshmem +nccl~ucx", when="+nvshmem")
 | 
			
		||||
 | 
			
		||||
    depends_on("cray-libsci", when="blas=libsci")
 | 
			
		||||
    depends_on("cray-libsci +openmp", when="blas=libsci +openmp_blas")
 | 
			
		||||
        # OMP support is only used in DistConv, and only Apple needs
 | 
			
		||||
        # hand-holding with it.
 | 
			
		||||
        depends_on("llvm-openmp", when="%apple-clang")
 | 
			
		||||
        # FIXME: when="platform=darwin"??
 | 
			
		||||
 | 
			
		||||
    # Distconv builds require cuda or rocm
 | 
			
		||||
    conflicts("+distconv", when="~cuda ~rocm")
 | 
			
		||||
        # CUDA/ROCm arch forwarding
 | 
			
		||||
 | 
			
		||||
    conflicts("+distconv", when="+half")
 | 
			
		||||
    conflicts("+rocm", when="+half")
 | 
			
		||||
        for arch in CudaPackage.cuda_arch_values:
 | 
			
		||||
            depends_on(
 | 
			
		||||
                "aluminum +cuda cuda_arch={0}".format(arch),
 | 
			
		||||
                when="+cuda cuda_arch={0}".format(arch),
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
    depends_on("half", when="+half")
 | 
			
		||||
            # This is a workaround for a bug in the Aluminum package,
 | 
			
		||||
            # as it should be responsible for its own NCCL dependency.
 | 
			
		||||
            # Rather than failing to concretize, we help it along.
 | 
			
		||||
            depends_on(
 | 
			
		||||
                "nccl cuda_arch={0}".format(arch),
 | 
			
		||||
                when="+distconv +cuda cuda_arch={0}".format(arch),
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
    generator("ninja")
 | 
			
		||||
    depends_on("cmake@3.17.0:", type="build")
 | 
			
		||||
            # NVSHMEM also needs arch forwarding
 | 
			
		||||
            depends_on(
 | 
			
		||||
                "nvshmem +cuda cuda_arch={0}".format(arch),
 | 
			
		||||
                when="+nvshmem +cuda cuda_arch={0}".format(arch),
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
    depends_on("spdlog", when="@:0.1,0.2:")
 | 
			
		||||
        # Identify versions of cuda_arch that are too old from
 | 
			
		||||
        # lib/spack/spack/build_systems/cuda.py. We require >=60.
 | 
			
		||||
        illegal_cuda_arch_values = [
 | 
			
		||||
            "10",
 | 
			
		||||
            "11",
 | 
			
		||||
            "12",
 | 
			
		||||
            "13",
 | 
			
		||||
            "20",
 | 
			
		||||
            "21",
 | 
			
		||||
            "30",
 | 
			
		||||
            "32",
 | 
			
		||||
            "35",
 | 
			
		||||
            "37",
 | 
			
		||||
            "50",
 | 
			
		||||
            "52",
 | 
			
		||||
            "53",
 | 
			
		||||
        ]
 | 
			
		||||
        for value in illegal_cuda_arch_values:
 | 
			
		||||
            conflicts("cuda_arch=" + value)
 | 
			
		||||
 | 
			
		||||
    depends_on("llvm-openmp", when="%apple-clang +openmp")
 | 
			
		||||
        for val in ROCmPackage.amdgpu_targets:
 | 
			
		||||
            depends_on(
 | 
			
		||||
                "aluminum amdgpu_target={0}".format(val),
 | 
			
		||||
                when="+rocm amdgpu_target={0}".format(val),
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
    # TODO: Debug linker errors when NVSHMEM is built with UCX
 | 
			
		||||
    depends_on("nvshmem +nccl~ucx", when="+nvshmem")
 | 
			
		||||
        # CUDA-specific distconv dependencies
 | 
			
		||||
        depends_on("cudnn", when="+cuda")
 | 
			
		||||
 | 
			
		||||
    # Identify versions of cuda_arch that are too old
 | 
			
		||||
    # from lib/spack/spack/build_systems/cuda.py
 | 
			
		||||
    illegal_cuda_arch_values = ["10", "11", "12", "13", "20", "21"]
 | 
			
		||||
    for value in illegal_cuda_arch_values:
 | 
			
		||||
        conflicts("cuda_arch=" + value)
 | 
			
		||||
        # ROCm-specific distconv dependencies
 | 
			
		||||
        depends_on("hipcub", when="+rocm")
 | 
			
		||||
        depends_on("miopen-hip", when="+rocm")
 | 
			
		||||
        depends_on("roctracer-dev", when="+rocm")
 | 
			
		||||
 | 
			
		||||
    with when("+ci+coverage"):
 | 
			
		||||
        depends_on("lcov", type=("build", "run"))
 | 
			
		||||
        depends_on("py-gcovr", type=("build", "run"))
 | 
			
		||||
        # Technically it's not used in the build, but CMake sets up a
 | 
			
		||||
        # target, so it needs to be found.
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def libs(self):
 | 
			
		||||
@@ -138,104 +232,127 @@ def libs(self):
 | 
			
		||||
        return find_libraries("libH2Core", root=self.prefix, shared=shared, recursive=True)
 | 
			
		||||
 | 
			
		||||
    def cmake_args(self):
        """Return the extra command-line arguments for CMake: none.

        A new, empty list is returned on every call.
        """
        return []
 | 
			
		||||
 | 
			
		||||
    def get_cuda_flags(self):
 | 
			
		||||
        spec = self.spec
 | 
			
		||||
        args = []
 | 
			
		||||
        if spec.satisfies("^cuda+allow-unsupported-compilers"):
 | 
			
		||||
            args.append("-allow-unsupported-compiler")
 | 
			
		||||
 | 
			
		||||
        args = [
 | 
			
		||||
            "-DCMAKE_CXX_STANDARD=17",
 | 
			
		||||
            "-DCMAKE_INSTALL_MESSAGE:STRING=LAZY",
 | 
			
		||||
            "-DBUILD_SHARED_LIBS:BOOL=%s" % ("+shared" in spec),
 | 
			
		||||
            "-DH2_ENABLE_ALUMINUM=%s" % ("+al" in spec),
 | 
			
		||||
            "-DH2_ENABLE_CUDA=%s" % ("+cuda" in spec),
 | 
			
		||||
            "-DH2_ENABLE_DISTCONV_LEGACY=%s" % ("+distconv" in spec),
 | 
			
		||||
            "-DH2_ENABLE_OPENMP=%s" % ("+openmp" in spec),
 | 
			
		||||
            "-DH2_ENABLE_FP16=%s" % ("+half" in spec),
 | 
			
		||||
            "-DH2_DEVELOPER_BUILD=%s" % ("+developer" in spec),
 | 
			
		||||
        ]
 | 
			
		||||
        if spec.satisfies("%clang"):
 | 
			
		||||
            for flag in spec.compiler_flags["cxxflags"]:
 | 
			
		||||
                if "gcc-toolchain" in flag:
 | 
			
		||||
                    args.append("-Xcompiler={0}".format(flag))
 | 
			
		||||
        return args
 | 
			
		||||
 | 
			
		||||
        if spec.version < Version("0.3"):
 | 
			
		||||
            args.append("-DH2_ENABLE_HIP_ROCM=%s" % ("+rocm" in spec))
 | 
			
		||||
        else:
 | 
			
		||||
            args.append("-DH2_ENABLE_ROCM=%s" % ("+rocm" in spec))
 | 
			
		||||
    def initconfig_compiler_entries(self):
 | 
			
		||||
        spec = self.spec
 | 
			
		||||
        entries = super(Dihydrogen, self).initconfig_compiler_entries()
 | 
			
		||||
 | 
			
		||||
        if not spec.satisfies("^cmake@3.23.0"):
 | 
			
		||||
            # There is a bug with using Ninja generator in this version
 | 
			
		||||
            # of CMake
 | 
			
		||||
            args.append("-DCMAKE_EXPORT_COMPILE_COMMANDS=ON")
 | 
			
		||||
        # FIXME: Enforce this better in the actual CMake.
 | 
			
		||||
        entries.append(cmake_cache_string("CMAKE_CXX_STANDARD", "17"))
 | 
			
		||||
        entries.append(cmake_cache_option("BUILD_SHARED_LIBS", "+shared" in spec))
 | 
			
		||||
        entries.append(cmake_cache_option("CMAKE_EXPORT_COMPILE_COMMANDS", True))
 | 
			
		||||
 | 
			
		||||
        if "+cuda" in spec:
 | 
			
		||||
            if self.spec.satisfies("%clang"):
 | 
			
		||||
                for flag in self.spec.compiler_flags["cxxflags"]:
 | 
			
		||||
                    if "gcc-toolchain" in flag:
 | 
			
		||||
                        args.append("-DCMAKE_CUDA_FLAGS=-Xcompiler={0}".format(flag))
 | 
			
		||||
            if spec.satisfies("^cuda@11.0:"):
 | 
			
		||||
                args.append("-DCMAKE_CUDA_STANDARD=17")
 | 
			
		||||
            else:
 | 
			
		||||
                args.append("-DCMAKE_CUDA_STANDARD=14")
 | 
			
		||||
            archs = spec.variants["cuda_arch"].value
 | 
			
		||||
            if archs != "none":
 | 
			
		||||
                arch_str = ";".join(archs)
 | 
			
		||||
                args.append("-DCMAKE_CUDA_ARCHITECTURES=%s" % arch_str)
 | 
			
		||||
        # It's possible this should have a `if "platform=cray" in
 | 
			
		||||
        # spec:` in front of it, but it's not clear to me when this is
 | 
			
		||||
        # set. In particular, I don't actually see this blurb showing
 | 
			
		||||
        # up on Tioga builds. Which is causing the obvious problem
 | 
			
		||||
        # (namely, the one this was added to supposedly solve in the
 | 
			
		||||
        # first place).
 | 
			
		||||
        entries.append(cmake_cache_option("MPI_ASSUME_NO_BUILTIN_MPI", True))
 | 
			
		||||
 | 
			
		||||
            if spec.satisfies("%cce") and spec.satisfies("^cuda+allow-unsupported-compilers"):
 | 
			
		||||
                args.append("-DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler")
 | 
			
		||||
 | 
			
		||||
        if "+cuda" in spec:
 | 
			
		||||
            args.append("-DcuDNN_DIR={0}".format(spec["cudnn"].prefix))
 | 
			
		||||
 | 
			
		||||
        if spec.satisfies("^cuda@:10"):
 | 
			
		||||
            if "+cuda" in spec or "+distconv" in spec:
 | 
			
		||||
                args.append("-DCUB_DIR={0}".format(spec["cub"].prefix))
 | 
			
		||||
 | 
			
		||||
        # Add support for OpenMP with external (Brew) clang
 | 
			
		||||
        if spec.satisfies("%clang +openmp platform=darwin"):
 | 
			
		||||
        if spec.satisfies("%clang +distconv platform=darwin"):
 | 
			
		||||
            clang = self.compiler.cc
 | 
			
		||||
            clang_bin = os.path.dirname(clang)
 | 
			
		||||
            clang_root = os.path.dirname(clang_bin)
 | 
			
		||||
            args.extend(
 | 
			
		||||
                [
 | 
			
		||||
                    "-DOpenMP_CXX_FLAGS=-fopenmp=libomp",
 | 
			
		||||
                    "-DOpenMP_CXX_LIB_NAMES=libomp",
 | 
			
		||||
                    "-DOpenMP_libomp_LIBRARY={0}/lib/libomp.dylib".format(clang_root),
 | 
			
		||||
                ]
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
        if "+rocm" in spec:
 | 
			
		||||
            args.extend(
 | 
			
		||||
                [
 | 
			
		||||
                    "-DCMAKE_CXX_FLAGS=-std=c++17",
 | 
			
		||||
                    "-DHIP_ROOT_DIR={0}".format(spec["hip"].prefix),
 | 
			
		||||
                    "-DHIP_CXX_COMPILER={0}".format(self.spec["hip"].hipcc),
 | 
			
		||||
                ]
 | 
			
		||||
            )
 | 
			
		||||
            if "platform=cray" in spec:
 | 
			
		||||
                args.extend(["-DMPI_ASSUME_NO_BUILTIN_MPI=ON"])
 | 
			
		||||
            archs = self.spec.variants["amdgpu_target"].value
 | 
			
		||||
            if archs != "none":
 | 
			
		||||
                arch_str = ",".join(archs)
 | 
			
		||||
                args.append(
 | 
			
		||||
                    "-DHIP_HIPCC_FLAGS=--amdgpu-target={0}"
 | 
			
		||||
                    " -g -fsized-deallocation -fPIC -std=c++17".format(arch_str)
 | 
			
		||||
            entries.append(cmake_cache_string("OpenMP_CXX_FLAGS", "-fopenmp=libomp"))
 | 
			
		||||
            entries.append(cmake_cache_string("OpenMP_CXX_LIB_NAMES", "libomp"))
 | 
			
		||||
            entries.append(
 | 
			
		||||
                cmake_cache_string(
 | 
			
		||||
                    "OpenMP_libomp_LIBRARY", "{0}/lib/libomp.dylib".format(clang_root)
 | 
			
		||||
                )
 | 
			
		||||
                args.extend(
 | 
			
		||||
                    [
 | 
			
		||||
                        "-DCMAKE_HIP_ARCHITECTURES=%s" % arch_str,
 | 
			
		||||
                        "-DAMDGPU_TARGETS=%s" % arch_str,
 | 
			
		||||
                        "-DGPU_TARGETS=%s" % arch_str,
 | 
			
		||||
                    ]
 | 
			
		||||
                )
 | 
			
		||||
 | 
			
		||||
        if self.spec.satisfies("^essl"):
 | 
			
		||||
            # IF IBM ESSL is used it needs help finding the proper LAPACK libraries
 | 
			
		||||
            args.extend(
 | 
			
		||||
                [
 | 
			
		||||
                    "-DLAPACK_LIBRARIES=%s;-llapack;-lblas"
 | 
			
		||||
                    % ";".join("-l{0}".format(lib) for lib in self.spec["essl"].libs.names),
 | 
			
		||||
                    "-DBLAS_LIBRARIES=%s;-lblas"
 | 
			
		||||
                    % ";".join("-l{0}".format(lib) for lib in self.spec["essl"].libs.names),
 | 
			
		||||
                ]
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
        return args
 | 
			
		||||
        return entries
 | 
			
		||||
 | 
			
		||||
    def initconfig_hardware_entries(self):
        """Extend the inherited hardware cache entries with CUDA/ROCm settings.

        Starts from the parent class' ``initconfig_hardware_entries()`` result
        and appends the H2 enable switches plus, when the matching variant is
        active, language standard, architecture list and compiler flags.

        Returns:
            list: the parent's entries followed by the entries added here.
        """
        spec = self.spec
        entries = super(Dihydrogen, self).initconfig_hardware_entries()

        entries.append(cmake_cache_option("H2_ENABLE_CUDA", "+cuda" in spec))
        if spec.satisfies("+cuda"):
            entries.append(cmake_cache_string("CMAKE_CUDA_STANDARD", "17"))
            if not spec.satisfies("cuda_arch=none"):
                archs = spec.variants["cuda_arch"].value
                arch_str = ";".join(archs)
                entries.append(cmake_cache_string("CMAKE_CUDA_ARCHITECTURES", arch_str))

            # FIXME: Should this use the "cuda_flags" function of the
            # CudaPackage class or something? There might be other
            # flags in play, and we need to be sure to get them all.
            cuda_flags = self.get_cuda_flags()
            if cuda_flags:
                entries.append(cmake_cache_string("CMAKE_CUDA_FLAGS", " ".join(cuda_flags)))

        # Versions before 0.3 take H2_ENABLE_HIP_ROCM; 0.3 and later take
        # H2_ENABLE_ROCM. This is the mapping the earlier cmake_args-based
        # recipe used; the two names had been swapped here.
        # NOTE(review): confirm against the option names in DiHydrogen's
        # CMakeLists.txt for each release.
        enable_rocm_var = (
            "H2_ENABLE_HIP_ROCM" if spec.version < Version("0.3") else "H2_ENABLE_ROCM"
        )
        entries.append(cmake_cache_option(enable_rocm_var, "+rocm" in spec))
        if spec.satisfies("+rocm"):
            entries.append(cmake_cache_string("CMAKE_HIP_STANDARD", "17"))
            if not spec.satisfies("amdgpu_target=none"):
                archs = self.spec.variants["amdgpu_target"].value
                arch_str = ";".join(archs)
                # The same target list is published under three variable names.
                entries.append(cmake_cache_string("CMAKE_HIP_ARCHITECTURES", arch_str))
                entries.append(cmake_cache_string("AMDGPU_TARGETS", arch_str))
                entries.append(cmake_cache_string("GPU_TARGETS", arch_str))
            entries.append(cmake_cache_path("HIP_ROOT_DIR", spec["hip"].prefix))

        return entries
 | 
			
		||||
 | 
			
		||||
    def initconfig_package_entries(self):
        """Extend the inherited package cache entries with DiHydrogen options.

        Starts from the parent class' ``initconfig_package_entries()`` result
        and appends the H2 feature switches, ``*_ROOT`` hints for the
        dependencies that are in use, and the BLAS entries produced by
        ``get_blas_entries``.

        Returns:
            list: the parent's entries followed by the entries added here.
        """
        spec = self.spec
        entries = super(Dihydrogen, self).initconfig_package_entries()

        # Basic H2 options
        entries.append(cmake_cache_option("H2_DEVELOPER_BUILD", "+developer" in spec))
        entries.append(cmake_cache_option("H2_ENABLE_TESTS", "+developer" in spec))

        entries.append(cmake_cache_option("H2_ENABLE_CODE_COVERAGE", "+coverage" in spec))
        entries.append(cmake_cache_option("H2_CI_BUILD", "+ci" in spec))

        entries.append(cmake_cache_option("H2_ENABLE_DACE", "+dace" in spec))

        # DistConv options
        entries.append(cmake_cache_option("H2_ENABLE_ALUMINUM", "+distconv" in spec))
        entries.append(cmake_cache_option("H2_ENABLE_DISTCONV_LEGACY", "+distconv" in spec))
        entries.append(cmake_cache_option("H2_ENABLE_OPENMP", "+distconv" in spec))

        # Paths to stuff, just in case. CMAKE_PREFIX_PATH should catch
        # all this, but this shouldn't hurt to have.
        entries.append(cmake_cache_path("spdlog_ROOT", spec["spdlog"].prefix))

        if "+developer" in spec:
            entries.append(cmake_cache_path("Catch2_ROOT", spec["catch2"].prefix))

        # lcov and py-gcovr are only declared as dependencies under
        # when("+ci+coverage"), so their prefixes may only be looked up when
        # both variants are on; "+coverage" alone has no lcov in the spec.
        if "+coverage" in spec and "+ci" in spec:
            entries.append(cmake_cache_path("lcov_ROOT", spec["lcov"].prefix))
            entries.append(cmake_cache_path("genhtml_ROOT", spec["lcov"].prefix))
            entries.append(cmake_cache_path("gcovr_ROOT", spec["py-gcovr"].prefix))

        if "+distconv" in spec:
            entries.append(cmake_cache_path("Aluminum_ROOT", spec["aluminum"].prefix))
            if "+cuda" in spec:
                entries.append(cmake_cache_path("cuDNN_ROOT", spec["cudnn"].prefix))

        # Currently this is a hack for all Hydrogen versions. WIP to
        # fix this at develop.
        entries.extend(get_blas_entries(spec))
        return entries
 | 
			
		||||
 | 
			
		||||
    def setup_build_environment(self, env):
 | 
			
		||||
        if self.spec.satisfies("%apple-clang +openmp"):
 | 
			
		||||
 
 | 
			
		||||
@@ -7,254 +7,268 @@
 | 
			
		||||
 | 
			
		||||
from spack.package import *
 | 
			
		||||
 | 
			
		||||
# This limits the versions of lots of things pretty severely.
 | 
			
		||||
#
 | 
			
		||||
#   - Only v1.5.2 and newer are buildable.
 | 
			
		||||
#   - CMake must be v3.22 or newer.
 | 
			
		||||
#   - CUDA must be v11.0.0 or newer.
 | 
			
		||||
 | 
			
		||||
class Hydrogen(CMakePackage, CudaPackage, ROCmPackage):
 | 
			
		||||
 | 
			
		||||
class Hydrogen(CachedCMakePackage, CudaPackage, ROCmPackage):
 | 
			
		||||
    """Hydrogen: Distributed-memory dense and sparse-direct linear algebra
 | 
			
		||||
    and optimization library. Based on the Elemental library."""
 | 
			
		||||
 | 
			
		||||
    homepage = "https://libelemental.org"
 | 
			
		||||
    url = "https://github.com/LLNL/Elemental/archive/v1.0.1.tar.gz"
 | 
			
		||||
    url = "https://github.com/LLNL/Elemental/archive/v1.5.1.tar.gz"
 | 
			
		||||
    git = "https://github.com/LLNL/Elemental.git"
 | 
			
		||||
    tags = ["ecp", "radiuss"]
 | 
			
		||||
 | 
			
		||||
    maintainers("bvanessen")
 | 
			
		||||
 | 
			
		||||
    version("develop", branch="hydrogen")
 | 
			
		||||
    version("1.5.3", sha256="faefbe738bd364d0e26ce9ad079a11c93a18c6f075719a365fd4fa5f1f7a989a")
 | 
			
		||||
    version("1.5.2", sha256="a902cad3962471216cfa278ba0561c18751d415cd4d6b2417c02a43b0ab2ea33")
 | 
			
		||||
    version("1.5.1", sha256="447da564278f98366906d561d9c8bc4d31678c56d761679c2ff3e59ee7a2895c")
 | 
			
		||||
    version("1.5.0", sha256="03dd487fb23b9fdbc715554a8ea48c3196a1021502e61b0172ef3fdfbee75180")
 | 
			
		||||
    version("1.4.0", sha256="c13374ff4a6c4d1076e47ba8c8d91a7082588b9958d1ed89cffb12f1d2e1452e")
 | 
			
		||||
    version("1.3.4", sha256="7979f6656f698f0bbad6798b39d4b569835b3013ff548d98089fce7c283c6741")
 | 
			
		||||
    version("1.3.3", sha256="a51a1cfd40ac74d10923dfce35c2c04a3082477683f6b35e7b558ea9f4bb6d51")
 | 
			
		||||
    version("1.3.2", sha256="50bc5e87955f8130003d04dfd9dcad63107e92b82704f8107baf95b0ccf98ed6")
 | 
			
		||||
    version("1.3.1", sha256="a8b8521458e9e747f2b24af87c4c2749a06e500019c383e0cefb33e5df6aaa1d")
 | 
			
		||||
    version("1.3.0", sha256="0f3006aa1d8235ecdd621e7344c99f56651c6836c2e1bc0cf006331b70126b36")
 | 
			
		||||
    version("1.2.0", sha256="8545975139582ee7bfe5d00f8d83a8697afc285bf7026b0761e9943355974806")
 | 
			
		||||
    version("1.1.0-1", sha256="73ce05e4166853a186469269cb00a454de71e126b2019f95bbae703b65606808")
 | 
			
		||||
    version("1.1.0", sha256="b4c12913acd01c72d31f4522266bfeb8df1d4d3b4aef02e07ccbc9a477894e71")
 | 
			
		||||
    version("1.0.1", sha256="27cf76e1ef1d58bd8f9b1e34081a14a682b7ff082fb5d1da56713e5e0040e528")
 | 
			
		||||
    version("1.0", sha256="d8a97de3133f2c6b6bb4b80d32b4a4cc25eb25e0df4f0cec0f8cb19bf34ece98")
 | 
			
		||||
    # Older versions are no longer supported.
 | 
			
		||||
 | 
			
		||||
    variant("shared", default=True, description="Enables the build of shared libraries")
 | 
			
		||||
    variant("openmp", default=True, description="Make use of OpenMP within CPU-kernels")
 | 
			
		||||
    variant(
 | 
			
		||||
        "openmp_blas", default=False, description="Use OpenMP for threading in the BLAS library"
 | 
			
		||||
    )
 | 
			
		||||
    variant("quad", default=False, description="Enable quad precision")
 | 
			
		||||
    variant("int64", default=False, description="Use 64bit integers")
 | 
			
		||||
    variant("int64_blas", default=False, description="Use 64bit integers for BLAS.")
 | 
			
		||||
    variant("scalapack", default=False, description="Build with ScaLAPACK library")
 | 
			
		||||
    variant("shared", default=True, description="Enables the build of shared libraries.")
 | 
			
		||||
    variant(
 | 
			
		||||
        "build_type",
 | 
			
		||||
        default="Release",
 | 
			
		||||
        description="The build type to build",
 | 
			
		||||
        values=("Debug", "Release"),
 | 
			
		||||
    )
 | 
			
		||||
    variant("int64", default=False, description="Use 64-bit integers")
 | 
			
		||||
    variant("al", default=False, description="Use Aluminum communication library")
 | 
			
		||||
    variant(
 | 
			
		||||
        "cub", default=True, when="+cuda", description="Use CUB/hipCUB for GPU memory management"
 | 
			
		||||
    )
 | 
			
		||||
    variant(
 | 
			
		||||
        "cub", default=True, when="+rocm", description="Use CUB/hipCUB for GPU memory management"
 | 
			
		||||
    )
 | 
			
		||||
    variant("half", default=False, description="Support for FP16 precision data types")
 | 
			
		||||
 | 
			
		||||
    # TODO: Add netlib-lapack. For GPU-enabled builds, typical
 | 
			
		||||
    # workflows don't touch host BLAS/LAPACK all that often, and even
 | 
			
		||||
    # less frequently in performance-critical regions.
 | 
			
		||||
    variant(
 | 
			
		||||
        "blas",
 | 
			
		||||
        default="openblas",
 | 
			
		||||
        values=("openblas", "mkl", "accelerate", "essl", "libsci"),
 | 
			
		||||
        description="Enable the use of OpenBlas/MKL/Accelerate/ESSL/LibSci",
 | 
			
		||||
        default="any",
 | 
			
		||||
        values=("any", "openblas", "mkl", "accelerate", "essl", "libsci"),
 | 
			
		||||
        description="Specify a host BLAS library preference",
 | 
			
		||||
    )
 | 
			
		||||
    variant(
 | 
			
		||||
        "mpfr",
 | 
			
		||||
        default=False,
 | 
			
		||||
        description="Support GNU MPFR's" "arbitrary-precision floating-point arithmetic",
 | 
			
		||||
    )
 | 
			
		||||
    variant("test", default=False, description="Builds test suite")
 | 
			
		||||
    variant("al", default=False, description="Builds with Aluminum communication library")
 | 
			
		||||
    variant("int64_blas", default=False, description="Use 64-bit integers for (host) BLAS.")
 | 
			
		||||
 | 
			
		||||
    variant("openmp", default=True, description="Make use of OpenMP within CPU kernels")
 | 
			
		||||
    variant(
 | 
			
		||||
        "omp_taskloops",
 | 
			
		||||
        when="+openmp",
 | 
			
		||||
        default=False,
 | 
			
		||||
        description="Use OpenMP taskloops instead of parallel for loops.",
 | 
			
		||||
        description="Use OpenMP taskloops instead of parallel for loops",
 | 
			
		||||
    )
 | 
			
		||||
    variant("half", default=False, description="Builds with support for FP16 precision data types")
 | 
			
		||||
 | 
			
		||||
    conflicts("~openmp", when="+omp_taskloops")
 | 
			
		||||
    # Users should spec this on their own on the command line, no?
 | 
			
		||||
    # This doesn't affect Hydrogen itself at all. Not one bit.
 | 
			
		||||
    # variant(
 | 
			
		||||
    #     "openmp_blas",
 | 
			
		||||
    #     default=False,
 | 
			
		||||
    #     description="Use OpenMP for threading in the BLAS library")
 | 
			
		||||
 | 
			
		||||
    variant("test", default=False, description="Builds test suite")
 | 
			
		||||
 | 
			
		||||
    conflicts("+cuda", when="+rocm", msg="CUDA and ROCm support are mutually exclusive")
 | 
			
		||||
    conflicts("+half", when="+rocm", msg="FP16 support not implemented for ROCm.")
 | 
			
		||||
 | 
			
		||||
    depends_on("cmake@3.21.0:", type="build", when="@1.5.2:")
 | 
			
		||||
    depends_on("cmake@3.17.0:", type="build", when="@:1.5.1")
 | 
			
		||||
    depends_on("cmake@3.22.0:", type="build", when="%cce")
 | 
			
		||||
    depends_on("cmake@3.22.0:", type="build", when="@1.5.2:")
 | 
			
		||||
    depends_on("cmake@3.17.0:", type="build", when="@1.5.1")
 | 
			
		||||
 | 
			
		||||
    depends_on("mpi")
 | 
			
		||||
    depends_on("hwloc@1.11:")
 | 
			
		||||
    depends_on("hwloc +cuda +nvml", when="+cuda")
 | 
			
		||||
    depends_on("hwloc@2.3.0:", when="+rocm")
 | 
			
		||||
    depends_on("blas")
 | 
			
		||||
    depends_on("lapack")
 | 
			
		||||
 | 
			
		||||
    # Note that #1712 forces us to enumerate the different blas variants
 | 
			
		||||
    # Note that this forces us to use OpenBLAS until #1712 is fixed
 | 
			
		||||
    depends_on("openblas", when="blas=openblas")
 | 
			
		||||
    depends_on("openblas +ilp64", when="blas=openblas +int64_blas")
 | 
			
		||||
    depends_on("openblas threads=openmp", when="blas=openblas +openmp_blas")
 | 
			
		||||
 | 
			
		||||
    depends_on("intel-mkl", when="blas=mkl")
 | 
			
		||||
    depends_on("intel-mkl +ilp64", when="blas=mkl +int64_blas")
 | 
			
		||||
    depends_on("intel-mkl threads=openmp", when="blas=mkl +openmp_blas")
 | 
			
		||||
 | 
			
		||||
    # I don't think this is true...
 | 
			
		||||
    depends_on("veclibfort", when="blas=accelerate")
 | 
			
		||||
    conflicts("blas=accelerate +openmp_blas")
 | 
			
		||||
 | 
			
		||||
    depends_on("essl", when="blas=essl")
 | 
			
		||||
    depends_on("essl +ilp64", when="blas=essl +int64_blas")
 | 
			
		||||
    depends_on("essl threads=openmp", when="blas=essl +openmp_blas")
 | 
			
		||||
 | 
			
		||||
    depends_on("netlib-lapack +external-blas", when="blas=essl")
 | 
			
		||||
 | 
			
		||||
    depends_on("cray-libsci", when="blas=libsci")
 | 
			
		||||
    depends_on("cray-libsci +openmp", when="blas=libsci +openmp_blas")
 | 
			
		||||
 | 
			
		||||
    # Specify the correct version of Aluminum
 | 
			
		||||
    depends_on("aluminum@:0.3", when="@:1.3 +al")
 | 
			
		||||
    depends_on("aluminum@0.4.0:0.4", when="@1.4.0:1.4 +al")
 | 
			
		||||
    depends_on("aluminum@0.6.0:0.6", when="@1.5.0:1.5.1 +al")
 | 
			
		||||
    depends_on("aluminum@0.7.0:", when="@:1.0,1.5.2: +al")
 | 
			
		||||
    depends_on("aluminum@0.7.0:", when="@1.5.2: +al")
 | 
			
		||||
 | 
			
		||||
    # Add Aluminum variants
 | 
			
		||||
    depends_on("aluminum +cuda +nccl +cuda_rma", when="+al +cuda")
 | 
			
		||||
    depends_on("aluminum +rocm +rccl", when="+al +rocm")
 | 
			
		||||
    depends_on("aluminum +cuda +ht", when="+al +cuda")
 | 
			
		||||
    depends_on("aluminum +rocm +ht", when="+al +rocm")
 | 
			
		||||
 | 
			
		||||
    for arch in CudaPackage.cuda_arch_values:
 | 
			
		||||
        depends_on("aluminum cuda_arch=%s" % arch, when="+al +cuda cuda_arch=%s" % arch)
 | 
			
		||||
        depends_on("aluminum +cuda cuda_arch=%s" % arch, when="+al +cuda cuda_arch=%s" % arch)
 | 
			
		||||
 | 
			
		||||
    # variants +rocm and amdgpu_targets are not automatically passed to
 | 
			
		||||
    # dependencies, so do it manually.
 | 
			
		||||
    for val in ROCmPackage.amdgpu_targets:
 | 
			
		||||
        depends_on("aluminum amdgpu_target=%s" % val, when="+al +rocm amdgpu_target=%s" % val)
 | 
			
		||||
        depends_on(
 | 
			
		||||
            "aluminum +rocm amdgpu_target=%s" % val, when="+al +rocm amdgpu_target=%s" % val
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    # Note that this forces us to use OpenBLAS until #1712 is fixed
 | 
			
		||||
    depends_on("lapack", when="blas=openblas ~openmp_blas")
 | 
			
		||||
 | 
			
		||||
    depends_on("scalapack", when="+scalapack")
 | 
			
		||||
    depends_on("gmp", when="+mpfr")
 | 
			
		||||
    depends_on("mpc", when="+mpfr")
 | 
			
		||||
    depends_on("mpfr", when="+mpfr")
 | 
			
		||||
 | 
			
		||||
    depends_on("cuda", when="+cuda")
 | 
			
		||||
    depends_on("cub", when="^cuda@:10")
 | 
			
		||||
    depends_on("hipcub", when="+rocm")
 | 
			
		||||
    depends_on("cuda@11.0.0:", when="+cuda")
 | 
			
		||||
    depends_on("hipcub +rocm", when="+rocm +cub")
 | 
			
		||||
    depends_on("half", when="+half")
 | 
			
		||||
 | 
			
		||||
    depends_on("llvm-openmp", when="%apple-clang +openmp")
 | 
			
		||||
 | 
			
		||||
    conflicts(
 | 
			
		||||
        "@0:0.98",
 | 
			
		||||
        msg="Hydrogen did not exist before v0.99. " + "Did you mean to use Elemental instead?",
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    generator("ninja")
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def libs(self):
 | 
			
		||||
        shared = True if "+shared" in self.spec else False
 | 
			
		||||
        return find_libraries("libEl", root=self.prefix, shared=shared, recursive=True)
 | 
			
		||||
        return find_libraries("libHydrogen", root=self.prefix, shared=shared, recursive=True)
 | 
			
		||||
 | 
			
		||||
    def cmake_args(self):
 | 
			
		||||
        args = []
 | 
			
		||||
        return args
 | 
			
		||||
 | 
			
		||||
    def get_cuda_flags(self):
 | 
			
		||||
        spec = self.spec
 | 
			
		||||
        args = []
 | 
			
		||||
        if spec.satisfies("^cuda+allow-unsupported-compilers"):
 | 
			
		||||
            args.append("-allow-unsupported-compiler")
 | 
			
		||||
 | 
			
		||||
        enable_gpu_fp16 = "+cuda" in spec and "+half" in spec
 | 
			
		||||
        if spec.satisfies("%clang"):
 | 
			
		||||
            for flag in spec.compiler_flags["cxxflags"]:
 | 
			
		||||
                if "gcc-toolchain" in flag:
 | 
			
		||||
                    args.append("-Xcompiler={0}".format(flag))
 | 
			
		||||
        return args
 | 
			
		||||
 | 
			
		||||
        args = [
 | 
			
		||||
            "-DCMAKE_CXX_STANDARD=17",
 | 
			
		||||
            "-DCMAKE_INSTALL_MESSAGE:STRING=LAZY",
 | 
			
		||||
            "-DBUILD_SHARED_LIBS:BOOL=%s" % ("+shared" in spec),
 | 
			
		||||
            "-DHydrogen_ENABLE_OPENMP:BOOL=%s" % ("+openmp" in spec),
 | 
			
		||||
            "-DHydrogen_ENABLE_QUADMATH:BOOL=%s" % ("+quad" in spec),
 | 
			
		||||
            "-DHydrogen_USE_64BIT_INTS:BOOL=%s" % ("+int64" in spec),
 | 
			
		||||
            "-DHydrogen_USE_64BIT_BLAS_INTS:BOOL=%s" % ("+int64_blas" in spec),
 | 
			
		||||
            "-DHydrogen_ENABLE_MPC:BOOL=%s" % ("+mpfr" in spec),
 | 
			
		||||
            "-DHydrogen_GENERAL_LAPACK_FALLBACK=ON",
 | 
			
		||||
            "-DHydrogen_ENABLE_ALUMINUM=%s" % ("+al" in spec),
 | 
			
		||||
            "-DHydrogen_ENABLE_CUB=%s" % ("+cuda" in spec or "+rocm" in spec),
 | 
			
		||||
            "-DHydrogen_ENABLE_CUDA=%s" % ("+cuda" in spec),
 | 
			
		||||
            "-DHydrogen_ENABLE_ROCM=%s" % ("+rocm" in spec),
 | 
			
		||||
            "-DHydrogen_ENABLE_TESTING=%s" % ("+test" in spec),
 | 
			
		||||
            "-DHydrogen_ENABLE_HALF=%s" % ("+half" in spec),
 | 
			
		||||
            "-DHydrogen_ENABLE_GPU_FP16=%s" % enable_gpu_fp16,
 | 
			
		||||
        ]
 | 
			
		||||
    def std_initconfig_entries(self):
 | 
			
		||||
        entries = super(Hydrogen, self).std_initconfig_entries()
 | 
			
		||||
 | 
			
		||||
        if not spec.satisfies("^cmake@3.23.0"):
 | 
			
		||||
            # There is a bug with using Ninja generator in this version
 | 
			
		||||
            # of CMake
 | 
			
		||||
            args.append("-DCMAKE_EXPORT_COMPILE_COMMANDS=ON")
 | 
			
		||||
        # CMAKE_PREFIX_PATH, in CMake types, is a "STRING", not a "PATH". :/
 | 
			
		||||
        entries = [x for x in entries if "CMAKE_PREFIX_PATH" not in x]
 | 
			
		||||
        cmake_prefix_path = os.environ["CMAKE_PREFIX_PATH"].replace(":", ";")
 | 
			
		||||
        entries.append(cmake_cache_string("CMAKE_PREFIX_PATH", cmake_prefix_path))
 | 
			
		||||
        # IDK why this is here, but it was in the original recipe. So, yeah.
 | 
			
		||||
        entries.append(cmake_cache_string("CMAKE_INSTALL_MESSAGE", "LAZY"))
 | 
			
		||||
        return entries
 | 
			
		||||
 | 
			
		||||
        if "+cuda" in spec:
 | 
			
		||||
            if self.spec.satisfies("%clang"):
 | 
			
		||||
                for flag in self.spec.compiler_flags["cxxflags"]:
 | 
			
		||||
                    if "gcc-toolchain" in flag:
 | 
			
		||||
                        args.append("-DCMAKE_CUDA_FLAGS=-Xcompiler={0}".format(flag))
 | 
			
		||||
            args.append("-DCMAKE_CUDA_STANDARD=14")
 | 
			
		||||
            archs = spec.variants["cuda_arch"].value
 | 
			
		||||
            if archs != "none":
 | 
			
		||||
                arch_str = ";".join(archs)
 | 
			
		||||
                args.append("-DCMAKE_CUDA_ARCHITECTURES=%s" % arch_str)
 | 
			
		||||
    def initconfig_compiler_entries(self):
 | 
			
		||||
        spec = self.spec
 | 
			
		||||
        entries = super(Hydrogen, self).initconfig_compiler_entries()
 | 
			
		||||
 | 
			
		||||
            if spec.satisfies("%cce") and spec.satisfies("^cuda+allow-unsupported-compilers"):
 | 
			
		||||
                args.append("-DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler")
 | 
			
		||||
        # FIXME: Enforce this better in the actual CMake.
 | 
			
		||||
        entries.append(cmake_cache_string("CMAKE_CXX_STANDARD", "17"))
 | 
			
		||||
        entries.append(cmake_cache_option("BUILD_SHARED_LIBS", "+shared" in spec))
 | 
			
		||||
        entries.append(cmake_cache_option("CMAKE_EXPORT_COMPILE_COMMANDS", True))
 | 
			
		||||
 | 
			
		||||
        if "+rocm" in spec:
 | 
			
		||||
            args.extend(
 | 
			
		||||
                [
 | 
			
		||||
                    "-DCMAKE_CXX_FLAGS=-std=c++17",
 | 
			
		||||
                    "-DHIP_ROOT_DIR={0}".format(spec["hip"].prefix),
 | 
			
		||||
                    "-DHIP_CXX_COMPILER={0}".format(self.spec["hip"].hipcc),
 | 
			
		||||
                ]
 | 
			
		||||
            )
 | 
			
		||||
            archs = self.spec.variants["amdgpu_target"].value
 | 
			
		||||
            if archs != "none":
 | 
			
		||||
                arch_str = ",".join(archs)
 | 
			
		||||
                cxxflags_str = " ".join(self.spec.compiler_flags["cxxflags"])
 | 
			
		||||
                args.append(
 | 
			
		||||
                    "-DHIP_HIPCC_FLAGS=--amdgpu-target={0}"
 | 
			
		||||
                    " -g -fsized-deallocation -fPIC {1}"
 | 
			
		||||
                    " -std=c++17".format(arch_str, cxxflags_str)
 | 
			
		||||
                )
 | 
			
		||||
                args.extend(
 | 
			
		||||
                    [
 | 
			
		||||
                        "-DCMAKE_HIP_ARCHITECTURES=%s" % arch_str,
 | 
			
		||||
                        "-DAMDGPU_TARGETS=%s" % arch_str,
 | 
			
		||||
                        "-DGPU_TARGETS=%s" % arch_str,
 | 
			
		||||
                    ]
 | 
			
		||||
                )
 | 
			
		||||
        entries.append(cmake_cache_option("MPI_ASSUME_NO_BUILTIN_MPI", True))
 | 
			
		||||
 | 
			
		||||
        # Add support for OS X to find OpenMP (LLVM installed via brew)
 | 
			
		||||
        if self.spec.satisfies("%clang +openmp platform=darwin"):
 | 
			
		||||
        if spec.satisfies("%clang +openmp platform=darwin") or spec.satisfies(
 | 
			
		||||
            "%clang +omp_taskloops platform=darwin"
 | 
			
		||||
        ):
 | 
			
		||||
            clang = self.compiler.cc
 | 
			
		||||
            clang_bin = os.path.dirname(clang)
 | 
			
		||||
            clang_root = os.path.dirname(clang_bin)
 | 
			
		||||
            args.extend(["-DOpenMP_DIR={0}".format(clang_root)])
 | 
			
		||||
            entries.append(cmake_cache_string("OpenMP_CXX_FLAGS", "-fopenmp=libomp"))
 | 
			
		||||
            entries.append(cmake_cache_string("OpenMP_CXX_LIB_NAMES", "libomp"))
 | 
			
		||||
            entries.append(
 | 
			
		||||
                cmake_cache_string(
 | 
			
		||||
                    "OpenMP_libomp_LIBRARY", "{0}/lib/libomp.dylib".format(clang_root)
 | 
			
		||||
                )
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
        return entries
 | 
			
		||||
 | 
			
		||||
    def initconfig_hardware_entries(self):
 | 
			
		||||
        spec = self.spec
 | 
			
		||||
        entries = super(Hydrogen, self).initconfig_hardware_entries()
 | 
			
		||||
 | 
			
		||||
        entries.append(cmake_cache_option("Hydrogen_ENABLE_CUDA", "+cuda" in spec))
 | 
			
		||||
        if spec.satisfies("+cuda"):
 | 
			
		||||
            entries.append(cmake_cache_string("CMAKE_CUDA_STANDARD", "17"))
 | 
			
		||||
            if not spec.satisfies("cuda_arch=none"):
 | 
			
		||||
                archs = spec.variants["cuda_arch"].value
 | 
			
		||||
                arch_str = ";".join(archs)
 | 
			
		||||
                entries.append(cmake_cache_string("CMAKE_CUDA_ARCHITECTURES", arch_str))
 | 
			
		||||
 | 
			
		||||
            # FIXME: Should this use the "cuda_flags" function of the
 | 
			
		||||
            # CudaPackage class or something? There might be other
 | 
			
		||||
            # flags in play, and we need to be sure to get them all.
 | 
			
		||||
            cuda_flags = self.get_cuda_flags()
 | 
			
		||||
            if len(cuda_flags) > 0:
 | 
			
		||||
                entries.append(cmake_cache_string("CMAKE_CUDA_FLAGS", " ".join(cuda_flags)))
 | 
			
		||||
 | 
			
		||||
        entries.append(cmake_cache_option("Hydrogen_ENABLE_ROCM", "+rocm" in spec))
 | 
			
		||||
        if spec.satisfies("+rocm"):
 | 
			
		||||
            entries.append(cmake_cache_string("CMAKE_HIP_STANDARD", "17"))
 | 
			
		||||
            if not spec.satisfies("amdgpu_target=none"):
 | 
			
		||||
                archs = self.spec.variants["amdgpu_target"].value
 | 
			
		||||
                arch_str = ";".join(archs)
 | 
			
		||||
                entries.append(cmake_cache_string("CMAKE_HIP_ARCHITECTURES", arch_str))
 | 
			
		||||
                entries.append(cmake_cache_string("AMDGPU_TARGETS", arch_str))
 | 
			
		||||
                entries.append(cmake_cache_string("GPU_TARGETS", arch_str))
 | 
			
		||||
            entries.append(cmake_cache_path("HIP_ROOT_DIR", spec["hip"].prefix))
 | 
			
		||||
 | 
			
		||||
        return entries
 | 
			
		||||
 | 
			
		||||
    def initconfig_package_entries(self):
 | 
			
		||||
        spec = self.spec
 | 
			
		||||
        entries = super(Hydrogen, self).initconfig_package_entries()
 | 
			
		||||
 | 
			
		||||
        # Basic Hydrogen options
 | 
			
		||||
        entries.append(cmake_cache_option("Hydrogen_ENABLE_TESTING", "+test" in spec))
 | 
			
		||||
        entries.append(cmake_cache_option("Hydrogen_GENERAL_LAPACK_FALLBACK", True))
 | 
			
		||||
        entries.append(cmake_cache_option("Hydrogen_USE_64BIT_INTS", "+int64" in spec))
 | 
			
		||||
        entries.append(cmake_cache_option("Hydrogen_USE_64BIT_BLAS_INTS", "+int64_blas" in spec))
 | 
			
		||||
 | 
			
		||||
        # Advanced dependency options
 | 
			
		||||
        entries.append(cmake_cache_option("Hydrogen_ENABLE_ALUMINUM", "+al" in spec))
 | 
			
		||||
        entries.append(cmake_cache_option("Hydrogen_ENABLE_CUB", "+cub" in spec))
 | 
			
		||||
        entries.append(cmake_cache_option("Hydrogen_ENABLE_GPU_FP16", "+cuda +half" in spec))
 | 
			
		||||
        entries.append(cmake_cache_option("Hydrogen_ENABLE_HALF", "+half" in spec))
 | 
			
		||||
        entries.append(cmake_cache_option("Hydrogen_ENABLE_OPENMP", "+openmp" in spec))
 | 
			
		||||
        entries.append(
 | 
			
		||||
            cmake_cache_option("Hydrogen_ENABLE_OMP_TASKLOOP", "+omp_taskloops" in spec)
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        # Note that CUDA/ROCm are handled above.
 | 
			
		||||
 | 
			
		||||
        if "blas=openblas" in spec:
 | 
			
		||||
            args.extend(
 | 
			
		||||
                [
 | 
			
		||||
                    "-DHydrogen_USE_OpenBLAS:BOOL=%s" % ("blas=openblas" in spec),
 | 
			
		||||
                    "-DOpenBLAS_DIR:STRING={0}".format(spec["openblas"].prefix),
 | 
			
		||||
                ]
 | 
			
		||||
            )
 | 
			
		||||
        elif "blas=mkl" in spec:
 | 
			
		||||
            args.extend(["-DHydrogen_USE_MKL:BOOL=%s" % ("blas=mkl" in spec)])
 | 
			
		||||
        elif "blas=accelerate" in spec:
 | 
			
		||||
            args.extend(["-DHydrogen_USE_ACCELERATE:BOOL=TRUE"])
 | 
			
		||||
        elif "blas=essl" in spec:
 | 
			
		||||
            entries.append(cmake_cache_option("Hydrogen_USE_OpenBLAS", "blas=openblas" in spec))
 | 
			
		||||
            # CMAKE_PREFIX_PATH should handle this
 | 
			
		||||
            entries.append(cmake_cache_string("OpenBLAS_DIR", spec["openblas"].prefix))
 | 
			
		||||
        elif "blas=mkl" in spec or spec.satisfies("^intel-mkl"):
 | 
			
		||||
            entries.append(cmake_cache_option("Hydrogen_USE_MKL", True))
 | 
			
		||||
        elif "blas=essl" in spec or spec.satisfies("^essl"):
 | 
			
		||||
            entries.append(cmake_cache_string("BLA_VENDOR", "IBMESSL"))
 | 
			
		||||
            # IF IBM ESSL is used it needs help finding the proper LAPACK libraries
 | 
			
		||||
            args.extend(
 | 
			
		||||
                [
 | 
			
		||||
                    "-DLAPACK_LIBRARIES=%s;-llapack;-lblas"
 | 
			
		||||
            entries.append(
 | 
			
		||||
                cmake_cache_string(
 | 
			
		||||
                    "LAPACK_LIBRARIES",
 | 
			
		||||
                    "%s;-llapack;-lblas"
 | 
			
		||||
                    % ";".join("-l{0}".format(lib) for lib in self.spec["essl"].libs.names),
 | 
			
		||||
                    "-DBLAS_LIBRARIES=%s;-lblas"
 | 
			
		||||
                )
 | 
			
		||||
            )
 | 
			
		||||
            entries.append(
 | 
			
		||||
                cmake_cache_string(
 | 
			
		||||
                    "BLAS_LIBRARIES",
 | 
			
		||||
                    "%s;-lblas"
 | 
			
		||||
                    % ";".join("-l{0}".format(lib) for lib in self.spec["essl"].libs.names),
 | 
			
		||||
                ]
 | 
			
		||||
                )
 | 
			
		||||
            )
 | 
			
		||||
        elif "blas=accelerate" in spec:
 | 
			
		||||
            entries.append(cmake_cache_option("Hydrogen_USE_ACCELERATE", True))
 | 
			
		||||
        elif spec.satisfies("^netlib-lapack"):
 | 
			
		||||
            entries.append(cmake_cache_string("BLA_VENDOR", "Generic"))
 | 
			
		||||
 | 
			
		||||
        if "+omp_taskloops" in spec:
 | 
			
		||||
            args.extend(["-DHydrogen_ENABLE_OMP_TASKLOOP:BOOL=%s" % ("+omp_taskloops" in spec)])
 | 
			
		||||
 | 
			
		||||
        if "+al" in spec:
 | 
			
		||||
            args.extend(
 | 
			
		||||
                [
 | 
			
		||||
                    "-DHydrogen_ENABLE_ALUMINUM:BOOL=%s" % ("+al" in spec),
 | 
			
		||||
                    "-DALUMINUM_DIR={0}".format(spec["aluminum"].prefix),
 | 
			
		||||
                ]
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
        return args
 | 
			
		||||
        return entries
 | 
			
		||||
 | 
			
		||||
    def setup_build_environment(self, env):
 | 
			
		||||
        if self.spec.satisfies("%apple-clang +openmp"):
 | 
			
		||||
 
 | 
			
		||||
@@ -0,0 +1,39 @@
 | 
			
		||||
diff --git a/src/callbacks/memory_profiler.cpp b/src/callbacks/memory_profiler.cpp
 | 
			
		||||
index 0d5cec5d2..6f40705af 100644
 | 
			
		||||
--- a/src/callbacks/memory_profiler.cpp
 | 
			
		||||
+++ b/src/callbacks/memory_profiler.cpp
 | 
			
		||||
@@ -158,7 +158,10 @@ struct MemUsage
 | 
			
		||||
   size_t total_mem;
 | 
			
		||||
 
 | 
			
		||||
   MemUsage(const std::string& r, size_t m) : report(r), total_mem(m) {}
 | 
			
		||||
-  bool operator<(const MemUsage& other) { return total_mem < other.total_mem; }
 | 
			
		||||
+  bool operator<(const MemUsage& other) const
 | 
			
		||||
+  {
 | 
			
		||||
+    return total_mem < other.total_mem;
 | 
			
		||||
+  }
 | 
			
		||||
 };
 | 
			
		||||
 } // namespace
 | 
			
		||||
 
 | 
			
		||||
diff --git a/src/optimizers/adam.cpp b/src/optimizers/adam.cpp
 | 
			
		||||
index d00dfbe7c..1d9ad3949 100644
 | 
			
		||||
--- a/src/optimizers/adam.cpp
 | 
			
		||||
+++ b/src/optimizers/adam.cpp
 | 
			
		||||
@@ -34,14 +34,12 @@
 | 
			
		||||
 
 | 
			
		||||
 namespace lbann {
 | 
			
		||||
 
 | 
			
		||||
-#if defined (LBANN_HAS_ROCM) && defined (LBANN_HAS_GPU_FP16)
 | 
			
		||||
+#if defined(LBANN_HAS_ROCM) && defined(LBANN_HAS_GPU_FP16)
 | 
			
		||||
 namespace {
 | 
			
		||||
-bool isfinite(fp16 const& x)
 | 
			
		||||
-{
 | 
			
		||||
-  return std::isfinite(float(x));
 | 
			
		||||
-}
 | 
			
		||||
-}
 | 
			
		||||
+bool isfinite(fp16 const& x) { return std::isfinite(float(x)); }
 | 
			
		||||
+} // namespace
 | 
			
		||||
 #endif
 | 
			
		||||
+using std::isfinite;
 | 
			
		||||
 
 | 
			
		||||
 template <typename TensorDataType>
 | 
			
		||||
 adam<TensorDataType>::adam(TensorDataType learning_rate,
 | 
			
		||||
@@ -5,7 +5,6 @@
 | 
			
		||||
 | 
			
		||||
import os
 | 
			
		||||
import socket
 | 
			
		||||
import sys
 | 
			
		||||
 | 
			
		||||
from spack.package import *
 | 
			
		||||
 | 
			
		||||
@@ -24,109 +23,42 @@ class Lbann(CachedCMakePackage, CudaPackage, ROCmPackage):
 | 
			
		||||
    maintainers("bvanessen")
 | 
			
		||||
 | 
			
		||||
    version("develop", branch="develop")
 | 
			
		||||
    version("0.102", sha256="3734a76794991207e2dd2221f05f0e63a86ddafa777515d93d99d48629140f1a")
 | 
			
		||||
    version("benchmarking", branch="benchmarking")
 | 
			
		||||
    version("0.104", sha256="a847c7789082ab623ed5922ab1248dd95f5f89d93eed44ac3d6a474703bbc0bf")
 | 
			
		||||
    version("0.103", sha256="9da1bf308f38323e30cb07f8ecf8efa05c7f50560e8683b9cd961102b1b3e25a")
 | 
			
		||||
    version(
 | 
			
		||||
        "0.101",
 | 
			
		||||
        sha256="69d3fe000a88a448dc4f7e263bcb342c34a177bd9744153654528cd86335a1f7",
 | 
			
		||||
        deprecated=True,
 | 
			
		||||
    )
 | 
			
		||||
    version(
 | 
			
		||||
        "0.100",
 | 
			
		||||
        sha256="d1bab4fb6f1b80ae83a7286cc536a32830890f6e5b0c3107a17c2600d0796912",
 | 
			
		||||
        deprecated=True,
 | 
			
		||||
    )
 | 
			
		||||
    version(
 | 
			
		||||
        "0.99",
 | 
			
		||||
        sha256="3358d44f1bc894321ce07d733afdf6cb7de39c33e3852d73c9f31f530175b7cd",
 | 
			
		||||
        deprecated=True,
 | 
			
		||||
    )
 | 
			
		||||
    version(
 | 
			
		||||
        "0.98.1",
 | 
			
		||||
        sha256="9a2da8f41cd8bf17d1845edf9de6d60f781204ebd37bffba96d8872036c10c66",
 | 
			
		||||
        deprecated=True,
 | 
			
		||||
    )
 | 
			
		||||
    version(
 | 
			
		||||
        "0.98",
 | 
			
		||||
        sha256="8d64b9ac0f1d60db553efa4e657f5ea87e790afe65336117267e9c7ae6f68239",
 | 
			
		||||
        deprecated=True,
 | 
			
		||||
    )
 | 
			
		||||
    version(
 | 
			
		||||
        "0.97.1",
 | 
			
		||||
        sha256="2f2756126ac8bb993202cf532d72c4d4044e877f4d52de9fdf70d0babd500ce4",
 | 
			
		||||
        deprecated=True,
 | 
			
		||||
    )
 | 
			
		||||
    version(
 | 
			
		||||
        "0.97",
 | 
			
		||||
        sha256="9794a706fc7ac151926231efdf74564c39fbaa99edca4acb745ee7d20c32dae7",
 | 
			
		||||
        deprecated=True,
 | 
			
		||||
    )
 | 
			
		||||
    version(
 | 
			
		||||
        "0.96",
 | 
			
		||||
        sha256="97af78e9d3c405e963361d0db96ee5425ee0766fa52b43c75b8a5670d48e4b4a",
 | 
			
		||||
        deprecated=True,
 | 
			
		||||
    )
 | 
			
		||||
    version(
 | 
			
		||||
        "0.95",
 | 
			
		||||
        sha256="d310b986948b5ee2bedec36383a7fe79403721c8dc2663a280676b4e431f83c2",
 | 
			
		||||
        deprecated=True,
 | 
			
		||||
    )
 | 
			
		||||
    version(
 | 
			
		||||
        "0.94",
 | 
			
		||||
        sha256="567e99b488ebe6294933c98a212281bffd5220fc13a0a5cd8441f9a3761ceccf",
 | 
			
		||||
        deprecated=True,
 | 
			
		||||
    )
 | 
			
		||||
    version(
 | 
			
		||||
        "0.93",
 | 
			
		||||
        sha256="77bfd7fe52ee7495050f49bcdd0e353ba1730e3ad15042c678faa5eeed55fb8c",
 | 
			
		||||
        deprecated=True,
 | 
			
		||||
    )
 | 
			
		||||
    version(
 | 
			
		||||
        "0.92",
 | 
			
		||||
        sha256="9187c5bcbc562c2828fe619d53884ab80afb1bcd627a817edb935b80affe7b84",
 | 
			
		||||
        deprecated=True,
 | 
			
		||||
    )
 | 
			
		||||
    version(
 | 
			
		||||
        "0.91",
 | 
			
		||||
        sha256="b69f470829f434f266119a33695592f74802cff4b76b37022db00ab32de322f5",
 | 
			
		||||
        "0.102",
 | 
			
		||||
        sha256="3734a76794991207e2dd2221f05f0e63a86ddafa777515d93d99d48629140f1a",
 | 
			
		||||
        deprecated=True,
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    variant("al", default=True, description="Builds with support for Aluminum Library")
 | 
			
		||||
    variant(
 | 
			
		||||
        "build_type",
 | 
			
		||||
        default="Release",
 | 
			
		||||
        description="The build type to build",
 | 
			
		||||
        values=("Debug", "Release"),
 | 
			
		||||
    )
 | 
			
		||||
    variant(
 | 
			
		||||
        "conduit",
 | 
			
		||||
        default=True,
 | 
			
		||||
        description="Builds with support for Conduit Library "
 | 
			
		||||
        "(note that for v0.99 conduit is required)",
 | 
			
		||||
    )
 | 
			
		||||
    variant(
 | 
			
		||||
        "deterministic",
 | 
			
		||||
        default=False,
 | 
			
		||||
        description="Builds with support for deterministic execution",
 | 
			
		||||
    )
 | 
			
		||||
    variant(
 | 
			
		||||
        "dihydrogen", default=True, description="Builds with support for DiHydrogen Tensor Library"
 | 
			
		||||
    )
 | 
			
		||||
    variant(
 | 
			
		||||
        "distconv",
 | 
			
		||||
        default=False,
 | 
			
		||||
        sticky=True,
 | 
			
		||||
        description="Builds with support for spatial, filter, or channel "
 | 
			
		||||
        "distributed convolutions",
 | 
			
		||||
    )
 | 
			
		||||
    variant(
 | 
			
		||||
        "dtype",
 | 
			
		||||
        default="float",
 | 
			
		||||
        sticky=True,
 | 
			
		||||
        description="Type for floating point representation of weights",
 | 
			
		||||
        values=("float", "double"),
 | 
			
		||||
    )
 | 
			
		||||
    variant("fft", default=False, description="Support for FFT operations")
 | 
			
		||||
    variant("half", default=False, description="Builds with support for FP16 precision data types")
 | 
			
		||||
    variant("hwloc", default=True, description="Add support for topology aware algorithms")
 | 
			
		||||
    variant("nvprof", default=False, description="Build with region annotations for NVPROF")
 | 
			
		||||
    variant(
 | 
			
		||||
        "numpy", default=False, description="Builds with support for processing NumPy data files"
 | 
			
		||||
@@ -139,7 +71,7 @@ class Lbann(CachedCMakePackage, CudaPackage, ROCmPackage):
 | 
			
		||||
    variant("vtune", default=False, description="Builds with support for Intel VTune")
 | 
			
		||||
    variant("onednn", default=False, description="Support for OneDNN")
 | 
			
		||||
    variant("onnx", default=False, description="Support for exporting models into ONNX format")
 | 
			
		||||
    variant("nvshmem", default=False, description="Support for NVSHMEM")
 | 
			
		||||
    variant("nvshmem", default=False, description="Support for NVSHMEM", when="+distconv")
 | 
			
		||||
    variant(
 | 
			
		||||
        "python",
 | 
			
		||||
        default=True,
 | 
			
		||||
@@ -168,20 +100,13 @@ class Lbann(CachedCMakePackage, CudaPackage, ROCmPackage):
 | 
			
		||||
    # Don't expose this a dependency until Spack can find the external properly
 | 
			
		||||
    # depends_on('binutils+gold', type='build', when='+gold')
 | 
			
		||||
 | 
			
		||||
    patch("lbann_v0.104_build_cleanup.patch", when="@0.104:")
 | 
			
		||||
 | 
			
		||||
    # Variant Conflicts
 | 
			
		||||
    conflicts("@:0.90,0.99:", when="~conduit")
 | 
			
		||||
    conflicts("@0.90:0.101", when="+fft")
 | 
			
		||||
    conflicts("@:0.90,0.102:", when="~dihydrogen")
 | 
			
		||||
    conflicts("~cuda", when="+nvprof")
 | 
			
		||||
    conflicts("~hwloc", when="+al")
 | 
			
		||||
    conflicts("~cuda", when="+nvshmem")
 | 
			
		||||
    conflicts("+cuda", when="+rocm", msg="CUDA and ROCm support are mutually exclusive")
 | 
			
		||||
 | 
			
		||||
    conflicts("~vision", when="@0.91:0.101")
 | 
			
		||||
    conflicts("~numpy", when="@0.91:0.101")
 | 
			
		||||
    conflicts("~python", when="@0.91:0.101")
 | 
			
		||||
    conflicts("~pfe", when="@0.91:0.101")
 | 
			
		||||
 | 
			
		||||
    requires("%clang", when="+lld")
 | 
			
		||||
 | 
			
		||||
    conflicts("+lld", when="+gold")
 | 
			
		||||
@@ -191,84 +116,56 @@ class Lbann(CachedCMakePackage, CudaPackage, ROCmPackage):
 | 
			
		||||
    depends_on("cmake@3.17.0:", type="build")
 | 
			
		||||
    depends_on("cmake@3.21.0:", type="build", when="@0.103:")
 | 
			
		||||
 | 
			
		||||
    # Specify the correct versions of Hydrogen
 | 
			
		||||
    depends_on("hydrogen@:1.3.4", when="@0.95:0.100")
 | 
			
		||||
    depends_on("hydrogen@1.4.0:1.4", when="@0.101:0.101.99")
 | 
			
		||||
    depends_on("hydrogen@1.5.0:", when="@:0.90,0.102:")
 | 
			
		||||
    # Specify the core libraries: Hydrogen, DiHydrogen, Aluminum
 | 
			
		||||
    depends_on("hydrogen@1.5.3:")
 | 
			
		||||
    depends_on("aluminum@1.4.1:")
 | 
			
		||||
    depends_on("dihydrogen@0.2.0:")
 | 
			
		||||
 | 
			
		||||
    # Align the following variants across Hydrogen and DiHydrogen
 | 
			
		||||
    forwarded_variants = ["cuda", "rocm", "half", "nvshmem"]
 | 
			
		||||
    for v in forwarded_variants:
 | 
			
		||||
        if v != "nvshmem":
 | 
			
		||||
            depends_on("hydrogen +{0}".format(v), when="+{0}".format(v))
 | 
			
		||||
            depends_on("hydrogen ~{0}".format(v), when="~{0}".format(v))
 | 
			
		||||
        if v != "al" and v != "half":
 | 
			
		||||
            depends_on("dihydrogen +{0}".format(v), when="+{0}".format(v))
 | 
			
		||||
            depends_on("dihydrogen ~{0}".format(v), when="~{0}".format(v))
 | 
			
		||||
        if v == "cuda" or v == "rocm":
 | 
			
		||||
            depends_on("aluminum +{0} +nccl".format(v), when="+{0}".format(v))
 | 
			
		||||
 | 
			
		||||
    # Add Hydrogen variants
 | 
			
		||||
    depends_on("hydrogen +openmp +shared +int64")
 | 
			
		||||
    depends_on("hydrogen +openmp_blas", when=sys.platform != "darwin")
 | 
			
		||||
    depends_on("hydrogen ~al", when="~al")
 | 
			
		||||
    depends_on("hydrogen +al", when="+al")
 | 
			
		||||
    depends_on("hydrogen ~cuda", when="~cuda")
 | 
			
		||||
    depends_on("hydrogen +cuda", when="+cuda")
 | 
			
		||||
    depends_on("hydrogen ~half", when="~half")
 | 
			
		||||
    depends_on("hydrogen +half", when="+half")
 | 
			
		||||
    depends_on("hydrogen ~rocm", when="~rocm")
 | 
			
		||||
    depends_on("hydrogen +rocm", when="+rocm")
 | 
			
		||||
    depends_on("hydrogen build_type=Debug", when="build_type=Debug")
 | 
			
		||||
 | 
			
		||||
    # Older versions depended on Elemental not Hydrogen
 | 
			
		||||
    depends_on("elemental +openmp_blas +shared +int64", when="@0.91:0.94")
 | 
			
		||||
    depends_on(
 | 
			
		||||
        "elemental +openmp_blas +shared +int64 build_type=Debug",
 | 
			
		||||
        when="build_type=Debug @0.91:0.94",
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    # Specify the correct version of Aluminum
 | 
			
		||||
    depends_on("aluminum@:0.3", when="@0.95:0.100 +al")
 | 
			
		||||
    depends_on("aluminum@0.4.0:0.4", when="@0.101:0.101.99 +al")
 | 
			
		||||
    depends_on("aluminum@0.5.0:", when="@:0.90,0.102: +al")
 | 
			
		||||
    # Add DiHydrogen variants
 | 
			
		||||
    depends_on("dihydrogen +distconv", when="+distconv")
 | 
			
		||||
    depends_on("dihydrogen@develop", when="@develop")
 | 
			
		||||
 | 
			
		||||
    # Add Aluminum variants
 | 
			
		||||
    depends_on("aluminum +cuda +nccl", when="+al +cuda")
 | 
			
		||||
    depends_on("aluminum +rocm +rccl", when="+al +rocm")
 | 
			
		||||
 | 
			
		||||
    depends_on("dihydrogen@0.2.0:", when="@:0.90,0.102:")
 | 
			
		||||
    depends_on("dihydrogen +openmp", when="+dihydrogen")
 | 
			
		||||
    depends_on("dihydrogen +openmp_blas", when=sys.platform != "darwin")
 | 
			
		||||
    depends_on("dihydrogen ~cuda", when="+dihydrogen ~cuda")
 | 
			
		||||
    depends_on("dihydrogen +cuda", when="+dihydrogen +cuda")
 | 
			
		||||
    depends_on("dihydrogen ~al", when="+dihydrogen ~al")
 | 
			
		||||
    depends_on("dihydrogen +al", when="+dihydrogen +al")
 | 
			
		||||
    depends_on("dihydrogen +distconv +cuda", when="+distconv +cuda")
 | 
			
		||||
    depends_on("dihydrogen +distconv +rocm", when="+distconv +rocm")
 | 
			
		||||
    depends_on("dihydrogen ~half", when="+dihydrogen ~half")
 | 
			
		||||
    depends_on("dihydrogen +half", when="+dihydrogen +half")
 | 
			
		||||
    depends_on("dihydrogen ~nvshmem", when="+dihydrogen ~nvshmem")
 | 
			
		||||
    depends_on("dihydrogen +nvshmem", when="+dihydrogen +nvshmem")
 | 
			
		||||
    depends_on("dihydrogen ~rocm", when="+dihydrogen ~rocm")
 | 
			
		||||
    depends_on("dihydrogen +rocm", when="+dihydrogen +rocm")
 | 
			
		||||
    depends_on("dihydrogen@0.1", when="@0.101:0.101.99 +dihydrogen")
 | 
			
		||||
    depends_on("dihydrogen@:0.0,0.2:", when="@:0.90,0.102: +dihydrogen")
 | 
			
		||||
    conflicts("~dihydrogen", when="+distconv")
 | 
			
		||||
    depends_on("aluminum@master", when="@develop")
 | 
			
		||||
 | 
			
		||||
    depends_on("hdf5+mpi", when="+distconv")
 | 
			
		||||
 | 
			
		||||
    for arch in CudaPackage.cuda_arch_values:
 | 
			
		||||
        depends_on("hydrogen cuda_arch=%s" % arch, when="+cuda cuda_arch=%s" % arch)
 | 
			
		||||
        depends_on("aluminum cuda_arch=%s" % arch, when="+al +cuda cuda_arch=%s" % arch)
 | 
			
		||||
        depends_on("dihydrogen cuda_arch=%s" % arch, when="+dihydrogen +cuda cuda_arch=%s" % arch)
 | 
			
		||||
        depends_on("aluminum cuda_arch=%s" % arch, when="+cuda cuda_arch=%s" % arch)
 | 
			
		||||
        depends_on("dihydrogen cuda_arch=%s" % arch, when="+cuda cuda_arch=%s" % arch)
 | 
			
		||||
        depends_on("nccl cuda_arch=%s" % arch, when="+cuda cuda_arch=%s" % arch)
 | 
			
		||||
 | 
			
		||||
    # variants +rocm and amdgpu_targets are not automatically passed to
 | 
			
		||||
    # dependencies, so do it manually.
 | 
			
		||||
    for val in ROCmPackage.amdgpu_targets:
 | 
			
		||||
        depends_on("hydrogen amdgpu_target=%s" % val, when="amdgpu_target=%s" % val)
 | 
			
		||||
        depends_on("aluminum amdgpu_target=%s" % val, when="+al amdgpu_target=%s" % val)
 | 
			
		||||
        depends_on("dihydrogen amdgpu_target=%s" % val, when="+dihydrogen amdgpu_target=%s" % val)
 | 
			
		||||
        depends_on("aluminum amdgpu_target=%s" % val, when="amdgpu_target=%s" % val)
 | 
			
		||||
        depends_on("dihydrogen amdgpu_target=%s" % val, when="amdgpu_target=%s" % val)
 | 
			
		||||
 | 
			
		||||
    depends_on("roctracer-dev", when="+rocm +distconv")
 | 
			
		||||
 | 
			
		||||
    depends_on("cudnn", when="@0.90:0.100 +cuda")
 | 
			
		||||
    depends_on("cudnn@8.0.2:", when="@:0.90,0.101: +cuda")
 | 
			
		||||
    depends_on("cub", when="@0.94:0.98.2 +cuda ^cuda@:10")
 | 
			
		||||
    depends_on("cutensor", when="@:0.90,0.102: +cuda")
 | 
			
		||||
    depends_on("cudnn@8.0.2:", when="+cuda")
 | 
			
		||||
    depends_on("cutensor", when="+cuda")
 | 
			
		||||
    depends_on("hipcub", when="+rocm")
 | 
			
		||||
    depends_on("mpi")
 | 
			
		||||
    depends_on("hwloc@1.11:", when="@:0.90,0.102: +hwloc")
 | 
			
		||||
    depends_on("hwloc@1.11.0:1.11", when="@0.95:0.101 +hwloc")
 | 
			
		||||
    depends_on("hwloc@1.11:")
 | 
			
		||||
    depends_on("hwloc +cuda +nvml", when="+cuda")
 | 
			
		||||
    depends_on("hwloc@2.3.0:", when="+rocm")
 | 
			
		||||
    depends_on("hiptt", when="+rocm")
 | 
			
		||||
@@ -296,9 +193,7 @@ class Lbann(CachedCMakePackage, CudaPackage, ROCmPackage):
 | 
			
		||||
    # Note that conduit defaults to +fortran +parmetis +python, none of which are
 | 
			
		||||
    # necessary by LBANN: you may want to disable those options in your
 | 
			
		||||
    # packages.yaml
 | 
			
		||||
    depends_on("conduit@0.4.0: +hdf5", when="@0.94:0 +conduit")
 | 
			
		||||
    depends_on("conduit@0.5.0:0.6 +hdf5", when="@0.100:0.101 +conduit")
 | 
			
		||||
    depends_on("conduit@0.6.0: +hdf5", when="@:0.90,0.99:")
 | 
			
		||||
    depends_on("conduit@0.6.0: +hdf5")
 | 
			
		||||
 | 
			
		||||
    # LBANN can use Python in two modes 1) as part of an extensible framework
 | 
			
		||||
    # and 2) to drive the front end model creation and launch
 | 
			
		||||
@@ -308,12 +203,12 @@ class Lbann(CachedCMakePackage, CudaPackage, ROCmPackage):
 | 
			
		||||
    extends("python", when="+python")
 | 
			
		||||
 | 
			
		||||
    # Python front end and possible extra packages
 | 
			
		||||
    depends_on("python@3: +shared", type=("build", "run"), when="@:0.90,0.99: +pfe")
 | 
			
		||||
    depends_on("python@3: +shared", type=("build", "run"), when="+pfe")
 | 
			
		||||
    extends("python", when="+pfe")
 | 
			
		||||
    depends_on("py-setuptools", type="build", when="+pfe")
 | 
			
		||||
    depends_on("py-protobuf+cpp@3.10.0:", type=("build", "run"), when="@:0.90,0.99: +pfe")
 | 
			
		||||
    depends_on("py-protobuf+cpp@3.10.0:4.21.12", type=("build", "run"), when="+pfe")
 | 
			
		||||
 | 
			
		||||
    depends_on("protobuf+shared@3.10.0:", when="@:0.90,0.99:")
 | 
			
		||||
    depends_on("protobuf+shared@3.10.0:3.21.12")
 | 
			
		||||
    depends_on("zlib-api", when="protobuf@3.11.0:")
 | 
			
		||||
 | 
			
		||||
    # using cereal@1.3.1 and above requires changing the
 | 
			
		||||
@@ -328,7 +223,7 @@ class Lbann(CachedCMakePackage, CudaPackage, ROCmPackage):
 | 
			
		||||
    depends_on("onnx", when="+onnx")
 | 
			
		||||
    depends_on("nvshmem", when="+nvshmem")
 | 
			
		||||
 | 
			
		||||
    depends_on("spdlog", when="@:0.90,0.102:")
 | 
			
		||||
    depends_on("spdlog@1.11.0")
 | 
			
		||||
    depends_on("zstr")
 | 
			
		||||
 | 
			
		||||
    depends_on("caliper+adiak+mpi", when="+caliper")
 | 
			
		||||
@@ -336,6 +231,7 @@ class Lbann(CachedCMakePackage, CudaPackage, ROCmPackage):
 | 
			
		||||
    generator("ninja")
 | 
			
		||||
 | 
			
		||||
    def setup_build_environment(self, env):
 | 
			
		||||
        env.append_flags("CXXFLAGS", "-fno-omit-frame-pointer")
 | 
			
		||||
        if self.spec.satisfies("%apple-clang"):
 | 
			
		||||
            env.append_flags("CPPFLAGS", self.compiler.openmp_flag)
 | 
			
		||||
            env.append_flags("CFLAGS", self.spec["llvm-openmp"].headers.include_flags)
 | 
			
		||||
@@ -357,7 +253,7 @@ def libs(self):
 | 
			
		||||
    def cache_name(self):
 | 
			
		||||
        hostname = socket.gethostname()
 | 
			
		||||
        # Get a hostname that has no node identifier
 | 
			
		||||
        hostname = hostname.rstrip("1234567890")
 | 
			
		||||
        hostname = hostname.rstrip("1234567890-")
 | 
			
		||||
        return "LBANN_{0}_{1}-{2}-{3}@{4}.cmake".format(
 | 
			
		||||
            hostname,
 | 
			
		||||
            self.spec.version,
 | 
			
		||||
@@ -440,12 +336,9 @@ def initconfig_package_entries(self):
 | 
			
		||||
        cmake_variant_fields = [
 | 
			
		||||
            ("LBANN_WITH_CNPY", "numpy"),
 | 
			
		||||
            ("LBANN_DETERMINISTIC", "deterministic"),
 | 
			
		||||
            ("LBANN_WITH_HWLOC", "hwloc"),
 | 
			
		||||
            ("LBANN_WITH_ALUMINUM", "al"),
 | 
			
		||||
            ("LBANN_WITH_ADDRESS_SANITIZER", "asan"),
 | 
			
		||||
            ("LBANN_WITH_BOOST", "boost"),
 | 
			
		||||
            ("LBANN_WITH_CALIPER", "caliper"),
 | 
			
		||||
            ("LBANN_WITH_CONDUIT", "conduit"),
 | 
			
		||||
            ("LBANN_WITH_NVSHMEM", "nvshmem"),
 | 
			
		||||
            ("LBANN_WITH_FFT", "fft"),
 | 
			
		||||
            ("LBANN_WITH_ONEDNN", "onednn"),
 | 
			
		||||
@@ -460,6 +353,9 @@ def initconfig_package_entries(self):
 | 
			
		||||
        for opt, val in cmake_variant_fields:
 | 
			
		||||
            entries.append(self.define_cmake_cache_from_variant(opt, val))
 | 
			
		||||
 | 
			
		||||
        entries.append(cmake_cache_option("LBANN_WITH_ALUMINUM", True))
 | 
			
		||||
        entries.append(cmake_cache_option("LBANN_WITH_CONDUIT", True))
 | 
			
		||||
        entries.append(cmake_cache_option("LBANN_WITH_HWLOC", True))
 | 
			
		||||
        entries.append(cmake_cache_option("LBANN_WITH_ROCTRACER", "+rocm +distconv" in spec))
 | 
			
		||||
        entries.append(cmake_cache_option("LBANN_WITH_TBINF", False))
 | 
			
		||||
        entries.append(
 | 
			
		||||
@@ -492,7 +388,7 @@ def initconfig_package_entries(self):
 | 
			
		||||
                )
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
        entries.append(self.define_cmake_cache_from_variant("LBANN_WITH_DIHYDROGEN", "dihydrogen"))
 | 
			
		||||
        entries.append(cmake_cache_option("LBANN_WITH_DIHYDROGEN", True))
 | 
			
		||||
        entries.append(self.define_cmake_cache_from_variant("LBANN_WITH_DISTCONV", "distconv"))
 | 
			
		||||
 | 
			
		||||
        # IF IBM ESSL is used it needs help finding the proper LAPACK libraries
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user