Babelstream Spack Package (#36164)

2023-03-27 16:04:28 +01:00
parent e612436e26
commit deca4ce107
1 changed files with 415 additions and 0 deletions
--- a/var/spack/repos/builtin/packages/babelstream/package.py
+++ b/var/spack/repos/builtin/packages/babelstream/package.py
@@ -0,0 +1,415 @@
+# Copyright 2013-2023 Lawrence Livermore National Security, LLC and other
+# Spack Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+
+import re  # To get the variant name after (+)
+
+from spack.package import *
+
+
+def find_model_flag(str):
+    res = re.findall(r"\+(\w+)", str)
+    if not res:
+        return ""
+    return res
+
+
+class Babelstream(CMakePackage, CudaPackage, ROCmPackage):
+    """Measure memory transfer rates to/from global device memory on GPUs.
+    This benchmark is similar in spirit, and based on, the STREAM benchmark for CPUs."""
+
+    homepage = "https://github.com/UoB-HPC/BabelStream"
+    url = "https://github.com/UoB-HPC/BabelStream/archive/refs/tags/v4.0.tar.gz"
+    git = "https://github.com/UoB-HPC/BabelStream.git"
+    version("4.0", sha256="a9cd39277fb15d977d468435eb9b894f79f468233f0131509aa540ffda4f5953")
+    version("main", branch="main")
+    version("develop", branch="develop")
+    maintainers = ["tomdeakin", "kaanolgu" "tom91136", "robj0nes"]
+
+    # Languages
+    # Also supported variants are cuda and rocm (for HIP)
+    variant("sycl", default=False, description="Enable SYCL support")
+    variant("sycl2020", default=False, description="Enable SYCL support")
+    variant("omp", default=False, description="Enable OpenMP support")
+    variant("ocl", default=False, description="Enable OpenCL support")
+    variant("tbb", default=False, description="Enable TBB support")
+    variant("acc", default=False, description="Enable OpenACC support")
+    variant("thrust", default=False, description="Enable THRUST support")
+    variant("raja", default=False, description="Enable RAJA support")
+    variant("stddata", default=False, description="Enable STD-data support")
+    variant("stdindices", default=False, description="Enable STD-indices support")
+    variant("stdranges", default=False, description="Enable STD-ranges support")
+
+    # Some models need to have the programming model abstraction downloaded -
+    # this variant enables a path to be provided.
+    variant("dir", values=str, default="none", description="Enable Directory support")
+
+    # Kokkos conflict and variant
+    conflicts(
+        "dir=none", when="+kokkos", msg="KOKKKOS requires architecture to be specfied by dir="
+    )
+    variant("kokkos", default=False, description="Enable KOKKOS support")
+
+    # ACC conflict
+    variant("cpu_arch", values=str, default="none", description="Enable CPU Target for ACC")
+    variant("target", values=str, default="none", description="Enable CPU Target for ACC")
+
+    # STD conflicts
+    conflicts("+stddata", when="%gcc@:10.1.0", msg="STD-data requires newer version of GCC")
+    conflicts("+stdindices", when="%gcc@:10.1.0", msg="STD-indices requires newer version of GCC")
+    conflicts("+stdranges", when="%gcc@:10.1.0", msg="STD-ranges requires newer version of GCC")
+
+    # CUDA conflict
+    conflicts(
+        "cuda_arch=none",
+        when="+cuda",
+        msg="CUDA requires architecture to be specfied by cuda_arch=",
+    )
+    variant("mem", values=str, default="DEFAULT", description="Enable MEM Target for CUDA")
+    # Raja Conflict
+    variant(
+        "offload", values=str, default="none", description="Enable RAJA Target [CPU or NVIDIA]"
+    )
+    conflicts(
+        "offload=none",
+        when="+raja",
+        msg="RAJA requires architecture to be specfied by target=[CPU,NVIDIA]",
+    )
+
+    # download raja from https://github.com/LLNL/RAJA
+    conflicts(
+        "dir=none",
+        when="+raja",
+        msg="RAJA implementation requires architecture to be specfied by dir=",
+    )
+
+    # Thrust Conflict
+    # conflicts("~cuda", when="+thrust", msg="Thrust requires +cuda variant")
+    depends_on("thrust", when="+thrust")
+    depends_on("rocthrust", when="+thrust implementation=rocm")
+
+    # TBB Dependency
+    depends_on("intel-oneapi-tbb", when="+tbb")
+    partitioner_vals = ["auto", "affinity", "static", "simple"]
+    variant(
+        "partitioner",
+        values=partitioner_vals,
+        default="auto",
+        description="Partitioner specifies how a loop template should partition its work among threads.\
+            Possible values are:\
+            AUTO     - Optimize range subdivision based on work-stealing events.\
+            AFFINITY - Proportional splitting that optimizes for cache affinity.\
+            STATIC   - Distribute work uniformly with no additional load balancing.\
+            SIMPLE   - Recursively split its range until it cannot be further subdivided.\
+            See https://spec.oneapi.com/versions/latest/elements/oneTBB/source/algorithms.html#partitioners for more details.",
+    )
+
+    # Kokkos Dependency
+    depends_on("kokkos@3.7.1", when="+kokkos")
+
+    # OpenCL Dependency
+
+    backends = {
+        "ocl": [
+            ("amd", "rocm-opencl", "enable ROCM backend"),
+            ("cuda", "cuda", "enable Cuda backend"),
+            ("intel", "intel-oneapi-compilers", "enable Intel backend"),
+            ("pocl", "pocl@1.5", "enable POCL backend"),
+        ],
+        "kokkos": [
+            ("cuda", "cuda", "enable Cuda backend"),
+            ("omp", "none", "enable Cuda backend"),
+        ],
+    }
+    backend_vals = ["none"]
+    for lang in backends:
+        for item in backends[lang]:
+            backend, dpdncy, descr = item
+            backend_vals.append(backend.lower())
+
+    variant("backend", values=backend_vals, default="none", description="Enable backend support")
+
+    for lang in backends:
+        for item in backends[lang]:
+            backend, dpdncy, descr = item
+            if dpdncy.lower() != "none":
+                depends_on("%s" % dpdncy.lower(), when="backend=%s" % backend.lower())
+    # this flag could be used in all required languages
+    variant("flags", values=str, default="none", description="Additional CXX flags to be provided")
+
+    # comp_impl_vals=["ONEAPI-DPCPP","DPCPP","HIPSYCL","COMPUTECPP"]
+    variant(
+        "implementation",
+        values=str,
+        default="none",
+        description="Compile using the specified SYCL compiler option",
+    )
+
+    conflicts(
+        "implementation=none",
+        when="+sycl",
+        msg="SYCL requires compiler implementation to be specified by option=",
+    )
+    conflicts(
+        "implementation=none",
+        when="+thrust",
+        msg="Which Thrust implementation to use, supported options include:\
+         - CUDA (via https://github.com/NVIDIA/thrust)\
+         - ROCM (via https://github.com/ROCmSoftwarePlatform/rocThrust)",
+    )
+
+    # This applies to all
+    depends_on("cmake@3.14.0:", type="build")
+    depends_on("opencl-c-headers", when="+ocl")
+
+    def cmake_args(self):
+        # convert spec to string to work on it
+        spec_string = str(self.spec)
+
+        # take only the first portion of the spec until space
+        spec_string_truncate = spec_string.split(" ", 1)[0]
+        model_list = find_model_flag(spec_string_truncate)  # Prints out ['cuda', 'thrust']
+
+        if len(model_list) > 1:
+            ignore_list = ["cuda"]  # if +acc is provided ignore the cuda model
+            model = list(set(model_list) - set(ignore_list))
+            # We choose 'thrust' from the list of ['cuda', 'thrust']
+            args = ["-DMODEL=" + model[0]]
+        else:
+            # if it is +stddata,indices etc. we need to pass it
+            # as std-data to the CMake compiler
+            # do some alterations here
+            if "std" in model_list[0]:
+                args = ["-DMODEL=" + "std-" + model_list[0].split("d", 1)[1]]
+            else:
+                args = ["-DMODEL=" + model_list[0]]
+
+        # ===================================
+        #             ACC
+        # ===================================
+        if ("+acc" in self.spec) and ("~cuda" in self.spec):
+            args.append("-DCMAKE_CXX_COMPILER=" + self.compiler.cxx)
+            if "cuda_arch" in self.spec.variants:
+                cuda_arch_list = self.spec.variants["cuda_arch"].value
+                # the architecture value is only number so append sm_ to the name
+                cuda_arch = "cc" + cuda_arch_list[0]
+                args.append("-DTARGET_DEVICE=gpu")
+                args.append("-DCUDA_ARCH=" + cuda_arch)
+            elif "cpu_arch" in self.spec.variants:
+                cpu_arch_list = self.spec.variants["cpu_arch"].value
+                # the architecture value is only number so append sm_ to the name
+                cpu_arch = cpu_arch_list[0]
+                args.append("-DTARGET_DEVICE=multicore")
+                args.append("-DTARGET_PROCESSOR=" + cpu_arch)
+
+        # ===================================
+        #    STDdata,STDindices,STDranges
+        # ===================================
+        std_list = ["+stddata", "+stdindices", "+stdranges"]
+        if spec_string.startswith(tuple(std_list)):
+            args.append("-DCMAKE_CXX_COMPILER=" + self.compiler.cxx)
+
+        # ===================================
+        #             CUDA
+        # ===================================
+
+        if ("+cuda" in self.spec) and ("~kokkos" in self.spec) and ("~acc" in self.spec):
+            # Set up the cuda macros needed by the build
+            cuda_arch_list = self.spec.variants["cuda_arch"].value
+            # the architecture value is only number so append sm_ to the name
+            cuda_arch = "sm_" + cuda_arch_list[0]
+            args.append("-DCUDA_ARCH=" + cuda_arch)
+            cuda_dir = self.spec["cuda"].prefix
+            cuda_comp = cuda_dir + "/bin/nvcc"
+            args.append("-DCMAKE_CUDA_COMPILER=" + cuda_comp)
+            args.append("-DMEM=" + self.spec.variants["mem"].value)
+            if self.spec.variants["flags"].value != "none":
+                args.append("-DCUDA_EXTRA_FLAGS=" + self.spec.variants["flags"].value)
+
+        # ===================================
+        #             OMP
+        # ===================================
+        # `~kokkos` option is there to prevent +kokkos +omp setting to use omp directly from here
+        # Same applies for raja
+        if ("+omp" in self.spec) and ("~kokkos" in self.spec) and ("~raja" in self.spec):
+            args.append("-DCMAKE_CXX_COMPILER=" + self.compiler.cxx)
+            if "cuda_arch" in self.spec.variants:
+                cuda_arch_list = self.spec.variants["cuda_arch"].value
+                # the architecture value is only number so append sm_ to the name
+                cuda_arch = "sm_" + cuda_arch_list[0]
+                args.append("-DOFFLOAD= " + "NVIDIA:" + cuda_arch)
+            elif "amdgpu_target" in self.spec.variants:
+                rocm_arch = self.spec.variants["amdgpu_target"].value
+                # the architecture value is only number so append sm_ to the name
+                args.append("-DOFFLOAD=" + " AMD:" + rocm_arch)
+            else:
+                args.append("-DOFFLOAD=" + "INTEL")
+
+        # ===================================
+        #             SYCL
+        # ===================================
+
+        if "+sycl" in self.spec:
+            args.append("-DSYCL_COMPILER=" + self.spec.variants["implementation"].value.upper())
+            if self.spec.variants["implementation"].value.upper() != "ONEAPI-DPCPP":
+                args.append(
+                    "-DSYCL_COMPILER_DIR=" + self.spec.variants["implementation"].value.upper()
+                )
+                if self.spec.variants["implementation"].value.upper() == "COMPUTE-CPP":
+                    args.append("-DOpenCL_LIBRARY=")
+
+        # ===================================
+        #             SYCL 2020
+        # ===================================
+
+        if "+sycl2020" in self.spec:
+            if self.spec.satisfies("%oneapi"):
+                # -fsycl flag is required for setting up sycl/sycl.hpp seems like
+                #  it doesn't get it from the CMake file
+                args.append("-DCXX_EXTRA_FLAGS= -fsycl -O3")
+                # this is required to enable -DCMAKE_CXX_COMPILER=icpx flag from CMake
+                args.append("-DSYCL_COMPILER=ONEAPI-ICPX")
+            else:
+                args.append(
+                    "-DSYCL_COMPILER=" + self.spec.variants["implementation"].value.upper()
+                )
+                if self.spec.variants["implementation"].value.upper() != "ONEAPI-DPCPP":
+                    args.append(
+                        "-DSYCL_COMPILER_DIR=" + self.spec.variants["implementation"].value.upper()
+                    )
+                    if self.spec.variants["implementation"].value.upper() == "COMPUTE-CPP":
+                        args.append("-DOpenCL_LIBRARY=")
+
+        # ===================================
+        #             HIP(ROCM)
+        # ===================================
+
+        if "+rocm" in self.spec:
+            hip_comp = self.spec["rocm"].prefix + "/bin/hipcc"
+            args.append("-DCMAKE_CXX_COMPILER=" + hip_comp)
+            args.append(
+                "-DCXX_EXTRA_FLAGS= --offload-arch="
+                + self.spec.variants["amdgpu_target"].value
+                + " "
+                + self.spec.variants["flags"].value
+                + " -O3"
+            )
+
+        # ===================================
+        #             TBB
+        # ===================================
+
+        if "+tbb" in self.spec:
+            args.append("-DONE_TBB_DIR=" + self.spec["tbb"].prefix + "/tbb/latest/")
+            args.append("-DPARTITIONER=" + self.spec.variants["partitioner"].value.upper())
+
+        # ===================================
+        #             OpenCL (ocl)
+        # ===================================
+        if "+ocl" in self.spec:
+            if "backend" in self.spec.variants:
+                if "cuda" in self.spec.variants["backend"].value:
+                    cuda_dir = self.spec["cuda"].prefix
+                    args.append("-DOpenCL_LIBRARY=" + cuda_dir + "/lib64/libOpenCL.so")
+                elif "amd" in self.spec.variants["backend"].value:
+                    rocm_dir = self.spec["rocm-opencl"].prefix
+                    args.append("-DOpenCL_LIBRARY=" + rocm_dir + "/lib64/libOpenCL.so")
+                elif "intel" in self.spec.variants["backend"].value:
+                    intel_lib = (
+                        self.spec["intel-oneapi-compilers"].prefix
+                        + "/compiler/2023.0.0/linux/lib/libOpenCL.so"
+                    )
+                    args.append("-DOpenCL_LIBRARY=" + intel_lib)
+                elif "pocl" in self.spec.variants["backend"].value:
+                    args.append("-DCMAKE_CXX_COMPILER=" + self.compiler.cxx)
+                    pocl_lib = self.spec["pocl"].prefix + "/lib64/libOpenCL.so"
+                    args.append("-DOpenCL_LIBRARY=" + pocl_lib)
+                args.append("-DCMAKE_CXX_COMPILER=" + self.compiler.cxx)
+
+        # ===================================
+        #             RAJA
+        # ===================================
+        if "+raja" in self.spec:
+            args.append("-DCMAKE_CXX_COMPILER=" + self.compiler.cxx)
+            args.append("-DRAJA_IN_TREE=" + self.spec.variants["dir"].value)
+            if "offload" in self.spec.variants:
+                if "nvidia" in self.spec.variants["offload"].value:
+                    cuda_dir = self.spec["cuda"].prefix
+                    cuda_comp = cuda_dir + "/bin/nvcc"
+                    args.append("-DCMAKE_CUDA_COMPILER=" + cuda_comp)
+                    args.append("-DTARGET=NVIDIA")
+                    cuda_arch_list = self.spec.variants["cuda_arch"].value
+                    int_cuda_arch = int(cuda_arch_list[0])
+                    cuda_arch = "sm_" + cuda_arch_list[0]
+                    args.append("-DCUDA_ARCH=" + cuda_arch)
+
+                    args.append("DCUDA_TOOLKIT_ROOT_DIR=" + self.spec["cuda"].prefix)
+                    if self.spec.variants["flags"].value != "none":
+                        args.append("-DCUDA_EXTRA_FLAGS=" + self.spec.variants["flags"].value)
+                # if("cpu" in self.spec.variants['offload'].value):
+
+            if "omp" in self.spec.variants["backend"].value:
+                args.append("-DENABLE_OPENMP=ON")
+            if "cuda" in self.spec.variants["backend"].value:
+                args.append("-DENABLE_CUDA=ON")
+
+        # ===================================
+        #             THRUST
+        # ===================================
+        if "+thrust" in self.spec:
+            if "cuda" in self.spec.variants["implementation"].value:
+                args.append("-DTHRUST_IMPL=" + self.spec.variants["implementation"].value.upper())
+                args.append("-SDK_DIR=" + self.spec["thrust"].prefix + "/include")
+                cuda_arch_list = self.spec.variants["cuda_arch"].value
+                # the architecture value is only number so append sm_ to the name
+                cuda_arch = "sm_" + cuda_arch_list[0]
+                args.append("-DCUDA_ARCH=" + cuda_arch)
+                cuda_dir = self.spec["cuda"].prefix
+                cuda_comp = cuda_dir + "/bin/nvcc"
+                args.append("-DCMAKE_CUDA_COMPILER=" + cuda_comp)
+                args.append("-DBACKEND=" + self.spec.variants["backend"].value.upper())
+                if self.spec.variants["flags"].value != "none":
+                    args.append("-DCUDA_EXTRA_FLAGS=" + self.spec.variants["flags"].value)
+
+            if "rocm" in self.spec.variants["implementation"].value:
+                args.append("-DTHRUST_IMPL=" + self.spec.variants["implementation"].value.upper())
+                args.append("-SDK_DIR=" + self.spec["rocthrust"].prefix)
+                args.append("-DBACKEND=" + self.spec.variants["backend"].value.upper())
+
+        # ===================================
+        #             kokkos
+        # ===================================
+        # kokkos implementation is versatile and it could use cuda or omp architectures as backend
+        # The usage should be spack install babelstream +kokkos +cuda [or +omp]
+        if "+kokkos" in self.spec:
+            args.append("-DCMAKE_CXX_COMPILER=" + self.compiler.cxx)
+            args.append("-DKOKKOS_IN_TREE=" + self.spec.variants["dir"].value)
+            # args.append("-DKOKKOS_IN_PACKAGE=" + self.spec["kokkos"].prefix)
+            if "backend" in self.spec.variants:
+                if "cuda" in self.spec.variants["backend"].value:
+                    args.append("-DKokkos_ENABLE_CUDA=ON")
+                    cuda_arch_list = self.spec.variants["cuda_arch"].value
+                    int_cuda_arch = int(cuda_arch_list[0])
+                    # arhitecture kepler optimisations
+                    if int_cuda_arch in (30, 32, 35, 37):
+                        args.append("-D" + "Kokkos_ARCH_KEPLER" + str(int_cuda_arch) + "=ON")
+                    # arhitecture maxwell optimisations
+                    if int_cuda_arch in (50, 52, 53):
+                        args.append("-D" + "Kokkos_ARCH_MAXWELL" + str(int_cuda_arch) + "=ON")
+                    # arhitecture pascal optimisations
+                    if int_cuda_arch in (60, 61):
+                        args.append("-D" + "Kokkos_ARCH_PASCAL" + str(int_cuda_arch) + "=ON")
+                    # architecture volta optimisations
+                    if int_cuda_arch in (70, 72):
+                        args.append("-D" + "Kokkos_ARCH_VOLTA" + str(int_cuda_arch) + "=ON")
+                    if int_cuda_arch == 75:
+                        args.append("-DKokkos_ARCH_TURING75=ON")
+                if "omp" in self.spec.variants["backend"].value:
+                    args.append("-DKokkos_ENABLE_OPENMP=ON")
+
+        # not in ["kokkos", "raja", "acc", "hip"] then compiler forced true
+        if set(model_list).intersection(["kokkos", "raja", "acc", "hip"]) is True:
+            args.append("-DCMAKE_CXX_COMPILER_FORCED=True")
+
+        return args