348 lines
13 KiB
Python
348 lines
13 KiB
Python
# Copyright Spack Project Developers. See COPYRIGHT file for details.
|
|
#
|
|
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
|
|
|
|
import socket
|
|
|
|
from spack.package import *
|
|
|
|
from .blt import llnl_link_helpers
|
|
|
|
|
|
class RajaPerf(CachedCMakePackage, CudaPackage, ROCmPackage):
|
|
"""RAJA Performance Suite."""
|
|
|
|
homepage = "https://github.com/LLNL/RAJAPerf"
|
|
git = "https://github.com/LLNL/RAJAPerf.git"
|
|
tags = ["radiuss"]
|
|
|
|
maintainers("davidbeckingsale", "adrienbernede")
|
|
|
|
license("BSD-3-Clause")
|
|
|
|
version("develop", branch="develop", submodules="True")
|
|
version("main", branch="main", submodules="True")
|
|
version(
|
|
"2024.07.0",
|
|
tag="v2024.07.0",
|
|
commit="6e81aa58af244a13755a694bfdc7bc301139a244",
|
|
submodules="True",
|
|
)
|
|
version(
|
|
"2023.06.0",
|
|
tag="v2023.06.0",
|
|
commit="e5b2102f50e4642f53d9c86fb622b398a748974a",
|
|
submodules="True",
|
|
)
|
|
version(
|
|
"2022.10.0",
|
|
tag="v2022.10.0",
|
|
commit="57ee53e402d2ac0a398df39ad1ca85cf1d2be45b",
|
|
submodules="True",
|
|
)
|
|
version(
|
|
"0.12.0",
|
|
tag="v0.12.0",
|
|
commit="388c1d7562e1cb364191cb34c1ff62f3cadf54a0",
|
|
submodules="True",
|
|
)
|
|
version(
|
|
"0.11.0",
|
|
tag="v0.11.0",
|
|
commit="22ac1de533ebd477c781d53962a92478c0a11d43",
|
|
submodules="True",
|
|
)
|
|
version(
|
|
"0.10.0",
|
|
tag="v0.10.0",
|
|
commit="6bf725af38da41b1ebd1d29c75ffa5b8e57f7cbf",
|
|
submodules="True",
|
|
)
|
|
version(
|
|
"0.9.0", tag="v0.9.0", commit="064dd17dae696c3e440eeb7469fa90341858a636", submodules="True"
|
|
)
|
|
version(
|
|
"0.8.0", tag="v0.8.0", commit="94c65b2caefec2220f712f34c2a198b682ca7e23", submodules="True"
|
|
)
|
|
version(
|
|
"0.7.0", tag="v0.7.0", commit="a6ef0279d9d240199947d872d8f28bf121f2192c", submodules="True"
|
|
)
|
|
version(
|
|
"0.6.0", tag="v0.6.0", commit="21e476f031bc10bbdb8514425c380553bfb23bdc", submodules="True"
|
|
)
|
|
version(
|
|
"0.5.2", tag="v0.5.2", commit="2da5e27bc648ff5540ffa69bbde67f125e4581d3", submodules="True"
|
|
)
|
|
version(
|
|
"0.5.1", tag="v0.5.1", commit="a7b6f63e4fef2d0146932eff409788da51ab0cb3", submodules="True"
|
|
)
|
|
version(
|
|
"0.5.0", tag="v0.5.0", commit="888f5ebe69a9b2ae35058cf8fb8d89d91a379bea", submodules="True"
|
|
)
|
|
version(
|
|
"0.4.0", tag="v0.4.0", commit="a8f669c1ad01d51132a4e3d9d6aa8b2cabc9eff0", submodules="True"
|
|
)
|
|
|
|
depends_on("cxx", type="build") # generated
|
|
|
|
variant("mpi", default=False, description="Enable MPI support")
|
|
variant("openmp", default=False, description="Build OpenMP backend")
|
|
variant("omptarget", default=False, description="Build with OpenMP target support")
|
|
variant("sycl", default=False, description="Build sycl backend")
|
|
variant("shared", default=False, description="Build Shared Libs")
|
|
variant("omptask", default=False, description="Build OpenMP task variants of algorithms")
|
|
variant(
|
|
"tests",
|
|
default="basic",
|
|
values=("none", "basic", "benchmarks"),
|
|
multi=False,
|
|
description="Tests to run",
|
|
)
|
|
variant("caliper", default=False, description="Build with support for Caliper based profiling")
|
|
variant("lowopttest", default=False, description="Intended for developers to use low optimization level for tests to pass with some compilers.")
|
|
|
|
depends_on("blt")
|
|
depends_on("blt@0.6.2:", type="build", when="@2024.07.0:")
|
|
depends_on("blt@0.5.3", type="build", when="@2023.06")
|
|
depends_on("blt@0.5.2:0.5.3", type="build", when="@2022.10")
|
|
depends_on("blt@0.5.0:", type="build", when="@0.12.0:")
|
|
depends_on("blt@0.4.1:", type="build", when="@0.11.0:")
|
|
depends_on("blt@0.4.0:", type="build", when="@0.8.0:")
|
|
depends_on("blt@0.3.0:", type="build", when="@:0.7.0")
|
|
|
|
depends_on("cmake@3.23:", when="@2024.07.0:", type="build")
|
|
depends_on("cmake@3.23:", when="@0.12.0:2023.06.0 +rocm", type="build")
|
|
depends_on("cmake@3.20:", when="@0.12.0:2023.06.0", type="build")
|
|
depends_on("cmake@3.14:", when="@:0.12.0", type="build")
|
|
|
|
depends_on("mpi", when="+mpi")
|
|
|
|
depends_on("llvm-openmp", when="+openmp %apple-clang")
|
|
|
|
depends_on("rocprim", when="+rocm")
|
|
|
|
depends_on("caliper@2.9.0:", when="+caliper")
|
|
depends_on("caliper@2.9.0: +cuda", when="+caliper +cuda")
|
|
depends_on("caliper@2.9.0: +rocm", when="+caliper +rocm")
|
|
|
|
with when("@0.12.0: +rocm +caliper"):
|
|
depends_on("caliper +rocm")
|
|
for arch in ROCmPackage.amdgpu_targets:
|
|
depends_on(
|
|
"caliper +rocm amdgpu_target={0}".format(arch),
|
|
when="amdgpu_target={0}".format(arch),
|
|
)
|
|
conflicts("+openmp", when="@:2022.03")
|
|
|
|
with when("@0.12.0: +cuda +caliper"):
|
|
depends_on("caliper +cuda")
|
|
for sm_ in CudaPackage.cuda_arch_values:
|
|
depends_on("caliper +cuda cuda_arch={0}".format(sm_), when="cuda_arch={0}".format(sm_))
|
|
|
|
conflicts("~openmp", when="+omptarget", msg="OpenMP target requires OpenMP")
|
|
conflicts("+cuda", when="+omptarget", msg="Cuda may not be activated when omptarget is ON")
|
|
conflicts("+omptarget +rocm")
|
|
conflicts("+sycl +omptarget")
|
|
conflicts("+sycl +rocm")
|
|
# Using RAJA version as threshold on purpose (no 2024.02 version of RAJAPerf were released).
|
|
conflicts(
|
|
"+sycl",
|
|
when="@:2024.02.99",
|
|
msg="Support for SYCL was introduced in RAJA after 2024.02 release, "
|
|
"please use a newer release.",
|
|
)
|
|
|
|
def _get_sys_type(self, spec):
|
|
sys_type = str(spec.architecture)
|
|
if "SYS_TYPE" in env and True:
|
|
sys_type = env["SYS_TYPE"]
|
|
return sys_type
|
|
|
|
@property
|
|
def cache_name(self):
|
|
hostname = socket.gethostname()
|
|
if "SYS_TYPE" in env:
|
|
hostname = hostname.rstrip("1234567890")
|
|
return "{0}-{1}-{2}@{3}-{4}.cmake".format(
|
|
hostname,
|
|
self._get_sys_type(self.spec),
|
|
self.spec.compiler.name,
|
|
self.spec.compiler.version,
|
|
self.spec.dag_hash(8),
|
|
)
|
|
|
|
def initconfig_compiler_entries(self):
|
|
spec = self.spec
|
|
compiler = self.compiler
|
|
# Default entries are already defined in CachedCMakePackage, inherit them:
|
|
entries = super().initconfig_compiler_entries()
|
|
|
|
if spec.satisfies("+lowopttest"):
|
|
entries.append(cmake_cache_string("CMAKE_CXX_FLAGS_RELEASE", "-O1"))
|
|
|
|
if spec.satisfies("+rocm ^blt@:0.6"):
|
|
entries.insert(0, cmake_cache_path("CMAKE_CXX_COMPILER", spec["hip"].hipcc))
|
|
|
|
# adrienbernede-23-01
|
|
# Maybe we want to share this in the above llnl_link_helpers function.
|
|
compilers_using_cxx14 = ["intel-17", "intel-18", "xl"]
|
|
if any(compiler in self.compiler.cxx for compiler in compilers_using_cxx14):
|
|
entries.append(cmake_cache_string("BLT_CXX_STD", "c++14"))
|
|
|
|
llnl_link_helpers(entries, spec, compiler)
|
|
|
|
return entries
|
|
|
|
def initconfig_hardware_entries(self):
|
|
spec = self.spec
|
|
compiler = self.compiler
|
|
entries = super().initconfig_hardware_entries()
|
|
|
|
entries.append("#------------------{0}".format("-" * 30))
|
|
entries.append("# Package custom hardware settings")
|
|
entries.append("#------------------{0}\n".format("-" * 30))
|
|
|
|
entries.append(cmake_cache_option("ENABLE_OPENMP", "+openmp" in spec))
|
|
|
|
# T benefit from the shared function "cuda_for_radiuss_projects",
|
|
# we do not modify CMAKE_CUDA_FLAGS: it is already appended by the
|
|
# shared function.
|
|
if "+cuda" in spec:
|
|
entries.append(cmake_cache_option("ENABLE_CUDA", True))
|
|
# Shared handling of cuda.
|
|
|
|
# Custom options.
|
|
# We place everything in CMAKE_CUDA_FLAGS_(RELEASE|RELWITHDEBINFO|DEBUG)
|
|
# which are not set by cuda_for_radiuss_projects
|
|
if "xl" in compiler.cxx:
|
|
all_targets_flags = (
|
|
"-Xcompiler -qstrict -Xcompiler -qxlcompatmacros -Xcompiler -qalias=noansi"
|
|
+ "-Xcompiler -qsmp=omp -Xcompiler -qhot -Xcompiler -qnoeh"
|
|
+ "-Xcompiler -qsuppress=1500-029 -Xcompiler -qsuppress=1500-036"
|
|
+ "-Xcompiler -qsuppress=1500-030"
|
|
)
|
|
cuda_release_flags = "-O3 -Xcompiler -O2 " + all_targets_flags
|
|
cuda_reldebinf_flags = "-O3 -g -Xcompiler -O2 " + all_targets_flags
|
|
cuda_debug_flags = "-O0 -g -Xcompiler -O2 " + all_targets_flags
|
|
|
|
elif "gcc" in compiler.cxx:
|
|
all_targets_flags = "-Xcompiler -finline-functions -Xcompiler -finline-limit=20000"
|
|
|
|
cuda_release_flags = "-O3 -Xcompiler -Ofast " + all_targets_flags
|
|
cuda_reldebinf_flags = "-O3 -g -Xcompiler -Ofast " + all_targets_flags
|
|
cuda_debug_flags = "-O0 -g -Xcompiler -O0 " + all_targets_flags
|
|
|
|
else:
|
|
all_targets_flags = "-Xcompiler -finline-functions"
|
|
|
|
cuda_release_flags = "-O3 -Xcompiler -Ofast " + all_targets_flags
|
|
cuda_reldebinf_flags = "-O3 -g -Xcompiler -Ofast " + all_targets_flags
|
|
cuda_debug_flags = "-O0 -g -Xcompiler -O0 " + all_targets_flags
|
|
|
|
entries.append(cmake_cache_string("CMAKE_CUDA_FLAGS_RELEASE", cuda_release_flags))
|
|
entries.append(
|
|
cmake_cache_string("CMAKE_CUDA_FLAGS_RELWITHDEBINFO", cuda_reldebinf_flags)
|
|
)
|
|
entries.append(cmake_cache_string("CMAKE_CUDA_FLAGS_DEBUG", cuda_debug_flags))
|
|
|
|
else:
|
|
entries.append(cmake_cache_option("ENABLE_CUDA", False))
|
|
|
|
if "+rocm" in spec:
|
|
entries.append(cmake_cache_option("ENABLE_HIP", True))
|
|
else:
|
|
entries.append(cmake_cache_option("ENABLE_HIP", False))
|
|
|
|
entries.append(cmake_cache_option("RAJA_ENABLE_TARGET_OPENMP", "+omptarget" in spec))
|
|
if "+omptarget" in spec:
|
|
if "%xl" in spec:
|
|
entries.append(
|
|
cmake_cache_string(
|
|
"BLT_OPENMP_COMPILE_FLAGS", "-qoffload;-qsmp=omp;-qnoeh;-qalias=noansi"
|
|
)
|
|
)
|
|
entries.append(
|
|
cmake_cache_string(
|
|
"BLT_OPENMP_LINK_FLAGS", "-qoffload;-qsmp=omp;-qnoeh;-qalias=noansi"
|
|
)
|
|
)
|
|
if "%clang" in spec:
|
|
entries.append(
|
|
cmake_cache_string(
|
|
"BLT_OPENMP_COMPILE_FLAGS", "-fopenmp;-fopenmp-targets=nvptx64-nvidia-cuda"
|
|
)
|
|
)
|
|
entries.append(
|
|
cmake_cache_string(
|
|
"BLT_OPENMP_LINK_FLAGS", "-fopenmp;-fopenmp-targets=nvptx64-nvidia-cuda"
|
|
)
|
|
)
|
|
|
|
return entries
|
|
|
|
def initconfig_mpi_entries(self):
|
|
spec = self.spec
|
|
entries = super().initconfig_mpi_entries()
|
|
|
|
entries.append(cmake_cache_option("ENABLE_MPI", "+mpi" in spec))
|
|
|
|
return entries
|
|
|
|
def initconfig_package_entries(self):
|
|
spec = self.spec
|
|
entries = []
|
|
|
|
# option_prefix = "RAJA_" if spec.satisfies("@0.14.0:") else ""
|
|
|
|
# TPL locations
|
|
entries.append("#------------------{0}".format("-" * 60))
|
|
entries.append("# TPLs")
|
|
entries.append("#------------------{0}\n".format("-" * 60))
|
|
|
|
entries.append(cmake_cache_path("BLT_SOURCE_DIR", spec["blt"].prefix))
|
|
if "caliper" in self.spec:
|
|
entries.append(
|
|
cmake_cache_path("caliper_DIR", spec["caliper"].prefix + "/share/cmake/caliper/")
|
|
)
|
|
entries.append(
|
|
cmake_cache_path("adiak_DIR", spec["adiak"].prefix + "/lib/cmake/adiak/")
|
|
)
|
|
|
|
# Build options
|
|
entries.append("#------------------{0}".format("-" * 60))
|
|
entries.append("# Build Options")
|
|
entries.append("#------------------{0}\n".format("-" * 60))
|
|
|
|
entries.append(cmake_cache_string("CMAKE_BUILD_TYPE", spec.variants["build_type"].value))
|
|
|
|
entries.append(cmake_cache_string("RAJA_RANGE_ALIGN", "4"))
|
|
entries.append(cmake_cache_string("RAJA_RANGE_MIN_LENGTH", "32"))
|
|
entries.append(cmake_cache_string("RAJA_DATA_ALIGN", "64"))
|
|
|
|
entries.append(cmake_cache_option("RAJA_HOST_CONFIG_LOADED", True))
|
|
|
|
entries.append(cmake_cache_option("BUILD_SHARED_LIBS", "+shared" in spec))
|
|
entries.append(cmake_cache_option("ENABLE_OPENMP", "+openmp" in spec))
|
|
entries.append(cmake_cache_option("RAJA_ENABLE_OPENMP_TASK", "+omptask" in spec))
|
|
entries.append(cmake_cache_option("RAJA_ENABLE_SYCL", spec.satisfies("+sycl")))
|
|
|
|
# C++17
|
|
if spec.satisfies("@2024.07.0:") and spec.satisfies("+sycl"):
|
|
entries.append(cmake_cache_string("BLT_CXX_STD", "c++17"))
|
|
# C++14
|
|
# Using RAJA version as threshold on purpose (no 0.14 version of RAJAPerf were released).
|
|
elif spec.satisfies("@0.14.0:"):
|
|
entries.append(cmake_cache_string("BLT_CXX_STD", "c++14"))
|
|
|
|
entries.append(cmake_cache_option("ENABLE_BENCHMARKS", "tests=benchmarks" in spec))
|
|
entries.append(
|
|
cmake_cache_option("ENABLE_TESTS", "tests=none" not in spec or self.run_tests)
|
|
)
|
|
|
|
entries.append(cmake_cache_option("RAJA_PERFSUITE_USE_CALIPER", "+caliper" in spec))
|
|
|
|
return entries
|
|
|
|
def cmake_args(self):
|
|
return []
|