vtk-m: correct cuda_arch variant behavior (#38697)

Co-authored-by: eugeneswalker <eugenesunsetwalker@gmail.com>
2023-07-12 08:34:50 -04:00
parent 7f2be62ff2
commit 37ef31dc22
5 changed files with 169 additions and 12 deletions
--- a/lib/spack/spack/build_systems/cmake.py
+++ b/lib/spack/spack/build_systems/cmake.py
@@ -296,8 +296,46 @@ def std_args(pkg, generator=None):
                define("CMAKE_PREFIX_PATH", spack.build_environment.get_cmake_prefix_path(pkg)),
            ]
        )
+
        return args

+    @staticmethod
+    def define_cuda_architectures(pkg):
+        """Returns the str ``-DCMAKE_CUDA_ARCHITECTURES:STRING=(expanded cuda_arch)``.
+
+        ``cuda_arch`` is a variant composed of a list of target CUDA architectures and
+        it is declared in the cuda package.
+
+        This method is a no-op for cmake<3.18 or when the ``cuda_arch`` variant is not set.
+
+        """
+        cmake_flag = str()
+        if "cuda_arch" in pkg.spec.variants and pkg.spec.satisfies("^cmake@3.18:"):
+            cmake_flag = CMakeBuilder.define(
+                "CMAKE_CUDA_ARCHITECTURES", pkg.spec.variants["cuda_arch"].value
+            )
+
+        return cmake_flag
+
+    @staticmethod
+    def define_hip_architectures(pkg):
+        """Returns the str ``-DCMAKE_HIP_ARCHITECTURES:STRING=(expanded amdgpu_target)``.
+
+        ``amdgpu_target`` is a variant composed of a list of the target HIP
+        architectures and it is declared in the rocm package.
+
+        This method is a no-op for cmake<3.21 or when the ``amdgpu_target`` variant is
+        not set.
+
+        """
+        cmake_flag = str()
+        if "amdgpu_target" in pkg.spec.variants and pkg.spec.satisfies("^cmake@3.21:"):
+            cmake_flag = CMakeBuilder.define(
+                "CMAKE_HIP_ARCHITECTURES", pkg.spec.variants["amdgpu_target"].value
+            )
+
+        return cmake_flag
+
    @staticmethod
    def define(cmake_var, value):
        """Return a CMake command line argument that defines a variable.
--- a/lib/spack/spack/test/build_systems.py
+++ b/lib/spack/spack/test/build_systems.py
@@ -311,6 +311,16 @@ def test_define_from_variant(self):
        with pytest.raises(KeyError, match="not a variant"):
            s.package.define_from_variant("NONEXISTENT")

+    def test_cmake_std_args_cuda(self, default_mock_concretization):
+        s = default_mock_concretization("vtk-m +cuda cuda_arch=70 ^cmake@3.23")
+        option = spack.build_systems.cmake.CMakeBuilder.define_cuda_architectures(s.package)
+        assert "-DCMAKE_CUDA_ARCHITECTURES:STRING=70" == option
+
+    def test_cmake_std_args_hip(self, default_mock_concretization):
+        s = default_mock_concretization("vtk-m +rocm amdgpu_target=gfx900 ^cmake@3.23")
+        option = spack.build_systems.cmake.CMakeBuilder.define_hip_architectures(s.package)
+        assert "-DCMAKE_HIP_ARCHITECTURES:STRING=gfx900" == option
+

@pytest.mark.usefixtures("config", "mock_packages")
 class TestDownloadMixins:
--- a/var/spack/repos/builtin.mock/packages/vtk-m/package.py
+++ b/var/spack/repos/builtin.mock/packages/vtk-m/package.py
@@ -0,0 +1,37 @@
+# Copyright 2013-2023 Lawrence Livermore National Security, LLC and other
+# Spack Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+
+from spack.package import *
+
+
+class VtkM(CMakePackage):
+    """This is a fake vtk-m package used to test how CMake packages forward the
+    ``cuda_arch`` and ``amdgpu_target`` variants to CMake."""
+
+    homepage = "http://www.spack-fake-vtk-m.org"
+    url = "http://www.spack-fake-vtk-m.org/downloads/vtk-m-1.0.tar.gz"
+
+    version("1.0", md5="0123456789abcdef0123456789abcdef")
+
+    variant("cuda", default=False, description="Build with CUDA")
+    variant(
+        "cuda_arch",
+        description="CUDA architecture",
+        default="none",
+        values=("70", "none"),
+        multi=False,
+        when="+cuda",
+    )
+
+    variant("rocm", default=False, description="Enable ROCm support")
+    variant(
+        "amdgpu_target",
+        default="none",
+        description="AMD GPU architecture",
+        values=("gfx900", "none"),
+        multi=False,
+        when="+rocm",
+    )
+    depends_on("cmake@3.18:")
--- a/var/spack/repos/builtin/packages/vtk-m/package.py
+++ b/var/spack/repos/builtin/packages/vtk-m/package.py
@@ -135,6 +135,10 @@ class VtkM(CMakePackage, CudaPackage, ROCmPackage):
    # Patch
    patch("diy-include-cstddef.patch", when="@1.5.3:1.8.0")

+    # VTK-M PR#2972
+    # https://gitlab.kitware.com/vtk/vtk-m/-/merge_requests/2972
+    patch("vtkm-cuda-swap-conflict-pr2972.patch", when="@1.9 +cuda ^cuda@12:")
+
    def cmake_args(self):
        spec = self.spec
        options = []
@@ -231,24 +235,29 @@ def cmake_args(self):
            if "+cuda_native" in spec:
                options.append("-DVTKm_ENABLE_CUDA:BOOL=ON")
                options.append("-DCMAKE_CUDA_HOST_COMPILER={0}".format(env["SPACK_CXX"]))
-                if "cuda_arch" in spec.variants:
-                    cuda_value = spec.variants["cuda_arch"].value
-                    cuda_arch = cuda_value[0]
-                    if cuda_arch in gpu_name_table:
-                        vtkm_cuda_arch = gpu_name_table[cuda_arch]
-                        options.append("-DVTKm_CUDA_Architecture={0}".format(vtkm_cuda_arch))
+
+                if spec.satisfies("@1.9.0:"):
+                    options.append(self.builder.define_cuda_architectures(self))
+
                else:
-                    # this fix is necessary if compiling platform has cuda, but
-                    # no devices (this is common for front end nodes on hpc
-                    # clusters). We choose volta as a lowest common denominator
-                    options.append("-DVTKm_CUDA_Architecture=volta")
+                    # VTKm_CUDA_Architecture only accepts a single CUDA arch
+                    num_cuda_arch = spec.variants["cuda_arch"].value[0]
+                    str_cuda_arch = str()
+
+                    try:
+                        str_cuda_arch = gpu_name_table[num_cuda_arch]
+                    except KeyError:
+                        raise InstallError(
+                            f"cuda_arch={num_cuda_arch} needs cmake>=3.18 & VTK-m>=1.9.0"
+                        )
+                    options.append(f"-DVTKm_CUDA_Architecture={str_cuda_arch}")
+
            else:
                options.append("-DVTKm_ENABLE_CUDA:BOOL=OFF")

            # hip support
            if "+rocm" in spec:
-                archs = ",".join(self.spec.variants["amdgpu_target"].value)
-                options.append("-DCMAKE_HIP_ARCHITECTURES:STRING={0}".format(archs))
+                options.append(self.builder.define_hip_architectures(self))

            # openmp support
            if "+openmp" in spec:
--- a/var/spack/repos/builtin/packages/vtk-m/vtkm-cuda-swap-conflict-pr2972.patch
+++ b/var/spack/repos/builtin/packages/vtk-m/vtkm-cuda-swap-conflict-pr2972.patch
@@ -0,0 +1,63 @@
+diff -ruN spack-src/vtkm/exec/cuda/internal/ExecutionPolicy.h spack-src-patched/vtkm/exec/cuda/internal/ExecutionPolicy.h
+--- spack-src/vtkm/exec/cuda/internal/ExecutionPolicy.h	2022-10-11 12:07:59.000000000 -0700
+++ spack-src-patched/vtkm/exec/cuda/internal/ExecutionPolicy.h	2023-07-06 17:23:35.898388363 -0700
+@@ -17,6 +17,7 @@
+ #include <vtkm/exec/cuda/internal/ThrustPatches.h>
+ VTKM_THIRDPARTY_PRE_INCLUDE
+ #include <thrust/execution_policy.h>
+#include <thrust/sort.h>
+ #include <thrust/system/cuda/execution_policy.h>
+ #include <thrust/system/cuda/memory.h>
+ VTKM_THIRDPARTY_POST_INCLUDE
+diff -ruN spack-src/vtkm/Swap.h spack-src-patched/vtkm/Swap.h
+--- spack-src/vtkm/Swap.h	2022-10-11 12:07:59.000000000 -0700
+++ spack-src-patched/vtkm/Swap.h	2023-07-06 17:25:31.623393290 -0700
+@@ -24,21 +24,31 @@
+ 
+ /// Performs a swap operation. Safe to call from cuda code.
+ #if defined(VTKM_CUDA)
+// CUDA 12 adds a `cub::Swap` function that creates ambiguity with `vtkm::Swap`.
+// This happens when a function from the `cub` namespace is called with an object of a class
+// defined in the `vtkm` namespace as an argument. If that function has an unqualified call to
+// `Swap`, it results in ADL being used, causing the templated functions `cub::Swap` and
+// `vtkm::Swap` to conflict.
+#if defined(VTKM_CUDA_VERSION_MAJOR) && (VTKM_CUDA_VERSION_MAJOR >= 12) && \
+  defined(VTKM_CUDA_DEVICE_PASS)
+using cub::Swap;
+#else
+ template <typename T>
+-VTKM_EXEC_CONT void Swap(T& a, T& b)
+VTKM_EXEC_CONT inline void Swap(T& a, T& b)
+ {
+-  using namespace thrust;
+  using thrust::swap;
+   swap(a, b);
+ }
+#endif
+ #elif defined(VTKM_HIP)
+ template <typename T>
+-__host__ void Swap(T& a, T& b)
+__host__ inline void Swap(T& a, T& b)
+ {
+-  using namespace std;
+  using std::swap;
+   swap(a, b);
+ }
+ template <typename T>
+-__device__ void Swap(T& a, T& b)
+__device__ inline void Swap(T& a, T& b)
+ {
+   T temp = a;
+   a = b;
+@@ -46,9 +56,9 @@
+ }
+ #else
+ template <typename T>
+-VTKM_EXEC_CONT void Swap(T& a, T& b)
+VTKM_EXEC_CONT inline void Swap(T& a, T& b)
+ {
+-  using namespace std;
+  using std::swap;
+   swap(a, b);
+ }
+ #endif