Ginkgo package: add oneAPI support (#26868)
To use this, you can "spack install intel-oneapi-compilers" and then "spack compiler add" the new compiler. You would need to install with "spack install ginkgo+oneapi%dpcpp"
This commit is contained in:
parent
fbd67feead
commit
b80b085575
@ -0,0 +1,70 @@
|
|||||||
|
commit 83164570f0d3511d114114bcc2b02ad23b753ed0
|
||||||
|
Author: Yuhsiang M. Tsai <yhmtsai@gmail.com>
|
||||||
|
Date: Wed Oct 6 16:33:16 2021 +0200
|
||||||
|
|
||||||
|
syclstd 1.2.1 in new release to propagate subgroup
|
||||||
|
remove 64 subgroup in dense to avoid conj_trans issue on cpu temporarily
|
||||||
|
|
||||||
|
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
||||||
|
index ceb269b1cb..b47388a596 100644
|
||||||
|
--- a/CMakeLists.txt
|
||||||
|
+++ b/CMakeLists.txt
|
||||||
|
@@ -67,7 +67,8 @@ if(MSVC)
|
||||||
|
elseif(GINKGO_BUILD_DPCPP OR CMAKE_CXX_COMPILER MATCHES "dpcpp")
|
||||||
|
# For now always use `-ffp-model=precise` with DPC++. This can be removed when
|
||||||
|
# the floating point issues are fixed.
|
||||||
|
- set(GINKGO_COMPILER_FLAGS "-Wpedantic;-ffp-model=precise" CACHE STRING
|
||||||
|
+ # -sycl-std=1.2.1 (or -sycl-std=2017) is temporary workaround after 2021.4 to propagate subgroup setting correctly
|
||||||
|
+ set(GINKGO_COMPILER_FLAGS "-Wpedantic;-ffp-model=precise;-sycl-std=1.2.1" CACHE STRING
|
||||||
|
"Set the required CXX compiler flags, mainly used for warnings. Current default is `-Wpedantic;-ffp-model=precise`")
|
||||||
|
else()
|
||||||
|
set(GINKGO_COMPILER_FLAGS "-Wpedantic" CACHE STRING
|
||||||
|
diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake
|
||||||
|
index 9d22406f9a..dcc452b293 100644
|
||||||
|
--- a/cmake/create_test.cmake
|
||||||
|
+++ b/cmake/create_test.cmake
|
||||||
|
@@ -40,6 +40,7 @@ function(ginkgo_create_dpcpp_test test_name)
|
||||||
|
add_executable(${test_target_name} ${test_name}.dp.cpp)
|
||||||
|
target_compile_features(${test_target_name} PUBLIC cxx_std_17)
|
||||||
|
target_compile_options(${test_target_name} PRIVATE "${GINKGO_DPCPP_FLAGS}")
|
||||||
|
+ target_compile_options(${test_target_name} PRIVATE "${GINKGO_COMPILER_FLAGS}")
|
||||||
|
target_link_options(${test_target_name} PRIVATE -fsycl-device-code-split=per_kernel)
|
||||||
|
ginkgo_set_test_target_properties(${test_name} ${test_target_name})
|
||||||
|
# Note: MKL_ENV is empty on linux. Maybe need to apply MKL_ENV to all test.
|
||||||
|
diff --git a/dpcpp/CMakeLists.txt b/dpcpp/CMakeLists.txt
|
||||||
|
index fee9ec3639..ce71fd5d3c 100644
|
||||||
|
--- a/dpcpp/CMakeLists.txt
|
||||||
|
+++ b/dpcpp/CMakeLists.txt
|
||||||
|
@@ -68,6 +68,7 @@ target_compile_definitions(ginkgo_dpcpp PRIVATE GKO_COMPILING_DPCPP)
|
||||||
|
|
||||||
|
set(GINKGO_DPCPP_FLAGS ${GINKGO_DPCPP_FLAGS} PARENT_SCOPE)
|
||||||
|
target_compile_options(ginkgo_dpcpp PRIVATE "${GINKGO_DPCPP_FLAGS}")
|
||||||
|
+target_compile_options(ginkgo_dpcpp PRIVATE "${GINKGO_COMPILER_FLAGS}")
|
||||||
|
# Note: add MKL as PRIVATE not PUBLIC (MKL example shows) to avoid propagating
|
||||||
|
# find_package(MKL) everywhere when linking ginkgo (see the MKL example
|
||||||
|
# https://software.intel.com/content/www/us/en/develop/documentation/onemkl-windows-developer-guide/top/getting-started/cmake-config-for-onemkl.html)
|
||||||
|
diff --git a/dpcpp/matrix/dense_kernels.dp.cpp b/dpcpp/matrix/dense_kernels.dp.cpp
|
||||||
|
index 0c89530d1d..9a86ab9cd1 100644
|
||||||
|
--- a/dpcpp/matrix/dense_kernels.dp.cpp
|
||||||
|
+++ b/dpcpp/matrix/dense_kernels.dp.cpp
|
||||||
|
@@ -69,14 +69,14 @@ namespace dpcpp {
|
||||||
|
namespace dense {
|
||||||
|
|
||||||
|
|
||||||
|
+// Disable the 64 subgroup. CPU supports 64 now, but conj_transpose will
|
||||||
|
+// lead CL_OUT_OF_RESOURCES. TODO: investigate this issue.
|
||||||
|
using KCFG_1D = ConfigSet<11, 7>;
|
||||||
|
constexpr auto kcfg_1d_list =
|
||||||
|
- syn::value_list<std::uint32_t, KCFG_1D::encode(512, 64),
|
||||||
|
- KCFG_1D::encode(512, 32), KCFG_1D::encode(512, 16),
|
||||||
|
- KCFG_1D::encode(256, 32), KCFG_1D::encode(256, 16),
|
||||||
|
- KCFG_1D::encode(256, 8)>();
|
||||||
|
-constexpr auto subgroup_list =
|
||||||
|
- syn::value_list<std::uint32_t, 64, 32, 16, 8, 4>();
|
||||||
|
+ syn::value_list<std::uint32_t, KCFG_1D::encode(512, 32),
|
||||||
|
+ KCFG_1D::encode(512, 16), KCFG_1D::encode(256, 32),
|
||||||
|
+ KCFG_1D::encode(256, 16), KCFG_1D::encode(256, 8)>();
|
||||||
|
+constexpr auto subgroup_list = syn::value_list<std::uint32_t, 32, 16, 8, 4>();
|
||||||
|
constexpr auto kcfg_1d_array = syn::as_array(kcfg_1d_list);
|
||||||
|
constexpr int default_block_size = 256;
|
||||||
|
|
@ -3,6 +3,7 @@
|
|||||||
#
|
#
|
||||||
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
|
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
|
||||||
|
|
||||||
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from spack import *
|
from spack import *
|
||||||
@ -31,6 +32,7 @@ class Ginkgo(CMakePackage, CudaPackage, ROCmPackage):
|
|||||||
variant('shared', default=True, description='Build shared libraries')
|
variant('shared', default=True, description='Build shared libraries')
|
||||||
variant('full_optimizations', default=False, description='Compile with all optimizations')
|
variant('full_optimizations', default=False, description='Compile with all optimizations')
|
||||||
variant('openmp', default=sys.platform != 'darwin', description='Build with OpenMP')
|
variant('openmp', default=sys.platform != 'darwin', description='Build with OpenMP')
|
||||||
|
variant('oneapi', default=False, description='Build with oneAPI support')
|
||||||
variant('develtools', default=False, description='Compile with develtools enabled')
|
variant('develtools', default=False, description='Compile with develtools enabled')
|
||||||
variant('hwloc', default=False, description='Enable HWLOC support')
|
variant('hwloc', default=False, description='Enable HWLOC support')
|
||||||
variant('build_type', default='Release',
|
variant('build_type', default='Release',
|
||||||
@ -49,9 +51,13 @@ class Ginkgo(CMakePackage, CudaPackage, ROCmPackage):
|
|||||||
depends_on('googletest', type="test")
|
depends_on('googletest', type="test")
|
||||||
depends_on('numactl', type="test", when="+hwloc")
|
depends_on('numactl', type="test", when="+hwloc")
|
||||||
|
|
||||||
|
depends_on('intel-oneapi-mkl', when="+oneapi")
|
||||||
|
depends_on('intel-oneapi-dpl', when="+oneapi")
|
||||||
|
|
||||||
conflicts('%gcc@:5.2.9')
|
conflicts('%gcc@:5.2.9')
|
||||||
conflicts("+rocm", when="@:1.1.1")
|
conflicts("+rocm", when="@:1.1.1")
|
||||||
conflicts("+cuda", when="+rocm")
|
conflicts("+cuda", when="+rocm")
|
||||||
|
conflicts("+openmp", when="+oneapi")
|
||||||
|
|
||||||
# ROCm 4.1.0 breaks platform settings which breaks Ginkgo's HIP support.
|
# ROCm 4.1.0 breaks platform settings which breaks Ginkgo's HIP support.
|
||||||
conflicts("^hip@4.1.0:", when="@:1.3.0")
|
conflicts("^hip@4.1.0:", when="@:1.3.0")
|
||||||
@ -63,18 +69,39 @@ class Ginkgo(CMakePackage, CudaPackage, ROCmPackage):
|
|||||||
patch('1.4.0_skip_invalid_smoke_tests.patch', when='@master')
|
patch('1.4.0_skip_invalid_smoke_tests.patch', when='@master')
|
||||||
patch('1.4.0_skip_invalid_smoke_tests.patch', when='@1.4.0')
|
patch('1.4.0_skip_invalid_smoke_tests.patch', when='@1.4.0')
|
||||||
|
|
||||||
|
# Newer DPC++ compilers use the updated SYCL 2020 standard which change
|
||||||
|
# kernel attribute propagation rules. This doesn't work well with the
|
||||||
|
# initial Ginkgo oneAPI support.
|
||||||
|
patch('1.4.0_dpcpp_use_old_standard.patch', when='+oneapi @master')
|
||||||
|
patch('1.4.0_dpcpp_use_old_standard.patch', when='+oneapi @1.4.0')
|
||||||
|
|
||||||
|
def setup_build_environment(self, env):
|
||||||
|
spec = self.spec
|
||||||
|
if '+oneapi' in spec:
|
||||||
|
env.set('MKLROOT',
|
||||||
|
join_path(spec['intel-oneapi-mkl'].prefix,
|
||||||
|
'mkl', 'latest'))
|
||||||
|
env.set('DPL_ROOT',
|
||||||
|
join_path(spec['intel-oneapi-dpl'].prefix,
|
||||||
|
'dpl', 'latest'))
|
||||||
|
|
||||||
def cmake_args(self):
|
def cmake_args(self):
|
||||||
# Check that the have the correct C++ standard is available
|
# Check that the have the correct C++ standard is available
|
||||||
if self.spec.satisfies('@:1.2.0'):
|
if self.spec.satisfies('@:1.2.0'):
|
||||||
try:
|
try:
|
||||||
self.compiler.cxx11_flag
|
self.compiler.cxx11_flag
|
||||||
except UnsupportedCompilerFlag:
|
except UnsupportedCompilerFlag:
|
||||||
InstallError('Ginkgo requires a C++11-compliant C++ compiler')
|
raise InstallError('Ginkgo requires a C++11-compliant C++ compiler')
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
self.compiler.cxx14_flag
|
self.compiler.cxx14_flag
|
||||||
except UnsupportedCompilerFlag:
|
except UnsupportedCompilerFlag:
|
||||||
InstallError('Ginkgo requires a C++14-compliant C++ compiler')
|
raise InstallError('Ginkgo requires a C++14-compliant C++ compiler')
|
||||||
|
|
||||||
|
cxx_is_dpcpp = os.path.basename(self.compiler.cxx) == "dpcpp"
|
||||||
|
if self.spec.satisfies('+oneapi') and not cxx_is_dpcpp:
|
||||||
|
raise InstallError("Ginkgo's oneAPI backend requires the" +
|
||||||
|
"DPC++ compiler as main CXX compiler.")
|
||||||
|
|
||||||
spec = self.spec
|
spec = self.spec
|
||||||
from_variant = self.define_from_variant
|
from_variant = self.define_from_variant
|
||||||
@ -85,6 +112,7 @@ def cmake_args(self):
|
|||||||
from_variant('BUILD_SHARED_LIBS', 'shared'),
|
from_variant('BUILD_SHARED_LIBS', 'shared'),
|
||||||
from_variant('GINKGO_JACOBI_FULL_OPTIMIZATIONS', 'full_optimizations'),
|
from_variant('GINKGO_JACOBI_FULL_OPTIMIZATIONS', 'full_optimizations'),
|
||||||
from_variant('GINKGO_BUILD_HWLOC', 'hwloc'),
|
from_variant('GINKGO_BUILD_HWLOC', 'hwloc'),
|
||||||
|
from_variant('GINKGO_BUILD_DPCPP', 'oneapi'),
|
||||||
from_variant('GINKGO_DEVEL_TOOLS', 'develtools'),
|
from_variant('GINKGO_DEVEL_TOOLS', 'develtools'),
|
||||||
# As we are not exposing benchmarks, examples, tests nor doc
|
# As we are not exposing benchmarks, examples, tests nor doc
|
||||||
# as part of the installation, disable building them altogether.
|
# as part of the installation, disable building them altogether.
|
||||||
|
Loading…
Reference in New Issue
Block a user