CP2K/SIRIUS: add support for building with CUDA, fix CP2K build with MKL (#11418)

* cp2k/sirius: add CUDA support * cp2k: fix building with MKL * sirius: make sure to link against Spack's blas/lapack/scalapack
2019-08-13 19:59:08 +02:00
parent 68c176998a
commit 95fec7adfc
3 changed files with 169 additions and 16 deletions
--- a/var/spack/repos/builtin/packages/cp2k/package.py
+++ b/var/spack/repos/builtin/packages/cp2k/package.py
@@ -10,7 +10,7 @@
 import spack.util.environment
-class Cp2k(MakefilePackage):
+class Cp2k(MakefilePackage, CudaPackage):
    """CP2K is a quantum chemistry and solid state physics software package
    that can perform atomistic simulations of solid state, liquid, molecular,
    periodic, material, crystal, and biological systems
@@ -44,6 +44,23 @@ class Cp2k(MakefilePackage):
            description=('Enable planewave electronic structure'
                         ' calculations via SIRIUS'))
    # override cuda_arch from CudaPackage since we only support one arch
    # at a time and only specific ones for which we have parameter files
    # for optimal kernels
    variant('cuda_arch',
            description='CUDA architecture',
            default='none',
            values=('none', '35', '37', '60'),
            multi=False)
    variant('cuda_arch_35_k20x', default=False,
            description=('CP2K (resp. DBCSR) has specific parameter sets for'
                         ' different GPU models. Enable this when building'
                         ' with cuda_arch=35 for a K20x instead of a K40'))
    variant('cuda_fft', default=False,
            description=('Use CUDA also for FFTs in the PW part of CP2K'))
    variant('cuda_blas', default=False,
            description=('Use CUBLAS for general matrix operations in DBCSR'))
    depends_on('python', type='build')
    depends_on('fftw@3:', when='~openmp')
@@ -91,19 +108,23 @@ class Cp2k(MakefilePackage):
    # to get JSON-based UPF format support used in combination with SIRIUS
    depends_on('json-fortran', when='+sirius')
    # the bundled libcusmm uses numpy in the parameter prediction (v7+)
    depends_on('py-numpy', when='@7:+cuda', type='build')
    # PEXSI, ELPA and SIRIUS need MPI in CP2K
    conflicts('~mpi', '+pexsi')
    conflicts('~mpi', '+elpa')
    conflicts('~mpi', '+sirius')
    conflicts('+sirius', '@:6.999')  # sirius support was introduced in 7+
    conflicts('~cuda', '+cuda_fft')
    conflicts('~cuda', '+cuda_blas')
    # Apparently cp2k@4.1 needs an "experimental" version of libwannier.a
    # which is only available by contacting the developer directly. See the
    # INSTALL file in the stage of cp2k@4.1
    depends_on('wannier90', when='@3.0+mpi', type='build')
    # TODO : add dependency on CUDA
    # CP2K needs compiler specific compilation flags, e.g. optflags
    conflicts('%clang')
    conflicts('%cray')
@@ -161,19 +182,23 @@ def edit(self, spec, prefix):
        elif '^mpi@2:' in spec:
            cppflags.append('-D__MPI_VERSION=2')
        if '^intel-mkl' in spec:
            cppflags.append('-D__FFTSG')
        cflags = optimization_flags[self.spec.compiler.name][:]
        cxxflags = optimization_flags[self.spec.compiler.name][:]
        fcflags = optimization_flags[self.spec.compiler.name][:]
        nvflags = ['-O3']
        ldflags = []
        libs = []
        gpuver = ''
        if '%intel' in spec:
            cflags.append('-fp-model precise')
            cxxflags.append('-fp-model precise')
-            fcflags.extend(['-fp-model source', '-heap-arrays 64'])
+            fcflags += [
                '-fp-model source',
                '-heap-arrays 64',
                '-g',
                '-traceback',
            ]
        elif '%gcc' in spec:
            fcflags.extend([
                '-ffree-form',
@@ -184,8 +209,12 @@ def edit(self, spec, prefix):
            fcflags.extend(['-Mfreeform', '-Mextend'])
        if '+openmp' in spec:
            cflags.append(self.compiler.openmp_flag)
            cxxflags.append(self.compiler.openmp_flag)
            fcflags.append(self.compiler.openmp_flag)
            ldflags.append(self.compiler.openmp_flag)
            nvflags.append('-Xcompiler="{0}"'.format(
                self.compiler.openmp_flag))
        ldflags.append(fftw.libs.search_flags)
@@ -231,6 +260,11 @@ def edit(self, spec, prefix):
        ldflags.append((lapack + blas).search_flags)
        libs.extend([str(x) for x in (fftw.libs, lapack, blas)])
        if self.spec.variants['blas'].value == 'mkl':
            cppflags += ['-D__MKL']
        elif self.spec.variants['blas'].value == 'accelerate':
            cppflags += ['-D__ACCELERATE']
        # MPI
        if '+mpi' in self.spec:
            cppflags.extend([
@@ -309,15 +343,38 @@ def edit(self, spec, prefix):
            sirius = spec['sirius']
            cppflags.append('-D__SIRIUS')
            fcflags += ['-I{0}'.format(os.path.join(sirius.prefix, 'fortran'))]
-            libs += [
+            libs += list(sirius.libs)
                os.path.join(sirius.libs.directories[0],
                             'libsirius_f.{0}'.format(dso_suffix))
            ]
            cppflags.append('-D__JSON')
            fcflags += ['$(shell pkg-config --cflags json-fortran)']
            libs += ['$(shell pkg-config --libs json-fortran)']
        if self.spec.satisfies('+cuda'):
            cppflags += ['-D__ACC']
            libs += ['-lcudart', '-lnvrtc', '-lcuda']
            if self.spec.satisfies('+cuda_blas'):
                cppflags += ['-D__DBCSR_ACC=2']
                libs += ['-lcublas']
            else:
                cppflags += ['-D__DBCSR_ACC']
            if self.spec.satisfies('+cuda_fft'):
                cppflags += ['-D__PW_CUDA']
                libs += ['-lcufft', '-lcublas']
            cuda_arch = self.spec.variants['cuda_arch'].value
            if cuda_arch:
                gpuver = {
                    '35': 'K40',
                    '37': 'K80',
                    '60': 'P100',
                }[cuda_arch]
                if (cuda_arch == '35'
                        and self.spec.satisfies('+cuda_arch_35_k20x')):
                    gpuver = 'K20X'
        if 'smm=libsmm' in spec:
            lib_dir = os.path.join(
                'lib', self.makefile_architecture, self.makefile_version
@@ -349,6 +406,7 @@ def edit(self, spec, prefix):
        cflags.extend(cppflags)
        cxxflags.extend(cppflags)
        fcflags.extend(cppflags)
        nvflags.extend(cppflags)
        with open(self.makefile, 'w') as mkf:
            if '+plumed' in self.spec:
@@ -373,11 +431,16 @@ def edit(self, spec, prefix):
            mkf.write('FC = {0}\n'.format(fc))
            mkf.write('LD = {0}\n'.format(fc))
            if self.spec.satisfies('+cuda'):
                mkf.write('NVCC = {0}\n'.format(
                    os.path.join(self.spec['cuda'].prefix, 'bin', 'nvcc')))
            # Write compiler flags to file
            mkf.write('DFLAGS = {0}\n\n'.format(' '.join(dflags)))
            mkf.write('CPPFLAGS = {0}\n\n'.format(' '.join(cppflags)))
            mkf.write('CFLAGS = {0}\n\n'.format(' '.join(cflags)))
            mkf.write('CXXFLAGS = {0}\n\n'.format(' '.join(cxxflags)))
            mkf.write('NVFLAGS = {0}\n\n'.format(' '.join(nvflags)))
            mkf.write('FCFLAGS = {0}\n\n'.format(' '.join(fcflags)))
            mkf.write('LDFLAGS = {0}\n\n'.format(' '.join(ldflags)))
            if '%intel' in spec:
@@ -385,6 +448,7 @@ def edit(self, spec, prefix):
                    ' '.join(ldflags) + ' -nofor_main')
                )
            mkf.write('LIBS = {0}\n\n'.format(' '.join(libs)))
            mkf.write('GPUVER = {0}\n\n'.format(gpuver))
            mkf.write('DATA_DIR = {0}\n\n'.format(self.prefix.share.data))
    @property
--- a/var/spack/repos/builtin/packages/sirius/cmake-fix-shared-library-installation.patch
+++ b/var/spack/repos/builtin/packages/sirius/cmake-fix-shared-library-installation.patch
@@ -0,0 +1,53 @@
 From 4b51d07369b5972f3917cc8f2425caa814ae0975 Mon Sep 17 00:00:00 2001
 From: =?UTF-8?q?Tiziano=20M=C3=BCller?= <tiziano.mueller@chem.uzh.ch>
 Date: Thu, 16 May 2019 10:53:04 +0200
 Subject: [PATCH] cmake: fix shared library installation
 fixes the error during `make install`:
    TARGETS given no LIBRARY DESTINATION for shared library target
 when building shared libraries.
 ... and respect the current OS/distro library dir.
 ---
 src/CMakeLists.txt | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)
 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
 index 65307dd3..2b7a5279 100644
 --- a/src/CMakeLists.txt
 +++ b/src/CMakeLists.txt
@@ -2,6 +2,8 @@
 # working correctly
 # list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -fPIC")
 +include(GNUInstallDirs)  # required to get a proper LIBDIR variable
 +
 # keep two libraries: libsirius and libsirius_f
 if(USE_CUDA)
@@ -9,13 +11,18 @@ if(USE_CUDA)
   file(GLOB_RECURSE CUFILES_KERNELS "Kernels/*.cu")
   add_library(sirius_cu "${CUFILES_KERNELS};${CUFILES_SDDK}")
   set_target_properties(sirius_cu PROPERTIES POSITION_INDEPENDENT_CODE ON)
 -  INSTALL (TARGETS sirius_cu ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib/)
 +  INSTALL (TARGETS sirius_cu
 +    ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}"
 +    LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"
 +    )
 endif()
 if(CREATE_FORTRAN_BINDINGS)
   add_library(sirius_f "sirius_api.cpp;sirius.f90")
   SIRIUS_SETUP_TARGET(sirius_f)
   INSTALL (TARGETS sirius_f ARCHIVE DESTINATION
 -    ${CMAKE_INSTALL_PREFIX}/lib/)
 +    ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}"
 +    LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"
 +    )
   set_target_properties(sirius_f PROPERTIES POSITION_INDEPENDENT_CODE ON)
   set_target_properties(sirius_f PROPERTIES Fortran_MODULE_DIRECTORY mod_files)
   target_link_libraries(sirius_f PUBLIC OpenMP::OpenMP_CXX)
 -- 
 2.16.4
--- a/var/spack/repos/builtin/packages/sirius/package.py
+++ b/var/spack/repos/builtin/packages/sirius/package.py
@@ -8,7 +8,7 @@
 from spack import *
-class Sirius(CMakePackage):
+class Sirius(CMakePackage, CudaPackage):
    """Domain specific library for electronic structure calculations"""
    homepage = "https://github.com/electronic-structure/SIRIUS"
@@ -38,12 +38,14 @@ class Sirius(CMakePackage):
    depends_on('elpa~openmp', when='+elpa~openmp')
    depends_on('libvdwxc+mpi', when='+vdwxc')
    depends_on('scalapack', when='+scalapack')
    depends_on("cuda", when="+cuda")
    # TODO:
-    # add support for MKL, CUDA, MAGMA, CRAY_LIBSCI, Python bindings, testing
+    # add support for MKL, MAGMA, CRAY_LIBSCI, Python bindings, testing
    patch("strip-spglib-include-subfolder.patch")
    patch("link-libraries-fortran.patch")
    patch("cmake-fix-shared-library-installation.patch")
    @property
    def libs(self):
@@ -52,12 +54,17 @@ def libs(self):
        if self.spec.satisfies('+fortran'):
            libraries += ['libsirius_f']
        if self.spec.satisfies('+cuda'):
            libraries += ['libsirius_cu']
        return find_libraries(
            libraries, root=self.prefix,
            shared=self.spec.satisfies('+shared'), recursive=True
        )
    def cmake_args(self):
        spec = self.spec
        def _def(variant, flag=None):
            """Returns "-DUSE_VARIANT:BOOL={ON,OFF}" depending on whether
               +variant is set. If the CMake flag differs from the variant
@@ -68,7 +75,7 @@ def _def(variant, flag=None):
                flag if flag else "USE_{0}".format(
                    variant.strip('+~').upper()
                ),
-                "ON" if self.spec.satisfies(variant) else "OFF"
+                "ON" if spec.satisfies(variant) else "OFF"
            )
        args = [
@@ -78,13 +85,42 @@ def _def(variant, flag=None):
            _def('+vdwxc'),
            _def('+scalapack'),
            _def('+fortran', 'CREATE_FORTRAN_BINDINGS'),
            _def('+cuda')
        ]
-        if self.spec.satisfies('+elpa'):
+        lapack = spec['lapack']
        blas = spec['blas']
        args += [
            '-DLAPACK_FOUND=true',
            '-DLAPACK_INCLUDE_DIRS={0}'.format(lapack.prefix.include),
            '-DLAPACK_LIBRARIES={0}'.format(lapack.libs.joined(';')),
            '-DBLAS_FOUND=true',
            '-DBLAS_INCLUDE_DIRS={0}'.format(blas.prefix.include),
            '-DBLAS_LIBRARIES={0}'.format(blas.libs.joined(';')),
        ]
        if '+scalapack' in spec:
            args += [
                '-DSCALAPACK_FOUND=true',
                '-DSCALAPACK_INCLUDE_DIRS={0}'.format(
                    spec['scalapack'].prefix.include),
                '-DSCALAPACK_LIBRARIES={0}'.format(
                    spec['scalapack'].libs.joined(';')),
            ]
        if spec.satisfies('+elpa'):
            elpa_incdir = os.path.join(
-                self.spec['elpa'].headers.directories[0],
+                spec['elpa'].headers.directories[0],
                'elpa'
            )
            args += ["-DELPA_INCLUDE_DIR={0}".format(elpa_incdir)]
        if spec.satisfies('+cuda'):
            cuda_arch = spec.variants['cuda_arch'].value
            if cuda_arch:
                args += [
                    '-DCMAKE_CUDA_FLAGS=-arch=sm_{0}'.format(cuda_arch[0])
                ]
        return args