timemory package: add versions including 3.2.3; update options (#24825)

* add variants: python hatchet/line-profiler support and likwid
  nvmon support
* removed ompt_standalone/ompt_llvm variants
This commit is contained in:
Jonathan R. Madsen 2021-07-21 13:54:04 -05:00 committed by GitHub
parent a68abc15c5
commit 80592613ad
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -8,8 +8,9 @@
from spack import * from spack import *
class Timemory(CMakePackage): class Timemory(CMakePackage, PythonPackage):
"""Timing + Memory + Hardware Counter Utilities for C/C++/CUDA/Python""" '''Modular profiling toolkit and suite of libraries and tools
for C/C++/Fortran/CUDA/Python'''
homepage = 'https://timemory.readthedocs.io/en/latest/' homepage = 'https://timemory.readthedocs.io/en/latest/'
git = 'https://github.com/NERSC/timemory.git' git = 'https://github.com/NERSC/timemory.git'
@ -17,21 +18,46 @@ class Timemory(CMakePackage):
version('master', branch='master', submodules=True) version('master', branch='master', submodules=True)
version('develop', branch='develop', submodules=True) version('develop', branch='develop', submodules=True)
version('3.0.1', commit='ef638e1cde90275ce7c0e12fc4902c27bcbdeefd', version('3.2.3', commit='d535e478646e331a4c65cfd8c8f759c9a363ccc9', submodules=True)
submodules=True) version('3.2.2', commit='4725f4e4a3eea3b80b50a01ea088b5d5a1cf40ab', submodules=True)
version('3.0.0', commit='b36b1673b2c6b7ff3126d8261bef0f8f176c7beb', version('3.2.1', commit='76ff978d9b1568b7f88a3de82bb84a2042003630', submodules=True)
submodules=True) version('3.2.0', commit='2bdd28e87224558060e27da90f9b6fcfb20dd324', submodules=True)
version('3.1.0', commit='b12de7eeed699d820693fecd6136daff744f21b6', submodules=True)
version('3.0.1', commit='ef638e1cde90275ce7c0e12fc4902c27bcbdeefd', submodules=True)
version('3.0.0', commit='b36b1673b2c6b7ff3126d8261bef0f8f176c7beb', submodules=True)
variant('shared', default=True, description='Build shared libraries') variant('shared', default=True, description='Build shared libraries')
variant('static', default=False, description='Build static libraries') variant('static', default=False, description='Build static libraries')
variant('pic', default=True, description='Build position independent code')
variant('install_headers', default=True, description='Install headers')
variant(
'install_config', default=True, description='Install cmake configuration files'
)
variant('python', default=False, description='Enable Python support') variant('python', default=False, description='Enable Python support')
variant('python_deps', default=False, variant(
description='Install non-critical python dependencies ' 'python_hatchet',
'(may significantly increase spack install time)') default=False,
variant('mpi', default=False, description='Build Python hatchet submodule '
description='Enable support for MPI aggregation') '(does not conflict with py-hatchet)',
variant('nccl', default=False, )
description='Enable support for wrapping NCCL functions') variant(
'python_line_profiler',
default=False,
description=(
'Build timemorys extended version of py-line-profiler '
'(does not conflict with py-line-profiler)'
),
)
variant(
'python_deps',
default=False,
description='Install non-critical python dependencies '
'(may significantly increase spack install time)',
)
variant('mpi', default=False, description='Enable support for MPI aggregation')
variant(
'nccl', default=False, description='Enable support for wrapping NCCL functions'
)
variant('tau', default=False, description='Enable TAU support') variant('tau', default=False, description='Enable TAU support')
variant('papi', default=False, description='Enable PAPI support') variant('papi', default=False, description='Enable PAPI support')
variant('cuda', default=False, description='Enable CUDA support') variant('cuda', default=False, description='Enable CUDA support')
@ -40,67 +66,157 @@ class Timemory(CMakePackage):
variant('vtune', default=False, description='Enable VTune support') variant('vtune', default=False, description='Enable VTune support')
variant('upcxx', default=False, description='Enable UPC++ support') variant('upcxx', default=False, description='Enable UPC++ support')
variant('gotcha', default=False, description='Enable GOTCHA support') variant('gotcha', default=False, description='Enable GOTCHA support')
variant('likwid', default=False, description='Enable LIKWID support') variant(
'likwid',
default=False,
description='Enable LIKWID CPU marker API support (perfmon)',
)
variant(
'likwid_nvmon',
default=False,
description='Enable LIKWID GPU marker API support (nvmon)',
)
variant('caliper', default=False, description='Enable Caliper support') variant('caliper', default=False, description='Enable Caliper support')
variant('dyninst', default=False, variant('dyninst', default=False, description='Build dynamic instrumentation tools')
description='Build dynamic instrumentation tools')
variant('examples', default=False, description='Build/install examples') variant('examples', default=False, description='Build/install examples')
variant('gperftools', default=False, variant('gperftools', default=False, description='Enable gperftools support')
description='Enable gperftools support') variant(
variant('kokkos_tools', default=False, 'kokkos_tools',
description=('Build generic kokkos-tools libraries, e.g. ' default=False,
'kp_timemory, kp_timemory_filter')) description=(
variant('kokkos_build_config', default=False, 'Build generic kokkos-tools libraries, e.g. '
description=('Build pre-configured (i.e. dedicated) kokkos-tools ' 'kp_timemory, kp_timemory_filter'
'libraries, e.g. kp_timemory_cpu_flops')) ),
variant('cuda_arch', default='auto', description='CUDA architecture name', )
values=('auto', 'kepler', 'tesla', 'maxwell', 'pascal', variant(
'volta', 'turing'), multi=False) 'kokkos_build_config',
variant('cpu_target', default='auto', default=False,
description=('Build for specific cpu architecture (specify ' description=(
'cpu-model)')) 'Build pre-configured (i.e. dedicated) kokkos-tools '
variant('use_arch', default=False, 'libraries, e.g. kp_timemory_cpu_flops'
description=('Build all of timemory w/ cpu_target architecture ' ),
'flags (default: roofline toolkit only)')) )
variant('tls_model', default='global-dynamic', variant(
description='Thread-local static model', multi=False, 'cuda_arch',
values=('global-dynamic', 'local-dynamic', 'initial-exec', default='auto',
'local-exec')) description='CUDA architecture name',
variant('lto', default=False, values=(
description='Build w/ link-time optimization') 'auto',
variant('statistics', default=True, 'kepler',
description=('Build components w/ support for statistics ' 'kepler30',
'(min/max/stddev)')) 'kepler32',
variant('extra_optimizations', default=True, 'kepler35',
description='Build timemory with extra optimization flags') 'kepler37',
variant('cxxstd', default='14', description='C++ language standard', 'tesla',
values=('14', '17', '20'), multi=False) 'maxwell',
variant('mpip_library', default=False, 'maxwell50',
description='Build stand-alone timemory-mpip GOTCHA library') 'maxwell52',
'maxwell53',
'pascal',
'pascal60',
'pascal61',
'volta',
'volta70',
'volta72',
'turing',
'turing75',
'ampere',
'ampere80',
'ampere86',
),
multi=True,
)
variant(
'cpu_target',
default='auto',
description=('Build for specific cpu architecture (specify ' 'cpu-model)'),
)
variant(
'use_arch',
default=False,
description=(
'Build all of timemory w/ cpu_target architecture '
'flags (default: roofline toolkit only)'
),
)
variant(
'tls_model',
default='global-dynamic',
description='Thread-local static model',
multi=False,
values=('global-dynamic', 'local-dynamic', 'initial-exec', 'local-exec'),
)
variant('lto', default=False, description='Build with link-time optimization')
variant(
'statistics',
default=True,
description=('Build components w/ support for statistics ' '(min/max/stddev)'),
)
variant(
'extra_optimizations',
default=True,
description='Build timemory with extra optimization flags',
)
variant(
'cxxstd',
default='14',
description='C++ language standard',
values=('14', '17', '20'),
multi=False,
)
variant(
'cudastd',
default='14',
description='CUDA language standard',
values=('14', '17'),
multi=False,
)
variant(
'unity_build',
default=True,
description='Build with CMAKE_UNITY_BUILD=ON for faster builds '
'but larger memory consumption',
)
variant(
'mpip_library',
default=False,
description='Build stand-alone timemory-mpip GOTCHA library',
)
variant('ompt', default=False, description=('Enable OpenMP tools support')) variant('ompt', default=False, description=('Enable OpenMP tools support'))
variant('ompt_standalone', default=False, variant(
description=('Enable OpenMP tools support via drop-in ' 'ompt_library',
'replacement of libomp/libgomp/libiomp5')) default=False,
variant('ompt_llvm', default=False, description='Build stand-alone timemory-ompt library',
description='Enable OpenMP tools support as part of llvm build') )
variant('ompt_library', default=False, variant('allinea_map', default=False, description='Enable Allinea ARM-MAP support')
description='Build stand-alone timemory-ompt library') variant(
variant('allinea_map', default=False, 'require_packages',
description='Enable Allinea ARM-MAP support') default=True,
variant('require_packages', default=True, description=('find_package(...) resulting in NOTFOUND ' 'generates error'),
description=('find_package(...) resulting in NOTFOUND ' )
'generates error')) variant(
'compiler', default=True, description='Enable compiler instrumentation support'
depends_on('cmake@3.11:', type='build') )
variant(
'ert',
default=True,
description='Enable extern templates for empirical roofline toolkit (ERT)',
)
extends('python', when='+python') extends('python', when='+python')
depends_on('cmake@3.15:', type='build')
depends_on('python@3:', when='+python', type=('build', 'run')) depends_on('python@3:', when='+python', type=('build', 'run'))
depends_on('py-cython', when='+python', type=('build')) depends_on('py-cython', when='+python_hatchet', type=('build'))
depends_on('py-cython', when='+python_line_profiler', type=('build'))
depends_on('pil', when='+python+python_deps', type=('run')) depends_on('pil', when='+python+python_deps', type=('run'))
depends_on('py-numpy', when='+python+python_deps', type=('run')) depends_on('py-numpy', when='+python+python_deps', type=('run'))
depends_on('py-hatchet', when='+python+python_deps', type=('run'))
depends_on('py-matplotlib', when='+python+python_deps', type=('run')) depends_on('py-matplotlib', when='+python+python_deps', type=('run'))
depends_on('py-ipython', when='+python+python_deps', type=('run'))
depends_on('py-mpi4py', when='+python+mpi+python_deps', type=('run')) depends_on('py-mpi4py', when='+python+mpi+python_deps', type=('run'))
depends_on('py-pandas', when='+python_deps+python_hatchet', type=('run'))
depends_on('py-pydot', when='+python_deps+python_hatchet', type=('run'))
depends_on('py-pyyaml', when='+python_deps+python_hatchet', type=('run'))
depends_on('py-multiprocess', when='+python_deps+python_hatchet', type=('run'))
depends_on('mpi', when='+mpi') depends_on('mpi', when='+mpi')
depends_on('nccl', when='+nccl') depends_on('nccl', when='+nccl')
depends_on('tau', when='+tau') depends_on('tau', when='+tau')
@ -108,117 +224,149 @@ class Timemory(CMakePackage):
depends_on('cuda', when='+cuda') depends_on('cuda', when='+cuda')
depends_on('cuda', when='+cupti') depends_on('cuda', when='+cupti')
depends_on('upcxx', when='+upcxx') depends_on('upcxx', when='+upcxx')
depends_on('likwid', when='+likwid') depends_on('likwid', when='+likwid~likwid_nvmon')
depends_on('likwid+cuda', when='+likwid+likwid_nvmon')
depends_on('gotcha', when='+gotcha') depends_on('gotcha', when='+gotcha')
depends_on('caliper', when='+caliper') depends_on('caliper', when='+caliper')
depends_on('dyninst', when='+dyninst') depends_on('dyninst', when='+dyninst')
depends_on('gperftools', when='+gperftools') depends_on('gperftools', when='+gperftools')
depends_on('intel-parallel-studio', when='+vtune') depends_on('intel-parallel-studio', when='+vtune')
depends_on('llvm-openmp-ompt+standalone', when='+ompt_standalone')
depends_on('llvm-openmp-ompt~standalone', when='+ompt_llvm')
depends_on('arm-forge', when='+allinea_map') depends_on('arm-forge', when='+allinea_map')
conflicts('+python', when='~shared', conflicts(
msg='+python requires building shared libraries') '+python',
when='~shared~static',
msg='+python requires building shared or static libraries',
)
# Python bindings built against static-only libraries need +pic.
conflicts(
    '~pic',
    '~shared+static+python',
    msg='Python bindings cannot be linked to static libs w/o +pic',
)
conflicts('+python_deps', when='~python') conflicts('+python_deps', when='~python')
conflicts('+cupti', when='~cuda', msg='CUPTI requires CUDA') conflicts('+cupti', when='~cuda', msg='CUPTI requires CUDA')
conflicts('+kokkos_tools', when='~tools', msg='+kokkos_tools requires +tools')
# One conflict per prerequisite: a single when='~tools~kokkos_tools' would
# only match when BOTH variants are disabled, letting
# +kokkos_build_config+tools~kokkos_tools slip through.
conflicts(
    '+kokkos_build_config',
    when='~tools',
    msg='+kokkos_build_config requires +tools',
)
conflicts(
    '+kokkos_build_config',
    when='~kokkos_tools',
    msg='+kokkos_build_config requires +kokkos_tools',
)
conflicts('tls_model=local-dynamic', when='+python', conflicts(
msg='+python require tls_model=global-dynamic') 'tls_model=local-dynamic',
conflicts('tls_model=initial-exec', when='+python', when='+python',
msg='+python require tls_model=global-dynamic') msg='+python require tls_model=global-dynamic',
conflicts('tls_model=local-exec', when='+python', )
msg='+python require tls_model=global-dynamic') conflicts(
conflicts('+nccl', when='~gotcha', 'tls_model=initial-exec',
msg='+nccl requires +gotcha') when='+python',
msg='+python require tls_model=global-dynamic',
)
conflicts(
'tls_model=local-exec',
when='+python',
msg='+python require tls_model=global-dynamic',
)
conflicts('+nccl', when='~gotcha', msg='+nccl requires +gotcha')
conflicts(
'+nccl',
when='~shared~static',
msg='+nccl requires building shared or static libraries',
)
conflicts('+mpip_library', when='~mpi', msg='+mpip_library requires +mpi') conflicts('+mpip_library', when='~mpi', msg='+mpip_library requires +mpi')
conflicts('+mpip_library', when='~gotcha', conflicts('+mpip_library', when='~gotcha', msg='+mpip_library requires +gotcha')
msg='+mpip_library requires +gotcha') conflicts(
conflicts('+mpip_library', when='~shared', '+mpip_library',
msg='+mpip_library requires building shared libraries') when='~shared~static',
conflicts('+ompt_standalone', when='~ompt', msg='+mpip_library requires building shared or static libraries',
msg='+ompt_standalone requires +ompt') )
conflicts('+ompt_llvm', when='~ompt', conflicts('+ompt_library', when='~ompt', msg='+ompt_library requires +ompt')
msg='+ompt_llvm requires +ompt') conflicts(
conflicts('+ompt_library', when='~ompt', '+ompt_library',
msg='+ompt_library requires +ompt') when='~shared~static',
conflicts('+ompt_library', when='~shared~static', msg='+ompt_library requires building shared or static libraries',
msg='+ompt_library requires building shared or static libraries') )
conflicts('+ompt_standalone+ompt_llvm', conflicts('+likwid_nvmon', when='~likwid', msg='+likwid_nvmon requires +likwid')
msg=('+ompt_standalone and +ompt_llvm are not compatible. Use '
'+ompt_llvm~ompt_standalone if building LLVM, use '
'~ompt_llvm+ompt_standalone if ompt.h is not provided by '
'the compiler'))
def cmake_args(self):
    """Return the list of CMake definitions used to configure timemory.

    The list starts with fixed settings, followed by one definition per
    package variant (via ``define_from_variant``). Python, MPI and CUDA
    specific definitions are appended only when the matching variant is
    enabled in ``self.spec``.
    """
    spec = self.spec

    args = [
        self.define('SPACK_BUILD', True),
        self.define('TIMEMORY_BUILD_OMPT', False),  # disable submodule
        self.define('TIMEMORY_BUILD_PYTHON', True),
        self.define('TIMEMORY_BUILD_GOTCHA', False),  # disable submodule
        self.define('TIMEMORY_BUILD_CALIPER', False),  # disable submodule
        self.define('TIMEMORY_BUILD_TESTING', False),
        self.define('TIMEMORY_USE_MPI_LINK_FLAGS', False),
        self.define('CMAKE_INSTALL_RPATH_USE_LINK_PATH', True),
        self.define_from_variant('BUILD_SHARED_LIBS', 'shared'),
        self.define_from_variant('BUILD_STATIC_LIBS', 'static'),
        self.define_from_variant('CMAKE_CXX_STANDARD', 'cxxstd'),
        self.define_from_variant('CMAKE_CUDA_STANDARD', 'cudastd'),
        self.define_from_variant('CMAKE_POSITION_INDEPENDENT_CODE', 'pic'),
        self.define_from_variant('CpuArch_TARGET', 'cpu_target'),
        self.define_from_variant('TIMEMORY_TLS_MODEL', 'tls_model'),
        self.define_from_variant('TIMEMORY_UNITY_BUILD', 'unity_build'),
        self.define_from_variant('TIMEMORY_REQUIRE_PACKAGES', 'require_packages'),
        self.define_from_variant('TIMEMORY_INSTALL_HEADERS', 'install_headers'),
        self.define_from_variant('TIMEMORY_INSTALL_CONFIG', 'install_config'),
        self.define_from_variant('TIMEMORY_BUILD_ERT', 'ert'),
        self.define_from_variant(
            'TIMEMORY_BUILD_COMPILER_INSTRUMENTATION', 'compiler'
        ),
        self.define_from_variant('TIMEMORY_BUILD_LTO', 'lto'),
        self.define_from_variant('TIMEMORY_BUILD_TOOLS', 'tools'),
        self.define_from_variant('TIMEMORY_BUILD_EXAMPLES', 'examples'),
        self.define_from_variant('TIMEMORY_BUILD_NCCLP_LIBRARY', 'nccl'),
        self.define_from_variant('TIMEMORY_BUILD_MALLOCP_LIBRARY', 'gotcha'),
        self.define_from_variant('TIMEMORY_BUILD_MPIP_LIBRARY', 'mpip_library'),
        self.define_from_variant('TIMEMORY_BUILD_OMPT_LIBRARY', 'ompt_library'),
        self.define_from_variant('TIMEMORY_BUILD_KOKKOS_TOOLS', 'kokkos_tools'),
        self.define_from_variant(
            'TIMEMORY_BUILD_KOKKOS_CONFIG', 'kokkos_build_config'
        ),
        self.define_from_variant(
            'TIMEMORY_BUILD_EXTRA_OPTIMIZATIONS', 'extra_optimizations'
        ),
        self.define_from_variant('TIMEMORY_BUILD_PYTHON_HATCHET', 'python_hatchet'),
        self.define_from_variant(
            'TIMEMORY_BUILD_PYTHON_LINE_PROFILER', 'python_line_profiler'
        ),
        self.define_from_variant('TIMEMORY_USE_MPI', 'mpi'),
        self.define_from_variant('TIMEMORY_USE_TAU', 'tau'),
        self.define_from_variant('TIMEMORY_USE_ARCH', 'use_arch'),
        self.define_from_variant('TIMEMORY_USE_PAPI', 'papi'),
        self.define_from_variant('TIMEMORY_USE_OMPT', 'ompt'),
        self.define_from_variant('TIMEMORY_USE_CUDA', 'cuda'),
        self.define_from_variant('TIMEMORY_USE_NCCL', 'nccl'),
        self.define_from_variant('TIMEMORY_USE_CUPTI', 'cupti'),
        self.define_from_variant('TIMEMORY_USE_VTUNE', 'vtune'),
        self.define_from_variant('TIMEMORY_USE_UPCXX', 'upcxx'),
        self.define_from_variant('TIMEMORY_USE_PYTHON', 'python'),
        self.define_from_variant('TIMEMORY_USE_GOTCHA', 'gotcha'),
        self.define_from_variant('TIMEMORY_USE_LIKWID', 'likwid'),
        self.define_from_variant('TIMEMORY_USE_LIKWID_PERFMON', 'likwid'),
        self.define_from_variant('TIMEMORY_USE_LIKWID_NVMON', 'likwid_nvmon'),
        self.define_from_variant('TIMEMORY_USE_DYNINST', 'dyninst'),
        self.define_from_variant('TIMEMORY_USE_CALIPER', 'caliper'),
        self.define_from_variant('TIMEMORY_USE_GPERFTOOLS', 'gperftools'),
        self.define_from_variant('TIMEMORY_USE_STATISTICS', 'statistics'),
        self.define_from_variant('TIMEMORY_USE_ALLINEA_MAP', 'allinea_map'),
    ]

    if '+python' in spec:
        pyexe = spec['python'].command.path
        # The variable name must not carry a trailing '=': define() adds the
        # separator itself, so 'PYTHON_EXECUTABLE=' would name a different
        # (bogus) cache entry and leave PYTHON_EXECUTABLE unset.
        args.append(self.define('PYTHON_EXECUTABLE', pyexe))
        args.append(self.define('Python3_EXECUTABLE', pyexe))

    if '+mpi' in spec:
        args.append(self.define('MPI_C_COMPILER', spec['mpi'].mpicc))
        args.append(self.define('MPI_CXX_COMPILER', spec['mpi'].mpicxx))

    if '+cuda' in spec:
        # newer versions use 'TIMEMORY_CUDA_ARCH'
        key = 'CUDA_ARCH' if spec.satisfies('@:3.0.1') else 'TIMEMORY_CUDA_ARCH'
        args.append(self.define_from_variant(key, 'cuda_arch'))
        # CMAKE_CUDA_STANDARD is already emitted unconditionally in the list
        # above, so it is not repeated here.

    return args