py-horovod: added versions up to v0.20.3 (#18977)
This commit is contained in:
parent
cb7a1ac2bf
commit
0d64020007
@ -4,16 +4,20 @@
|
|||||||
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
|
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
|
||||||
|
|
||||||
|
|
||||||
class PyHorovod(PythonPackage):
|
class PyHorovod(PythonPackage, CudaPackage):
|
||||||
"""Horovod is a distributed deep learning training framework for
|
"""Horovod is a distributed deep learning training framework for
|
||||||
TensorFlow, Keras, PyTorch, and Apache MXNet."""
|
TensorFlow, Keras, PyTorch, and Apache MXNet."""
|
||||||
|
|
||||||
homepage = "https://github.com/horovod"
|
homepage = "https://github.com/horovod"
|
||||||
git = "https://github.com/horovod/horovod.git"
|
git = "https://github.com/horovod/horovod.git"
|
||||||
|
|
||||||
maintainers = ['adamjstewart']
|
maintainers = ['adamjstewart', 'aweits', 'tgaddair']
|
||||||
|
|
||||||
version('master', branch='master', submodules=True)
|
version('master', branch='master', submodules=True)
|
||||||
|
version('0.20.3', tag='v0.20.3', submodules=True)
|
||||||
|
version('0.20.2', tag='v0.20.2', submodules=True)
|
||||||
|
version('0.20.1', tag='v0.20.1', submodules=True)
|
||||||
|
version('0.20.0', tag='v0.20.0', submodules=True)
|
||||||
version('0.19.5', tag='v0.19.5', submodules=True)
|
version('0.19.5', tag='v0.19.5', submodules=True)
|
||||||
version('0.19.4', tag='v0.19.4', submodules=True)
|
version('0.19.4', tag='v0.19.4', submodules=True)
|
||||||
version('0.19.3', tag='v0.19.3', submodules=True)
|
version('0.19.3', tag='v0.19.3', submodules=True)
|
||||||
@ -32,7 +36,7 @@ class PyHorovod(PythonPackage):
|
|||||||
# https://github.com/horovod/horovod/blob/master/docs/install.rst
|
# https://github.com/horovod/horovod/blob/master/docs/install.rst
|
||||||
variant('frameworks', default='pytorch',
|
variant('frameworks', default='pytorch',
|
||||||
description='Deep learning frameworks to build support for',
|
description='Deep learning frameworks to build support for',
|
||||||
values=('tensorflow', 'pytorch', 'mxnet', 'keras', 'spark'),
|
values=('tensorflow', 'pytorch', 'mxnet', 'keras', 'spark', 'ray'),
|
||||||
multi=True)
|
multi=True)
|
||||||
variant('controllers', default='mpi',
|
variant('controllers', default='mpi',
|
||||||
description='Controllers to coordinate work between processes',
|
description='Controllers to coordinate work between processes',
|
||||||
@ -40,17 +44,23 @@ class PyHorovod(PythonPackage):
|
|||||||
variant('tensor_ops', default='nccl',
|
variant('tensor_ops', default='nccl',
|
||||||
description='Framework to use for GPU/CPU operations',
|
description='Framework to use for GPU/CPU operations',
|
||||||
values=('nccl', 'mpi', 'gloo', 'ccl'), multi=False)
|
values=('nccl', 'mpi', 'gloo', 'ccl'), multi=False)
|
||||||
|
variant('cuda', default=True, description='Build with CUDA')
|
||||||
|
variant('rocm', default=False, description='Build with ROCm')
|
||||||
|
|
||||||
# Required dependencies
|
# Required dependencies
|
||||||
|
depends_on('python@3.6:', type=('build', 'run'), when='@0.20:')
|
||||||
depends_on('py-setuptools', type='build')
|
depends_on('py-setuptools', type='build')
|
||||||
depends_on('py-cloudpickle', type=('build', 'run'))
|
depends_on('py-cloudpickle', type=('build', 'run'))
|
||||||
depends_on('py-psutil', type=('build', 'run'))
|
depends_on('py-psutil', type=('build', 'run'))
|
||||||
depends_on('py-pyyaml', type=('build', 'run'))
|
depends_on('py-pyyaml', type=('build', 'run'))
|
||||||
depends_on('py-six', type=('build', 'run'))
|
depends_on('py-six', type=('build', 'run'), when='@:0.19')
|
||||||
|
depends_on('py-dataclasses', type=('build', 'run'), when='@0.20: ^python@:3.6')
|
||||||
|
|
||||||
# Framework dependencies
|
# Framework dependencies
|
||||||
depends_on('py-tensorflow@1.1.0:', type=('build', 'link', 'run'), when='frameworks=tensorflow')
|
depends_on('py-tensorflow@1.1.0:', type=('build', 'link', 'run'), when='frameworks=tensorflow')
|
||||||
|
depends_on('py-tensorflow@1.15:', type=('build', 'link', 'run'), when='frameworks=tensorflow @0.20:')
|
||||||
depends_on('py-torch@0.4.0:', type=('build', 'link', 'run'), when='frameworks=pytorch')
|
depends_on('py-torch@0.4.0:', type=('build', 'link', 'run'), when='frameworks=pytorch')
|
||||||
|
depends_on('py-torch@1.2:', type=('build', 'link', 'run'), when='frameworks=pytorch @0.20:')
|
||||||
depends_on('py-torchvision', type=('build', 'run'), when='frameworks=pytorch @:0.19.1')
|
depends_on('py-torchvision', type=('build', 'run'), when='frameworks=pytorch @:0.19.1')
|
||||||
depends_on('py-cffi@1.4.0:', type=('build', 'run'), when='frameworks=pytorch')
|
depends_on('py-cffi@1.4.0:', type=('build', 'run'), when='frameworks=pytorch')
|
||||||
depends_on('mxnet@1.4.1:+python', type=('build', 'link', 'run'), when='frameworks=mxnet')
|
depends_on('mxnet@1.4.1:+python', type=('build', 'link', 'run'), when='frameworks=mxnet')
|
||||||
@ -61,11 +71,17 @@ class PyHorovod(PythonPackage):
|
|||||||
depends_on('py-petastorm@0.9.0:', type=('build', 'run'), when='frameworks=spark @0.19.2:')
|
depends_on('py-petastorm@0.9.0:', type=('build', 'run'), when='frameworks=spark @0.19.2:')
|
||||||
depends_on('py-pyarrow@0.15.0:', type=('build', 'run'), when='frameworks=spark')
|
depends_on('py-pyarrow@0.15.0:', type=('build', 'run'), when='frameworks=spark')
|
||||||
depends_on('py-pyspark@2.3.2:', type=('build', 'run'), when='frameworks=spark')
|
depends_on('py-pyspark@2.3.2:', type=('build', 'run'), when='frameworks=spark')
|
||||||
|
depends_on('py-ray', type=('build', 'run'), when='frameworks=ray')
|
||||||
|
|
||||||
|
# Build dependencies
|
||||||
|
depends_on('cmake@2.8.12:', type='build', when='@0.20:')
|
||||||
|
depends_on('pkgconfig', type='build')
|
||||||
|
|
||||||
# Controller dependencies
|
# Controller dependencies
|
||||||
depends_on('mpi', when='controllers=mpi')
|
depends_on('mpi', when='controllers=mpi')
|
||||||
# There does not appear to be a way to use an external Gloo installation
|
# There does not appear to be a way to use an external Gloo installation
|
||||||
depends_on('cmake', type='build', when='controllers=gloo')
|
depends_on('cmake', type='build', when='controllers=gloo')
|
||||||
|
depends_on('libuv@1.26:', when='controllers=gloo platform=darwin')
|
||||||
|
|
||||||
# Tensor Operations dependencies
|
# Tensor Operations dependencies
|
||||||
depends_on('nccl@2:', when='tensor_ops=nccl')
|
depends_on('nccl@2:', when='tensor_ops=nccl')
|
||||||
@ -77,13 +93,28 @@ class PyHorovod(PythonPackage):
|
|||||||
depends_on('py-mock', type='test')
|
depends_on('py-mock', type='test')
|
||||||
depends_on('py-pytest', type='test')
|
depends_on('py-pytest', type='test')
|
||||||
depends_on('py-pytest-forked', type='test')
|
depends_on('py-pytest-forked', type='test')
|
||||||
|
depends_on('py-parameterized', type='test', when='@0.20:')
|
||||||
|
|
||||||
conflicts('controllers=gloo', when='platform=darwin', msg='Gloo cannot be compiled on MacOS')
|
conflicts('cuda_arch=none', when='+cuda',
|
||||||
|
msg='Must specify CUDA compute capabilities of your GPU, see '
|
||||||
|
'https://developer.nvidia.com/cuda-gpus')
|
||||||
|
conflicts('tensor_ops=nccl', when='~cuda~rocm', msg='NCCL requires either CUDA or ROCm support')
|
||||||
|
# The variant is declared as 'frameworks' (plural); the spec here must use the
# same name so the conflict applies to the variant that selects Ray support.
conflicts('frameworks=ray', when='@:0.19', msg='Ray integration was added in 0.20.X')
|
||||||
|
conflicts('controllers=gloo', when='@:0.20.0 platform=darwin', msg='Gloo cannot be compiled on MacOS')
|
||||||
|
|
||||||
# https://github.com/horovod/horovod/pull/1835
|
# https://github.com/horovod/horovod/pull/1835
|
||||||
patch('fma.patch', when='@0.19.0:0.19.1')
|
patch('fma.patch', when='@0.19.0:0.19.1')
|
||||||
|
|
||||||
def setup_build_environment(self, env):
|
def setup_build_environment(self, env):
|
||||||
|
# https://github.com/horovod/horovod/blob/master/docs/install.rst#environment-variables
|
||||||
|
|
||||||
|
# Build system
|
||||||
|
env.set('PKG_CONFIG_EXECUTABLE',
|
||||||
|
self.spec['pkgconfig'].prefix.bin.join('pkg-config'))
|
||||||
|
if '^cmake' in self.spec:
|
||||||
|
env.set('HOROVOD_CMAKE', self.spec['cmake'].command.path)
|
||||||
|
env.set('MAKEFLAGS', '-j{0}'.format(make_jobs))
|
||||||
|
|
||||||
# Frameworks
|
# Frameworks
|
||||||
if 'frameworks=tensorflow' in self.spec:
|
if 'frameworks=tensorflow' in self.spec:
|
||||||
env.set('HOROVOD_WITH_TENSORFLOW', 1)
|
env.set('HOROVOD_WITH_TENSORFLOW', 1)
|
||||||
@ -110,20 +141,28 @@ def setup_build_environment(self, env):
|
|||||||
|
|
||||||
# Tensor Operations
|
# Tensor Operations
|
||||||
if 'tensor_ops=nccl' in self.spec:
|
if 'tensor_ops=nccl' in self.spec:
|
||||||
env.set('HOROVOD_GPU', 'CUDA')
|
env.set('HOROVOD_GPU_ALLREDUCE', 'NCCL')
|
||||||
|
env.set('HOROVOD_GPU_ALLGATHER', 'NCCL')
|
||||||
env.set('HOROVOD_CUDA_HOME', self.spec['cuda'].prefix)
|
env.set('HOROVOD_GPU_BROADCAST', 'NCCL')
|
||||||
env.set('HOROVOD_CUDA_INCLUDE',
|
|
||||||
self.spec['cuda'].headers.directories[0])
|
|
||||||
env.set('HOROVOD_CUDA_LIB', self.spec['cuda'].libs.directories[0])
|
|
||||||
|
|
||||||
env.set('HOROVOD_NCCL_HOME', self.spec['nccl'].prefix)
|
env.set('HOROVOD_NCCL_HOME', self.spec['nccl'].prefix)
|
||||||
env.set('HOROVOD_NCCL_INCLUDE',
|
env.set('HOROVOD_NCCL_INCLUDE',
|
||||||
self.spec['nccl'].headers.directories[0])
|
self.spec['nccl'].headers.directories[0])
|
||||||
env.set('HOROVOD_NCCL_LIB', self.spec['nccl'].libs.directories[0])
|
env.set('HOROVOD_NCCL_LIB', self.spec['nccl'].libs.directories[0])
|
||||||
|
|
||||||
env.set('HOROVOD_GPU_ALLREDUCE', 'NCCL')
|
if '+cuda' in self.spec:
|
||||||
env.set('HOROVOD_GPU_BROADCAST', 'NCCL')
|
env.set('HOROVOD_GPU', 'CUDA')
|
||||||
|
|
||||||
|
env.set('HOROVOD_CUDA_HOME', self.spec['cuda'].prefix)
|
||||||
|
cuda_cc_list = ','.join(self.spec.variants['cuda_arch'].value)
|
||||||
|
env.set('HOROVOD_BUILD_CUDA_CC_LIST', cuda_cc_list)
|
||||||
|
env.set('HOROVOD_CUDA_INCLUDE',
|
||||||
|
self.spec['cuda'].headers.directories[0])
|
||||||
|
env.set('HOROVOD_CUDA_LIB',
|
||||||
|
self.spec['cuda'].libs.directories[0])
|
||||||
|
elif '+rocm' in self.spec:
|
||||||
|
env.set('HOROVOD_GPU', 'ROCM')
|
||||||
|
# env.set('HOROVOD_ROCM_HOME', self.spec['rocm'].prefix)
|
||||||
else:
|
else:
|
||||||
env.set('HOROVOD_CPU_OPERATIONS',
|
env.set('HOROVOD_CPU_OPERATIONS',
|
||||||
self.spec.variants['tensor_ops'].value.upper())
|
self.spec.variants['tensor_ops'].value.upper())
|
||||||
|
Loading…
Reference in New Issue
Block a user