py-horovod: set appropriate build env vars (#15548)
* py-horovod: set appropriate build env vars
* mxnet: add maintainer
* py-horovod: fetch git submodules
* py-torch: fix CUDA conflicts
This commit is contained in:
parent
77b11433b3
commit
51f8744ab7
@ -13,6 +13,8 @@ class Mxnet(MakefilePackage):
|
||||
homepage = "http://mxnet.io"
|
||||
url = "https://github.com/apache/incubator-mxnet/releases/download/1.3.0/apache-mxnet-src-1.3.0-incubating.tar.gz"
|
||||
|
||||
maintainers = ['adamjstewart']
|
||||
|
||||
version('1.3.0', sha256='c00d6fbb2947144ce36c835308e603f002c1eb90a9f4c5a62f4d398154eed4d2')
|
||||
|
||||
variant('cuda', default=False, description='Enable CUDA support')
|
||||
|
@ -3,10 +3,6 @@
|
||||
#
|
||||
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
|
||||
|
||||
from spack import *
|
||||
import fnmatch
|
||||
import os
|
||||
|
||||
|
||||
class PyHorovod(PythonPackage):
|
||||
"""Horovod is a distributed deep learning training framework for
|
||||
@ -14,47 +10,111 @@ class PyHorovod(PythonPackage):
|
||||
|
||||
homepage = "https://github.com/horovod"
|
||||
git = "https://github.com/horovod/horovod.git"
|
||||
url = "https://github.com/horovod/horovod/archive/v0.19.0.tar.gz"
|
||||
|
||||
version('master', branch='master', submodules='True')
|
||||
version('0.19.0', sha256='0e9fec11cd7f5f39a09f0785d1097cb51c44537ae14c9b4b2578b5cdd21efb9b')
|
||||
version('0.18.2', sha256='a073e08cec65474afdb2d011486b4cb6c7ac8fcb1eca3e02b169e1e7b4a66da6')
|
||||
version('0.18.1', sha256='26e236d1f60955e9dd12b9f0a836f0691296a010fcd1ac72295970a780f4e4fb')
|
||||
version('0.18.0', sha256='94f13e7110c5f3fd1aa194b9d886b5bb91c9bc02ade31bcb84fc6e7f9c043455')
|
||||
version('0.17.1', sha256='14eea5744eda9c62988ffa278a9a5472cebbc6a287eca9ed48cacfcd177e8978')
|
||||
version('0.17.0.post1', sha256='220b230611e22dc69777f1be4d9788a07e73a0722e511091fa156cdf68ca798b')
|
||||
version('0.17.0', sha256='4bb121dda6cdaa1677535470adc1836493a9c4930ab19f6b491254ea47a12a4f')
|
||||
version('0.16.4', sha256='c0168dfeb31a56ede52eae115f43fa2d06a5db55a37201064ef901c8000d708d')
|
||||
version('0.16.3', sha256='1857cf1b335723366cc71e4bcd0583f2dde0c821212cda0e1b6bddfe4ba1ea0d')
|
||||
version('0.16.2', sha256='baa9754e59ab0ee72d3b5769cf77e06a2c7b0a2d9626e0e14ca2ab131934ce74')
|
||||
maintainers = ['adamjstewart']
|
||||
|
||||
variant('pytorch', default=True, description='Enables PyTorch')
|
||||
variant('mxnet', default=False, description='Enables mxnet')
|
||||
variant('mpi', default=True, description='Enables MPI build')
|
||||
version('master', branch='master', submodules=True)
|
||||
version('0.19.1', tag='v0.19.1', submodules=True)
|
||||
version('0.19.0', tag='v0.19.0', submodules=True)
|
||||
version('0.18.2', tag='v0.18.2', submodules=True)
|
||||
version('0.18.1', tag='v0.18.1', submodules=True)
|
||||
version('0.18.0', tag='v0.18.0', submodules=True)
|
||||
version('0.17.1', tag='v0.17.1', submodules=True)
|
||||
version('0.17.0', tag='v0.17.0', submodules=True)
|
||||
version('0.16.4', tag='v0.16.4', submodules=True)
|
||||
version('0.16.3', tag='v0.16.3', submodules=True)
|
||||
version('0.16.2', tag='v0.16.2', submodules=True)
|
||||
|
||||
# Deep learning frameworks
|
||||
variant('pytorch', default=True, description='Enables PyTorch')
|
||||
variant('tensorflow', default=False, description='Enables TensorFlow')
|
||||
variant('mxnet', default=False, description='Enables Apache MXNet')
|
||||
|
||||
# Distributed support
|
||||
variant('gloo', default=False, description='Enables features related to distributed support')
|
||||
variant('cuda', default=True, description='Enables CUDA build')
|
||||
variant('mpi', default=True, description='Enables MPI build')
|
||||
|
||||
depends_on('python', type=('build', 'run'))
|
||||
# GPU support
|
||||
variant('cuda', default=True, description='Enables CUDA build')
|
||||
variant('gpu_allreduce', default='mpi',
|
||||
description='Backend to use for GPU_ALLREDUCE',
|
||||
values=('mpi', 'nccl'), multi=False) # DDL support is deprecated
|
||||
variant('gpu_allgather', default='mpi',
|
||||
description='Backend to use for GPU_ALLGATHER',
|
||||
values=('mpi',), multi=False)
|
||||
variant('gpu_broadcast', default='mpi',
|
||||
description='Backend to use for GPU_BROADCAST',
|
||||
values=('mpi', 'nccl'), multi=False)
|
||||
|
||||
# Required dependencies
|
||||
depends_on('py-setuptools', type='build')
|
||||
depends_on('py-cloudpickle', type=('build', 'run'))
|
||||
depends_on('py-psutil', type=('build', 'run'))
|
||||
depends_on('py-pyyaml', type=('build', 'run'))
|
||||
depends_on('py-six', type=('build', 'run'))
|
||||
depends_on('py-torch', type=('build', 'run'))
|
||||
depends_on('py-pip', type=('build'))
|
||||
|
||||
# Optional dependencies
|
||||
depends_on('cuda', when='+cuda')
|
||||
depends_on('nccl', when='+nccl')
|
||||
depends_on('gloo', when='+gloo')
|
||||
# Deep learning frameworks
|
||||
depends_on('py-torch@0.4.0:', type=('build', 'run'), when='+pytorch')
|
||||
depends_on('py-torch+cuda', type=('build', 'run'), when='+pytorch+cuda')
|
||||
depends_on('py-cffi@1.4.0:', type=('build', 'run'), when='+pytorch')
|
||||
depends_on('py-tensorflow@1.1.0:', type=('build', 'link', 'run'), when='+tensorflow')
|
||||
depends_on('mxnet@1.4.0:+python', type=('build', 'link', 'run'), when='+mxnet')
|
||||
depends_on('mxnet+cuda', type=('build', 'link', 'run'), when='+mxnet+cuda')
|
||||
|
||||
# Distributed support
|
||||
# There does not appear to be a way to use an external Gloo installation
|
||||
depends_on('cmake', type='build', when='+gloo')
|
||||
depends_on('mpi', when='+mpi')
|
||||
depends_on('py-torch', type=('build', 'run'), when='+pytorch')
|
||||
depends_on('mxnet', when='+mxnet')
|
||||
depends_on('mpi', when='gpu_allreduce=mpi')
|
||||
depends_on('mpi', when='gpu_allgather=mpi')
|
||||
depends_on('mpi', when='gpu_broadcast=mpi')
|
||||
|
||||
phases = ['clean', 'sdist', 'install']
|
||||
# GPU support
|
||||
depends_on('cuda', when='+cuda')
|
||||
depends_on('nccl@2.0:', when='gpu_allreduce=nccl')
|
||||
depends_on('nccl@2.0:', when='gpu_broadcast=nccl')
|
||||
|
||||
def install(self, spec, prefix):
|
||||
pip = which('pip')
|
||||
for file in os.listdir(prefix):
|
||||
if fnmatch.fnmatch(file, 'horovod-*.tar.gz'):
|
||||
pip('install', file, '--prefix={0}'.format(prefix))
|
||||
# Test dependencies
|
||||
depends_on('py-mock', type='test')
|
||||
depends_on('py-pytest', type='test')
|
||||
depends_on('py-pytest-forked', type='test')
|
||||
|
||||
conflicts('+gloo', when='platform=darwin', msg='Gloo cannot be compiled on MacOS')
|
||||
conflicts('~gloo~mpi', msg='One of Gloo or MPI are required for Horovod to run')
|
||||
conflicts('~pytorch~tensorflow~mxnet', msg='At least one deep learning backend is required')
|
||||
|
||||
def setup_build_environment(self, env):
|
||||
# Deep learning frameworks
|
||||
if '~pytorch' in self.spec:
|
||||
env.set('HOROVOD_WITHOUT_PYTORCH', 1)
|
||||
if '~tensorflow' in self.spec:
|
||||
env.set('HOROVOD_WITHOUT_TENSORFLOW', 1)
|
||||
if '~mxnet' in self.spec:
|
||||
env.set('HOROVOD_WITHOUT_MXNET', 1)
|
||||
|
||||
# Distributed support
|
||||
if '~gloo' in self.spec:
|
||||
env.set('HOROVOD_WITHOUT_GLOO', 1)
|
||||
if '+mpi' in self.spec:
|
||||
env.set('HOROVOD_WITH_MPI', 1)
|
||||
else:
|
||||
env.set('HOROVOD_WITHOUT_MPI', 1)
|
||||
|
||||
# GPU support
|
||||
if '+cuda' in self.spec:
|
||||
env.set('HOROVOD_CUDA_HOME', self.spec['cuda'].prefix)
|
||||
env.set('HOROVOD_CUDA_INCLUDE',
|
||||
self.spec['cuda'].headers.directories[0])
|
||||
env.set('HOROVOD_CUDA_LIB', self.spec['cuda'].libs.directories[0])
|
||||
if '^nccl' in self.spec:
|
||||
env.set('HOROVOD_NCCL_HOME', self.spec['nccl'].prefix)
|
||||
env.set('HOROVOD_NCCL_INCLUDE',
|
||||
self.spec['nccl'].headers.directories[0])
|
||||
env.set('HOROVOD_NCCL_LIB', self.spec['nccl'].libs.directories[0])
|
||||
env.set('HOROVOD_GPU_ALLREDUCE',
|
||||
self.spec.variants['gpu_allreduce'].value.upper())
|
||||
env.set('HOROVOD_GPU_ALLGATHER',
|
||||
self.spec.variants['gpu_allgather'].value.upper())
|
||||
env.set('HOROVOD_GPU_BROADCAST',
|
||||
self.spec.variants['gpu_broadcast'].value.upper())
|
||||
env.set('HOROVOD_ALLOW_MIXED_GPU_IMPL', 1)
|
||||
|
@ -105,22 +105,22 @@ class PyTorch(PythonPackage, CudaPackage):
|
||||
cuda_arch_conflict = ('This version of Torch/Caffe2 only supports compute '
|
||||
'capabilities ')
|
||||
|
||||
conflicts('cuda_arch=none', when='+cuda+caffe2',
|
||||
conflicts('cuda_arch=none', when='+cuda',
|
||||
msg='Must specify CUDA compute capabilities of your GPU, see '
|
||||
'https://developer.nvidia.com/cuda-gpus')
|
||||
conflicts('cuda_arch=52', when='@1.3.0:+cuda+caffe2',
|
||||
conflicts('cuda_arch=52', when='@1.3.0:+cuda',
|
||||
msg=cuda_arch_conflict + '>=5.3')
|
||||
conflicts('cuda_arch=50', when='@1.3.0:+cuda+caffe2',
|
||||
conflicts('cuda_arch=50', when='@1.3.0:+cuda',
|
||||
msg=cuda_arch_conflict + '>=5.3')
|
||||
conflicts('cuda_arch=35', when='@1.3.0:+cuda+caffe2',
|
||||
conflicts('cuda_arch=35', when='@1.3.0:+cuda',
|
||||
msg=cuda_arch_conflict + '>=5.3')
|
||||
conflicts('cuda_arch=32', when='@1.3.0:+cuda+caffe2',
|
||||
conflicts('cuda_arch=32', when='@1.3.0:+cuda',
|
||||
msg=cuda_arch_conflict + '>=5.3')
|
||||
conflicts('cuda_arch=30', when='@1.3.0:+cuda+caffe2',
|
||||
conflicts('cuda_arch=30', when='@1.3.0:+cuda',
|
||||
msg=cuda_arch_conflict + '>=5.3')
|
||||
conflicts('cuda_arch=30', when='@1.2.0:+cuda+caffe2',
|
||||
conflicts('cuda_arch=30', when='@1.2.0:+cuda',
|
||||
msg=cuda_arch_conflict + '>=3.2')
|
||||
conflicts('cuda_arch=20', when='@1.0.0:+cuda+caffe2',
|
||||
conflicts('cuda_arch=20', when='@1.0.0:+cuda',
|
||||
msg=cuda_arch_conflict + '>=3.0')
|
||||
|
||||
# Required dependencies
|
||||
|
Loading…
Reference in New Issue
Block a user