ML CI: Linux x86_64 (#34299)

* ML CI: Linux x86_64

* Update comments

* Rename again

* Rename comments

* Update to match other arches

* No compiler

* Compiler was wrong anyway

* Faster TF
This commit is contained in:
Adam J. Stewart 2022-12-22 11:31:40 -06:00 committed by GitHub
parent 371268a9aa
commit eb67497020
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 280 additions and 253 deletions

View File

@ -760,122 +760,122 @@ tutorial-protected-build:
- artifacts: True - artifacts: True
job: tutorial-protected-generate job: tutorial-protected-generate
######################################## #######################################
# Machine Learning (CPU) # Machine Learning - Linux x86_64 (CPU)
######################################## #######################################
.ml-cpu: .ml-linux-x86_64-cpu:
variables: variables:
SPACK_CI_STACK_NAME: ml-cpu SPACK_CI_STACK_NAME: ml-linux-x86_64-cpu
.ml-cpu-generate: .ml-linux-x86_64-cpu-generate:
extends: .ml-cpu extends: .ml-linux-x86_64-cpu
image: ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21 image: ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21
tags: ["spack", "aws", "public", "medium", "x86_64_v4"] tags: ["spack", "aws", "public", "medium", "x86_64_v4"]
ml-cpu-pr-generate: ml-linux-x86_64-cpu-pr-generate:
extends: [ ".ml-cpu-generate", ".pr-generate"] extends: [ ".ml-linux-x86_64-cpu-generate", ".pr-generate"]
ml-cpu-protected-generate: ml-linux-x86_64-cpu-protected-generate:
extends: [ ".ml-cpu-generate", ".protected-generate"] extends: [ ".ml-linux-x86_64-cpu-generate", ".protected-generate"]
ml-cpu-pr-build: ml-linux-x86_64-cpu-pr-build:
extends: [ ".ml-cpu", ".pr-build" ] extends: [ ".ml-linux-x86_64-cpu", ".pr-build" ]
trigger: trigger:
include: include:
- artifact: jobs_scratch_dir/cloud-ci-pipeline.yml - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
job: ml-cpu-pr-generate job: ml-linux-x86_64-cpu-pr-generate
strategy: depend strategy: depend
needs: needs:
- artifacts: True - artifacts: True
job: ml-cpu-pr-generate job: ml-linux-x86_64-cpu-pr-generate
ml-cpu-protected-build: ml-linux-x86_64-cpu-protected-build:
extends: [ ".ml-cpu", ".protected-build" ] extends: [ ".ml-linux-x86_64-cpu", ".protected-build" ]
trigger: trigger:
include: include:
- artifact: jobs_scratch_dir/cloud-ci-pipeline.yml - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
job: ml-cpu-protected-generate job: ml-linux-x86_64-cpu-protected-generate
strategy: depend strategy: depend
needs: needs:
- artifacts: True - artifacts: True
job: ml-cpu-protected-generate job: ml-linux-x86_64-cpu-protected-generate
######################################## ########################################
# Machine Learning (CUDA) # Machine Learning - Linux x86_64 (CUDA)
######################################## ########################################
.ml-cuda: .ml-linux-x86_64-cuda:
variables: variables:
SPACK_CI_STACK_NAME: ml-cuda SPACK_CI_STACK_NAME: ml-linux-x86_64-cuda
.ml-cuda-generate: .ml-linux-x86_64-cuda-generate:
extends: .ml-cuda extends: .ml-linux-x86_64-cuda
image: ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21 image: ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21
tags: ["spack", "aws", "public", "medium", "x86_64_v4"] tags: ["spack", "aws", "public", "medium", "x86_64_v4"]
ml-cuda-pr-generate: ml-linux-x86_64-cuda-pr-generate:
extends: [ ".ml-cuda-generate", ".pr-generate"] extends: [ ".ml-linux-x86_64-cuda-generate", ".pr-generate"]
ml-cuda-protected-generate: ml-linux-x86_64-cuda-protected-generate:
extends: [ ".ml-cuda-generate", ".protected-generate"] extends: [ ".ml-linux-x86_64-cuda-generate", ".protected-generate"]
ml-cuda-pr-build: ml-linux-x86_64-cuda-pr-build:
extends: [ ".ml-cuda", ".pr-build" ] extends: [ ".ml-linux-x86_64-cuda", ".pr-build" ]
trigger: trigger:
include: include:
- artifact: jobs_scratch_dir/cloud-ci-pipeline.yml - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
job: ml-cuda-pr-generate job: ml-linux-x86_64-cuda-pr-generate
strategy: depend strategy: depend
needs: needs:
- artifacts: True - artifacts: True
job: ml-cuda-pr-generate job: ml-linux-x86_64-cuda-pr-generate
ml-cuda-protected-build: ml-linux-x86_64-cuda-protected-build:
extends: [ ".ml-cuda", ".protected-build" ] extends: [ ".ml-linux-x86_64-cuda", ".protected-build" ]
trigger: trigger:
include: include:
- artifact: jobs_scratch_dir/cloud-ci-pipeline.yml - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
job: ml-cuda-protected-generate job: ml-linux-x86_64-cuda-protected-generate
strategy: depend strategy: depend
needs: needs:
- artifacts: True - artifacts: True
job: ml-cuda-protected-generate job: ml-linux-x86_64-cuda-protected-generate
######################################## ########################################
# Machine Learning (ROCm) # Machine Learning - Linux x86_64 (ROCm)
######################################## ########################################
.ml-rocm: .ml-linux-x86_64-rocm:
variables: variables:
SPACK_CI_STACK_NAME: ml-rocm SPACK_CI_STACK_NAME: ml-linux-x86_64-rocm
.ml-rocm-generate: .ml-linux-x86_64-rocm-generate:
extends: .ml-rocm extends: .ml-linux-x86_64-rocm
image: ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21 image: ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21
tags: ["spack", "aws", "public", "medium", "x86_64_v4"] tags: ["spack", "aws", "public", "medium", "x86_64_v4"]
ml-rocm-pr-generate: ml-linux-x86_64-rocm-pr-generate:
extends: [ ".ml-rocm-generate", ".pr-generate"] extends: [ ".ml-linux-x86_64-rocm-generate", ".pr-generate"]
ml-rocm-protected-generate: ml-linux-x86_64-rocm-protected-generate:
extends: [ ".ml-rocm-generate", ".protected-generate"] extends: [ ".ml-linux-x86_64-rocm-generate", ".protected-generate"]
ml-rocm-pr-build: ml-linux-x86_64-rocm-pr-build:
extends: [ ".ml-rocm", ".pr-build" ] extends: [ ".ml-linux-x86_64-rocm", ".pr-build" ]
trigger: trigger:
include: include:
- artifact: jobs_scratch_dir/cloud-ci-pipeline.yml - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
job: ml-rocm-pr-generate job: ml-linux-x86_64-rocm-pr-generate
strategy: depend strategy: depend
needs: needs:
- artifacts: True - artifacts: True
job: ml-rocm-pr-generate job: ml-linux-x86_64-rocm-pr-generate
ml-rocm-protected-build: ml-linux-x86_64-rocm-protected-build:
extends: [ ".ml-rocm", ".protected-build" ] extends: [ ".ml-linux-x86_64-rocm", ".protected-build" ]
trigger: trigger:
include: include:
- artifact: jobs_scratch_dir/cloud-ci-pipeline.yml - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
job: ml-rocm-protected-generate job: ml-linux-x86_64-rocm-protected-generate
strategy: depend strategy: depend
needs: needs:
- artifacts: True - artifacts: True
job: ml-rocm-protected-generate job: ml-linux-x86_64-rocm-protected-generate

View File

@ -16,77 +16,85 @@ spack:
packages: packages:
all: all:
compiler: [gcc@11.2.0]
target: [x86_64_v3] target: [x86_64_v3]
variants: ~cuda~rocm variants: ~cuda~rocm
definitions:
- packages:
# Horovod
- py-horovod
# Hugging Face
- py-transformers
# JAX
- py-jax
- py-jaxlib
# Keras
- py-keras
- py-keras-applications
- py-keras-preprocessing
- py-keras2onnx
# PyTorch
- py-botorch
- py-efficientnet-pytorch
- py-gpytorch
- py-kornia
- py-pytorch-gradual-warmup-lr
- py-pytorch-lightning
- py-segmentation-models-pytorch
- py-timm
- py-torch
- py-torch-cluster
- py-torch-geometric
- py-torch-nvidia-apex
- py-torch-scatter
- py-torch-sparse
- py-torch-spline-conv
- py-torchaudio
- py-torchdata
- py-torchfile
- py-torchgeo
- py-torchmeta
- py-torchmetrics
- py-torchtext
- py-torchvision
- py-vector-quantize-pytorch
# scikit-learn
- py-scikit-learn
- py-scikit-learn-extra
# TensorBoard
- py-tensorboard
- py-tensorboard-data-server
- py-tensorboard-plugin-wit
- py-tensorboardx
# TensorFlow
- py-tensorflow
- py-tensorflow-datasets
- py-tensorflow-estimator
- py-tensorflow-hub
- py-tensorflow-metadata
- py-tensorflow-probability
# XGBoost
- py-xgboost
# - r-xgboost
- xgboost
- arch:
- target=x86_64_v3
specs: specs:
# Horovod - matrix:
- py-horovod - [$packages]
- [$arch]
# Hugging Face mirrors: { "mirror": "s3://spack-binaries/develop/ml-linux-x86_64-cpu" }
- py-transformers
# JAX
- py-jax
- py-jaxlib
# Keras
- py-keras
- py-keras-applications
- py-keras-preprocessing
- py-keras2onnx
# PyTorch
- py-botorch
- py-efficientnet-pytorch
- py-gpytorch
- py-kornia
- py-pytorch-gradual-warmup-lr
- py-pytorch-lightning
- py-segmentation-models-pytorch
- py-timm
- py-torch
- py-torch-cluster
- py-torch-geometric
- py-torch-nvidia-apex
- py-torch-scatter
- py-torch-sparse
- py-torch-spline-conv
- py-torchaudio
- py-torchdata
- py-torchfile
- py-torchgeo
- py-torchmeta
- py-torchmetrics
- py-torchtext
- py-torchvision
- py-vector-quantize-pytorch
# scikit-learn
- py-scikit-learn
- py-scikit-learn-extra
# TensorBoard
- py-tensorboard
- py-tensorboard-data-server
- py-tensorboard-plugin-wit
- py-tensorboardx
# TensorFlow
- py-tensorflow
- py-tensorflow-datasets
- py-tensorflow-estimator
- py-tensorflow-hub
- py-tensorflow-metadata
- py-tensorflow-probability
# XGBoost
- py-xgboost
# - r-xgboost
- xgboost
mirrors: { "mirror": "s3://spack-binaries/develop/ml-cpu" }
gitlab-ci: gitlab-ci:
script: script:
@ -113,6 +121,7 @@ spack:
mappings: mappings:
- match: - match:
- llvm - llvm
- py-tensorflow
- py-torch - py-torch
runner-attributes: runner-attributes:
tags: [ "spack", "huge", "x86_64_v4" ] tags: [ "spack", "huge", "x86_64_v4" ]

View File

@ -16,80 +16,88 @@ spack:
packages: packages:
all: all:
compiler: [gcc@11.2.0]
target: [x86_64_v3] target: [x86_64_v3]
variants: ~rocm+cuda cuda_arch=80 variants: ~rocm+cuda cuda_arch=80
llvm: llvm:
# https://github.com/spack/spack/issues/27999 # https://github.com/spack/spack/issues/27999
require: ~cuda require: ~cuda
definitions:
- packages:
# Horovod
- py-horovod
# Hugging Face
- py-transformers
# JAX
- py-jax
- py-jaxlib
# Keras
- py-keras
- py-keras-applications
- py-keras-preprocessing
- py-keras2onnx
# PyTorch
- py-botorch
- py-efficientnet-pytorch
- py-gpytorch
- py-kornia
- py-pytorch-gradual-warmup-lr
- py-pytorch-lightning
- py-segmentation-models-pytorch
- py-timm
- py-torch
- py-torch-cluster
- py-torch-geometric
- py-torch-nvidia-apex
- py-torch-scatter
- py-torch-sparse
- py-torch-spline-conv
- py-torchaudio
- py-torchdata
- py-torchfile
- py-torchgeo
- py-torchmeta
- py-torchmetrics
- py-torchtext
- py-torchvision
- py-vector-quantize-pytorch
# scikit-learn
- py-scikit-learn
- py-scikit-learn-extra
# TensorBoard
- py-tensorboard
- py-tensorboard-data-server
- py-tensorboard-plugin-wit
- py-tensorboardx
# TensorFlow
- py-tensorflow
- py-tensorflow-datasets
- py-tensorflow-estimator
- py-tensorflow-hub
- py-tensorflow-metadata
- py-tensorflow-probability
# XGBoost
- py-xgboost
# - r-xgboost
- xgboost
- arch:
- target=x86_64_v3
specs: specs:
# Horovod - matrix:
- py-horovod - [$packages]
- [$arch]
# Hugging Face mirrors: { "mirror": "s3://spack-binaries/develop/ml-linux-x86_64-cuda" }
- py-transformers
# JAX
- py-jax
- py-jaxlib
# Keras
- py-keras
- py-keras-applications
- py-keras-preprocessing
- py-keras2onnx
# PyTorch
- py-botorch
- py-efficientnet-pytorch
- py-gpytorch
- py-kornia
- py-pytorch-gradual-warmup-lr
- py-pytorch-lightning
- py-segmentation-models-pytorch
- py-timm
- py-torch
- py-torch-cluster
- py-torch-geometric
- py-torch-nvidia-apex
- py-torch-scatter
- py-torch-sparse
- py-torch-spline-conv
- py-torchaudio
- py-torchdata
- py-torchfile
- py-torchgeo
- py-torchmeta
- py-torchmetrics
- py-torchtext
- py-torchvision
- py-vector-quantize-pytorch
# scikit-learn
- py-scikit-learn
- py-scikit-learn-extra
# TensorBoard
- py-tensorboard
- py-tensorboard-data-server
- py-tensorboard-plugin-wit
- py-tensorboardx
# TensorFlow
- py-tensorflow
- py-tensorflow-datasets
- py-tensorflow-estimator
- py-tensorflow-hub
- py-tensorflow-metadata
- py-tensorflow-probability
# XGBoost
- py-xgboost
# - r-xgboost
- xgboost
mirrors: { "mirror": "s3://spack-binaries/develop/ml-cuda" }
gitlab-ci: gitlab-ci:
script: script:
@ -116,6 +124,7 @@ spack:
mappings: mappings:
- match: - match:
- llvm - llvm
- py-tensorflow
- py-torch - py-torch
runner-attributes: runner-attributes:
tags: [ "spack", "huge", "x86_64_v4" ] tags: [ "spack", "huge", "x86_64_v4" ]

View File

@ -16,7 +16,6 @@ spack:
packages: packages:
all: all:
compiler: [gcc@11.2.0]
target: [x86_64_v3] target: [x86_64_v3]
variants: ~cuda+rocm amdgpu_target=gfx90a variants: ~cuda+rocm amdgpu_target=gfx90a
gl: gl:
@ -25,74 +24,83 @@ spack:
# Does not yet support Spack-installed ROCm # Does not yet support Spack-installed ROCm
require: ~rocm require: ~rocm
definitions:
- packages:
# Horovod
- py-horovod
# Hugging Face
- py-transformers
# JAX
- py-jax
- py-jaxlib
# Keras
- py-keras
- py-keras-applications
- py-keras-preprocessing
- py-keras2onnx
# PyTorch
# Does not yet support Spack-installed ROCm
# - py-botorch
# - py-efficientnet-pytorch
# - py-gpytorch
# - py-kornia
# - py-pytorch-gradual-warmup-lr
# - py-pytorch-lightning
# - py-segmentation-models-pytorch
# - py-timm
# - py-torch
# - py-torch-cluster
# - py-torch-geometric
# - py-torch-nvidia-apex
# - py-torch-scatter
# - py-torch-sparse
# - py-torch-spline-conv
# - py-torchaudio
# - py-torchdata
# - py-torchfile
# - py-torchgeo
# - py-torchmeta
# - py-torchmetrics
# - py-torchtext
# - py-torchvision
# - py-vector-quantize-pytorch
# scikit-learn
- py-scikit-learn
- py-scikit-learn-extra
# TensorBoard
- py-tensorboard
- py-tensorboard-data-server
- py-tensorboard-plugin-wit
- py-tensorboardx
# TensorFlow
- py-tensorflow
- py-tensorflow-datasets
- py-tensorflow-estimator
- py-tensorflow-hub
- py-tensorflow-metadata
- py-tensorflow-probability
# XGBoost
- py-xgboost
# - r-xgboost
- xgboost
- arch:
- target=x86_64_v3
specs: specs:
# Horovod - matrix:
- py-horovod - [$packages]
- [$arch]
# Hugging Face mirrors: { "mirror": "s3://spack-binaries/develop/ml-linux-x86_64-rocm" }
- py-transformers
# JAX
- py-jax
- py-jaxlib
# Keras
- py-keras
- py-keras-applications
- py-keras-preprocessing
- py-keras2onnx
# PyTorch
# Does not yet support Spack-installed ROCm
# - py-botorch
# - py-efficientnet-pytorch
# - py-gpytorch
# - py-kornia
# - py-pytorch-gradual-warmup-lr
# - py-pytorch-lightning
# - py-segmentation-models-pytorch
# - py-timm
# - py-torch
# - py-torch-cluster
# - py-torch-geometric
# - py-torch-nvidia-apex
# - py-torch-scatter
# - py-torch-sparse
# - py-torch-spline-conv
# - py-torchaudio
# - py-torchdata
# - py-torchfile
# - py-torchgeo
# - py-torchmeta
# - py-torchmetrics
# - py-torchtext
# - py-torchvision
# - py-vector-quantize-pytorch
# scikit-learn
- py-scikit-learn
- py-scikit-learn-extra
# TensorBoard
- py-tensorboard
- py-tensorboard-data-server
- py-tensorboard-plugin-wit
- py-tensorboardx
# TensorFlow
- py-tensorflow
- py-tensorflow-datasets
- py-tensorflow-estimator
- py-tensorflow-hub
- py-tensorflow-metadata
- py-tensorflow-probability
# XGBoost
- py-xgboost
# - r-xgboost
- xgboost
mirrors: { "mirror": "s3://spack-binaries/develop/ml-rocm" }
gitlab-ci: gitlab-ci:
script: script:
@ -118,8 +126,9 @@ spack:
match_behavior: first match_behavior: first
mappings: mappings:
- match: - match:
- llvm-amdgpu
- llvm - llvm
- llvm-amdgpu
- py-tensorflow
- py-torch - py-torch
- rocblas - rocblas
runner-attributes: runner-attributes: