ML CI: Linux aarch64 (#39666)

* ML CI: Linux aarch64

* Add config files

* No aarch64 tag

* Don't specify image

* Use amazonlinux image

Co-authored-by: kwryankrattiger <80296582+kwryankrattiger@users.noreply.github.com>

* Update and require

* GCC is too old

* Fix some builds

* xgboost doesn't support old GCC + cuda

* Run on newer Ubuntu

* Remove mxnet

* Try aarch64 range

* Use main branch

* Conflict applies to all targets

* cuda only required when +cuda

* Use tagged version

* Comment out tf-estimator

* Add ROCm, use newer Ubuntu

* Remove ROCm

---------

Co-authored-by: kwryankrattiger <80296582+kwryankrattiger@users.noreply.github.com>
This commit is contained in:
Adam J. Stewart 2024-10-28 10:30:07 +01:00 committed by GitHub
parent e83536de38
commit 32ce278a51
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 222 additions and 0 deletions

View File

@@ -735,6 +735,52 @@ ml-linux-x86_64-rocm-build:
- artifacts: True
job: ml-linux-x86_64-rocm-generate
########################################
# Machine Learning - Linux aarch64 (CPU)
########################################
# Shared template for the aarch64 CPU machine learning stack. Both
# ml-linux-aarch64-cpu-generate and ml-linux-aarch64-cpu-build extend it,
# so they inherit `.linux_aarch64` plus the stack name set here.
.ml-linux-aarch64-cpu:
  extends: [ ".linux_aarch64" ]
  variables:
    # Name of the stack these jobs operate on.
    SPACK_CI_STACK_NAME: ml-linux-aarch64-cpu
# Generate job for the aarch64 CPU stack. The matching build job pulls
# jobs_scratch_dir/cloud-ci-pipeline.yml from this job's artifacts.
ml-linux-aarch64-cpu-generate:
  extends: [ ".generate-aarch64", ".ml-linux-aarch64-cpu" ]
  # Pinned image tag used to run the generate step.
  image: ghcr.io/spack/ubuntu-24.04:v2024-09-05-v2
# Build job for the aarch64 CPU stack: triggers the pipeline definition
# that ml-linux-aarch64-cpu-generate published as an artifact.
ml-linux-aarch64-cpu-build:
  extends: [ ".build", ".ml-linux-aarch64-cpu" ]
  trigger:
    include:
      # `artifact` and `job` together form ONE include entry: the file to
      # load and the job whose artifacts contain it.
      - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
        job: ml-linux-aarch64-cpu-generate
    # Tie this job's outcome to the triggered pipeline's outcome.
    strategy: depend
  needs:
    # Single `needs` entry: wait for the generate job and fetch its artifacts.
    - artifacts: true
      job: ml-linux-aarch64-cpu-generate
#########################################
# Machine Learning - Linux aarch64 (CUDA)
#########################################
# Shared template for the aarch64 CUDA machine learning stack. Both
# ml-linux-aarch64-cuda-generate and ml-linux-aarch64-cuda-build extend it,
# so they inherit `.linux_aarch64` plus the stack name set here.
.ml-linux-aarch64-cuda:
  extends: [ ".linux_aarch64" ]
  variables:
    # Name of the stack these jobs operate on.
    SPACK_CI_STACK_NAME: ml-linux-aarch64-cuda
# Generate job for the aarch64 CUDA stack. The matching build job pulls
# jobs_scratch_dir/cloud-ci-pipeline.yml from this job's artifacts.
ml-linux-aarch64-cuda-generate:
  extends: [ ".generate-aarch64", ".ml-linux-aarch64-cuda" ]
  # Pinned image tag used to run the generate step.
  image: ghcr.io/spack/ubuntu-24.04:v2024-09-05-v2
# Build job for the aarch64 CUDA stack: triggers the pipeline definition
# that ml-linux-aarch64-cuda-generate published as an artifact.
ml-linux-aarch64-cuda-build:
  extends: [ ".build", ".ml-linux-aarch64-cuda" ]
  trigger:
    include:
      # `artifact` and `job` together form ONE include entry: the file to
      # load and the job whose artifacts contain it.
      - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
        job: ml-linux-aarch64-cuda-generate
    # Tie this job's outcome to the triggered pipeline's outcome.
    strategy: depend
  needs:
    # Single `needs` entry: wait for the generate job and fetch its artifacts.
    - artifacts: true
      job: ml-linux-aarch64-cuda-generate
#########################################
# Machine Learning - Darwin aarch64 (MPS)
#########################################

View File

@@ -0,0 +1,85 @@
# Spack environment for a CPU-only machine learning stack on aarch64.
# The `all` requirements pin the target to aarch64 and switch the cuda and
# rocm variants off; `specs` lists the root packages to build.
spack:
  view: false

  packages:
    all:
      require:
        - target=aarch64
        - ~cuda
        - ~rocm
    mpi:
      # Use openmpi wherever an MPI provider is needed.
      require: openmpi

  specs:
    # Horovod
    - py-horovod

    # Hugging Face
    - py-transformers

    # JAX
    - py-jax
    - py-jaxlib

    # Keras (one root spec per backend)
    - py-keras backend=tensorflow
    - py-keras backend=jax
    - py-keras backend=torch
    - py-keras-applications
    - py-keras-preprocessing
    - py-keras2onnx

    # PyTorch
    - py-botorch
    - py-efficientnet-pytorch
    - py-gpytorch
    - py-kornia
    - py-lightning
    - py-pytorch-gradual-warmup-lr
    - py-pytorch-lightning
    - py-segmentation-models-pytorch
    - py-timm
    - py-torch
    - py-torch-cluster
    - py-torch-geometric
    - py-torch-nvidia-apex
    - py-torch-scatter
    - py-torch-sparse
    - py-torch-spline-conv
    - py-torchaudio
    - py-torchdata
    - py-torchfile
    - py-torchgeo
    - py-torchmetrics
    - py-torchtext
    - py-torchvision
    - py-vector-quantize-pytorch

    # scikit-learn
    - py-scikit-learn
    - py-scikit-learn-extra

    # TensorBoard
    - py-tensorboard
    - py-tensorboard-data-server
    - py-tensorboard-plugin-wit
    - py-tensorboardx

    # TensorFlow
    - py-tensorflow
    - py-tensorflow-datasets
    - py-tensorflow-hub
    - py-tensorflow-metadata
    - py-tensorflow-probability

    # XGBoost
    - py-xgboost

  ci:
    pipeline-gen:
      - build-job:
          image:
            name: ghcr.io/spack/ubuntu-24.04:v2024-09-05-v2
            # A single empty string replaces the image's own entrypoint.
            entrypoint: ['']

  cdash:
    build-group: Machine Learning

View File

@@ -0,0 +1,91 @@
# Spack environment for a CUDA-enabled machine learning stack on aarch64.
# The `all` requirements pin the target to aarch64, switch rocm off, and
# turn cuda on with cuda_arch=80; `specs` lists the root packages to build.
spack:
  view: false

  packages:
    all:
      require:
        - target=aarch64
        - ~rocm
        - +cuda
        - cuda_arch=80
    llvm:
      # llvm is the one package kept at ~cuda despite the `all` rule above.
      # https://github.com/spack/spack/issues/27999
      require: ~cuda
    mpi:
      # Use openmpi wherever an MPI provider is needed.
      require: openmpi

  specs:
    # Horovod
    - py-horovod

    # Hugging Face
    - py-transformers

    # JAX
    - py-jax
    - py-jaxlib

    # Keras (one root spec per backend)
    - py-keras backend=tensorflow
    - py-keras backend=jax
    - py-keras backend=torch
    - py-keras-applications
    - py-keras-preprocessing
    - py-keras2onnx

    # PyTorch
    - py-botorch
    - py-efficientnet-pytorch
    - py-gpytorch
    - py-kornia
    - py-lightning
    - py-pytorch-gradual-warmup-lr
    - py-pytorch-lightning
    - py-segmentation-models-pytorch
    - py-timm
    - py-torch
    - py-torch-cluster
    - py-torch-geometric
    - py-torch-nvidia-apex
    - py-torch-scatter
    - py-torch-sparse
    - py-torch-spline-conv
    - py-torchaudio
    - py-torchdata
    - py-torchfile
    - py-torchgeo
    - py-torchmetrics
    # torchtext requires older pytorch, which requires older cuda, which doesn't support newer GCC
    # - py-torchtext
    - py-torchvision
    - py-vector-quantize-pytorch

    # scikit-learn
    - py-scikit-learn
    - py-scikit-learn-extra

    # TensorBoard
    - py-tensorboard
    - py-tensorboard-data-server
    - py-tensorboard-plugin-wit
    - py-tensorboardx

    # TensorFlow
    - py-tensorflow
    - py-tensorflow-datasets
    - py-tensorflow-hub
    - py-tensorflow-metadata
    - py-tensorflow-probability

    # XGBoost
    # xgboost requires older cuda, which doesn't support newer GCC
    # - py-xgboost

  ci:
    pipeline-gen:
      - build-job:
          image:
            name: ghcr.io/spack/ubuntu-24.04:v2024-09-05-v2
            # A single empty string replaces the image's own entrypoint.
            entrypoint: ['']

  cdash:
    build-group: Machine Learning