CI: add ML ROCm stack (#45302)
* add ML ROCm stack
* add suggested changes
* remove py-torch and py-tensorflow-estimator
* add TF_ROCM_AMDGPU_TARGETS env variable and remove packages from pipeline
* remove py-jax and py-xgboost
This commit is contained in:
parent
1b5dc396e3
commit
e529a454eb
@ -726,6 +726,29 @@ ml-linux-x86_64-cuda-build:
|
||||
- artifacts: True
|
||||
job: ml-linux-x86_64-cuda-generate
|
||||
|
||||
########################################
|
||||
# Machine Learning - Linux x86_64 (ROCm)
|
||||
########################################
|
||||
.ml-linux-x86_64-rocm:
|
||||
extends: [ ".linux_x86_64_v3" ]
|
||||
variables:
|
||||
SPACK_CI_STACK_NAME: ml-linux-x86_64-rocm
|
||||
|
||||
ml-linux-x86_64-rocm-generate:
|
||||
extends: [ ".generate-x86_64", .ml-linux-x86_64-rocm, ".tags-x86_64_v4" ]
|
||||
image: ghcr.io/spack/ubuntu-22.04:v2024-05-07
|
||||
|
||||
ml-linux-x86_64-rocm-build:
|
||||
extends: [ ".build", ".ml-linux-x86_64-rocm" ]
|
||||
trigger:
|
||||
include:
|
||||
- artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
|
||||
job: ml-linux-x86_64-rocm-generate
|
||||
strategy: depend
|
||||
needs:
|
||||
- artifacts: True
|
||||
job: ml-linux-x86_64-rocm-generate
|
||||
|
||||
#########################################
|
||||
# Machine Learning - Darwin aarch64 (MPS)
|
||||
#########################################
|
||||
|
@ -0,0 +1,93 @@
|
||||
spack:
|
||||
view: false
|
||||
packages:
|
||||
all:
|
||||
require:
|
||||
- target=x86_64_v3
|
||||
- ~cuda
|
||||
- +rocm
|
||||
- amdgpu_target=gfx90a
|
||||
gl:
|
||||
require: "osmesa"
|
||||
mpi:
|
||||
require: openmpi
|
||||
|
||||
specs:
|
||||
# Horovod
|
||||
# - py-horovod
|
||||
|
||||
# Hugging Face
|
||||
- py-transformers
|
||||
|
||||
# JAX
|
||||
# Does not yet support Spack-installed ROCm
|
||||
# - py-jax
|
||||
# - py-jaxlib
|
||||
|
||||
# Keras
|
||||
- py-keras backend=tensorflow
|
||||
# - py-keras backend=jax
|
||||
# - py-keras backend=torch
|
||||
- py-keras-applications
|
||||
- py-keras-preprocessing
|
||||
- py-keras2onnx
|
||||
|
||||
# PyTorch
|
||||
# Does not yet support Spack-installed ROCm
|
||||
# - py-botorch
|
||||
# - py-efficientnet-pytorch
|
||||
# - py-gpytorch
|
||||
# - py-kornia
|
||||
# - py-lightning
|
||||
# - py-pytorch-gradual-warmup-lr
|
||||
# - py-pytorch-lightning
|
||||
# - py-segmentation-models-pytorch
|
||||
# - py-timm
|
||||
# - py-torch
|
||||
# - py-torch-cluster
|
||||
# - py-torch-geometric
|
||||
# - py-torch-nvidia-apex
|
||||
# - py-torch-scatter
|
||||
# - py-torch-sparse
|
||||
# - py-torch-spline-conv
|
||||
# - py-torchaudio
|
||||
# - py-torchdata
|
||||
# - py-torchfile
|
||||
# - py-torchgeo
|
||||
# - py-torchmetrics
|
||||
# - py-torchtext
|
||||
# - py-torchvision
|
||||
# - py-vector-quantize-pytorch
|
||||
|
||||
# scikit-learn
|
||||
- py-scikit-learn
|
||||
- py-scikit-learn-extra
|
||||
|
||||
# TensorBoard
|
||||
- py-tensorboard
|
||||
- py-tensorboard-data-server
|
||||
- py-tensorboard-plugin-wit
|
||||
- py-tensorboardx
|
||||
|
||||
# TensorFlow
|
||||
- py-tensorflow
|
||||
- py-tensorflow-datasets
|
||||
# py-tensorflow-estimator version 2.16 is not available
|
||||
# - py-tensorflow-estimator
|
||||
- py-tensorflow-hub
|
||||
- py-tensorflow-metadata
|
||||
- py-tensorflow-probability
|
||||
|
||||
# XGBoost
|
||||
# Does not yet support Spack-installed ROCm
|
||||
# - py-xgboost
|
||||
|
||||
ci:
|
||||
pipeline-gen:
|
||||
- build-job:
|
||||
image:
|
||||
name: ghcr.io/spack/ubuntu-22.04:v2024-05-07
|
||||
entrypoint: ['']
|
||||
|
||||
cdash:
|
||||
build-group: Machine Learning
|
@ -562,6 +562,7 @@ def setup_build_environment(self, env):
|
||||
for pkg_dep in rocm_dependencies:
|
||||
pkg_dep_cap = pkg_dep.upper().replace("-", "_")
|
||||
env.set(f"{pkg_dep_cap}_PATH", spec[pkg_dep].prefix)
|
||||
env.set("TF_ROCM_AMDGPU_TARGETS", ",".join(self.spec.variants["amdgpu_target"].value))
|
||||
else:
|
||||
env.set("TF_NEED_ROCM", "0")
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user