CI: add ML ROCm stack (#45302)
* add ML ROCm stack
* add suggested changes
* remove py-torch and py-tensorflow-estimator
* add TF_ROCM_AMDGPU_TARGETS env variable and remove packages from pipeline
* remove py-jax and py-xgboost
This commit is contained in:
parent
1b5dc396e3
commit
e529a454eb
@ -726,6 +726,29 @@ ml-linux-x86_64-cuda-build:
|
|||||||
- artifacts: True
|
- artifacts: True
|
||||||
job: ml-linux-x86_64-cuda-generate
|
job: ml-linux-x86_64-cuda-generate
|
||||||
|
|
||||||
|
########################################
|
||||||
|
# Machine Learning - Linux x86_64 (ROCm)
|
||||||
|
########################################
|
||||||
|
.ml-linux-x86_64-rocm:
|
||||||
|
extends: [ ".linux_x86_64_v3" ]
|
||||||
|
variables:
|
||||||
|
SPACK_CI_STACK_NAME: ml-linux-x86_64-rocm
|
||||||
|
|
||||||
|
ml-linux-x86_64-rocm-generate:
|
||||||
|
extends: [ ".generate-x86_64", .ml-linux-x86_64-rocm, ".tags-x86_64_v4" ]
|
||||||
|
image: ghcr.io/spack/ubuntu-22.04:v2024-05-07
|
||||||
|
|
||||||
|
ml-linux-x86_64-rocm-build:
|
||||||
|
extends: [ ".build", ".ml-linux-x86_64-rocm" ]
|
||||||
|
trigger:
|
||||||
|
include:
|
||||||
|
- artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
|
||||||
|
job: ml-linux-x86_64-rocm-generate
|
||||||
|
strategy: depend
|
||||||
|
needs:
|
||||||
|
- artifacts: True
|
||||||
|
job: ml-linux-x86_64-rocm-generate
|
||||||
|
|
||||||
#########################################
|
#########################################
|
||||||
# Machine Learning - Darwin aarch64 (MPS)
|
# Machine Learning - Darwin aarch64 (MPS)
|
||||||
#########################################
|
#########################################
|
||||||
|
@ -0,0 +1,93 @@
|
|||||||
|
spack:
|
||||||
|
view: false
|
||||||
|
packages:
|
||||||
|
all:
|
||||||
|
require:
|
||||||
|
- target=x86_64_v3
|
||||||
|
- ~cuda
|
||||||
|
- +rocm
|
||||||
|
- amdgpu_target=gfx90a
|
||||||
|
gl:
|
||||||
|
require: "osmesa"
|
||||||
|
mpi:
|
||||||
|
require: openmpi
|
||||||
|
|
||||||
|
specs:
|
||||||
|
# Horovod
|
||||||
|
# - py-horovod
|
||||||
|
|
||||||
|
# Hugging Face
|
||||||
|
- py-transformers
|
||||||
|
|
||||||
|
# JAX
|
||||||
|
# Does not yet support Spack-installed ROCm
|
||||||
|
# - py-jax
|
||||||
|
# - py-jaxlib
|
||||||
|
|
||||||
|
# Keras
|
||||||
|
- py-keras backend=tensorflow
|
||||||
|
# - py-keras backend=jax
|
||||||
|
# - py-keras backend=torch
|
||||||
|
- py-keras-applications
|
||||||
|
- py-keras-preprocessing
|
||||||
|
- py-keras2onnx
|
||||||
|
|
||||||
|
# PyTorch
|
||||||
|
# Does not yet support Spack-installed ROCm
|
||||||
|
# - py-botorch
|
||||||
|
# - py-efficientnet-pytorch
|
||||||
|
# - py-gpytorch
|
||||||
|
# - py-kornia
|
||||||
|
# - py-lightning
|
||||||
|
# - py-pytorch-gradual-warmup-lr
|
||||||
|
# - py-pytorch-lightning
|
||||||
|
# - py-segmentation-models-pytorch
|
||||||
|
# - py-timm
|
||||||
|
# - py-torch
|
||||||
|
# - py-torch-cluster
|
||||||
|
# - py-torch-geometric
|
||||||
|
# - py-torch-nvidia-apex
|
||||||
|
# - py-torch-scatter
|
||||||
|
# - py-torch-sparse
|
||||||
|
# - py-torch-spline-conv
|
||||||
|
# - py-torchaudio
|
||||||
|
# - py-torchdata
|
||||||
|
# - py-torchfile
|
||||||
|
# - py-torchgeo
|
||||||
|
# - py-torchmetrics
|
||||||
|
# - py-torchtext
|
||||||
|
# - py-torchvision
|
||||||
|
# - py-vector-quantize-pytorch
|
||||||
|
|
||||||
|
# scikit-learn
|
||||||
|
- py-scikit-learn
|
||||||
|
- py-scikit-learn-extra
|
||||||
|
|
||||||
|
# TensorBoard
|
||||||
|
- py-tensorboard
|
||||||
|
- py-tensorboard-data-server
|
||||||
|
- py-tensorboard-plugin-wit
|
||||||
|
- py-tensorboardx
|
||||||
|
|
||||||
|
# TensorFlow
|
||||||
|
- py-tensorflow
|
||||||
|
- py-tensorflow-datasets
|
||||||
|
# version 2.16 is not available
|
||||||
|
# - py-tensorflow-estimator
|
||||||
|
- py-tensorflow-hub
|
||||||
|
- py-tensorflow-metadata
|
||||||
|
- py-tensorflow-probability
|
||||||
|
|
||||||
|
# XGBoost
|
||||||
|
# Does not yet support Spack-installed ROCm
|
||||||
|
# - py-xgboost
|
||||||
|
|
||||||
|
ci:
|
||||||
|
pipeline-gen:
|
||||||
|
- build-job:
|
||||||
|
image:
|
||||||
|
name: ghcr.io/spack/ubuntu-22.04:v2024-05-07
|
||||||
|
entrypoint: ['']
|
||||||
|
|
||||||
|
cdash:
|
||||||
|
build-group: Machine Learning
|
@ -562,6 +562,7 @@ def setup_build_environment(self, env):
|
|||||||
for pkg_dep in rocm_dependencies:
|
for pkg_dep in rocm_dependencies:
|
||||||
pkg_dep_cap = pkg_dep.upper().replace("-", "_")
|
pkg_dep_cap = pkg_dep.upper().replace("-", "_")
|
||||||
env.set(f"{pkg_dep_cap}_PATH", spec[pkg_dep].prefix)
|
env.set(f"{pkg_dep_cap}_PATH", spec[pkg_dep].prefix)
|
||||||
|
env.set("TF_ROCM_AMDGPU_TARGETS", ",".join(self.spec.variants["amdgpu_target"].value))
|
||||||
else:
|
else:
|
||||||
env.set("TF_NEED_ROCM", "0")
|
env.set("TF_NEED_ROCM", "0")
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user