Add macOS ML CI stacks (#36586)

* Add macOS ML CI stacks

* torchmeta is no longer maintained and requires ancient PyTorch

* Add MXNet

* update darwin aarch64 stacks

* add darwin-aarch64 scoped config.yaml

* remove unnecessary cleanup job

* fix specifications

* fix labels

* fix labels

* fix indent on tags specification

* no tags for trigger jobs

* try overriding tags in stack spack.yaml

* do not use CI_STACK_CONFIG_SCOPES

* incorporate config:install_tree:root: overrides and compiler defs

* copy relevant ci-scoped config settings directly into stack spack.yaml

* remove build-job-remove

* spack ci generate: add debug flag

* include cdash config directly in stack spack.yaml

* customize build-job script section to avoid absolute paths

* add any-job specification

* tags: use aarch64-macos instead of aarch64

* generate tags: use aarch64-macos instead of aarch64

* do not add morepadding

* use shared mirror; comment out known failures

* remove any-job

* nproc || true

* comment out specs failing due to bazel from cache codesign issue

---------

Co-authored-by: eugeneswalker <eugenesunsetwalker@gmail.com>
This commit is contained in:
Adam J. Stewart 2023-05-25 00:12:54 -05:00 committed by GitHub
parent 06f9bcf734
commit 2f8cea2792
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 223 additions and 172 deletions

View File

@ -128,6 +128,38 @@ default:
extends: [ ".base-job", ".generate-base" ]
tags: ["spack", "public", "medium", "x86_64"]
# Hidden template for the "generate" stage on macOS runners. It sources Spack
# from the checkout, activates the stack environment selected by
# SPACK_CI_STACK_NAME, and writes the generated child pipeline under
# jobs_scratch_dir, which is then published as a job artifact.
.darwin-generate-base:
  stage: generate
  script:
    # Disable local Spack config and keep the user cache inside the job's
    # working directory.
    - export SPACK_DISABLE_LOCAL_CONFIG=1
    - export SPACK_USER_CACHE_PATH=$(pwd)/_user_cache
    # Best-effort host diagnostics: every probe ends in `|| true`, so a
    # missing tool or file cannot fail the job.
    - uname -a || true
    - grep -E 'vendor|model name' /proc/cpuinfo 2>/dev/null | sort -u || head -n10 /proc/cpuinfo 2>/dev/null || true
    - nproc || true
    - . "./share/spack/setup-env.sh"
    - spack --version
    - cd share/spack/gitlab/cloud_pipelines/stacks/${SPACK_CI_STACK_NAME}
    - spack env activate --without-view .
    # Folded scalar: the four lines below are joined with single spaces into
    # one command.
    - >-
      spack -d ci generate --check-index-only
      --buildcache-destination "${SPACK_BUILDCACHE_DESTINATION}"
      --artifacts-root "${CI_PROJECT_DIR}/jobs_scratch_dir"
      --output-file "${CI_PROJECT_DIR}/jobs_scratch_dir/cloud-ci-pipeline.yml"
  after_script:
    - cat /proc/loadavg || true
  artifacts:
    paths:
      - "${CI_PROJECT_DIR}/jobs_scratch_dir"
  interruptible: true
  timeout: 60 minutes
  # Retry up to two more times, whatever the failure reason.
  retry:
    max: 2
    when:
      - always
# Hidden macOS generate template: combines .base-job with the Darwin-specific
# generate steps from .darwin-generate-base.
.darwin-generate:
  extends:
    - ".base-job"
    - ".darwin-generate-base"
.generate-deprecated:
extends: [ ".base-job" ]
stage: generate
@ -236,124 +268,6 @@ protected-publish:
# - artifacts: True
# job: my-super-cool-stack-generate
########################################
# E4S Mac Stack
#
# With no near-future plans to have
# protected aws runners running mac
# builds, it seems best to decouple
# them from the rest of the stacks for
# the time being. This way they can
# still run on UO runners and be signed
# using the previous approach.
########################################
# .e4s-mac:
# variables:
# SPACK_CI_STACK_NAME: e4s-mac
# allow_failure: True
# .mac-pr:
# only:
# - /^pr[\d]+_.*$/
# - /^github\/pr[\d]+_.*$/
# variables:
# SPACK_BUILDCACHE_DESTINATION: "s3://spack-binaries-prs/${CI_COMMIT_REF_NAME}"
# SPACK_PRUNE_UNTOUCHED: "True"
# .mac-protected:
# only:
# - /^develop$/
# - /^releases\/v.*/
# - /^v.*/
# - /^github\/develop$/
# variables:
# SPACK_BUILDCACHE_DESTINATION: "s3://spack-binaries/${CI_COMMIT_REF_NAME}/${SPACK_CI_STACK_NAME}"
# .mac-pr-build:
# extends: [ ".mac-pr", ".build" ]
# variables:
# AWS_ACCESS_KEY_ID: ${PR_MIRRORS_AWS_ACCESS_KEY_ID}
# AWS_SECRET_ACCESS_KEY: ${PR_MIRRORS_AWS_SECRET_ACCESS_KEY}
# .mac-protected-build:
# extends: [ ".mac-protected", ".build" ]
# variables:
# AWS_ACCESS_KEY_ID: ${PROTECTED_MIRRORS_AWS_ACCESS_KEY_ID}
# AWS_SECRET_ACCESS_KEY: ${PROTECTED_MIRRORS_AWS_SECRET_ACCESS_KEY}
# SPACK_SIGNING_KEY: ${PACKAGE_SIGNING_KEY}
# e4s-mac-pr-generate:
# extends: [".e4s-mac", ".mac-pr"]
# stage: generate
# script:
# - tmp="$(mktemp -d)"; export SPACK_USER_CONFIG_PATH="$tmp"; export SPACK_USER_CACHE_PATH="$tmp"
# - . "./share/spack/setup-env.sh"
# - spack --version
# - cd share/spack/gitlab/cloud_pipelines/stacks/${SPACK_CI_STACK_NAME}
# - spack env activate --without-view .
# - spack ci generate --check-index-only
# --buildcache-destination "${SPACK_BUILDCACHE_DESTINATION}"
# --artifacts-root "${CI_PROJECT_DIR}/jobs_scratch_dir"
# --output-file "${CI_PROJECT_DIR}/jobs_scratch_dir/cloud-ci-pipeline.yml"
# artifacts:
# paths:
# - "${CI_PROJECT_DIR}/jobs_scratch_dir"
# tags:
# - lambda
# interruptible: true
# retry:
# max: 2
# when:
# - runner_system_failure
# - stuck_or_timeout_failure
# timeout: 60 minutes
# e4s-mac-protected-generate:
# extends: [".e4s-mac", ".mac-protected"]
# stage: generate
# script:
# - tmp="$(mktemp -d)"; export SPACK_USER_CONFIG_PATH="$tmp"; export SPACK_USER_CACHE_PATH="$tmp"
# - . "./share/spack/setup-env.sh"
# - spack --version
# - cd share/spack/gitlab/cloud_pipelines/stacks/${SPACK_CI_STACK_NAME}
# - spack env activate --without-view .
# - spack ci generate --check-index-only
# --artifacts-root "${CI_PROJECT_DIR}/jobs_scratch_dir"
# --output-file "${CI_PROJECT_DIR}/jobs_scratch_dir/cloud-ci-pipeline.yml"
# artifacts:
# paths:
# - "${CI_PROJECT_DIR}/jobs_scratch_dir"
# tags:
# - omicron
# interruptible: true
# retry:
# max: 2
# when:
# - runner_system_failure
# - stuck_or_timeout_failure
# timeout: 60 minutes
# e4s-mac-pr-build:
# extends: [ ".e4s-mac", ".mac-pr-build" ]
# trigger:
# include:
# - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
# job: e4s-mac-pr-generate
# strategy: depend
# needs:
# - artifacts: True
# job: e4s-mac-pr-generate
# e4s-mac-protected-build:
# extends: [ ".e4s-mac", ".mac-protected-build" ]
# trigger:
# include:
# - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
# job: e4s-mac-protected-generate
# strategy: depend
# needs:
# - artifacts: True
# job: e4s-mac-protected-generate
########################################
# E4S pipeline
########################################
@ -762,6 +676,28 @@ ml-linux-x86_64-rocm-build:
- artifacts: True
job: ml-linux-x86_64-rocm-generate
########################################
# Machine Learning - Darwin aarch64 (MPS)
########################################
# Shared settings for the ML Darwin aarch64 (MPS) jobs: names the stack
# directory that the generate script changes into.
.ml-darwin-aarch64-mps:
  variables:
    SPACK_CI_STACK_NAME: "ml-darwin-aarch64-mps"
# Generates the child pipeline for the ML MPS stack. The tags select which
# runners may pick this job up.
ml-darwin-aarch64-mps-generate:
  extends:
    - ".ml-darwin-aarch64-mps"
    - ".darwin-generate"
  tags:
    - "macos-ventura"
    - "apple-clang-14"
    - "aarch64-macos"
# Triggers the child pipeline written by ml-darwin-aarch64-mps-generate.
# `strategy: depend` makes this job's status follow the child pipeline.
ml-darwin-aarch64-mps-build:
  extends:
    - ".ml-darwin-aarch64-mps"
    - ".build"
  trigger:
    include:
      - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
        job: ml-darwin-aarch64-mps-generate
    strategy: depend
  # The generate job's artifacts carry the pipeline file included above.
  needs:
    - artifacts: True
      job: ml-darwin-aarch64-mps-generate
########################################
# Deprecated CI testing
########################################

View File

@ -0,0 +1,27 @@
# Compiler definitions for the Darwin aarch64 (macOS Ventura) scope.
compilers:
  # Apple Clang for C/C++, paired with a gfortran under /opt/homebrew for
  # Fortran.
  - compiler:
      spec: apple-clang@14.0.0
      operating_system: ventura
      target: aarch64
      paths:
        cc: /usr/bin/clang
        cxx: /usr/bin/clang++
        f77: /opt/homebrew/bin/gfortran
        fc: /opt/homebrew/bin/gfortran
      flags: {}
      modules: []
      environment: {}
      extra_rpaths: []
  # GCC 12 toolchain, all four compilers under /opt/homebrew/bin.
  - compiler:
      spec: gcc@12.2.0
      operating_system: ventura
      target: aarch64
      paths:
        cc: /opt/homebrew/bin/gcc-12
        cxx: /opt/homebrew/bin/g++-12
        f77: /opt/homebrew/bin/gfortran-12
        fc: /opt/homebrew/bin/gfortran-12
      flags: {}
      modules: []
      environment: {}
      extra_rpaths: []

View File

@ -0,0 +1,3 @@
# Darwin aarch64 scope: install packages under the Spack checkout itself.
config:
  install_tree:
    root: "$spack/opt/spack"

View File

@ -1,14 +0,0 @@
# Darwin CI scope: build-job commands and runner tag.
ci:
  pipeline-gen:
  - build-job:
      # `script` must be a YAML sequence of commands. It was previously a
      # literal block scalar (`script: |`), which made the whole list one
      # string whose lines each began with a literal "- ".
      script:
      # Use a throwaway directory for Spack's user config and cache.
      - tmp="$(mktemp -d)"; export SPACK_USER_CONFIG_PATH="$tmp"; export SPACK_USER_CACHE_PATH="$tmp"
      - . "./share/spack/setup-env.sh"
      - spack --version
      - spack arch
      - cd ${SPACK_CONCRETE_ENV_DIR}
      - spack env activate --without-view .
      - spack config add "config:install_tree:projections:${SPACK_JOB_SPEC_PKG_NAME}:'morepadding/{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}'"
      - mkdir -p ${SPACK_ARTIFACTS_ROOT}/user_data
      # Tee stdout and stderr into user_data while still showing them in the job log.
      - spack --color=always --backtrace ci rebuild > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2)
      tags: ["lambda"]

View File

@ -1,40 +0,0 @@
# E4S Mac stack: a small set of specs built with Apple Clang for target m1.
spack:
  view: false

  packages:
    all:
      compiler: [apple-clang@13.1.6]
      target: [m1]

  definitions:
  - easy_specs:
    - berkeley-db
    - ncurses
    - gcc
    - py-jupyterlab
    - py-scipy
    - py-matplotlib
    - py-pandas
  - arch:
    - '%apple-clang@13.1.6 target=m1'

  # Every entry of easy_specs is combined with the single arch constraint.
  specs:
  - matrix:
    - - $easy_specs
    - - $arch

  mirrors: { "mirror": "s3://spack-binaries/develop/e4s-mac" }

  ci:
    pipeline-gen:
    - cleanup-job:
        # `before_script` must be a YAML sequence of commands. It was
        # previously a literal block scalar (`before_script: |`), which made
        # the list one string whose lines each began with a literal "- ".
        before_script:
        - export SPACK_USER_CACHE_PATH=$(pwd)/.spack-user-cache
        - export SPACK_USER_CONFIG_PATH=$(pwd)/.spack-user-config
        - . "./share/spack/setup-env.sh"
        - spack --version
        tags: [lambda]

  cdash:
    build-group: E4S Mac

View File

@ -0,0 +1,139 @@
# Spack environment for the Machine Learning stack on macOS
# (Darwin aarch64, MPS variant enabled; CUDA and ROCm disabled).
spack:
  view: false

  concretizer:
    unify: false
    reuse: false

  config:
    concretizer: clingo
    db_lock_timeout: 120
    install_tree:
      root: $spack/opt/spack
      padded_length: 256
      projections:
        all: '{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}'

  packages:
    all:
      require: target=aarch64
      variants: +mps~cuda~rocm
    mpi:
      require: openmpi

  specs:
  # Hugging Face
  - py-transformers

  # JAX
  - py-jax
  # - py-jaxlib  # bazel codesign

  # Keras
  - py-keras-applications
  - py-keras-preprocessing
  - py-keras2onnx
  # - py-keras  # bazel codesign

  # MXNet
  - mxnet

  # PyTorch
  - py-botorch
  - py-gpytorch
  - py-pytorch-gradual-warmup-lr
  - py-segmentation-models-pytorch
  - py-timm
  - py-torch
  - py-torch-cluster
  - py-torch-geometric
  - py-torch-sparse
  - py-torchdata
  - py-torchfile
  - py-torchgeo
  - py-torchvision

  # scikit-learn
  - py-scikit-learn
  - py-scikit-learn-extra

  # TensorBoard
  - py-tensorboard
  - py-tensorboard-data-server
  - py-tensorboard-plugin-wit
  - py-tensorboardx

  # TensorFlow
  # - py-tensorflow  # bazel codesign
  # - py-tensorflow-datasets  # bazel codesign
  # - py-tensorflow-hub  # bazel codesign
  # - py-tensorflow-metadata  # bazel codesign
  # - py-tensorflow-estimator  # bazel codesign
  # - py-tensorflow-probability  # py-dm-tree due to bazel codesign

  # XGBoost
  - py-xgboost
  - xgboost

  # ERRORS
  # - py-efficientnet-pytorch  # py-torch
  # - py-horovod  # py-torch
  # - py-kornia  # py-torch
  # - py-lightning  # py-torch
  # - py-pytorch-lightning  # py-torch
  # - py-torch-nvidia-apex  # py-torch
  # - py-torch-scatter  # py-torch
  # - py-torch-spline-conv  # py-torch
  # - py-torchaudio  # py-torchaudio
  # - py-torchmetrics  # py-torch
  # - py-torchtext  # py-torchtext
  # - py-vector-quantize-pytorch  # py-torch
  # - r-xgboost  # r

  # The mirror path is keyed on this stack's name (ml-darwin-aarch64-mps).
  # It previously ended in "ml-darwin-aarch64-cpu", which matches no stack
  # defined for this pipeline.
  # NOTE(review): confirm the "-cpu" mirror was not an intentional shared cache.
  mirrors: { "mirror": "s3://spack-binaries/develop/ml-darwin-aarch64-mps" }

  ci:
    pipeline-gen:
    # Strip the container image and these runner tags from build jobs.
    - build-job-remove:
        image: no-image
        tags: [spack, public]

    - build-job:
        tags: [ "macos-ventura", "apple-clang-14", "aarch64-macos" ]
        # `script::` (double colon) overrides any script from other config
        # scopes instead of merging with it.
        script::
        - - spack compiler find
          - cd ${SPACK_CONCRETE_ENV_DIR}
          - spack env activate --without-view .
          - if [ -n "$SPACK_BUILD_JOBS" ]; then spack config add "config:build_jobs:$SPACK_BUILD_JOBS"; fi
          - mkdir -p ${SPACK_ARTIFACTS_ROOT}/user_data
          # AWS runners mount E4S public key (verification), UO runners mount public/private (signing/verification)
          - if [[ -r /mnt/key/e4s.gpg ]]; then spack gpg trust /mnt/key/e4s.gpg; fi
          # UO runners mount intermediate ci public key (verification), AWS runners mount public/private (signing/verification)
          - if [[ -r /mnt/key/intermediate_ci_signing_key.gpg ]]; then spack gpg trust /mnt/key/intermediate_ci_signing_key.gpg; fi
          - if [[ -r /mnt/key/spack_public_key.gpg ]]; then spack gpg trust /mnt/key/spack_public_key.gpg; fi
          - spack --color=always --backtrace ci rebuild --tests > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2)
        after_script:
        - - cat /proc/loadavg || true

    - signing-job:
        image: { "name": "ghcr.io/spack/notary:latest", "entrypoint": [""] }
        tags: ["aws"]
        script:
        # Pull spec files from the mirror, sign them, then push the
        # signatures and public keys back.
        - - aws s3 sync --exclude "*" --include "*spec.json*" ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache /tmp
          - /sign.sh
          - aws s3 sync --exclude "*" --include "*spec.json.sig*" /tmp ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache
          - aws s3 cp /tmp/public_keys ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache/_pgp --recursive --exclude "*" --include "*.pub"

    - any-job:
        image: "ghcr.io/spack/e4s-ubuntu-18.04:v2021-10-18"
        tags: ["spack"]
        before_script:
        # Best-effort host diagnostics; each probe ends in `|| true`.
        - - uname -a || true
          - grep -E "vendor|model name" /proc/cpuinfo 2>/dev/null | sort -u || head -n10 /proc/cpuinfo 2>/dev/null || true
          - nproc || true
        - - . "./share/spack/setup-env.sh"
          - spack --version
          - spack arch

  cdash:
    build-group: Machine Learning MPS
    url: https://cdash.spack.io
    project: Spack Testing
    site: Cloud Gitlab Infrastructure