ML CI: Linux x86_64 (#34299)

* ML CI: Linux x86_64

* Update comments

* Rename again

* Rename comments

* Update to match other arches

* No compiler

* Compiler was wrong anyway

* Faster TF
This commit is contained in:
Adam J. Stewart 2022-12-22 11:31:40 -06:00 committed by GitHub
parent 371268a9aa
commit eb67497020
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 280 additions and 253 deletions

View File

@@ -760,122 +760,122 @@ tutorial-protected-build:
- artifacts: True
job: tutorial-protected-generate
########################################
# Machine Learning (CPU)
########################################
.ml-cpu:
#######################################
# Machine Learning - Linux x86_64 (CPU)
#######################################
.ml-linux-x86_64-cpu:
variables:
SPACK_CI_STACK_NAME: ml-cpu
SPACK_CI_STACK_NAME: ml-linux-x86_64-cpu
.ml-cpu-generate:
extends: .ml-cpu
.ml-linux-x86_64-cpu-generate:
extends: .ml-linux-x86_64-cpu
image: ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21
tags: ["spack", "aws", "public", "medium", "x86_64_v4"]
ml-cpu-pr-generate:
extends: [ ".ml-cpu-generate", ".pr-generate"]
ml-linux-x86_64-cpu-pr-generate:
extends: [ ".ml-linux-x86_64-cpu-generate", ".pr-generate"]
ml-cpu-protected-generate:
extends: [ ".ml-cpu-generate", ".protected-generate"]
ml-linux-x86_64-cpu-protected-generate:
extends: [ ".ml-linux-x86_64-cpu-generate", ".protected-generate"]
ml-cpu-pr-build:
extends: [ ".ml-cpu", ".pr-build" ]
ml-linux-x86_64-cpu-pr-build:
extends: [ ".ml-linux-x86_64-cpu", ".pr-build" ]
trigger:
include:
- artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
job: ml-cpu-pr-generate
job: ml-linux-x86_64-cpu-pr-generate
strategy: depend
needs:
- artifacts: True
job: ml-cpu-pr-generate
job: ml-linux-x86_64-cpu-pr-generate
ml-cpu-protected-build:
extends: [ ".ml-cpu", ".protected-build" ]
ml-linux-x86_64-cpu-protected-build:
extends: [ ".ml-linux-x86_64-cpu", ".protected-build" ]
trigger:
include:
- artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
job: ml-cpu-protected-generate
job: ml-linux-x86_64-cpu-protected-generate
strategy: depend
needs:
- artifacts: True
job: ml-cpu-protected-generate
job: ml-linux-x86_64-cpu-protected-generate
########################################
# Machine Learning (CUDA)
# Machine Learning - Linux x86_64 (CUDA)
########################################
.ml-cuda:
.ml-linux-x86_64-cuda:
variables:
SPACK_CI_STACK_NAME: ml-cuda
SPACK_CI_STACK_NAME: ml-linux-x86_64-cuda
.ml-cuda-generate:
extends: .ml-cuda
.ml-linux-x86_64-cuda-generate:
extends: .ml-linux-x86_64-cuda
image: ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21
tags: ["spack", "aws", "public", "medium", "x86_64_v4"]
ml-cuda-pr-generate:
extends: [ ".ml-cuda-generate", ".pr-generate"]
ml-linux-x86_64-cuda-pr-generate:
extends: [ ".ml-linux-x86_64-cuda-generate", ".pr-generate"]
ml-cuda-protected-generate:
extends: [ ".ml-cuda-generate", ".protected-generate"]
ml-linux-x86_64-cuda-protected-generate:
extends: [ ".ml-linux-x86_64-cuda-generate", ".protected-generate"]
ml-cuda-pr-build:
extends: [ ".ml-cuda", ".pr-build" ]
ml-linux-x86_64-cuda-pr-build:
extends: [ ".ml-linux-x86_64-cuda", ".pr-build" ]
trigger:
include:
- artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
job: ml-cuda-pr-generate
job: ml-linux-x86_64-cuda-pr-generate
strategy: depend
needs:
- artifacts: True
job: ml-cuda-pr-generate
job: ml-linux-x86_64-cuda-pr-generate
ml-cuda-protected-build:
extends: [ ".ml-cuda", ".protected-build" ]
ml-linux-x86_64-cuda-protected-build:
extends: [ ".ml-linux-x86_64-cuda", ".protected-build" ]
trigger:
include:
- artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
job: ml-cuda-protected-generate
job: ml-linux-x86_64-cuda-protected-generate
strategy: depend
needs:
- artifacts: True
job: ml-cuda-protected-generate
job: ml-linux-x86_64-cuda-protected-generate
########################################
# Machine Learning (ROCm)
# Machine Learning - Linux x86_64 (ROCm)
########################################
.ml-rocm:
.ml-linux-x86_64-rocm:
variables:
SPACK_CI_STACK_NAME: ml-rocm
SPACK_CI_STACK_NAME: ml-linux-x86_64-rocm
.ml-rocm-generate:
extends: .ml-rocm
.ml-linux-x86_64-rocm-generate:
extends: .ml-linux-x86_64-rocm
image: ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21
tags: ["spack", "aws", "public", "medium", "x86_64_v4"]
ml-rocm-pr-generate:
extends: [ ".ml-rocm-generate", ".pr-generate"]
ml-linux-x86_64-rocm-pr-generate:
extends: [ ".ml-linux-x86_64-rocm-generate", ".pr-generate"]
ml-rocm-protected-generate:
extends: [ ".ml-rocm-generate", ".protected-generate"]
ml-linux-x86_64-rocm-protected-generate:
extends: [ ".ml-linux-x86_64-rocm-generate", ".protected-generate"]
ml-rocm-pr-build:
extends: [ ".ml-rocm", ".pr-build" ]
ml-linux-x86_64-rocm-pr-build:
extends: [ ".ml-linux-x86_64-rocm", ".pr-build" ]
trigger:
include:
- artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
job: ml-rocm-pr-generate
job: ml-linux-x86_64-rocm-pr-generate
strategy: depend
needs:
- artifacts: True
job: ml-rocm-pr-generate
job: ml-linux-x86_64-rocm-pr-generate
ml-rocm-protected-build:
extends: [ ".ml-rocm", ".protected-build" ]
ml-linux-x86_64-rocm-protected-build:
extends: [ ".ml-linux-x86_64-rocm", ".protected-build" ]
trigger:
include:
- artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
job: ml-rocm-protected-generate
job: ml-linux-x86_64-rocm-protected-generate
strategy: depend
needs:
- artifacts: True
job: ml-rocm-protected-generate
job: ml-linux-x86_64-rocm-protected-generate

View File

@@ -16,11 +16,11 @@ spack:
packages:
all:
compiler: [gcc@11.2.0]
target: [x86_64_v3]
variants: ~cuda~rocm
specs:
definitions:
- packages:
# Horovod
- py-horovod
@@ -86,7 +86,15 @@ spack:
# - r-xgboost
- xgboost
mirrors: { "mirror": "s3://spack-binaries/develop/ml-cpu" }
- arch:
- target=x86_64_v3
specs:
- matrix:
- [$packages]
- [$arch]
mirrors: { "mirror": "s3://spack-binaries/develop/ml-linux-x86_64-cpu" }
gitlab-ci:
script:
@@ -113,6 +121,7 @@ spack:
mappings:
- match:
- llvm
- py-tensorflow
- py-torch
runner-attributes:
tags: [ "spack", "huge", "x86_64_v4" ]

View File

@@ -16,14 +16,14 @@ spack:
packages:
all:
compiler: [gcc@11.2.0]
target: [x86_64_v3]
variants: ~rocm+cuda cuda_arch=80
llvm:
# https://github.com/spack/spack/issues/27999
require: ~cuda
specs:
definitions:
- packages:
# Horovod
- py-horovod
@@ -89,7 +89,15 @@ spack:
# - r-xgboost
- xgboost
mirrors: { "mirror": "s3://spack-binaries/develop/ml-cuda" }
- arch:
- target=x86_64_v3
specs:
- matrix:
- [$packages]
- [$arch]
mirrors: { "mirror": "s3://spack-binaries/develop/ml-linux-x86_64-cuda" }
gitlab-ci:
script:
@@ -116,6 +124,7 @@ spack:
mappings:
- match:
- llvm
- py-tensorflow
- py-torch
runner-attributes:
tags: [ "spack", "huge", "x86_64_v4" ]

View File

@@ -16,7 +16,6 @@ spack:
packages:
all:
compiler: [gcc@11.2.0]
target: [x86_64_v3]
variants: ~cuda+rocm amdgpu_target=gfx90a
gl:
@@ -25,7 +24,8 @@ spack:
# Does not yet support Spack-installed ROCm
require: ~rocm
specs:
definitions:
- packages:
# Horovod
- py-horovod
@@ -92,7 +92,15 @@ spack:
# - r-xgboost
- xgboost
mirrors: { "mirror": "s3://spack-binaries/develop/ml-rocm" }
- arch:
- target=x86_64_v3
specs:
- matrix:
- [$packages]
- [$arch]
mirrors: { "mirror": "s3://spack-binaries/develop/ml-linux-x86_64-rocm" }
gitlab-ci:
script:
@@ -118,8 +126,9 @@ spack:
match_behavior: first
mappings:
- match:
- llvm-amdgpu
- llvm
- llvm-amdgpu
- py-tensorflow
- py-torch
- rocblas
runner-attributes: