2022-10-09 17:39:47 -05:00
parent 4a6aff8bd1
commit 01ede3c595
4 changed files with 553 additions and 0 deletions
							
							
								
							
							
						
@@ -749,3 +749,123 @@ tutorial-protected-build:
  needs:
    - artifacts: True
      job: tutorial-protected-generate
########################################
# Machine Learning (CPU)
########################################
# Hidden templates for the CPU machine-learning stack.
# `.ml-cpu` only sets the stack name; `.ml-cpu-generate` extends it with the
# container image and runner tags.
.ml-cpu:
  variables:
    SPACK_CI_STACK_NAME: "ml-cpu"
.ml-cpu-generate:
  extends: ".ml-cpu"
  image: "ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21"
  tags:
    - spack
    - aws
    - public
    - medium
    - x86_64_v4
# Generate jobs for the ml-cpu stack. Each one combines the stack's generate
# template with a shared `.pr-generate` / `.protected-generate` template that
# is defined outside this excerpt (presumably PR vs. protected-branch
# pipelines, going by the names - confirm).
ml-cpu-pr-generate:
  extends:
    - ".ml-cpu-generate"
    - ".pr-generate"
ml-cpu-protected-generate:
  extends:
    - ".ml-cpu-generate"
    - ".protected-generate"
# Build job paired with ml-cpu-pr-generate. It needs that job's artifacts and
# triggers a child pipeline from the cloud-ci-pipeline.yml file found in them;
# `strategy: depend` ties this job's status to the child pipeline.
ml-cpu-pr-build:
  extends:
    - ".ml-cpu"
    - ".pr-build"
  needs:
    - job: ml-cpu-pr-generate
      artifacts: True
  trigger:
    strategy: depend
    include:
      - job: ml-cpu-pr-generate
        artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
# Build job paired with ml-cpu-protected-generate. It needs that job's
# artifacts and triggers a child pipeline from the cloud-ci-pipeline.yml file
# found in them; `strategy: depend` ties this job's status to the child
# pipeline.
ml-cpu-protected-build:
  extends:
    - ".ml-cpu"
    - ".protected-build"
  needs:
    - job: ml-cpu-protected-generate
      artifacts: True
  trigger:
    strategy: depend
    include:
      - job: ml-cpu-protected-generate
        artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
########################################
# Machine Learning (CUDA)
########################################
# Hidden templates for the CUDA machine-learning stack.
# `.ml-cuda` only sets the stack name; `.ml-cuda-generate` extends it with the
# container image and runner tags.
.ml-cuda:
  variables:
    SPACK_CI_STACK_NAME: "ml-cuda"
.ml-cuda-generate:
  extends: ".ml-cuda"
  image: "ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21"
  tags:
    - spack
    - aws
    - public
    - medium
    - x86_64_v4
# Generate jobs for the ml-cuda stack. Each one combines the stack's generate
# template with a shared `.pr-generate` / `.protected-generate` template that
# is defined outside this excerpt (presumably PR vs. protected-branch
# pipelines, going by the names - confirm).
ml-cuda-pr-generate:
  extends:
    - ".ml-cuda-generate"
    - ".pr-generate"
ml-cuda-protected-generate:
  extends:
    - ".ml-cuda-generate"
    - ".protected-generate"
# Build job paired with ml-cuda-pr-generate. It needs that job's artifacts and
# triggers a child pipeline from the cloud-ci-pipeline.yml file found in them;
# `strategy: depend` ties this job's status to the child pipeline.
ml-cuda-pr-build:
  extends:
    - ".ml-cuda"
    - ".pr-build"
  needs:
    - job: ml-cuda-pr-generate
      artifacts: True
  trigger:
    strategy: depend
    include:
      - job: ml-cuda-pr-generate
        artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
# Build job paired with ml-cuda-protected-generate. It needs that job's
# artifacts and triggers a child pipeline from the cloud-ci-pipeline.yml file
# found in them; `strategy: depend` ties this job's status to the child
# pipeline.
ml-cuda-protected-build:
  extends:
    - ".ml-cuda"
    - ".protected-build"
  needs:
    - job: ml-cuda-protected-generate
      artifacts: True
  trigger:
    strategy: depend
    include:
      - job: ml-cuda-protected-generate
        artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
########################################
# Machine Learning (ROCm)
########################################
# Hidden templates for the ROCm machine-learning stack.
# `.ml-rocm` only sets the stack name; `.ml-rocm-generate` extends it with the
# container image and runner tags.
.ml-rocm:
  variables:
    SPACK_CI_STACK_NAME: "ml-rocm"
.ml-rocm-generate:
  extends: ".ml-rocm"
  image: "ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21"
  tags:
    - spack
    - aws
    - public
    - medium
    - x86_64_v4
# Generate jobs for the ml-rocm stack. Each one combines the stack's generate
# template with a shared `.pr-generate` / `.protected-generate` template that
# is defined outside this excerpt (presumably PR vs. protected-branch
# pipelines, going by the names - confirm).
ml-rocm-pr-generate:
  extends:
    - ".ml-rocm-generate"
    - ".pr-generate"
ml-rocm-protected-generate:
  extends:
    - ".ml-rocm-generate"
    - ".protected-generate"
# Build job paired with ml-rocm-pr-generate. It needs that job's artifacts and
# triggers a child pipeline from the cloud-ci-pipeline.yml file found in them;
# `strategy: depend` ties this job's status to the child pipeline.
ml-rocm-pr-build:
  extends:
    - ".ml-rocm"
    - ".pr-build"
  needs:
    - job: ml-rocm-pr-generate
      artifacts: True
  trigger:
    strategy: depend
    include:
      - job: ml-rocm-pr-generate
        artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
# Build job paired with ml-rocm-protected-generate. It needs that job's
# artifacts and triggers a child pipeline from the cloud-ci-pipeline.yml file
# found in them; `strategy: depend` ties this job's status to the child
# pipeline.
ml-rocm-protected-build:
  extends:
    - ".ml-rocm"
    - ".protected-build"
  needs:
    - job: ml-rocm-protected-generate
      artifacts: True
  trigger:
    strategy: depend
    include:
      - job: ml-rocm-protected-generate
        artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
							
							
							
						
 
							
							
							
						
@@ -0,0 +1,142 @@
# Spack environment for the CPU machine-learning CI stack.
spack:
  # No environment view is created.
  view: false
  # Each root spec is concretized on its own, without reusing existing installs.
  concretizer:
    unify: false
    reuse: false
  config:
    install_tree:
      # padded_length pads install paths (to 384 here); see the Spack config docs.
      padded_length: 384
      root: '/home/software/spack'
      projections:
        all: '{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}'
    concretizer: clingo
  # Settings for all packages in the CPU stack: GCC 11.2.0, the x86_64_v4
  # target, and the cuda and rocm variants both turned off.
  packages:
    all:
      compiler:
        - gcc@11.2.0
      target:
        - x86_64_v4
      variants: "~cuda~rocm"
  # Root specs built by the CPU stack, grouped by framework. Commented-out
  # entries are disabled; a URL directly above one is the issue recorded for it.
  specs:
    # Horovod
    - py-horovod
    # JAX
    # Disabled; see https://github.com/google/jax/issues/12614
    # - py-jax
    # - py-jaxlib
    # Keras
    - py-keras
    - py-keras-applications
    - py-keras-preprocessing
    - py-keras2onnx
    # PyTorch
    - py-botorch
    - py-efficientnet-pytorch
    - py-gpytorch
    - py-kornia
    - py-pytorch-gradual-warmup-lr
    - py-pytorch-lightning
    - py-segmentation-models-pytorch
    - py-timm
    - py-torch
    - py-torch-cluster
    - py-torch-geometric
    # Disabled; see https://github.com/NVIDIA/apex/issues/1498
    # - py-torch-nvidia-apex
    - py-torch-scatter
    - py-torch-sparse
    - py-torch-spline-conv
    - py-torchaudio
    - py-torchdata
    - py-torchfile
    - py-torchgeo
    - py-torchmeta
    - py-torchmetrics
    - py-torchtext
    - py-torchvision
    - py-vector-quantize-pytorch
    # scikit-learn
    - py-scikit-learn
    - py-scikit-learn-extra
    # TensorBoard
    - py-tensorboard
    - py-tensorboard-data-server
    - py-tensorboard-plugin-wit
    - py-tensorboardx
    # TensorFlow
    - py-tensorflow
    - py-tensorflow-datasets
    - py-tensorflow-estimator
    - py-tensorflow-hub
    - py-tensorflow-metadata
    - py-tensorflow-probability
    # XGBoost
    - py-xgboost
    # NOTE(review): r-xgboost is disabled with no reason recorded here.
    # - r-xgboost
    - xgboost
  # Single mirror, named "mirror", under the ml-cpu S3 prefix.
  mirrors:
    mirror: "s3://spack-binaries/develop/ml-cpu"
  # GitLab pipeline settings for the CPU stack: job image, build script,
  # runner mappings, and the service/signing job attributes.
  gitlab-ci:
    image:
      name: "ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21"
      entrypoint: [""]
    broken-specs-url: "s3://spack-binaries/broken-specs"
    script:
      - . "./share/spack/setup-env.sh"
      - spack --version
      - cd ${SPACK_CONCRETE_ENV_DIR}
      - spack env activate --without-view .
      # Adds a projection with a "morepadding/" prefix for the job's own package.
      - spack config add "config:install_tree:projections:${SPACK_JOB_SPEC_PKG_NAME}:'morepadding/{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}'"
      - mkdir -p ${SPACK_ARTIFACTS_ROOT}/user_data
      # Each key is trusted only if its file is present and readable.
      - if [[ -r /mnt/key/intermediate_ci_signing_key.gpg ]]; then spack gpg trust /mnt/key/intermediate_ci_signing_key.gpg; fi
      - if [[ -r /mnt/key/spack_public_key.gpg ]]; then spack gpg trust /mnt/key/spack_public_key.gpg; fi
      # stdout and stderr are each copied to a file under user_data via tee.
      - spack -d ci rebuild > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2)
    # llvm gets the "huge" runners; the "@:" entry covers the remaining specs
    # with "large". NOTE(review): relies on entry order (presumably first
    # match wins) - confirm against the Spack pipelines docs.
    mappings:
      - match:
          - llvm
        runner-attributes:
          tags:
            - spack
            - huge
            - x86_64_v4
          variables:
            CI_JOB_SIZE: huge
            KUBERNETES_CPU_REQUEST: 11000m
            KUBERNETES_MEMORY_REQUEST: 42G
      - match:
          - "@:"
        runner-attributes:
          tags:
            - spack
            - large
            - x86_64_v4
          variables:
            CI_JOB_SIZE: large
            KUBERNETES_CPU_REQUEST: 8000m
            KUBERNETES_MEMORY_REQUEST: 12G
    service-job-attributes:
      image:
        name: "ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21"
        entrypoint: [""]
      tags:
        - spack
        - public
        - x86_64_v4
      before_script:
        - . "./share/spack/setup-env.sh"
        - spack --version
    signing-job-attributes:
      image:
        name: "ghcr.io/spack/notary:latest"
        entrypoint: [""]
      tags:
        - spack
        - aws
      # Pull spec.json files into /tmp, run /sign.sh, push the .sig files back.
      script:
        - aws s3 sync --exclude "*" --include "*spec.json*" ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache /tmp
        - /sign.sh
        - aws s3 sync --exclude "*" --include "*spec.json.sig*" /tmp ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache
  # CDash settings for this stack: server URL, project, site and build group.
  cdash:
    url: "https://cdash.spack.io"
    project: "Spack Testing"
    site: "Cloud Gitlab Infrastructure"
    build-group: "Machine Learning"
							
							
							
						
@@ -0,0 +1,144 @@
# Spack environment for the CUDA machine-learning CI stack.
spack:
  # No environment view is created.
  view: false
  # Each root spec is concretized on its own, without reusing existing installs.
  concretizer:
    unify: false
    reuse: false
  config:
    install_tree:
      # padded_length pads install paths (to 384 here); see the Spack config docs.
      padded_length: 384
      root: '/home/software/spack'
      projections:
        all: '{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}'
    concretizer: clingo
  # Settings for all packages in the CUDA stack: GCC 11.2.0, the x86_64_v4
  # target, rocm off and cuda on with cuda_arch=80.
  packages:
    all:
      compiler:
        - gcc@11.2.0
      target:
        - x86_64_v4
      variants: "~rocm+cuda cuda_arch=80"
    llvm:
      # llvm is the exception and is required to be ~cuda; see
      # https://github.com/spack/spack/issues/27999
      require: "~cuda"
  # Root specs built by the CUDA stack, grouped by framework. Commented-out
  # entries are disabled; a URL directly above one is the issue recorded for it.
  specs:
    # Horovod
    - py-horovod
    # JAX
    # Disabled; see https://github.com/google/jax/issues/12614
    # - py-jax
    # - py-jaxlib
    # Keras
    - py-keras
    - py-keras-applications
    - py-keras-preprocessing
    - py-keras2onnx
    # PyTorch
    - py-botorch
    - py-efficientnet-pytorch
    - py-gpytorch
    - py-kornia
    - py-pytorch-gradual-warmup-lr
    - py-pytorch-lightning
    - py-segmentation-models-pytorch
    - py-timm
    - py-torch
    - py-torch-cluster
    - py-torch-geometric
    - py-torch-nvidia-apex
    - py-torch-scatter
    - py-torch-sparse
    - py-torch-spline-conv
    - py-torchaudio
    - py-torchdata
    - py-torchfile
    - py-torchgeo
    - py-torchmeta
    - py-torchmetrics
    - py-torchtext
    - py-torchvision
    - py-vector-quantize-pytorch
    # scikit-learn
    - py-scikit-learn
    - py-scikit-learn-extra
    # TensorBoard
    - py-tensorboard
    - py-tensorboard-data-server
    - py-tensorboard-plugin-wit
    - py-tensorboardx
    # TensorFlow
    - py-tensorflow
    - py-tensorflow-datasets
    - py-tensorflow-estimator
    - py-tensorflow-hub
    - py-tensorflow-metadata
    - py-tensorflow-probability
    # XGBoost
    - py-xgboost
    # NOTE(review): r-xgboost is disabled with no reason recorded here.
    # - r-xgboost
    - xgboost
  # Single mirror, named "mirror", under the ml-cuda S3 prefix.
  mirrors:
    mirror: "s3://spack-binaries/develop/ml-cuda"
  # GitLab pipeline settings for the CUDA stack: job image, build script,
  # runner mappings, and the service/signing job attributes.
  gitlab-ci:
    image:
      name: "ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21"
      entrypoint: [""]
    broken-specs-url: "s3://spack-binaries/broken-specs"
    script:
      - . "./share/spack/setup-env.sh"
      - spack --version
      - cd ${SPACK_CONCRETE_ENV_DIR}
      - spack env activate --without-view .
      # Adds a projection with a "morepadding/" prefix for the job's own package.
      - spack config add "config:install_tree:projections:${SPACK_JOB_SPEC_PKG_NAME}:'morepadding/{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}'"
      - mkdir -p ${SPACK_ARTIFACTS_ROOT}/user_data
      # Each key is trusted only if its file is present and readable.
      - if [[ -r /mnt/key/intermediate_ci_signing_key.gpg ]]; then spack gpg trust /mnt/key/intermediate_ci_signing_key.gpg; fi
      - if [[ -r /mnt/key/spack_public_key.gpg ]]; then spack gpg trust /mnt/key/spack_public_key.gpg; fi
      # stdout and stderr are each copied to a file under user_data via tee.
      - spack -d ci rebuild > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2)
    # llvm gets the "huge" runners; the "@:" entry covers the remaining specs
    # with "large". NOTE(review): relies on entry order (presumably first
    # match wins) - confirm against the Spack pipelines docs.
    mappings:
      - match:
          - llvm
        runner-attributes:
          tags:
            - spack
            - huge
            - x86_64_v4
          variables:
            CI_JOB_SIZE: huge
            KUBERNETES_CPU_REQUEST: 11000m
            KUBERNETES_MEMORY_REQUEST: 42G
      - match:
          - "@:"
        runner-attributes:
          tags:
            - spack
            - large
            - x86_64_v4
          variables:
            CI_JOB_SIZE: large
            KUBERNETES_CPU_REQUEST: 8000m
            KUBERNETES_MEMORY_REQUEST: 12G
    service-job-attributes:
      image:
        name: "ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21"
        entrypoint: [""]
      tags:
        - spack
        - public
        - x86_64_v4
      before_script:
        - . "./share/spack/setup-env.sh"
        - spack --version
    signing-job-attributes:
      image:
        name: "ghcr.io/spack/notary:latest"
        entrypoint: [""]
      tags:
        - spack
        - aws
      # Pull spec.json files into /tmp, run /sign.sh, push the .sig files back.
      script:
        - aws s3 sync --exclude "*" --include "*spec.json*" ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache /tmp
        - /sign.sh
        - aws s3 sync --exclude "*" --include "*spec.json.sig*" /tmp ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache
  # CDash settings for this stack: server URL, project, site and build group.
  cdash:
    url: "https://cdash.spack.io"
    project: "Spack Testing"
    site: "Cloud Gitlab Infrastructure"
    build-group: "Machine Learning"
							
							
							
						
@@ -0,0 +1,147 @@
# Spack environment for the ROCm machine-learning CI stack.
spack:
  # No environment view is created.
  view: false
  # Each root spec is concretized on its own, without reusing existing installs.
  concretizer:
    unify: false
    reuse: false
  config:
    install_tree:
      # padded_length pads install paths (to 384 here); see the Spack config docs.
      padded_length: 384
      root: '/home/software/spack'
      projections:
        all: '{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}'
    concretizer: clingo
  # Settings for all packages in the ROCm stack: GCC 11.2.0, the x86_64_v4
  # target, cuda off and rocm on with amdgpu_target=gfx90a.
  packages:
    all:
      compiler:
        - gcc@11.2.0
      target:
        - x86_64_v4
      variants: "~cuda+rocm amdgpu_target=gfx90a"
    gl:
      require: osmesa
    py-torch:
      # py-torch does not yet support Spack-installed ROCm, so it is
      # required to be ~rocm.
      require: "~rocm"
  # Root specs built by the ROCm stack, grouped by framework. Commented-out
  # entries are disabled; a URL directly above one is the issue recorded for it.
  specs:
    # Horovod
    - py-horovod
    # JAX
    # Disabled; see https://github.com/google/jax/issues/12614
    # - py-jax
    # - py-jaxlib
    # Keras
    - py-keras
    - py-keras-applications
    - py-keras-preprocessing
    - py-keras2onnx
    # PyTorch
    # Does not yet support Spack-installed ROCm, so the whole group is disabled
    # - py-botorch
    # - py-efficientnet-pytorch
    # - py-gpytorch
    # - py-kornia
    # - py-pytorch-gradual-warmup-lr
    # - py-pytorch-lightning
    # - py-segmentation-models-pytorch
    # - py-timm
    # - py-torch
    # - py-torch-cluster
    # - py-torch-geometric
    # - py-torch-nvidia-apex
    # - py-torch-scatter
    # - py-torch-sparse
    # - py-torch-spline-conv
    # - py-torchaudio
    # - py-torchdata
    # - py-torchfile
    # - py-torchgeo
    # - py-torchmeta
    # - py-torchmetrics
    # - py-torchtext
    # - py-torchvision
    # - py-vector-quantize-pytorch
    # scikit-learn
    - py-scikit-learn
    - py-scikit-learn-extra
    # TensorBoard
    - py-tensorboard
    - py-tensorboard-data-server
    - py-tensorboard-plugin-wit
    - py-tensorboardx
    # TensorFlow
    - py-tensorflow
    - py-tensorflow-datasets
    - py-tensorflow-estimator
    - py-tensorflow-hub
    - py-tensorflow-metadata
    - py-tensorflow-probability
    # XGBoost
    - py-xgboost
    # NOTE(review): r-xgboost is disabled with no reason recorded here.
    # - r-xgboost
    - xgboost
  # Single mirror, named "mirror", under the ml-rocm S3 prefix.
  mirrors:
    mirror: "s3://spack-binaries/develop/ml-rocm"
  # GitLab pipeline settings for the ROCm stack: job image, build script,
  # runner mappings, and the service/signing job attributes.
  gitlab-ci:
    image:
      name: "ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21"
      entrypoint: [""]
    broken-specs-url: "s3://spack-binaries/broken-specs"
    script:
      - . "./share/spack/setup-env.sh"
      - spack --version
      - cd ${SPACK_CONCRETE_ENV_DIR}
      - spack env activate --without-view .
      # Adds a projection with a "morepadding/" prefix for the job's own package.
      - spack config add "config:install_tree:projections:${SPACK_JOB_SPEC_PKG_NAME}:'morepadding/{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}'"
      - mkdir -p ${SPACK_ARTIFACTS_ROOT}/user_data
      # Each key is trusted only if its file is present and readable.
      - if [[ -r /mnt/key/intermediate_ci_signing_key.gpg ]]; then spack gpg trust /mnt/key/intermediate_ci_signing_key.gpg; fi
      - if [[ -r /mnt/key/spack_public_key.gpg ]]; then spack gpg trust /mnt/key/spack_public_key.gpg; fi
      # stdout and stderr are each copied to a file under user_data via tee.
      - spack -d ci rebuild > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2)
    # llvm gets the "huge" runners; the "@:" entry covers the remaining specs
    # with "large". NOTE(review): relies on entry order (presumably first
    # match wins) - confirm against the Spack pipelines docs.
    mappings:
      - match:
          - llvm
        runner-attributes:
          tags:
            - spack
            - huge
            - x86_64_v4
          variables:
            CI_JOB_SIZE: huge
            KUBERNETES_CPU_REQUEST: 11000m
            KUBERNETES_MEMORY_REQUEST: 42G
      - match:
          - "@:"
        runner-attributes:
          tags:
            - spack
            - large
            - x86_64_v4
          variables:
            CI_JOB_SIZE: large
            KUBERNETES_CPU_REQUEST: 8000m
            KUBERNETES_MEMORY_REQUEST: 12G
    service-job-attributes:
      image:
        name: "ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21"
        entrypoint: [""]
      tags:
        - spack
        - public
        - x86_64_v4
      before_script:
        - . "./share/spack/setup-env.sh"
        - spack --version
    signing-job-attributes:
      image:
        name: "ghcr.io/spack/notary:latest"
        entrypoint: [""]
      tags:
        - spack
        - aws
      # Pull spec.json files into /tmp, run /sign.sh, push the .sig files back.
      script:
        - aws s3 sync --exclude "*" --include "*spec.json*" ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache /tmp
        - /sign.sh
        - aws s3 sync --exclude "*" --include "*spec.json.sig*" /tmp ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache
  # CDash settings for this stack: server URL, project, site and build group.
  cdash:
    url: "https://cdash.spack.io"
    project: "Spack Testing"
    site: "Cloud Gitlab Infrastructure"
    build-group: "Machine Learning"