gpu test stack: add test w/ ^cuda@12 builds on A100 w/ newer driver (#35375)

* gpu test stack: test cuda@12 builds on A100 w/ newer driver

* get gpu info via nvidia-smi

* kokkos +cuda ^cuda@12 has a genuine failure (tracked in https://github.com/spack/spack/issues/35378)
This commit is contained in:
eugeneswalker 2023-02-09 13:18:03 -06:00 committed by GitHub
parent 72f57ffede
commit f7de22eb14
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -28,8 +28,6 @@ spack:
variants: +ld +gold +headers +libiberty ~nls
boost:
variants: +python +filesystem +iostreams +system
cuda:
version: [11.7.0]
elfutils:
variants: +bzip2 ~nls +xz
hdf5:
@ -61,8 +59,12 @@ spack:
specs:
- kokkos +rocm amdgpu_target=gfx90a
- kokkos +wrapper +cuda cuda_arch=80 ^cuda@11.7
- raja +cuda cuda_arch=80 ^cuda@11.7
- kokkos +wrapper +cuda cuda_arch=80 ^cuda@11.7.1
- raja +cuda cuda_arch=80 ^cuda@11.7.1
- raja +cuda cuda_arch=80 ^cuda@12.0.0
# FAILURES
# - kokkos +wrapper +cuda cuda_arch=80 ^cuda@12.0.0 # https://github.com/spack/spack/issues/35378
mirrors: { "mirror": "s3://spack-binaries/develop/gpu-tests" }
@ -72,6 +74,7 @@ spack:
- uname -a || true
- grep -E 'vendor|model name' /proc/cpuinfo 2>/dev/null | sort -u || head -n10 /proc/cpuinfo 2>/dev/null || true
- nproc
- nvidia-smi || true
- curl -Lfs 'https://github.com/JuliaBinaryWrappers/GNUMake_jll.jl/releases/download/GNUMake-v4.3.0+1/GNUMake.v4.3.0.x86_64-linux-gnu.tar.gz' -o gmake.tar.gz
- printf 'fef1f59e56d2d11e6d700ba22d3444b6e583c663d6883fd0a4f63ab8bd280f0f gmake.tar.gz' | sha256sum --check --strict --quiet
- tar -xzf gmake.tar.gz -C /usr bin/make 2> /dev/null
@ -107,16 +110,18 @@ spack:
CI_JOB_SIZE: large
- match:
- kokkos +cuda cuda_arch=80
- kokkos +cuda cuda_arch=80 ^cuda@11.7.1
- raja +cuda cuda_arch=80 ^cuda@11.7.1
runner-attributes:
tags: [ "cuda-11.7", "a100" ]
tags: [ "nvidia-515.65.01", "cuda-11.7", "a100" ]
variables:
CI_JOB_SIZE: large
- match:
- raja +cuda cuda_arch=80
- kokkos +cuda cuda_arch=80 ^cuda@12.0.0
- raja +cuda cuda_arch=80 ^cuda@12.0.0
runner-attributes:
tags: [ "cuda-11.7", "a100" ]
tags: [ "nvidia-525.85.12", "cuda-12.0", "a100" ]
variables:
CI_JOB_SIZE: large