Use pcluster-alinux2 container image with pre-installed compilers (#44150)

This commit is contained in:
Stephen Sachs 2024-10-10 10:01:59 +02:00 committed by GitHub
parent 30e9545d3e
commit e9831985e4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 76 additions and 70 deletions

View File

@ -799,7 +799,7 @@ deprecated-ci-build:
########################################
.aws-pcluster-generate:
image: { "name": "ghcr.io/spack/pcluster-amazonlinux-2:v2024-01-29", "entrypoint": [""] }
image: { "name": "ghcr.io/spack/pcluster-amazonlinux-2:v2024-10-07", "entrypoint": [""] }
before_script:
# Use gcc from pre-installed spack store
- - . "./share/spack/setup-env.sh"

View File

@ -6,29 +6,19 @@
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
set -e
# Intel compiler needs to be installed from a specific spack git commit.
# The best solution would be to have the compilers hash (or packages contents) be part of the
# individual packages hashes. I don't see this at the moment.
# Set to the latest tag including a recent oneapi compiler.
# NOTE: If we update this spack version in the future make sure the compiler version also updates.
spack_intel_compiler_commit="develop-2023-08-06"
# Fill in SLURM_VERSION, SLURM_ROOT and LIBFABRIC_VERSION when they are not already set,
# export them, and render this stack's packages.yaml template through envsubst into
# ${SPACK_ROOT}/etc/spack/packages.yaml.
# Expects SPACK_ROOT and SPACK_CI_STACK_NAME to be set by the caller/environment.
# Every probe is guarded by `[ -z ... ]`, so a value supplied by the caller is never overwritten.
# NOTE(review): this view appears to interleave removed and added diff lines (two SLURM_VERSION
# probes, two export lines) -- confirm against the real file which of them are live.
set_pcluster_defaults() {
# Set versions of pre-installed software in packages.yaml
# Probe the version from the hard-coded /opt/slurm install: take the second field of the
# line starting with VERSION in the library's strings and strip the double quotes.
[ -z "${SLURM_VERSION}" ] && SLURM_VERSION=$(strings /opt/slurm/lib/libslurm.so | grep -e '^VERSION' | awk '{print $2}' | sed -e 's?"??g')
# Only when /etc/systemd/system/slurm* exists: take the first ExecStart= entry, drop the
# text up to and including '=', and go two directories up from that path to get the root.
[ -z "${SLURM_ROOT}" ] && ls /etc/systemd/system/slurm* &>/dev/null && \
SLURM_ROOT=$(dirname $(dirname "$(awk '/ExecStart=/ {print $1}' /etc/systemd/system/slurm* | sed -e 's?^.*=??1' | head -n1)"))
# Fallback to default location if SLURM not in systemd
[ -z "${SLURM_ROOT}" ] && [ -d "/opt/slurm" ] && SLURM_ROOT=/opt/slurm
# Same version probe as above, but relative to the detected SLURM_ROOT. As shown here it
# only takes effect if SLURM_VERSION is still empty at this point.
[ -z "${SLURM_VERSION}" ] && SLURM_VERSION=$(strings "${SLURM_ROOT}"/lib/libslurm.so | grep -e '^VERSION' | awk '{print $2}' | sed -e 's?"??g')
# Read the Version: field of the first libfabric.pc found under /opt/amazon/efa/, then
# remove any '~' characters and everything from "amzn" onwards.
[ -z "${LIBFABRIC_VERSION}" ] && LIBFABRIC_VERSION=$(awk '/Version:/{print $2}' "$(find /opt/amazon/efa/ -name libfabric.pc | head -n1)" | sed -e 's?~??g' -e 's?amzn.*??g')
# envsubst only substitutes exported variables, hence the exports before rendering.
export SLURM_VERSION LIBFABRIC_VERSION
export SLURM_ROOT SLURM_VERSION LIBFABRIC_VERSION
envsubst < "${SPACK_ROOT}/share/spack/gitlab/cloud_pipelines/stacks/${SPACK_CI_STACK_NAME}/packages.yaml" > "${SPACK_ROOT}"/etc/spack/packages.yaml
}
# Register what is already present on the machine in spack's site scope: compilers via
# `spack compiler add`, and external packages carrying the `core-packages` tag via
# `spack external find`.
# NOTE(review): this view appears to interleave removed and added diff lines -- confirm
# against the real file whether the `spack compiler add` line is still part of this function.
setup_spack() {
spack compiler add --scope site
# Do not add autotools/buildtools packages. These versions need to be managed by spack or it will
# eventually end up in a version mismatch (e.g. when compiling gmp).
spack external find --scope site --tag core-packages
}
patch_compilers_yaml() {
# Graceful exit if package not found by spack
set -o pipefail
@ -76,55 +66,47 @@ EOF
}
install_compilers() {
# We need to treat compilers as essentially external, i.e. their installation location
# (including hash) must not change when any changes are pushed to spack. The reason is that
# changes in the compilers are not reflected in the package hashes built in the CI. Hence, those
# packages will reference a wrong compiler path once the path changes.
# `gcc@12.3.0%gcc@7.3.1` is created as part of building the pipeline containers.
# `ghcr.io/spack/pcluster-amazonlinux-2:v2024-01-29` produced the following hashes.
if [ "x86_64" == "$(arch)" ]; then
gcc_hash="vxlibl3ubl5ptwzb3zydgksfa5osdea6"
else
gcc_hash="bikooik6f3fyrkroarulsadbii43ggz5"
fi
spack install /${gcc_hash}
(
spack load gcc
spack compiler add --scope site
)
# Install Intel compilers through a static spack version such that the compiler's hash does not change.
# The compilers need to be in the same install tree as the rest of the software such that the path
# relocation works correctly. This holds the danger that this part will fail when the current spack gets
# incompatible with the one in $spack_intel_compiler_commit. Therefore, we make intel installations optional
# in package.yaml files and add a fallback `%gcc` version for each application.
if [ "x86_64" == "$(arch)" ]; then
# in packages.yaml files and add a fallback `%gcc` version for each application.
if [ -f "/bootstrap-compilers/spack/etc/spack/compilers.yaml" ]; then
# Running inside a gitlab CI container
# Intel and gcc@12 compilers are pre-installed and their location is recorded in the bootstrap compilers.yaml copied below.
cp /bootstrap-compilers/spack/etc/spack/compilers.yaml "${SPACK_ROOT}"/etc/spack/
else
spack compiler add --scope site
# We need to treat compilers as essentially external, i.e. their installation location
# (including hash) must not change when any changes are pushed to spack. The reason is that
# changes in the compilers are not reflected in the package hashes built in the CI. Hence, those
# packages will reference a wrong compiler path once the path changes.
# `gcc@12.4.0%gcc@7.3.1` is created as part of building the pipeline containers.
# `ghcr.io/spack/pcluster-amazonlinux-2:v2024-10-07` produced the following hashes.
if [ "x86_64" == "$(arch)" ]; then
gcc_hash="pttzchh7o54nhmycj4wgzw5mic6rk2nb"
else
gcc_hash="v6wxye6ijzrxnzxftcwnpu3psohsjl2b"
fi
spack install /${gcc_hash}
(
CURRENT_SPACK_ROOT=${SPACK_ROOT}
DIR="$(mktemp -d)"
cd "${DIR}"
# This needs to include commit 361a185ddb such that `ifx` picks up the correct toolchain. Otherwise
# this leads to libstdc++.so errors during linking (e.g. slepc).
git clone --depth=1 -b ${spack_intel_compiler_commit} https://github.com/spack/spack.git \
&& cd spack \
&& curl -sL https://github.com/spack/spack/pull/40557.patch | patch -p1 \
&& curl -sL https://github.com/spack/spack/pull/40561.patch | patch -p1 \
&& cp "${CURRENT_SPACK_ROOT}/etc/spack/config.yaml" etc/spack/ \
&& cp "${CURRENT_SPACK_ROOT}/etc/spack/compilers.yaml" etc/spack/ \
&& cp "${CURRENT_SPACK_ROOT}/etc/spack/packages.yaml" etc/spack/ \
&& . share/spack/setup-env.sh \
&& spack install intel-oneapi-compilers-classic
rm -rf "${DIR}"
spack load gcc
spack compiler add --scope site
)
bash -c ". \"$(spack location -i intel-oneapi-compilers)\"/setvars.sh; spack compiler add --scope site" \
|| true
spack clean -m
if [ "x86_64" == "$(arch)" ]; then
# 2024.1.0 is the last oneapi compiler that works on AL2 and is the one used to compile packages in the build cache.
spack install intel-oneapi-compilers@2024.1.0
(
. "$(spack location -i intel-oneapi-compilers)"/setvars.sh; spack compiler add --scope site \
|| true
)
fi
fi
}
set_pcluster_defaults
setup_spack
install_compilers
patch_compilers_yaml

View File

@ -47,7 +47,7 @@ packages:
slurm:
buildable: false
externals:
- prefix: /opt/slurm/
- prefix: ${SLURM_ROOT}
spec: slurm@${SLURM_VERSION} +pmix
wrf:
require:

View File

@ -7,8 +7,7 @@ spack:
- mpas-model
- mpich
- openfoam
# - quantum-espresso : %gcc@12.3.0 on neoverse_v1 fails.
# Root cause: internal compiler error: in compute_live_loop_exits, at tree-ssa-loop-manip.cc:247
- quantum-espresso
- wrf
- targets:
@ -22,7 +21,7 @@ spack:
ci:
pipeline-gen:
- build-job:
image: { "name": "ghcr.io/spack/pcluster-amazonlinux-2:v2024-01-29", "entrypoint": [""] }
image: { "name": "ghcr.io/spack/pcluster-amazonlinux-2:v2024-10-07", "entrypoint": [""] }
tags: ["aarch64"]
before_script:
- - . "./share/spack/setup-env.sh"

View File

@ -19,6 +19,10 @@ packages:
- "+intel_provided_gcc ^intel-oneapi-mkl target=x86_64_v4"
- "+intel_provided_gcc ^intel-oneapi-mkl target=x86_64_v3"
when: "%intel"
- one_of:
- "+intel_provided_gcc target=x86_64_v4 ^intel-oneapi-mkl"
- "+intel_provided_gcc target=x86_64_v3 ^intel-oneapi-mkl"
when: "%oneapi"
intel-oneapi-compilers:
require: "intel-oneapi-compilers %gcc target=x86_64_v3"
intel-oneapi-mpi:
@ -29,6 +33,10 @@ packages:
- "lammps_sizes=bigbig +molecule +kspace +rigid +asphere +opt +openmp +openmp-package +intel fft=mkl ^intel-oneapi-mkl target=x86_64_v4"
- "lammps_sizes=bigbig +molecule +kspace +rigid +asphere +opt +openmp +openmp-package fft=mkl ^intel-oneapi-mkl target=x86_64_v3"
when: "%intel"
- one_of:
- "lammps_sizes=bigbig +molecule +kspace +rigid +asphere +opt +openmp +openmp-package +intel fft=mkl ^intel-oneapi-mkl target=x86_64_v4"
- "lammps_sizes=bigbig +molecule +kspace +rigid +asphere +opt +openmp +openmp-package fft=mkl ^intel-oneapi-mkl target=x86_64_v3"
when: "%oneapi"
libidn2:
require:
- one_of:
@ -53,6 +61,10 @@ packages:
- "precision=single ^parallelio+pnetcdf target=x86_64_v4"
- "precision=single ^parallelio+pnetcdf target=x86_64_v3"
when: "%intel"
- one_of:
- "precision=single ^parallelio+pnetcdf target=x86_64_v4"
- "precision=single ^parallelio+pnetcdf target=x86_64_v3"
when: "%oneapi"
mpich:
require:
- one_of:
@ -89,10 +101,14 @@ packages:
- "quantum-espresso@6.6 ^intel-oneapi-mkl+cluster target=x86_64_v4"
- "quantum-espresso@6.6 ^intel-oneapi-mkl+cluster target=x86_64_v3"
when: "%intel"
- one_of:
- "quantum-espresso@6.6 ^intel-oneapi-mkl+cluster target=x86_64_v4"
- "quantum-espresso@6.6 ^intel-oneapi-mkl+cluster target=x86_64_v3"
when: "%oneapi"
slurm:
buildable: false
externals:
- prefix: /opt/slurm/
- prefix: ${SLURM_ROOT}
spec: slurm@${SLURM_VERSION} +pmix
wrf:
require:
@ -101,9 +117,14 @@ packages:
- "wrf@4 build_type=dm+sm target=x86_64_v3"
- "wrf@4.2.2 +netcdf_classic fflags=\"-fp-model fast=2 -no-heap-arrays -no-prec-div -no-prec-sqrt -fno-common\" build_type=dm+sm target=x86_64_v3"
when: "%intel"
- one_of:
- "wrf@4 build_type=dm+sm target=x86_64_v4"
- "wrf@4 build_type=dm+sm target=x86_64_v3"
- "wrf@4.2.2 +netcdf_classic fflags=\"-fp-model fast=2 -no-heap-arrays -no-prec-div -no-prec-sqrt -fno-common\" build_type=dm+sm target=x86_64_v3"
when: "%oneapi"
all:
compiler: [intel, oneapi, gcc]
compiler: [oneapi, gcc]
permissions:
read: world
write: user

View File

@ -3,14 +3,18 @@ spack:
definitions:
- apps:
- gromacs %intel
- lammps %intel
- mpas-model %intel
- gromacs %oneapi
- lammps %oneapi
# earliest oneapi version with fix does not run on AmazonLinux2, see https://github.com/spack/spack/pull/46457
# - mpas-model %oneapi
- openfoam %gcc
- palace %oneapi ^superlu-dist%oneapi # hack: force fortran-rt provider through superlu-dist
- quantum-espresso %intel
# - wrf : While building hdf5 cmake errors out with Detecting Fortran/C Interface: Failed to compile
# Root cause: ifort cannot deal with arbitrarily long file names.
# Latest version quantum-espresso@7.3.1 does not build with oneapi, see https://github.com/spack/spack/pull/46456#issuecomment-2363159511
# - quantum-espresso %oneapi
- openmpi %oneapi
# TODO: oneapi patch in WRF is broken. It uses link time optimization but the gnu linker. Need to check whether the WRF recipe (in later releases) is better.
# WRF can only run if https://github.com/spack/spack/pull/46589 is merged.
# - wrf %oneapi
- targets:
- 'target=x86_64_v4'
@ -23,7 +27,7 @@ spack:
ci:
pipeline-gen:
- build-job:
image: { "name": "ghcr.io/spack/pcluster-amazonlinux-2:v2024-01-29", "entrypoint": [""] }
image: { "name": "ghcr.io/spack/pcluster-amazonlinux-2:v2024-10-07", "entrypoint": [""] }
before_script:
- - . "./share/spack/setup-env.sh"
- . /etc/profile.d/modules.sh