Reproducer should deduce artifact root from concrete environment (#45281)

* Reproducer should deduce artifact root from concrete environment

* Add documentation on the layout of the artifacts directory

* Use dag hash in the container name

* Add reproducer options to improve local testing

* --use-local-head allows running the reproducer with
  the current Spack HEAD commit rather than computing
  a commit for the reproducer

* Add test to verify commits and recreating reproduction environment

* Add test for non-merge commit case

* ci reproduce-build: Drop overwrite option
in favor of throwing an error if the working dir is non-empty
Ryan Krattiger
2025-02-21 10:46:43 -06:00
committed by GitHub
parent 0da5bafaf2
commit 1fa1864b37
6 changed files with 224 additions and 55 deletions

View File

@@ -820,6 +820,69 @@ presence of a ``SPACK_CDASH_AUTH_TOKEN`` environment variable during the
build group on CDash called "Release Testing" (that group will be created if
it didn't already exist).
.. _ci_artifacts:
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
CI Artifacts Directory Layout
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
When running a CI build with the command ``spack ci rebuild``, a number of directories are created for
storing data generated during the CI job. The default root directory for artifacts is ``jobs_scratch_dir``.
This can be overridden by passing the argument ``--artifacts-root`` to the ``spack ci generate`` command,
or by setting the ``SPACK_ARTIFACTS_ROOT`` environment variable in the build job scripts.
The top-level directories under the artifacts root are ``concrete_environment``, ``logs``, ``reproduction``,
``tests``, and ``user_data``. Spack does not restrict what is written to any of these directories, nor does
it require user-specified files to be written to any particular directory.
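For orientation, a typical layout under the artifacts root looks roughly like the following
sketch (file names beyond those documented below are illustrative, not guaranteed):

.. code:: console

   <artifacts-root>/
   ├── concrete_environment/
   │   ├── spack.yaml
   │   └── spack.lock
   ├── logs/
   │   ├── spack-build-out.txt
   │   └── spack-build-mod-env.txt
   ├── reproduction/
   │   ├── repro.json
   │   ├── spack.yaml
   │   ├── spack.lock
   │   └── <spec>.json
   ├── tests/
   └── user_data/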
------------------------
``concrete_environment``
------------------------
The directory ``concrete_environment`` is used to communicate the ``spack.yaml`` processed by
``spack ci generate`` and the concrete ``spack.lock`` for the CI environment.
--------
``logs``
--------
The directory ``logs`` contains the Spack build log, ``spack-build-out.txt``, and the Spack build environment
modification file, ``spack-build-mod-env.txt``. Additionally, all files specified by the package's ``Builder``
property ``archive_files`` are copied here (e.g. ``CMakeCache.txt`` for ``CMakeBuilder``).
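As an illustration of ``archive_files``, a builder can expose extra files for collection along
these lines (a minimal sketch: ``ExampleBuilder``, ``build_directory``, and the file name are
hypothetical here; real builders derive from Spack's builder base classes):

.. code:: python

   import os

   class ExampleBuilder:
       """Hypothetical builder exposing extra files to archive."""

       build_directory = "spack-build"

       @property
       def archive_files(self):
           # Files listed here are copied into the ``logs`` artifacts
           # directory, analogous to CMakeCache.txt for CMakeBuilder.
           return [os.path.join(self.build_directory, "config.log")]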
----------------
``reproduction``
----------------
The directory ``reproduction`` is used to store the files needed by the ``spack reproduce-build`` command.
This includes ``repro.json``, copies of all of the files in ``concrete_environment``, the concrete spec
JSON file for the spec being built, and all of the files written in the artifacts root directory.
The ``repro.json`` file is not versioned and is only designed to work with the version of Spack that CI was run with.
An example of what a ``repro.json`` may look like:
.. code:: json

   {
     "job_name": "adios2@2.9.2 /feaevuj %gcc@11.4.0 arch=linux-ubuntu20.04-x86_64_v3 E4S ROCm External",
     "job_spec_json": "adios2.json",
     "ci_project_dir": "/builds/spack/spack"
   }
---------
``tests``
---------
The directory ``tests`` is used to store output from running ``spack test <job spec>``. It may or may not
contain data, depending on the package that was built and the availability of tests.
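For reference, the stand-alone test harness is driven along these lines (shown as a sketch; the
exact invocation is managed by the CI job scripts):

.. code:: console

   $ spack test run <job-spec>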
-------------
``user_data``
-------------
The directory ``user_data`` is used to store everything else that should not be copied to the ``reproduction`` directory.
Users may use it to store additional logs, metrics, or other kinds of files generated by the build job.
-------------------------------------
Using a custom spack in your pipeline
-------------------------------------

View File

@@ -616,7 +616,7 @@ def copy_test_logs_to_artifacts(test_stage, job_test_dir):
copy_files_to_artifacts(os.path.join(test_stage, "*", "*.txt"), job_test_dir)
def download_and_extract_artifacts(url, work_dir):
def download_and_extract_artifacts(url, work_dir) -> str:
"""Look for gitlab artifacts.zip at the given url, and attempt to download
and extract the contents into the given work_dir
@@ -624,6 +624,10 @@ def download_and_extract_artifacts(url, work_dir):
url (str): Complete url to artifacts.zip file
work_dir (str): Path to destination where artifacts should be extracted
Output:
Artifacts root path relative to the archive root
"""
tty.msg(f"Fetching artifacts from: {url}")
@@ -641,13 +645,25 @@ def download_and_extract_artifacts(url, work_dir):
        response = urlopen(request, timeout=SPACK_CDASH_TIMEOUT)
        with open(artifacts_zip_path, "wb") as out_file:
            shutil.copyfileobj(response, out_file)
        with zipfile.ZipFile(artifacts_zip_path) as zip_file:
            zip_file.extractall(work_dir)
            # Get the artifact root
            artifact_root = ""
            for f in zip_file.filelist:
                if "spack.lock" in f.filename:
                    artifact_root = os.path.dirname(os.path.dirname(f.filename))
                    break
    except OSError as e:
        raise SpackError(f"Error fetching artifacts: {e}")
    finally:
        try:
            os.remove(artifacts_zip_path)
        except FileNotFoundError:
            # If the file doesn't exist we are already raising
            pass
    with zipfile.ZipFile(artifacts_zip_path) as zip_file:
        zip_file.extractall(work_dir)
    os.remove(artifacts_zip_path)
    return artifact_root
def get_spack_info():
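Pulled out of the diff, the deduction logic amounts to the sketch below: ``spack.lock`` is known to
live two levels below the artifact root (in ``concrete_environment``), so the root is the grandparent
directory of the first matching archive entry (standalone sketch; the function name is illustrative):

.. code:: python

   import os
   import zipfile

   def deduce_artifact_root(artifacts_zip_path: str) -> str:
       """Return the artifact root path relative to the archive root."""
       with zipfile.ZipFile(artifacts_zip_path) as zip_file:
           for f in zip_file.filelist:
               if "spack.lock" in f.filename:
                   # spack.lock sits at <root>/concrete_environment/spack.lock
                   return os.path.dirname(os.path.dirname(f.filename))
       return ""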
@@ -761,7 +777,7 @@ def setup_spack_repro_version(repro_dir, checkout_commit, merge_commit=None):
return True
def reproduce_ci_job(url, work_dir, autostart, gpg_url, runtime):
def reproduce_ci_job(url, work_dir, autostart, gpg_url, runtime, use_local_head):
"""Given a url to gitlab artifacts.zip from a failed 'spack ci rebuild' job,
attempt to setup an environment in which the failure can be reproduced
locally. This entails the following:
@@ -775,8 +791,11 @@ def reproduce_ci_job(url, work_dir, autostart, gpg_url, runtime):
commands to run to reproduce the build once inside the container.
"""
    work_dir = os.path.realpath(work_dir)
    if os.path.exists(work_dir) and os.listdir(work_dir):
        raise SpackError(f"Cannot run reproducer in non-empty working dir:\n {work_dir}")
    platform_script_ext = "ps1" if IS_WINDOWS else "sh"
    download_and_extract_artifacts(url, work_dir)
    artifact_root = download_and_extract_artifacts(url, work_dir)
    gpg_path = None
    if gpg_url:
@@ -838,6 +857,9 @@ def reproduce_ci_job(url, work_dir, autostart, gpg_url, runtime):
    with open(repro_file, encoding="utf-8") as fd:
        repro_details = json.load(fd)
    spec_file = fs.find(work_dir, repro_details["job_spec_json"])[0]
    reproducer_spec = spack.spec.Spec.from_specfile(spec_file)
    repro_dir = os.path.dirname(repro_file)
    rel_repro_dir = repro_dir.replace(work_dir, "").lstrip(os.path.sep)
@@ -898,17 +920,20 @@ def reproduce_ci_job(url, work_dir, autostart, gpg_url, runtime):
    commit_regex = re.compile(r"commit\s+([^\s]+)")
    merge_commit_regex = re.compile(r"Merge\s+([^\s]+)\s+into\s+([^\s]+)")
    # Try the more specific merge commit regex first
    m = merge_commit_regex.search(spack_info)
    if m:
        # This was a merge commit and we captured the parents
        commit_1 = m.group(1)
        commit_2 = m.group(2)
    if use_local_head:
        commit_1 = "HEAD"
    else:
        # Not a merge commit, just get the commit sha
        m = commit_regex.search(spack_info)
        # Try the more specific merge commit regex first
        m = merge_commit_regex.search(spack_info)
        if m:
            # This was a merge commit and we captured the parents
            commit_1 = m.group(1)
            commit_2 = m.group(2)
        else:
            # Not a merge commit, just get the commit sha
            m = commit_regex.search(spack_info)
            if m:
                commit_1 = m.group(1)
    setup_result = False
    if commit_1:
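In isolation, the commit selection behaves as in this sketch (the sample ``spack_info`` text is
made up for illustration):

.. code:: python

   import re

   commit_regex = re.compile(r"commit\s+([^\s]+)")
   merge_commit_regex = re.compile(r"Merge\s+([^\s]+)\s+into\s+([^\s]+)")

   spack_info = "Merge abc1234 into def5678"

   commit_1 = commit_2 = None
   m = merge_commit_regex.search(spack_info)
   if m:
       # Merge commit: check out group(1), then merge it into group(2)
       commit_1, commit_2 = m.group(1), m.group(2)
   else:
       # Plain commit: just capture the sha
       m = commit_regex.search(spack_info)
       if m:
           commit_1 = m.group(1)

   assert (commit_1, commit_2) == ("abc1234", "def5678")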
@@ -983,6 +1008,8 @@ def reproduce_ci_job(url, work_dir, autostart, gpg_url, runtime):
"entrypoint", entrypoint_script, work_dir, run=False, exit_on_failure=False
)
# Attempt to create a unique name for the reproducer container
container_suffix = "_" + reproducer_spec.dag_hash() if reproducer_spec else ""
docker_command = [
runtime,
"run",
@@ -990,14 +1017,14 @@ def reproduce_ci_job(url, work_dir, autostart, gpg_url, runtime):
"-t",
"--rm",
"--name",
"spack_reproducer",
f"spack_reproducer{container_suffix}",
"-v",
":".join([work_dir, mounted_workdir, "Z"]),
"-v",
":".join(
[
os.path.join(work_dir, "jobs_scratch_dir"),
os.path.join(mount_as_dir, "jobs_scratch_dir"),
os.path.join(work_dir, artifact_root),
os.path.join(mount_as_dir, artifact_root),
"Z",
]
),
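With these changes, the assembled container invocation looks roughly like the following
(placeholder values in angle brackets; remaining flags elided with ``...``):

.. code:: console

   $ docker run -i -t --rm \
       --name spack_reproducer_<dag-hash> \
       -v <work_dir>:<mounted_workdir>:Z \
       -v <work_dir>/<artifact_root>:<mount_as_dir>/<artifact_root>:Z \
       ...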

View File

@@ -176,6 +176,11 @@ def setup_parser(subparser):
reproduce.add_argument(
"-s", "--autostart", help="Run docker reproducer automatically", action="store_true"
)
reproduce.add_argument(
"--use-local-head",
help="Use the HEAD of the local Spack instead of reproducing a commit",
action="store_true",
)
gpg_group = reproduce.add_mutually_exclusive_group(required=False)
gpg_group.add_argument(
"--gpg-file", help="Path to public GPG key for validating binary cache installs"
@@ -608,7 +613,12 @@ def ci_reproduce(args):
gpg_key_url = None
return spack_ci.reproduce_ci_job(
args.job_url, args.working_dir, args.autostart, gpg_key_url, args.runtime
args.job_url,
args.working_dir,
args.autostart,
gpg_key_url,
args.runtime,
args.use_local_head,
)

View File

@@ -28,6 +28,7 @@
from spack.ci.generator_registry import generator
from spack.cmd.ci import FAILED_CREATE_BUILDCACHE_CODE
from spack.database import INDEX_JSON_FILE
from spack.error import SpackError
from spack.schema.buildcache_spec import schema as specfile_schema
from spack.schema.database_index import schema as db_idx_schema
from spack.spec import Spec
@@ -170,7 +171,9 @@ def test_ci_generate_with_env(ci_generate_test, tmp_path, mock_binary_index):
url: https://my.fake.cdash
project: Not used
site: Nothing
"""
""",
"--artifacts-root",
str(tmp_path / "my_artifacts_root"),
)
yaml_contents = syaml.load(outputfile.read_text())
@@ -192,7 +195,7 @@ def test_ci_generate_with_env(ci_generate_test, tmp_path, mock_binary_index):
assert "variables" in yaml_contents
assert "SPACK_ARTIFACTS_ROOT" in yaml_contents["variables"]
assert yaml_contents["variables"]["SPACK_ARTIFACTS_ROOT"] == "jobs_scratch_dir"
assert yaml_contents["variables"]["SPACK_ARTIFACTS_ROOT"] == "my_artifacts_root"
def test_ci_generate_with_env_missing_section(ci_generate_test, tmp_path, mock_binary_index):
@@ -1322,44 +1325,50 @@ def test_ci_reproduce(
env.concretize()
env.write()
repro_dir.mkdir()
def fake_download_and_extract_artifacts(url, work_dir, merge_commit_test=True):
with working_dir(tmp_path), ev.Environment(".") as env:
if not os.path.exists(repro_dir):
repro_dir.mkdir()
job_spec = env.concrete_roots()[0]
with open(repro_dir / "archivefiles.json", "w", encoding="utf-8") as f:
f.write(job_spec.to_json(hash=ht.dag_hash))
job_spec = env.concrete_roots()[0]
with open(repro_dir / "archivefiles.json", "w", encoding="utf-8") as f:
f.write(job_spec.to_json(hash=ht.dag_hash))
artifacts_root = repro_dir / "jobs_scratch_dir"
pipeline_path = artifacts_root / "pipeline.yml"
artifacts_root = repro_dir / "scratch_dir"
pipeline_path = artifacts_root / "pipeline.yml"
ci_cmd(
"generate",
"--output-file",
str(pipeline_path),
"--artifacts-root",
str(artifacts_root),
)
job_name = gitlab_generator.get_job_name(job_spec)
with open(repro_dir / "repro.json", "w", encoding="utf-8") as f:
f.write(
json.dumps(
{
"job_name": job_name,
"job_spec_json": "archivefiles.json",
"ci_project_dir": str(repro_dir),
}
ci_cmd(
"generate",
"--output-file",
str(pipeline_path),
"--artifacts-root",
str(artifacts_root),
)
)
with open(repro_dir / "install.sh", "w", encoding="utf-8") as f:
f.write("#!/bin/sh\n\n#fake install\nspack install blah\n")
job_name = gitlab_generator.get_job_name(job_spec)
with open(repro_dir / "spack_info.txt", "w", encoding="utf-8") as f:
f.write(f"\nMerge {last_two_git_commits[1]} into {last_two_git_commits[0]}\n\n")
with open(repro_dir / "repro.json", "w", encoding="utf-8") as f:
f.write(
json.dumps(
{
"job_name": job_name,
"job_spec_json": "archivefiles.json",
"ci_project_dir": str(repro_dir),
}
)
)
def fake_download_and_extract_artifacts(url, work_dir):
pass
with open(repro_dir / "install.sh", "w", encoding="utf-8") as f:
f.write("#!/bin/sh\n\n#fake install\nspack install blah\n")
with open(repro_dir / "spack_info.txt", "w", encoding="utf-8") as f:
if merge_commit_test:
f.write(
f"\nMerge {last_two_git_commits[1]} into {last_two_git_commits[0]}\n\n"
)
else:
f.write(f"\ncommit {last_two_git_commits[1]}\n\n")
return "jobs_scratch_dir"
monkeypatch.setattr(ci, "download_and_extract_artifacts", fake_download_and_extract_artifacts)
rep_out = ci_cmd(
@@ -1375,6 +1384,64 @@ def fake_download_and_extract_artifacts(url, work_dir):
# Make sure we tell the user where it is when not in interactive mode
assert f"$ {repro_dir}/start.sh" in rep_out
# Ensure the correct commits are used
assert f"checkout_commit: {last_two_git_commits[0]}" in rep_out
assert f"merge_commit: {last_two_git_commits[1]}" in rep_out
# Test re-running in dirty working dir
with pytest.raises(SpackError, match=f"{repro_dir}"):
rep_out = ci_cmd(
"reproduce-build",
"https://example.com/api/v1/projects/1/jobs/2/artifacts",
"--working-dir",
str(repro_dir),
output=str,
)
# Cleanup between tests
shutil.rmtree(repro_dir)
# Test --use-local-head
rep_out = ci_cmd(
"reproduce-build",
"https://example.com/api/v1/projects/1/jobs/2/artifacts",
"--use-local-head",
"--working-dir",
str(repro_dir),
output=str,
)
# Make sure we are checking out the HEAD commit without a merge commit
assert "checkout_commit: HEAD" in rep_out
assert "merge_commit: None" in rep_out
# Test the case where the spack_info.txt is not a merge commit
monkeypatch.setattr(
ci,
"download_and_extract_artifacts",
lambda url, wd: fake_download_and_extract_artifacts(url, wd, False),
)
# Cleanup between tests
shutil.rmtree(repro_dir)
rep_out = ci_cmd(
"reproduce-build",
"https://example.com/api/v1/projects/1/jobs/2/artifacts",
"--working-dir",
str(repro_dir),
output=str,
)
# Make sure the script was generated
assert (repro_dir / "start.sh").exists()
# Make sure we tell the user where it is when not in interactive mode
assert f"$ {repro_dir}/start.sh" in rep_out
# Ensure the correct commit is used (different than HEAD)
assert f"checkout_commit: {last_two_git_commits[1]}" in rep_out
assert "merge_commit: None" in rep_out
@pytest.mark.parametrize(
"url_in,url_out",