Reproducer should deduce artifact root from concrete environment (#45281)

* Reproducer should deduce artifact root from concrete environment
* Add documentation on the layout of the artifacts directory
* Use dag hash in the container name
* Add reproducer options to improve local testing
  * --use-local-head allows running the reproducer with the current Spack HEAD commit rather than computing a commit for the reproducer
* Add test to verify commits and recreating the reproduction environment
* Add test for the non-merge-commit case
* ci reproduce-build: Drop the overwrite option in favor of throwing an error if the working dir is non-empty
parent 0da5bafaf2
commit 1fa1864b37
@@ -820,6 +820,69 @@ presence of a ``SPACK_CDASH_AUTH_TOKEN`` environment variable during the
 build group on CDash called "Release Testing" (that group will be created if
 it didn't already exist).
 
+.. _ci_artifacts:
+
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+CI Artifacts Directory Layout
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+When running a CI build with the command ``spack ci rebuild``, a number of directories are created for
+storing data generated during the CI job. The default root directory for artifacts is ``jobs_scratch_dir``.
+This can be overridden by passing the argument ``--artifacts-root`` to the ``spack ci generate`` command,
+or by setting the ``SPACK_ARTIFACTS_ROOT`` environment variable in the build job scripts.
+
+The top-level directories under the artifact root are ``concrete_environment``, ``logs``, ``reproduction``,
+``tests``, and ``user_data``. Spack does not restrict what is written to any of these directories, nor does
+it require user-specified files to be written to any specific directory.
+
+------------------------
+``concrete_environment``
+------------------------
+
+The directory ``concrete_environment`` is used to communicate the ``spack.yaml`` processed by
+``spack ci generate`` and the concrete ``spack.lock`` for the CI environment.
+
+--------
+``logs``
+--------
+
+The directory ``logs`` contains the Spack build log, ``spack-build-out.txt``, and the Spack build environment
+modification file, ``spack-build-mod-env.txt``. Additionally, all files specified by the package's ``Builder``
+property ``archive_files`` are copied here (e.g. ``CMakeCache.txt`` for ``CMakeBuilder``).
+
+----------------
+``reproduction``
+----------------
+
+The directory ``reproduction`` is used to store the files needed by the ``spack reproduce-build`` command.
+This includes ``repro.json``, copies of all of the files in ``concrete_environment``, the concrete spec
+JSON file for the spec being built, and all of the files written in the artifacts root directory.
+
+The ``repro.json`` file is not versioned and is only designed to work with the version of Spack that CI
+was run with. An example of what a ``repro.json`` may look like:
+
+.. code:: json
+
+   {
+     "job_name": "adios2@2.9.2 /feaevuj %gcc@11.4.0 arch=linux-ubuntu20.04-x86_64_v3 E4S ROCm External",
+     "job_spec_json": "adios2.json",
+     "ci_project_dir": "/builds/spack/spack"
+   }
+
+---------
+``tests``
+---------
+
+The directory ``tests`` is used to store output from running ``spack test <job spec>``. It may or may not
+contain data, depending on the package that was built and the availability of tests.
+
+-------------
+``user_data``
+-------------
+
+The directory ``user_data`` is used to store everything else that should not be copied to the ``reproduction``
+directory. Users may use it to store additional logs, metrics, or other types of files generated by the build job.
+
 -------------------------------------
 Using a custom spack in your pipeline
 -------------------------------------
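To make the documented layout concrete, here is a minimal sketch (illustrative only, not Spack code): ``artifacts_dir`` is a hypothetical helper, and the default root name mirrors the docs above and the generated job scripts, which export ``SPACK_ARTIFACTS_ROOT``.

import os

# Hypothetical helper, not part of Spack: resolve one of the documented
# top-level directories under the artifacts root for the current job.
TOP_LEVEL_DIRS = ("concrete_environment", "logs", "reproduction", "tests", "user_data")

def artifacts_dir(name: str) -> str:
    # Honor the SPACK_ARTIFACTS_ROOT variable set in the generated job
    # scripts; fall back to the documented default root otherwise.
    root = os.environ.get("SPACK_ARTIFACTS_ROOT", "jobs_scratch_dir")
    if name not in TOP_LEVEL_DIRS:
        raise ValueError(f"unknown artifacts directory: {name}")
    return os.path.join(root, name)

# e.g. extra logs or metrics that should not enter the reproducer:
print(artifacts_dir("user_data"))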
@@ -616,7 +616,7 @@ def copy_test_logs_to_artifacts(test_stage, job_test_dir):
     copy_files_to_artifacts(os.path.join(test_stage, "*", "*.txt"), job_test_dir)
 
 
-def download_and_extract_artifacts(url, work_dir):
+def download_and_extract_artifacts(url, work_dir) -> str:
     """Look for gitlab artifacts.zip at the given url, and attempt to download
     and extract the contents into the given work_dir
 
@@ -624,6 +624,10 @@ def download_and_extract_artifacts(url, work_dir):
         url (str): Complete url to artifacts.zip file
         work_dir (str): Path to destination where artifacts should be extracted
 
+    Output:
+
+        Artifacts root path relative to the archive root
     """
     tty.msg(f"Fetching artifacts from: {url}")
 
@@ -641,13 +645,25 @@ def download_and_extract_artifacts(url, work_dir):
         response = urlopen(request, timeout=SPACK_CDASH_TIMEOUT)
         with open(artifacts_zip_path, "wb") as out_file:
             shutil.copyfileobj(response, out_file)
+
+        with zipfile.ZipFile(artifacts_zip_path) as zip_file:
+            zip_file.extractall(work_dir)
+            # Get the artifact root
+            artifact_root = ""
+            for f in zip_file.filelist:
+                if "spack.lock" in f.filename:
+                    artifact_root = os.path.dirname(os.path.dirname(f.filename))
+                    break
     except OSError as e:
         raise SpackError(f"Error fetching artifacts: {e}")
+    finally:
+        try:
+            os.remove(artifacts_zip_path)
+        except FileNotFoundError:
+            # If the file doesn't exist we are already raising
+            pass
 
-    with zipfile.ZipFile(artifacts_zip_path) as zip_file:
-        zip_file.extractall(work_dir)
-
-    os.remove(artifacts_zip_path)
+    return artifact_root
 
 
 def get_spack_info():
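The deduction rule above can be sanity-checked in isolation. A minimal sketch using only the standard library, with an in-memory archive shaped the way the docs describe (``<root>/concrete_environment/spack.lock``; the root name here is illustrative):

import io
import os
import zipfile

# Build an in-memory archive shaped like a CI artifacts.zip.
buf = io.BytesIO()
with zipfile.ZipFile(buf, "w") as zf:
    zf.writestr("jobs_scratch_dir/concrete_environment/spack.lock", "{}")

# Same deduction as download_and_extract_artifacts: find spack.lock and
# take the directory two levels up as the artifact root.
with zipfile.ZipFile(buf) as zf:
    artifact_root = ""
    for f in zf.filelist:
        if "spack.lock" in f.filename:
            artifact_root = os.path.dirname(os.path.dirname(f.filename))
            break

assert artifact_root == "jobs_scratch_dir"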
@@ -761,7 +777,7 @@ def setup_spack_repro_version(repro_dir, checkout_commit, merge_commit=None):
     return True
 
 
-def reproduce_ci_job(url, work_dir, autostart, gpg_url, runtime):
+def reproduce_ci_job(url, work_dir, autostart, gpg_url, runtime, use_local_head):
     """Given a url to gitlab artifacts.zip from a failed 'spack ci rebuild' job,
     attempt to setup an environment in which the failure can be reproduced
     locally. This entails the following:
 
@@ -775,8 +791,11 @@ def reproduce_ci_job(url, work_dir, autostart, gpg_url, runtime):
     commands to run to reproduce the build once inside the container.
     """
     work_dir = os.path.realpath(work_dir)
+    if os.path.exists(work_dir) and os.listdir(work_dir):
+        raise SpackError(f"Cannot run reproducer in non-empty working dir:\n {work_dir}")
+
     platform_script_ext = "ps1" if IS_WINDOWS else "sh"
-    download_and_extract_artifacts(url, work_dir)
+    artifact_root = download_and_extract_artifacts(url, work_dir)
 
     gpg_path = None
     if gpg_url:
@@ -838,6 +857,9 @@ def reproduce_ci_job(url, work_dir, autostart, gpg_url, runtime):
     with open(repro_file, encoding="utf-8") as fd:
         repro_details = json.load(fd)
 
+    spec_file = fs.find(work_dir, repro_details["job_spec_json"])[0]
+    reproducer_spec = spack.spec.Spec.from_specfile(spec_file)
+
     repro_dir = os.path.dirname(repro_file)
     rel_repro_dir = repro_dir.replace(work_dir, "").lstrip(os.path.sep)
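For orientation, a minimal sketch of how ``repro.json`` ties the pieces together; the path and spec file name are placeholders matching the docs example above:

import json

# Sketch of the repro.json consumption shown in the hunk above.
with open("jobs_scratch_dir/reproduction/repro.json", encoding="utf-8") as fd:
    repro_details = json.load(fd)

# job_spec_json names the concrete spec file written next to repro.json;
# the commit above loads it via spack.spec.Spec.from_specfile() so the
# spec's dag_hash() can later be used in the container name.
spec_filename = repro_details["job_spec_json"]  # e.g. "adios2.json"
print(repro_details["job_name"], "->", spec_filename)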
@@ -898,17 +920,20 @@ def reproduce_ci_job(url, work_dir, autostart, gpg_url, runtime):
     commit_regex = re.compile(r"commit\s+([^\s]+)")
     merge_commit_regex = re.compile(r"Merge\s+([^\s]+)\s+into\s+([^\s]+)")
 
-    # Try the more specific merge commit regex first
-    m = merge_commit_regex.search(spack_info)
-    if m:
-        # This was a merge commit and we captured the parents
-        commit_1 = m.group(1)
-        commit_2 = m.group(2)
+    if use_local_head:
+        commit_1 = "HEAD"
     else:
-        # Not a merge commit, just get the commit sha
-        m = commit_regex.search(spack_info)
+        # Try the more specific merge commit regex first
+        m = merge_commit_regex.search(spack_info)
         if m:
+            # This was a merge commit and we captured the parents
             commit_1 = m.group(1)
+            commit_2 = m.group(2)
+        else:
+            # Not a merge commit, just get the commit sha
+            m = commit_regex.search(spack_info)
+            if m:
+                commit_1 = m.group(1)
 
     setup_result = False
     if commit_1:
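A quick worked example of the two patterns above, using the same ``spack_info.txt`` formats the tests in this commit write; the shas are placeholders:

import re

commit_regex = re.compile(r"commit\s+([^\s]+)")
merge_commit_regex = re.compile(r"Merge\s+([^\s]+)\s+into\s+([^\s]+)")

# Merge-commit form, as written by the test: "Merge <sha> into <sha>"
m = merge_commit_regex.search("\nMerge 1a2b3c into 4d5e6f\n\n")
assert m and m.group(1) == "1a2b3c" and m.group(2) == "4d5e6f"

# Plain-commit form: "commit <sha>"
m = commit_regex.search("\ncommit 1a2b3c\n\n")
assert m and m.group(1) == "1a2b3c"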
@@ -983,6 +1008,8 @@ def reproduce_ci_job(url, work_dir, autostart, gpg_url, runtime):
         "entrypoint", entrypoint_script, work_dir, run=False, exit_on_failure=False
     )
 
+    # Attempt to create a unique name for the reproducer container
+    container_suffix = "_" + reproducer_spec.dag_hash() if reproducer_spec else ""
     docker_command = [
         runtime,
         "run",
@@ -990,14 +1017,14 @@ def reproduce_ci_job(url, work_dir, autostart, gpg_url, runtime):
         "-t",
         "--rm",
         "--name",
-        "spack_reproducer",
+        f"spack_reproducer{container_suffix}",
         "-v",
         ":".join([work_dir, mounted_workdir, "Z"]),
         "-v",
         ":".join(
             [
-                os.path.join(work_dir, "jobs_scratch_dir"),
-                os.path.join(mount_as_dir, "jobs_scratch_dir"),
+                os.path.join(work_dir, artifact_root),
+                os.path.join(mount_as_dir, artifact_root),
                 "Z",
             ]
         ),
@@ -176,6 +176,11 @@ def setup_parser(subparser):
     reproduce.add_argument(
         "-s", "--autostart", help="Run docker reproducer automatically", action="store_true"
     )
+    reproduce.add_argument(
+        "--use-local-head",
+        help="Use the HEAD of the local Spack instead of reproducing a commit",
+        action="store_true",
+    )
     gpg_group = reproduce.add_mutually_exclusive_group(required=False)
     gpg_group.add_argument(
         "--gpg-file", help="Path to public GPG key for validating binary cache installs"
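A hedged usage sketch for the new flag; the URL and working directory are placeholders, and the invocation mirrors what the tests drive through ``ci_cmd``:

import subprocess

# --use-local-head skips commit reconstruction and runs the reproducer
# against the current local Spack HEAD instead.
subprocess.run(
    [
        "spack", "ci", "reproduce-build",
        "https://gitlab.example.com/api/v4/projects/1/jobs/2/artifacts",
        "--use-local-head",
        "--working-dir", "/tmp/repro",
    ],
    check=True,
)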
@@ -608,7 +613,12 @@ def ci_reproduce(args):
     gpg_key_url = None
 
     return spack_ci.reproduce_ci_job(
-        args.job_url, args.working_dir, args.autostart, gpg_key_url, args.runtime
+        args.job_url,
+        args.working_dir,
+        args.autostart,
+        gpg_key_url,
+        args.runtime,
+        args.use_local_head,
     )
@@ -28,6 +28,7 @@
 from spack.ci.generator_registry import generator
 from spack.cmd.ci import FAILED_CREATE_BUILDCACHE_CODE
 from spack.database import INDEX_JSON_FILE
+from spack.error import SpackError
 from spack.schema.buildcache_spec import schema as specfile_schema
 from spack.schema.database_index import schema as db_idx_schema
 from spack.spec import Spec
@@ -170,7 +171,9 @@ def test_ci_generate_with_env(ci_generate_test, tmp_path, mock_binary_index):
             url: https://my.fake.cdash
             project: Not used
             site: Nothing
-        """
+        """,
+        "--artifacts-root",
+        str(tmp_path / "my_artifacts_root"),
     )
     yaml_contents = syaml.load(outputfile.read_text())
@@ -192,7 +195,7 @@ def test_ci_generate_with_env(ci_generate_test, tmp_path, mock_binary_index):
 
     assert "variables" in yaml_contents
     assert "SPACK_ARTIFACTS_ROOT" in yaml_contents["variables"]
-    assert yaml_contents["variables"]["SPACK_ARTIFACTS_ROOT"] == "jobs_scratch_dir"
+    assert yaml_contents["variables"]["SPACK_ARTIFACTS_ROOT"] == "my_artifacts_root"
 
 
 def test_ci_generate_with_env_missing_section(ci_generate_test, tmp_path, mock_binary_index):
@@ -1322,44 +1325,50 @@ def test_ci_reproduce(
     env.concretize()
     env.write()
 
-    repro_dir.mkdir()
-
-    job_spec = env.concrete_roots()[0]
-    with open(repro_dir / "archivefiles.json", "w", encoding="utf-8") as f:
-        f.write(job_spec.to_json(hash=ht.dag_hash))
-
-    artifacts_root = repro_dir / "jobs_scratch_dir"
-    pipeline_path = artifacts_root / "pipeline.yml"
-
-    ci_cmd(
-        "generate",
-        "--output-file",
-        str(pipeline_path),
-        "--artifacts-root",
-        str(artifacts_root),
-    )
-
-    job_name = gitlab_generator.get_job_name(job_spec)
-
-    with open(repro_dir / "repro.json", "w", encoding="utf-8") as f:
-        f.write(
-            json.dumps(
-                {
-                    "job_name": job_name,
-                    "job_spec_json": "archivefiles.json",
-                    "ci_project_dir": str(repro_dir),
-                }
-            )
-        )
-
-    with open(repro_dir / "install.sh", "w", encoding="utf-8") as f:
-        f.write("#!/bin/sh\n\n#fake install\nspack install blah\n")
-
-    with open(repro_dir / "spack_info.txt", "w", encoding="utf-8") as f:
-        f.write(f"\nMerge {last_two_git_commits[1]} into {last_two_git_commits[0]}\n\n")
-
-    def fake_download_and_extract_artifacts(url, work_dir):
-        pass
+    def fake_download_and_extract_artifacts(url, work_dir, merge_commit_test=True):
+        with working_dir(tmp_path), ev.Environment(".") as env:
+            if not os.path.exists(repro_dir):
+                repro_dir.mkdir()
+
+            job_spec = env.concrete_roots()[0]
+            with open(repro_dir / "archivefiles.json", "w", encoding="utf-8") as f:
+                f.write(job_spec.to_json(hash=ht.dag_hash))
+
+            artifacts_root = repro_dir / "scratch_dir"
+            pipeline_path = artifacts_root / "pipeline.yml"
+
+            ci_cmd(
+                "generate",
+                "--output-file",
+                str(pipeline_path),
+                "--artifacts-root",
+                str(artifacts_root),
+            )
+
+            job_name = gitlab_generator.get_job_name(job_spec)
+
+            with open(repro_dir / "repro.json", "w", encoding="utf-8") as f:
+                f.write(
+                    json.dumps(
+                        {
+                            "job_name": job_name,
+                            "job_spec_json": "archivefiles.json",
+                            "ci_project_dir": str(repro_dir),
+                        }
+                    )
+                )
+
+            with open(repro_dir / "install.sh", "w", encoding="utf-8") as f:
+                f.write("#!/bin/sh\n\n#fake install\nspack install blah\n")
+
+            with open(repro_dir / "spack_info.txt", "w", encoding="utf-8") as f:
+                if merge_commit_test:
+                    f.write(
+                        f"\nMerge {last_two_git_commits[1]} into {last_two_git_commits[0]}\n\n"
+                    )
+                else:
+                    f.write(f"\ncommit {last_two_git_commits[1]}\n\n")
+
+        return "jobs_scratch_dir"
 
     monkeypatch.setattr(ci, "download_and_extract_artifacts", fake_download_and_extract_artifacts)
     rep_out = ci_cmd(
@@ -1375,6 +1384,64 @@ def fake_download_and_extract_artifacts(url, work_dir):
     # Make sure we tell the user where it is when not in interactive mode
     assert f"$ {repro_dir}/start.sh" in rep_out
 
+    # Ensure the correct commits are used
+    assert f"checkout_commit: {last_two_git_commits[0]}" in rep_out
+    assert f"merge_commit: {last_two_git_commits[1]}" in rep_out
+
+    # Test re-running in a dirty working dir
+    with pytest.raises(SpackError, match=f"{repro_dir}"):
+        rep_out = ci_cmd(
+            "reproduce-build",
+            "https://example.com/api/v1/projects/1/jobs/2/artifacts",
+            "--working-dir",
+            str(repro_dir),
+            output=str,
+        )
+
+    # Cleanup between tests
+    shutil.rmtree(repro_dir)
+
+    # Test --use-local-head
+    rep_out = ci_cmd(
+        "reproduce-build",
+        "https://example.com/api/v1/projects/1/jobs/2/artifacts",
+        "--use-local-head",
+        "--working-dir",
+        str(repro_dir),
+        output=str,
+    )
+
+    # Make sure we are checking out the HEAD commit without a merge commit
+    assert "checkout_commit: HEAD" in rep_out
+    assert "merge_commit: None" in rep_out
+
+    # Test the case where the spack_info.txt is not a merge commit
+    monkeypatch.setattr(
+        ci,
+        "download_and_extract_artifacts",
+        lambda url, wd: fake_download_and_extract_artifacts(url, wd, False),
+    )
+
+    # Cleanup between tests
+    shutil.rmtree(repro_dir)
+
+    rep_out = ci_cmd(
+        "reproduce-build",
+        "https://example.com/api/v1/projects/1/jobs/2/artifacts",
+        "--working-dir",
+        str(repro_dir),
+        output=str,
+    )
+    # Make sure the script was generated
+    assert (repro_dir / "start.sh").exists()
+
+    # Make sure we tell the user where it is when not in interactive mode
+    assert f"$ {repro_dir}/start.sh" in rep_out
+
+    # Ensure the correct commit is used (different from HEAD)
+    assert f"checkout_commit: {last_two_git_commits[1]}" in rep_out
+    assert "merge_commit: None" in rep_out
+
 
 @pytest.mark.parametrize(
     "url_in,url_out",
@@ -706,7 +706,7 @@ _spack_ci_rebuild() {
 _spack_ci_reproduce_build() {
     if $list_options
     then
-        SPACK_COMPREPLY="-h --help --runtime --working-dir -s --autostart --gpg-file --gpg-url"
+        SPACK_COMPREPLY="-h --help --runtime --working-dir -s --autostart --use-local-head --gpg-file --gpg-url"
     else
         SPACK_COMPREPLY=""
     fi
@@ -999,7 +999,7 @@ complete -c spack -n '__fish_spack_using_command ci rebuild' -s j -l jobs -r -f
 complete -c spack -n '__fish_spack_using_command ci rebuild' -s j -l jobs -r -d 'explicitly set number of parallel jobs'
 
 # spack ci reproduce-build
-set -g __fish_spack_optspecs_spack_ci_reproduce_build h/help runtime= working-dir= s/autostart gpg-file= gpg-url=
+set -g __fish_spack_optspecs_spack_ci_reproduce_build h/help runtime= working-dir= s/autostart use-local-head gpg-file= gpg-url=
 complete -c spack -n '__fish_spack_using_command_pos 0 ci reproduce-build' -f
 complete -c spack -n '__fish_spack_using_command ci reproduce-build' -s h -l help -f -a help
 complete -c spack -n '__fish_spack_using_command ci reproduce-build' -s h -l help -d 'show this help message and exit'
@@ -1009,6 +1009,8 @@ complete -c spack -n '__fish_spack_using_command ci reproduce-build' -l working-
 complete -c spack -n '__fish_spack_using_command ci reproduce-build' -l working-dir -r -d 'where to unpack artifacts'
 complete -c spack -n '__fish_spack_using_command ci reproduce-build' -s s -l autostart -f -a autostart
 complete -c spack -n '__fish_spack_using_command ci reproduce-build' -s s -l autostart -d 'Run docker reproducer automatically'
+complete -c spack -n '__fish_spack_using_command ci reproduce-build' -l use-local-head -f -a use_local_head
+complete -c spack -n '__fish_spack_using_command ci reproduce-build' -l use-local-head -d 'Use the HEAD of the local Spack instead of reproducing a commit'
 complete -c spack -n '__fish_spack_using_command ci reproduce-build' -l gpg-file -r -f -a gpg_file
 complete -c spack -n '__fish_spack_using_command ci reproduce-build' -l gpg-file -r -d 'Path to public GPG key for validating binary cache installs'
 complete -c spack -n '__fish_spack_using_command ci reproduce-build' -l gpg-url -r -f -a gpg_url