diff --git a/lib/spack/docs/pipelines.rst b/lib/spack/docs/pipelines.rst
index c30da836bf3..2dd2f2cd80f 100644
--- a/lib/spack/docs/pipelines.rst
+++ b/lib/spack/docs/pipelines.rst
@@ -820,6 +820,69 @@ presence of a ``SPACK_CDASH_AUTH_TOKEN`` environment variable during the build
 group on CDash called "Release Testing" (that group will be created if it
 didn't already exist).
 
+.. _ci_artifacts:
+
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+CI Artifacts Directory Layout
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+When running a CI build using the command ``spack ci rebuild``, a number of directories are created for
+storing data generated during the CI job. The default root directory for artifacts is ``jobs_scratch_dir``.
+This can be overridden by passing the argument ``--artifacts-root`` to the ``spack ci generate`` command
+or by setting the ``SPACK_ARTIFACTS_ROOT`` environment variable in the build job scripts.
+
+The top-level directories under the artifacts root are ``concrete_environment``, ``logs``, ``reproduction``,
+``tests``, and ``user_data``. Spack does not restrict what is written to any of these directories, nor does
+it require user-specified files to be written to any particular directory.
+
+------------------------
+``concrete_environment``
+------------------------
+
+The directory ``concrete_environment`` is used to communicate the ``spack.yaml`` processed by
+``spack ci generate`` and the concrete ``spack.lock`` for the CI environment.
+
+--------
+``logs``
+--------
+
+The directory ``logs`` contains the Spack build log, ``spack-build-out.txt``, and the Spack build environment
+modification file, ``spack-build-mod-env.txt``. Additionally, all files specified by the package's ``Builder``
+property ``archive_files`` are copied here (e.g. ``CMakeCache.txt`` for ``CMakeBuilder``).
+
+----------------
+``reproduction``
+----------------
+
+The directory ``reproduction`` is used to store the files needed by the ``spack reproduce-build`` command.
+This includes ``repro.json``, copies of all of the files in ``concrete_environment``, the concrete spec
+JSON file for the current spec being built, and all of the files written in the artifacts root directory.
+
+The ``repro.json`` file is not versioned and is only designed to work with the version of Spack that
+CI was run with. An example of what a ``repro.json`` may look like is shown below.
+
+.. code:: json
+
+   {
+     "job_name": "adios2@2.9.2 /feaevuj %gcc@11.4.0 arch=linux-ubuntu20.04-x86_64_v3 E4S ROCm External",
+     "job_spec_json": "adios2.json",
+     "ci_project_dir": "/builds/spack/spack"
+   }
+
+---------
+``tests``
+---------
+
+The directory ``tests`` is used to store output from running ``spack test``. It may or may not contain
+data, depending on the package that was built and the availability of tests.
+
+-------------
+``user_data``
+-------------
+
+The directory ``user_data`` is used to store everything else that shouldn't be copied to the ``reproduction``
+directory. Users may use it to store additional logs, metrics, or other types of files generated by the build job.
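+
+Putting it all together (and reusing the ``adios2`` example above), the artifact
+tree for a single job may look like the following; the layout is illustrative
+only, and the exact set of files varies by package, job, and configured
+artifacts root:
+
+.. code:: text
+
+   jobs_scratch_dir/
+   ├── concrete_environment/
+   │   ├── spack.yaml
+   │   └── spack.lock
+   ├── logs/
+   │   ├── spack-build-out.txt
+   │   └── spack-build-mod-env.txt
+   ├── reproduction/
+   │   ├── repro.json
+   │   ├── adios2.json
+   │   └── ...
+   ├── tests/
+   └── user_data/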
+
 -------------------------------------
 Using a custom spack in your pipeline
 -------------------------------------
diff --git a/lib/spack/spack/ci/__init__.py b/lib/spack/spack/ci/__init__.py
index 83505e967f9..c084989ab01 100644
--- a/lib/spack/spack/ci/__init__.py
+++ b/lib/spack/spack/ci/__init__.py
@@ -616,7 +616,7 @@ def copy_test_logs_to_artifacts(test_stage, job_test_dir):
     copy_files_to_artifacts(os.path.join(test_stage, "*", "*.txt"), job_test_dir)
 
 
-def download_and_extract_artifacts(url, work_dir):
+def download_and_extract_artifacts(url, work_dir) -> str:
     """Look for gitlab artifacts.zip at the given url, and attempt to download
         and extract the contents into the given work_dir
 
@@ -624,6 +624,10 @@ def download_and_extract_artifacts(url, work_dir):
         url (str): Complete url to artifacts.zip file
 
         work_dir (str): Path to destination where artifacts should be extracted
+
+    Returns:
+
+        Artifacts root path relative to the archive root
     """
     tty.msg(f"Fetching artifacts from: {url}")
 
@@ -641,13 +645,25 @@ def download_and_extract_artifacts(url, work_dir):
         response = urlopen(request, timeout=SPACK_CDASH_TIMEOUT)
         with open(artifacts_zip_path, "wb") as out_file:
             shutil.copyfileobj(response, out_file)
+
+        with zipfile.ZipFile(artifacts_zip_path) as zip_file:
+            zip_file.extractall(work_dir)
+            # Get the artifact root
+            artifact_root = ""
+            for f in zip_file.filelist:
+                if "spack.lock" in f.filename:
+                    artifact_root = os.path.dirname(os.path.dirname(f.filename))
+                    break
     except OSError as e:
         raise SpackError(f"Error fetching artifacts: {e}")
+    finally:
+        try:
+            os.remove(artifacts_zip_path)
+        except FileNotFoundError:
+            # If the file doesn't exist we are already raising
+            pass
 
-    with zipfile.ZipFile(artifacts_zip_path) as zip_file:
-        zip_file.extractall(work_dir)
-
-    os.remove(artifacts_zip_path)
+    return artifact_root
 
 
 def get_spack_info():
@@ -761,7 +777,7 @@ def setup_spack_repro_version(repro_dir, checkout_commit, merge_commit=None):
     return True
 
 
-def reproduce_ci_job(url, work_dir, autostart, gpg_url, runtime):
+def reproduce_ci_job(url, work_dir, autostart, gpg_url, runtime, use_local_head):
     """Given a url to gitlab artifacts.zip from a failed 'spack ci rebuild'
     job, attempt to setup an environment in which the failure can be
     reproduced locally. This entails the following:
@@ -775,8 +791,11 @@ def reproduce_ci_job(url, work_dir, autostart, gpg_url, runtime):
     commands to run to reproduce the build once inside the container.
""" work_dir = os.path.realpath(work_dir) + if os.path.exists(work_dir) and os.listdir(work_dir): + raise SpackError(f"Cannot run reproducer in non-emptry working dir:\n {work_dir}") + platform_script_ext = "ps1" if IS_WINDOWS else "sh" - download_and_extract_artifacts(url, work_dir) + artifact_root = download_and_extract_artifacts(url, work_dir) gpg_path = None if gpg_url: @@ -838,6 +857,9 @@ def reproduce_ci_job(url, work_dir, autostart, gpg_url, runtime): with open(repro_file, encoding="utf-8") as fd: repro_details = json.load(fd) + spec_file = fs.find(work_dir, repro_details["job_spec_json"])[0] + reproducer_spec = spack.spec.Spec.from_specfile(spec_file) + repro_dir = os.path.dirname(repro_file) rel_repro_dir = repro_dir.replace(work_dir, "").lstrip(os.path.sep) @@ -898,17 +920,20 @@ def reproduce_ci_job(url, work_dir, autostart, gpg_url, runtime): commit_regex = re.compile(r"commit\s+([^\s]+)") merge_commit_regex = re.compile(r"Merge\s+([^\s]+)\s+into\s+([^\s]+)") - # Try the more specific merge commit regex first - m = merge_commit_regex.search(spack_info) - if m: - # This was a merge commit and we captured the parents - commit_1 = m.group(1) - commit_2 = m.group(2) + if use_local_head: + commit_1 = "HEAD" else: - # Not a merge commit, just get the commit sha - m = commit_regex.search(spack_info) + # Try the more specific merge commit regex first + m = merge_commit_regex.search(spack_info) if m: + # This was a merge commit and we captured the parents commit_1 = m.group(1) + commit_2 = m.group(2) + else: + # Not a merge commit, just get the commit sha + m = commit_regex.search(spack_info) + if m: + commit_1 = m.group(1) setup_result = False if commit_1: @@ -983,6 +1008,8 @@ def reproduce_ci_job(url, work_dir, autostart, gpg_url, runtime): "entrypoint", entrypoint_script, work_dir, run=False, exit_on_failure=False ) + # Attempt to create a unique name for the reproducer container + container_suffix = "_" + reproducer_spec.dag_hash() if reproducer_spec else "" docker_command = [ runtime, "run", @@ -990,14 +1017,14 @@ def reproduce_ci_job(url, work_dir, autostart, gpg_url, runtime): "-t", "--rm", "--name", - "spack_reproducer", + f"spack_reproducer{container_suffix}", "-v", ":".join([work_dir, mounted_workdir, "Z"]), "-v", ":".join( [ - os.path.join(work_dir, "jobs_scratch_dir"), - os.path.join(mount_as_dir, "jobs_scratch_dir"), + os.path.join(work_dir, artifact_root), + os.path.join(mount_as_dir, artifact_root), "Z", ] ), diff --git a/lib/spack/spack/cmd/ci.py b/lib/spack/spack/cmd/ci.py index ce438348f56..d0d04b95ee6 100644 --- a/lib/spack/spack/cmd/ci.py +++ b/lib/spack/spack/cmd/ci.py @@ -176,6 +176,11 @@ def setup_parser(subparser): reproduce.add_argument( "-s", "--autostart", help="Run docker reproducer automatically", action="store_true" ) + reproduce.add_argument( + "--use-local-head", + help="Use the HEAD of the local Spack instead of reproducing a commit", + action="store_true", + ) gpg_group = reproduce.add_mutually_exclusive_group(required=False) gpg_group.add_argument( "--gpg-file", help="Path to public GPG key for validating binary cache installs" @@ -608,7 +613,12 @@ def ci_reproduce(args): gpg_key_url = None return spack_ci.reproduce_ci_job( - args.job_url, args.working_dir, args.autostart, gpg_key_url, args.runtime + args.job_url, + args.working_dir, + args.autostart, + gpg_key_url, + args.runtime, + args.use_local_head, ) diff --git a/lib/spack/spack/test/cmd/ci.py b/lib/spack/spack/test/cmd/ci.py index c6851634f37..e2058efc4ed 100644 --- 
+++ b/lib/spack/spack/test/cmd/ci.py
@@ -28,6 +28,7 @@
 from spack.ci.generator_registry import generator
 from spack.cmd.ci import FAILED_CREATE_BUILDCACHE_CODE
 from spack.database import INDEX_JSON_FILE
+from spack.error import SpackError
 from spack.schema.buildcache_spec import schema as specfile_schema
 from spack.schema.database_index import schema as db_idx_schema
 from spack.spec import Spec
@@ -170,7 +171,9 @@ def test_ci_generate_with_env(ci_generate_test, tmp_path, mock_binary_index):
     url: https://my.fake.cdash
     project: Not used
     site: Nothing
-"""
+""",
+        "--artifacts-root",
+        str(tmp_path / "my_artifacts_root"),
     )
     yaml_contents = syaml.load(outputfile.read_text())
 
@@ -192,7 +195,7 @@ def test_ci_generate_with_env(ci_generate_test, tmp_path, mock_binary_index):
 
     assert "variables" in yaml_contents
     assert "SPACK_ARTIFACTS_ROOT" in yaml_contents["variables"]
-    assert yaml_contents["variables"]["SPACK_ARTIFACTS_ROOT"] == "jobs_scratch_dir"
+    assert yaml_contents["variables"]["SPACK_ARTIFACTS_ROOT"] == "my_artifacts_root"
 
 
 def test_ci_generate_with_env_missing_section(ci_generate_test, tmp_path, mock_binary_index):
@@ -1322,44 +1325,50 @@ def test_ci_reproduce(
     env.concretize()
     env.write()
 
-    repro_dir.mkdir()
+    def fake_download_and_extract_artifacts(url, work_dir, merge_commit_test=True):
+        with working_dir(tmp_path), ev.Environment(".") as env:
+            if not os.path.exists(repro_dir):
+                repro_dir.mkdir()
 
-    job_spec = env.concrete_roots()[0]
-    with open(repro_dir / "archivefiles.json", "w", encoding="utf-8") as f:
-        f.write(job_spec.to_json(hash=ht.dag_hash))
+            job_spec = env.concrete_roots()[0]
+            with open(repro_dir / "archivefiles.json", "w", encoding="utf-8") as f:
+                f.write(job_spec.to_json(hash=ht.dag_hash))
+            artifacts_root = repro_dir / "jobs_scratch_dir"
+            pipeline_path = artifacts_root / "pipeline.yml"
 
-    artifacts_root = repro_dir / "scratch_dir"
-    pipeline_path = artifacts_root / "pipeline.yml"
-
-    ci_cmd(
-        "generate",
-        "--output-file",
-        str(pipeline_path),
-        "--artifacts-root",
-        str(artifacts_root),
-    )
-
-    job_name = gitlab_generator.get_job_name(job_spec)
-
-    with open(repro_dir / "repro.json", "w", encoding="utf-8") as f:
-        f.write(
-            json.dumps(
-                {
-                    "job_name": job_name,
-                    "job_spec_json": "archivefiles.json",
-                    "ci_project_dir": str(repro_dir),
-                }
+            ci_cmd(
+                "generate",
+                "--output-file",
+                str(pipeline_path),
+                "--artifacts-root",
+                str(artifacts_root),
             )
-        )
 
-    with open(repro_dir / "install.sh", "w", encoding="utf-8") as f:
-        f.write("#!/bin/sh\n\n#fake install\nspack install blah\n")
+            job_name = gitlab_generator.get_job_name(job_spec)
 
-    with open(repro_dir / "spack_info.txt", "w", encoding="utf-8") as f:
-        f.write(f"\nMerge {last_two_git_commits[1]} into {last_two_git_commits[0]}\n\n")
+            with open(repro_dir / "repro.json", "w", encoding="utf-8") as f:
+                f.write(
+                    json.dumps(
+                        {
+                            "job_name": job_name,
+                            "job_spec_json": "archivefiles.json",
+                            "ci_project_dir": str(repro_dir),
+                        }
+                    )
+                )
 
-    def fake_download_and_extract_artifacts(url, work_dir):
-        pass
+            with open(repro_dir / "install.sh", "w", encoding="utf-8") as f:
+                f.write("#!/bin/sh\n\n#fake install\nspack install blah\n")
+
+            with open(repro_dir / "spack_info.txt", "w", encoding="utf-8") as f:
+                if merge_commit_test:
+                    f.write(
+                        f"\nMerge {last_two_git_commits[1]} into {last_two_git_commits[0]}\n\n"
+                    )
+                else:
+                    f.write(f"\ncommit {last_two_git_commits[1]}\n\n")
+
+        return "jobs_scratch_dir"
 
     monkeypatch.setattr(
         ci, "download_and_extract_artifacts", fake_download_and_extract_artifacts
     )
 
     rep_out = ci_cmd(
@@ -1375,6 +1384,64 @@ def fake_download_and_extract_artifacts(url, work_dir):
     # Make sure we tell the user where it is when not in interactive mode
     assert f"$ {repro_dir}/start.sh" in rep_out
 
+    # Ensure the correct commits are used
+    assert f"checkout_commit: {last_two_git_commits[0]}" in rep_out
+    assert f"merge_commit: {last_two_git_commits[1]}" in rep_out
+
+    # Test re-running in dirty working dir
+    with pytest.raises(SpackError, match=f"{repro_dir}"):
+        rep_out = ci_cmd(
+            "reproduce-build",
+            "https://example.com/api/v1/projects/1/jobs/2/artifacts",
+            "--working-dir",
+            str(repro_dir),
+            output=str,
+        )
+
+    # Cleanup between tests
+    shutil.rmtree(repro_dir)
+
+    # Test --use-local-head
+    rep_out = ci_cmd(
+        "reproduce-build",
+        "https://example.com/api/v1/projects/1/jobs/2/artifacts",
+        "--use-local-head",
+        "--working-dir",
+        str(repro_dir),
+        output=str,
+    )
+
+    # Make sure we are checking out the HEAD commit without a merge commit
+    assert "checkout_commit: HEAD" in rep_out
+    assert "merge_commit: None" in rep_out
+
+    # Test the case where the spack_info.txt is not a merge commit
+    monkeypatch.setattr(
+        ci,
+        "download_and_extract_artifacts",
+        lambda url, wd: fake_download_and_extract_artifacts(url, wd, False),
+    )
+
+    # Cleanup between tests
+    shutil.rmtree(repro_dir)
+
+    rep_out = ci_cmd(
+        "reproduce-build",
+        "https://example.com/api/v1/projects/1/jobs/2/artifacts",
+        "--working-dir",
+        str(repro_dir),
+        output=str,
+    )
+    # Make sure the script was generated
+    assert (repro_dir / "start.sh").exists()
+
+    # Make sure we tell the user where it is when not in interactive mode
+    assert f"$ {repro_dir}/start.sh" in rep_out
+
+    # Ensure the correct commit is used (different than HEAD)
+    assert f"checkout_commit: {last_two_git_commits[1]}" in rep_out
+    assert "merge_commit: None" in rep_out
+
 
 @pytest.mark.parametrize(
     "url_in,url_out",
diff --git a/share/spack/spack-completion.bash b/share/spack/spack-completion.bash
index b7ba7a8d6be..6f15771790d 100644
--- a/share/spack/spack-completion.bash
+++ b/share/spack/spack-completion.bash
@@ -706,7 +706,7 @@ _spack_ci_rebuild() {
 _spack_ci_reproduce_build() {
     if $list_options
     then
-        SPACK_COMPREPLY="-h --help --runtime --working-dir -s --autostart --gpg-file --gpg-url"
+        SPACK_COMPREPLY="-h --help --runtime --working-dir -s --autostart --use-local-head --gpg-file --gpg-url"
     else
         SPACK_COMPREPLY=""
     fi
diff --git a/share/spack/spack-completion.fish b/share/spack/spack-completion.fish
index 09565faf0fe..9689173431c 100644
--- a/share/spack/spack-completion.fish
+++ b/share/spack/spack-completion.fish
@@ -999,7 +999,7 @@ complete -c spack -n '__fish_spack_using_command ci rebuild' -s j -l jobs -r -f
 complete -c spack -n '__fish_spack_using_command ci rebuild' -s j -l jobs -r -d 'explicitly set number of parallel jobs'
 
 # spack ci reproduce-build
-set -g __fish_spack_optspecs_spack_ci_reproduce_build h/help runtime= working-dir= s/autostart gpg-file= gpg-url=
+set -g __fish_spack_optspecs_spack_ci_reproduce_build h/help runtime= working-dir= s/autostart use-local-head gpg-file= gpg-url=
 complete -c spack -n '__fish_spack_using_command_pos 0 ci reproduce-build' -f
 complete -c spack -n '__fish_spack_using_command ci reproduce-build' -s h -l help -f -a help
 complete -c spack -n '__fish_spack_using_command ci reproduce-build' -s h -l help -d 'show this help message and exit'
@@ -1009,6 +1009,8 @@ complete -c spack -n '__fish_spack_using_command ci reproduce-build' -l working-
 complete -c spack -n '__fish_spack_using_command ci reproduce-build' -l working-dir -r -d 'where to unpack artifacts'
 complete -c spack -n '__fish_spack_using_command ci reproduce-build' -s s -l autostart -f -a autostart
 complete -c spack -n '__fish_spack_using_command ci reproduce-build' -s s -l autostart -d 'Run docker reproducer automatically'
+complete -c spack -n '__fish_spack_using_command ci reproduce-build' -l use-local-head -f -a use_local_head
+complete -c spack -n '__fish_spack_using_command ci reproduce-build' -l use-local-head -d 'Use the HEAD of the local Spack instead of reproducing a commit'
 complete -c spack -n '__fish_spack_using_command ci reproduce-build' -l gpg-file -r -f -a gpg_file
 complete -c spack -n '__fish_spack_using_command ci reproduce-build' -l gpg-file -r -d 'Path to public GPG key for validating binary cache installs'
 complete -c spack -n '__fish_spack_using_command ci reproduce-build' -l gpg-url -r -f -a gpg_url