gitlab ci: Provide a knob to control untouched spec pruning (#35274)

When untouched spec pruning is enabled, specs possibly affected by a change cannot be pruned from a pipeline. Previously, spack looked at all specs matching changed package names and traversed the dependents of each, all the way to the environment roots, to compute the set of environment specs possibly affected by a change (and thus not candidates for pruning). With this PR, when untouched spec pruning is enabled, a new environment variable controls how far towards the roots spack traverses when computing the set of specs possibly affected by a change. SPACK_PRUNE_UNTOUCHED_DEPENDENT_DEPTH can be set to any integer value before the "spack ci generate" command is called to control this traversal depth. Setting it to "0" traverses only touched specs, setting it to "1" traverses only touched specs and their direct dependents, and so on. Omitting the variable results in the previous behavior of traversing all the way to the roots; a value that cannot be parsed as an integer is ignored with a warning, which likewise gives the previous behavior. Setting it to a negative value means no traversal is done, and always yields an empty set of possibly affected specs (which results in the maximum possible pruning).
2023-03-08 09:38:07 -07:00
parent 22d4e79037
commit 4a9ffdcfa2
3 changed files with 148 additions and 21 deletions
--- a/lib/spack/spack/ci.py
+++ b/lib/spack/spack/ci.py
@@ -490,16 +490,28 @@ def compute_affected_packages(rev1="HEAD^", rev2="HEAD"):
    return spack.repo.get_all_package_diffs("ARC", rev1=rev1, rev2=rev2)


-def get_spec_filter_list(env, affected_pkgs):
+def get_spec_filter_list(env, affected_pkgs, dependent_traverse_depth=None):
    """Given a list of package names and an active/concretized
       environment, return the set of all concrete specs from the
       environment that could have been affected by changing the
       list of packages.

+       If a ``dependent_traverse_depth`` is given, it is used to limit
+       upward (in the parent direction) traversal of specs of touched
+       packages.  E.g. if 1 is provided, then only direct dependents
+       of touched package specs are traversed to produce specs that
+       could have been affected by changing the package, while if 0 is
+       provided, only the changed specs themselves are traversed. If ``None``
+       is given, upward traversal of touched package specs is done all
+       the way to the environment roots.  Providing a negative number
+       results in no traversals at all, yielding an empty set.
+
    Arguments:

        env (spack.environment.Environment): Active concrete environment
        affected_pkgs (List[str]): Affected package names
+        dependent_traverse_depth: Optional integer to limit dependent
+            traversal, or None to disable the limit.

    Returns:

@@ -516,7 +528,8 @@ def get_spec_filter_list(env, affected_pkgs):
    visited = set()
    dag_hash = lambda s: s.dag_hash()
    for match in env_matches:
-        for parent in match.traverse(direction="parents", key=dag_hash):
+        for dep_level, parent in match.traverse(direction="parents", key=dag_hash, depth=True):
+            if dependent_traverse_depth is None or dep_level <= dependent_traverse_depth:
                affected_specs.update(
                    parent.traverse(direction="children", visited=visited, key=dag_hash)
                )
@@ -580,6 +593,18 @@ def generate_gitlab_ci_yaml(
    cdash_handler = CDashHandler(yaml_root.get("cdash")) if "cdash" in yaml_root else None
    build_group = cdash_handler.build_group if cdash_handler else None

+    dependent_depth = os.environ.get("SPACK_PRUNE_UNTOUCHED_DEPENDENT_DEPTH", None)
+    if dependent_depth is not None:
+        try:
+            dependent_depth = int(dependent_depth)
+        except (TypeError, ValueError):
+            tty.warn(
+                "Unrecognized value ({0}) ".format(dependent_depth),
+                "provide forSPACK_PRUNE_UNTOUCHED_DEPENDENT_DEPTH, ",
+                "ignoring it.",
+            )
+            dependent_depth = None
+
    prune_untouched_packages = False
    spack_prune_untouched = os.environ.get("SPACK_PRUNE_UNTOUCHED", None)
    if spack_prune_untouched is not None and spack_prune_untouched.lower() == "true":
@@ -595,7 +620,9 @@ def generate_gitlab_ci_yaml(
                tty.debug("affected pkgs:")
                for p in affected_pkgs:
                    tty.debug("  {0}".format(p))
-                affected_specs = get_spec_filter_list(env, affected_pkgs)
+                affected_specs = get_spec_filter_list(
+                    env, affected_pkgs, dependent_traverse_depth=dependent_depth
+                )
                tty.debug("all affected specs:")
                for s in affected_specs:
                    tty.debug("  {0}/{1}".format(s.name, s.dag_hash()[:7]))
--- a/lib/spack/spack/test/ci.py
+++ b/lib/spack/spack/test/ci.py
@@ -408,19 +408,36 @@ def test_get_spec_filter_list(mutable_mock_env_path, config, mutable_mock_repo):

    touched = ["libdwarf"]

-    # traversing both directions from libdwarf in the graphs depicted
-    # above (and additionally including dependencies of dependents of
-    # libdwarf) results in the following possibly affected env specs:
-    # mpileaks, callpath, dyninst, libdwarf, libelf, and mpich.
-    # Unaffected specs are hypre and it's dependencies.
+    # Make sure we return the correct set of possibly affected specs,
+    # given a dependent traversal depth and the fact that the touched
+    # package is libdwarf.  Passing traversal depth of None or something
+    # equal to or larger than the greatest depth in the graph are
+    # equivalent and result in traversal of all specs from the touched
+    # package to the root.  Passing negative traversal depth results in
+    # no spec traversals.  Passing any other number yields differing
+    # numbers of possibly affected specs.

-    affected_specs = ci.get_spec_filter_list(e1, touched)
+    full_set = set(["mpileaks", "mpich", "callpath", "dyninst", "libdwarf", "libelf"])
+    empty_set = set([])
+    depth_2_set = set(["mpich", "callpath", "dyninst", "libdwarf", "libelf"])
+    depth_1_set = set(["dyninst", "libdwarf", "libelf"])
+    depth_0_set = set(["libdwarf", "libelf"])
+
+    expectations = {
+        None: full_set,
+        3: full_set,
+        100: full_set,
+        -1: empty_set,
+        0: depth_0_set,
+        1: depth_1_set,
+        2: depth_2_set,
+    }
+
+    for key, val in expectations.items():
+        affected_specs = ci.get_spec_filter_list(e1, touched, dependent_traverse_depth=key)
        affected_pkg_names = set([s.name for s in affected_specs])
-    expected_affected_pkg_names = set(
-        ["mpileaks", "mpich", "callpath", "dyninst", "libdwarf", "libelf"]
-    )
-
-    assert affected_pkg_names == expected_affected_pkg_names
+        print(f"{key}: {affected_pkg_names}")
+        assert affected_pkg_names == val


@pytest.mark.regression("29947")
--- a/lib/spack/spack/test/cmd/ci.py
+++ b/lib/spack/spack/test/cmd/ci.py
@@ -1755,6 +1755,12 @@ def test_ci_generate_prune_untouched(
                mirror_url
            )
        )
+
+    # Dependency graph rooted at callpath
+    # callpath -> dyninst -> libelf
+    #                     -> libdwarf -> libelf
+    #          -> mpich
+
    with tmpdir.as_cwd():
        env_cmd("create", "test", "./spack.yaml")
        outputfile = str(tmpdir.join(".gitlab-ci.yml"))
@@ -1765,19 +1771,96 @@ def fake_compute_affected(r1=None, r2=None):
        def fake_stack_changed(env_path, rev1="HEAD^", rev2="HEAD"):
            return False

-        with ev.read("test"):
+        env_hashes = {}
+
+        with ev.read("test") as active_env:
            monkeypatch.setattr(ci, "compute_affected_packages", fake_compute_affected)
            monkeypatch.setattr(ci, "get_stack_changed", fake_stack_changed)
+
+            active_env.concretize()
+
+            for s in active_env.all_specs():
+                env_hashes[s.name] = s.dag_hash()
+
            ci_cmd("generate", "--output-file", outputfile)

        with open(outputfile) as f:
            contents = f.read()
+            print(contents)
            yaml_contents = syaml.load(contents)

+            generated_hashes = []
+
            for ci_key in yaml_contents.keys():
-                if "archive-files" in ci_key:
-                    print("Error: archive-files should have been pruned")
-                    assert False
+                if ci_key.startswith("(specs)"):
+                    generated_hashes.append(
+                        yaml_contents[ci_key]["variables"]["SPACK_JOB_SPEC_DAG_HASH"]
+                    )
+
+            assert env_hashes["archive-files"] not in generated_hashes
+            for spec_name in ["callpath", "dyninst", "mpich", "libdwarf", "libelf"]:
+                assert env_hashes[spec_name] in generated_hashes
+
+
+def test_ci_generate_prune_env_vars(
+    tmpdir, mutable_mock_env_path, install_mockery, mock_packages, ci_base_environment, monkeypatch
+):
+    """Make sure environment variables controlling untouched spec
+    pruning behave as expected."""
+    os.environ.update(
+        {
+            "SPACK_PRUNE_UNTOUCHED": "TRUE",  # enables pruning of untouched specs
+        }
+    )
+    filename = str(tmpdir.join("spack.yaml"))
+    with open(filename, "w") as f:
+        f.write(
+            """\
+spack:
+  specs:
+    - libelf
+  gitlab-ci:
+    mappings:
+      - match:
+          - arch=test-debian6-core2
+        runner-attributes:
+          tags:
+            - donotcare
+          image: donotcare
+"""
+        )
+
+    with tmpdir.as_cwd():
+        env_cmd("create", "test", "./spack.yaml")
+
+        def fake_compute_affected(r1=None, r2=None):
+            return ["libdwarf"]
+
+        def fake_stack_changed(env_path, rev1="HEAD^", rev2="HEAD"):
+            return False
+
+        expected_depth_param = None
+
+        def check_get_spec_filter_list(env, affected_pkgs, dependent_traverse_depth=None):
+            assert dependent_traverse_depth == expected_depth_param
+            return set()
+
+        monkeypatch.setattr(ci, "compute_affected_packages", fake_compute_affected)
+        monkeypatch.setattr(ci, "get_stack_changed", fake_stack_changed)
+        monkeypatch.setattr(ci, "get_spec_filter_list", check_get_spec_filter_list)
+
+        expectations = {"-1": -1, "0": 0, "True": None}
+
+        for key, val in expectations.items():
+            with ev.read("test"):
+                os.environ.update({"SPACK_PRUNE_UNTOUCHED_DEPENDENT_DEPTH": key})
+                expected_depth_param = val
+                # Leaving out the mirror in the spack.yaml above means the
+                # pipeline generation command will fail, pretty much immediately.
+                # But for this test, we only care how the environment variables
+                # for pruning are handled, the faster the better.  So allow the
+                # spack command to fail.
+                ci_cmd("generate", fail_on_error=False)


 def test_ci_subcommands_without_mirror(