ci: Refactor pipeline generation (#47459)

Reorganize the pipeline generation aspect of the ci module,
mostly to separate the representation, generation, and
pruning of pipeline graphs from platform-specific output
formatting.

Introduce a pipeline generation registry to support generating
pipelines for other platforms, though gitlab is currently the only
supported format.

Fix a long-standing bug in pipeline pruning where only direct
dependencies were added to any node's dependency list.
Scott Wittenburg
2024-12-11 12:23:37 -07:00
committed by GitHub
parent ba52c4f05d
commit de1416b3de
11 changed files with 2962 additions and 2399 deletions

File diff suppressed because it is too large


@@ -0,0 +1,41 @@
# Spack CI generators

This document describes how the ci module can be extended to provide novel
ci generators. The module currently has only a single generator for gitlab.
The unit tests for the ci module define a small custom generator for testing
purposes as well.

The process of generating a pipeline involves creating a ci-enabled spack
environment, activating it, and running `spack ci generate`, possibly with
arguments describing things like where the output should be written.

Internally, pipeline generation is broken into two components: general and
ci platform-specific.
## General pipeline functionality

General pipeline functionality includes building a pipeline graph (really,
a forest), pruning it in a variety of ways, and gathering attributes for all
the generated spec build jobs from the spack configuration.

All of the above functionality is defined in the `__init__.py` of the top-level
ci module, and should be roughly the same for pipelines generated for any
platform.
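
For a rough illustration of how these pieces fit together, here is a minimal
sketch (not part of the module itself): it builds a `PipelineDag` from a list
of already-concretized specs, prunes one node, and walks the remaining graph
bottom-up. The import path is the one used by the unit tests; `concrete_specs`
and the pruned dependency name are placeholders.

```python
from spack.ci.common import PipelineDag

# concrete_specs: a list of concretized Specs, e.g. from an active environment (placeholder)
dag = PipelineDag(concrete_specs)

# Remove one node; its parents are reconnected directly to its children.
dag.prune(PipelineDag.key(concrete_specs[0]["some-dependency"]))  # hypothetical dependency name

# Topologically ordered, bottom-up traversal; the yielded depth is the length
# of the longest path from a leaf to the node.
for depth, node in dag.traverse_nodes(direction="parents"):
    print(depth, node.spec.name)
```
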
## CI platform-specific functionality

Functionality specific to CI platforms (e.g. gitlab, gha, etc.) should be
defined in a dedicated module. In order to define a generator for a new
platform, there are only a few requirements (a minimal sketch follows this
list):

1. add a file under `ci` in which you define a generator method decorated with
   the `@generator` decorator.
1. import it in `lib/spack/spack/ci/__init__.py`, so that your new generator
   is registered.
1. the generator method must take `PipelineDag`, `SpackCIConfig`, and
   `PipelineOptions` objects as arguments, in that order.
1. the generator method must produce an output file containing the
   generated pipeline.
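
For example, a generator for a hypothetical `circleci` target might look
roughly like the following sketch (the module name, target name, and output
format are illustrative; only the decorator, the argument types, and the
requirement to write an output file come from the list above):

```python
# lib/spack/spack/ci/circleci.py -- hypothetical example module
import spack.util.spack_yaml as syaml

from .common import PipelineDag, PipelineOptions, SpackCIConfig
from .generator_registry import generator


@generator("circleci")
def generate_circleci_yaml(
    pipeline: PipelineDag, spack_ci: SpackCIConfig, options: PipelineOptions
):
    """Write a toy pipeline file with one entry per spec in the graph."""
    # spack_ci carries the per-job attributes from the spack ci configuration;
    # it is unused in this toy example.
    assert options.output_file is not None
    jobs = {}
    for _, node in pipeline.traverse_nodes(direction="children"):
        jobs[node.spec.name] = {"dag_hash": node.spec.dag_hash()}
    with open(options.output_file, "w") as fd:
        syaml.dump_config({"jobs": jobs}, fd, default_flow_style=False)
```

With the module imported from `__init__.py`, setting `target: circleci` in the
environment's `ci` configuration routes `spack ci generate` to this function;
the unit tests register a similar toy generator (`unittestgenerator`) in the
same way.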

File diff suppressed because it is too large


@@ -0,0 +1,824 @@
# Copyright 2013-2024 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import codecs
import copy
import json
import os
import re
import ssl
import sys
import time
from collections import deque
from enum import Enum
from typing import Dict, Generator, List, Optional, Set, Tuple
from urllib.parse import quote, urlencode, urlparse
from urllib.request import HTTPHandler, HTTPSHandler, Request, build_opener
import llnl.util.filesystem as fs
import llnl.util.tty as tty
from llnl.util.lang import Singleton, memoized
import spack.binary_distribution as bindist
import spack.config as cfg
import spack.deptypes as dt
import spack.environment as ev
import spack.error
import spack.mirrors.mirror
import spack.spec
import spack.util.spack_yaml as syaml
import spack.util.url as url_util
import spack.util.web as web_util
from spack import traverse
from spack.reporters import CDash, CDashConfiguration
from spack.reporters.cdash import SPACK_CDASH_TIMEOUT
from spack.reporters.cdash import build_stamp as cdash_build_stamp
def _urlopen():
error_handler = web_util.SpackHTTPDefaultErrorHandler()
# One opener with HTTPS ssl enabled
with_ssl = build_opener(
HTTPHandler(), HTTPSHandler(context=web_util.ssl_create_default_context()), error_handler
)
# One opener with HTTPS ssl disabled
without_ssl = build_opener(
HTTPHandler(), HTTPSHandler(context=ssl._create_unverified_context()), error_handler
)
# And dynamically dispatch based on the config:verify_ssl.
def dispatch_open(fullurl, data=None, timeout=None, verify_ssl=True):
opener = with_ssl if verify_ssl else without_ssl
timeout = timeout or cfg.get("config:connect_timeout", 1)
return opener.open(fullurl, data, timeout)
return dispatch_open
IS_WINDOWS = sys.platform == "win32"
SPACK_RESERVED_TAGS = ["public", "protected", "notary"]
_dyn_mapping_urlopener = Singleton(_urlopen)
def copy_files_to_artifacts(src, artifacts_dir):
"""
Copy file(s) to the given artifacts directory
Parameters:
src (str): the glob-friendly path expression for the file(s) to copy
artifacts_dir (str): the destination directory
"""
try:
fs.copy(src, artifacts_dir)
except Exception as err:
msg = (
f"Unable to copy files ({src}) to artifacts {artifacts_dir} due to "
f"exception: {str(err)}"
)
tty.warn(msg)
def win_quote(quote_str: str) -> str:
if IS_WINDOWS:
quote_str = f'"{quote_str}"'
return quote_str
def _spec_matches(spec, match_string):
return spec.intersects(match_string)
def _noop(x):
return x
def unpack_script(script_section, op=_noop):
script = []
for cmd in script_section:
if isinstance(cmd, list):
for subcmd in cmd:
script.append(op(subcmd))
else:
script.append(op(cmd))
return script
def ensure_expected_target_path(path: str) -> str:
"""Returns passed paths with all Windows path separators exchanged
for posix separators
TODO (johnwparent): Refactor config + cli read/write to deal only in posix style paths
"""
if path:
return path.replace("\\", "/")
return path
def update_env_scopes(
env: ev.Environment,
cli_scopes: List[str],
output_file: str,
transform_windows_paths: bool = False,
) -> None:
"""Add any config scopes from cli_scopes which aren't already included in the
environment, by reading the yaml, adding the missing includes, and writing the
updated yaml back to the same location.
"""
with open(env.manifest_path, "r") as env_fd:
env_yaml_root = syaml.load(env_fd)
# Add config scopes to environment
env_includes = env_yaml_root["spack"].get("include", [])
include_scopes: List[str] = []
for scope in cli_scopes:
if scope not in include_scopes and scope not in env_includes:
include_scopes.insert(0, scope)
env_includes.extend(include_scopes)
env_yaml_root["spack"]["include"] = [
ensure_expected_target_path(i) if transform_windows_paths else i for i in env_includes
]
with open(output_file, "w") as fd:
syaml.dump_config(env_yaml_root, fd, default_flow_style=False)
def write_pipeline_manifest(specs, src_prefix, dest_prefix, output_file):
"""Write out the file describing specs that should be copied"""
buildcache_copies = {}
for release_spec in specs:
release_spec_dag_hash = release_spec.dag_hash()
# TODO: This assumes signed version of the spec
buildcache_copies[release_spec_dag_hash] = [
{
"src": url_util.join(
src_prefix,
bindist.build_cache_relative_path(),
bindist.tarball_name(release_spec, ".spec.json.sig"),
),
"dest": url_util.join(
dest_prefix,
bindist.build_cache_relative_path(),
bindist.tarball_name(release_spec, ".spec.json.sig"),
),
},
{
"src": url_util.join(
src_prefix,
bindist.build_cache_relative_path(),
bindist.tarball_path_name(release_spec, ".spack"),
),
"dest": url_util.join(
dest_prefix,
bindist.build_cache_relative_path(),
bindist.tarball_path_name(release_spec, ".spack"),
),
},
]
target_dir = os.path.dirname(output_file)
if not os.path.exists(target_dir):
os.makedirs(target_dir)
with open(output_file, "w") as fd:
fd.write(json.dumps(buildcache_copies))
class CDashHandler:
"""
Class for managing CDash data and processing.
"""
def __init__(self, ci_cdash):
# start with the gitlab ci configuration
self.url = ci_cdash.get("url")
self.build_group = ci_cdash.get("build-group")
self.project = ci_cdash.get("project")
self.site = ci_cdash.get("site")
# grab the authorization token when available
self.auth_token = os.environ.get("SPACK_CDASH_AUTH_TOKEN")
if self.auth_token:
tty.verbose("Using CDash auth token from environment")
# append runner description to the site if available
runner = os.environ.get("CI_RUNNER_DESCRIPTION")
if runner:
self.site += f" ({runner})"
def args(self):
return [
"--cdash-upload-url",
win_quote(self.upload_url),
"--cdash-build",
win_quote(self.build_name),
"--cdash-site",
win_quote(self.site),
"--cdash-buildstamp",
win_quote(self.build_stamp),
]
def build_name(self, spec: Optional[spack.spec.Spec] = None) -> Optional[str]:
"""Returns the CDash build name.
A name will be generated if the `spec` is provided,
otherwise, the value will be retrieved from the environment
through the `SPACK_CDASH_BUILD_NAME` variable.
Returns: (str) given spec's CDash build name."""
if spec:
build_name = f"{spec.name}@{spec.version}%{spec.compiler} \
hash={spec.dag_hash()} arch={spec.architecture} ({self.build_group})"
tty.debug(f"Generated CDash build name ({build_name}) from the {spec.name}")
return build_name
env_build_name = os.environ.get("SPACK_CDASH_BUILD_NAME")
tty.debug(f"Using CDash build name ({env_build_name}) from the environment")
return env_build_name
@property # type: ignore
def build_stamp(self):
"""Returns the CDash build stamp.
The one defined by SPACK_CDASH_BUILD_STAMP environment variable
is preferred due to the representation of timestamps; otherwise,
one will be built.
Returns: (str) current CDash build stamp"""
build_stamp = os.environ.get("SPACK_CDASH_BUILD_STAMP")
if build_stamp:
tty.debug(f"Using build stamp ({build_stamp}) from the environment")
return build_stamp
build_stamp = cdash_build_stamp(self.build_group, time.time())
tty.debug(f"Generated new build stamp ({build_stamp})")
return build_stamp
@property # type: ignore
@memoized
def project_enc(self):
tty.debug(f"Encoding project ({type(self.project)}): {self.project})")
encode = urlencode({"project": self.project})
index = encode.find("=") + 1
return encode[index:]
@property
def upload_url(self):
url_format = f"{self.url}/submit.php?project={self.project_enc}"
return url_format
def copy_test_results(self, source, dest):
"""Copy test results to artifacts directory."""
reports = fs.join_path(source, "*_Test*.xml")
copy_files_to_artifacts(reports, dest)
def create_buildgroup(self, opener, headers, url, group_name, group_type):
data = {"newbuildgroup": group_name, "project": self.project, "type": group_type}
enc_data = json.dumps(data).encode("utf-8")
request = Request(url, data=enc_data, headers=headers)
response = opener.open(request, timeout=SPACK_CDASH_TIMEOUT)
response_code = response.getcode()
if response_code not in [200, 201]:
msg = f"Creating buildgroup failed (response code = {response_code})"
tty.warn(msg)
return None
response_text = response.read()
response_json = json.loads(response_text)
build_group_id = response_json["id"]
return build_group_id
def populate_buildgroup(self, job_names):
url = f"{self.url}/api/v1/buildgroup.php"
headers = {
"Authorization": f"Bearer {self.auth_token}",
"Content-Type": "application/json",
}
opener = build_opener(HTTPHandler)
parent_group_id = self.create_buildgroup(opener, headers, url, self.build_group, "Daily")
group_id = self.create_buildgroup(
opener, headers, url, f"Latest {self.build_group}", "Latest"
)
if not parent_group_id or not group_id:
msg = f"Failed to create or retrieve buildgroups for {self.build_group}"
tty.warn(msg)
return
data = {
"dynamiclist": [
{"match": name, "parentgroupid": parent_group_id, "site": self.site}
for name in job_names
]
}
enc_data = json.dumps(data).encode("utf-8")
request = Request(url, data=enc_data, headers=headers)
request.get_method = lambda: "PUT"
response = opener.open(request, timeout=SPACK_CDASH_TIMEOUT)
response_code = response.getcode()
if response_code != 200:
msg = f"Error response code ({response_code}) in populate_buildgroup"
tty.warn(msg)
def report_skipped(self, spec: spack.spec.Spec, report_dir: str, reason: Optional[str]):
"""Explicitly report skipping testing of a spec (e.g., it's CI
configuration identifies it as known to have broken tests or
the CI installation failed).
Args:
spec: spec being tested
report_dir: directory where the report will be written
reason: reason the test is being skipped
"""
configuration = CDashConfiguration(
upload_url=self.upload_url,
packages=[spec.name],
build=self.build_name,
site=self.site,
buildstamp=self.build_stamp,
track=None,
)
reporter = CDash(configuration=configuration)
reporter.test_skipped_report(report_dir, spec, reason)
class PipelineType(Enum):
COPY_ONLY = 1
spack_copy_only = 1
PROTECTED_BRANCH = 2
spack_protected_branch = 2
PULL_REQUEST = 3
spack_pull_request = 3
class PipelineOptions:
"""A container for all pipeline options that can be specified (whether
via cli, config/yaml, or environment variables)"""
def __init__(
self,
env: ev.Environment,
buildcache_destination: spack.mirrors.mirror.Mirror,
artifacts_root: str = "jobs_scratch_dir",
print_summary: bool = True,
output_file: Optional[str] = None,
check_index_only: bool = False,
broken_specs_url: Optional[str] = None,
rebuild_index: bool = True,
untouched_pruning_dependent_depth: Optional[int] = None,
prune_untouched: bool = False,
prune_up_to_date: bool = True,
prune_external: bool = True,
stack_name: Optional[str] = None,
pipeline_type: Optional[PipelineType] = None,
require_signing: bool = False,
cdash_handler: Optional["CDashHandler"] = None,
):
"""
Args:
env: Active spack environment
buildcache_destination: The mirror where built binaries should be pushed
artifacts_root: Path to location where artifacts should be stored
print_summary: Print a summary of the scheduled pipeline
output_file: Path where output file should be written
check_index_only: Only fetch the index or fetch all spec files
broken_specs_url: URL where broken specs (on develop) should be reported
rebuild_index: Generate a job to rebuild mirror index after rebuilds
untouched_pruning_dependent_depth: How many parents to traverse from changed pkg specs
prune_untouched: Prune jobs for specs that were unchanged in git history
prune_up_to_date: Prune specs from pipeline if binary exists on the mirror
prune_external: Prune specs from pipeline if they are external
stack_name: Name of spack stack
pipeline_type: Type of pipeline running (optional)
require_signing: Require buildcache to be signed (fail w/out signing key)
cdash_handler: Object for communicating build information with CDash
"""
self.env = env
self.buildcache_destination = buildcache_destination
self.artifacts_root = artifacts_root
self.print_summary = print_summary
self.output_file = output_file
self.check_index_only = check_index_only
self.broken_specs_url = broken_specs_url
self.rebuild_index = rebuild_index
self.untouched_pruning_dependent_depth = untouched_pruning_dependent_depth
self.prune_untouched = prune_untouched
self.prune_up_to_date = prune_up_to_date
self.prune_external = prune_external
self.stack_name = stack_name
self.pipeline_type = pipeline_type
self.require_signing = require_signing
self.cdash_handler = cdash_handler
class PipelineNode:
spec: spack.spec.Spec
parents: Set[str]
children: Set[str]
def __init__(self, spec: spack.spec.Spec):
self.spec = spec
self.parents = set()
self.children = set()
@property
def key(self):
"""Return key of the stored spec"""
return PipelineDag.key(self.spec)
class PipelineDag:
"""Turn a list of specs into a simple directed graph, that doesn't keep track
of edge types."""
@classmethod
def key(cls, spec: spack.spec.Spec) -> str:
return spec.dag_hash()
def __init__(self, specs: List[spack.spec.Spec]) -> None:
# Build dictionary of nodes
self.nodes: Dict[str, PipelineNode] = {
PipelineDag.key(s): PipelineNode(s)
for s in traverse.traverse_nodes(specs, deptype=dt.ALL_TYPES, root=True)
}
# Create edges
for edge in traverse.traverse_edges(
specs, deptype=dt.ALL_TYPES, root=False, cover="edges"
):
parent_key = PipelineDag.key(edge.parent)
child_key = PipelineDag.key(edge.spec)
self.nodes[parent_key].children.add(child_key)
self.nodes[child_key].parents.add(parent_key)
def prune(self, node_key: str):
"""Remove a node from the graph, and reconnect its parents and children"""
node = self.nodes[node_key]
for parent in node.parents:
self.nodes[parent].children.remove(node_key)
self.nodes[parent].children |= node.children
for child in node.children:
self.nodes[child].parents.remove(node_key)
self.nodes[child].parents |= node.parents
del self.nodes[node_key]
def traverse_nodes(
self, direction: str = "children"
) -> Generator[Tuple[int, PipelineNode], None, None]:
"""Yields (depth, node) from the pipeline graph. Traversal is topologically
ordered from the roots if ``direction`` is ``children``, or from the leaves
if ``direction`` is ``parents``. The yielded depth is the length of the
longest path from the starting point to the yielded node."""
if direction == "children":
get_in_edges = lambda node: node.parents
get_out_edges = lambda node: node.children
else:
get_in_edges = lambda node: node.children
get_out_edges = lambda node: node.parents
sort_key = lambda k: self.nodes[k].spec.name
out_edges = {k: sorted(get_out_edges(n), key=sort_key) for k, n in self.nodes.items()}
num_in_edges = {k: len(get_in_edges(n)) for k, n in self.nodes.items()}
# Populate a queue with all the nodes that have no incoming edges
nodes = deque(
sorted(
[(0, key) for key in self.nodes.keys() if num_in_edges[key] == 0],
key=lambda item: item[1],
)
)
while nodes:
# Remove the next node, n, from the queue and yield it
depth, n_key = nodes.pop()
yield (depth, self.nodes[n_key])
# Remove an in-edge from every node, m, pointed to by an
# out-edge from n. If any of those nodes are left with
# 0 remaining in-edges, add them to the queue.
for m in out_edges[n_key]:
num_in_edges[m] -= 1
if num_in_edges[m] == 0:
nodes.appendleft((depth + 1, m))
def get_dependencies(self, node: PipelineNode) -> List[PipelineNode]:
"""Returns a list of nodes corresponding to the direct dependencies
of the given node."""
return [self.nodes[k] for k in node.children]
class SpackCIConfig:
"""Spack CI object used to generate intermediate representation
used by the CI generator(s).
"""
def __init__(self, ci_config):
"""Given the information from the ci section of the config
and the staged jobs, set up meta data needed for generating Spack
CI IR.
"""
self.ci_config = ci_config
self.named_jobs = ["any", "build", "copy", "cleanup", "noop", "reindex", "signing"]
self.ir = {
"jobs": {},
"rebuild-index": self.ci_config.get("rebuild-index", True),
"broken-specs-url": self.ci_config.get("broken-specs-url", None),
"broken-tests-packages": self.ci_config.get("broken-tests-packages", []),
"target": self.ci_config.get("target", "gitlab"),
}
jobs = self.ir["jobs"]
for name in self.named_jobs:
# Skip the special named jobs
if name not in ["any", "build"]:
jobs[name] = self.__init_job("")
def __init_job(self, release_spec):
"""Initialize job object"""
job_object = {"spec": release_spec, "attributes": {}}
if release_spec:
job_vars = job_object["attributes"].setdefault("variables", {})
job_vars["SPACK_JOB_SPEC_DAG_HASH"] = release_spec.dag_hash()
job_vars["SPACK_JOB_SPEC_PKG_NAME"] = release_spec.name
job_vars["SPACK_JOB_SPEC_PKG_VERSION"] = release_spec.format("{version}")
job_vars["SPACK_JOB_SPEC_COMPILER_NAME"] = release_spec.format("{compiler.name}")
job_vars["SPACK_JOB_SPEC_COMPILER_VERSION"] = release_spec.format("{compiler.version}")
job_vars["SPACK_JOB_SPEC_ARCH"] = release_spec.format("{architecture}")
job_vars["SPACK_JOB_SPEC_VARIANTS"] = release_spec.format("{variants}")
return job_object
def __is_named(self, section):
"""Check if a pipeline-gen configuration section is for a named job,
and if so return the name; otherwise return None.
"""
for _name in self.named_jobs:
keys = [f"{_name}-job", f"{_name}-job-remove"]
if any([key for key in keys if key in section]):
return _name
return None
@staticmethod
def __job_name(name, suffix=""):
"""Compute the name of a named job with appropriate suffix.
Valid suffixes are either '-remove' or empty string or None
"""
assert isinstance(name, str)
jname = name
if suffix:
jname = f"{name}-job{suffix}"
else:
jname = f"{name}-job"
return jname
def __apply_submapping(self, dest, spec, section):
"""Apply submapping setion to the IR dict"""
matched = False
only_first = section.get("match_behavior", "first") == "first"
for match_attrs in reversed(section["submapping"]):
attrs = cfg.InternalConfigScope._process_dict_keyname_overrides(match_attrs)
for match_string in match_attrs["match"]:
if _spec_matches(spec, match_string):
matched = True
if "build-job-remove" in match_attrs:
spack.config.remove_yaml(dest, attrs["build-job-remove"])
if "build-job" in match_attrs:
spack.config.merge_yaml(dest, attrs["build-job"])
break
if matched and only_first:
break
return dest
# Create jobs for all the pipeline specs
def init_pipeline_jobs(self, pipeline: PipelineDag):
for _, node in pipeline.traverse_nodes():
dag_hash = node.spec.dag_hash()
self.ir["jobs"][dag_hash] = self.__init_job(node.spec)
# Generate IR from the configs
def generate_ir(self):
"""Generate the IR from the Spack CI configurations."""
jobs = self.ir["jobs"]
# Implicit job defaults
defaults = [
{
"build-job": {
"script": [
"cd {env_dir}",
"spack env activate --without-view .",
"spack ci rebuild",
]
}
},
{"noop-job": {"script": ['echo "All specs already up to date, nothing to rebuild."']}},
]
# Job overrides
overrides = [
# Reindex script
{
"reindex-job": {
"script:": ["spack buildcache update-index --keys {index_target_mirror}"]
}
},
# Cleanup script
{
"cleanup-job": {
"script:": ["spack -d mirror destroy {mirror_prefix}/$CI_PIPELINE_ID"]
}
},
# Add signing job tags
{"signing-job": {"tags": ["aws", "protected", "notary"]}},
# Remove reserved tags
{"any-job-remove": {"tags": SPACK_RESERVED_TAGS}},
]
pipeline_gen = overrides + self.ci_config.get("pipeline-gen", []) + defaults
for section in reversed(pipeline_gen):
name = self.__is_named(section)
has_submapping = "submapping" in section
has_dynmapping = "dynamic-mapping" in section
section = cfg.InternalConfigScope._process_dict_keyname_overrides(section)
if name:
remove_job_name = self.__job_name(name, suffix="-remove")
merge_job_name = self.__job_name(name)
do_remove = remove_job_name in section
do_merge = merge_job_name in section
def _apply_section(dest, src):
if do_remove:
dest = spack.config.remove_yaml(dest, src[remove_job_name])
if do_merge:
dest = copy.copy(spack.config.merge_yaml(dest, src[merge_job_name]))
if name == "build":
# Apply attributes to all build jobs
for _, job in jobs.items():
if job["spec"]:
_apply_section(job["attributes"], section)
elif name == "any":
# Apply section attributes to all jobs
for _, job in jobs.items():
_apply_section(job["attributes"], section)
else:
# Create a signing job if there is script and the job hasn't
# been initialized yet
if name == "signing" and name not in jobs:
if "signing-job" in section:
if "script" not in section["signing-job"]:
continue
else:
jobs[name] = self.__init_job("")
# Apply attributes to named job
_apply_section(jobs[name]["attributes"], section)
elif has_submapping:
# Apply section jobs with specs to match
for _, job in jobs.items():
if job["spec"]:
job["attributes"] = self.__apply_submapping(
job["attributes"], job["spec"], section
)
elif has_dynmapping:
mapping = section["dynamic-mapping"]
dynmap_name = mapping.get("name")
# Check if this section should be skipped
dynmap_skip = os.environ.get("SPACK_CI_SKIP_DYNAMIC_MAPPING")
if dynmap_name and dynmap_skip:
if re.match(dynmap_skip, dynmap_name):
continue
# Get the endpoint
endpoint = mapping["endpoint"]
endpoint_url = urlparse(endpoint)
# Configure the request header
header = {"User-Agent": web_util.SPACK_USER_AGENT}
header.update(mapping.get("header", {}))
# Expand header environment variables
# ie. if tokens are passed
for value in header.values():
value = os.path.expandvars(value)
verify_ssl = mapping.get("verify_ssl", spack.config.get("config:verify_ssl", True))
timeout = mapping.get("timeout", spack.config.get("config:connect_timeout", 1))
required = mapping.get("require", [])
allowed = mapping.get("allow", [])
ignored = mapping.get("ignore", [])
# required keys are implicitly allowed
allowed = sorted(set(allowed + required))
ignored = sorted(set(ignored))
required = sorted(set(required))
# Make sure required things are not also ignored
assert not any([ikey in required for ikey in ignored])
def job_query(job):
job_vars = job["attributes"]["variables"]
query = (
"{SPACK_JOB_SPEC_PKG_NAME}@{SPACK_JOB_SPEC_PKG_VERSION}"
# The preceding spaces are required (ref. https://github.com/spack/spack-gantry/blob/develop/docs/api.md#allocation)
" {SPACK_JOB_SPEC_VARIANTS}"
" arch={SPACK_JOB_SPEC_ARCH}"
"%{SPACK_JOB_SPEC_COMPILER_NAME}@{SPACK_JOB_SPEC_COMPILER_VERSION}"
).format_map(job_vars)
return f"spec={quote(query)}"
for job in jobs.values():
if not job["spec"]:
continue
# Create request for this job
query = job_query(job)
request = Request(
endpoint_url._replace(query=query).geturl(), headers=header, method="GET"
)
try:
response = _dyn_mapping_urlopener(
request, verify_ssl=verify_ssl, timeout=timeout
)
except Exception as e:
# For now just ignore any errors from dynamic mapping and continue
# This is still experimental, and failures should not stop CI
# from running normally
tty.warn(f"Failed to fetch dynamic mapping for query:\n\t{query}")
tty.warn(f"{e}")
continue
config = json.load(codecs.getreader("utf-8")(response))
# Strip ignore keys
if ignored:
for key in ignored:
if key in config:
config.pop(key)
# Only keep allowed keys
clean_config = {}
if allowed:
for key in allowed:
if key in config:
clean_config[key] = config[key]
else:
clean_config = config
# Verify all of the required keys are present
if required:
missing_keys = []
for key in required:
if key not in clean_config.keys():
missing_keys.append(key)
if missing_keys:
tty.warn(f"Response missing required keys: {missing_keys}")
if clean_config:
job["attributes"] = spack.config.merge_yaml(
job.get("attributes", {}), clean_config
)
for _, job in jobs.items():
if job["spec"]:
job["spec"] = job["spec"].name
return self.ir
class SpackCIError(spack.error.SpackError):
def __init__(self, msg):
super().__init__(msg)


@@ -0,0 +1,36 @@
# Copyright 2013-2024 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
# Holds all known formatters
"""Generators that support writing out pipelines for various CI platforms,
using a common pipeline graph definition.
"""
import spack.error
_generators = {}
def generator(name):
"""Decorator to register a pipeline generator method.
A generator method should take PipelineDag, SpackCIConfig, and
PipelineOptions arguments, and should produce a pipeline file.
"""
def _decorator(generate_method):
_generators[name] = generate_method
return generate_method
return _decorator
def get_generator(name):
try:
return _generators[name]
except KeyError:
raise UnknownGeneratorException(name)
class UnknownGeneratorException(spack.error.SpackError):
def __init__(self, generator_name):
super().__init__(f"No registered generator for {generator_name}")


@@ -0,0 +1,415 @@
# Copyright 2013-2024 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import copy
import os
import shutil
from typing import List, Optional
import ruamel.yaml
import llnl.util.tty as tty
import spack
import spack.binary_distribution as bindist
import spack.config as cfg
import spack.mirrors.mirror
import spack.spec
import spack.util.spack_yaml as syaml
from .common import (
SPACK_RESERVED_TAGS,
PipelineDag,
PipelineOptions,
PipelineType,
SpackCIConfig,
SpackCIError,
ensure_expected_target_path,
unpack_script,
update_env_scopes,
write_pipeline_manifest,
)
from .generator_registry import generator
# See https://docs.gitlab.com/ee/ci/yaml/#retry for descriptions of conditions
JOB_RETRY_CONDITIONS = [
# "always",
"unknown_failure",
"script_failure",
"api_failure",
"stuck_or_timeout_failure",
"runner_system_failure",
"runner_unsupported",
"stale_schedule",
# "job_execution_timeout",
"archived_failure",
"unmet_prerequisites",
"scheduler_failure",
"data_integrity_failure",
]
JOB_NAME_FORMAT = "{name}{@version} {/hash}"
def _remove_reserved_tags(tags):
"""Convenience function to strip reserved tags from jobs"""
return [tag for tag in tags if tag not in SPACK_RESERVED_TAGS]
def get_job_name(spec: spack.spec.Spec, build_group: Optional[str] = None) -> str:
"""Given a spec and possibly a build group, return the job name. If the
resulting name is longer than 255 characters, it will be truncated.
Arguments:
spec: Spec job will build
build_group: Name of build group this job belongs to (a CDash notion)
Returns: The job name
"""
job_name = spec.format(JOB_NAME_FORMAT)
if build_group:
job_name = f"{job_name} {build_group}"
return job_name[:255]
def maybe_generate_manifest(pipeline: PipelineDag, options: PipelineOptions, manifest_path):
# TODO: Consider including only hashes of rebuilt specs in the manifest,
# instead of full source and destination urls. Also, consider renaming
# the variable that controls whether or not to write the manifest from
# "SPACK_COPY_BUILDCACHE" to "SPACK_WRITE_PIPELINE_MANIFEST" or similar.
spack_buildcache_copy = os.environ.get("SPACK_COPY_BUILDCACHE", None)
if spack_buildcache_copy:
buildcache_copy_src_prefix = options.buildcache_destination.fetch_url
buildcache_copy_dest_prefix = spack_buildcache_copy
if options.pipeline_type == PipelineType.COPY_ONLY:
manifest_specs = [s for s in options.env.all_specs() if not s.external]
else:
manifest_specs = [n.spec for _, n in pipeline.traverse_nodes(direction="children")]
write_pipeline_manifest(
manifest_specs, buildcache_copy_src_prefix, buildcache_copy_dest_prefix, manifest_path
)
@generator("gitlab")
def generate_gitlab_yaml(pipeline: PipelineDag, spack_ci: SpackCIConfig, options: PipelineOptions):
"""Given a pipeline graph, job attributes, and pipeline options,
write a pipeline that can be consumed by GitLab to the given output file.
Arguments:
pipeline: An already pruned graph of jobs representing all the specs to build
spack_ci: An object containing the configured attributes of all jobs in the pipeline
options: An object containing all the pipeline options gathered from yaml, env, etc...
"""
ci_project_dir = os.environ.get("CI_PROJECT_DIR") or os.getcwd()
generate_job_name = os.environ.get("CI_JOB_NAME", "job-does-not-exist")
generate_pipeline_id = os.environ.get("CI_PIPELINE_ID", "pipeline-does-not-exist")
artifacts_root = options.artifacts_root
if artifacts_root.startswith(ci_project_dir):
artifacts_root = os.path.relpath(artifacts_root, ci_project_dir)
pipeline_artifacts_dir = os.path.join(ci_project_dir, artifacts_root)
output_file = options.output_file
if not output_file:
output_file = os.path.abspath(".gitlab-ci.yml")
else:
output_file_path = os.path.abspath(output_file)
gen_ci_dir = os.path.dirname(output_file_path)
if not os.path.exists(gen_ci_dir):
os.makedirs(gen_ci_dir)
spack_ci_ir = spack_ci.generate_ir()
concrete_env_dir = os.path.join(pipeline_artifacts_dir, "concrete_environment")
# Now that we've added the mirrors we know about, they should be properly
# reflected in the environment manifest file, so copy that into the
# concrete environment directory, along with the spack.lock file.
if not os.path.exists(concrete_env_dir):
os.makedirs(concrete_env_dir)
shutil.copyfile(options.env.manifest_path, os.path.join(concrete_env_dir, "spack.yaml"))
shutil.copyfile(options.env.lock_path, os.path.join(concrete_env_dir, "spack.lock"))
update_env_scopes(
options.env,
[
os.path.relpath(s.path, concrete_env_dir)
for s in cfg.scopes().values()
if not s.writable
and isinstance(s, (cfg.DirectoryConfigScope))
and os.path.exists(s.path)
],
os.path.join(concrete_env_dir, "spack.yaml"),
# Here transforming windows paths is only required in the special case
# of copy_only_pipelines, a unique scenario where the generate job and
# child pipelines are run on different platforms. To make this compatible
# w/ Windows, we cannot write Windows style path separators that will be
# consumed by the Posix copy job runner.
#
# TODO (johnwparent): Refactor config + cli read/write to deal only in
# posix style paths
transform_windows_paths=(options.pipeline_type == PipelineType.COPY_ONLY),
)
job_log_dir = os.path.join(pipeline_artifacts_dir, "logs")
job_repro_dir = os.path.join(pipeline_artifacts_dir, "reproduction")
job_test_dir = os.path.join(pipeline_artifacts_dir, "tests")
user_artifacts_dir = os.path.join(pipeline_artifacts_dir, "user_data")
# We communicate relative paths to the downstream jobs to avoid issues in
# situations where the CI_PROJECT_DIR varies between the pipeline
# generation job and the rebuild jobs. This can happen when gitlab
# checks out the project into a runner-specific directory, for example,
# and different runners are picked for generate and rebuild jobs.
rel_concrete_env_dir = os.path.relpath(concrete_env_dir, ci_project_dir)
rel_job_log_dir = os.path.relpath(job_log_dir, ci_project_dir)
rel_job_repro_dir = os.path.relpath(job_repro_dir, ci_project_dir)
rel_job_test_dir = os.path.relpath(job_test_dir, ci_project_dir)
rel_user_artifacts_dir = os.path.relpath(user_artifacts_dir, ci_project_dir)
def main_script_replacements(cmd):
return cmd.replace("{env_dir}", rel_concrete_env_dir)
output_object = {}
job_id = 0
stage_id = 0
stages: List[List] = []
stage_names = []
max_length_needs = 0
max_needs_job = ""
if not options.pipeline_type == PipelineType.COPY_ONLY:
for level, node in pipeline.traverse_nodes(direction="parents"):
stage_id = level
if len(stages) == stage_id:
stages.append([])
stages[stage_id].append(node.spec)
stage_name = f"stage-{level}"
if stage_name not in stage_names:
stage_names.append(stage_name)
release_spec = node.spec
release_spec_dag_hash = release_spec.dag_hash()
job_object = spack_ci_ir["jobs"][release_spec_dag_hash]["attributes"]
if not job_object:
tty.warn(f"No match found for {release_spec}, skipping it")
continue
if options.pipeline_type is not None:
# For spack pipelines "public" and "protected" are reserved tags
job_object["tags"] = _remove_reserved_tags(job_object.get("tags", []))
if options.pipeline_type == PipelineType.PROTECTED_BRANCH:
job_object["tags"].extend(["protected"])
elif options.pipeline_type == PipelineType.PULL_REQUEST:
job_object["tags"].extend(["public"])
if "script" not in job_object:
raise AttributeError
job_object["script"] = unpack_script(job_object["script"], op=main_script_replacements)
if "before_script" in job_object:
job_object["before_script"] = unpack_script(job_object["before_script"])
if "after_script" in job_object:
job_object["after_script"] = unpack_script(job_object["after_script"])
build_group = options.cdash_handler.build_group if options.cdash_handler else None
job_name = get_job_name(release_spec, build_group)
dep_nodes = pipeline.get_dependencies(node)
job_object["needs"] = [
{"job": get_job_name(dep_node.spec, build_group), "artifacts": False}
for dep_node in dep_nodes
]
job_object["needs"].append(
{"job": generate_job_name, "pipeline": f"{generate_pipeline_id}"}
)
job_vars = job_object["variables"]
# Let downstream jobs know whether the spec needed rebuilding, regardless
# of whether DAG pruning was enabled or not.
already_built = bindist.get_mirrors_for_spec(spec=release_spec, index_only=True)
job_vars["SPACK_SPEC_NEEDS_REBUILD"] = "False" if already_built else "True"
if options.cdash_handler:
build_name = options.cdash_handler.build_name(release_spec)
job_vars["SPACK_CDASH_BUILD_NAME"] = build_name
build_stamp = options.cdash_handler.build_stamp
job_vars["SPACK_CDASH_BUILD_STAMP"] = build_stamp
job_object["artifacts"] = cfg.merge_yaml(
job_object.get("artifacts", {}),
{
"when": "always",
"paths": [
rel_job_log_dir,
rel_job_repro_dir,
rel_job_test_dir,
rel_user_artifacts_dir,
],
},
)
job_object["stage"] = stage_name
job_object["retry"] = {"max": 2, "when": JOB_RETRY_CONDITIONS}
job_object["interruptible"] = True
length_needs = len(job_object["needs"])
if length_needs > max_length_needs:
max_length_needs = length_needs
max_needs_job = job_name
output_object[job_name] = job_object
job_id += 1
tty.debug(f"{job_id} build jobs generated in {stage_id} stages")
if job_id > 0:
tty.debug(f"The max_needs_job is {max_needs_job}, with {max_length_needs} needs")
service_job_retries = {
"max": 2,
"when": ["runner_system_failure", "stuck_or_timeout_failure", "script_failure"],
}
# In some cases, pipeline generation should write a manifest. Currently
# the only purpose is to specify a list of sources and destinations for
# everything that should be copied.
distinguish_stack = options.stack_name if options.stack_name else "rebuilt"
manifest_path = os.path.join(
pipeline_artifacts_dir, "specs_to_copy", f"copy_{distinguish_stack}_specs.json"
)
maybe_generate_manifest(pipeline, options, manifest_path)
if options.pipeline_type == PipelineType.COPY_ONLY:
stage_names.append("copy")
sync_job = copy.deepcopy(spack_ci_ir["jobs"]["copy"]["attributes"])
sync_job["stage"] = "copy"
sync_job["needs"] = [{"job": generate_job_name, "pipeline": f"{generate_pipeline_id}"}]
if "variables" not in sync_job:
sync_job["variables"] = {}
sync_job["variables"][
"SPACK_COPY_ONLY_DESTINATION"
] = options.buildcache_destination.fetch_url
pipeline_mirrors = spack.mirrors.mirror.MirrorCollection(binary=True)
if "buildcache-source" not in pipeline_mirrors:
raise SpackCIError("Copy-only pipelines require a mirror named 'buildcache-source'")
buildcache_source = pipeline_mirrors["buildcache-source"].fetch_url
sync_job["variables"]["SPACK_BUILDCACHE_SOURCE"] = buildcache_source
sync_job["dependencies"] = []
output_object["copy"] = sync_job
job_id += 1
if job_id > 0:
if (
"script" in spack_ci_ir["jobs"]["signing"]["attributes"]
and options.pipeline_type == PipelineType.PROTECTED_BRANCH
):
# External signing: generate a job to check and sign binary pkgs
stage_names.append("stage-sign-pkgs")
signing_job = spack_ci_ir["jobs"]["signing"]["attributes"]
signing_job["script"] = unpack_script(signing_job["script"])
signing_job["stage"] = "stage-sign-pkgs"
signing_job["when"] = "always"
signing_job["retry"] = {"max": 2, "when": ["always"]}
signing_job["interruptible"] = True
if "variables" not in signing_job:
signing_job["variables"] = {}
signing_job["variables"][
"SPACK_BUILDCACHE_DESTINATION"
] = options.buildcache_destination.push_url
signing_job["dependencies"] = []
output_object["sign-pkgs"] = signing_job
if options.rebuild_index:
# Add a final job to regenerate the index
stage_names.append("stage-rebuild-index")
final_job = spack_ci_ir["jobs"]["reindex"]["attributes"]
final_job["stage"] = "stage-rebuild-index"
target_mirror = options.buildcache_destination.push_url
final_job["script"] = unpack_script(
final_job["script"],
op=lambda cmd: cmd.replace("{index_target_mirror}", target_mirror),
)
final_job["when"] = "always"
final_job["retry"] = service_job_retries
final_job["interruptible"] = True
final_job["dependencies"] = []
output_object["rebuild-index"] = final_job
output_object["stages"] = stage_names
# Capture the version of Spack used to generate the pipeline, which can be
# passed to `git checkout` for version consistency. If we aren't in a Git
# repository, presume we are a Spack release and use the Git tag instead.
spack_version = spack.get_version()
version_to_clone = spack.get_spack_commit() or f"v{spack.spack_version}"
rebuild_everything = not options.prune_up_to_date and not options.prune_untouched
output_object["variables"] = {
"SPACK_ARTIFACTS_ROOT": artifacts_root,
"SPACK_CONCRETE_ENV_DIR": rel_concrete_env_dir,
"SPACK_VERSION": spack_version,
"SPACK_CHECKOUT_VERSION": version_to_clone,
"SPACK_JOB_LOG_DIR": rel_job_log_dir,
"SPACK_JOB_REPRO_DIR": rel_job_repro_dir,
"SPACK_JOB_TEST_DIR": rel_job_test_dir,
"SPACK_PIPELINE_TYPE": options.pipeline_type.name if options.pipeline_type else "None",
"SPACK_CI_STACK_NAME": os.environ.get("SPACK_CI_STACK_NAME", "None"),
"SPACK_REBUILD_CHECK_UP_TO_DATE": str(options.prune_up_to_date),
"SPACK_REBUILD_EVERYTHING": str(rebuild_everything),
"SPACK_REQUIRE_SIGNING": str(options.require_signing),
}
if options.stack_name:
output_object["variables"]["SPACK_CI_STACK_NAME"] = options.stack_name
output_vars = output_object["variables"]
for item, val in output_vars.items():
output_vars[item] = ensure_expected_target_path(val)
else:
# No jobs were generated
noop_job = spack_ci_ir["jobs"]["noop"]["attributes"]
# If this job fails ignore the status and carry on
noop_job["retry"] = 0
noop_job["allow_failure"] = True
tty.debug("No specs to rebuild, generating no-op job")
output_object = {"no-specs-to-rebuild": noop_job}
# Ensure the child pipeline always runs
output_object["workflow"] = {"rules": [{"when": "always"}]}
sorted_output = {}
for output_key, output_value in sorted(output_object.items()):
sorted_output[output_key] = output_value
# Minimize yaml output size through use of anchors
syaml.anchorify(sorted_output)
with open(output_file, "w") as f:
ruamel.yaml.YAML().dump(sorted_output, f)


@@ -6,7 +6,6 @@
import json
import os
import shutil
import warnings
from urllib.parse import urlparse, urlunparse
import llnl.util.filesystem as fs
@@ -62,22 +61,8 @@ def setup_parser(subparser):
"path to the file where generated jobs file should be written. "
"default is .gitlab-ci.yml in the root of the repository",
)
generate.add_argument(
"--optimize",
action="store_true",
default=False,
help="(DEPRECATED) optimize the gitlab yaml file for size\n\n"
"run the generated document through a series of optimization passes "
"designed to reduce the size of the generated file",
)
generate.add_argument(
"--dependencies",
action="store_true",
default=False,
help="(DEPRECATED) disable DAG scheduling (use 'plain' dependencies)",
)
prune_group = generate.add_mutually_exclusive_group()
prune_group.add_argument(
prune_dag_group = generate.add_mutually_exclusive_group()
prune_dag_group.add_argument(
"--prune-dag",
action="store_true",
dest="prune_dag",
@@ -85,7 +70,7 @@ def setup_parser(subparser):
help="skip up-to-date specs\n\n"
"do not generate jobs for specs that are up-to-date on the mirror",
)
prune_group.add_argument(
prune_dag_group.add_argument(
"--no-prune-dag",
action="store_false",
dest="prune_dag",
@@ -93,6 +78,23 @@ def setup_parser(subparser):
help="process up-to-date specs\n\n"
"generate jobs for specs even when they are up-to-date on the mirror",
)
prune_ext_group = generate.add_mutually_exclusive_group()
prune_ext_group.add_argument(
"--prune-externals",
action="store_true",
dest="prune_externals",
default=True,
help="skip external specs\n\n"
"do not generate jobs for specs that are marked as external",
)
prune_ext_group.add_argument(
"--no-prune-externals",
action="store_false",
dest="prune_externals",
default=True,
help="process external specs\n\n"
"generate jobs for specs even when they are marked as external",
)
generate.add_argument(
"--check-index-only",
action="store_true",
@@ -108,11 +110,12 @@ def setup_parser(subparser):
)
generate.add_argument(
"--artifacts-root",
default=None,
default="jobs_scratch_dir",
help="path to the root of the artifacts directory\n\n"
"if provided, concrete environment files (spack.yaml, spack.lock) will be generated under "
"this directory. their location will be passed to generated child jobs through the "
"SPACK_CONCRETE_ENVIRONMENT_PATH variable",
"The spack ci module assumes it will normally be run from within your project "
"directory, wherever that is checked out to run your ci. The artifacts root directory "
"should specifiy a name that can safely be used for artifacts within your project "
"directory.",
)
generate.set_defaults(func=ci_generate)
@@ -187,42 +190,8 @@ def ci_generate(args):
before invoking this command. the value must be the CDash authorization token needed to create
a build group and register all generated jobs under it
"""
if args.optimize:
warnings.warn(
"The --optimize option has been deprecated, and currently has no effect. "
"It will be removed in Spack v0.24."
)
if args.dependencies:
warnings.warn(
"The --dependencies option has been deprecated, and currently has no effect. "
"It will be removed in Spack v0.24."
)
env = spack.cmd.require_active_env(cmd_name="ci generate")
output_file = args.output_file
prune_dag = args.prune_dag
index_only = args.index_only
artifacts_root = args.artifacts_root
if not output_file:
output_file = os.path.abspath(".gitlab-ci.yml")
else:
output_file_path = os.path.abspath(output_file)
gen_ci_dir = os.path.dirname(output_file_path)
if not os.path.exists(gen_ci_dir):
os.makedirs(gen_ci_dir)
# Generate the jobs
spack_ci.generate_gitlab_ci_yaml(
env,
True,
output_file,
prune_dag=prune_dag,
check_index_only=index_only,
artifacts_root=artifacts_root,
)
spack_ci.generate_pipeline(env, args)
def ci_reindex(args):
@@ -433,14 +402,16 @@ def ci_rebuild(args):
if not config["verify_ssl"]:
spack_cmd.append("-k")
install_args = [f'--use-buildcache={spack_ci.win_quote("package:never,dependencies:only")}']
install_args = [
f'--use-buildcache={spack_ci.common.win_quote("package:never,dependencies:only")}'
]
can_verify = spack_ci.can_verify_binaries()
verify_binaries = can_verify and spack_is_pr_pipeline is False
if not verify_binaries:
install_args.append("--no-check-signature")
slash_hash = spack_ci.win_quote("/" + job_spec.dag_hash())
slash_hash = spack_ci.common.win_quote("/" + job_spec.dag_hash())
# Arguments when installing the root from sources
deps_install_args = install_args + ["--only=dependencies"]


@@ -13,8 +13,12 @@
import spack.environment as ev
import spack.error
import spack.paths as spack_paths
import spack.repo as repo
import spack.spec
import spack.util.git
from spack.spec import Spec
pytestmark = [pytest.mark.usefixtures("mock_packages")]
@pytest.fixture
@@ -25,8 +29,128 @@ def repro_dir(tmp_path):
yield result
def test_urlencode_string():
assert ci._url_encode_string("Spack Test Project") == "Spack+Test+Project"
def test_pipeline_dag(config, tmpdir):
r"""Test creation, pruning, and traversal of PipelineDAG using the
following package dependency graph:
a a
/| /|
c b c b
|\ prune 'd' /|\
e d =====> e | g
| |\ | |
h | g h |
\| \|
f f
"""
builder = repo.MockRepositoryBuilder(tmpdir)
builder.add_package("pkg-h", dependencies=[("pkg-f", None, None)])
builder.add_package("pkg-g")
builder.add_package("pkg-f")
builder.add_package("pkg-e", dependencies=[("pkg-h", None, None)])
builder.add_package("pkg-d", dependencies=[("pkg-f", None, None), ("pkg-g", None, None)])
builder.add_package("pkg-c")
builder.add_package("pkg-b", dependencies=[("pkg-d", None, None), ("pkg-e", None, None)])
builder.add_package("pkg-a", dependencies=[("pkg-b", None, None), ("pkg-c", None, None)])
with repo.use_repositories(builder.root):
spec_a = Spec("pkg-a").concretized()
key_a = ci.common.PipelineDag.key(spec_a)
key_b = ci.common.PipelineDag.key(spec_a["pkg-b"])
key_c = ci.common.PipelineDag.key(spec_a["pkg-c"])
key_d = ci.common.PipelineDag.key(spec_a["pkg-d"])
key_e = ci.common.PipelineDag.key(spec_a["pkg-e"])
key_f = ci.common.PipelineDag.key(spec_a["pkg-f"])
key_g = ci.common.PipelineDag.key(spec_a["pkg-g"])
key_h = ci.common.PipelineDag.key(spec_a["pkg-h"])
pipeline = ci.common.PipelineDag([spec_a])
expected_bottom_up_traversal = {
key_a: 4,
key_b: 3,
key_c: 0,
key_d: 1,
key_e: 2,
key_f: 0,
key_g: 0,
key_h: 1,
}
visited = []
for stage, node in pipeline.traverse_nodes(direction="parents"):
assert expected_bottom_up_traversal[node.key] == stage
visited.append(node.key)
assert len(visited) == len(expected_bottom_up_traversal)
assert all(k in visited for k in expected_bottom_up_traversal.keys())
expected_top_down_traversal = {
key_a: 0,
key_b: 1,
key_c: 1,
key_d: 2,
key_e: 2,
key_f: 4,
key_g: 3,
key_h: 3,
}
visited = []
for stage, node in pipeline.traverse_nodes(direction="children"):
assert expected_top_down_traversal[node.key] == stage
visited.append(node.key)
assert len(visited) == len(expected_top_down_traversal)
assert all(k in visited for k in expected_top_down_traversal.keys())
pipeline.prune(key_d)
b_children = pipeline.nodes[key_b].children
assert len(b_children) == 3
assert all([k in b_children for k in [key_e, key_f, key_g]])
# check another bottom-up traversal after pruning pkg-d
expected_bottom_up_traversal = {
key_a: 4,
key_b: 3,
key_c: 0,
key_e: 2,
key_f: 0,
key_g: 0,
key_h: 1,
}
visited = []
for stage, node in pipeline.traverse_nodes(direction="parents"):
assert expected_bottom_up_traversal[node.key] == stage
visited.append(node.key)
assert len(visited) == len(expected_bottom_up_traversal)
assert all(k in visited for k in expected_bottom_up_traversal.keys())
# check top-down traversal after pruning pkg-d
expected_top_down_traversal = {
key_a: 0,
key_b: 1,
key_c: 1,
key_e: 2,
key_f: 4,
key_g: 2,
key_h: 3,
}
visited = []
for stage, node in pipeline.traverse_nodes(direction="children"):
assert expected_top_down_traversal[node.key] == stage
visited.append(node.key)
assert len(visited) == len(expected_top_down_traversal)
assert all(k in visited for k in expected_top_down_traversal.keys())
a_deps_direct = [n.spec for n in pipeline.get_dependencies(pipeline.nodes[key_a])]
assert all([s in a_deps_direct for s in [spec_a["pkg-b"], spec_a["pkg-c"]]])
@pytest.mark.not_on_windows("Not supported on Windows (yet)")


@@ -23,8 +23,10 @@
import spack.hash_types as ht
import spack.main
import spack.paths as spack_paths
import spack.repo as repo
import spack.util.spack_yaml as syaml
from spack.ci import gitlab as gitlab_generator
from spack.ci.common import PipelineDag, PipelineOptions, SpackCIConfig
from spack.ci.generator_registry import generator
from spack.cmd.ci import FAILED_CREATE_BUILDCACHE_CODE
from spack.schema.buildcache_spec import schema as specfile_schema
from spack.schema.database_index import schema as db_idx_schema
@@ -119,62 +121,6 @@ def _func(spack_yaml_content, *args, fail_on_error=True):
return _func
def test_specs_staging(config, tmpdir):
"""Make sure we achieve the best possible staging for the following
spec DAG::

    a
   /|
  c b
    |\
    e d
      |\
      f g

In this case, we would expect 'c', 'e', 'f', and 'g' to be in the first stage,
and then 'd', 'b', and 'a' to be put in the next three stages, respectively.
"""
builder = repo.MockRepositoryBuilder(tmpdir)
builder.add_package("pkg-g")
builder.add_package("pkg-f")
builder.add_package("pkg-e")
builder.add_package("pkg-d", dependencies=[("pkg-f", None, None), ("pkg-g", None, None)])
builder.add_package("pkg-c")
builder.add_package("pkg-b", dependencies=[("pkg-d", None, None), ("pkg-e", None, None)])
builder.add_package("pkg-a", dependencies=[("pkg-b", None, None), ("pkg-c", None, None)])
with repo.use_repositories(builder.root):
spec_a = Spec("pkg-a").concretized()
spec_a_label = ci._spec_ci_label(spec_a)
spec_b_label = ci._spec_ci_label(spec_a["pkg-b"])
spec_c_label = ci._spec_ci_label(spec_a["pkg-c"])
spec_d_label = ci._spec_ci_label(spec_a["pkg-d"])
spec_e_label = ci._spec_ci_label(spec_a["pkg-e"])
spec_f_label = ci._spec_ci_label(spec_a["pkg-f"])
spec_g_label = ci._spec_ci_label(spec_a["pkg-g"])
spec_labels, dependencies, stages = ci.stage_spec_jobs([spec_a])
assert len(stages) == 4
assert len(stages[0]) == 4
assert spec_c_label in stages[0]
assert spec_e_label in stages[0]
assert spec_f_label in stages[0]
assert spec_g_label in stages[0]
assert len(stages[1]) == 1
assert spec_d_label in stages[1]
assert len(stages[2]) == 1
assert spec_b_label in stages[2]
assert len(stages[3]) == 1
assert spec_a_label in stages[3]
def test_ci_generate_with_env(ci_generate_test, tmp_path, mock_binary_index):
"""Make sure we can get a .gitlab-ci.yml from an environment file
which has the gitlab-ci, cdash, and mirrors sections.
@@ -452,7 +398,10 @@ def test_ci_generate_for_pr_pipeline(ci_generate_test, tmp_path, monkeypatch):
assert "rebuild-index" not in yaml_contents
assert "variables" in yaml_contents
assert "SPACK_PIPELINE_TYPE" in yaml_contents["variables"]
assert yaml_contents["variables"]["SPACK_PIPELINE_TYPE"] == "spack_pull_request"
assert (
ci.common.PipelineType[yaml_contents["variables"]["SPACK_PIPELINE_TYPE"]]
== ci.common.PipelineType.PULL_REQUEST
)
def test_ci_generate_with_external_pkg(ci_generate_test, tmp_path, monkeypatch):
@@ -1391,7 +1340,7 @@ def test_ci_reproduce(
str(artifacts_root),
)
job_name = ci.get_job_name(job_spec)
job_name = gitlab_generator.get_job_name(job_spec)
with open(repro_dir / "repro.json", "w") as f:
f.write(
@@ -1588,7 +1537,7 @@ def dynamic_mapping_setup(tmpdir):
spec_a = Spec("pkg-a")
spec_a.concretize()
return ci.get_job_name(spec_a)
return gitlab_generator.get_job_name(spec_a)
def test_ci_dynamic_mapping_empty(
@@ -1604,7 +1553,7 @@ def test_ci_dynamic_mapping_empty(
def fake_dyn_mapping_urlopener(*args, **kwargs):
return BytesIO("{}".encode())
monkeypatch.setattr(ci, "_dyn_mapping_urlopener", fake_dyn_mapping_urlopener)
monkeypatch.setattr(ci.common, "_dyn_mapping_urlopener", fake_dyn_mapping_urlopener)
_ = dynamic_mapping_setup(tmpdir)
with tmpdir.as_cwd():
@@ -1633,7 +1582,7 @@ def fake_dyn_mapping_urlopener(*args, **kwargs):
).encode()
)
monkeypatch.setattr(ci, "_dyn_mapping_urlopener", fake_dyn_mapping_urlopener)
monkeypatch.setattr(ci.common, "_dyn_mapping_urlopener", fake_dyn_mapping_urlopener)
label = dynamic_mapping_setup(tmpdir)
with tmpdir.as_cwd():
@@ -1651,3 +1600,179 @@ def fake_dyn_mapping_urlopener(*args, **kwargs):
assert job.get("variables", {}).get("MY_VAR") == "hello"
assert "ignored_field" not in job
assert "unallowed_field" not in job
def test_ci_generate_unknown_generator(
ci_generate_test,
tmp_path,
mutable_mock_env_path,
install_mockery,
mock_packages,
ci_base_environment,
):
"""Ensure unrecognized ci targets are detected and the user
sees an intelligible and actionable message"""
src_mirror_url = tmp_path / "ci-src-mirror"
bin_mirror_url = tmp_path / "ci-bin-mirror"
spack_yaml_contents = f"""
spack:
specs:
- archive-files
mirrors:
some-mirror: {src_mirror_url}
buildcache-destination: {bin_mirror_url}
ci:
target: unknown
pipeline-gen:
- submapping:
- match:
- archive-files
build-job:
tags:
- donotcare
image: donotcare
"""
expect = "Spack CI module cannot generate a pipeline for format unknown"
with pytest.raises(ci.SpackCIError, match=expect):
ci_generate_test(spack_yaml_contents)
def test_ci_generate_copy_only(
ci_generate_test,
tmp_path,
monkeypatch,
mutable_mock_env_path,
install_mockery,
mock_packages,
ci_base_environment,
):
"""Ensure the correct jobs are generated for a copy-only pipeline,
and verify that a pipeline manifest is produced containing the right
number of entries."""
src_mirror_url = tmp_path / "ci-src-mirror"
bin_mirror_url = tmp_path / "ci-bin-mirror"
copy_mirror_url = tmp_path / "ci-copy-mirror"
monkeypatch.setenv("SPACK_PIPELINE_TYPE", "spack_copy_only")
monkeypatch.setenv("SPACK_COPY_BUILDCACHE", copy_mirror_url)
spack_yaml_contents = f"""
spack:
specs:
- archive-files
mirrors:
buildcache-source:
fetch: {src_mirror_url}
push: {src_mirror_url}
source: False
binary: True
buildcache-destination:
fetch: {bin_mirror_url}
push: {bin_mirror_url}
source: False
binary: True
ci:
target: gitlab
pipeline-gen:
- submapping:
- match:
- archive-files
build-job:
tags:
- donotcare
image: donotcare
"""
_, output_file, _ = ci_generate_test(spack_yaml_contents)
with open(output_file) as of:
pipeline_doc = syaml.load(of.read())
expected_keys = ["copy", "rebuild-index", "stages", "variables", "workflow"]
assert all([k in pipeline_doc for k in expected_keys])
# Make sure there are only two jobs and two stages
stages = pipeline_doc["stages"]
copy_stage = "copy"
rebuild_index_stage = "stage-rebuild-index"
assert len(stages) == 2
assert stages[0] == copy_stage
assert stages[1] == rebuild_index_stage
rebuild_index_job = pipeline_doc["rebuild-index"]
assert rebuild_index_job["stage"] == rebuild_index_stage
copy_job = pipeline_doc["copy"]
assert copy_job["stage"] == copy_stage
# Make sure a pipeline manifest was generated
output_directory = os.path.dirname(output_file)
assert "SPACK_ARTIFACTS_ROOT" in pipeline_doc["variables"]
artifacts_root = pipeline_doc["variables"]["SPACK_ARTIFACTS_ROOT"]
pipeline_manifest_path = os.path.join(
output_directory, artifacts_root, "specs_to_copy", "copy_rebuilt_specs.json"
)
assert os.path.exists(pipeline_manifest_path)
assert os.path.isfile(pipeline_manifest_path)
with open(pipeline_manifest_path) as fd:
manifest_data = json.load(fd)
with ev.read("test") as active_env:
active_env.concretize()
for s in active_env.all_specs():
assert s.dag_hash() in manifest_data
@generator("unittestgenerator")
def generate_unittest_pipeline(
pipeline: PipelineDag, spack_ci: SpackCIConfig, options: PipelineOptions
):
"""Define a custom pipeline generator for the target 'unittestgenerator'."""
output_file = options.output_file
assert output_file is not None
with open(output_file, "w") as fd:
fd.write("unittestpipeline\n")
for _, node in pipeline.traverse_nodes(direction="children"):
release_spec = node.spec
fd.write(f" {release_spec.name}\n")
def test_ci_generate_alternate_target(
ci_generate_test,
tmp_path,
mutable_mock_env_path,
install_mockery,
mock_packages,
ci_base_environment,
):
"""Ensure the above pipeline generator was correctly registered and
is used to generate a pipeline for the stack/config defined here."""
bin_mirror_url = tmp_path / "ci-bin-mirror"
spack_yaml_contents = f"""
spack:
specs:
- archive-files
- externaltest
mirrors:
buildcache-destination: {bin_mirror_url}
ci:
target: unittestgenerator
pipeline-gen:
- submapping:
- match:
- archive-files
build-job:
tags:
- donotcare
image: donotcare
"""
_, output_file, _ = ci_generate_test(spack_yaml_contents, "--no-prune-externals")
with open(output_file) as of:
pipeline_doc = of.read()
assert pipeline_doc.startswith("unittestpipeline")
assert "externaltest" in pipeline_doc