ci: deprecate the --dependencies and --optimize option (#45005)

Massimiliano Culpo 2024-07-02 22:06:52 +02:00 committed by GitHub
parent 5686a6b928
commit 6b85f6b405
8 changed files with 18 additions and 646 deletions

View File

@@ -253,17 +253,6 @@ can easily happen if it is not updated frequently, this behavior ensures that
spack has a way to know for certain about the status of any concrete spec on
the remote mirror, but can slow down pipeline generation significantly.
The ``--optimize`` argument is experimental and runs the generated pipeline
document through a series of optimization passes designed to reduce the size
of the generated file.
The ``--dependencies`` option is also experimental and disables what in Gitlab is
referred to as DAG scheduling, internally using the ``dependencies`` keyword
rather than ``needs`` to list dependency jobs. The drawback of using this option
is that before any job can begin, all jobs in previous stages must first
complete. The benefit is that Gitlab allows more dependencies to be listed
when using ``dependencies`` instead of ``needs``.
The optional ``--output-file`` argument should be an absolute path (including
file name) to the generated pipeline, and if not given, the default is
``./.gitlab-ci.yml``.

View File

@@ -553,10 +553,9 @@ def generate_gitlab_ci_yaml(
env,
print_summary,
output_file,
*,
prune_dag=False,
check_index_only=False,
run_optimizer=False,
use_dependencies=False,
artifacts_root=None,
remote_mirror_override=None,
):
@@ -577,12 +576,6 @@ def generate_gitlab_ci_yaml(
this mode results in faster yaml generation time). Otherwise, also
check each spec directly by url (useful if there is no index or it
might be out of date).
run_optimizer (bool): If True, post-process the generated yaml to try
to reduce its size (attempts to collect repeated configuration and
replace it with definitions).
use_dependencies (bool): If true, use "dependencies" rather than "needs"
("needs" allows DAG scheduling). Useful if gitlab instance cannot
be configured to handle more than a few "needs" per job.
artifacts_root (str): Path where artifacts like logs, environment
files (spack.yaml, spack.lock), etc should be written. GitLab
requires this to be within the project directory.
@@ -1273,17 +1266,6 @@ def main_script_replacements(cmd):
with open(copy_specs_file, "w") as fd:
fd.write(json.dumps(buildcache_copies))
# TODO(opadron): remove this or refactor
if run_optimizer:
import spack.ci_optimization as ci_opt
output_object = ci_opt.optimizer(output_object)
# TODO(opadron): remove this or refactor
if use_dependencies:
import spack.ci_needs_workaround as cinw
output_object = cinw.needs_to_dependencies(output_object)
else:
# No jobs were generated
noop_job = spack_ci_ir["jobs"]["noop"]["attributes"]

View File

@@ -1,34 +0,0 @@
# Copyright 2013-2024 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import collections.abc
get_job_name = lambda needs_entry: (
needs_entry.get("job")
if (isinstance(needs_entry, collections.abc.Mapping) and needs_entry.get("artifacts", True))
else needs_entry if isinstance(needs_entry, str) else None
)
def convert_job(job_entry):
if not isinstance(job_entry, collections.abc.Mapping):
return job_entry
needs = job_entry.get("needs")
if needs is None:
return job_entry
new_job = {}
new_job.update(job_entry)
del new_job["needs"]
new_job["dependencies"] = list(
filter((lambda x: x is not None), (get_job_name(needs_entry) for needs_entry in needs))
)
return new_job
def needs_to_dependencies(yaml):
return dict((k, convert_job(v)) for k, v in yaml.items())
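# Minimal usage sketch (hypothetical job names), assuming the functions
# defined above: each job's "needs" entries collapse into a flat
# "dependencies" list of job names.
pipeline = {
    "build-zlib": {"stage": "stage-0", "script": ["spack ci rebuild"]},
    "build-curl": {
        "stage": "stage-1",
        "script": ["spack ci rebuild"],
        "needs": [{"job": "build-zlib", "artifacts": True}],
    },
}
converted = needs_to_dependencies(pipeline)
assert "needs" not in converted["build-curl"]
assert converted["build-curl"]["dependencies"] == ["build-zlib"]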

View File

@@ -1,363 +0,0 @@
# Copyright 2013-2024 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import collections
import collections.abc
import copy
import hashlib
import spack.util.spack_yaml as syaml
def sort_yaml_obj(obj):
if isinstance(obj, collections.abc.Mapping):
return syaml.syaml_dict(
(k, sort_yaml_obj(v)) for k, v in sorted(obj.items(), key=(lambda item: str(item[0])))
)
if isinstance(obj, collections.abc.Sequence) and not isinstance(obj, str):
return syaml.syaml_list(sort_yaml_obj(x) for x in obj)
return obj
def matches(obj, proto):
"""Returns True if the test object "obj" matches the prototype object
"proto".
If obj and proto are mappings, obj matches proto if (key in obj) and
(obj[key] matches proto[key]) for every key in proto.
If obj and proto are sequences, obj matches proto if they are of the same
length and (a matches b) for every (a,b) in zip(obj, proto).
Otherwise, obj matches proto if obj == proto.
Precondition: proto must not have any reference cycles
"""
if isinstance(obj, collections.abc.Mapping):
if not isinstance(proto, collections.abc.Mapping):
return False
return all((key in obj and matches(obj[key], val)) for key, val in proto.items())
if isinstance(obj, collections.abc.Sequence) and not isinstance(obj, str):
if not (isinstance(proto, collections.abc.Sequence) and not isinstance(proto, str)):
return False
if len(obj) != len(proto):
return False
return all(matches(obj[index], val) for index, val in enumerate(proto))
return obj == proto
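# Behavior sketch with a hypothetical job fragment, assuming matches() as
# defined above: every key of the prototype must be present and match, and
# sequences only match when they have equal length.
job = {"stage": "stage-0", "tags": ["tag-0", "tag-1"]}
assert matches(job, {"tags": ["tag-0", "tag-1"]})
assert not matches(job, {"tags": ["tag-0"]})
assert not matches({}, [])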
def subkeys(obj, proto):
"""Returns the test mapping "obj" after factoring out the items it has in
common with the prototype mapping "proto".
Consider a recursive merge operation, merge(a, b) on mappings a and b, that
returns a mapping, m, whose keys are the union of the keys of a and b, and
for every such key, "k", its corresponding value is:
- merge(a[key], b[key]) if a[key] and b[key] are mappings, or
- b[key] if (key in b) and not matches(a[key], b[key]), or
- a[key] otherwise
If obj and proto are mappings, the returned object is the smallest object,
"a", such that merge(a, proto) matches obj.
Otherwise, obj is returned.
"""
if not (
isinstance(obj, collections.abc.Mapping) and isinstance(proto, collections.abc.Mapping)
):
return obj
new_obj = {}
for key, value in obj.items():
if key not in proto:
new_obj[key] = value
continue
if matches(value, proto[key]) and matches(proto[key], value):
continue
if isinstance(value, collections.abc.Mapping):
new_obj[key] = subkeys(value, proto[key])
continue
new_obj[key] = value
return new_obj
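# Sketch with a hypothetical job, assuming subkeys() as defined above: items
# that match the prototype are factored out, nested mappings are reduced
# recursively, and everything else is kept verbatim.
job = {"variables": {"A": "1", "B": "2"}, "script": ["spack ci rebuild"]}
assert subkeys(job, {"variables": {"A": "1"}}) == {
    "variables": {"B": "2"},
    "script": ["spack ci rebuild"],
}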
def add_extends(yaml, key):
"""Modifies the given object "yaml" so that it includes an "extends" key
whose value features "key".
If "extends" is not in yaml, then yaml is modified such that
yaml["extends"] == key.
If yaml["extends"] is a str, then yaml is modified such that
yaml["extends"] == [yaml["extends"], key]
If yaml["extends"] is a list that does not include key, then key is
appended to the list.
Otherwise, yaml is left unchanged.
"""
has_key = "extends" in yaml
extends = yaml.get("extends")
if has_key and not isinstance(extends, (str, collections.abc.Sequence)):
return
if extends is None:
yaml["extends"] = key
return
if isinstance(extends, str):
if extends != key:
yaml["extends"] = [extends, key]
return
if key not in extends:
extends.append(key)
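# Sketch of the three mutation cases, using hypothetical ".c0"/".c1" anchors:
job = {}
add_extends(job, ".c0")
assert job["extends"] == ".c0"            # no prior "extends": stored as a string
add_extends(job, ".c1")
assert job["extends"] == [".c0", ".c1"]   # string promoted to a list
add_extends(job, ".c1")
assert job["extends"] == [".c0", ".c1"]   # duplicates are not appended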
def common_subobject(yaml, sub):
"""Factor prototype object "sub" out of the values of mapping "yaml".
Consider a modified copy of yaml, "new", where for each key, "key" in yaml:
- If yaml[key] matches sub, then new[key] = subkeys(yaml[key], sub).
- Otherwise, new[key] = yaml[key].
If the above match criterion is not satisfied for any such key, then (yaml,
None) is returned and the yaml object is left unchanged.
Otherwise, each matching value in new is modified as in
add_extends(new[key], common_key), and then new[common_key] is set to sub.
The common_key value is chosen such that it does not match any preexisting
key in new. In this case, (new, common_key) is returned.
"""
match_list = set(k for k, v in yaml.items() if matches(v, sub))
if not match_list:
return yaml, None
common_prefix = ".c"
common_index = 0
while True:
common_key = "".join((common_prefix, str(common_index)))
if common_key not in yaml:
break
common_index += 1
new_yaml = {}
for key, val in yaml.items():
new_yaml[key] = copy.deepcopy(val)
if not matches(val, sub):
continue
new_yaml[key] = subkeys(new_yaml[key], sub)
add_extends(new_yaml[key], common_key)
new_yaml[common_key] = sub
return new_yaml, common_key
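# Sketch with two hypothetical jobs that share the same tags: the shared
# fragment is hoisted into a generated ".c0" entry that both jobs extend.
pipeline = {
    "job-a": {"tags": ["tag-0"], "script": ["build a"]},
    "job-b": {"tags": ["tag-0"], "script": ["build b"]},
}
factored, key = common_subobject(pipeline, {"tags": ["tag-0"]})
assert key == ".c0"
assert factored[".c0"] == {"tags": ["tag-0"]}
assert factored["job-a"] == {"script": ["build a"], "extends": ".c0"}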
def print_delta(name, old, new, applied=None):
delta = new - old
reldelta = (1000 * delta) // old
reldelta = (reldelta // 10, reldelta % 10)
if applied is None:
applied = new <= old
print(
"\n".join(
(
"{0} {1}:",
" before: {2: 10d}",
" after : {3: 10d}",
" delta : {4:+10d} ({5:=+3d}.{6}%)",
)
).format(name, ("+" if applied else "x"), old, new, delta, reldelta[0], reldelta[1])
)
def try_optimization_pass(name, yaml, optimization_pass, *args, **kwargs):
"""Try applying an optimization pass and return information about the
result
"name" is a string describing the nature of the pass. If it is a non-empty
string, summary statistics are also printed to stdout.
"yaml" is the object to apply the pass to.
"optimization_pass" is the function implementing the pass to be applied.
"args" and "kwargs" are the additional arguments to pass to optimization
pass. The pass is applied as
>>> (new_yaml, *other_results) = optimization_pass(yaml, *args, **kwargs)
The pass's results are greedily rejected if it does not modify the original
yaml document, or if it produces a yaml document that serializes to a
larger string.
Returns (new_yaml, yaml, applied, other_results) if applied, or
(yaml, new_yaml, applied, other_results) otherwise.
"""
result = optimization_pass(yaml, *args, **kwargs)
new_yaml, other_results = result[0], result[1:]
if new_yaml is yaml:
# pass was not applied
return (yaml, new_yaml, False, other_results)
pre_size = len(syaml.dump_config(sort_yaml_obj(yaml), default_flow_style=True))
post_size = len(syaml.dump_config(sort_yaml_obj(new_yaml), default_flow_style=True))
# pass makes the size worse: not applying
applied = post_size <= pre_size
if applied:
yaml, new_yaml = new_yaml, yaml
if name:
print_delta(name, pre_size, post_size, applied)
return (yaml, new_yaml, applied, other_results)
def build_histogram(iterator, key):
"""Builds a histogram of values given an iterable of mappings and a key.
For each mapping "m" with key "key" in iterator, the value m[key] is
considered.
Returns a list of tuples (hash, count, proportion, value), where
- "hash" is a sha1sum hash of the value.
- "count" is the number of occurences of values that hash to "hash".
- "proportion" is the proportion of all values considered above that
hash to "hash".
- "value" is one of the values considered above that hash to "hash".
Which value is chosen when multiple values hash to the same "hash" is
undefined.
The list is sorted in descending order by count, yielding the most
frequently occurring hashes first.
"""
buckets = collections.defaultdict(int)
values = {}
num_objects = 0
for obj in iterator:
num_objects += 1
try:
val = obj[key]
except (KeyError, TypeError):
continue
value_hash = hashlib.sha1()
value_hash.update(syaml.dump_config(sort_yaml_obj(val)).encode())
value_hash = value_hash.hexdigest()
buckets[value_hash] += 1
values[value_hash] = val
return [
(h, buckets[h], float(buckets[h]) / num_objects, values[h])
for h in sorted(buckets.keys(), key=lambda k: -buckets[k])
]
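# Sketch with hypothetical jobs, assuming build_histogram() as defined above:
# proportions are computed against *all* objects seen, including ones that
# lack the key entirely.
jobs = [{"tags": ["t0"]}, {"tags": ["t0"]}, {"tags": ["t1"]}, {"script": ["x"]}]
histogram = build_histogram(jobs, "tags")
assert [(count, prop, value) for _, count, prop, value in histogram] == [
    (2, 0.5, ["t0"]),
    (1, 0.25, ["t1"]),
]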
def optimizer(yaml):
original_size = len(syaml.dump_config(sort_yaml_obj(yaml), default_flow_style=True))
# try factoring out commonly repeated portions
common_job = {
"variables": {"SPACK_COMPILER_ACTION": "NONE"},
"after_script": ['rm -rf "./spack"'],
"artifacts": {"paths": ["jobs_scratch_dir", "cdash_report"], "when": "always"},
}
# look for a list of tags that appear frequently
_, count, proportion, tags = next(iter(build_histogram(yaml.values(), "tags")), (None,) * 4)
# If a list of tags is found, and there are more than one job that uses it,
# *and* the jobs that do use it represent at least 70% of all jobs, then
# add the list to the prototype object.
if tags and count > 1 and proportion >= 0.70:
common_job["tags"] = tags
# apply common object factorization
yaml, other, applied, rest = try_optimization_pass(
"general common object factorization", yaml, common_subobject, common_job
)
# look for a common script, and try factoring that out
_, count, proportion, script = next(
iter(build_histogram(yaml.values(), "script")), (None,) * 4
)
if script and count > 1 and proportion >= 0.70:
yaml, other, applied, rest = try_optimization_pass(
"script factorization", yaml, common_subobject, {"script": script}
)
# look for a common before_script, and try factoring that out
_, count, proportion, script = next(
iter(build_histogram(yaml.values(), "before_script")), (None,) * 4
)
if script and count > 1 and proportion >= 0.70:
yaml, other, applied, rest = try_optimization_pass(
"before_script factorization", yaml, common_subobject, {"before_script": script}
)
# Look specifically for the SPACK_ROOT_SPEC environment variables.
# Try to factor them out.
h = build_histogram(
(getattr(val, "get", lambda *args: {})("variables") for val in yaml.values()),
"SPACK_ROOT_SPEC",
)
# In this case, we try to factor out *all* instances of the SPACK_ROOT_SPEC
# environment variable; not just the one that appears with the greatest
# frequency. We only require that more than 1 job uses a given instance's
# value, because we expect the value to be very large, and so expect even
# few-to-one factorizations to yield large space savings.
counter = 0
for _, count, proportion, spec in h:
if count <= 1:
continue
counter += 1
yaml, other, applied, rest = try_optimization_pass(
"SPACK_ROOT_SPEC factorization ({count})".format(count=counter),
yaml,
common_subobject,
{"variables": {"SPACK_ROOT_SPEC": spec}},
)
new_size = len(syaml.dump_config(sort_yaml_obj(yaml), default_flow_style=True))
print("\n")
print_delta("overall summary", original_size, new_size)
print("\n")
return yaml
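# End-to-end sketch on a hypothetical two-job pipeline: the repeated
# boilerplate (after_script, artifacts, SPACK_COMPILER_ACTION) and the large
# SPACK_ROOT_SPEC value get factored into generated ".cN" entries, and the
# before/after size deltas are printed along the way.
def _job():
    return {
        "script": ["spack ci rebuild"],
        "after_script": ['rm -rf "./spack"'],
        "artifacts": {"paths": ["jobs_scratch_dir", "cdash_report"], "when": "always"},
        "variables": {"SPACK_COMPILER_ACTION": "NONE", "SPACK_ROOT_SPEC": "x" * 500},
    }
optimized = optimizer({"build-a": _job(), "build-b": _job()})
assert any(key.startswith(".c") for key in optimized)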

View File

@@ -6,6 +6,7 @@
import json
import os
import shutil
import warnings
from urllib.parse import urlparse, urlunparse
import llnl.util.filesystem as fs
@@ -73,7 +74,7 @@ def setup_parser(subparser):
"--optimize",
action="store_true",
default=False,
help="(experimental) optimize the gitlab yaml file for size\n\n"
help="(DEPRECATED) optimize the gitlab yaml file for size\n\n"
"run the generated document through a series of optimization passes "
"designed to reduce the size of the generated file",
)
@@ -81,7 +82,7 @@ def setup_parser(subparser):
"--dependencies",
action="store_true",
default=False,
help="(experimental) disable DAG scheduling (use 'plain' dependencies)",
help="(DEPRECATED) disable DAG scheduling (use 'plain' dependencies)",
)
generate.add_argument(
"--buildcache-destination",
@@ -200,6 +201,18 @@ def ci_generate(args):
before invoking this command. the value must be the CDash authorization token needed to create
a build group and register all generated jobs under it
"""
if args.optimize:
warnings.warn(
"The --optimize option has been deprecated, and currently has no effect. "
"It will be removed in Spack v0.24."
)
if args.dependencies:
warnings.warn(
"The --dependencies option has been deprecated, and currently has no effect. "
"It will be removed in Spack v0.24."
)
env = spack.cmd.require_active_env(cmd_name="ci generate")
if args.copy_to:
@@ -212,8 +225,6 @@ def ci_generate(args):
output_file = args.output_file
copy_yaml_to = args.copy_to
run_optimizer = args.optimize
use_dependencies = args.dependencies
prune_dag = args.prune_dag
index_only = args.index_only
artifacts_root = args.artifacts_root
@@ -234,8 +245,6 @@ def ci_generate(args):
output_file,
prune_dag=prune_dag,
check_index_only=index_only,
run_optimizer=run_optimizer,
use_dependencies=use_dependencies,
artifacts_root=artifacts_root,
remote_mirror_override=buildcache_destination,
)

View File

@@ -2,7 +2,6 @@
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import itertools
import os
import subprocess
@@ -11,15 +10,12 @@
import llnl.util.filesystem as fs
import spack.ci as ci
import spack.ci_needs_workaround as cinw
import spack.ci_optimization as ci_opt
import spack.config
import spack.environment as ev
import spack.error
import spack.paths as spack_paths
import spack.util.git
import spack.util.gpg
import spack.util.spack_yaml as syaml
@pytest.fixture
@@ -203,164 +199,6 @@ def __call__(self, *args, **kwargs):
assert "Unable to merge {0}".format(c1) in err
@pytest.mark.parametrize("obj, proto", [({}, [])])
def test_ci_opt_argument_checking(obj, proto):
"""Check that matches() and subkeys() return False when `proto` is not a dict."""
assert not ci_opt.matches(obj, proto)
assert not ci_opt.subkeys(obj, proto)
@pytest.mark.parametrize("yaml", [{"extends": 1}])
def test_ci_opt_add_extends_non_sequence(yaml):
"""Check that add_extends() exits if 'extends' is not a sequence."""
yaml_copy = yaml.copy()
ci_opt.add_extends(yaml, None)
assert yaml == yaml_copy
def test_ci_workarounds():
fake_root_spec = "x" * 544
fake_spack_ref = "x" * 40
common_variables = {"SPACK_IS_PR_PIPELINE": "False"}
common_before_script = [
'git clone "https://github.com/spack/spack"',
" && ".join(("pushd ./spack", 'git checkout "{ref}"'.format(ref=fake_spack_ref), "popd")),
'. "./spack/share/spack/setup-env.sh"',
]
def make_build_job(name, deps, stage, use_artifact_buildcache, optimize, use_dependencies):
variables = common_variables.copy()
variables["SPACK_JOB_SPEC_PKG_NAME"] = name
result = {
"stage": stage,
"tags": ["tag-0", "tag-1"],
"artifacts": {
"paths": ["jobs_scratch_dir", "cdash_report", name + ".spec.json", name],
"when": "always",
},
"retry": {"max": 2, "when": ["always"]},
"after_script": ['rm -rf "./spack"'],
"script": ["spack ci rebuild"],
"image": {"name": "spack/centos7", "entrypoint": [""]},
}
if optimize:
result["extends"] = [".c0", ".c1"]
else:
variables["SPACK_ROOT_SPEC"] = fake_root_spec
result["before_script"] = common_before_script
result["variables"] = variables
if use_dependencies:
result["dependencies"] = list(deps) if use_artifact_buildcache else []
else:
result["needs"] = [{"job": dep, "artifacts": use_artifact_buildcache} for dep in deps]
return {name: result}
def make_rebuild_index_job(use_artifact_buildcache, optimize, use_dependencies):
result = {
"stage": "stage-rebuild-index",
"script": "spack buildcache update-index s3://mirror",
"tags": ["tag-0", "tag-1"],
"image": {"name": "spack/centos7", "entrypoint": [""]},
"after_script": ['rm -rf "./spack"'],
}
if optimize:
result["extends"] = ".c0"
else:
result["before_script"] = common_before_script
return {"rebuild-index": result}
def make_factored_jobs(optimize):
return (
{
".c0": {"before_script": common_before_script},
".c1": {"variables": {"SPACK_ROOT_SPEC": fake_root_spec}},
}
if optimize
else {}
)
def make_stage_list(num_build_stages):
return {
"stages": (
["-".join(("stage", str(i))) for i in range(num_build_stages)]
+ ["stage-rebuild-index"]
)
}
def make_yaml_obj(use_artifact_buildcache, optimize, use_dependencies):
result = {}
result.update(
make_build_job(
"pkg-a", [], "stage-0", use_artifact_buildcache, optimize, use_dependencies
)
)
result.update(
make_build_job(
"pkg-b", ["pkg-a"], "stage-1", use_artifact_buildcache, optimize, use_dependencies
)
)
result.update(
make_build_job(
"pkg-c",
["pkg-a", "pkg-b"],
"stage-2",
use_artifact_buildcache,
optimize,
use_dependencies,
)
)
result.update(make_rebuild_index_job(use_artifact_buildcache, optimize, use_dependencies))
result.update(make_factored_jobs(optimize))
result.update(make_stage_list(3))
return result
# test every combination of:
# use artifact buildcache: true or false
# run optimization pass: true or false
# convert needs to dependencies: true or false
for use_ab in (False, True):
original = make_yaml_obj(
use_artifact_buildcache=use_ab, optimize=False, use_dependencies=False
)
for opt, deps in itertools.product(*(((False, True),) * 2)):
# neither optimizing nor converting needs->dependencies
if not (opt or deps):
# therefore, nothing to test
continue
predicted = make_yaml_obj(
use_artifact_buildcache=use_ab, optimize=opt, use_dependencies=deps
)
actual = original.copy()
if opt:
actual = ci_opt.optimizer(actual)
if deps:
actual = cinw.needs_to_dependencies(actual)
predicted = syaml.dump_config(ci_opt.sort_yaml_obj(predicted), default_flow_style=True)
actual = syaml.dump_config(ci_opt.sort_yaml_obj(actual), default_flow_style=True)
assert predicted == actual
def test_get_spec_filter_list(mutable_mock_env_path, config, mutable_mock_repo):
"""Test that given an active environment and list of touched pkgs,
we get the right list of possibly-changed env specs"""

View File

@@ -1432,55 +1432,6 @@ def test_ci_generate_override_runner_attrs(
assert the_elt["after_script"][0] == "post step one"
def test_ci_generate_with_workarounds(
tmpdir, mutable_mock_env_path, install_mockery, mock_packages, monkeypatch, ci_base_environment
):
"""Make sure the post-processing cli workarounds do what they should"""
filename = str(tmpdir.join("spack.yaml"))
with open(filename, "w") as f:
f.write(
"""\
spack:
specs:
- callpath%gcc@=9.5
mirrors:
some-mirror: https://my.fake.mirror
ci:
pipeline-gen:
- submapping:
- match: ['%gcc@9.5']
build-job:
tags:
- donotcare
image: donotcare
enable-artifacts-buildcache: true
"""
)
with tmpdir.as_cwd():
env_cmd("create", "test", "./spack.yaml")
outputfile = str(tmpdir.join(".gitlab-ci.yml"))
with ev.read("test"):
ci_cmd("generate", "--output-file", outputfile, "--dependencies")
with open(outputfile) as f:
contents = f.read()
yaml_contents = syaml.load(contents)
found_one = False
non_rebuild_keys = ["workflow", "stages", "variables", "rebuild-index"]
for ci_key in yaml_contents.keys():
if ci_key not in non_rebuild_keys:
found_one = True
job_obj = yaml_contents[ci_key]
assert "needs" not in job_obj
assert "dependencies" in job_obj
assert found_one is True
@pytest.mark.disable_clean_stage_check
def test_ci_rebuild_index(
tmpdir,

View File

@@ -962,9 +962,9 @@ complete -c spack -n '__fish_spack_using_command ci generate' -l output-file -r
complete -c spack -n '__fish_spack_using_command ci generate' -l copy-to -r -f -a copy_to
complete -c spack -n '__fish_spack_using_command ci generate' -l copy-to -r -d 'path to additional directory for job files'
complete -c spack -n '__fish_spack_using_command ci generate' -l optimize -f -a optimize
complete -c spack -n '__fish_spack_using_command ci generate' -l optimize -d '(experimental) optimize the gitlab yaml file for size'
complete -c spack -n '__fish_spack_using_command ci generate' -l optimize -d '(DEPRECATED) optimize the gitlab yaml file for size'
complete -c spack -n '__fish_spack_using_command ci generate' -l dependencies -f -a dependencies
complete -c spack -n '__fish_spack_using_command ci generate' -l dependencies -d '(experimental) disable DAG scheduling (use \'plain\' dependencies)'
complete -c spack -n '__fish_spack_using_command ci generate' -l dependencies -d '(DEPRECATED) disable DAG scheduling (use \'plain\' dependencies)'
complete -c spack -n '__fish_spack_using_command ci generate' -l buildcache-destination -r -f -a buildcache_destination
complete -c spack -n '__fish_spack_using_command ci generate' -l buildcache-destination -r -d 'override the mirror configured in the environment'
complete -c spack -n '__fish_spack_using_command ci generate' -l prune-dag -f -a prune_dag