CI RESTful Configuration (#41622)

* CI: Add dynamic mapping section

* Doc: Add documentation for dynamic mapping section

* Add missing schema property

* Fixes from review

* query build fix up
* add warning output for dynamic mapping request errors

* Cleanup ci schema

* Add more protections for disabling/mitigating bad endpoints for dynamic
mapping

* Remove references to "gantry" in the docs

* Fixup rtd header

* Add unit testing for dynamic-mapping section

* Add arch to dynamic-mapping query string

* Tests and cleanup schema
This commit is contained in:
kwryankrattiger 2024-10-16 15:06:09 -05:00 committed by GitHub
parent e1ea9e12a6
commit 34c89c0f7b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 347 additions and 59 deletions

View File

@ -592,6 +592,77 @@ the attributes will be merged starting from the bottom match going up to the top
In the case that no match is found in a submapping section, no additional attributes will be applied.
^^^^^^^^^^^^^^^^^^^^^^^^
Dynamic Mapping Sections
^^^^^^^^^^^^^^^^^^^^^^^^
For large scale CI where cost optimization is required, dynamic mapping allows for the use of real-time
mapping schemes served by a web service. This type of mapping does not support the ``-remove`` type
behavior, but it does follow the rest of the merge rules for configurations.
The dynamic mapping service needs to implement a single REST API endpoint that answers requests of the form
``GET <URL>[:PORT][/PATH]?spec=<pkg_name@pkg_version +variant1+variant2 arch=<arch>%compiler@compiler_version>``.
Example request:
.. code-block::
https://my-dyn-mapping.spack.io/allocation?spec=zlib-ng@2.1.6 +compat+opt+shared+pic+new_strategies arch=linux-ubuntu20.04-x86_64_v3%gcc@12.0.0
With an example response that updates the Kubernetes request variables, overrides the max retries for GitLab,
and prepends a note about the modifications made by the my-dyn-mapping.spack.io service:
.. code-block::
200 OK
{
"variables":
{
"KUBERNETES_CPU_REQUEST": "500m",
"KUBERNETES_MEMORY_REQUEST": "2G"
},
"retry": { "max:": "1"},
"script+:":
[
"echo \"Job modified by my-dyn-mapping.spack.io\""
]
}
The ci.yaml configuration section takes the URL endpoint as well as a number of options to configure how responses are handled.
It is possible to specify a list of allowed and ignored configuration attributes under ``allow`` and ``ignore``
respectively. It is also possible to configure required attributes under the ``require`` section.
The client timeout and SSL verification can be configured using the ``timeout`` and ``verify_ssl`` options.
By default, ``timeout`` is set to the option in ``config:connect_timeout`` and ``verify_ssl`` is set to the option in ``config:verify_ssl``.
Passing header parameters to the request can be achieved through the ``header`` section. The values of the variables passed to the
header may be environment variables that are expanded at runtime, such as a private token configured on the runner.
Here is an example configuration pointing to ``my-dyn-mapping.spack.io/allocation``.
.. code-block:: yaml
ci:
pipeline-gen:
- dynamic-mapping:
endpoint: https://my-dyn-mapping.spack.io/allocation
timeout: 10
verify_ssl: True
header:
PRIVATE_TOKEN: ${MY_PRIVATE_TOKEN}
MY_CONFIG: "fuzz_allocation:false"
allow:
- variables
ignore:
- script
require: []
^^^^^^^^^^^^^
Bootstrapping
^^^^^^^^^^^^^

View File

@ -10,6 +10,7 @@
import os
import re
import shutil
import ssl
import stat
import subprocess
import sys
@ -19,14 +20,14 @@
from collections import defaultdict, namedtuple
from typing import Dict, List, Optional, Set, Tuple
from urllib.error import HTTPError, URLError
from urllib.parse import urlencode
from urllib.request import HTTPHandler, Request, build_opener
from urllib.parse import quote, urlencode, urlparse
from urllib.request import HTTPHandler, HTTPSHandler, Request, build_opener
import ruamel.yaml
import llnl.util.filesystem as fs
import llnl.util.tty as tty
from llnl.util.lang import memoized
from llnl.util.lang import Singleton, memoized
from llnl.util.tty.color import cescape, colorize
import spack
@ -50,6 +51,31 @@
from spack.reporters.cdash import SPACK_CDASH_TIMEOUT
from spack.reporters.cdash import build_stamp as cdash_build_stamp
def _urlopen():
    """Create the opener function used for dynamic-mapping requests.

    Two ``urllib`` openers are built up front, one whose HTTPS handler uses
    ``web_util.ssl_create_default_context()`` and one whose HTTPS handler uses
    an unverified SSL context. Both share the same Spack HTTP error handler.

    Returns:
        A function ``dispatch_open(fullurl, data=None, timeout=None, verify_ssl=True)``
        that opens ``fullurl`` with the opener selected by ``verify_ssl``.
    """
    error_handler = web_util.SpackHTTPDefaultErrorHandler()

    def _make_opener(ssl_context):
        # Plain HTTP handler plus an HTTPS handler bound to the given context
        return build_opener(HTTPHandler(), HTTPSHandler(context=ssl_context), error_handler)

    # One opener with certificate verification, one without
    verified_opener = _make_opener(web_util.ssl_create_default_context())
    unverified_opener = _make_opener(ssl._create_unverified_context())

    def dispatch_open(fullurl, data=None, timeout=None, verify_ssl=True):
        # The opener is chosen per call from the ``verify_ssl`` argument
        if not timeout:
            # A falsy timeout (None or 0) falls back to the configured value
            timeout = spack.config.get("config:connect_timeout", 1)
        chosen = verified_opener if verify_ssl else unverified_opener
        return chosen.open(fullurl, data, timeout)

    return dispatch_open
_dyn_mapping_urlopener = Singleton(_urlopen)
# See https://docs.gitlab.com/ee/ci/yaml/#retry for descriptions of conditions
JOB_RETRY_CONDITIONS = [
# "always",
@ -405,9 +431,20 @@ def __init__(self, ci_config, spec_labels, stages):
if name not in ["any", "build"]:
jobs[name] = self.__init_job("")
def __init_job(self, release_spec):
    """Initialize job object.

    Args:
        release_spec: the spec this job is generated for, or a falsy
            placeholder (callers pass ``""``) for jobs not tied to a spec.

    Returns:
        A dict with the keys ``"spec"`` and ``"attributes"``. When
        ``release_spec`` is truthy, ``attributes["variables"]`` is pre-seeded
        with the ``SPACK_JOB_SPEC_*`` variables describing the spec; the
        dynamic-mapping query is built from these variables.
    """
    job_object = {"spec": release_spec, "attributes": {}}

    if release_spec:
        job_object["attributes"]["variables"] = {
            "SPACK_JOB_SPEC_DAG_HASH": release_spec.dag_hash(),
            "SPACK_JOB_SPEC_PKG_NAME": release_spec.name,
            "SPACK_JOB_SPEC_PKG_VERSION": release_spec.format("{version}"),
            "SPACK_JOB_SPEC_COMPILER_NAME": release_spec.format("{compiler.name}"),
            "SPACK_JOB_SPEC_COMPILER_VERSION": release_spec.format("{compiler.version}"),
            "SPACK_JOB_SPEC_ARCH": release_spec.format("{architecture}"),
            "SPACK_JOB_SPEC_VARIANTS": release_spec.format("{variants}"),
        }

    return job_object
def __is_named(self, section):
"""Check if a pipeline-gen configuration section is for a named job,
@ -500,6 +537,7 @@ def generate_ir(self):
for section in reversed(pipeline_gen):
name = self.__is_named(section)
has_submapping = "submapping" in section
has_dynmapping = "dynamic-mapping" in section
section = cfg.InternalConfigScope._process_dict_keyname_overrides(section)
if name:
@ -542,6 +580,108 @@ def _apply_section(dest, src):
job["attributes"] = self.__apply_submapping(
job["attributes"], job["spec"], section
)
elif has_dynmapping:
mapping = section["dynamic-mapping"]
dynmap_name = mapping.get("name")
# Check if this section should be skipped
dynmap_skip = os.environ.get("SPACK_CI_SKIP_DYNAMIC_MAPPING")
if dynmap_name and dynmap_skip:
if re.match(dynmap_skip, dynmap_name):
continue
# Get the endpoint
endpoint = mapping["endpoint"]
endpoint_url = urlparse(endpoint)
# Configure the request header
header = {"User-Agent": web_util.SPACK_USER_AGENT}
header.update(mapping.get("header", {}))
# Expand header environment variables
# ie. if tokens are passed
for value in header.values():
value = os.path.expandvars(value)
verify_ssl = mapping.get("verify_ssl", spack.config.get("config:verify_ssl", True))
timeout = mapping.get("timeout", spack.config.get("config:connect_timeout", 1))
required = mapping.get("require", [])
allowed = mapping.get("allow", [])
ignored = mapping.get("ignore", [])
# required keys are implicitly allowed
allowed = sorted(set(allowed + required))
ignored = sorted(set(ignored))
required = sorted(set(required))
# Make sure required things are not also ignored
assert not any([ikey in required for ikey in ignored])
def job_query(job):
job_vars = job["attributes"]["variables"]
query = (
"{SPACK_JOB_SPEC_PKG_NAME}@{SPACK_JOB_SPEC_PKG_VERSION}"
# The preceding spaces are required (ref. https://github.com/spack/spack-gantry/blob/develop/docs/api.md#allocation)
" {SPACK_JOB_SPEC_VARIANTS}"
" arch={SPACK_JOB_SPEC_ARCH}"
"%{SPACK_JOB_SPEC_COMPILER_NAME}@{SPACK_JOB_SPEC_COMPILER_VERSION}"
).format_map(job_vars)
return f"spec={quote(query)}"
for job in jobs.values():
if not job["spec"]:
continue
# Create request for this job
query = job_query(job)
request = Request(
endpoint_url._replace(query=query).geturl(), headers=header, method="GET"
)
try:
response = _dyn_mapping_urlopener(
request, verify_ssl=verify_ssl, timeout=timeout
)
except Exception as e:
# For now just ignore any errors from dynamic mapping and continue
# This is still experimental, and failures should not stop CI
# from running normally
tty.warn(f"Failed to fetch dynamic mapping for query:\n\t{query}")
tty.warn(f"{e}")
continue
config = json.load(codecs.getreader("utf-8")(response))
# Strip ignore keys
if ignored:
for key in ignored:
if key in config:
config.pop(key)
# Only keep allowed keys
clean_config = {}
if allowed:
for key in allowed:
if key in config:
clean_config[key] = config[key]
else:
clean_config = config
# Verify all of the required keys are present
if required:
missing_keys = []
for key in required:
if key not in clean_config.keys():
missing_keys.append(key)
if missing_keys:
tty.warn(f"Response missing required keys: {missing_keys}")
if clean_config:
job["attributes"] = spack.config.merge_yaml(
job.get("attributes", {}), clean_config
)
for _, job in jobs.items():
if job["spec"]:
@ -952,15 +1092,6 @@ def main_script_replacements(cmd):
job_name = get_job_name(release_spec, build_group)
job_vars = job_object.setdefault("variables", {})
job_vars["SPACK_JOB_SPEC_DAG_HASH"] = release_spec_dag_hash
job_vars["SPACK_JOB_SPEC_PKG_NAME"] = release_spec.name
job_vars["SPACK_JOB_SPEC_PKG_VERSION"] = release_spec.format("{version}")
job_vars["SPACK_JOB_SPEC_COMPILER_NAME"] = release_spec.format("{compiler.name}")
job_vars["SPACK_JOB_SPEC_COMPILER_VERSION"] = release_spec.format("{compiler.version}")
job_vars["SPACK_JOB_SPEC_ARCH"] = release_spec.format("{architecture}")
job_vars["SPACK_JOB_SPEC_VARIANTS"] = release_spec.format("{variants}")
job_object["needs"] = []
if spec_label in dependencies:
if enable_artifacts_buildcache:
@ -1038,6 +1169,7 @@ def main_script_replacements(cmd):
# Let downstream jobs know whether the spec needed rebuilding, regardless
# whether DAG pruning was enabled or not.
job_vars = job_object["variables"]
job_vars["SPACK_SPEC_NEEDS_REBUILD"] = str(rebuild_spec)
if cdash_handler:

View File

@ -77,58 +77,54 @@
},
}
# Schema of a ``dynamic-mapping`` entry in the ``pipeline-gen`` list.
dynamic_mapping_schema = {
    "type": "object",
    "additionalProperties": False,
    "required": ["dynamic-mapping"],
    "properties": {
        "dynamic-mapping": {
            "type": "object",
            "additionalProperties": False,
            "required": ["endpoint"],
            "properties": {
                "name": {"type": "string"},
                # "endpoint" cannot have an http patternProperties constraint as it is a
                # required field. The constraint is applied in code.
                "endpoint": {"type": "string"},
                "timeout": {"type": "integer", "minimum": 0},
                # No static default: when unset, the code falls back to config:verify_ssl
                "verify_ssl": {"type": "boolean"},
                # Arbitrary header names mapped to string values. Forbidding all
                # additional properties here would reject every non-empty header.
                "header": {"type": "object", "additionalProperties": {"type": "string"}},
                "allow": {"type": "array", "items": {"type": "string"}},
                "require": {"type": "array", "items": {"type": "string"}},
                "ignore": {"type": "array", "items": {"type": "string"}},
            },
        }
    },
}
def job_schema(name: str):
    """Return the schema of a named job section.

    The section is an object that may only contain ``<name>-job`` and
    ``<name>-job-remove``, both validated against ``attributes_schema``.
    """
    job_key = f"{name}-job"
    remove_key = f"{name}-job-remove"

    schema = {"type": "object", "additionalProperties": False}
    schema["properties"] = {job_key: attributes_schema, remove_key: attributes_schema}
    return schema
# Each entry of the ``pipeline-gen`` list must match exactly one of: a
# submapping section, a dynamic-mapping section, or one of the named job sections.
pipeline_gen_schema = {
    "type": "array",
    "items": {
        "oneOf": [
            submapping_schema,
            dynamic_mapping_schema,
            job_schema("any"),
            job_schema("build"),
            job_schema("cleanup"),
            job_schema("copy"),
            job_schema("noop"),
            job_schema("reindex"),
            job_schema("signing"),
        ]
    },
}
core_shared_properties = union_dicts(

View File

@ -7,6 +7,7 @@
import os
import pathlib
import shutil
from io import BytesIO
from typing import NamedTuple
import jsonschema
@ -1846,3 +1847,91 @@ def test_ci_generate_mirror_config(
pipeline_doc = syaml.load(f)
assert fst not in pipeline_doc["rebuild-index"]["script"][0]
assert snd in pipeline_doc["rebuild-index"]["script"][0]
def dynamic_mapping_setup(tmpdir):
    """Write a ``spack.yaml`` with a ``dynamic-mapping`` section into ``tmpdir``.

    The environment requests ``pkg-a`` and configures a dynamic mapping that
    requires ``variables``, ignores ``ignored_field`` and allows ``variables``
    and ``retry``.

    Returns:
        The value of ``ci.get_job_name`` for the concretized ``pkg-a`` spec.
    """
    # NOTE(review): the YAML below is reproduced as it appears in this view, where
    # leading whitespace looks stripped -- confirm its nesting against the real file.
    filename = str(tmpdir.join("spack.yaml"))
    with open(filename, "w") as f:
        f.write(
            """\
spack:
specs:
- pkg-a
mirrors:
some-mirror: https://my.fake.mirror
ci:
pipeline-gen:
- dynamic-mapping:
endpoint: https://fake.spack.io/mapper
require: ["variables"]
ignore: ["ignored_field"]
allow: ["variables", "retry"]
"""
        )
    # Concretize the same spec the environment requests to compute its job name
    spec_a = Spec("pkg-a")
    spec_a.concretize()
    return ci.get_job_name(spec_a)
def test_ci_dynamic_mapping_empty(
    tmpdir,
    working_env,
    mutable_mock_env_path,
    install_mockery,
    mock_packages,
    monkeypatch,
    ci_base_environment,
):
    """An empty mapping response is reported as missing the required key."""

    def empty_response_opener(*args, **kwargs):
        # Stand-in for the real opener: every request gets an empty JSON object
        return BytesIO(b"{}")

    monkeypatch.setattr(ci, "_dyn_mapping_urlopener", empty_response_opener)

    dynamic_mapping_setup(tmpdir)

    with tmpdir.as_cwd():
        env_cmd("create", "test", "./spack.yaml")
        gitlab_ci_file = str(tmpdir.join(".gitlab-ci.yml"))

        with ev.read("test"):
            generate_output = ci_cmd("generate", "--output-file", gitlab_ci_file)

            assert "Response missing required keys: ['variables']" in generate_output
def test_ci_dynamic_mapping_full(
    tmpdir,
    working_env,
    mutable_mock_env_path,
    install_mockery,
    mock_packages,
    monkeypatch,
    ci_base_environment,
):
    """Allowed keys of a mapping response reach the job; other keys do not."""
    # The fake service answers every request with one allowed key
    # ("variables"), one ignored key and one key that is not in the allow list.
    response_payload = {
        "variables": {"MY_VAR": "hello"},
        "ignored_field": 0,
        "unallowed_field": 0,
    }

    def full_response_opener(*args, **kwargs):
        return BytesIO(json.dumps(response_payload).encode())

    monkeypatch.setattr(ci, "_dyn_mapping_urlopener", full_response_opener)

    label = dynamic_mapping_setup(tmpdir)

    with tmpdir.as_cwd():
        env_cmd("create", "test", "./spack.yaml")
        gitlab_ci_file = str(tmpdir.join(".gitlab-ci.yml"))

        with ev.read("test"):
            ci_cmd("generate", "--output-file", gitlab_ci_file)

            with open(gitlab_ci_file) as of:
                pipeline_doc = syaml.load(of.read())
                assert label in pipeline_doc
                job = pipeline_doc[label]

                job_variables = job.get("variables", {})
                assert job_variables.get("MY_VAR") == "hello"
                assert "ignored_field" not in job
                assert "unallowed_field" not in job