CI RESTful Configuration (#41622)

* CI: Add dynamic mapping section

* Doc: Add documentation for dynamic mapping section

* Add missing schema property

* Fixes from review

* query build fix up
* add warning output for dynamic mapping request errors

* Cleanup ci schema

* Add more protections for disabling/mitigating bad endpoints for dynamic
mapping

* Remove references to "gantry" in the docs

* Fixup rtd header

* Add unit testing for dynamic-mapping section

* Add arch to dynamic-mapping query string

* Tests and cleanup schema
This commit is contained in:
kwryankrattiger 2024-10-16 15:06:09 -05:00 committed by GitHub
parent e1ea9e12a6
commit 34c89c0f7b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 347 additions and 59 deletions

View File

@ -592,6 +592,77 @@ the attributes will be merged starting from the bottom match going up to the top
In the case that no match is found in a submapping section, no additional attributes will be applied. In the case that no match is found in a submapping section, no additional attributes will be applied.
^^^^^^^^^^^^^^^^^^^^^^^^
Dynamic Mapping Sections
^^^^^^^^^^^^^^^^^^^^^^^^
For large scale CI where cost optimization is required, dynamic mapping allows for the use of real-time
mapping schemes served by a web service. This type of mapping does not support the ``-remove`` type
behavior, but it does follow the rest of the merge rules for configurations.
The dynamic mapping service needs to implement a single REST API interface for getting
requests ``GET <URL>[:PORT][/PATH]?spec=<pkg_name@pkg_version +variant1+variant2 arch=<arch>%compiler@compiler_version>``.
Example request:
.. code-block::
https://my-dyn-mapping.spack.io/allocation?spec=zlib-ng@2.1.6 +compat+opt+shared+pic+new_strategies arch=linux-ubuntu20.04-x86_64_v3%gcc@12.0.0
With an example response that updates the Kubernetes request variables, overrides the max retries for GitLab,
and prepends a note about the modifications made by the my-dyn-mapping.spack.io service.
.. code-block::
200 OK
{
"variables":
{
"KUBERNETES_CPU_REQUEST": "500m",
"KUBERNETES_MEMORY_REQUEST": "2G"
},
"retry": { "max:": "1"},
"script+:":
[
"echo \"Job modified by my-dyn-mapping.spack.io\""
]
}
The ci.yaml configuration section takes the URL endpoint as well as a number of options to configure how responses are handled.
It is possible to specify a list of allowed and ignored configuration attributes under ``allow`` and ``ignore``
respectively. It is also possible to configure required attributes under the ``require`` section.
The client timeout and SSL verification can be configured using the ``timeout`` and ``verify_ssl`` options.
By default, ``timeout`` is set to the value of ``config:connect_timeout`` and ``verify_ssl`` is set to the value of ``config:verify_ssl``.
Passing header parameters to the request can be achieved through the ``header`` section. The values of the variables passed to the
header may be environment variables that are expanded at runtime, such as a private token configured on the runner.
Here is an example configuration pointing to ``my-dyn-mapping.spack.io/allocation``.
.. code-block:: yaml
ci:
pipeline-gen:
- dynamic-mapping:
endpoint: https://my-dyn-mapping.spack.io/allocation
timeout: 10
verify_ssl: True
header:
PRIVATE_TOKEN: ${MY_PRIVATE_TOKEN}
MY_CONFIG: "fuzz_allocation:false"
allow:
- variables
ignore:
- script
require: []
^^^^^^^^^^^^^ ^^^^^^^^^^^^^
Bootstrapping Bootstrapping
^^^^^^^^^^^^^ ^^^^^^^^^^^^^

View File

@ -10,6 +10,7 @@
import os import os
import re import re
import shutil import shutil
import ssl
import stat import stat
import subprocess import subprocess
import sys import sys
@ -19,14 +20,14 @@
from collections import defaultdict, namedtuple from collections import defaultdict, namedtuple
from typing import Dict, List, Optional, Set, Tuple from typing import Dict, List, Optional, Set, Tuple
from urllib.error import HTTPError, URLError from urllib.error import HTTPError, URLError
from urllib.parse import urlencode from urllib.parse import quote, urlencode, urlparse
from urllib.request import HTTPHandler, Request, build_opener from urllib.request import HTTPHandler, HTTPSHandler, Request, build_opener
import ruamel.yaml import ruamel.yaml
import llnl.util.filesystem as fs import llnl.util.filesystem as fs
import llnl.util.tty as tty import llnl.util.tty as tty
from llnl.util.lang import memoized from llnl.util.lang import Singleton, memoized
from llnl.util.tty.color import cescape, colorize from llnl.util.tty.color import cescape, colorize
import spack import spack
@ -50,6 +51,31 @@
from spack.reporters.cdash import SPACK_CDASH_TIMEOUT from spack.reporters.cdash import SPACK_CDASH_TIMEOUT
from spack.reporters.cdash import build_stamp as cdash_build_stamp from spack.reporters.cdash import build_stamp as cdash_build_stamp
def _urlopen():
    """Build the opener callable used for dynamic-mapping requests.

    Two openers are prepared up front: one whose HTTPS handler uses
    ``web_util.ssl_create_default_context()`` and one whose HTTPS handler uses an
    unverified SSL context. Both share a single Spack HTTP error handler.

    Returns:
        A callable ``dispatch_open(fullurl, data=None, timeout=None, verify_ssl=True)``
        that forwards to the opener selected by ``verify_ssl`` and returns the
        result of ``opener.open``.
    """
    shared_error_handler = web_util.SpackHTTPDefaultErrorHandler()

    def _opener_for(ssl_context):
        # Each opener gets its own HTTP/HTTPS handler instances
        return build_opener(
            HTTPHandler(), HTTPSHandler(context=ssl_context), shared_error_handler
        )

    openers = {
        # HTTPS with certificate verification
        True: _opener_for(web_util.ssl_create_default_context()),
        # HTTPS without certificate verification
        False: _opener_for(ssl._create_unverified_context()),
    }

    def dispatch_open(fullurl, data=None, timeout=None, verify_ssl=True):
        # The opener is chosen per call from the ``verify_ssl`` argument; a falsy
        # timeout falls back to config:connect_timeout.
        if not timeout:
            timeout = spack.config.get("config:connect_timeout", 1)
        return openers[bool(verify_ssl)].open(fullurl, data, timeout)

    return dispatch_open


_dyn_mapping_urlopener = Singleton(_urlopen)
# See https://docs.gitlab.com/ee/ci/yaml/#retry for descriptions of conditions # See https://docs.gitlab.com/ee/ci/yaml/#retry for descriptions of conditions
JOB_RETRY_CONDITIONS = [ JOB_RETRY_CONDITIONS = [
# "always", # "always",
@ -405,9 +431,20 @@ def __init__(self, ci_config, spec_labels, stages):
if name not in ["any", "build"]: if name not in ["any", "build"]:
jobs[name] = self.__init_job("") jobs[name] = self.__init_job("")
def __init_job(self, release_spec):
    """Create the job record for ``release_spec``.

    The record is ``{"spec": release_spec, "attributes": {...}}``. When
    ``release_spec`` is truthy, ``attributes["variables"]`` is pre-populated with
    the ``SPACK_JOB_SPEC_*`` values derived from the spec; otherwise the
    attributes start out empty.
    """
    attributes = {}

    if release_spec:
        attributes["variables"] = {
            "SPACK_JOB_SPEC_DAG_HASH": release_spec.dag_hash(),
            "SPACK_JOB_SPEC_PKG_NAME": release_spec.name,
            "SPACK_JOB_SPEC_PKG_VERSION": release_spec.format("{version}"),
            "SPACK_JOB_SPEC_COMPILER_NAME": release_spec.format("{compiler.name}"),
            "SPACK_JOB_SPEC_COMPILER_VERSION": release_spec.format("{compiler.version}"),
            "SPACK_JOB_SPEC_ARCH": release_spec.format("{architecture}"),
            "SPACK_JOB_SPEC_VARIANTS": release_spec.format("{variants}"),
        }

    return {"spec": release_spec, "attributes": attributes}
def __is_named(self, section): def __is_named(self, section):
"""Check if a pipeline-gen configuration section is for a named job, """Check if a pipeline-gen configuration section is for a named job,
@ -500,6 +537,7 @@ def generate_ir(self):
for section in reversed(pipeline_gen): for section in reversed(pipeline_gen):
name = self.__is_named(section) name = self.__is_named(section)
has_submapping = "submapping" in section has_submapping = "submapping" in section
has_dynmapping = "dynamic-mapping" in section
section = cfg.InternalConfigScope._process_dict_keyname_overrides(section) section = cfg.InternalConfigScope._process_dict_keyname_overrides(section)
if name: if name:
@ -542,6 +580,108 @@ def _apply_section(dest, src):
job["attributes"] = self.__apply_submapping( job["attributes"] = self.__apply_submapping(
job["attributes"], job["spec"], section job["attributes"], job["spec"], section
) )
elif has_dynmapping:
mapping = section["dynamic-mapping"]
dynmap_name = mapping.get("name")
# Check if this section should be skipped
dynmap_skip = os.environ.get("SPACK_CI_SKIP_DYNAMIC_MAPPING")
if dynmap_name and dynmap_skip:
if re.match(dynmap_skip, dynmap_name):
continue
# Get the endpoint
endpoint = mapping["endpoint"]
endpoint_url = urlparse(endpoint)
# Configure the request header
header = {"User-Agent": web_util.SPACK_USER_AGENT}
header.update(mapping.get("header", {}))
# Expand header environment variables
# ie. if tokens are passed
for value in header.values():
value = os.path.expandvars(value)
verify_ssl = mapping.get("verify_ssl", spack.config.get("config:verify_ssl", True))
timeout = mapping.get("timeout", spack.config.get("config:connect_timeout", 1))
required = mapping.get("require", [])
allowed = mapping.get("allow", [])
ignored = mapping.get("ignore", [])
# required keys are implicitly allowed
allowed = sorted(set(allowed + required))
ignored = sorted(set(ignored))
required = sorted(set(required))
# Make sure required things are not also ignored
assert not any([ikey in required for ikey in ignored])
def job_query(job):
    """Return the request query string (``spec=<url-quoted spec>``) for ``job``.

    The spec text is assembled from the ``SPACK_JOB_SPEC_*`` entries found in
    ``job["attributes"]["variables"]``.
    """
    spec_template = "".join(
        (
            "{SPACK_JOB_SPEC_PKG_NAME}@{SPACK_JOB_SPEC_PKG_VERSION}",
            # The preceding spaces are required (ref. https://github.com/spack/spack-gantry/blob/develop/docs/api.md#allocation)
            " {SPACK_JOB_SPEC_VARIANTS}",
            " arch={SPACK_JOB_SPEC_ARCH}",
            "%{SPACK_JOB_SPEC_COMPILER_NAME}@{SPACK_JOB_SPEC_COMPILER_VERSION}",
        )
    )
    spec_text = spec_template.format_map(job["attributes"]["variables"])
    return "spec=" + quote(spec_text)
# Query the dynamic-mapping endpoint once per job that has a spec and merge the
# filtered response into that job's attributes.
for job in jobs.values():
    if not job["spec"]:
        continue

    # Create request for this job
    query = job_query(job)
    request = Request(
        endpoint_url._replace(query=query).geturl(), headers=header, method="GET"
    )
    try:
        response = _dyn_mapping_urlopener(
            request, verify_ssl=verify_ssl, timeout=timeout
        )
        # Decode inside the guarded region so that a malformed body is handled
        # like any other endpoint failure, and always release the response.
        try:
            config = json.load(codecs.getreader("utf-8")(response))
        finally:
            response.close()
        if not isinstance(config, dict):
            raise ValueError("dynamic mapping response is not a JSON object")
    except Exception as e:
        # For now just ignore any errors from dynamic mapping and continue
        # This is still experimental, and failures should not stop CI
        # from running normally
        tty.warn(f"Failed to fetch dynamic mapping for query:\n\t{query}")
        tty.warn(f"{e}")
        continue

    # Strip ignore keys
    for key in ignored:
        config.pop(key, None)

    # Only keep allowed keys; an empty allow list keeps the whole response
    if allowed:
        clean_config = {key: config[key] for key in allowed if key in config}
    else:
        clean_config = config

    # Verify all of the required keys are present (missing keys only warn)
    missing_keys = [key for key in required if key not in clean_config]
    if missing_keys:
        tty.warn(f"Response missing required keys: {missing_keys}")

    if clean_config:
        job["attributes"] = spack.config.merge_yaml(
            job.get("attributes", {}), clean_config
        )
for _, job in jobs.items(): for _, job in jobs.items():
if job["spec"]: if job["spec"]:
@ -952,15 +1092,6 @@ def main_script_replacements(cmd):
job_name = get_job_name(release_spec, build_group) job_name = get_job_name(release_spec, build_group)
job_vars = job_object.setdefault("variables", {})
job_vars["SPACK_JOB_SPEC_DAG_HASH"] = release_spec_dag_hash
job_vars["SPACK_JOB_SPEC_PKG_NAME"] = release_spec.name
job_vars["SPACK_JOB_SPEC_PKG_VERSION"] = release_spec.format("{version}")
job_vars["SPACK_JOB_SPEC_COMPILER_NAME"] = release_spec.format("{compiler.name}")
job_vars["SPACK_JOB_SPEC_COMPILER_VERSION"] = release_spec.format("{compiler.version}")
job_vars["SPACK_JOB_SPEC_ARCH"] = release_spec.format("{architecture}")
job_vars["SPACK_JOB_SPEC_VARIANTS"] = release_spec.format("{variants}")
job_object["needs"] = [] job_object["needs"] = []
if spec_label in dependencies: if spec_label in dependencies:
if enable_artifacts_buildcache: if enable_artifacts_buildcache:
@ -1038,6 +1169,7 @@ def main_script_replacements(cmd):
# Let downstream jobs know whether the spec needed rebuilding, regardless # Let downstream jobs know whether the spec needed rebuilding, regardless
# whether DAG pruning was enabled or not. # whether DAG pruning was enabled or not.
job_vars = job_object["variables"]
job_vars["SPACK_SPEC_NEEDS_REBUILD"] = str(rebuild_spec) job_vars["SPACK_SPEC_NEEDS_REBUILD"] = str(rebuild_spec)
if cdash_handler: if cdash_handler:

View File

@ -77,58 +77,54 @@
}, },
} }
# Schema for a ``dynamic-mapping`` entry of ``pipeline-gen``: an object whose only
# key is "dynamic-mapping", holding the endpoint and the request/response options.
dynamic_mapping_schema = {
    "type": "object",
    "additionalProperties": False,
    "required": ["dynamic-mapping"],
    "properties": {
        "dynamic-mapping": {
            "type": "object",
            "required": ["endpoint"],
            "properties": {
                "name": {"type": "string"},
                # "endpoint" cannot have an http patternProperties constraint as it is a
                # required field. The constraint is applied in code.
                "endpoint": {"type": "string"},
                "timeout": {"type": "integer", "minimum": 0},
                # NOTE(review): the code reading this section falls back to
                # config:verify_ssl when the key is absent; confirm whether this
                # schema default is ever applied.
                "verify_ssl": {"type": "boolean", "default": False},
                # Arbitrary header names mapped to string values. Forbidding
                # additional properties without declaring any would reject every
                # non-empty header mapping.
                "header": {"type": "object", "additionalProperties": {"type": "string"}},
                "allow": {"type": "array", "items": {"type": "string"}},
                "require": {"type": "array", "items": {"type": "string"}},
                "ignore": {"type": "array", "items": {"type": "string"}},
            },
        }
    },
}
def job_schema(name: str):
    """Return the schema of a named job section.

    The resulting object schema accepts exactly two keys, ``<name>-job`` and
    ``<name>-job-remove``, both validated against ``attributes_schema``.
    """
    section_keys = (f"{name}-job", f"{name}-job-remove")
    return {
        "type": "object",
        "additionalProperties": False,
        "properties": dict.fromkeys(section_keys, attributes_schema),
    }
# ``pipeline-gen`` is a list whose entries each match exactly one of: a submapping
# section, a dynamic-mapping section, or one of the named job sections.
pipeline_gen_schema = {
    "type": "array",
    "items": {
        "oneOf": [
            submapping_schema,
            dynamic_mapping_schema,
            *(
                job_schema(job_kind)
                for job_kind in (
                    "any",
                    "build",
                    "cleanup",
                    "copy",
                    "noop",
                    "reindex",
                    "signing",
                )
            ),
        ]
    },
}
core_shared_properties = union_dicts( core_shared_properties = union_dicts(

View File

@ -7,6 +7,7 @@
import os import os
import pathlib import pathlib
import shutil import shutil
from io import BytesIO
from typing import NamedTuple from typing import NamedTuple
import jsonschema import jsonschema
@ -1846,3 +1847,91 @@ def test_ci_generate_mirror_config(
pipeline_doc = syaml.load(f) pipeline_doc = syaml.load(f)
assert fst not in pipeline_doc["rebuild-index"]["script"][0] assert fst not in pipeline_doc["rebuild-index"]["script"][0]
assert snd in pipeline_doc["rebuild-index"]["script"][0] assert snd in pipeline_doc["rebuild-index"]["script"][0]
def dynamic_mapping_setup(tmpdir):
    """Write a ``spack.yaml`` containing a dynamic-mapping section into ``tmpdir``.

    Returns the job name that ``ci.get_job_name`` produces for the concretized
    ``pkg-a`` spec.
    """
    manifest_text = """\
spack:
  specs:
    - pkg-a
  mirrors:
    some-mirror: https://my.fake.mirror
  ci:
    pipeline-gen:
    - dynamic-mapping:
        endpoint: https://fake.spack.io/mapper
        require: ["variables"]
        ignore: ["ignored_field"]
        allow: ["variables", "retry"]
"""
    manifest_path = str(tmpdir.join("spack.yaml"))
    with open(manifest_path, "w") as manifest_file:
        manifest_file.write(manifest_text)

    concrete_a = Spec("pkg-a")
    concrete_a.concretize()

    return ci.get_job_name(concrete_a)
def test_ci_dynamic_mapping_empty(
    tmpdir,
    working_env,
    mutable_mock_env_path,
    install_mockery,
    mock_packages,
    monkeypatch,
    ci_base_environment,
):
    """An empty mapping response produces the missing-required-keys warning."""

    # Stand-in opener: every request is answered with an empty JSON object
    def fake_dyn_mapping_urlopener(*args, **kwargs):
        return BytesIO(b"{}")

    monkeypatch.setattr(ci, "_dyn_mapping_urlopener", fake_dyn_mapping_urlopener)

    dynamic_mapping_setup(tmpdir)
    with tmpdir.as_cwd():
        env_cmd("create", "test", "./spack.yaml")
        pipeline_path = str(tmpdir.join(".gitlab-ci.yml"))

        with ev.read("test"):
            generate_output = ci_cmd("generate", "--output-file", pipeline_path)
            assert "Response missing required keys: ['variables']" in generate_output
def test_ci_dynamic_mapping_full(
    tmpdir,
    working_env,
    mutable_mock_env_path,
    install_mockery,
    mock_packages,
    monkeypatch,
    ci_base_environment,
):
    """Allowed keys from the mapping response are merged; other keys are dropped."""
    mapping_response = {
        "variables": {"MY_VAR": "hello"},
        "ignored_field": 0,
        "unallowed_field": 0,
    }

    # Stand-in opener: every request is answered with one allowed key, one
    # ignored key and one key that is not in the allow list
    def fake_dyn_mapping_urlopener(*args, **kwargs):
        return BytesIO(json.dumps(mapping_response).encode())

    monkeypatch.setattr(ci, "_dyn_mapping_urlopener", fake_dyn_mapping_urlopener)

    job_label = dynamic_mapping_setup(tmpdir)
    with tmpdir.as_cwd():
        env_cmd("create", "test", "./spack.yaml")
        pipeline_path = str(tmpdir.join(".gitlab-ci.yml"))

        with ev.read("test"):
            ci_cmd("generate", "--output-file", pipeline_path)

        with open(pipeline_path) as pipeline_file:
            pipeline_doc = syaml.load(pipeline_file.read())
            assert job_label in pipeline_doc
            generated_job = pipeline_doc[job_label]

            assert generated_job.get("variables", {}).get("MY_VAR") == "hello"
            assert "ignored_field" not in generated_job
            assert "unallowed_field" not in generated_job