oci: support --only=package (#45775)

Previously `spack buildcache push --only=package` errored in the OCI
case, but it has been requested that an OCI registry be usable as pure
storage, without the need for runnable container images.

This commit makes it so that

1. manifests refer only to the runtime dependencies that were selected to
   be pushed
2. failure to upload a blob among the selected specs does not prevent a
   manifest/tag from being created for dependents: they simply do not
   reference the missing blob as a layer/dependency (sketched below)
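
As a rough illustration (hypothetical helper and names, not Spack's actual
code), the selection rule boils down to filtering the manifest's layer list
to the blobs that are actually available:

    # Hypothetical sketch: `layer_checksums` maps the dag hash of each
    # successfully uploaded blob to its layer descriptor; anything missing
    # is simply skipped rather than raising an error.
    def layers_for(selected_specs, layer_checksums):
        return [
            layer_checksums[s.dag_hash()]
            for s in selected_specs
            if s.dag_hash() in layer_checksums  # blob failed or was not selected
        ]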

This fixes the following issues:

1. dependents of non-redistributable specs can now be pushed to OCI
   build caches without error
2. failure to upload one tarball no longer causes cascading failures for
   dependents whose tarballs do upload successfully -- so the push is
   closer to best-effort behavior
3. for some people, pushing with dependencies caused a massive number of
   fetches of their manifests (which certain registries count as a
   download of an image, even though the layers are not fetched) --
   being able to specify --only=package reduces the number of fetches
   (see the example invocation below).
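
For reference, pushing a package without its dependencies to an OCI mirror
now looks like this (mirror name and spec are placeholders):

    spack buildcache push --only=package <oci-mirror> <spec>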
Author:    Harmen Stoppels
Committer: GitHub
Date:      2024-08-16 15:24:04 +02:00
Parent:    f51a9a9107
Commit:    725ef8f5c8
3 changed files with 46 additions and 31 deletions


@@ -1431,12 +1431,9 @@ def _oci_put_manifest(
     for s in expected_blobs:
         # If a layer for a dependency has gone missing (due to removed manifest in the registry, a
         # failed push, or a local forced uninstall), we cannot create a runnable container image.
-        # If an OCI registry is only used for storage, this is not a hard error, but for now we
-        # raise an exception unconditionally, until someone requests a more lenient behavior.
         checksum = checksums.get(s.dag_hash())
-        if not checksum:
-            raise MissingLayerError(f"missing layer for {_format_spec(s)}")
-        config["rootfs"]["diff_ids"].append(str(checksum.uncompressed_digest))
+        if checksum:
+            config["rootfs"]["diff_ids"].append(str(checksum.uncompressed_digest))

     # Set the environment variables
     config["config"]["Env"] = [f"{k}={v}" for k, v in env.items()]
@@ -1481,6 +1478,7 @@ def _oci_put_manifest(
                     "size": checksums[s.dag_hash()].size,
                 }
                 for s in expected_blobs
+                if s.dag_hash() in checksums
             ),
         ],
     }
@@ -3096,7 +3094,3 @@ class CannotListKeys(GenerateIndexError):

 class PushToBuildCacheError(spack.error.SpackError):
     """Raised when unable to push objects to binary mirror"""
-
-
-class MissingLayerError(spack.error.SpackError):
-    """Raised when a required layer for a dependency is missing in an OCI registry."""


@@ -410,8 +410,6 @@ def push_fn(args):

     # For OCI images, we require dependencies to be pushed for now.
     if target_image:
-        if "dependencies" not in args.things_to_install:
-            tty.die("Dependencies must be pushed for OCI images.")
         if not unsigned:
             tty.warn(
                 "Code signing is currently not supported for OCI images. "


@@ -11,6 +11,7 @@
 import os
 import pathlib
 import re
+import urllib.error
 from contextlib import contextmanager

 import pytest
@@ -293,7 +294,10 @@ def test_uploading_with_base_image_in_docker_image_manifest_v2_format(


 def test_best_effort_upload(mutable_database: spack.database.Database, monkeypatch):
-    """Failure to upload a blob or manifest should not prevent others from being uploaded"""
+    """Failure to upload a blob or manifest should not prevent others from being uploaded -- it
+    should be a best-effort operation. If any runtime dep fails to upload, it results in a missing
+    layer for dependents. But we do still create manifests for dependents, so that the build cache
+    is maximally useful. (The downside is that container images are not runnable)."""
     _push_blob = spack.binary_distribution._oci_push_pkg_blob
     _push_manifest = spack.binary_distribution._oci_put_manifest
@@ -315,32 +319,51 @@ def put_manifest(base_images, checksums, image_ref, tmpdir, extra_config, annota
     monkeypatch.setattr(spack.binary_distribution, "_oci_push_pkg_blob", push_blob)
     monkeypatch.setattr(spack.binary_distribution, "_oci_put_manifest", put_manifest)

+    mirror("add", "oci-test", "oci://example.com/image")
     registry = InMemoryOCIRegistry("example.com")
-    with oci_servers(registry):
-        mirror("add", "oci-test", "oci://example.com/image")
+    image = ImageReference.from_string("example.com/image")

-        with pytest.raises(spack.error.SpackError, match="The following 4 errors occurred") as e:
+    with oci_servers(registry):
+        with pytest.raises(spack.error.SpackError, match="The following 2 errors occurred") as e:
             buildcache("push", "--update-index", "oci-test", "mpileaks^mpich")

         error = str(e.value)

-        # mpich's blob failed to upload and libdwarf's manifest failed to upload
-        assert re.search("mpich.+: Exception: Blob Server Error", e.value)
-        assert re.search("libdwarf.+: Exception: Manifest Server Error", e.value)
+        # mpich's blob failed to upload
+        assert re.search("mpich.+: Exception: Blob Server Error", error)
+        mpileaks: spack.spec.Spec = mutable_database.query_local("mpileaks^mpich")[0]
+        # libdwarf's manifest failed to upload
+        assert re.search("libdwarf.+: Exception: Manifest Server Error", error)
+        without_manifest = ("mpich", "libdwarf")

-        # since there is no blob for mpich, runtime dependents cannot refer to it in their
-        # manifests, which is a transitive error.
-        assert re.search("callpath.+: MissingLayerError: missing layer for mpich", error)
-        assert re.search("mpileaks.+: MissingLayerError: missing layer for mpich", error)
+        # Verify that manifests of mpich/libdwarf are missing due to upload failure.
+        for name in without_manifest:
+            tagged_img = image.with_tag(default_tag(mpileaks[name]))
+            with pytest.raises(urllib.error.HTTPError, match="404"):
+                get_manifest_and_config(tagged_img)

-        mpileaks: spack.spec.Spec = mutable_database.query_local("mpileaks^mpich")[0]
+        # Collect the layer digests of successfully uploaded packages. Every package should refer
+        # to its own tarballs and those of its runtime deps that were uploaded.
+        pkg_to_all_digests = {}
+        pkg_to_own_digest = {}
+        for s in mpileaks.traverse():
+            if s.name in without_manifest:
+                continue
+            # This should not raise a 404.
+            manifest, _ = get_manifest_and_config(image.with_tag(default_tag(s)))

-        # ensure that packages not affected by errors were uploaded still.
-        uploaded_tags = {tag for _, tag in registry.manifests.keys()}
-        failures = {"mpich", "libdwarf", "callpath", "mpileaks"}
-        expected_tags = {default_tag(s) for s in mpileaks.traverse() if s.name not in failures}
+            # Collect layer digests
+            pkg_to_all_digests[s.name] = {layer["digest"] for layer in manifest["layers"]}
+            pkg_to_own_digest[s.name] = manifest["layers"][-1]["digest"]

-        assert expected_tags
-        assert uploaded_tags == expected_tags
+        # Verify that all packages reference blobs of their runtime deps that uploaded fine.
+        for s in mpileaks.traverse():
+            if s.name in without_manifest:
+                continue
+            expected_digests = {
+                pkg_to_own_digest[t.name]
+                for t in s.traverse(deptype=("link", "run"), root=True)
+                if t.name not in without_manifest
+            }
+            # Test with issubset, cause we don't have the blob of libdwarf as it has no manifest.
+            assert expected_digests and expected_digests.issubset(pkg_to_all_digests[s.name])