oci: support --only=package (#45775)

Previously `spack buildcache push --only=package` errored in the OCI
case, but it has been requested that an OCI registry be usable as pure
storage, without the need for runnable container images.

This commit makes it so that

1. manifests refer only to the runtime dependencies that were selected to
   be pushed
2. failure to upload a blob among the selected specs does not prevent a
   manifest/tag from being created for dependents: they simply do not
   reference the missing blob as a layer/dependency (sketched below)
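
As a rough illustration (hypothetical helper and names, not Spack's actual
code), the selection rule boils down to filtering the manifest's layer list
to the blobs that are actually available:

    # Hypothetical sketch: `layer_checksums` maps the dag hash of each
    # successfully uploaded blob to its layer descriptor; anything missing
    # is simply skipped rather than raising an error.
    def layers_for(selected_specs, layer_checksums):
        return [
            layer_checksums[s.dag_hash()]
            for s in selected_specs
            if s.dag_hash() in layer_checksums  # blob failed or was not selected
        ]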

This fixes the following issues:

1. dependents of non-redistributable specs can now be pushed to OCI
   build caches without error
2. failure to upload one tarball no longer causes cascading failures for
   dependents whose tarballs do upload successfully -- so the push is
   closer to best-effort behavior
3. for some people, pushing with dependencies caused a massive number of
   fetches of their manifests (which certain registries count as a
   download of an image, even though the layers are not fetched) --
   being able to specify --only=package reduces the number of fetches
   (see the example invocation below).
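
For reference, pushing a package without its dependencies to an OCI mirror
now looks like this (mirror name and spec are placeholders):

    spack buildcache push --only=package <oci-mirror> <spec>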
Author:    Harmen Stoppels
Committer: GitHub
Date:      2024-08-16 15:24:04 +02:00
Parent:    f51a9a9107
Commit:    725ef8f5c8
3 changed files with 46 additions and 31 deletions


@@ -1431,12 +1431,9 @@ def _oci_put_manifest(
     for s in expected_blobs:
         # If a layer for a dependency has gone missing (due to removed manifest in the registry, a
         # failed push, or a local forced uninstall), we cannot create a runnable container image.
-        # If an OCI registry is only used for storage, this is not a hard error, but for now we
-        # raise an exception unconditionally, until someone requests a more lenient behavior.
         checksum = checksums.get(s.dag_hash())
-        if not checksum:
-            raise MissingLayerError(f"missing layer for {_format_spec(s)}")
-        config["rootfs"]["diff_ids"].append(str(checksum.uncompressed_digest))
+        if checksum:
+            config["rootfs"]["diff_ids"].append(str(checksum.uncompressed_digest))

     # Set the environment variables
     config["config"]["Env"] = [f"{k}={v}" for k, v in env.items()]
@@ -1481,6 +1478,7 @@ def _oci_put_manifest(
                     "size": checksums[s.dag_hash()].size,
                 }
                 for s in expected_blobs
+                if s.dag_hash() in checksums
             ),
         ],
     }
@@ -3096,7 +3094,3 @@ class CannotListKeys(GenerateIndexError):

 class PushToBuildCacheError(spack.error.SpackError):
     """Raised when unable to push objects to binary mirror"""
-
-
-class MissingLayerError(spack.error.SpackError):
-    """Raised when a required layer for a dependency is missing in an OCI registry."""


@@ -410,8 +410,6 @@ def push_fn(args):

     # For OCI images, we require dependencies to be pushed for now.
     if target_image:
-        if "dependencies" not in args.things_to_install:
-            tty.die("Dependencies must be pushed for OCI images.")
         if not unsigned:
             tty.warn(
                 "Code signing is currently not supported for OCI images. "


@@ -11,6 +11,7 @@
 import os
 import pathlib
 import re
+import urllib.error
 from contextlib import contextmanager

 import pytest
@@ -293,7 +294,10 @@ def test_uploading_with_base_image_in_docker_image_manifest_v2_format(


 def test_best_effort_upload(mutable_database: spack.database.Database, monkeypatch):
-    """Failure to upload a blob or manifest should not prevent others from being uploaded"""
+    """Failure to upload a blob or manifest should not prevent others from being uploaded -- it
+    should be a best-effort operation. If any runtime dep fails to upload, it results in a missing
+    layer for dependents. But we do still create manifests for dependents, so that the build cache
+    is maximally useful. (The downside is that container images are not runnable)."""
     _push_blob = spack.binary_distribution._oci_push_pkg_blob
     _push_manifest = spack.binary_distribution._oci_put_manifest
@@ -315,32 +319,51 @@ def put_manifest(base_images, checksums, image_ref, tmpdir, extra_config, annota
     monkeypatch.setattr(spack.binary_distribution, "_oci_push_pkg_blob", push_blob)
     monkeypatch.setattr(spack.binary_distribution, "_oci_put_manifest", put_manifest)

+    mirror("add", "oci-test", "oci://example.com/image")
     registry = InMemoryOCIRegistry("example.com")
-    with oci_servers(registry):
-        mirror("add", "oci-test", "oci://example.com/image")
+    image = ImageReference.from_string("example.com/image")

-        with pytest.raises(spack.error.SpackError, match="The following 4 errors occurred") as e:
+    with oci_servers(registry):
+        with pytest.raises(spack.error.SpackError, match="The following 2 errors occurred") as e:
             buildcache("push", "--update-index", "oci-test", "mpileaks^mpich")

         error = str(e.value)

-        # mpich's blob failed to upload and libdwarf's manifest failed to upload
-        assert re.search("mpich.+: Exception: Blob Server Error", e.value)
-        assert re.search("libdwarf.+: Exception: Manifest Server Error", e.value)
+        # mpich's blob failed to upload
+        assert re.search("mpich.+: Exception: Blob Server Error", error)
+        mpileaks: spack.spec.Spec = mutable_database.query_local("mpileaks^mpich")[0]
+        # libdwarf's manifest failed to upload
+        assert re.search("libdwarf.+: Exception: Manifest Server Error", error)
+        without_manifest = ("mpich", "libdwarf")

-        # since there is no blob for mpich, runtime dependents cannot refer to it in their
-        # manifests, which is a transitive error.
-        assert re.search("callpath.+: MissingLayerError: missing layer for mpich", error)
-        assert re.search("mpileaks.+: MissingLayerError: missing layer for mpich", error)
+        # Verify that manifests of mpich/libdwarf are missing due to upload failure.
+        for name in without_manifest:
+            tagged_img = image.with_tag(default_tag(mpileaks[name]))
+            with pytest.raises(urllib.error.HTTPError, match="404"):
+                get_manifest_and_config(tagged_img)

-        mpileaks: spack.spec.Spec = mutable_database.query_local("mpileaks^mpich")[0]
+        # Collect the layer digests of successfully uploaded packages. Every package should refer
+        # to its own tarballs and those of its runtime deps that were uploaded.
+        pkg_to_all_digests = {}
+        pkg_to_own_digest = {}
+        for s in mpileaks.traverse():
+            if s.name in without_manifest:
+                continue
+            # This should not raise a 404.
+            manifest, _ = get_manifest_and_config(image.with_tag(default_tag(s)))

-        # ensure that packages not affected by errors were uploaded still.
-        uploaded_tags = {tag for _, tag in registry.manifests.keys()}
-        failures = {"mpich", "libdwarf", "callpath", "mpileaks"}
-        expected_tags = {default_tag(s) for s in mpileaks.traverse() if s.name not in failures}
+            # Collect layer digests
+            pkg_to_all_digests[s.name] = {layer["digest"] for layer in manifest["layers"]}
+            pkg_to_own_digest[s.name] = manifest["layers"][-1]["digest"]

-        assert expected_tags
-        assert uploaded_tags == expected_tags
+        # Verify that all packages reference blobs of their runtime deps that uploaded fine.
+        for s in mpileaks.traverse():
+            if s.name in without_manifest:
+                continue
+            expected_digests = {
+                pkg_to_own_digest[t.name]
+                for t in s.traverse(deptype=("link", "run"), root=True)
+                if t.name not in without_manifest
+            }
+            # Test with issubset, cause we don't have the blob of libdwarf as it has no manifest.
+            assert expected_digests and expected_digests.issubset(pkg_to_all_digests[s.name])