stage: provide mirrors in constructor (#45792)

Stage objects currently build their list of mirrors ad hoc from the current
config. This has several problems:

- There is no way to prevent mirrors from being used.
- There is no way to restrict mirrors to source or binary mirrors, even though
  which of the two applies is context dependent.
- Stage is also used for build caches, where iterating over mirrors is already
  implemented differently; the ad hoc iteration would not work there anyway,
  since it is source-only, and it makes no sense at all for OCI build caches.

This commit:

1. Injects the appropriate mirrors into the Stage object from the contexts
   where that is relevant (see the sketch below).
2. Separates mirrors from the download cache, so that the cache can still be
   used even when no mirrors are configured.
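
For illustration, a minimal sketch of the new calling convention (based on the Stage.__init__ signature and the _make_root_stage call in the hunks below; fetcher, mirror_paths and stage_name stand in for values the caller already has):

    # Sketch only: the caller decides which mirrors apply (source mirrors here)
    # and injects them, instead of Stage reading the global mirror config itself.
    stage = spack.stage.Stage(
        fetcher,
        name=stage_name,
        mirror_paths=mirror_paths,  # a spack.mirror.MirrorLayout
        mirrors=spack.mirror.MirrorCollection(source=True).values(),
    )
    # With no mirrors injected, the local download cache can still be used,
    # because the cache lookup now depends only on mirror_paths.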
Harmen Stoppels, 2024-08-16 15:21:47 +02:00 (committed by GitHub)
parent 4f0e336ed0
commit f51a9a9107
6 changed files with 87 additions and 98 deletions


@@ -426,48 +426,36 @@ def _determine_extension(fetcher):
return ext
class MirrorReference:
"""A ``MirrorReference`` stores the relative paths where you can store a
package/resource in a mirror directory.
class MirrorLayout:
"""A ``MirrorLayout`` stores the relative locations of files in a mirror directory. The main
storage location is ``storage_path``. An additional, human-readable path may be obtained as the
second entry when iterating this object."""
The appropriate storage location is given by ``storage_path``. The
``cosmetic_path`` property provides a reference that a human could generate
themselves based on reading the details of the package.
def __init__(self, storage_path: str) -> None:
self.storage_path = storage_path
A user can iterate over a ``MirrorReference`` object to get all the
possible names that might be used to refer to the resource in a mirror;
this includes names generated by previous naming schemes that are no-longer
reported by ``storage_path`` or ``cosmetic_path``.
"""
def __iter__(self):
yield self.storage_path
def __init__(self, cosmetic_path, global_path=None):
class DefaultLayout(MirrorLayout):
def __init__(self, cosmetic_path: str, global_path: Optional[str] = None) -> None:
super().__init__(global_path or cosmetic_path)
self.global_path = global_path
self.cosmetic_path = cosmetic_path
@property
def storage_path(self):
if self.global_path:
return self.global_path
else:
return self.cosmetic_path
def __iter__(self):
if self.global_path:
yield self.global_path
yield self.cosmetic_path
class OCIImageLayout:
"""Follow the OCI Image Layout Specification to archive blobs
Paths are of the form `blobs/<algorithm>/<digest>`
"""
class OCILayout(MirrorLayout):
"""Follow the OCI Image Layout Specification to archive blobs where paths are of the form
``blobs/<algorithm>/<digest>``"""
def __init__(self, digest: spack.oci.image.Digest) -> None:
self.storage_path = os.path.join("blobs", digest.algorithm, digest.digest)
def __iter__(self):
yield self.storage_path
super().__init__(os.path.join("blobs", digest.algorithm, digest.digest))
def mirror_archive_paths(fetcher, per_package_ref, spec=None):
@@ -494,7 +482,7 @@ def mirror_archive_paths(fetcher, per_package_ref, spec=None):
if global_ref and ext:
global_ref += ".%s" % ext
return MirrorReference(per_package_ref, global_ref)
return DefaultLayout(per_package_ref, global_ref)
def get_all_versions(specs):
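
As a usage sketch (not part of the diff; paths borrowed from the test hunk at the bottom of this page), a DefaultLayout keeps the content-addressed path as its storage location and still yields the human-readable path when iterated:

    layout = spack.mirror.DefaultLayout(
        "zlib/zlib-1.2.11.tar.gz",  # cosmetic, human-readable path
        "_source-cache/archive/c3/c3e5.tar.gz",  # global, content-addressed path
    )
    assert layout.storage_path == "_source-cache/archive/c3/c3e5.tar.gz"
    # Iteration yields the storage path first, then the cosmetic alias.
    assert list(layout) == [
        "_source-cache/archive/c3/c3e5.tar.gz",
        "zlib/zlib-1.2.11.tar.gz",
    ]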


@@ -397,8 +397,5 @@ def make_stage(
# is the `oci-layout` and `index.json` files, which are
# required by the spec.
return spack.stage.Stage(
fetch_strategy,
mirror_paths=spack.mirror.OCIImageLayout(digest),
name=digest.digest,
keep=keep,
fetch_strategy, mirror_paths=spack.mirror.OCILayout(digest), name=digest.digest, keep=keep
)
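
Note that no mirrors are injected for the OCI stage, in line with the commit message's point that mirror iteration makes no sense for OCI build caches. For reference, a small sketch (assuming digest is a spack.oci.image.Digest) of what the OCILayout passed above resolves to:

    import os

    layout = spack.mirror.OCILayout(digest)
    # storage_path follows the OCI Image Layout Specification, e.g. "blobs/sha256/<hex digest>"
    assert layout.storage_path == os.path.join("blobs", digest.algorithm, digest.digest)
    # Unlike DefaultLayout, iteration yields only the single content-addressed path.
    assert list(layout) == [layout.storage_path]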


@@ -1101,6 +1101,7 @@ def _make_resource_stage(self, root_stage, resource):
mirror_paths=spack.mirror.mirror_archive_paths(
resource.fetcher, os.path.join(self.name, pretty_resource_name)
),
mirrors=spack.mirror.MirrorCollection(source=True).values(),
path=self.path,
)
@@ -1121,6 +1122,7 @@ def _make_root_stage(self, fetcher):
stage = Stage(
fetcher,
mirror_paths=mirror_paths,
mirrors=spack.mirror.MirrorCollection(source=True).values(),
name=stage_name,
path=self.path,
search_fn=self._download_search,


@@ -331,6 +331,7 @@ def stage(self) -> "spack.stage.Stage":
fetcher,
name=f"{spack.stage.stage_prefix}patch-{fetch_digest}",
mirror_paths=mirror_ref,
mirrors=spack.mirror.MirrorCollection(source=True).values(),
)
return self._stage


@@ -13,7 +13,7 @@
import stat
import sys
import tempfile
from typing import Callable, Dict, Iterable, List, Optional, Set
from typing import Callable, Dict, Generator, Iterable, List, Optional, Set
import llnl.string
import llnl.util.lang
@@ -352,8 +352,10 @@ class Stage(LockableStagingDir):
def __init__(
self,
url_or_fetch_strategy,
*,
name=None,
mirror_paths=None,
mirror_paths: Optional[spack.mirror.MirrorLayout] = None,
mirrors: Optional[Iterable[spack.mirror.Mirror]] = None,
keep=False,
path=None,
lock=True,
@@ -407,12 +409,18 @@ def __init__(
# self.fetcher can change with mirrors.
self.default_fetcher = self.fetcher
self.search_fn = search_fn
# used for mirrored archives of repositories.
self.skip_checksum_for_mirror = True
# If we fetch from a mirror, but the original data is from say git, we can currently not
# prove that they are equal (we don't even have a tree hash in package.py). This bool is
# used to skip checksum verification and instead warn the user.
if isinstance(self.default_fetcher, fs.URLFetchStrategy):
self.skip_checksum_for_mirror = not bool(self.default_fetcher.digest)
else:
self.skip_checksum_for_mirror = True
self.srcdir = None
self.mirror_paths = mirror_paths
self.mirrors = list(mirrors) if mirrors else []
@property
def expected_archive_files(self):
@@ -467,7 +475,55 @@ def disable_mirrors(self):
"""The Stage will not attempt to look for the associated fetcher
target in any of Spack's mirrors (including the local download cache).
"""
self.mirror_paths = []
self.mirror_paths = None
def _generate_fetchers(self, mirror_only=False) -> Generator[fs.FetchStrategy, None, None]:
fetchers = []
if not mirror_only:
fetchers.append(self.default_fetcher)
# If this archive is normally fetched from a URL, then use the same digest.
if isinstance(self.default_fetcher, fs.URLFetchStrategy):
digest = self.default_fetcher.digest
expand = self.default_fetcher.expand_archive
extension = self.default_fetcher.extension
else:
digest = None
expand = True
extension = None
# TODO: move mirror logic out of here and clean it up!
# TODO: Or @alalazo may have some ideas about how to use a
# TODO: CompositeFetchStrategy here.
if self.mirror_paths and self.mirrors:
# Add URL strategies for all the mirrors with the digest
# Insert fetchers in the order that the URLs are provided.
fetchers[:0] = (
fs.from_url_scheme(
url_util.join(mirror.fetch_url, rel_path),
digest,
expand=expand,
extension=extension,
)
for mirror in self.mirrors
if not mirror.fetch_url.startswith("oci://")
for rel_path in self.mirror_paths
)
if self.mirror_paths and self.default_fetcher.cachable:
fetchers[:0] = (
spack.caches.FETCH_CACHE.fetcher(
rel_path, digest, expand=expand, extension=extension
)
for rel_path in self.mirror_paths
)
yield from fetchers
# The search function may be expensive, so wait until now to call it so the user can stop
# if a prior fetcher succeeded
if self.search_fn and not mirror_only:
yield from self.search_fn()
def fetch(self, mirror_only=False, err_msg=None):
"""Retrieves the code or archive
@@ -477,66 +533,8 @@ def fetch(self, mirror_only=False, err_msg=None):
err_msg (str or None): the error message to display if all fetchers
fail or ``None`` for the default fetch failure message
"""
fetchers = []
if not mirror_only:
fetchers.append(self.default_fetcher)
# TODO: move mirror logic out of here and clean it up!
# TODO: Or @alalazo may have some ideas about how to use a
# TODO: CompositeFetchStrategy here.
self.skip_checksum_for_mirror = True
if self.mirror_paths:
# Join URLs of mirror roots with mirror paths. Because
# urljoin() will strip everything past the final '/' in
# the root, so we add a '/' if it is not present.
mirror_urls = [
url_util.join(mirror.fetch_url, rel_path)
for mirror in spack.mirror.MirrorCollection(source=True).values()
if not mirror.fetch_url.startswith("oci://")
for rel_path in self.mirror_paths
]
# If this archive is normally fetched from a tarball URL,
# then use the same digest. `spack mirror` ensures that
# the checksum will be the same.
digest = None
expand = True
extension = None
if isinstance(self.default_fetcher, fs.URLFetchStrategy):
digest = self.default_fetcher.digest
expand = self.default_fetcher.expand_archive
extension = self.default_fetcher.extension
# Have to skip the checksum for things archived from
# repositories. How can this be made safer?
self.skip_checksum_for_mirror = not bool(digest)
# Add URL strategies for all the mirrors with the digest
# Insert fetchers in the order that the URLs are provided.
for url in reversed(mirror_urls):
fetchers.insert(
0, fs.from_url_scheme(url, digest, expand=expand, extension=extension)
)
if self.default_fetcher.cachable:
for rel_path in reversed(list(self.mirror_paths)):
cache_fetcher = spack.caches.FETCH_CACHE.fetcher(
rel_path, digest, expand=expand, extension=extension
)
fetchers.insert(0, cache_fetcher)
def generate_fetchers():
for fetcher in fetchers:
yield fetcher
# The search function may be expensive, so wait until now to
# call it so the user can stop if a prior fetcher succeeded
if self.search_fn and not mirror_only:
dynamic_fetchers = self.search_fn()
for fetcher in dynamic_fetchers:
yield fetcher
errors: List[str] = []
for fetcher in generate_fetchers():
for fetcher in self._generate_fetchers(mirror_only):
try:
fetcher.stage = self
self.fetcher = fetcher
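
To summarize the resulting fetch order (a reading of the new _generate_fetchers above, not extra code from the PR; stage is an already-constructed Stage):

    # Strategies are yielded in this order when mirror_only=False:
    #   1. download-cache fetchers (requires only mirror_paths and a cachable default fetcher)
    #   2. one URL fetcher per (mirror, relative path), skipping oci:// mirrors
    #   3. the default fetcher
    #   4. anything produced by search_fn, evaluated last because it can be expensive
    for fetcher in stage._generate_fetchers(mirror_only=False):
        ...  # fetch() tries each strategy in turn and stops at the first that succeeds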


@@ -10,7 +10,10 @@
from llnl.util.symlink import resolve_link_target_relative_to_the_link
import spack.caches
import spack.fetch_strategy
import spack.mirror
import spack.patch
import spack.repo
import spack.util.executable
import spack.util.spack_json as sjson
@@ -273,7 +276,7 @@ def test_mirror_cache_symlinks(tmpdir):
cosmetic_path = "zlib/zlib-1.2.11.tar.gz"
global_path = "_source-cache/archive/c3/c3e5.tar.gz"
cache = spack.caches.MirrorCache(str(tmpdir), False)
reference = spack.mirror.MirrorReference(cosmetic_path, global_path)
reference = spack.mirror.DefaultLayout(cosmetic_path, global_path)
cache.store(MockFetcher(), reference.storage_path)
cache.symlink(reference)
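
The assertions of this test fall outside the hunk shown; conceptually (a hedged sketch using the llnl.util.symlink helper imported at the top of the file) they verify that the cosmetic path is a symlink resolving onto the content-addressed storage path:

    link = os.path.join(str(tmpdir), reference.cosmetic_path)
    assert os.path.islink(link)
    # The helper resolves the symlink target relative to the link's own directory.
    target = resolve_link_target_relative_to_the_link(link)
    assert target.endswith(reference.storage_path)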