diff --git a/lib/spack/spack/cmd/repo.py b/lib/spack/spack/cmd/repo.py index c687710d926..af4f55ad561 100644 --- a/lib/spack/spack/cmd/repo.py +++ b/lib/spack/spack/cmd/repo.py @@ -168,8 +168,12 @@ def repo_zip(args): except spack.repo.RepoError: tty.die(f"No repository at path: {key}") - def _zip_repo_skip(entry: os.DirEntry): - return entry.name == "__pycache__" + def _zip_repo_skip(entry: os.DirEntry, depth: int): + if entry.name == "__pycache__": + return True + if depth == 0 and not os.path.exists(os.path.join(entry.path, "package.py")): + return True + return False def _zip_repo_path_to_name(path: str) -> str: # use spack/pkg//* prefix and rename `package.py` as `__init__.py` diff --git a/lib/spack/spack/patch.py b/lib/spack/spack/patch.py index 7e2f3dcdc04..fb1964e06c6 100644 --- a/lib/spack/spack/patch.py +++ b/lib/spack/spack/patch.py @@ -9,7 +9,7 @@ import pathlib import sys import zipfile -from typing import Any, Dict, Optional, Tuple, Type, Union +from typing import Any, Dict, Optional, Set, Tuple, Type, Union import llnl.util.filesystem from llnl.url import allowed_archive @@ -155,7 +155,7 @@ def __hash__(self) -> int: return hash(self.sha256) -zipfilecache = {} +zipfilecache: Dict[str, Tuple[zipfile.ZipFile, Set[str]]] = {} class FilePatch(Patch): @@ -202,9 +202,8 @@ def __init__( if "packages.zip" in path.parts: # check if it exists in the zip file. 
idx = path.parts.index("packages.zip") - zip_path, entry_path = pathlib.PurePath(*path.parts[: idx + 1]), pathlib.PurePath( - *path.parts[idx + 1 :] - ) + zip_path = str(pathlib.PurePath(*path.parts[: idx + 1])) + entry_path = str(pathlib.PurePath(*path.parts[idx + 1 :])) lookup = zipfilecache.get(zip_path) if lookup is None: @@ -213,7 +212,7 @@ def __init__( zipfilecache[zip_path] = (zip, namelist) else: zip, namelist = lookup - if str(entry_path) in namelist: + if entry_path in namelist: abs_path = str(path) break elif path.exists(): @@ -241,17 +240,16 @@ def sha256(self) -> str: if "packages.zip" in path.parts: # split in path to packages.zip and the path within the zip idx = path.parts.index("packages.zip") - path_to_zip, path_in_zip = pathlib.PurePath( - *path.parts[: idx + 1] - ), pathlib.PurePath(*path.parts[idx + 1 :]) - lookup = zipfilecache.get(path_to_zip) + zip_path = str(pathlib.PurePath(*path.parts[: idx + 1])) + entry_path = str(pathlib.PurePath(*path.parts[idx + 1 :])) + lookup = zipfilecache.get(zip_path) if lookup is None: - zip = zipfile.ZipFile(path_to_zip, "r") + zip = zipfile.ZipFile(zip_path, "r") namelist = set(zip.namelist()) - zipfilecache[path_to_zip] = (zip, namelist) + zipfilecache[zip_path] = (zip, namelist) else: zip, namelist = lookup - f = zip.open(str(path_in_zip), "r") + f = zip.open(entry_path, "r") else: f = open(self.path, "rb") self._sha256 = checksum_stream(hashlib.sha256, f) diff --git a/lib/spack/spack/repo.py b/lib/spack/spack/repo.py index 042e8fdb126..4e7650b9005 100644 --- a/lib/spack/spack/repo.py +++ b/lib/spack/spack/repo.py @@ -26,6 +26,7 @@ import types import uuid import warnings +import zipfile import zipimport from typing import Any, Dict, Generator, List, Optional, Set, Tuple, Type, Union @@ -165,10 +166,10 @@ def compute_loader(self, fullname): if package_name: # annoyingly there is a many to one mapping for pkg module to file, have to # figure out how to deal with this properly. 
class EvenFasterPackageChecker(collections.abc.Mapping):
    """Mapping from package name to modification time for a zipped repository.

    Package names are the top-level directory entries of the ``packages.zip``
    archive located one level above ``packages_path``. Every package maps to the
    mtime of the archive itself, so all packages are reported as modified as soon
    as the archive is newer than the reference time.
    """

    def __init__(self, packages_path):
        # The path of the repository managed by this instance
        self.packages_path = packages_path
        # Location of the archive; deliberately not normalized so that ".." is
        # resolved by the OS exactly as it was when the archive was first opened.
        self._zip_path = os.path.join(packages_path, "..", "packages.zip")
        # Open handle on the archive; (re)created by invalidate()
        self.zipfile = None
        self.invalidate()

    def invalidate(self):
        """Re-read the archive from disk and rebuild the package mapping.

        A ``ZipFile`` reads its listing when it is opened, so the archive has to
        be reopened here; reusing the old handle would keep serving the package
        list from the time of construction.

        Raises:
            FileNotFoundError: if the archive does not exist.
            zipfile.BadZipFile: if the archive is not a valid zip file.
        """
        # Stat *before* reading the listing: if the archive is replaced in
        # between, the recorded mtime is the older one and the change is still
        # picked up by a later ``modified_since`` call.
        mtime = os.stat(self._zip_path).st_mtime
        fresh = zipfile.ZipFile(self._zip_path, "r")

        # Swap handles only once the new one is known to be good, then release
        # the previous one so repeated invalidation does not leak descriptors.
        stale, self.zipfile = self.zipfile, fresh
        if stale is not None:
            stale.close()

        self.mtime = mtime
        # Top-level directories are the entries with exactly one slash, at the end.
        self.pkgs = {
            name[:-1]: mtime
            for name in fresh.namelist()
            if name.endswith("/") and name.count("/") == 1 and name not in ("/", "./")
        }

    def last_mtime(self):
        """Return the archive mtime recorded by the last ``invalidate()``."""
        return self.mtime

    def modified_since(self, since: float) -> List[str]:
        """Return all package names if the archive is newer than ``since``, else ``[]``."""
        return list(self.pkgs) if self.mtime > since else []

    def __getitem__(self, item):
        return self.pkgs[item]

    def __iter__(self):
        return iter(self.pkgs)

    def __len__(self):
        return len(self.pkgs)
@property
def _pkg_checker(self) -> Union[FastPackageChecker, EvenFasterPackageChecker]:
    """Package checker for this repository, created on first access and then reused.

    An ``EvenFasterPackageChecker`` is built when ``self.zipimporter`` is truthy,
    a ``FastPackageChecker`` otherwise; both are constructed from
    ``self.packages_path``.
    """
    checker = self._fast_package_checker
    if checker is not None:
        return checker

    checker_cls = EvenFasterPackageChecker if self.zipimporter else FastPackageChecker
    checker = checker_cls(self.packages_path)
    self._fast_package_checker = checker
    return checker
default_path_to_name, ) -> None: """Similar to ``reproducible_tarfile_from_prefix`` but for zipfiles.""" - dir_stack = [prefix] + dir_stack: List[Tuple[str, int]] = [(prefix, 0)] while dir_stack: - dir = dir_stack.pop() + dir, depth = dir_stack.pop() # Add the dir before its contents. zip.mkdir is Python 3.11. dir_info = zipfile.ZipInfo(path_to_name(dir)) @@ -259,11 +259,11 @@ def reproducible_zipfile_from_prefix( new_dirs = [] for entry in entries: - if skip(entry): + if skip(entry, depth): continue if entry.is_dir(follow_symlinks=False): - new_dirs.append(entry.path) + new_dirs.append((entry.path, depth + 1)) continue # symlink / hardlink support in ZIP is poor or non-existent: make copies. diff --git a/lib/spack/spack/util/crypto.py b/lib/spack/spack/util/crypto.py index 283b5ebfc71..4766ddf2deb 100644 --- a/lib/spack/spack/util/crypto.py +++ b/lib/spack/spack/util/crypto.py @@ -4,7 +4,7 @@ # SPDX-License-Identifier: (Apache-2.0 OR MIT) import hashlib -from typing import BinaryIO, Callable, Dict, Optional +from typing import IO, Callable, Dict, Optional import llnl.util.tty as tty @@ -80,7 +80,7 @@ def hash_fun_for_digest(hexdigest: str) -> HashFactory: return hash_fun_for_algo(hash_algo_for_digest(hexdigest)) -def checksum_stream(hashlib_algo: HashFactory, fp: BinaryIO, *, block_size: int = 2**20) -> str: +def checksum_stream(hashlib_algo: HashFactory, fp: IO[bytes], *, block_size: int = 2**20) -> str: """Returns a hex digest of the stream generated using given algorithm from hashlib.""" hasher = hashlib_algo() while True: