avoid 7K stat calls
This commit is contained in:
parent
ea823d2308
commit
225a4ed1ff
@ -168,8 +168,12 @@ def repo_zip(args):
|
||||
except spack.repo.RepoError:
|
||||
tty.die(f"No repository at path: {key}")
|
||||
|
||||
def _zip_repo_skip(entry: os.DirEntry):
|
||||
return entry.name == "__pycache__"
|
||||
def _zip_repo_skip(entry: os.DirEntry, depth: int):
    """Decide whether ``entry`` should be left out of the repository zip.

    Args:
        entry: directory entry found while scanning the tree being zipped.
        depth: nesting level of the directory that contains ``entry``;
            ``0`` means the directory the scan started from.

    Returns:
        ``True`` if the entry must be skipped, ``False`` if it should be kept.
    """
    # Bytecode caches are never archived, at any depth.
    if entry.name == "__pycache__":
        return True

    if depth == 0:
        # At the top level only directories that contain a ``package.py`` are
        # kept. Asking the DirEntry for its type first rejects plain files
        # without an extra stat call: ``<file>/package.py`` can never exist.
        try:
            is_dir = entry.is_dir()
        except OSError:
            # An entry we cannot inspect is treated as "no package.py here",
            # which is what a failing ``os.path.exists`` would report as well.
            is_dir = False
        if not is_dir or not os.path.exists(os.path.join(entry.path, "package.py")):
            return True

    return False
|
||||
|
||||
def _zip_repo_path_to_name(path: str) -> str:
|
||||
# use spack/pkg/<repo>/* prefix and rename `package.py` as `__init__.py`
|
||||
|
@ -9,7 +9,7 @@
|
||||
import pathlib
|
||||
import sys
|
||||
import zipfile
|
||||
from typing import Any, Dict, Optional, Tuple, Type, Union
|
||||
from typing import Any, Dict, Optional, Set, Tuple, Type, Union
|
||||
|
||||
import llnl.util.filesystem
|
||||
from llnl.url import allowed_archive
|
||||
@ -155,7 +155,7 @@ def __hash__(self) -> int:
|
||||
return hash(self.sha256)
|
||||
|
||||
|
||||
zipfilecache = {}
|
||||
zipfilecache: Dict[str, Tuple[zipfile.ZipFile, Set[str]]] = {}
|
||||
|
||||
|
||||
class FilePatch(Patch):
|
||||
@ -202,9 +202,8 @@ def __init__(
|
||||
if "packages.zip" in path.parts:
|
||||
# check if it exists in the zip file.
|
||||
idx = path.parts.index("packages.zip")
|
||||
zip_path, entry_path = pathlib.PurePath(*path.parts[: idx + 1]), pathlib.PurePath(
|
||||
*path.parts[idx + 1 :]
|
||||
)
|
||||
zip_path = str(pathlib.PurePath(*path.parts[: idx + 1]))
|
||||
entry_path = str(pathlib.PurePath(*path.parts[idx + 1 :]))
|
||||
|
||||
lookup = zipfilecache.get(zip_path)
|
||||
if lookup is None:
|
||||
@ -213,7 +212,7 @@ def __init__(
|
||||
zipfilecache[zip_path] = (zip, namelist)
|
||||
else:
|
||||
zip, namelist = lookup
|
||||
if str(entry_path) in namelist:
|
||||
if entry_path in namelist:
|
||||
abs_path = str(path)
|
||||
break
|
||||
elif path.exists():
|
||||
@ -241,17 +240,16 @@ def sha256(self) -> str:
|
||||
if "packages.zip" in path.parts:
|
||||
# split in path to packages.zip and the path within the zip
|
||||
idx = path.parts.index("packages.zip")
|
||||
path_to_zip, path_in_zip = pathlib.PurePath(
|
||||
*path.parts[: idx + 1]
|
||||
), pathlib.PurePath(*path.parts[idx + 1 :])
|
||||
lookup = zipfilecache.get(path_to_zip)
|
||||
zip_path = str(pathlib.PurePath(*path.parts[: idx + 1]))
|
||||
entry_path = str(pathlib.PurePath(*path.parts[idx + 1 :]))
|
||||
lookup = zipfilecache.get(zip_path)
|
||||
if lookup is None:
|
||||
zip = zipfile.ZipFile(path_to_zip, "r")
|
||||
zip = zipfile.ZipFile(zip_path, "r")
|
||||
namelist = set(zip.namelist())
|
||||
zipfilecache[path_to_zip] = (zip, namelist)
|
||||
zipfilecache[zip_path] = (zip, namelist)
|
||||
else:
|
||||
zip, namelist = lookup
|
||||
f = zip.open(str(path_in_zip), "r")
|
||||
f = zip.open(entry_path, "r")
|
||||
else:
|
||||
f = open(self.path, "rb")
|
||||
self._sha256 = checksum_stream(hashlib.sha256, f)
|
||||
|
@ -26,6 +26,7 @@
|
||||
import types
|
||||
import uuid
|
||||
import warnings
|
||||
import zipfile
|
||||
import zipimport
|
||||
from typing import Any, Dict, Generator, List, Optional, Set, Tuple, Type, Union
|
||||
|
||||
@ -165,10 +166,10 @@ def compute_loader(self, fullname):
|
||||
if package_name:
|
||||
# annoyingly there is a many to one mapping for pkg module to file, have to
|
||||
# figure out how to deal with this properly.
|
||||
return (
|
||||
(repo.zipimporter, f"{namespace}.{package_name}")
|
||||
if repo.zipimporter
|
||||
else (
|
||||
if repo.zipimporter:
|
||||
return repo.zipimporter, f"{namespace}.{package_name}"
|
||||
else:
|
||||
return (
|
||||
_PrependFileLoader(
|
||||
fullname=fullname,
|
||||
path=repo.filename_for_package_name(package_name),
|
||||
@ -176,7 +177,6 @@ def compute_loader(self, fullname):
|
||||
),
|
||||
fullname,
|
||||
)
|
||||
)
|
||||
|
||||
# We are importing a full namespace like 'spack.pkg.builtin'
|
||||
if fullname == repo.full_namespace:
|
||||
@ -364,6 +364,37 @@ def __getattr__(self, name):
|
||||
return getattr(self, name)
|
||||
|
||||
|
||||
class EvenFasterPackageChecker(collections.abc.Mapping):
    """Read-only mapping from package name to modification time, built from
    the ``packages.zip`` archive that sits next to ``packages_path``.

    Package names are the top-level directory entries of the archive. Every
    package maps to the mtime of the archive itself, so building the mapping
    needs a single stat call rather than one per package.
    """

    def __init__(self, packages_path):
        """Open ``<packages_path>/../packages.zip`` and index its packages.

        Args:
            packages_path: path of the repository's packages directory.

        Raises:
            OSError, zipfile.BadZipFile: if the archive cannot be opened.
        """
        # The path of the repository managed by this instance
        self.packages_path = packages_path
        self.zipfile = zipfile.ZipFile(os.path.join(packages_path, "..", "packages.zip"), "r")
        self._refresh()

    def _refresh(self) -> None:
        """Rebuild ``mtime`` and ``pkgs`` from the currently open archive."""
        self.mtime = os.stat(self.zipfile.filename).st_mtime
        # Keep only top-level directory entries ("name/"); nested directories
        # contain more than one slash, and "./" is the archive root itself.
        self.pkgs: Dict[str, float] = {
            f.rstrip("/"): self.mtime
            for f in self.zipfile.namelist()
            if f.endswith("/") and f.count("/") == 1 and f != "./"
        }

    def invalidate(self):
        """Regenerate the mapping from the archive as it is on disk now.

        A ``ZipFile`` reads the archive's table of contents once, when it is
        opened, so the archive is reopened here; otherwise the package list
        would stay frozen at what ``__init__`` saw while ``mtime`` moved on.
        """
        # Open the replacement first so a failure leaves the old state intact.
        fresh = zipfile.ZipFile(self.zipfile.filename, "r")
        stale, self.zipfile = self.zipfile, fresh
        stale.close()
        self._refresh()

    def last_mtime(self):
        """Return the archive mtime recorded by the last (re)index."""
        return self.mtime

    def modified_since(self, since: float) -> List[str]:
        """Return all package names if the archive is newer than ``since``,
        otherwise an empty list (the archive is the only timestamp known)."""
        return list(self.pkgs) if self.mtime > since else []

    def __getitem__(self, item):
        return self.pkgs[item]

    def __iter__(self):
        return iter(self.pkgs)

    def __len__(self):
        return len(self.pkgs)
|
||||
|
||||
|
||||
class FastPackageChecker(collections.abc.Mapping):
|
||||
"""Cache that maps package names to the stats obtained on the
|
||||
'package.py' files associated with them.
|
||||
@ -578,7 +609,7 @@ class RepoIndex:
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
package_checker: FastPackageChecker,
|
||||
package_checker: Union[FastPackageChecker, EvenFasterPackageChecker],
|
||||
namespace: str,
|
||||
cache: "spack.caches.FileCacheType",
|
||||
):
|
||||
@ -1016,7 +1047,9 @@ def check(condition, msg):
|
||||
self._finder: Optional[RepoPath] = None
|
||||
|
||||
# Map that goes from package name to corresponding file stat
|
||||
self._fast_package_checker: Optional[FastPackageChecker] = None
|
||||
self._fast_package_checker: Optional[
|
||||
Union[EvenFasterPackageChecker, FastPackageChecker]
|
||||
] = None
|
||||
|
||||
# Indexes for this repository, computed lazily
|
||||
self._repo_index: Optional[RepoIndex] = None
|
||||
@ -1190,9 +1223,12 @@ def filename_for_package_name(self, pkg_name: str) -> str:
|
||||
return os.path.join(pkg_dir, package_file_name)
|
||||
|
||||
@property
|
||||
def _pkg_checker(self) -> FastPackageChecker:
|
||||
def _pkg_checker(self) -> Union[FastPackageChecker, EvenFasterPackageChecker]:
|
||||
if self._fast_package_checker is None:
|
||||
self._fast_package_checker = FastPackageChecker(self.packages_path)
|
||||
if self.zipimporter:
|
||||
self._fast_package_checker = EvenFasterPackageChecker(self.packages_path)
|
||||
else:
|
||||
self._fast_package_checker = FastPackageChecker(self.packages_path)
|
||||
return self._fast_package_checker
|
||||
|
||||
def all_package_names(self, include_virtuals: bool = False) -> List[str]:
|
||||
|
@ -12,7 +12,7 @@
|
||||
import zipfile
|
||||
from contextlib import closing, contextmanager
|
||||
from gzip import GzipFile
|
||||
from typing import Callable, Dict, Tuple
|
||||
from typing import Callable, Dict, List, Tuple
|
||||
|
||||
from llnl.util.symlink import readlink
|
||||
|
||||
@ -236,13 +236,13 @@ def reproducible_zipfile_from_prefix(
|
||||
zip: zipfile.ZipFile,
|
||||
prefix: str,
|
||||
*,
|
||||
skip: Callable[[os.DirEntry], bool] = lambda entry: False,
|
||||
skip: Callable[[os.DirEntry, int], bool] = lambda entry, depth: False,
|
||||
path_to_name: Callable[[str], str] = default_path_to_name,
|
||||
) -> None:
|
||||
"""Similar to ``reproducible_tarfile_from_prefix`` but for zipfiles."""
|
||||
dir_stack = [prefix]
|
||||
dir_stack: List[Tuple[str, int]] = [(prefix, 0)]
|
||||
while dir_stack:
|
||||
dir = dir_stack.pop()
|
||||
dir, depth = dir_stack.pop()
|
||||
|
||||
# Add the dir before its contents. zip.mkdir is Python 3.11.
|
||||
dir_info = zipfile.ZipInfo(path_to_name(dir))
|
||||
@ -259,11 +259,11 @@ def reproducible_zipfile_from_prefix(
|
||||
|
||||
new_dirs = []
|
||||
for entry in entries:
|
||||
if skip(entry):
|
||||
if skip(entry, depth):
|
||||
continue
|
||||
|
||||
if entry.is_dir(follow_symlinks=False):
|
||||
new_dirs.append(entry.path)
|
||||
new_dirs.append((entry.path, depth + 1))
|
||||
continue
|
||||
|
||||
# symlink / hardlink support in ZIP is poor or non-existent: make copies.
|
||||
|
@ -4,7 +4,7 @@
|
||||
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
|
||||
|
||||
import hashlib
|
||||
from typing import BinaryIO, Callable, Dict, Optional
|
||||
from typing import IO, Callable, Dict, Optional
|
||||
|
||||
import llnl.util.tty as tty
|
||||
|
||||
@ -80,7 +80,7 @@ def hash_fun_for_digest(hexdigest: str) -> HashFactory:
|
||||
return hash_fun_for_algo(hash_algo_for_digest(hexdigest))
|
||||
|
||||
|
||||
def checksum_stream(hashlib_algo: HashFactory, fp: BinaryIO, *, block_size: int = 2**20) -> str:
|
||||
def checksum_stream(hashlib_algo: HashFactory, fp: IO[bytes], *, block_size: int = 2**20) -> str:
|
||||
"""Returns a hex digest of the stream generated using given algorithm from hashlib."""
|
||||
hasher = hashlib_algo()
|
||||
while True:
|
||||
|
Loading…
Reference in New Issue
Block a user