avoid 7K stat calls
commit 225a4ed1ff
parent ea823d2308
@@ -168,8 +168,12 @@ def repo_zip(args):
     except spack.repo.RepoError:
         tty.die(f"No repository at path: {key}")

-def _zip_repo_skip(entry: os.DirEntry):
-    return entry.name == "__pycache__"
+def _zip_repo_skip(entry: os.DirEntry, depth: int):
+    if entry.name == "__pycache__":
+        return True
+    if depth == 0 and not os.path.exists(os.path.join(entry.path, "package.py")):
+        return True
+    return False

 def _zip_repo_path_to_name(path: str) -> str:
     # use spack/pkg/<repo>/* prefix and rename `package.py` as `__init__.py`
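The skip predicate above is meant to be handed to `reproducible_zipfile_from_prefix`, whose signature is changed further down in this commit. A minimal sketch of that wiring, assuming `spack.util.archive` is the module holding the helper and that the repository's `packages/` directory is the prefix; the actual call site is not shown in these hunks and the paths are illustrative:

```python
import zipfile

import spack.util.archive  # assumed module for reproducible_zipfile_from_prefix

# Hypothetical paths; the real command derives them from the repo being zipped.
packages_dir = "/path/to/repo/packages"

with zipfile.ZipFile("/path/to/repo/packages.zip", "w") as zf:
    spack.util.archive.reproducible_zipfile_from_prefix(
        zip=zf,
        prefix=packages_dir,
        skip=_zip_repo_skip,  # depth-aware predicate from the hunk above
        path_to_name=_zip_repo_path_to_name,
    )
```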
@@ -9,7 +9,7 @@
 import pathlib
 import sys
 import zipfile
-from typing import Any, Dict, Optional, Tuple, Type, Union
+from typing import Any, Dict, Optional, Set, Tuple, Type, Union

 import llnl.util.filesystem
 from llnl.url import allowed_archive
@@ -155,7 +155,7 @@ def __hash__(self) -> int:
         return hash(self.sha256)


-zipfilecache = {}
+zipfilecache: Dict[str, Tuple[zipfile.ZipFile, Set[str]]] = {}


 class FilePatch(Patch):
@@ -202,9 +202,8 @@ def __init__(
             if "packages.zip" in path.parts:
                 # check if it exists in the zip file.
                 idx = path.parts.index("packages.zip")
-                zip_path, entry_path = pathlib.PurePath(*path.parts[: idx + 1]), pathlib.PurePath(
-                    *path.parts[idx + 1 :]
-                )
+                zip_path = str(pathlib.PurePath(*path.parts[: idx + 1]))
+                entry_path = str(pathlib.PurePath(*path.parts[idx + 1 :]))

                 lookup = zipfilecache.get(zip_path)
                 if lookup is None:
@@ -213,7 +212,7 @@ def __init__(
                     zipfilecache[zip_path] = (zip, namelist)
                 else:
                     zip, namelist = lookup
-                if str(entry_path) in namelist:
+                if entry_path in namelist:
                     abs_path = str(path)
                     break
             elif path.exists():
@@ -241,17 +240,16 @@ def sha256(self) -> str:
             if "packages.zip" in path.parts:
                 # split in path to packages.zip and the path within the zip
                 idx = path.parts.index("packages.zip")
-                path_to_zip, path_in_zip = pathlib.PurePath(
-                    *path.parts[: idx + 1]
-                ), pathlib.PurePath(*path.parts[idx + 1 :])
-                lookup = zipfilecache.get(path_to_zip)
+                zip_path = str(pathlib.PurePath(*path.parts[: idx + 1]))
+                entry_path = str(pathlib.PurePath(*path.parts[idx + 1 :]))
+                lookup = zipfilecache.get(zip_path)
                 if lookup is None:
-                    zip = zipfile.ZipFile(path_to_zip, "r")
+                    zip = zipfile.ZipFile(zip_path, "r")
                     namelist = set(zip.namelist())
-                    zipfilecache[path_to_zip] = (zip, namelist)
+                    zipfilecache[zip_path] = (zip, namelist)
                 else:
                     zip, namelist = lookup
-                f = zip.open(str(path_in_zip), "r")
+                f = zip.open(entry_path, "r")
             else:
                 f = open(self.path, "rb")
             self._sha256 = checksum_stream(hashlib.sha256, f)
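The `zipfilecache` change above is the core of the stat-call reduction in the patch code: each `packages.zip` is opened and its name list read once per process, so later patch lookups become set-membership checks instead of filesystem calls. A standalone sketch of the same pattern, assuming illustrative paths and a hypothetical helper name:

```python
import zipfile
from typing import Dict, Set, Tuple

# One (handle, name set) pair per archive, keyed by the archive path.
zipfilecache: Dict[str, Tuple[zipfile.ZipFile, Set[str]]] = {}


def open_zip_entry(zip_path: str, entry_path: str):
    """Open a member of a zip archive, listing the archive only once per process."""
    lookup = zipfilecache.get(zip_path)
    if lookup is None:
        zf = zipfile.ZipFile(zip_path, "r")
        namelist = set(zf.namelist())  # one directory read instead of per-file stat calls
        zipfilecache[zip_path] = (zf, namelist)
    else:
        zf, namelist = lookup
    if entry_path not in namelist:
        raise FileNotFoundError(f"{entry_path} not found in {zip_path}")
    return zf.open(entry_path, "r")  # binary stream, suitable for checksum_stream
```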
@@ -26,6 +26,7 @@
 import types
 import uuid
 import warnings
+import zipfile
 import zipimport
 from typing import Any, Dict, Generator, List, Optional, Set, Tuple, Type, Union

@@ -165,10 +166,10 @@ def compute_loader(self, fullname):
                 if package_name:
                     # annoyingly there is a many to one mapping for pkg module to file, have to
                     # figure out how to deal with this properly.
-                    return (
-                        (repo.zipimporter, f"{namespace}.{package_name}")
-                        if repo.zipimporter
-                        else (
+                    if repo.zipimporter:
+                        return repo.zipimporter, f"{namespace}.{package_name}"
+                    else:
+                        return (
                             _PrependFileLoader(
                                 fullname=fullname,
                                 path=repo.filename_for_package_name(package_name),
@@ -176,7 +177,6 @@ def compute_loader(self, fullname):
                             ),
                             fullname,
                         )
-                    )

             # We are importing a full namespace like 'spack.pkg.builtin'
             if fullname == repo.full_namespace:
@@ -364,6 +364,37 @@ def __getattr__(self, name):
         return getattr(self, name)


+class EvenFasterPackageChecker(collections.abc.Mapping):
+    def __init__(self, packages_path):
+        # The path of the repository managed by this instance
+        self.packages_path = packages_path
+        self.zipfile = zipfile.ZipFile(os.path.join(packages_path, "..", "packages.zip"), "r")
+        self.invalidate()
+
+    def invalidate(self):
+        self.mtime = os.stat(self.zipfile.filename).st_mtime
+        self.pkgs = {
+            f.rstrip("/"): self.mtime
+            for f in self.zipfile.namelist()
+            if f.endswith("/") and f.count("/") == 1 and f != "./"
+        }
+
+    def last_mtime(self):
+        return self.mtime
+
+    def modified_since(self, since: float) -> List[str]:
+        return list(self.pkgs) if self.mtime > since else []
+
+    def __getitem__(self, item):
+        return self.pkgs[item]
+
+    def __iter__(self):
+        return iter(self.pkgs)
+
+    def __len__(self):
+        return len(self.pkgs)
+
+
 class FastPackageChecker(collections.abc.Mapping):
     """Cache that maps package names to the stats obtained on the
     'package.py' files associated with them.
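`EvenFasterPackageChecker` replaces `FastPackageChecker`'s per-package stat of every `package.py` with a single stat of `packages.zip`: package names come from the archive's top-level directory entries, and they all share the archive's mtime. A standalone sketch of the same idea, assuming the layout implied above (one top-level directory per package inside `packages.zip`); the function name and paths are illustrative:

```python
import os
import zipfile
from typing import Dict


def package_mtimes_from_zip(zip_path: str) -> Dict[str, float]:
    """Map package names to the archive's mtime using one stat call for the whole repo."""
    mtime = os.stat(zip_path).st_mtime
    with zipfile.ZipFile(zip_path, "r") as zf:
        return {
            name.rstrip("/"): mtime
            for name in zf.namelist()
            if name.endswith("/") and name.count("/") == 1 and name != "./"
        }


# e.g. package_mtimes_from_zip("/path/to/repo/packages.zip") -> {"zlib": ..., "cmake": ...}
```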
@@ -578,7 +609,7 @@ class RepoIndex:

     def __init__(
         self,
-        package_checker: FastPackageChecker,
+        package_checker: Union[FastPackageChecker, EvenFasterPackageChecker],
         namespace: str,
         cache: "spack.caches.FileCacheType",
     ):
@@ -1016,7 +1047,9 @@ def check(condition, msg):
         self._finder: Optional[RepoPath] = None

         # Maps that goes from package name to corresponding file stat
-        self._fast_package_checker: Optional[FastPackageChecker] = None
+        self._fast_package_checker: Optional[
+            Union[EvenFasterPackageChecker, FastPackageChecker]
+        ] = None

         # Indexes for this repository, computed lazily
         self._repo_index: Optional[RepoIndex] = None
@@ -1190,9 +1223,12 @@ def filename_for_package_name(self, pkg_name: str) -> str:
         return os.path.join(pkg_dir, package_file_name)

     @property
-    def _pkg_checker(self) -> FastPackageChecker:
+    def _pkg_checker(self) -> Union[FastPackageChecker, EvenFasterPackageChecker]:
         if self._fast_package_checker is None:
-            self._fast_package_checker = FastPackageChecker(self.packages_path)
+            if self.zipimporter:
+                self._fast_package_checker = EvenFasterPackageChecker(self.packages_path)
+            else:
+                self._fast_package_checker = FastPackageChecker(self.packages_path)
         return self._fast_package_checker

     def all_package_names(self, include_virtuals: bool = False) -> List[str]:
@@ -12,7 +12,7 @@
 import zipfile
 from contextlib import closing, contextmanager
 from gzip import GzipFile
-from typing import Callable, Dict, Tuple
+from typing import Callable, Dict, List, Tuple

 from llnl.util.symlink import readlink

@@ -236,13 +236,13 @@ def reproducible_zipfile_from_prefix(
     zip: zipfile.ZipFile,
     prefix: str,
     *,
-    skip: Callable[[os.DirEntry], bool] = lambda entry: False,
+    skip: Callable[[os.DirEntry, int], bool] = lambda entry, depth: False,
     path_to_name: Callable[[str], str] = default_path_to_name,
 ) -> None:
     """Similar to ``reproducible_tarfile_from_prefix`` but for zipfiles."""
-    dir_stack = [prefix]
+    dir_stack: List[Tuple[str, int]] = [(prefix, 0)]
     while dir_stack:
-        dir = dir_stack.pop()
+        dir, depth = dir_stack.pop()

         # Add the dir before its contents. zip.mkdir is Python 3.11.
         dir_info = zipfile.ZipInfo(path_to_name(dir))
@@ -259,11 +259,11 @@ def reproducible_zipfile_from_prefix(

         new_dirs = []
         for entry in entries:
-            if skip(entry):
+            if skip(entry, depth):
                 continue

             if entry.is_dir(follow_symlinks=False):
-                new_dirs.append(entry.path)
+                new_dirs.append((entry.path, depth + 1))
                 continue

             # symlink / hardlink support in ZIP is poor or non-existent: make copies.
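The archive helper now tracks how deep each directory sits relative to the prefix, so the `skip` callback can make depth-relative decisions; this is what lets `_zip_repo_skip` prune non-package top-level directories. A simplified sketch of the traversal shape, leaving out the reproducibility normalization and symlink handling of the real function; the helper name is illustrative:

```python
import os
from typing import Callable, List, Tuple


def walk_with_depth(prefix: str, skip: Callable[[os.DirEntry, int], bool]) -> List[str]:
    files: List[str] = []
    dir_stack: List[Tuple[str, int]] = [(prefix, 0)]
    while dir_stack:
        directory, depth = dir_stack.pop()
        for entry in os.scandir(directory):
            if skip(entry, depth):
                continue
            if entry.is_dir(follow_symlinks=False):
                dir_stack.append((entry.path, depth + 1))  # children sit one level deeper
            else:
                files.append(entry.path)
    return files
```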
@@ -4,7 +4,7 @@
 # SPDX-License-Identifier: (Apache-2.0 OR MIT)

 import hashlib
-from typing import BinaryIO, Callable, Dict, Optional
+from typing import IO, Callable, Dict, Optional

 import llnl.util.tty as tty

@@ -80,7 +80,7 @@ def hash_fun_for_digest(hexdigest: str) -> HashFactory:
     return hash_fun_for_algo(hash_algo_for_digest(hexdigest))


-def checksum_stream(hashlib_algo: HashFactory, fp: BinaryIO, *, block_size: int = 2**20) -> str:
+def checksum_stream(hashlib_algo: HashFactory, fp: IO[bytes], *, block_size: int = 2**20) -> str:
     """Returns a hex digest of the stream generated using given algorithm from hashlib."""
     hasher = hashlib_algo()
     while True:
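Widening `fp` from `BinaryIO` to `IO[bytes]` lets the same helper hash both a regular file opened in binary mode and the stream returned by `ZipFile.open`, which is what the `FilePatch.sha256` change above relies on. A usage sketch; the import path for `checksum_stream` is an assumption and the paths are illustrative:

```python
import hashlib
import zipfile

from spack.util.crypto import checksum_stream  # assumed location of the helper above


def sha256_of_file(path: str) -> str:
    with open(path, "rb") as f:  # plain binary file
        return checksum_stream(hashlib.sha256, f)


def sha256_of_zip_member(zip_path: str, member: str) -> str:
    with zipfile.ZipFile(zip_path, "r") as zf:
        with zf.open(member, "r") as f:  # ZipExtFile: typed as IO[bytes], not BinaryIO
            return checksum_stream(hashlib.sha256, f)
```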