zipfile repo: quick and dirty wip

This commit is contained in:
Harmen Stoppels 2024-08-21 19:18:06 +02:00
parent bd85034570
commit 122a53d322
8 changed files with 97 additions and 51 deletions

View File

@ -172,12 +172,15 @@ def _zip_repo_skip(entry: os.DirEntry):
return entry.name == "__pycache__"
def _zip_repo_path_to_name(path: str) -> str:
# strip `repo.packages_path`
return str(pathlib.Path(path).relative_to(repo.packages_path))
# use spack/pkg/<repo>/* prefix and rename `package.py` as `__init__.py`
rel_path = pathlib.PurePath(path).relative_to(repo.packages_path)
if rel_path.name == "package.py":
rel_path = rel_path.with_name("__init__.py")
return str(rel_path)
# Create a zipfile in a temporary file
with tempfile.NamedTemporaryFile(delete=False, mode="wb", dir=repo.root) as f, zipfile.ZipFile(
f, "w", compression=zipfile.ZIP_DEFLATED, compresslevel=9
f, "w", compression=zipfile.ZIP_DEFLATED
) as zip:
reproducible_zipfile_from_prefix(
zip, repo.packages_path, skip=_zip_repo_skip, path_to_name=_zip_repo_path_to_name
@ -185,8 +188,8 @@ def _zip_repo_path_to_name(path: str) -> str:
packages_zip = os.path.join(repo.root, "packages.zip")
try:
# Inform the user whether or not the repo was modified
if filecmp.cmp(f.name, packages_zip):
# Inform the user whether or not the repo was modified since it was last zipped
if os.path.exists(packages_zip) and filecmp.cmp(f.name, packages_zip):
tty.msg(f"{repo.namespace}: {packages_zip} is up to date")
return
else:

View File

@ -9,6 +9,7 @@
import os.path
import pathlib
import sys
import zipfile
from typing import Any, Dict, Optional, Tuple, Type, Union
import llnl.util.filesystem
@ -21,7 +22,7 @@
import spack.repo
import spack.stage
import spack.util.spack_json as sjson
from spack.util.crypto import Checker, checksum
from spack.util.crypto import Checker, checksum_stream
from spack.util.executable import which, which_string
@ -155,6 +156,9 @@ def __hash__(self) -> int:
return hash(self.sha256)
# Module-level cache of opened zip archives, keyed by the pure path of a
# ``packages.zip`` file, so an archive that was already opened can be reused.
# NOTE(review): FilePatch.__init__ stores ``(ZipFile, set of entry names)`` tuples
# in this dict, while FilePatch.sha256 stores and reads back a bare ``ZipFile``
# under the same kind of key -- the two value shapes conflict and need reconciling.
zipfilecache = {}
class FilePatch(Patch):
"""Describes a patch that is retrieved from a file in the repository."""
@ -194,9 +198,27 @@ def __init__(
# Cannot use pkg.package_dir because it's a property and we have
# classes, not instances.
pkg_dir = os.path.abspath(os.path.dirname(cls.module.__file__))
path = os.path.join(pkg_dir, self.relative_path)
if os.path.exists(path):
abs_path = path
path = pathlib.Path(os.path.join(pkg_dir, self.relative_path))
if "packages.zip" in path.parts:
# check if it exists in the zip file.
idx = path.parts.index("packages.zip")
zip_path, entry_path = pathlib.PurePath(*path.parts[: idx + 1]), pathlib.PurePath(
*path.parts[idx + 1 :]
)
lookup = zipfilecache.get(zip_path)
if lookup is None:
zip = zipfile.ZipFile(zip_path, "r")
namelist = set(zip.namelist())
zipfilecache[zip_path] = (zip, namelist)
else:
zip, namelist = lookup
if str(entry_path) in namelist:
abs_path = str(path)
break
elif path.exists():
abs_path = str(path)
break
if abs_path is None:
@ -216,7 +238,24 @@ def sha256(self) -> str:
The sha256 of the patch file.
"""
if self._sha256 is None and self.path is not None:
self._sha256 = checksum(hashlib.sha256, self.path)
path = pathlib.PurePath(self.path)
if "packages.zip" in path.parts:
print("yes")
# split in path to packages.zip and the path within the zip
idx = path.parts.index("packages.zip")
path_to_zip, path_in_zip = pathlib.PurePath(
*path.parts[: idx + 1]
), pathlib.PurePath(*path.parts[idx + 1 :])
zip = zipfilecache.get(path_to_zip)
if not zip:
zip = zipfile.ZipFile(path_to_zip, "r")
zipfilecache[path_to_zip] = zip
f = zip.open(str(path_in_zip), "r")
else:
f = open(self.path, "rb")
self._sha256 = checksum_stream(hashlib.sha256, f)
f.close()
assert isinstance(self._sha256, str)
return self._sha256

View File

@ -26,6 +26,7 @@
import types
import uuid
import warnings
import zipimport
from typing import Any, Dict, Generator, List, Optional, Set, Tuple, Type, Union
import llnl.path
@ -100,32 +101,6 @@ def get_data(self, path):
return self.prepend.encode() + b"\n" + data
class RepoLoader(_PrependFileLoader):
    """Loader for the Python module that belongs to one package of a given repository."""

    #: Source text handed to the base loader as ``prepend`` for every package module.
    #:
    #: Packages are supposed to write `from spack.package import *` on their own;
    #: the line is still injected during a deprecation period so that external
    #: repos which have not been updated yet keep working.
    _package_prepend = "from spack.package import *"

    def __init__(self, fullname, repo, package_name):
        """Set up the loader for ``package_name`` as provided by ``repo``.

        Args:
            fullname: name of the module to load; stored and passed to the base loader.
            repo: repository object; asked for the package's file name.
            package_name: name of the package inside ``repo``.
        """
        self.repo = repo
        self.package_name = package_name
        # The repository decides which file holds the package's source.
        package_file = repo.filename_for_package_name(package_name)
        self.package_py = package_file
        self.fullname = fullname
        super().__init__(fullname, package_file, prepend=self._package_prepend)
class SpackNamespaceLoader:
    """Loader whose modules are ``SpackNamespace`` objects rather than executed files."""

    def create_module(self, spec):
        """Build the module object for ``spec``: a ``SpackNamespace`` named ``spec.name``."""
        namespace_name = spec.name
        return SpackNamespace(namespace_name)

    def exec_module(self, module):
        """Register this loader on ``module``; no code is executed for a namespace."""
        setattr(module, "__loader__", self)
class ReposFinder:
"""MetaPathFinder class that loads a Python module corresponding to a Spack package.
@ -165,10 +140,11 @@ def find_spec(self, fullname, python_path, target=None):
if not fullname.startswith(ROOT_PYTHON_NAMESPACE):
return None
loader = self.compute_loader(fullname)
if loader is None:
result = self.compute_loader(fullname)
if result is None:
return None
return importlib.util.spec_from_loader(fullname, loader)
loader, actual_fullname = result
return importlib.util.spec_from_loader(actual_fullname, loader)
def compute_loader(self, fullname):
# namespaces are added to repo, and package modules are leaves.
@ -187,16 +163,29 @@ def compute_loader(self, fullname):
# With 2 nested conditionals we can call "repo.real_name" only once
package_name = repo.real_name(module_name)
if package_name:
return RepoLoader(fullname, repo, package_name)
# NOTE: several package module names can map to the same package file (a
# many-to-one mapping); this still needs to be handled properly.
return (
(repo.zipimporter, f"{namespace}.{package_name}")
if repo.zipimporter
else (
_PrependFileLoader(
fullname=fullname,
path=repo.filename_for_package_name(package_name),
prepend="from spack.package import *",
),
fullname,
)
)
# We are importing a full namespace like 'spack.pkg.builtin'
if fullname == repo.full_namespace:
return SpackNamespaceLoader()
return SpackNamespaceLoader(), fullname
# No repo provides the namespace, but it is a valid prefix of
# something in the RepoPath.
if is_repo_path and self.current_repository.by_namespace.is_prefix(fullname):
return SpackNamespaceLoader()
return SpackNamespaceLoader(), fullname
return None
@ -207,6 +196,7 @@ def compute_loader(self, fullname):
# Names of the files and directories that make up the layout of a repository.
# NOTE(review): ``packages_zip_name`` is defined here, but the repository setup code
# that creates the zipimporter joins the literal "packages.zip" onto the repo root
# instead of using this constant -- consider using the constant there.
repo_config_name = "repo.yaml" # Top-level filename for repo config.
repo_index_name = "index.yaml" # Top-level filename for repository index.
packages_dir_name = "packages" # Top-level repo directory containing pkgs.
packages_zip_name = "packages.zip" # Top-level filename for zipped packages.
package_file_name = "package.py" # Filename for packages in a repository.
#: Guaranteed unused default value for some functions.
@ -1009,9 +999,14 @@ def check(condition, msg):
self._names = self.full_namespace.split(".")
packages_dir = config.get("subdirectory", packages_dir_name)
packages_zip = os.path.join(self.root, "packages.zip")
self.zipimporter = (
zipimport.zipimporter(packages_zip) if os.path.exists(packages_zip) else None
)
self.packages_path = os.path.join(self.root, packages_dir)
check(
os.path.isdir(self.packages_path), f"No directory '{packages_dir}' found in '{root}'"
self.zipimporter or os.path.isdir(self.packages_path),
f"No '{self.packages_path}' or '{packages_zip} found in '{root}'",
)
# Class attribute overrides by package name
@ -1507,6 +1502,14 @@ def use_repositories(
PATH = saved
class SpackNamespaceLoader:
    """Loader that represents a namespace as a ``SpackNamespace`` module object."""

    def create_module(self, spec):
        """Return a fresh ``SpackNamespace`` whose name is taken from ``spec.name``."""
        module_name = spec.name
        return SpackNamespace(module_name)

    def exec_module(self, module):
        """Attach this loader to ``module`` as ``__loader__``; nothing else is run."""
        setattr(module, "__loader__", self)
class MockRepositoryBuilder:
"""Build a mock repository in a directory"""

View File

@ -927,7 +927,7 @@ def interactive_version_filter(
orig_url_dict = url_dict # only copy when using editor to modify
print_header = True
VERSION_COLOR = spack.spec.VERSION_COLOR
while True:
while sys.stdin.isatty():
if print_header:
has_filter = version_filter != VersionList([":"])
header = []
@ -944,7 +944,9 @@ def interactive_version_filter(
num_new = sum(1 for v in sorted_and_filtered if v not in known_versions)
header.append(f"{llnl.string.plural(num_new, 'new version')}")
if has_filter:
header.append(colorize(f"Filtered by {VERSION_COLOR}@@{version_filter}@."))
header.append(
colorize(f"Filtered by {VERSION_COLOR}@@{version_filter}@. (clear with c)")
)
version_with_url = [
colorize(

View File

@ -246,6 +246,8 @@ def reproducible_zipfile_from_prefix(
# Add the dir before its contents. zip.mkdir is Python 3.11.
dir_info = zipfile.ZipInfo(path_to_name(dir))
if not dir_info.filename.endswith("/"):
dir_info.filename += "/"
dir_info.external_attr = (0o40755 << 16) | 0x10
dir_info.file_size = 0
with zip.open(dir_info, "w") as dest:

View File

@ -6,8 +6,7 @@
import socket
from spack.package import *
from .blt import llnl_link_helpers
from spack.pkg.builtin.blt import llnl_link_helpers
class Chai(CachedCMakePackage, CudaPackage, ROCmPackage):

View File

@ -6,8 +6,7 @@
import socket
from spack.package import *
from .blt import llnl_link_helpers
from spack.pkg.builtin.blt import llnl_link_helpers
# Starting with 2022.03.0, the only submodule we want to fetch is tpl/desul

View File

@ -7,8 +7,7 @@
import socket
from spack.package import *
from .blt import llnl_link_helpers
from spack.pkg.builtin.blt import llnl_link_helpers
class Umpire(CachedCMakePackage, CudaPackage, ROCmPackage):