zipfile repo: quick and dirty wip

This commit is contained in:
Harmen Stoppels 2024-08-21 19:18:06 +02:00
parent bd85034570
commit 122a53d322
8 changed files with 97 additions and 51 deletions

View File

@ -172,12 +172,15 @@ def _zip_repo_skip(entry: os.DirEntry):
return entry.name == "__pycache__" return entry.name == "__pycache__"
def _zip_repo_path_to_name(path: str) -> str:
    """Map a path inside the repo's packages directory to its zip entry name.

    The entry name is ``path`` made relative to ``repo.packages_path``. A file
    named ``package.py`` is stored as ``__init__.py`` in the same directory
    (presumably so each package directory is importable as a Python package
    from the zip -- confirm against the importer that consumes the archive).

    Args:
        path: path of a file or directory below ``repo.packages_path``.

    Returns:
        The relative entry name, using forward slashes.

    Raises:
        ValueError: if ``path`` is not located under ``repo.packages_path``.
    """
    rel_path = pathlib.PurePath(path).relative_to(repo.packages_path)
    if rel_path.name == "package.py":
        rel_path = rel_path.with_name("__init__.py")
    # Zip entry names use "/" as separator regardless of the host OS.
    return rel_path.as_posix()
# Create a zipfile in a temporary file # Create a zipfile in a temporary file
with tempfile.NamedTemporaryFile(delete=False, mode="wb", dir=repo.root) as f, zipfile.ZipFile( with tempfile.NamedTemporaryFile(delete=False, mode="wb", dir=repo.root) as f, zipfile.ZipFile(
f, "w", compression=zipfile.ZIP_DEFLATED, compresslevel=9 f, "w", compression=zipfile.ZIP_DEFLATED
) as zip: ) as zip:
reproducible_zipfile_from_prefix( reproducible_zipfile_from_prefix(
zip, repo.packages_path, skip=_zip_repo_skip, path_to_name=_zip_repo_path_to_name zip, repo.packages_path, skip=_zip_repo_skip, path_to_name=_zip_repo_path_to_name
@ -185,8 +188,8 @@ def _zip_repo_path_to_name(path: str) -> str:
packages_zip = os.path.join(repo.root, "packages.zip") packages_zip = os.path.join(repo.root, "packages.zip")
try: try:
# Inform the user whether or not the repo was modified # Inform the user whether or not the repo was modified since it was last zipped
if filecmp.cmp(f.name, packages_zip): if os.path.exists(packages_zip) and filecmp.cmp(f.name, packages_zip):
tty.msg(f"{repo.namespace}: {packages_zip} is up to date") tty.msg(f"{repo.namespace}: {packages_zip} is up to date")
return return
else: else:

View File

@ -9,6 +9,7 @@
import os.path import os.path
import pathlib import pathlib
import sys import sys
import zipfile
from typing import Any, Dict, Optional, Tuple, Type, Union from typing import Any, Dict, Optional, Tuple, Type, Union
import llnl.util.filesystem import llnl.util.filesystem
@ -21,7 +22,7 @@
import spack.repo import spack.repo
import spack.stage import spack.stage
import spack.util.spack_json as sjson import spack.util.spack_json as sjson
from spack.util.crypto import Checker, checksum from spack.util.crypto import Checker, checksum_stream
from spack.util.executable import which, which_string from spack.util.executable import which, which_string
@ -155,6 +156,9 @@ def __hash__(self) -> int:
return hash(self.sha256) return hash(self.sha256)
zipfilecache = {}
class FilePatch(Patch): class FilePatch(Patch):
"""Describes a patch that is retrieved from a file in the repository.""" """Describes a patch that is retrieved from a file in the repository."""
@ -194,9 +198,27 @@ def __init__(
# Cannot use pkg.package_dir because it's a property and we have # Cannot use pkg.package_dir because it's a property and we have
# classes, not instances. # classes, not instances.
pkg_dir = os.path.abspath(os.path.dirname(cls.module.__file__)) pkg_dir = os.path.abspath(os.path.dirname(cls.module.__file__))
path = os.path.join(pkg_dir, self.relative_path) path = pathlib.Path(os.path.join(pkg_dir, self.relative_path))
if os.path.exists(path):
abs_path = path if "packages.zip" in path.parts:
# check if it exists in the zip file.
idx = path.parts.index("packages.zip")
zip_path, entry_path = pathlib.PurePath(*path.parts[: idx + 1]), pathlib.PurePath(
*path.parts[idx + 1 :]
)
lookup = zipfilecache.get(zip_path)
if lookup is None:
zip = zipfile.ZipFile(zip_path, "r")
namelist = set(zip.namelist())
zipfilecache[zip_path] = (zip, namelist)
else:
zip, namelist = lookup
if str(entry_path) in namelist:
abs_path = str(path)
break
elif path.exists():
abs_path = str(path)
break break
if abs_path is None: if abs_path is None:
@ -216,7 +238,24 @@ def sha256(self) -> str:
The sha256 of the patch file. The sha256 of the patch file.
""" """
if self._sha256 is None and self.path is not None: if self._sha256 is None and self.path is not None:
self._sha256 = checksum(hashlib.sha256, self.path) path = pathlib.PurePath(self.path)
if "packages.zip" in path.parts:
print("yes")
# split in path to packages.zip and the path within the zip
idx = path.parts.index("packages.zip")
path_to_zip, path_in_zip = pathlib.PurePath(
*path.parts[: idx + 1]
), pathlib.PurePath(*path.parts[idx + 1 :])
zip = zipfilecache.get(path_to_zip)
if not zip:
zip = zipfile.ZipFile(path_to_zip, "r")
zipfilecache[path_to_zip] = zip
f = zip.open(str(path_in_zip), "r")
else:
f = open(self.path, "rb")
self._sha256 = checksum_stream(hashlib.sha256, f)
f.close()
assert isinstance(self._sha256, str) assert isinstance(self._sha256, str)
return self._sha256 return self._sha256

View File

@ -26,6 +26,7 @@
import types import types
import uuid import uuid
import warnings import warnings
import zipimport
from typing import Any, Dict, Generator, List, Optional, Set, Tuple, Type, Union from typing import Any, Dict, Generator, List, Optional, Set, Tuple, Type, Union
import llnl.path import llnl.path
@ -100,32 +101,6 @@ def get_data(self, path):
return self.prepend.encode() + b"\n" + data return self.prepend.encode() + b"\n" + data
class RepoLoader(_PrependFileLoader):
    """Loads a Python module associated with a package in specific repository"""

    #: Code in ``_package_prepend`` is prepended to imported packages.
    #:
    #: Spack packages are expected to call `from spack.package import *`
    #: themselves, but we are allowing a deprecation period before breaking
    #: external repos that don't do this yet.
    _package_prepend = "from spack.package import *"

    def __init__(self, fullname, repo, package_name):
        """Set up a loader for ``package_name`` as found in ``repo``.

        Args:
            fullname: module name passed on to the base loader.
            repo: repository object; must provide ``filename_for_package_name``.
            package_name: name of the package whose file should be loaded.
        """
        self.repo = repo
        self.package_name = package_name
        # Resolve the file to load once, through the repository.
        self.package_py = repo.filename_for_package_name(package_name)
        self.fullname = fullname
        super().__init__(self.fullname, self.package_py, prepend=self._package_prepend)
class SpackNamespaceLoader:
    """Loader that produces ``SpackNamespace`` modules."""

    def create_module(self, spec):
        """Return a new ``SpackNamespace`` constructed from ``spec.name``."""
        return SpackNamespace(spec.name)

    def exec_module(self, module):
        """Record this loader on ``module``; nothing else is executed."""
        module.__loader__ = self
class ReposFinder: class ReposFinder:
"""MetaPathFinder class that loads a Python module corresponding to a Spack package. """MetaPathFinder class that loads a Python module corresponding to a Spack package.
@ -165,10 +140,11 @@ def find_spec(self, fullname, python_path, target=None):
if not fullname.startswith(ROOT_PYTHON_NAMESPACE): if not fullname.startswith(ROOT_PYTHON_NAMESPACE):
return None return None
loader = self.compute_loader(fullname) result = self.compute_loader(fullname)
if loader is None: if result is None:
return None return None
return importlib.util.spec_from_loader(fullname, loader) loader, actual_fullname = result
return importlib.util.spec_from_loader(actual_fullname, loader)
def compute_loader(self, fullname): def compute_loader(self, fullname):
# namespaces are added to repo, and package modules are leaves. # namespaces are added to repo, and package modules are leaves.
@ -187,16 +163,29 @@ def compute_loader(self, fullname):
# With 2 nested conditionals we can call "repo.real_name" only once # With 2 nested conditionals we can call "repo.real_name" only once
package_name = repo.real_name(module_name) package_name = repo.real_name(module_name)
if package_name: if package_name:
return RepoLoader(fullname, repo, package_name) # annoyingly there is a many to one mapping for pkg module to file, have to
# figure out how to deal with this properly.
return (
(repo.zipimporter, f"{namespace}.{package_name}")
if repo.zipimporter
else (
_PrependFileLoader(
fullname=fullname,
path=repo.filename_for_package_name(package_name),
prepend="from spack.package import *",
),
fullname,
)
)
# We are importing a full namespace like 'spack.pkg.builtin' # We are importing a full namespace like 'spack.pkg.builtin'
if fullname == repo.full_namespace: if fullname == repo.full_namespace:
return SpackNamespaceLoader() return SpackNamespaceLoader(), fullname
# No repo provides the namespace, but it is a valid prefix of # No repo provides the namespace, but it is a valid prefix of
# something in the RepoPath. # something in the RepoPath.
if is_repo_path and self.current_repository.by_namespace.is_prefix(fullname): if is_repo_path and self.current_repository.by_namespace.is_prefix(fullname):
return SpackNamespaceLoader() return SpackNamespaceLoader(), fullname
return None return None
@ -207,6 +196,7 @@ def compute_loader(self, fullname):
repo_config_name = "repo.yaml" # Top-level filename for repo config. repo_config_name = "repo.yaml" # Top-level filename for repo config.
repo_index_name = "index.yaml" # Top-level filename for repository index. repo_index_name = "index.yaml" # Top-level filename for repository index.
packages_dir_name = "packages" # Top-level repo directory containing pkgs. packages_dir_name = "packages" # Top-level repo directory containing pkgs.
packages_zip_name = "packages.zip" # Top-level filename for zipped packages.
package_file_name = "package.py" # Filename for packages in a repository. package_file_name = "package.py" # Filename for packages in a repository.
#: Guaranteed unused default value for some functions. #: Guaranteed unused default value for some functions.
@ -1009,9 +999,14 @@ def check(condition, msg):
self._names = self.full_namespace.split(".") self._names = self.full_namespace.split(".")
packages_dir = config.get("subdirectory", packages_dir_name) packages_dir = config.get("subdirectory", packages_dir_name)
packages_zip = os.path.join(self.root, "packages.zip")
self.zipimporter = (
zipimport.zipimporter(packages_zip) if os.path.exists(packages_zip) else None
)
self.packages_path = os.path.join(self.root, packages_dir) self.packages_path = os.path.join(self.root, packages_dir)
check( check(
os.path.isdir(self.packages_path), f"No directory '{packages_dir}' found in '{root}'" self.zipimporter or os.path.isdir(self.packages_path),
f"No '{self.packages_path}' or '{packages_zip} found in '{root}'",
) )
# Class attribute overrides by package name # Class attribute overrides by package name
@ -1507,6 +1502,14 @@ def use_repositories(
PATH = saved PATH = saved
class SpackNamespaceLoader:
    """Loader that produces ``SpackNamespace`` modules."""

    def create_module(self, spec):
        """Return a new ``SpackNamespace`` constructed from ``spec.name``."""
        return SpackNamespace(spec.name)

    def exec_module(self, module):
        """Record this loader on ``module``; nothing else is executed."""
        module.__loader__ = self
class MockRepositoryBuilder: class MockRepositoryBuilder:
"""Build a mock repository in a directory""" """Build a mock repository in a directory"""

View File

@ -927,7 +927,7 @@ def interactive_version_filter(
orig_url_dict = url_dict # only copy when using editor to modify orig_url_dict = url_dict # only copy when using editor to modify
print_header = True print_header = True
VERSION_COLOR = spack.spec.VERSION_COLOR VERSION_COLOR = spack.spec.VERSION_COLOR
while True: while sys.stdin.isatty():
if print_header: if print_header:
has_filter = version_filter != VersionList([":"]) has_filter = version_filter != VersionList([":"])
header = [] header = []
@ -944,7 +944,9 @@ def interactive_version_filter(
num_new = sum(1 for v in sorted_and_filtered if v not in known_versions) num_new = sum(1 for v in sorted_and_filtered if v not in known_versions)
header.append(f"{llnl.string.plural(num_new, 'new version')}") header.append(f"{llnl.string.plural(num_new, 'new version')}")
if has_filter: if has_filter:
header.append(colorize(f"Filtered by {VERSION_COLOR}@@{version_filter}@.")) header.append(
colorize(f"Filtered by {VERSION_COLOR}@@{version_filter}@. (clear with c)")
)
version_with_url = [ version_with_url = [
colorize( colorize(

View File

@ -246,6 +246,8 @@ def reproducible_zipfile_from_prefix(
# Add the dir before its contents. zip.mkdir is Python 3.11. # Add the dir before its contents. zip.mkdir is Python 3.11.
dir_info = zipfile.ZipInfo(path_to_name(dir)) dir_info = zipfile.ZipInfo(path_to_name(dir))
if not dir_info.filename.endswith("/"):
dir_info.filename += "/"
dir_info.external_attr = (0o40755 << 16) | 0x10 dir_info.external_attr = (0o40755 << 16) | 0x10
dir_info.file_size = 0 dir_info.file_size = 0
with zip.open(dir_info, "w") as dest: with zip.open(dir_info, "w") as dest:

View File

@ -6,8 +6,7 @@
import socket import socket
from spack.package import * from spack.package import *
from spack.pkg.builtin.blt import llnl_link_helpers
from .blt import llnl_link_helpers
class Chai(CachedCMakePackage, CudaPackage, ROCmPackage): class Chai(CachedCMakePackage, CudaPackage, ROCmPackage):

View File

@ -6,8 +6,7 @@
import socket import socket
from spack.package import * from spack.package import *
from spack.pkg.builtin.blt import llnl_link_helpers
from .blt import llnl_link_helpers
# Starting with 2022.03.0, the only submodule we want to fetch is tpl/desul # Starting with 2022.03.0, the only submodule we want to fetch is tpl/desul

View File

@ -7,8 +7,7 @@
import socket import socket
from spack.package import * from spack.package import *
from spack.pkg.builtin.blt import llnl_link_helpers
from .blt import llnl_link_helpers
class Umpire(CachedCMakePackage, CudaPackage, ROCmPackage): class Umpire(CachedCMakePackage, CudaPackage, ROCmPackage):