zipfile repo: quick and dirty wip

This commit is contained in:
Harmen Stoppels 2024-08-21 11:23:25 +02:00
parent f9065f0c7e
commit ea823d2308
11 changed files with 249 additions and 77 deletions

1
.gitignore vendored
View File

@ -7,6 +7,7 @@
/var/spack/environments
/var/spack/repos/*/index.yaml
/var/spack/repos/*/lock
/var/spack/repos/*/packages.zip
/opt
# Ignore everything in /etc/spack except /etc/spack/defaults
/etc/spack/*

View File

@ -3,8 +3,13 @@
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import filecmp
import os
import pathlib
import sys
import tempfile
import zipfile
from typing import List, Optional, Tuple
import llnl.util.tty as tty
@ -12,6 +17,7 @@
import spack.repo
import spack.util.path
from spack.cmd.common import arguments
from spack.util.archive import reproducible_zipfile_from_prefix
description = "manage package source repositories"
section = "config"
@ -67,6 +73,12 @@ def setup_parser(subparser):
help="configuration scope to modify",
)
# Zip
zip_parser = sp.add_parser("zip", help=repo_zip.__doc__)
zip_parser.add_argument(
"namespace_or_path", help="namespace or path of a Spack package repository"
)
def repo_create(args):
"""create a new package repository"""
@ -109,31 +121,18 @@ def repo_add(args):
def repo_remove(args):
"""remove a repository from Spack's configuration"""
repos = spack.config.get("repos", scope=args.scope)
namespace_or_path = args.namespace_or_path
# If the argument is a path, remove that repository from config.
canon_path = spack.util.path.canonicalize_path(namespace_or_path)
for repo_path in repos:
repo_canon_path = spack.util.path.canonicalize_path(repo_path)
if canon_path == repo_canon_path:
repos.remove(repo_path)
spack.config.set("repos", repos, args.scope)
tty.msg("Removed repository %s" % repo_path)
return
key, repo = _get_repo(repos, args.namespace_or_path)
# If it is a namespace, remove corresponding repo
for path in repos:
try:
repo = spack.repo.from_path(path)
if repo.namespace == namespace_or_path:
repos.remove(path)
spack.config.set("repos", repos, args.scope)
tty.msg("Removed repository %s with namespace '%s'." % (repo.root, repo.namespace))
return
except spack.repo.RepoError:
continue
if not key:
tty.die(f"No repository with path or namespace: {args.namespace_or_path}")
tty.die("No repository with path or namespace: %s" % namespace_or_path)
repos.remove(key)
spack.config.set("repos", repos, args.scope)
if repo:
tty.msg(f"Removed repository {repo.root} with namespace '{repo.namespace}'")
else:
tty.msg(f"Removed repository {key}")
def repo_list(args):
@ -147,17 +146,77 @@ def repo_list(args):
continue
if sys.stdout.isatty():
msg = "%d package repositor" % len(repos)
msg += "y." if len(repos) == 1 else "ies."
tty.msg(msg)
tty.msg(f"{len(repos)} package repositor{'y.' if len(repos) == 1 else 'ies.'}")
if not repos:
return
max_ns_len = max(len(r.namespace) for r in repos)
for repo in repos:
fmt = "%%-%ds%%s" % (max_ns_len + 4)
print(fmt % (repo.namespace, repo.root))
print(f"{repo.namespace:<{max_ns_len}} {repo.root}")
def repo_zip(args):
"""zip a package repository to make it immutable and faster to load"""
key, _ = _get_repo(spack.config.get("repos"), args.namespace_or_path)
if not key:
tty.die(f"No repository with path or namespace: {args.namespace_or_path}")
try:
repo = spack.repo.from_path(key)
except spack.repo.RepoError:
tty.die(f"No repository at path: {key}")
def _zip_repo_skip(entry: os.DirEntry):
return entry.name == "__pycache__"
def _zip_repo_path_to_name(path: str) -> str:
# use spack/pkg/<repo>/* prefix and rename `package.py` as `__init__.py`
rel_path = pathlib.PurePath(path).relative_to(repo.packages_path)
if rel_path.name == "package.py":
rel_path = rel_path.with_name("__init__.py")
return str(rel_path)
# Create a zipfile in a temporary file
with tempfile.NamedTemporaryFile(delete=False, mode="wb", dir=repo.root) as f, zipfile.ZipFile(
f, "w", compression=zipfile.ZIP_DEFLATED
) as zip:
reproducible_zipfile_from_prefix(
zip, repo.packages_path, skip=_zip_repo_skip, path_to_name=_zip_repo_path_to_name
)
packages_zip = os.path.join(repo.root, "packages.zip")
try:
# Inform the user whether or not the repo was modified since it was last zipped
if os.path.exists(packages_zip) and filecmp.cmp(f.name, packages_zip):
tty.msg(f"{repo.namespace}: {packages_zip} is up to date")
return
else:
os.rename(f.name, packages_zip)
tty.msg(f"{repo.namespace} was zipped: {packages_zip}")
finally:
try:
os.unlink(f.name)
except OSError:
pass
def _get_repo(repos: List[str], path_or_name) -> Tuple[Optional[str], Optional[spack.repo.Repo]]:
"""Find repo by path or namespace"""
canon_path = spack.util.path.canonicalize_path(path_or_name)
for path in repos:
if canon_path == spack.util.path.canonicalize_path(path):
return path, None
for path in repos:
try:
repo = spack.repo.from_path(path)
except spack.repo.RepoError:
continue
if repo.namespace == path_or_name:
return path, repo
return None, None
def repo(parser, args):
@ -167,5 +226,6 @@ def repo(parser, args):
"add": repo_add,
"remove": repo_remove,
"rm": repo_remove,
"zip": repo_zip,
}
action[args.repo_command](args)

View File

@ -8,6 +8,7 @@
import os.path
import pathlib
import sys
import zipfile
from typing import Any, Dict, Optional, Tuple, Type, Union
import llnl.util.filesystem
@ -20,7 +21,7 @@
import spack.repo
import spack.stage
import spack.util.spack_json as sjson
from spack.util.crypto import Checker, checksum
from spack.util.crypto import Checker, checksum_stream
from spack.util.executable import which, which_string
@ -154,6 +155,9 @@ def __hash__(self) -> int:
return hash(self.sha256)
zipfilecache = {}
class FilePatch(Patch):
"""Describes a patch that is retrieved from a file in the repository."""
@ -193,9 +197,27 @@ def __init__(
# Cannot use pkg.package_dir because it's a property and we have
# classes, not instances.
pkg_dir = os.path.abspath(os.path.dirname(cls.module.__file__))
path = os.path.join(pkg_dir, self.relative_path)
if os.path.exists(path):
abs_path = path
path = pathlib.Path(os.path.join(pkg_dir, self.relative_path))
if "packages.zip" in path.parts:
# check if it exists in the zip file.
idx = path.parts.index("packages.zip")
zip_path, entry_path = pathlib.PurePath(*path.parts[: idx + 1]), pathlib.PurePath(
*path.parts[idx + 1 :]
)
lookup = zipfilecache.get(zip_path)
if lookup is None:
zip = zipfile.ZipFile(zip_path, "r")
namelist = set(zip.namelist())
zipfilecache[zip_path] = (zip, namelist)
else:
zip, namelist = lookup
if str(entry_path) in namelist:
abs_path = str(path)
break
elif path.exists():
abs_path = str(path)
break
if abs_path is None:
@ -215,7 +237,26 @@ def sha256(self) -> str:
The sha256 of the patch file.
"""
if self._sha256 is None and self.path is not None:
self._sha256 = checksum(hashlib.sha256, self.path)
path = pathlib.PurePath(self.path)
if "packages.zip" in path.parts:
# split in path to packages.zip and the path within the zip
idx = path.parts.index("packages.zip")
path_to_zip, path_in_zip = pathlib.PurePath(
*path.parts[: idx + 1]
), pathlib.PurePath(*path.parts[idx + 1 :])
lookup = zipfilecache.get(path_to_zip)
if lookup is None:
zip = zipfile.ZipFile(path_to_zip, "r")
namelist = set(zip.namelist())
zipfilecache[path_to_zip] = (zip, namelist)
else:
zip, namelist = lookup
f = zip.open(str(path_in_zip), "r")
else:
f = open(self.path, "rb")
self._sha256 = checksum_stream(hashlib.sha256, f)
f.close()
assert isinstance(self._sha256, str)
return self._sha256

View File

@ -26,6 +26,7 @@
import types
import uuid
import warnings
import zipimport
from typing import Any, Dict, Generator, List, Optional, Set, Tuple, Type, Union
import llnl.path
@ -100,32 +101,6 @@ def get_data(self, path):
return self.prepend.encode() + b"\n" + data
class RepoLoader(_PrependFileLoader):
"""Loads a Python module associated with a package in specific repository"""
#: Code in ``_package_prepend`` is prepended to imported packages.
#:
#: Spack packages are expected to call `from spack.package import *`
#: themselves, but we are allowing a deprecation period before breaking
#: external repos that don't do this yet.
_package_prepend = "from spack.package import *"
def __init__(self, fullname, repo, package_name):
self.repo = repo
self.package_name = package_name
self.package_py = repo.filename_for_package_name(package_name)
self.fullname = fullname
super().__init__(self.fullname, self.package_py, prepend=self._package_prepend)
class SpackNamespaceLoader:
def create_module(self, spec):
return SpackNamespace(spec.name)
def exec_module(self, module):
module.__loader__ = self
class ReposFinder:
"""MetaPathFinder class that loads a Python module corresponding to a Spack package.
@ -165,10 +140,11 @@ def find_spec(self, fullname, python_path, target=None):
if not fullname.startswith(ROOT_PYTHON_NAMESPACE):
return None
loader = self.compute_loader(fullname)
if loader is None:
result = self.compute_loader(fullname)
if result is None:
return None
return importlib.util.spec_from_loader(fullname, loader)
loader, actual_fullname = result
return importlib.util.spec_from_loader(actual_fullname, loader)
def compute_loader(self, fullname):
# namespaces are added to repo, and package modules are leaves.
@ -187,16 +163,29 @@ def compute_loader(self, fullname):
# With 2 nested conditionals we can call "repo.real_name" only once
package_name = repo.real_name(module_name)
if package_name:
return RepoLoader(fullname, repo, package_name)
# annoyingly there is a many to one mapping for pkg module to file, have to
# figure out how to deal with this properly.
return (
(repo.zipimporter, f"{namespace}.{package_name}")
if repo.zipimporter
else (
_PrependFileLoader(
fullname=fullname,
path=repo.filename_for_package_name(package_name),
prepend="from spack.package import *",
),
fullname,
)
)
# We are importing a full namespace like 'spack.pkg.builtin'
if fullname == repo.full_namespace:
return SpackNamespaceLoader()
return SpackNamespaceLoader(), fullname
# No repo provides the namespace, but it is a valid prefix of
# something in the RepoPath.
if is_repo_path and self.current_repository.by_namespace.is_prefix(fullname):
return SpackNamespaceLoader()
return SpackNamespaceLoader(), fullname
return None
@ -207,6 +196,7 @@ def compute_loader(self, fullname):
repo_config_name = "repo.yaml" # Top-level filename for repo config.
repo_index_name = "index.yaml" # Top-level filename for repository index.
packages_dir_name = "packages" # Top-level repo directory containing pkgs.
packages_zip_name = "packages.zip" # Top-level filename for zipped packages.
package_file_name = "package.py" # Filename for packages in a repository.
#: Guaranteed unused default value for some functions.
@ -216,9 +206,9 @@ def compute_loader(self, fullname):
def packages_path():
"""Get the test repo if it is active, otherwise the builtin repo."""
try:
return spack.repo.PATH.get_repo("builtin.mock").packages_path
except spack.repo.UnknownNamespaceError:
return spack.repo.PATH.get_repo("builtin").packages_path
return PATH.get_repo("builtin.mock").packages_path
except UnknownNamespaceError:
return PATH.get_repo("builtin").packages_path
class GitExe:
@ -1009,9 +999,14 @@ def check(condition, msg):
self._names = self.full_namespace.split(".")
packages_dir = config.get("subdirectory", packages_dir_name)
packages_zip = os.path.join(self.root, "packages.zip")
self.zipimporter = (
zipimport.zipimporter(packages_zip) if os.path.exists(packages_zip) else None
)
self.packages_path = os.path.join(self.root, packages_dir)
check(
os.path.isdir(self.packages_path), f"No directory '{packages_dir}' found in '{root}'"
self.zipimporter or os.path.isdir(self.packages_path),
f"No '{self.packages_path}' or '{packages_zip} found in '{root}'",
)
# Class attribute overrides by package name
@ -1507,6 +1502,14 @@ def use_repositories(
PATH = saved
class SpackNamespaceLoader:
def create_module(self, spec):
return SpackNamespace(spec.name)
def exec_module(self, module):
module.__loader__ = self
class MockRepositoryBuilder:
"""Build a mock repository in a directory"""

View File

@ -916,7 +916,7 @@ def interactive_version_filter(
orig_url_dict = url_dict # only copy when using editor to modify
print_header = True
VERSION_COLOR = spack.spec.VERSION_COLOR
while True:
while sys.stdin.isatty():
if print_header:
has_filter = version_filter != VersionList([":"])
header = []
@ -933,7 +933,9 @@ def interactive_version_filter(
num_new = sum(1 for v in sorted_and_filtered if v not in known_versions)
header.append(f"{llnl.string.plural(num_new, 'new version')}")
if has_filter:
header.append(colorize(f"Filtered by {VERSION_COLOR}@@{version_filter}@."))
header.append(
colorize(f"Filtered by {VERSION_COLOR}@@{version_filter}@. (clear with c)")
)
version_with_url = [
colorize(

View File

@ -7,7 +7,9 @@
import io
import os
import pathlib
import shutil
import tarfile
import zipfile
from contextlib import closing, contextmanager
from gzip import GzipFile
from typing import Callable, Dict, Tuple
@ -228,3 +230,53 @@ def reproducible_tarfile_from_prefix(
tar.addfile(file_info, f)
dir_stack.extend(reversed(new_dirs)) # we pop, so reverse to stay alphabetical
def reproducible_zipfile_from_prefix(
zip: zipfile.ZipFile,
prefix: str,
*,
skip: Callable[[os.DirEntry], bool] = lambda entry: False,
path_to_name: Callable[[str], str] = default_path_to_name,
) -> None:
"""Similar to ``reproducible_tarfile_from_prefix`` but for zipfiles."""
dir_stack = [prefix]
while dir_stack:
dir = dir_stack.pop()
# Add the dir before its contents. zip.mkdir is Python 3.11.
dir_info = zipfile.ZipInfo(path_to_name(dir))
if not dir_info.filename.endswith("/"):
dir_info.filename += "/"
dir_info.external_attr = (0o40755 << 16) | 0x10
dir_info.file_size = 0
with zip.open(dir_info, "w") as dest:
dest.write(b"")
# Sort by name for reproducibility
with os.scandir(dir) as it:
entries = sorted(it, key=lambda entry: entry.name)
new_dirs = []
for entry in entries:
if skip(entry):
continue
if entry.is_dir(follow_symlinks=False):
new_dirs.append(entry.path)
continue
# symlink / hardlink support in ZIP is poor or non-existent: make copies.
elif entry.is_file(follow_symlinks=True):
file_info = zipfile.ZipInfo(path_to_name(entry.path))
# Normalize permissions like git
s = entry.stat(follow_symlinks=True)
file_info.external_attr = (0o755 if s.st_mode & 0o100 else 0o644) << 16
file_info.file_size = s.st_size
file_info.compress_type = zip.compression
with open(entry.path, "rb") as src, zip.open(file_info, "w") as dest:
shutil.copyfileobj(src, dest) # type: ignore[misc]
dir_stack.extend(reversed(new_dirs)) # we pop, so reverse to stay alphabetical

View File

@ -1748,7 +1748,7 @@ _spack_repo() {
then
SPACK_COMPREPLY="-h --help"
else
SPACK_COMPREPLY="create list add remove rm"
SPACK_COMPREPLY="create list add remove rm zip"
fi
}
@ -1792,6 +1792,15 @@ _spack_repo_rm() {
fi
}
_spack_repo_zip() {
if $list_options
then
SPACK_COMPREPLY="-h --help"
else
_repos
fi
}
_spack_resource() {
if $list_options
then

View File

@ -2675,6 +2675,7 @@ complete -c spack -n '__fish_spack_using_command_pos 0 repo' -f -a list -d 'show
complete -c spack -n '__fish_spack_using_command_pos 0 repo' -f -a add -d 'add a package source to Spack\'s configuration'
complete -c spack -n '__fish_spack_using_command_pos 0 repo' -f -a remove -d 'remove a repository from Spack\'s configuration'
complete -c spack -n '__fish_spack_using_command_pos 0 repo' -f -a rm -d 'remove a repository from Spack\'s configuration'
complete -c spack -n '__fish_spack_using_command_pos 0 repo' -f -a zip -d 'zip a package repository to make it immutable and faster to load'
complete -c spack -n '__fish_spack_using_command repo' -s h -l help -f -a help
complete -c spack -n '__fish_spack_using_command repo' -s h -l help -d 'show this help message and exit'
@ -2717,6 +2718,12 @@ complete -c spack -n '__fish_spack_using_command repo rm' -s h -l help -d 'show
complete -c spack -n '__fish_spack_using_command repo rm' -l scope -r -f -a '_builtin defaults system site user command_line'
complete -c spack -n '__fish_spack_using_command repo rm' -l scope -r -d 'configuration scope to modify'
# spack repo zip
set -g __fish_spack_optspecs_spack_repo_zip h/help
complete -c spack -n '__fish_spack_using_command_pos 0 repo zip' $__fish_spack_force_files -a '(__fish_spack_repos)'
complete -c spack -n '__fish_spack_using_command repo zip' -s h -l help -f -a help
complete -c spack -n '__fish_spack_using_command repo zip' -s h -l help -d 'show this help message and exit'
# spack resource
set -g __fish_spack_optspecs_spack_resource h/help
complete -c spack -n '__fish_spack_using_command_pos 0 resource' -f -a list -d 'list all resources known to spack (currently just patches)'

View File

@ -6,8 +6,7 @@
import socket
from spack.package import *
from .blt import llnl_link_helpers
from spack.pkg.builtin.blt import llnl_link_helpers
class Chai(CachedCMakePackage, CudaPackage, ROCmPackage):

View File

@ -6,8 +6,7 @@
import socket
from spack.package import *
from .blt import llnl_link_helpers
from spack.pkg.builtin.blt import llnl_link_helpers
# Starting with 2022.03.0, the only submodule we want to fetch is tpl/desul

View File

@ -7,8 +7,7 @@
import socket
from spack.package import *
from .blt import llnl_link_helpers
from spack.pkg.builtin.blt import llnl_link_helpers
class Umpire(CachedCMakePackage, CudaPackage, ROCmPackage):