zipfile repo: quick and dirty wip

This commit is contained in:
Harmen Stoppels 2024-08-21 11:23:25 +02:00
parent f9065f0c7e
commit ea823d2308
11 changed files with 249 additions and 77 deletions

1
.gitignore vendored
View File

@ -7,6 +7,7 @@
/var/spack/environments /var/spack/environments
/var/spack/repos/*/index.yaml /var/spack/repos/*/index.yaml
/var/spack/repos/*/lock /var/spack/repos/*/lock
/var/spack/repos/*/packages.zip
/opt /opt
# Ignore everything in /etc/spack except /etc/spack/defaults # Ignore everything in /etc/spack except /etc/spack/defaults
/etc/spack/* /etc/spack/*

View File

@ -3,8 +3,13 @@
# #
# SPDX-License-Identifier: (Apache-2.0 OR MIT) # SPDX-License-Identifier: (Apache-2.0 OR MIT)
import filecmp
import os import os
import pathlib
import sys import sys
import tempfile
import zipfile
from typing import List, Optional, Tuple
import llnl.util.tty as tty import llnl.util.tty as tty
@ -12,6 +17,7 @@
import spack.repo import spack.repo
import spack.util.path import spack.util.path
from spack.cmd.common import arguments from spack.cmd.common import arguments
from spack.util.archive import reproducible_zipfile_from_prefix
description = "manage package source repositories" description = "manage package source repositories"
section = "config" section = "config"
@ -67,6 +73,12 @@ def setup_parser(subparser):
help="configuration scope to modify", help="configuration scope to modify",
) )
# Zip
zip_parser = sp.add_parser("zip", help=repo_zip.__doc__)
zip_parser.add_argument(
"namespace_or_path", help="namespace or path of a Spack package repository"
)
def repo_create(args): def repo_create(args):
"""create a new package repository""" """create a new package repository"""
@ -109,31 +121,18 @@ def repo_add(args):
def repo_remove(args): def repo_remove(args):
"""remove a repository from Spack's configuration""" """remove a repository from Spack's configuration"""
repos = spack.config.get("repos", scope=args.scope) repos = spack.config.get("repos", scope=args.scope)
namespace_or_path = args.namespace_or_path
# If the argument is a path, remove that repository from config. key, repo = _get_repo(repos, args.namespace_or_path)
canon_path = spack.util.path.canonicalize_path(namespace_or_path)
for repo_path in repos:
repo_canon_path = spack.util.path.canonicalize_path(repo_path)
if canon_path == repo_canon_path:
repos.remove(repo_path)
spack.config.set("repos", repos, args.scope)
tty.msg("Removed repository %s" % repo_path)
return
# If it is a namespace, remove corresponding repo if not key:
for path in repos: tty.die(f"No repository with path or namespace: {args.namespace_or_path}")
try:
repo = spack.repo.from_path(path)
if repo.namespace == namespace_or_path:
repos.remove(path)
spack.config.set("repos", repos, args.scope)
tty.msg("Removed repository %s with namespace '%s'." % (repo.root, repo.namespace))
return
except spack.repo.RepoError:
continue
tty.die("No repository with path or namespace: %s" % namespace_or_path) repos.remove(key)
spack.config.set("repos", repos, args.scope)
if repo:
tty.msg(f"Removed repository {repo.root} with namespace '{repo.namespace}'")
else:
tty.msg(f"Removed repository {key}")
def repo_list(args): def repo_list(args):
@ -147,17 +146,77 @@ def repo_list(args):
continue continue
if sys.stdout.isatty(): if sys.stdout.isatty():
msg = "%d package repositor" % len(repos) tty.msg(f"{len(repos)} package repositor{'y.' if len(repos) == 1 else 'ies.'}")
msg += "y." if len(repos) == 1 else "ies."
tty.msg(msg)
if not repos: if not repos:
return return
max_ns_len = max(len(r.namespace) for r in repos) max_ns_len = max(len(r.namespace) for r in repos)
for repo in repos: for repo in repos:
fmt = "%%-%ds%%s" % (max_ns_len + 4) print(f"{repo.namespace:<{max_ns_len}} {repo.root}")
print(fmt % (repo.namespace, repo.root))
def repo_zip(args):
    """zip a package repository to make it immutable and faster to load"""
    # NOTE: the docstring above doubles as the `spack repo zip` help text, keep it a one-liner.
    key, _ = _get_repo(spack.config.get("repos"), args.namespace_or_path)

    if not key:
        tty.die(f"No repository with path or namespace: {args.namespace_or_path}")

    try:
        repo = spack.repo.from_path(key)
    except spack.repo.RepoError:
        tty.die(f"No repository at path: {key}")

    def _zip_repo_skip(entry: os.DirEntry):
        # Byte-code caches are not part of the repository contents.
        return entry.name == "__pycache__"

    def _zip_repo_path_to_name(path: str) -> str:
        # Member names are relative to the packages directory, and `package.py` is stored
        # as `__init__.py` (presumably so each package directory is importable from the
        # zip as a Python package -- confirm against the zipimporter setup in spack.repo).
        rel_path = pathlib.PurePath(path).relative_to(repo.packages_path)
        if rel_path.name == "package.py":
            rel_path = rel_path.with_name("__init__.py")
        return str(rel_path)

    packages_zip = os.path.join(repo.root, "packages.zip")

    # Build the archive in a temporary file next to the destination, so that moving it into
    # place stays on the same filesystem. delete=False because the file has to outlive the
    # `with` block below in order to be compared and moved.
    tmp = tempfile.NamedTemporaryFile(delete=False, mode="wb", dir=repo.root)
    try:
        with tmp, zipfile.ZipFile(tmp, "w", compression=zipfile.ZIP_DEFLATED) as zip_file:
            reproducible_zipfile_from_prefix(
                zip_file,
                repo.packages_path,
                skip=_zip_repo_skip,
                path_to_name=_zip_repo_path_to_name,
            )

        # Inform the user whether or not the repo was modified since it was last zipped.
        # shallow=False forces a byte-for-byte comparison instead of trusting stat() data.
        if os.path.exists(packages_zip) and filecmp.cmp(tmp.name, packages_zip, shallow=False):
            tty.msg(f"{repo.namespace}: {packages_zip} is up to date")
            return

        # os.replace overwrites an existing destination on every platform, whereas
        # os.rename raises on Windows if packages.zip is already there.
        os.replace(tmp.name, packages_zip)
        tty.msg(f"{repo.namespace} was zipped: {packages_zip}")
    finally:
        # Best-effort cleanup: the temporary file is already gone after a successful move,
        # but must be removed if zipping failed or the archive turned out to be up to date.
        try:
            os.unlink(tmp.name)
        except OSError:
            pass
def _get_repo(repos: List[str], path_or_name) -> Tuple[Optional[str], Optional[spack.repo.Repo]]:
    """Locate a configured repository, matching on path first and on namespace second.

    Args:
        repos: repository paths as listed in the configuration.
        path_or_name: either a path to a repository or a repository namespace.

    Returns:
        ``(entry, None)`` when ``path_or_name`` canonicalizes to the same path as a config
        entry, ``(entry, repo)`` when the repository loaded from an entry has the requested
        namespace, and ``(None, None)`` when nothing matches. ``entry`` is always the string
        exactly as it appears in ``repos``.
    """
    wanted = spack.util.path.canonicalize_path(path_or_name)

    # Path match: no need to load the repository at all.
    by_path = next(
        (entry for entry in repos if spack.util.path.canonicalize_path(entry) == wanted), None
    )
    if by_path is not None:
        return by_path, None

    # Namespace match: entries that fail to load as a repository are ignored.
    for entry in repos:
        try:
            candidate = spack.repo.from_path(entry)
        except spack.repo.RepoError:
            continue
        else:
            if candidate.namespace == path_or_name:
                return entry, candidate

    return None, None
def repo(parser, args): def repo(parser, args):
@ -167,5 +226,6 @@ def repo(parser, args):
"add": repo_add, "add": repo_add,
"remove": repo_remove, "remove": repo_remove,
"rm": repo_remove, "rm": repo_remove,
"zip": repo_zip,
} }
action[args.repo_command](args) action[args.repo_command](args)

View File

@ -8,6 +8,7 @@
import os.path import os.path
import pathlib import pathlib
import sys import sys
import zipfile
from typing import Any, Dict, Optional, Tuple, Type, Union from typing import Any, Dict, Optional, Tuple, Type, Union
import llnl.util.filesystem import llnl.util.filesystem
@ -20,7 +21,7 @@
import spack.repo import spack.repo
import spack.stage import spack.stage
import spack.util.spack_json as sjson import spack.util.spack_json as sjson
from spack.util.crypto import Checker, checksum from spack.util.crypto import Checker, checksum_stream
from spack.util.executable import which, which_string from spack.util.executable import which, which_string
@ -154,6 +155,9 @@ def __hash__(self) -> int:
return hash(self.sha256) return hash(self.sha256)
zipfilecache = {}
class FilePatch(Patch): class FilePatch(Patch):
"""Describes a patch that is retrieved from a file in the repository.""" """Describes a patch that is retrieved from a file in the repository."""
@ -193,9 +197,27 @@ def __init__(
# Cannot use pkg.package_dir because it's a property and we have # Cannot use pkg.package_dir because it's a property and we have
# classes, not instances. # classes, not instances.
pkg_dir = os.path.abspath(os.path.dirname(cls.module.__file__)) pkg_dir = os.path.abspath(os.path.dirname(cls.module.__file__))
path = os.path.join(pkg_dir, self.relative_path) path = pathlib.Path(os.path.join(pkg_dir, self.relative_path))
if os.path.exists(path):
abs_path = path if "packages.zip" in path.parts:
# check if it exists in the zip file.
idx = path.parts.index("packages.zip")
zip_path, entry_path = pathlib.PurePath(*path.parts[: idx + 1]), pathlib.PurePath(
*path.parts[idx + 1 :]
)
lookup = zipfilecache.get(zip_path)
if lookup is None:
zip = zipfile.ZipFile(zip_path, "r")
namelist = set(zip.namelist())
zipfilecache[zip_path] = (zip, namelist)
else:
zip, namelist = lookup
if str(entry_path) in namelist:
abs_path = str(path)
break
elif path.exists():
abs_path = str(path)
break break
if abs_path is None: if abs_path is None:
@ -215,7 +237,26 @@ def sha256(self) -> str:
The sha256 of the patch file. The sha256 of the patch file.
""" """
if self._sha256 is None and self.path is not None: if self._sha256 is None and self.path is not None:
self._sha256 = checksum(hashlib.sha256, self.path) path = pathlib.PurePath(self.path)
if "packages.zip" in path.parts:
# split in path to packages.zip and the path within the zip
idx = path.parts.index("packages.zip")
path_to_zip, path_in_zip = pathlib.PurePath(
*path.parts[: idx + 1]
), pathlib.PurePath(*path.parts[idx + 1 :])
lookup = zipfilecache.get(path_to_zip)
if lookup is None:
zip = zipfile.ZipFile(path_to_zip, "r")
namelist = set(zip.namelist())
zipfilecache[path_to_zip] = (zip, namelist)
else:
zip, namelist = lookup
f = zip.open(str(path_in_zip), "r")
else:
f = open(self.path, "rb")
self._sha256 = checksum_stream(hashlib.sha256, f)
f.close()
assert isinstance(self._sha256, str) assert isinstance(self._sha256, str)
return self._sha256 return self._sha256

View File

@ -26,6 +26,7 @@
import types import types
import uuid import uuid
import warnings import warnings
import zipimport
from typing import Any, Dict, Generator, List, Optional, Set, Tuple, Type, Union from typing import Any, Dict, Generator, List, Optional, Set, Tuple, Type, Union
import llnl.path import llnl.path
@ -100,32 +101,6 @@ def get_data(self, path):
return self.prepend.encode() + b"\n" + data return self.prepend.encode() + b"\n" + data
class RepoLoader(_PrependFileLoader):
"""Loads a Python module associated with a package in specific repository"""
#: Code in ``_package_prepend`` is prepended to imported packages.
#:
#: Spack packages are expected to call `from spack.package import *`
#: themselves, but we are allowing a deprecation period before breaking
#: external repos that don't do this yet.
_package_prepend = "from spack.package import *"
def __init__(self, fullname, repo, package_name):
self.repo = repo
self.package_name = package_name
self.package_py = repo.filename_for_package_name(package_name)
self.fullname = fullname
super().__init__(self.fullname, self.package_py, prepend=self._package_prepend)
class SpackNamespaceLoader:
def create_module(self, spec):
return SpackNamespace(spec.name)
def exec_module(self, module):
module.__loader__ = self
class ReposFinder: class ReposFinder:
"""MetaPathFinder class that loads a Python module corresponding to a Spack package. """MetaPathFinder class that loads a Python module corresponding to a Spack package.
@ -165,10 +140,11 @@ def find_spec(self, fullname, python_path, target=None):
if not fullname.startswith(ROOT_PYTHON_NAMESPACE): if not fullname.startswith(ROOT_PYTHON_NAMESPACE):
return None return None
loader = self.compute_loader(fullname) result = self.compute_loader(fullname)
if loader is None: if result is None:
return None return None
return importlib.util.spec_from_loader(fullname, loader) loader, actual_fullname = result
return importlib.util.spec_from_loader(actual_fullname, loader)
def compute_loader(self, fullname): def compute_loader(self, fullname):
# namespaces are added to repo, and package modules are leaves. # namespaces are added to repo, and package modules are leaves.
@ -187,16 +163,29 @@ def compute_loader(self, fullname):
# With 2 nested conditionals we can call "repo.real_name" only once # With 2 nested conditionals we can call "repo.real_name" only once
package_name = repo.real_name(module_name) package_name = repo.real_name(module_name)
if package_name: if package_name:
return RepoLoader(fullname, repo, package_name) # annoyingly there is a many to one mapping for pkg module to file, have to
# figure out how to deal with this properly.
return (
(repo.zipimporter, f"{namespace}.{package_name}")
if repo.zipimporter
else (
_PrependFileLoader(
fullname=fullname,
path=repo.filename_for_package_name(package_name),
prepend="from spack.package import *",
),
fullname,
)
)
# We are importing a full namespace like 'spack.pkg.builtin' # We are importing a full namespace like 'spack.pkg.builtin'
if fullname == repo.full_namespace: if fullname == repo.full_namespace:
return SpackNamespaceLoader() return SpackNamespaceLoader(), fullname
# No repo provides the namespace, but it is a valid prefix of # No repo provides the namespace, but it is a valid prefix of
# something in the RepoPath. # something in the RepoPath.
if is_repo_path and self.current_repository.by_namespace.is_prefix(fullname): if is_repo_path and self.current_repository.by_namespace.is_prefix(fullname):
return SpackNamespaceLoader() return SpackNamespaceLoader(), fullname
return None return None
@ -207,6 +196,7 @@ def compute_loader(self, fullname):
repo_config_name = "repo.yaml" # Top-level filename for repo config. repo_config_name = "repo.yaml" # Top-level filename for repo config.
repo_index_name = "index.yaml" # Top-level filename for repository index. repo_index_name = "index.yaml" # Top-level filename for repository index.
packages_dir_name = "packages" # Top-level repo directory containing pkgs. packages_dir_name = "packages" # Top-level repo directory containing pkgs.
packages_zip_name = "packages.zip" # Top-level filename for zipped packages.
package_file_name = "package.py" # Filename for packages in a repository. package_file_name = "package.py" # Filename for packages in a repository.
#: Guaranteed unused default value for some functions. #: Guaranteed unused default value for some functions.
@ -216,9 +206,9 @@ def compute_loader(self, fullname):
def packages_path(): def packages_path():
"""Get the test repo if it is active, otherwise the builtin repo.""" """Get the test repo if it is active, otherwise the builtin repo."""
try: try:
return spack.repo.PATH.get_repo("builtin.mock").packages_path return PATH.get_repo("builtin.mock").packages_path
except spack.repo.UnknownNamespaceError: except UnknownNamespaceError:
return spack.repo.PATH.get_repo("builtin").packages_path return PATH.get_repo("builtin").packages_path
class GitExe: class GitExe:
@ -1009,9 +999,14 @@ def check(condition, msg):
self._names = self.full_namespace.split(".") self._names = self.full_namespace.split(".")
packages_dir = config.get("subdirectory", packages_dir_name) packages_dir = config.get("subdirectory", packages_dir_name)
packages_zip = os.path.join(self.root, "packages.zip")
self.zipimporter = (
zipimport.zipimporter(packages_zip) if os.path.exists(packages_zip) else None
)
self.packages_path = os.path.join(self.root, packages_dir) self.packages_path = os.path.join(self.root, packages_dir)
check( check(
os.path.isdir(self.packages_path), f"No directory '{packages_dir}' found in '{root}'" self.zipimporter or os.path.isdir(self.packages_path),
f"No '{self.packages_path}' or '{packages_zip} found in '{root}'",
) )
# Class attribute overrides by package name # Class attribute overrides by package name
@ -1507,6 +1502,14 @@ def use_repositories(
PATH = saved PATH = saved
class SpackNamespaceLoader:
    """Loader that produces ``SpackNamespace`` modules for namespace imports."""

    def create_module(self, spec):
        """Return a new ``SpackNamespace`` named after the module spec."""
        return SpackNamespace(spec.name)

    def exec_module(self, module):
        """Record this loader on the module; there is no code to execute."""
        module.__loader__ = self
class MockRepositoryBuilder: class MockRepositoryBuilder:
"""Build a mock repository in a directory""" """Build a mock repository in a directory"""

View File

@ -916,7 +916,7 @@ def interactive_version_filter(
orig_url_dict = url_dict # only copy when using editor to modify orig_url_dict = url_dict # only copy when using editor to modify
print_header = True print_header = True
VERSION_COLOR = spack.spec.VERSION_COLOR VERSION_COLOR = spack.spec.VERSION_COLOR
while True: while sys.stdin.isatty():
if print_header: if print_header:
has_filter = version_filter != VersionList([":"]) has_filter = version_filter != VersionList([":"])
header = [] header = []
@ -933,7 +933,9 @@ def interactive_version_filter(
num_new = sum(1 for v in sorted_and_filtered if v not in known_versions) num_new = sum(1 for v in sorted_and_filtered if v not in known_versions)
header.append(f"{llnl.string.plural(num_new, 'new version')}") header.append(f"{llnl.string.plural(num_new, 'new version')}")
if has_filter: if has_filter:
header.append(colorize(f"Filtered by {VERSION_COLOR}@@{version_filter}@.")) header.append(
colorize(f"Filtered by {VERSION_COLOR}@@{version_filter}@. (clear with c)")
)
version_with_url = [ version_with_url = [
colorize( colorize(

View File

@ -7,7 +7,9 @@
import io import io
import os import os
import pathlib import pathlib
import shutil
import tarfile import tarfile
import zipfile
from contextlib import closing, contextmanager from contextlib import closing, contextmanager
from gzip import GzipFile from gzip import GzipFile
from typing import Callable, Dict, Tuple from typing import Callable, Dict, Tuple
@ -228,3 +230,53 @@ def reproducible_tarfile_from_prefix(
tar.addfile(file_info, f) tar.addfile(file_info, f)
dir_stack.extend(reversed(new_dirs)) # we pop, so reverse to stay alphabetical dir_stack.extend(reversed(new_dirs)) # we pop, so reverse to stay alphabetical
def reproducible_zipfile_from_prefix(
    zip: zipfile.ZipFile,
    prefix: str,
    *,
    skip: Callable[[os.DirEntry], bool] = lambda entry: False,
    path_to_name: Callable[[str], str] = default_path_to_name,
) -> None:
    """Write the directory tree rooted at ``prefix`` into ``zip`` in a deterministic order.

    This is the zipfile counterpart of ``reproducible_tarfile_from_prefix``.

    Args:
        zip: archive opened for writing; its ``compression`` is applied to each file member.
        prefix: directory whose contents are archived.
        skip: predicate called once per directory entry; entries for which it returns true
            are left out, and skipped directories are not descended into.
        path_to_name: maps a filesystem path to the member name stored in the archive.
    """
    pending = [prefix]

    while pending:
        current = pending.pop()

        # Emit the directory member ahead of its children. zip.mkdir is Python 3.11.
        dir_member = zipfile.ZipInfo(path_to_name(current))
        if not dir_member.filename.endswith("/"):
            dir_member.filename = dir_member.filename + "/"
        # Unix mode drwxr-xr-x in the high 16 bits, MS-DOS directory flag in the low bits.
        dir_member.external_attr = (0o40755 << 16) | 0x10
        dir_member.file_size = 0
        with zip.open(dir_member, "w") as out:
            out.write(b"")

        # A name-sorted listing makes the member order independent of the filesystem.
        with os.scandir(current) as listing:
            children = sorted(listing, key=lambda child: child.name)

        subdirs = []
        for child in children:
            if skip(child):
                continue

            if child.is_dir(follow_symlinks=False):
                subdirs.append(child.path)
            elif child.is_file(follow_symlinks=True):
                # symlink / hardlink support in ZIP is poor or non-existent: store a copy
                # of whatever the link resolves to.
                info = child.stat(follow_symlinks=True)
                member = zipfile.ZipInfo(path_to_name(child.path))
                # Normalize permissions like git: only the owner-executable bit matters.
                mode = 0o755 if info.st_mode & 0o100 else 0o644
                member.external_attr = mode << 16
                member.file_size = info.st_size
                member.compress_type = zip.compression
                with open(child.path, "rb") as src, zip.open(member, "w") as out:
                    shutil.copyfileobj(src, out)  # type: ignore[misc]

        # The stack is popped from the end, so push in reverse to stay alphabetical.
        pending.extend(subdirs[::-1])

View File

@ -1748,7 +1748,7 @@ _spack_repo() {
then then
SPACK_COMPREPLY="-h --help" SPACK_COMPREPLY="-h --help"
else else
SPACK_COMPREPLY="create list add remove rm" SPACK_COMPREPLY="create list add remove rm zip"
fi fi
} }
@ -1792,6 +1792,15 @@ _spack_repo_rm() {
fi fi
} }
# Completion for `spack repo zip`: help flags when listing options, otherwise repositories.
_spack_repo_zip() {
    if $list_options; then
        SPACK_COMPREPLY="-h --help"
    else
        _repos
    fi
}
_spack_resource() { _spack_resource() {
if $list_options if $list_options
then then

View File

@ -2675,6 +2675,7 @@ complete -c spack -n '__fish_spack_using_command_pos 0 repo' -f -a list -d 'show
complete -c spack -n '__fish_spack_using_command_pos 0 repo' -f -a add -d 'add a package source to Spack\'s configuration' complete -c spack -n '__fish_spack_using_command_pos 0 repo' -f -a add -d 'add a package source to Spack\'s configuration'
complete -c spack -n '__fish_spack_using_command_pos 0 repo' -f -a remove -d 'remove a repository from Spack\'s configuration' complete -c spack -n '__fish_spack_using_command_pos 0 repo' -f -a remove -d 'remove a repository from Spack\'s configuration'
complete -c spack -n '__fish_spack_using_command_pos 0 repo' -f -a rm -d 'remove a repository from Spack\'s configuration' complete -c spack -n '__fish_spack_using_command_pos 0 repo' -f -a rm -d 'remove a repository from Spack\'s configuration'
complete -c spack -n '__fish_spack_using_command_pos 0 repo' -f -a zip -d 'zip a package repository to make it immutable and faster to load'
complete -c spack -n '__fish_spack_using_command repo' -s h -l help -f -a help complete -c spack -n '__fish_spack_using_command repo' -s h -l help -f -a help
complete -c spack -n '__fish_spack_using_command repo' -s h -l help -d 'show this help message and exit' complete -c spack -n '__fish_spack_using_command repo' -s h -l help -d 'show this help message and exit'
@ -2717,6 +2718,12 @@ complete -c spack -n '__fish_spack_using_command repo rm' -s h -l help -d 'show
complete -c spack -n '__fish_spack_using_command repo rm' -l scope -r -f -a '_builtin defaults system site user command_line' complete -c spack -n '__fish_spack_using_command repo rm' -l scope -r -f -a '_builtin defaults system site user command_line'
complete -c spack -n '__fish_spack_using_command repo rm' -l scope -r -d 'configuration scope to modify' complete -c spack -n '__fish_spack_using_command repo rm' -l scope -r -d 'configuration scope to modify'
# spack repo zip
set -g __fish_spack_optspecs_spack_repo_zip h/help
complete -c spack -n '__fish_spack_using_command_pos 0 repo zip' $__fish_spack_force_files -a '(__fish_spack_repos)'
complete -c spack -n '__fish_spack_using_command repo zip' -s h -l help -f -a help
complete -c spack -n '__fish_spack_using_command repo zip' -s h -l help -d 'show this help message and exit'
# spack resource # spack resource
set -g __fish_spack_optspecs_spack_resource h/help set -g __fish_spack_optspecs_spack_resource h/help
complete -c spack -n '__fish_spack_using_command_pos 0 resource' -f -a list -d 'list all resources known to spack (currently just patches)' complete -c spack -n '__fish_spack_using_command_pos 0 resource' -f -a list -d 'list all resources known to spack (currently just patches)'

View File

@ -6,8 +6,7 @@
import socket import socket
from spack.package import * from spack.package import *
from spack.pkg.builtin.blt import llnl_link_helpers
from .blt import llnl_link_helpers
class Chai(CachedCMakePackage, CudaPackage, ROCmPackage): class Chai(CachedCMakePackage, CudaPackage, ROCmPackage):

View File

@ -6,8 +6,7 @@
import socket import socket
from spack.package import * from spack.package import *
from spack.pkg.builtin.blt import llnl_link_helpers
from .blt import llnl_link_helpers
# Starting with 2022.03.0, the only submodule we want to fetch is tpl/desul # Starting with 2022.03.0, the only submodule we want to fetch is tpl/desul

View File

@ -7,8 +7,7 @@
import socket import socket
from spack.package import * from spack.package import *
from spack.pkg.builtin.blt import llnl_link_helpers
from .blt import llnl_link_helpers
class Umpire(CachedCMakePackage, CudaPackage, ROCmPackage): class Umpire(CachedCMakePackage, CudaPackage, ROCmPackage):