diff --git a/.gitignore b/.gitignore index 68f83ea38da..cf214895ae4 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ /var/spack/environments /var/spack/repos/*/index.yaml /var/spack/repos/*/lock +/var/spack/repos/*/packages.zip /opt # Ignore everything in /etc/spack except /etc/spack/defaults /etc/spack/* diff --git a/lib/spack/spack/cmd/repo.py b/lib/spack/spack/cmd/repo.py index e41e21f0a5b..c687710d926 100644 --- a/lib/spack/spack/cmd/repo.py +++ b/lib/spack/spack/cmd/repo.py @@ -3,8 +3,13 @@ # # SPDX-License-Identifier: (Apache-2.0 OR MIT) +import filecmp import os +import pathlib import sys +import tempfile +import zipfile +from typing import List, Optional, Tuple import llnl.util.tty as tty @@ -12,6 +17,7 @@ import spack.repo import spack.util.path from spack.cmd.common import arguments +from spack.util.archive import reproducible_zipfile_from_prefix description = "manage package source repositories" section = "config" @@ -67,6 +73,12 @@ def setup_parser(subparser): help="configuration scope to modify", ) + # Zip + zip_parser = sp.add_parser("zip", help=repo_zip.__doc__) + zip_parser.add_argument( + "namespace_or_path", help="namespace or path of a Spack package repository" + ) + def repo_create(args): """create a new package repository""" @@ -109,31 +121,18 @@ def repo_add(args): def repo_remove(args): """remove a repository from Spack's configuration""" repos = spack.config.get("repos", scope=args.scope) - namespace_or_path = args.namespace_or_path - # If the argument is a path, remove that repository from config. - canon_path = spack.util.path.canonicalize_path(namespace_or_path) - for repo_path in repos: - repo_canon_path = spack.util.path.canonicalize_path(repo_path) - if canon_path == repo_canon_path: - repos.remove(repo_path) - spack.config.set("repos", repos, args.scope) - tty.msg("Removed repository %s" % repo_path) - return + key, repo = _get_repo(repos, args.namespace_or_path) - # If it is a namespace, remove corresponding repo - for path in repos: - try: - repo = spack.repo.from_path(path) - if repo.namespace == namespace_or_path: - repos.remove(path) - spack.config.set("repos", repos, args.scope) - tty.msg("Removed repository %s with namespace '%s'." % (repo.root, repo.namespace)) - return - except spack.repo.RepoError: - continue + if not key: + tty.die(f"No repository with path or namespace: {args.namespace_or_path}") - tty.die("No repository with path or namespace: %s" % namespace_or_path) + repos.remove(key) + spack.config.set("repos", repos, args.scope) + if repo: + tty.msg(f"Removed repository {repo.root} with namespace '{repo.namespace}'") + else: + tty.msg(f"Removed repository {key}") def repo_list(args): @@ -147,17 +146,77 @@ def repo_list(args): continue if sys.stdout.isatty(): - msg = "%d package repositor" % len(repos) - msg += "y." if len(repos) == 1 else "ies." - tty.msg(msg) + tty.msg(f"{len(repos)} package repositor{'y.' if len(repos) == 1 else 'ies.'}") if not repos: return max_ns_len = max(len(r.namespace) for r in repos) for repo in repos: - fmt = "%%-%ds%%s" % (max_ns_len + 4) - print(fmt % (repo.namespace, repo.root)) + print(f"{repo.namespace:<{max_ns_len}} {repo.root}") + + +def repo_zip(args): + """zip a package repository to make it immutable and faster to load""" + key, _ = _get_repo(spack.config.get("repos"), args.namespace_or_path) + + if not key: + tty.die(f"No repository with path or namespace: {args.namespace_or_path}") + + try: + repo = spack.repo.from_path(key) + except spack.repo.RepoError: + tty.die(f"No repository at path: {key}") + + def _zip_repo_skip(entry: os.DirEntry): + return entry.name == "__pycache__" + + def _zip_repo_path_to_name(path: str) -> str: + # use spack/pkg//* prefix and rename `package.py` as `__init__.py` + rel_path = pathlib.PurePath(path).relative_to(repo.packages_path) + if rel_path.name == "package.py": + rel_path = rel_path.with_name("__init__.py") + return str(rel_path) + + # Create a zipfile in a temporary file + with tempfile.NamedTemporaryFile(delete=False, mode="wb", dir=repo.root) as f, zipfile.ZipFile( + f, "w", compression=zipfile.ZIP_DEFLATED + ) as zip: + reproducible_zipfile_from_prefix( + zip, repo.packages_path, skip=_zip_repo_skip, path_to_name=_zip_repo_path_to_name + ) + + packages_zip = os.path.join(repo.root, "packages.zip") + try: + # Inform the user whether or not the repo was modified since it was last zipped + if os.path.exists(packages_zip) and filecmp.cmp(f.name, packages_zip): + tty.msg(f"{repo.namespace}: {packages_zip} is up to date") + return + else: + os.rename(f.name, packages_zip) + tty.msg(f"{repo.namespace} was zipped: {packages_zip}") + finally: + try: + os.unlink(f.name) + except OSError: + pass + + +def _get_repo(repos: List[str], path_or_name) -> Tuple[Optional[str], Optional[spack.repo.Repo]]: + """Find repo by path or namespace""" + canon_path = spack.util.path.canonicalize_path(path_or_name) + for path in repos: + if canon_path == spack.util.path.canonicalize_path(path): + return path, None + + for path in repos: + try: + repo = spack.repo.from_path(path) + except spack.repo.RepoError: + continue + if repo.namespace == path_or_name: + return path, repo + return None, None def repo(parser, args): @@ -167,5 +226,6 @@ def repo(parser, args): "add": repo_add, "remove": repo_remove, "rm": repo_remove, + "zip": repo_zip, } action[args.repo_command](args) diff --git a/lib/spack/spack/patch.py b/lib/spack/spack/patch.py index 6a57f49bb07..7e2f3dcdc04 100644 --- a/lib/spack/spack/patch.py +++ b/lib/spack/spack/patch.py @@ -8,6 +8,7 @@ import os.path import pathlib import sys +import zipfile from typing import Any, Dict, Optional, Tuple, Type, Union import llnl.util.filesystem @@ -20,7 +21,7 @@ import spack.repo import spack.stage import spack.util.spack_json as sjson -from spack.util.crypto import Checker, checksum +from spack.util.crypto import Checker, checksum_stream from spack.util.executable import which, which_string @@ -154,6 +155,9 @@ def __hash__(self) -> int: return hash(self.sha256) +zipfilecache = {} + + class FilePatch(Patch): """Describes a patch that is retrieved from a file in the repository.""" @@ -193,9 +197,27 @@ def __init__( # Cannot use pkg.package_dir because it's a property and we have # classes, not instances. pkg_dir = os.path.abspath(os.path.dirname(cls.module.__file__)) - path = os.path.join(pkg_dir, self.relative_path) - if os.path.exists(path): - abs_path = path + path = pathlib.Path(os.path.join(pkg_dir, self.relative_path)) + + if "packages.zip" in path.parts: + # check if it exists in the zip file. + idx = path.parts.index("packages.zip") + zip_path, entry_path = pathlib.PurePath(*path.parts[: idx + 1]), pathlib.PurePath( + *path.parts[idx + 1 :] + ) + + lookup = zipfilecache.get(zip_path) + if lookup is None: + zip = zipfile.ZipFile(zip_path, "r") + namelist = set(zip.namelist()) + zipfilecache[zip_path] = (zip, namelist) + else: + zip, namelist = lookup + if str(entry_path) in namelist: + abs_path = str(path) + break + elif path.exists(): + abs_path = str(path) break if abs_path is None: @@ -215,7 +237,26 @@ def sha256(self) -> str: The sha256 of the patch file. """ if self._sha256 is None and self.path is not None: - self._sha256 = checksum(hashlib.sha256, self.path) + path = pathlib.PurePath(self.path) + if "packages.zip" in path.parts: + # split in path to packages.zip and the path within the zip + idx = path.parts.index("packages.zip") + path_to_zip, path_in_zip = pathlib.PurePath( + *path.parts[: idx + 1] + ), pathlib.PurePath(*path.parts[idx + 1 :]) + lookup = zipfilecache.get(path_to_zip) + if lookup is None: + zip = zipfile.ZipFile(path_to_zip, "r") + namelist = set(zip.namelist()) + zipfilecache[path_to_zip] = (zip, namelist) + else: + zip, namelist = lookup + f = zip.open(str(path_in_zip), "r") + else: + f = open(self.path, "rb") + self._sha256 = checksum_stream(hashlib.sha256, f) + f.close() + assert isinstance(self._sha256, str) return self._sha256 diff --git a/lib/spack/spack/repo.py b/lib/spack/spack/repo.py index 1e95b4ec648..042e8fdb126 100644 --- a/lib/spack/spack/repo.py +++ b/lib/spack/spack/repo.py @@ -26,6 +26,7 @@ import types import uuid import warnings +import zipimport from typing import Any, Dict, Generator, List, Optional, Set, Tuple, Type, Union import llnl.path @@ -100,32 +101,6 @@ def get_data(self, path): return self.prepend.encode() + b"\n" + data -class RepoLoader(_PrependFileLoader): - """Loads a Python module associated with a package in specific repository""" - - #: Code in ``_package_prepend`` is prepended to imported packages. - #: - #: Spack packages are expected to call `from spack.package import *` - #: themselves, but we are allowing a deprecation period before breaking - #: external repos that don't do this yet. - _package_prepend = "from spack.package import *" - - def __init__(self, fullname, repo, package_name): - self.repo = repo - self.package_name = package_name - self.package_py = repo.filename_for_package_name(package_name) - self.fullname = fullname - super().__init__(self.fullname, self.package_py, prepend=self._package_prepend) - - -class SpackNamespaceLoader: - def create_module(self, spec): - return SpackNamespace(spec.name) - - def exec_module(self, module): - module.__loader__ = self - - class ReposFinder: """MetaPathFinder class that loads a Python module corresponding to a Spack package. @@ -165,10 +140,11 @@ def find_spec(self, fullname, python_path, target=None): if not fullname.startswith(ROOT_PYTHON_NAMESPACE): return None - loader = self.compute_loader(fullname) - if loader is None: + result = self.compute_loader(fullname) + if result is None: return None - return importlib.util.spec_from_loader(fullname, loader) + loader, actual_fullname = result + return importlib.util.spec_from_loader(actual_fullname, loader) def compute_loader(self, fullname): # namespaces are added to repo, and package modules are leaves. @@ -187,16 +163,29 @@ def compute_loader(self, fullname): # With 2 nested conditionals we can call "repo.real_name" only once package_name = repo.real_name(module_name) if package_name: - return RepoLoader(fullname, repo, package_name) + # annoyingly there is a many to one mapping for pkg module to file, have to + # figure out how to deal with this properly. + return ( + (repo.zipimporter, f"{namespace}.{package_name}") + if repo.zipimporter + else ( + _PrependFileLoader( + fullname=fullname, + path=repo.filename_for_package_name(package_name), + prepend="from spack.package import *", + ), + fullname, + ) + ) # We are importing a full namespace like 'spack.pkg.builtin' if fullname == repo.full_namespace: - return SpackNamespaceLoader() + return SpackNamespaceLoader(), fullname # No repo provides the namespace, but it is a valid prefix of # something in the RepoPath. if is_repo_path and self.current_repository.by_namespace.is_prefix(fullname): - return SpackNamespaceLoader() + return SpackNamespaceLoader(), fullname return None @@ -207,6 +196,7 @@ def compute_loader(self, fullname): repo_config_name = "repo.yaml" # Top-level filename for repo config. repo_index_name = "index.yaml" # Top-level filename for repository index. packages_dir_name = "packages" # Top-level repo directory containing pkgs. +packages_zip_name = "packages.zip" # Top-level filename for zipped packages. package_file_name = "package.py" # Filename for packages in a repository. #: Guaranteed unused default value for some functions. @@ -216,9 +206,9 @@ def compute_loader(self, fullname): def packages_path(): """Get the test repo if it is active, otherwise the builtin repo.""" try: - return spack.repo.PATH.get_repo("builtin.mock").packages_path - except spack.repo.UnknownNamespaceError: - return spack.repo.PATH.get_repo("builtin").packages_path + return PATH.get_repo("builtin.mock").packages_path + except UnknownNamespaceError: + return PATH.get_repo("builtin").packages_path class GitExe: @@ -1009,9 +999,14 @@ def check(condition, msg): self._names = self.full_namespace.split(".") packages_dir = config.get("subdirectory", packages_dir_name) + packages_zip = os.path.join(self.root, "packages.zip") + self.zipimporter = ( + zipimport.zipimporter(packages_zip) if os.path.exists(packages_zip) else None + ) self.packages_path = os.path.join(self.root, packages_dir) check( - os.path.isdir(self.packages_path), f"No directory '{packages_dir}' found in '{root}'" + self.zipimporter or os.path.isdir(self.packages_path), + f"No '{self.packages_path}' or '{packages_zip} found in '{root}'", ) # Class attribute overrides by package name @@ -1507,6 +1502,14 @@ def use_repositories( PATH = saved +class SpackNamespaceLoader: + def create_module(self, spec): + return SpackNamespace(spec.name) + + def exec_module(self, module): + module.__loader__ = self + + class MockRepositoryBuilder: """Build a mock repository in a directory""" diff --git a/lib/spack/spack/stage.py b/lib/spack/spack/stage.py index 752ac42e4ec..83ce656668a 100644 --- a/lib/spack/spack/stage.py +++ b/lib/spack/spack/stage.py @@ -916,7 +916,7 @@ def interactive_version_filter( orig_url_dict = url_dict # only copy when using editor to modify print_header = True VERSION_COLOR = spack.spec.VERSION_COLOR - while True: + while sys.stdin.isatty(): if print_header: has_filter = version_filter != VersionList([":"]) header = [] @@ -933,7 +933,9 @@ def interactive_version_filter( num_new = sum(1 for v in sorted_and_filtered if v not in known_versions) header.append(f"{llnl.string.plural(num_new, 'new version')}") if has_filter: - header.append(colorize(f"Filtered by {VERSION_COLOR}@@{version_filter}@.")) + header.append( + colorize(f"Filtered by {VERSION_COLOR}@@{version_filter}@. (clear with c)") + ) version_with_url = [ colorize( diff --git a/lib/spack/spack/util/archive.py b/lib/spack/spack/util/archive.py index 950405887a4..f1360757513 100644 --- a/lib/spack/spack/util/archive.py +++ b/lib/spack/spack/util/archive.py @@ -7,7 +7,9 @@ import io import os import pathlib +import shutil import tarfile +import zipfile from contextlib import closing, contextmanager from gzip import GzipFile from typing import Callable, Dict, Tuple @@ -228,3 +230,53 @@ def reproducible_tarfile_from_prefix( tar.addfile(file_info, f) dir_stack.extend(reversed(new_dirs)) # we pop, so reverse to stay alphabetical + + +def reproducible_zipfile_from_prefix( + zip: zipfile.ZipFile, + prefix: str, + *, + skip: Callable[[os.DirEntry], bool] = lambda entry: False, + path_to_name: Callable[[str], str] = default_path_to_name, +) -> None: + """Similar to ``reproducible_tarfile_from_prefix`` but for zipfiles.""" + dir_stack = [prefix] + while dir_stack: + dir = dir_stack.pop() + + # Add the dir before its contents. zip.mkdir is Python 3.11. + dir_info = zipfile.ZipInfo(path_to_name(dir)) + if not dir_info.filename.endswith("/"): + dir_info.filename += "/" + dir_info.external_attr = (0o40755 << 16) | 0x10 + dir_info.file_size = 0 + with zip.open(dir_info, "w") as dest: + dest.write(b"") + + # Sort by name for reproducibility + with os.scandir(dir) as it: + entries = sorted(it, key=lambda entry: entry.name) + + new_dirs = [] + for entry in entries: + if skip(entry): + continue + + if entry.is_dir(follow_symlinks=False): + new_dirs.append(entry.path) + continue + + # symlink / hardlink support in ZIP is poor or non-existent: make copies. + elif entry.is_file(follow_symlinks=True): + file_info = zipfile.ZipInfo(path_to_name(entry.path)) + + # Normalize permissions like git + s = entry.stat(follow_symlinks=True) + file_info.external_attr = (0o755 if s.st_mode & 0o100 else 0o644) << 16 + file_info.file_size = s.st_size + file_info.compress_type = zip.compression + + with open(entry.path, "rb") as src, zip.open(file_info, "w") as dest: + shutil.copyfileobj(src, dest) # type: ignore[misc] + + dir_stack.extend(reversed(new_dirs)) # we pop, so reverse to stay alphabetical diff --git a/share/spack/spack-completion.bash b/share/spack/spack-completion.bash index 079f021706d..1a1c65bd374 100644 --- a/share/spack/spack-completion.bash +++ b/share/spack/spack-completion.bash @@ -1748,7 +1748,7 @@ _spack_repo() { then SPACK_COMPREPLY="-h --help" else - SPACK_COMPREPLY="create list add remove rm" + SPACK_COMPREPLY="create list add remove rm zip" fi } @@ -1792,6 +1792,15 @@ _spack_repo_rm() { fi } +_spack_repo_zip() { + if $list_options + then + SPACK_COMPREPLY="-h --help" + else + _repos + fi +} + _spack_resource() { if $list_options then diff --git a/share/spack/spack-completion.fish b/share/spack/spack-completion.fish index 6d891757c85..534f0420fee 100644 --- a/share/spack/spack-completion.fish +++ b/share/spack/spack-completion.fish @@ -2675,6 +2675,7 @@ complete -c spack -n '__fish_spack_using_command_pos 0 repo' -f -a list -d 'show complete -c spack -n '__fish_spack_using_command_pos 0 repo' -f -a add -d 'add a package source to Spack\'s configuration' complete -c spack -n '__fish_spack_using_command_pos 0 repo' -f -a remove -d 'remove a repository from Spack\'s configuration' complete -c spack -n '__fish_spack_using_command_pos 0 repo' -f -a rm -d 'remove a repository from Spack\'s configuration' +complete -c spack -n '__fish_spack_using_command_pos 0 repo' -f -a zip -d 'zip a package repository to make it immutable and faster to load' complete -c spack -n '__fish_spack_using_command repo' -s h -l help -f -a help complete -c spack -n '__fish_spack_using_command repo' -s h -l help -d 'show this help message and exit' @@ -2717,6 +2718,12 @@ complete -c spack -n '__fish_spack_using_command repo rm' -s h -l help -d 'show complete -c spack -n '__fish_spack_using_command repo rm' -l scope -r -f -a '_builtin defaults system site user command_line' complete -c spack -n '__fish_spack_using_command repo rm' -l scope -r -d 'configuration scope to modify' +# spack repo zip +set -g __fish_spack_optspecs_spack_repo_zip h/help +complete -c spack -n '__fish_spack_using_command_pos 0 repo zip' $__fish_spack_force_files -a '(__fish_spack_repos)' +complete -c spack -n '__fish_spack_using_command repo zip' -s h -l help -f -a help +complete -c spack -n '__fish_spack_using_command repo zip' -s h -l help -d 'show this help message and exit' + # spack resource set -g __fish_spack_optspecs_spack_resource h/help complete -c spack -n '__fish_spack_using_command_pos 0 resource' -f -a list -d 'list all resources known to spack (currently just patches)' diff --git a/var/spack/repos/builtin/packages/chai/package.py b/var/spack/repos/builtin/packages/chai/package.py index 2d408dbc0a2..aeac2ba2503 100644 --- a/var/spack/repos/builtin/packages/chai/package.py +++ b/var/spack/repos/builtin/packages/chai/package.py @@ -6,8 +6,7 @@ import socket from spack.package import * - -from .blt import llnl_link_helpers +from spack.pkg.builtin.blt import llnl_link_helpers class Chai(CachedCMakePackage, CudaPackage, ROCmPackage): diff --git a/var/spack/repos/builtin/packages/raja/package.py b/var/spack/repos/builtin/packages/raja/package.py index 715756c5737..02b69e0f5f3 100644 --- a/var/spack/repos/builtin/packages/raja/package.py +++ b/var/spack/repos/builtin/packages/raja/package.py @@ -6,8 +6,7 @@ import socket from spack.package import * - -from .blt import llnl_link_helpers +from spack.pkg.builtin.blt import llnl_link_helpers # Starting with 2022.03.0, the only submodule we want to fetch is tpl/desul diff --git a/var/spack/repos/builtin/packages/umpire/package.py b/var/spack/repos/builtin/packages/umpire/package.py index 6348d943d3c..d71d88dec8f 100644 --- a/var/spack/repos/builtin/packages/umpire/package.py +++ b/var/spack/repos/builtin/packages/umpire/package.py @@ -7,8 +7,7 @@ import socket from spack.package import * - -from .blt import llnl_link_helpers +from spack.pkg.builtin.blt import llnl_link_helpers class Umpire(CachedCMakePackage, CudaPackage, ROCmPackage):