patches: add a per-repository patch index

- this fixes a bug where, if we save a concretized sub-DAG in which a
  package had been patched by a dependent, and the dependent was not in
  the DAG, we would not read in all patches correctly.

- Rather than looking up patches in the DAG, we look them up globally
  from an index created from the entire repository (see the lookup
  sketch after this list).

- The patch cache is a bit tricky for several reasons:

  - we have to cache information from packages, specifically, the patch
    level and working directory.

  - FilePatches need to know which package owns them, so that they can
    figure out where the patch lives.  The repo can change locations from
    run to run, so we have to store relative paths and restore them when
    the cache is reloaded.

  - Patch files can change underneath the cache, because repo indexes
    only update on package changes.  We currently punt on this -- there
    are stub methods for needs_update() that will need to check patch
    files when packages are loaded.  There isn't an easy way to do this
    at global indexing time without making the FastPackageChecker a lot
    slower.  This is TBD for a future commit.

  - Currently, the same patch can only be used one way in a package. That
    is, if it appears twice with different level/working_dir settings,
    bad things will happen.  There's no package that currently uses the
    same patch two different ways, so we've punted on this as well, but
    we may need to fix this in the future by moving a lot of the metadata
    (level, working dir) to the spec, and *only* caching sha256sums in
    the PatchCache.  That would require some much more complicated tweaks
    to the Spec, so we're holding off on that until later.

- This required patches to be refactored somewhat -- the difference
  between a UrlPatch and a FilePatch is still not particularly clean.
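
As a rough sketch of the new lookup path (the names come from the diff
below; sha256, spec, and stage are assumed to be in scope):

    import spack.repo

    # merged, repository-wide index, built lazily from all repos
    index = spack.repo.path.patch_index

    # resolve a patch by hash for a package, even when the owning
    # (dependent) package is not in the current sub-DAG
    patch = index.patch_for_package(sha256, spec.package)
    patch.apply(stage)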
Todd Gamblin 2018-12-22 19:58:41 -08:00
parent a9b69fa902
commit d3ee6c977b
8 changed files with 338 additions and 115 deletions

View File

@@ -395,7 +395,7 @@ def patch(url_or_filename, level=1, when=None, working_dir=".", **kwargs):
    certain conditions (e.g. a particular version).

    Args:
        url_or_filename (str): url or filename of the patch
        url_or_filename (str): url or relative filename of the patch
        level (int): patch level (as in the patch shell command)
        when (Spec): optional anonymous spec that specifies when to apply
            the patch
@@ -417,13 +417,18 @@ def _execute_patch(pkg_or_dep):
        # patch to the existing list.
        cur_patches = pkg_or_dep.patches.setdefault(when_spec, [])

        # if pkg_or_dep is a Dependency, make it a Package
        pkg = pkg_or_dep
        if isinstance(pkg, Dependency):
            pkg = pkg.pkg

        cur_patches.append(spack.patch.create(
            pkg, url_or_filename, level, working_dir, **kwargs))
        if '://' in url_or_filename:
            patch = spack.patch.UrlPatch(
                pkg, url_or_filename, level, working_dir, **kwargs)
        else:
            patch = spack.patch.FilePatch(
                pkg, url_or_filename, level, working_dir)

        cur_patches.append(patch)

    return _execute_patch
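
For context, here is a hypothetical package showing the two forms the
directive now dispatches on (the package, patch names, and sha256 are
made up for illustration):

    class Example(Package):
        # file patch: a path relative to the package's repo directory
        patch('fix-build.patch', level=1, working_dir='src')

        # URL patch: fetched at build time, so a sha256 is required
        patch('https://example.com/fix-2.0.patch', when='@2.0:',
              sha256='<sha256 of the patch file>')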

View File

@@ -223,6 +223,11 @@ def namespace(self):
        namespace = namespace[len(prefix):]
        return namespace

    @property
    def fullname(self):
        """Name of this package, including the namespace"""
        return '%s.%s' % (self.namespace, self.name)


def run_before(*phases):
    """Registers a method of a package to be run before a given phase"""
@@ -567,6 +572,11 @@ def namespace(self):
        """Spack namespace for the package, which identifies its repo."""
        return type(self).namespace

    @property
    def fullname(self):
        """Name of this package, including namespace: namespace.name."""
        return type(self).fullname

    @property
    def global_license_dir(self):
        """Returns the directory where global license files for all
@@ -968,40 +978,6 @@ def do_stage(self, mirror_only=False):
        if not os.listdir(self.stage.path):
            raise FetchError("Archive was empty for %s" % self.name)

    @classmethod
    def lookup_patch(cls, sha256):
        """Look up a patch associated with this package by its sha256 sum.

        Args:
            sha256 (str): sha256 sum of the patch to look up

        Returns:
            (Patch): ``Patch`` object with the given hash, or ``None`` if
                not found.

        To do the lookup, we build an index lazily.  This allows us to
        avoid computing a sha256 for *every* patch and on every package
        load.  With lazy hashing, we only compute hashes on lookup, which
        usually happens at build time.
        """
        if cls._patches_by_hash is None:
            cls._patches_by_hash = {}

            # Add patches from the class
            for cond, patch_list in cls.patches.items():
                for patch in patch_list:
                    cls._patches_by_hash[patch.sha256] = patch

            # and patches on dependencies
            for name, conditions in cls.dependencies.items():
                for cond, dependency in conditions.items():
                    for pcond, patch_list in dependency.patches.items():
                        for patch in patch_list:
                            cls._patches_by_hash[patch.sha256] = patch

        return cls._patches_by_hash.get(sha256, None)

    def do_patch(self):
        """Applies patches if they haven't been applied already."""
        if not self.spec.concrete:
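
The fullname property added above is the key the patch index uses to
identify owners. A small sketch of what it returns (package and
namespace names are hypothetical):

    import spack.repo

    pkg = spack.repo.get('mpich')
    pkg.name        # 'mpich'
    pkg.namespace   # 'builtin'
    pkg.fullname    # 'builtin.mpich', i.e. namespace.name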

View File

@@ -3,41 +3,24 @@
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import abc
import hashlib
import os
import os.path
import hashlib

import six

import llnl.util.filesystem
import llnl.util.lang

import spack.error
import spack.fetch_strategy as fs
import spack.repo
import spack.stage
from spack.util.crypto import checksum, Checker
import spack.util.spack_json as sjson
from spack.util.executable import which
from spack.util.compression import allowed_archive


def create(pkg, path_or_url, level=1, working_dir=".", **kwargs):
    """Make either a FilePatch or a UrlPatch, depending on arguments.

    Args:
        pkg: package that needs to be patched
        path_or_url: path or url where the patch is found
        level: patch level (default 1)
        working_dir (str): relative path within the package stage;
            change to this before applying (default '.')

    Returns:
        (Patch): a patch object on which ``apply(stage)`` can be called
    """
    # Check if we are dealing with a URL (which will be fetched)
    if '://' in path_or_url:
        return UrlPatch(path_or_url, level, working_dir, **kwargs)

    # If not, it's a file patch, which is stored within the repo directory.
    patch_path = os.path.join(pkg.package_dir, path_or_url)
    return FilePatch(patch_path, level, working_dir)

from spack.util.crypto import checksum, Checker
from spack.util.executable import which


def apply_patch(stage, patch_path, level=1, working_dir='.'):
@@ -58,54 +41,96 @@ def apply_patch(stage, patch_path, level=1, working_dir='.'):
              '-d', working_dir)
@six.add_metaclass(abc.ABCMeta)
class Patch(object):
    """Base class for patches.

    Defines the interface (basically just ``apply()``, at the moment) and
    common variables.

    Arguments:
        pkg (str): the package that owns the patch

        The owning package is not necessarily the package to apply the patch
        to -- in the case where a dependent package patches its dependency,
        it is the dependent's fullname.
    """
    def __init__(self, path_or_url, level, working_dir):
    def __init__(self, pkg, path_or_url, level, working_dir):
        # validate level (must be an integer >= 0)
        if not isinstance(level, int) or not level >= 0:
            raise ValueError("Patch level needs to be a non-negative integer.")

        # Attributes shared by all patch subclasses
        self.owner = pkg.fullname
        self.path_or_url = path_or_url  # needed for debug output
        self.level = level
        self.working_dir = working_dir

        # path needs to be set by subclasses before calling self.apply()
        self.path = None

    @abc.abstractmethod
    def apply(self, stage):
        """Apply this patch to code in a stage."""
        assert self.path, "self.path must be set before Patch.apply()"
        apply_patch(stage, self.path, self.level, self.working_dir)
        """Apply a patch to source in a stage.

        Arguments:
            stage (spack.stage.Stage): stage where source code lives
        """

    def to_dict(self):
        """Partial dictionary -- subclasses should add to this."""
        return {
            'owner': self.owner,
            'sha256': self.sha256,
            'level': self.level,
            'working_dir': self.working_dir,
        }
class FilePatch(Patch):
    """Describes a patch that is retrieved from a file in the repository"""
    def __init__(self, path, level, working_dir):
        super(FilePatch, self).__init__(path, level, working_dir)
    """Describes a patch that is retrieved from a file in the repository.

        if not os.path.isfile(path):
            raise NoSuchPatchError("No such patch: %s" % path)
        self.path = path

    Arguments:
        pkg (str): the class object for the package that owns the patch
        relative_path (str): path to patch, relative to the repository
            directory for a package.
        level (int): level to pass to patch command
        working_dir (str): path within the source directory where patch
            should be applied
    """
    def __init__(self, pkg, relative_path, level, working_dir):
        self.relative_path = relative_path
        self.path = os.path.join(pkg.package_dir, self.relative_path)
        super(FilePatch, self).__init__(pkg, self.path, level, working_dir)

        self._sha256 = None

    def apply(self, stage):
        if not os.path.isfile(self.path):
            raise NoSuchPatchError("No such patch: %s" % self.path)
        apply_patch(stage, self.path, self.level, self.working_dir)

    @property
    def sha256(self):
        if self._sha256 is None:
            self._sha256 = checksum(hashlib.sha256, self.path)
        return self._sha256

    def to_dict(self):
        return llnl.util.lang.union_dicts(
            super(FilePatch, self).to_dict(),
            {'relative_path': self.relative_path})
class UrlPatch(Patch):
    """Describes a patch that is retrieved from a URL"""
    def __init__(self, url, level, working_dir, **kwargs):
        super(UrlPatch, self).__init__(url, level, working_dir)
    """Describes a patch that is retrieved from a URL.

    Arguments:
        pkg (str): the package that owns the patch
        url (str): URL where the patch can be fetched
        level (int): level to pass to patch command
        working_dir (str): path within the source directory where patch
            should be applied
    """
    def __init__(self, pkg, url, level=1, working_dir='.', **kwargs):
        super(UrlPatch, self).__init__(pkg, url, level, working_dir)
        self.url = url
        self.path = None  # this is set in apply

        self.archive_sha256 = kwargs.get('archive_sha256')
        if allowed_archive(self.url) and not self.archive_sha256:
@@ -153,6 +178,7 @@ def apply(self, stage):
            raise NoSuchPatchError(
                "Patch failed to download: %s" % self.url)

        # set this here so that path is accessible after
        self.path = os.path.join(root, files.pop())
        if not os.path.isfile(self.path):
@@ -168,7 +194,170 @@ def apply(self, stage):
                    "sha256 checksum failed for %s" % self.path,
                    "Expected %s but got %s" % (self.sha256, checker.sum))

        super(UrlPatch, self).apply(stage)
        apply_patch(stage, self.path, self.level, self.working_dir)

    def to_dict(self):
        data = super(UrlPatch, self).to_dict()
        data['url'] = self.url
        if self.archive_sha256:
            data['archive_sha256'] = self.archive_sha256
        return data


def from_dict(dictionary):
    """Create a patch from json dictionary."""
    owner = dictionary.get('owner')
    if 'owner' not in dictionary:
        raise ValueError('Invalid patch dictionary: %s' % dictionary)
    pkg = spack.repo.get(owner)

    if 'url' in dictionary:
        return UrlPatch(
            pkg,
            dictionary['url'],
            dictionary['level'],
            dictionary['working_dir'],
            sha256=dictionary['sha256'],
            archive_sha256=dictionary.get('archive_sha256'))

    elif 'relative_path' in dictionary:
        patch = FilePatch(
            pkg,
            dictionary['relative_path'],
            dictionary['level'],
            dictionary['working_dir'])

        # If the patch in the repo changes, we cannot get it back, so we
        # just check it and fail here.
        # TODO: handle this more gracefully.
        sha256 = dictionary['sha256']
        checker = Checker(sha256)
        if not checker.check(patch.path):
            raise fs.ChecksumError(
                "sha256 checksum failed for %s" % patch.path,
                "Expected %s but got %s" % (sha256, checker.sum),
                "Patch may have changed since concretization.")
        return patch
    else:
        raise ValueError("Invalid patch dictionary: %s" % dictionary)
class PatchCache(object):
    """Index of patches used in a repository, by sha256 hash.

    This allows us to look up patches without loading all packages.  It's
    also needed to properly implement dependency patching, as we need a way
    to look up patches that come from packages not in the Spec sub-DAG.

    The patch index is structured like this in a file (this is YAML, but
    we write JSON)::

        patches:
            sha256:
                namespace1.package1:
                    <patch json>
                namespace2.package2:
                    <patch json>
            ... etc. ...

    """
    def __init__(self, data=None):
        if data is None:
            self.index = {}
        else:
            if 'patches' not in data:
                raise IndexError('invalid patch index; try `spack clean -m`')
            self.index = data['patches']

    @classmethod
    def from_json(cls, stream):
        return PatchCache(sjson.load(stream))

    def to_json(self, stream):
        sjson.dump({'patches': self.index}, stream)

    def patch_for_package(self, sha256, pkg):
        """Look up a patch in the index and build a patch object for it.

        Arguments:
            sha256 (str): sha256 hash to look up
            pkg (spack.package.Package): Package object to get patch for.

        We build patch objects lazily because building them requires that
        we have information about the package's location in its repo.
        """
        sha_index = self.index.get(sha256)
        if not sha_index:
            raise NoSuchPatchError(
                "Couldn't find patch with sha256: %s" % sha256)

        patch_dict = sha_index.get(pkg.fullname)
        if not patch_dict:
            raise NoSuchPatchError(
                "Couldn't find patch for package %s with sha256: %s"
                % (pkg.fullname, sha256))

        # add the sha256 back (we take it out on write to save space,
        # because it's the index key)
        patch_dict = dict(patch_dict)
        patch_dict['sha256'] = sha256
        return from_dict(patch_dict)

    def update_package(self, pkg_fullname):
        # remove this package from any patch entries that reference it.
        empty = []
        for sha256, package_to_patch in self.index.items():
            remove = []
            for fullname, patch_dict in package_to_patch.items():
                if patch_dict['owner'] == pkg_fullname:
                    remove.append(fullname)

            for fullname in remove:
                package_to_patch.pop(fullname)

            if not package_to_patch:
                empty.append(sha256)

        # remove any entries that are now empty
        for sha256 in empty:
            del self.index[sha256]

        # update the index with per-package patch indexes
        pkg = spack.repo.get(pkg_fullname)
        partial_index = self._index_patches(pkg)
        for sha256, package_to_patch in partial_index.items():
            p2p = self.index.setdefault(sha256, {})
            p2p.update(package_to_patch)

    def update(self, other):
        """Update this cache with the contents of another."""
        for sha256, package_to_patch in other.index.items():
            p2p = self.index.setdefault(sha256, {})
            p2p.update(package_to_patch)

    @staticmethod
    def _index_patches(pkg_class):
        index = {}

        # Add patches from the class
        for cond, patch_list in pkg_class.patches.items():
            for patch in patch_list:
                patch_dict = patch.to_dict()
                patch_dict.pop('sha256')  # save some space
                index[patch.sha256] = {pkg_class.fullname: patch_dict}

        # and patches on dependencies
        for name, conditions in pkg_class.dependencies.items():
            for cond, dependency in conditions.items():
                for pcond, patch_list in dependency.patches.items():
                    for patch in patch_list:
                        dspec = spack.repo.get(dependency.spec.name)
                        patch_dict = patch.to_dict()
                        patch_dict.pop('sha256')  # save some space
                        index[patch.sha256] = {dspec.fullname: patch_dict}

        return index


class NoSuchPatchError(spack.error.SpackError):
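
To make the relative-path round trip concrete, here is roughly what a
FilePatch entry looks like going through the index (all values are
illustrative):

    # what FilePatch.to_dict() contributes to the index; the sha256 is
    # popped on write because it becomes the index key
    entry = {
        'owner': 'builtin.mpich',            # hypothetical owner
        'sha256': '<sha256 of the patch>',
        'level': 1,
        'working_dir': '.',
        'relative_path': 'fix-build.patch',  # relative to the repo dir
    }

    # from_dict() re-resolves the absolute path via the owner's
    # package_dir, so the cache survives the repository moving
    # between runs
    patch = from_dict(entry)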

View File

@@ -13,7 +13,6 @@
import inspect
import re
import traceback
import json
from contextlib import contextmanager
from six import string_types, add_metaclass
@@ -33,7 +32,9 @@
import spack.config
import spack.caches
import spack.error
import spack.patch
import spack.spec
import spack.util.spack_json as sjson
import spack.util.imp as simp
from spack.provider_index import ProviderIndex
from spack.util.path import canonicalize_path
@@ -190,11 +191,11 @@ def __init__(self):
        self._tag_dict = collections.defaultdict(list)

    def to_json(self, stream):
        json.dump({'tags': self._tag_dict}, stream)
        sjson.dump({'tags': self._tag_dict}, stream)

    @staticmethod
    def from_json(stream):
        d = json.load(stream)
        d = sjson.load(stream)

        r = TagIndex()
@@ -242,6 +243,22 @@ def create(self):
    def _create(self):
        """Create an empty index and return it."""

    def needs_update(self, pkg):
        """Whether an update is needed when the package file hasn't changed.

        Returns:
            (bool): ``True`` if this package needs its index
                updated, ``False`` otherwise.

        We already automatically update indexes when package files
        change, but other files (like patches) may change underneath the
        package file.  This method can be used to check additional
        package-specific files whenever they're loaded, to tell the
        RepoIndex to update the index *just* for that package.
        """
        return False

    @abc.abstractmethod
    def read(self, stream):
        """Read this index from a provided file object."""
@@ -286,6 +303,28 @@ def write(self, stream):
        self.index.to_json(stream)


class PatchIndexer(Indexer):
    """Lifecycle methods for patch cache."""
    def _create(self):
        return spack.patch.PatchCache()

    def needs_update(self, pkg):
        # TODO: patches can change under a package and we should handle
        # TODO: it, but we currently punt.  This should be refactored to
        # TODO: check whether patches changed each time a package loads,
        # TODO: and tell the RepoIndex to reindex them.
        return False

    def read(self, stream):
        self.index = spack.patch.PatchCache.from_json(stream)

    def write(self, stream):
        self.index.to_json(stream)

    def update(self, pkg_fullname):
        self.index.update_package(pkg_fullname)
class RepoIndex(object):
    """Container class that manages a set of Indexers for a Repo.
@@ -403,6 +442,7 @@ def __init__(self, *repos, **kwargs):
        self._all_package_names = None
        self._provider_index = None
        self._patch_index = None

        # Add each repo to this path.
        for repo in repos:
@@ -501,6 +541,16 @@ def provider_index(self):
        return self._provider_index

    @property
    def patch_index(self):
        """Merged PatchIndex from all Repos in the RepoPath."""
        if self._patch_index is None:
            self._patch_index = spack.patch.PatchCache()
            for repo in reversed(self.repos):
                self._patch_index.update(repo.patch_index)

        return self._patch_index

    @_autospec
    def providers_for(self, vpkg_spec):
        providers = self.provider_index.providers_for(vpkg_spec)
@@ -870,15 +920,14 @@ def dump_provenance(self, spec, path):
                "Repository %s does not contain package %s."
                % (self.namespace, spec.fullname))

        # Install any patch files needed by packages.
        # Install patch files needed by the package.
        mkdirp(path)
        for spec, patches in spec.package.patches.items():
            for patch in patches:
                if patch.path:
                    if os.path.exists(patch.path):
                        install(patch.path, path)
                    else:
                        tty.warn("Patch file did not exist: %s" % patch.path)
        for patch in spec.patches:
            if patch.path:
                if os.path.exists(patch.path):
                    install(patch.path, path)
                else:
                    tty.warn("Patch file did not exist: %s" % patch.path)

        # Install the package.py file itself.
        install(self.filename_for_package_name(spec), path)
@@ -894,6 +943,7 @@ def index(self):
            self._repo_index = RepoIndex(self._pkg_checker, self.namespace)
            self._repo_index.add_indexer('providers', ProviderIndexer())
            self._repo_index.add_indexer('tags', TagIndexer())
            self._repo_index.add_indexer('patches', PatchIndexer())
        return self._repo_index

    @property
@@ -906,6 +956,11 @@ def tag_index(self):
        """Index of tags and which packages they're defined on."""
        return self.index['tags']

    @property
    def patch_index(self):
        """Index of patches and packages they're defined on."""
        return self.index['patches']

    @_autospec
    def providers_for(self, vpkg_spec):
        providers = self.provider_index.providers_for(vpkg_spec)
@@ -1200,6 +1255,10 @@ class UnknownEntityError(RepoError):
    """Raised when we encounter a package spack doesn't have."""


class IndexError(RepoError):
    """Raised when there's an error with an index."""


class UnknownPackageError(UnknownEntityError):
    """Raised when we encounter a package spack doesn't have."""

View File

@@ -2677,24 +2677,16 @@ def patches(self):
        if 'patches' not in self.variants:
            return []

        patches = []

        # FIXME: The private attribute below is attached after
        # FIXME: _patches_in_order_of_appearance is attached after
        # FIXME: concretization to store the order of patches somewhere.
        # FIXME: Needs to be refactored in a cleaner way.

        # translate patch sha256sums to patch objects by consulting the index
        patches = []
        for sha256 in self.variants['patches']._patches_in_order_of_appearance:
            patch = self.package_class.lookup_patch(sha256)
            if patch:
                patches.append(patch)
                continue

            # if not found in this package, check immediate dependents
            # for dependency patches
            for dep_spec in self._dependents.values():
                patch = dep_spec.parent.package_class.lookup_patch(sha256)
                if patch:
                    patches.append(patch)
            index = spack.repo.path.patch_index
            patch = index.patch_for_package(sha256, self.package)
            patches.append(patch)

        return patches
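
In short, each sha256 recorded in the spec's patches variant now
resolves through the global index instead of walking _dependents, so
patches owned by a dependent that isn't in the loaded sub-DAG are still
found. A hedged sketch of the resolution loop:

    for sha256 in spec.variants['patches']._patches_in_order_of_appearance:
        patch = spack.repo.path.patch_index.patch_for_package(
            sha256, spec.package)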

View File

@@ -101,13 +101,14 @@ def test_dont_add_patches_to_installed_package(install_mockery, mock_fetch):
    dependency.concretize()
    dependency.package.do_install()

    dependency.package.patches['dependency-install'] = [
        spack.patch.create(None, 'file://fake.patch', sha256='unused-hash')]

    dependency_hash = dependency.dag_hash()
    dependent = Spec('dependent-install ^/' + dependency_hash)
    dependent.concretize()

    dependency.package.patches['dependency-install'] = [
        spack.patch.UrlPatch(
            dependent.package, 'file://fake.patch', sha256='unused-hash')]

    assert dependent['dependency-install'] == dependency

View File

@@ -160,7 +160,7 @@ def successful_expand(_class):
        with open(os.path.join(expanded_path, 'test.patch'), 'w'):
            pass

    def successful_apply(_class, stage):
    def successful_apply(*args, **kwargs):
        pass
with Stage('spack-mirror-test') as stage:
@@ -170,7 +170,7 @@ def successful_apply(_class, stage):
                        successful_fetch)
    monkeypatch.setattr(spack.fetch_strategy.URLFetchStrategy,
                        'expand', successful_expand)
    monkeypatch.setattr(spack.patch.Patch, 'apply', successful_apply)
    monkeypatch.setattr(spack.patch, 'apply_patch', successful_apply)
    monkeypatch.setattr(spack.caches.MirrorCache, 'store', record_store)

    with spack.config.override('config:checksum', False):

View File

@@ -41,8 +41,9 @@ def mock_stage(tmpdir, monkeypatch):
def test_url_patch(mock_stage, filename, sha256, archive_sha256):
    # Make a patch object
    url = 'file://' + filename
    patch = spack.patch.create(
        None, url, sha256=sha256, archive_sha256=archive_sha256)
    pkg = spack.repo.get('patch')
    patch = spack.patch.UrlPatch(
        pkg, url, sha256=sha256, archive_sha256=archive_sha256)

    # make a stage
    with Stage(url) as stage:  # TODO: url isn't used; maybe refactor Stage