spec.splice: properly handle cached hash invalidations (#24795)

* spec.splice: properly handle cached hash invalidations

* make package_hash a cached hash on the spec
This commit is contained in:
Greg Becker 2021-07-09 18:16:48 -07:00 committed by GitHub
parent 556975564c
commit ebf2076755
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 91 additions and 18 deletions

View File

@ -19,12 +19,15 @@ class SpecHashDescriptor(object):
We currently use different hashes for different use cases.
"""
hash_types = ('_dag_hash', '_build_hash', '_full_hash')
hash_types = ('_dag_hash', '_build_hash', '_full_hash', '_package_hash')
def __init__(self, deptype=('link', 'run'), package_hash=False, attr=None):
def __init__(self, deptype=('link', 'run'), package_hash=False, attr=None,
override=None):
self.deptype = dp.canonical_deptype(deptype)
self.package_hash = package_hash
self.attr = attr
# Allow spec hashes to have an alternate computation method
self.override = override
#: Default Hash descriptor, used by Spec.dag_hash() and stored in the DB.
@ -40,3 +43,9 @@ def __init__(self, deptype=('link', 'run'), package_hash=False, attr=None):
#: Full hash used in build pipelines to determine when to rebuild packages.
full_hash = SpecHashDescriptor(
deptype=('build', 'link', 'run'), package_hash=True, attr='_full_hash')
#: Package hash used as part of full hash
package_hash = SpecHashDescriptor(
deptype=(), package_hash=True, attr='_package_hash',
override=lambda s: s.package.content_hash())

View File

@ -1030,9 +1030,8 @@ class Spec(object):
#: Cache for spec's prefix, computed lazily in the corresponding property
_prefix = None
def __init__(self, spec_like=None,
normal=False, concrete=False, external_path=None,
external_modules=None, full_hash=None):
def __init__(self, spec_like=None, normal=False,
concrete=False, external_path=None, external_modules=None):
"""Create a new Spec.
Arguments:
@ -1046,8 +1045,6 @@ def __init__(self, spec_like=None,
self._concrete = concrete
self.external_path = external_path
self.external_module = external_module
self._full_hash = full_hash
"""
# Copy if spec_like is a Spec.
@ -1068,6 +1065,8 @@ def __init__(self, spec_like=None,
self._hash = None
self._build_hash = None
self._full_hash = None
self._package_hash = None
self._dunder_hash = None
self._package = None
@ -1082,7 +1081,6 @@ def __init__(self, spec_like=None,
self._concrete = concrete
self.external_path = external_path
self.external_modules = Spec._format_module_list(external_modules)
self._full_hash = full_hash
# Older spack versions did not compute full_hash or build_hash,
# and we may not have the necessary information to recompute them
@ -1501,6 +1499,8 @@ def _spec_hash(self, hash):
"""
# TODO: curently we strip build dependencies by default. Rethink
# this when we move to using package hashing on all specs.
if hash.override is not None:
return hash.override(self)
node_dict = self.to_node_dict(hash=hash)
yaml_text = syaml.dump(node_dict, default_flow_style=True)
return spack.util.hash.b32_hash(yaml_text)
@ -1528,6 +1528,10 @@ def _cached_hash(self, hash, length=None):
return hash_string[:length]
def package_hash(self):
"""Compute the hash of the contents of the package for this node"""
return self._cached_hash(ht.package_hash)
def dag_hash(self, length=None):
"""This is Spack's default hash, used to identify installations.
@ -1653,7 +1657,7 @@ def to_node_dict(self, hash=ht.dag_hash):
d['patches'] = variant._patches_in_order_of_appearance
if hash.package_hash:
package_hash = self.package.content_hash()
package_hash = self.package_hash()
# Full hashes are in bytes
if (not isinstance(package_hash, six.text_type)
@ -1822,6 +1826,7 @@ def from_node_dict(node):
spec._hash = node.get('hash', None)
spec._build_hash = node.get('build_hash', None)
spec._full_hash = node.get('full_hash', None)
spec._package_hash = node.get('package_hash', None)
if 'version' in node or 'versions' in node:
spec.versions = vn.VersionList.from_dict(node)
@ -3441,12 +3446,14 @@ def _dup(self, other, deps=True, cleardeps=True, caches=None):
self._dunder_hash = other._dunder_hash
self._normal = other._normal
self._full_hash = other._full_hash
self._package_hash = other._package_hash
else:
self._hash = None
self._build_hash = None
self._dunder_hash = None
self._normal = False
self._full_hash = None
self._package_hash = None
return changed
@ -4281,19 +4288,22 @@ def splice(self, other, transitive):
# _dependents of these specs should not be trusted.
# Variants may also be ignored here for now...
# Keep all cached hashes because we will invalidate the ones that need
# invalidating later, and we don't want to invalidate unnecessarily
if transitive:
self_nodes = dict((s.name, s.copy(deps=False))
self_nodes = dict((s.name, s.copy(deps=False, caches=True))
for s in self.traverse(root=True)
if s.name not in other)
other_nodes = dict((s.name, s.copy(deps=False))
other_nodes = dict((s.name, s.copy(deps=False, caches=True))
for s in other.traverse(root=True))
else:
# If we're not doing a transitive splice, then we only want the
# root of other.
self_nodes = dict((s.name, s.copy(deps=False))
self_nodes = dict((s.name, s.copy(deps=False, caches=True))
for s in self.traverse(root=True)
if s.name != other.name)
other_nodes = {other.name: other.copy(deps=False)}
other_nodes = {other.name: other.copy(deps=False, caches=True)}
nodes = other_nodes.copy()
nodes.update(self_nodes)
@ -4314,17 +4324,41 @@ def splice(self, other, transitive):
if any(dep not in other_nodes for dep in dependencies):
nodes[name].build_spec = other[name].build_spec
# Clear cached hashes
nodes[self.name].clear_cached_hashes()
ret = nodes[self.name]
# Clear cached hashes for all affected nodes
# Do not touch unaffected nodes
for dep in ret.traverse(root=True, order='post'):
opposite = other_nodes if dep.name in self_nodes else self_nodes
if any(name in dep for name in opposite.keys()):
# Record whether hashes are already cached
# So we don't try to compute a hash from insufficient
# provenance later
has_build_hash = getattr(dep, ht.build_hash.attr, None)
has_full_hash = getattr(dep, ht.full_hash.attr, None)
# package hash cannot be affected by splice
dep.clear_cached_hashes(ignore=['_package_hash'])
# Since this is a concrete spec, we want to make sure hashes
# are cached writing specs only writes cached hashes in case
# the spec is too old to have full provenance for these hashes,
# so we can't rely on doing it at write time.
if has_build_hash:
_ = dep.build_hash()
if has_full_hash:
_ = dep.full_hash()
return nodes[self.name]
def clear_cached_hashes(self):
def clear_cached_hashes(self, ignore=()):
"""
Clears all cached hashes in a Spec, while preserving other properties.
"""
for attr in ht.SpecHashDescriptor.hash_types:
if hasattr(self, attr):
setattr(self, attr, None)
if attr not in ignore:
if hasattr(self, attr):
setattr(self, attr, None)
def __hash__(self):
# If the spec is concrete, we leverage the DAG hash and just use

View File

@ -1027,6 +1027,36 @@ def test_splice(self, transitive):
# Finally, the spec should know it's been spliced:
assert out.spliced
@pytest.mark.parametrize('transitive', [True, False])
def test_splice_with_cached_hashes(self, transitive):
spec = Spec('splice-t')
dep = Spec('splice-h+foo')
spec.concretize()
dep.concretize()
# monkeypatch hashes so we can test that they are cached
spec._full_hash = 'aaaaaa'
spec._build_hash = 'aaaaaa'
dep._full_hash = 'bbbbbb'
dep._build_hash = 'bbbbbb'
spec['splice-h']._full_hash = 'cccccc'
spec['splice-h']._build_hash = 'cccccc'
spec['splice-z']._full_hash = 'dddddd'
spec['splice-z']._build_hash = 'dddddd'
dep['splice-z']._full_hash = 'eeeeee'
dep['splice-z']._build_hash = 'eeeeee'
out = spec.splice(dep, transitive=transitive)
out_z_expected = (dep if transitive else spec)['splice-z']
assert out.full_hash() != spec.full_hash()
assert (out['splice-h'].full_hash() == dep.full_hash()) == transitive
assert out['splice-z'].full_hash() == out_z_expected.full_hash()
assert out.build_hash() != spec.build_hash()
assert (out['splice-h'].build_hash() == dep.build_hash()) == transitive
assert out['splice-z'].build_hash() == out_z_expected.build_hash()
@pytest.mark.parametrize('transitive', [True, False])
def test_splice_input_unchanged(self, transitive):
spec = Spec('splice-t').concretized()