From ecc3bfd48471035ec260c37e82a045e079addb64 Mon Sep 17 00:00:00 2001 From: Todd Gamblin Date: Mon, 2 Nov 2020 13:21:11 -0800 Subject: [PATCH] Bugfix - hashing: don't recompute full_hash or build_hash (#19672) There was an error introduced in #19209 where `full_hash()` and `build_hash()` are called on older specs that we've read in from the DB; older specs may not be able to compute these hashes (e.g. if they have removed patches used in computing the full_hash). When serializing a Spec, we want to generate the full/build hash when possible, but we need a mechanism to skip it for Specs that have themselves been read from YAML (and may not support this). To get around this ambiguity and to fix the issue, we: - Add an attribute to the spec called `_hashes_final`, that is `True` if we can't lazily compute `build_hash` and `full_hash`. - Set `_hashes_final` to `False` for new specs (i.e., lazily computing hashes is ok) - Set `_hashes_final` to `True` for concrete specs read in via `from_node_dict`, as it may be too late to recompute hashes. - Compute and write out all hashes in `node_dict_with_hashes` *if possible*. Effectively what this means is that we can round-trip specs that are missing `_build_hash` and `_full_hash` without recomputing them, but for all new specs, we'll compute them and store them. So Spack should work fine with old DBs now. --- lib/spack/spack/spec.py | 43 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/lib/spack/spack/spec.py b/lib/spack/spack/spec.py index 68394032d3e..256e3edc83a 100644 --- a/lib/spack/spack/spec.py +++ b/lib/spack/spack/spec.py @@ -1011,6 +1011,14 @@ def __init__(self, spec_like=None, self.external_modules = Spec._format_module_list(external_modules) self._full_hash = full_hash + # Older spack versions did not compute full_hash or build_hash, + # and we may not have the necessary information to recompute them + # if we read in old specs. Old concrete specs are marked "final" + # when read in to indicate that we shouldn't recompute full_hash + # or build_hash. New specs are not final; we can lazily compute + # their hashes. + self._hashes_final = False + # This attribute is used to store custom information for # external specs. None signal that it was not set yet. self.extra_attributes = None @@ -1650,10 +1658,33 @@ def node_dict_with_hashes(self, hash=ht.dag_hash): the hash_type, the build hash is also added. """ node = self.to_node_dict(hash) node[self.name]['hash'] = self.dag_hash() + + # full_hash and build_hash are lazily computed -- but if we write + # a spec out, we want them to be included. This is effectively + # the last chance we get to compute them accurately. if self.concrete: - node[self.name]['full_hash'] = self.full_hash() - if 'build' in hash.deptype: - node[self.name]['build_hash'] = self.build_hash() + # build and full hashes can be written out if: + # 1. they're precomputed (i.e. we read them from somewhere + # and they were already on the spec + # 2. we can still compute them lazily (i.e. we just made them and + # have the full dependency graph on-hand) + # + # we want to avoid recomputing either hash for specs we read + # in from the DB or elsewhere, as we may not have the info + # (like patches, package versions, etc.) that we need to + # compute them. Unknown hashes are better than wrong hashes. + write_full_hash = ( + self._hashes_final and self._full_hash or # cached and final + not self._hashes_final) # lazily compute + write_build_hash = 'build' in hash.deptype and ( + self._hashes_final and self._build_hash or # cached and final + not self._hashes_final) # lazily compute + + if write_full_hash: + node[self.name]['full_hash'] = self.full_hash() + if write_build_hash: + node[self.name]['build_hash'] = self.build_hash() + return node def to_record_dict(self): @@ -1743,6 +1774,11 @@ def from_node_dict(node): # specs read in are concrete unless marked abstract spec._concrete = node.get('concrete', True) + # this spec may have been built with older packages than we have + # on-hand, and we may not have the build dependencies, so mark it + # so we don't recompute full_hash and build_hash. + spec._hashes_final = spec._concrete + if 'patches' in node: patches = node['patches'] if len(patches) > 0: @@ -3176,6 +3212,7 @@ def _dup(self, other, deps=True, cleardeps=True, caches=None): self._dup_deps(other, deptypes, caches) self._concrete = other._concrete + self._hashes_final = other._hashes_final if caches: self._hash = other._hash