bugfix: do not compute package_hash for old concrete specs (#30861)

Old concrete specs were slipping through in `_assign_hash`, and `package_hash` was
attempting to recompute a package hash when we could not know the package a time
of concretization.

Part of this was that the logic for `_assign_hash` was hard to understand -- it was
called twice from `_finalize_concretization` and had special cases for both args it
was called with. It's much easier to understand the logic here if we just inline it.

- [x] Get rid of `_assign_hash` and just integrate it with `_finalize_concretization`
- [x] Don't call `_package_hash` at all for already-concrete specs.
- [x] Add regression test.
This commit is contained in:
Todd Gamblin 2022-05-25 20:12:24 -07:00 committed by GitHub
parent 1b955e66c1
commit d51f949768
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 50 additions and 35 deletions

View File

@ -2952,28 +2952,18 @@ def _mark_concrete(self, value=True):
s.clear_cached_hashes()
s._mark_root_concrete(value)
def _assign_hash(self, hash):
"""Compute and cache the provided hash type for this spec and its dependencies.
def _finalize_concretization(self):
"""Assign hashes to this spec, and mark it concrete.
Arguments:
hash (spack.hash_types.SpecHashDescriptor): the hash to assign to nodes
in the spec.
There are special semantics to consider for `package_hash`, because we can't
call it on *already* concrete specs, but we need to assign it *at concretization
time* to just-concretized specs. So, the concretizer must assign the package
hash *before* marking their specs concrete (so that we know which specs were
already concrete before this latest concretization).
There are special semantics to consider for `package_hash`.
This should be called:
1. for `package_hash`, immediately after concretization, but *before* marking
concrete, and
2. for `dag_hash`, immediately after marking concrete.
`package_hash` is tricky, because we can't call it on *already* concrete specs,
but we need to assign it *at concretization time* to just-concretized specs. So,
the concretizer must assign the package hash *before* marking their specs
concrete (so that the only concrete specs are the ones already marked concrete).
`dag_hash` is also tricky, since it cannot compute `package_hash()` lazily for
the same reason. `package_hash` needs to be assigned *at concretization time*,
so, `to_node_dict()` can't just assume that it can compute `package_hash` itself
`dag_hash` is also tricky, since it cannot compute `package_hash()` lazily.
Because `package_hash` needs to be assigned *at concretization time*,
`to_node_dict()` can't just assume that it can compute `package_hash` itself
-- it needs to either see or not see a `_package_hash` attribute.
Rules of thumb for `package_hash`:
@ -2987,28 +2977,26 @@ def _assign_hash(self, hash):
for spec in self.traverse():
# Already concrete specs either already have a package hash (new dag_hash())
# or they never will b/c we can't know it (old dag_hash()). Skip them.
if hash is ht.package_hash and not spec.concrete:
spec._cached_hash(hash, force=True)
#
# We only assign package hash to not-yet-concrete specs, for which we know
# we can compute the hash.
if not spec.concrete:
# we need force=True here because package hash assignment has to happen
# before we mark concrete, so that we know what was *already* concrete.
spec._cached_hash(ht.package_hash, force=True)
# keep this check here to ensure package hash is saved
assert getattr(spec, hash.attr)
else:
spec._cached_hash(hash)
def _finalize_concretization(self):
"""Assign hashes to this spec, and mark it concrete.
This is called at the end of concretization.
"""
# See docs for in _assign_hash for why package_hash needs to happen here.
self._assign_hash(ht.package_hash)
assert getattr(spec, ht.package_hash.attr)
# Mark everything in the spec as concrete
self._mark_concrete()
# Assign dag_hash (this *could* be done lazily, but it's assigned anyway in
# ensure_no_deprecated, and it's clearer to see explicitly where it happens)
self._assign_hash(ht.dag_hash)
# ensure_no_deprecated, and it's clearer to see explicitly where it happens).
# Any specs that were concrete before finalization will already have a cached
# DAG hash.
for spec in self.traverse():
self._cached_hash(ht.dag_hash)
def concretized(self, tests=False):
"""This is a non-destructive version of concretize().

View File

@ -1293,3 +1293,30 @@ def test_call_dag_hash_on_old_dag_hash_spec(mock_packages, config):
with pytest.raises(ValueError, match='Cannot call package_hash()'):
spec.package_hash()
@pytest.mark.regression('30861')
def test_concretize_partial_old_dag_hash_spec(mock_packages, config):
# create an "old" spec with no package hash
bottom = Spec("dt-diamond-bottom").concretized()
delattr(bottom, "_package_hash")
dummy_hash = "zd4m26eis2wwbvtyfiliar27wkcv3ehk"
bottom._hash = dummy_hash
# add it to an abstract spec as a dependency
top = Spec("dt-diamond")
top.add_dependency_edge(bottom, ())
# concretize with the already-concrete dependency
top.concretize()
for spec in top.traverse():
assert spec.concrete
# make sure dag_hash is untouched
assert spec["dt-diamond-bottom"].dag_hash() == dummy_hash
assert spec["dt-diamond-bottom"]._hash == dummy_hash
# make sure package hash is NOT recomputed
assert not getattr(spec["dt-diamond-bottom"], '_package_hash', None)