From 2806ed2751b36f6fb151d9541c6847e11456746f Mon Sep 17 00:00:00 2001 From: Harmen Stoppels Date: Sat, 15 Mar 2025 21:12:51 +0100 Subject: [PATCH] spec.py: include test deps in dag hash, remove process_hash (#48936) --- lib/spack/spack/database.py | 8 ++-- lib/spack/spack/hash_types.py | 14 +++---- lib/spack/spack/spec.py | 56 ++++++-------------------- lib/spack/spack/test/spec_semantics.py | 2 - lib/spack/spack/test/spec_syntax.py | 2 - lib/spack/spack/test/spec_yaml.py | 7 ---- 6 files changed, 21 insertions(+), 68 deletions(-) diff --git a/lib/spack/spack/database.py b/lib/spack/spack/database.py index 76596e3c771..ccb4a58e705 100644 --- a/lib/spack/spack/database.py +++ b/lib/spack/spack/database.py @@ -1126,7 +1126,7 @@ def _add( installation_time: Date and time of installation allow_missing: if True, don't warn when installation is not found on on disk - This is useful when installing specs without build deps. + This is useful when installing specs without build/test deps. """ if not spec.concrete: raise NonConcreteSpecAddError("Specs added to DB must be concrete.") @@ -1146,10 +1146,8 @@ def _add( edge.spec, explicit=False, installation_time=installation_time, - # allow missing build-only deps. This prevents excessive warnings when a spec is - # installed, and its build dep is missing a build dep; there's no need to install - # the build dep's build dep first, and there's no need to warn about it missing. - allow_missing=allow_missing or edge.depflag == dt.BUILD, + # allow missing build / test only deps + allow_missing=allow_missing or edge.depflag & (dt.BUILD | dt.TEST) == edge.depflag, ) # Make sure the directory layout agrees whether the spec is installed diff --git a/lib/spack/spack/hash_types.py b/lib/spack/spack/hash_types.py index b0514e119d4..bdf1da3c593 100644 --- a/lib/spack/spack/hash_types.py +++ b/lib/spack/spack/hash_types.py @@ -6,7 +6,7 @@ import spack.deptypes as dt import spack.repo -hashes = [] +HASHES = [] class SpecHashDescriptor: @@ -23,7 +23,7 @@ def __init__(self, depflag: dt.DepFlag, package_hash, name, override=None): self.depflag = depflag self.package_hash = package_hash self.name = name - hashes.append(self) + HASHES.append(self) # Allow spec hashes to have an alternate computation method self.override = override @@ -43,13 +43,9 @@ def __repr__(self): ) -#: Spack's deployment hash. Includes all inputs that can affect how a package is built. -dag_hash = SpecHashDescriptor(depflag=dt.BUILD | dt.LINK | dt.RUN, package_hash=True, name="hash") - - -#: Hash descriptor used only to transfer a DAG, as is, across processes -process_hash = SpecHashDescriptor( - depflag=dt.BUILD | dt.LINK | dt.RUN | dt.TEST, package_hash=True, name="process_hash" +#: The DAG hash includes all inputs that can affect how a package is built. +dag_hash = SpecHashDescriptor( + depflag=dt.BUILD | dt.LINK | dt.RUN | dt.TEST, package_hash=True, name="hash" ) diff --git a/lib/spack/spack/spec.py b/lib/spack/spack/spec.py index 8a29175ef6d..a32d35e1078 100644 --- a/lib/spack/spack/spec.py +++ b/lib/spack/spack/spec.py @@ -1515,7 +1515,7 @@ def __init__(self, spec_like=None, *, external_path=None, external_modules=None) self.abstract_hash = None # initial values for all spec hash types - for h in ht.hashes: + for h in ht.HASHES: setattr(self, h.attr, None) # cache for spec's prefix, computed lazily by prefix property @@ -2198,30 +2198,16 @@ def package_hash(self): def dag_hash(self, length=None): """This is Spack's default hash, used to identify installations. - Same as the full hash (includes package hash and build/link/run deps). - Tells us when package files and any dependencies have changes. - NOTE: Versions of Spack prior to 0.18 only included link and run deps. + NOTE: Versions of Spack prior to 1.0 only did not include test deps. """ return self._cached_hash(ht.dag_hash, length) - def process_hash(self, length=None): - """Hash used to transfer specs among processes. - - This hash includes build and test dependencies and is only used to - serialize a spec and pass it around among processes. - """ - return self._cached_hash(ht.process_hash, length) - def dag_hash_bit_prefix(self, bits): """Get the first bits of the DAG hash as an integer type.""" return spack.util.hash.base32_prefix_bits(self.dag_hash(), bits) - def process_hash_bit_prefix(self, bits): - """Get the first bits of the DAG hash as an integer type.""" - return spack.util.hash.base32_prefix_bits(self.process_hash(), bits) - def _lookup_hash(self): """Lookup just one spec with an abstract hash, returning a spec from the the environment, store, or finally, binary caches.""" @@ -3588,11 +3574,11 @@ def _dup(self, other: "Spec", deps: Union[bool, dt.DepTypes, dt.DepFlag] = True) if self._concrete: self._dunder_hash = other._dunder_hash - for h in ht.hashes: + for h in ht.HASHES: setattr(self, h.attr, getattr(other, h.attr, None)) else: self._dunder_hash = None - for h in ht.hashes: + for h in ht.HASHES: setattr(self, h.attr, None) return changed @@ -3784,16 +3770,6 @@ def _cmp_iter(self): # serialized before the hash change and one after, are considered different. yield self.dag_hash() if self.concrete else None - # This needs to be in _cmp_iter so that no specs with different process hashes - # are considered the same by `__hash__` or `__eq__`. - # - # TODO: We should eventually unify the `_cmp_*` methods with `to_node_dict` so - # TODO: there aren't two sources of truth, but this needs some thought, since - # TODO: they exist for speed. We should benchmark whether it's really worth - # TODO: having two types of hashing now that we use `json` instead of `yaml` for - # TODO: spec hashing. - yield self.process_hash() if self.concrete else None - def deps(): for dep in sorted(itertools.chain.from_iterable(self._dependencies.values())): yield dep.spec.name @@ -4447,7 +4423,7 @@ def clear_caches(self, ignore: Tuple[str, ...] = ()) -> None: """ Clears all cached hashes in a Spec, while preserving other properties. """ - for h in ht.hashes: + for h in ht.HASHES: if h.attr not in ignore: if hasattr(self, h.attr): setattr(self, h.attr, None) @@ -4456,18 +4432,12 @@ def clear_caches(self, ignore: Tuple[str, ...] = ()) -> None: setattr(self, attr, None) def __hash__(self): - # If the spec is concrete, we leverage the process hash and just use - # a 64-bit prefix of it. The process hash has the advantage that it's - # computed once per concrete spec, and it's saved -- so if we read - # concrete specs we don't need to recompute the whole hash. This is - # good for large, unchanging specs. - # - # We use the process hash instead of the DAG hash here because the DAG - # hash includes the package hash, which can cause infinite recursion, - # and which isn't defined unless the spec has a known package. + # If the spec is concrete, we leverage the dag hash and just use a 64-bit prefix of it. + # The dag hash has the advantage that it's computed once per concrete spec, and it's saved + # -- so if we read concrete specs we don't need to recompute the whole hash. if self.concrete: if not self._dunder_hash: - self._dunder_hash = self.process_hash_bit_prefix(64) + self._dunder_hash = self.dag_hash_bit_prefix(64) return self._dunder_hash # This is the normal hash for lazy_lexicographic_ordering. It's @@ -4476,7 +4446,7 @@ def __hash__(self): return hash(lang.tuplify(self._cmp_iter)) def __reduce__(self): - return Spec.from_dict, (self.to_dict(hash=ht.process_hash),) + return Spec.from_dict, (self.to_dict(hash=ht.dag_hash),) def attach_git_version_lookup(self): # Add a git lookup method for GitVersions @@ -4798,7 +4768,7 @@ def from_node_dict(cls, node): spec = Spec() name, node = cls.name_and_data(node) - for h in ht.hashes: + for h in ht.HASHES: setattr(spec, h.attr, node.get(h.name, None)) spec.name = name @@ -4981,7 +4951,7 @@ def read_specfile_dep_specs(cls, deps, hash_type=ht.dag_hash.name): """ for dep_name, elt in deps.items(): if isinstance(elt, dict): - for h in ht.hashes: + for h in ht.HASHES: if h.name in elt: dep_hash, deptypes = elt[h.name], elt["type"] hash_type = h.name @@ -5024,7 +4994,7 @@ def read_specfile_dep_specs(cls, deps, hash_type=ht.dag_hash.name): dep_name = dep["name"] if isinstance(elt, dict): # new format: elements of dependency spec are keyed. - for h in ht.hashes: + for h in ht.HASHES: if h.name in elt: dep_hash, deptypes, hash_type, virtuals = cls.extract_info_from_dep(elt, h) break diff --git a/lib/spack/spack/test/spec_semantics.py b/lib/spack/spack/test/spec_semantics.py index ca31f66fce2..d782cbb8d49 100644 --- a/lib/spack/spack/test/spec_semantics.py +++ b/lib/spack/spack/test/spec_semantics.py @@ -1751,7 +1751,6 @@ def test_package_hash_affects_dunder_and_dag_hash(mock_packages, default_mock_co assert hash(a1) == hash(a2) assert a1.dag_hash() == a2.dag_hash() - assert a1.process_hash() == a2.process_hash() a1.clear_caches() a2.clear_caches() @@ -1764,7 +1763,6 @@ def test_package_hash_affects_dunder_and_dag_hash(mock_packages, default_mock_co assert hash(a1) != hash(a2) assert a1.dag_hash() != a2.dag_hash() - assert a1.process_hash() != a2.process_hash() def test_intersects_and_satisfies_on_concretized_spec(default_mock_concretization): diff --git a/lib/spack/spack/test/spec_syntax.py b/lib/spack/spack/test/spec_syntax.py index 064f4e85548..40a1cac2417 100644 --- a/lib/spack/spack/test/spec_syntax.py +++ b/lib/spack/spack/test/spec_syntax.py @@ -878,9 +878,7 @@ def test_ambiguous_hash(mutable_database): x1 = spack.concretize.concretize_one("pkg-a") x2 = x1.copy() x1._hash = "xyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy" - x1._process_hash = "xyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy" x2._hash = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" - x2._process_hash = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" assert x1 != x2 # doesn't hold when only the dag hash is modified. diff --git a/lib/spack/spack/test/spec_yaml.py b/lib/spack/spack/test/spec_yaml.py index 411beb35722..fd11cce3704 100644 --- a/lib/spack/spack/test/spec_yaml.py +++ b/lib/spack/spack/test/spec_yaml.py @@ -203,13 +203,6 @@ def test_ordered_read_not_required_for_consistent_dag_hash( spec = spec.copy(deps=ht.dag_hash.depflag) # specs and their hashes are equal to the original - assert ( - spec.process_hash() - == from_yaml.process_hash() - == from_json.process_hash() - == from_yaml_rev.process_hash() - == from_json_rev.process_hash() - ) assert ( spec.dag_hash() == from_yaml.dag_hash()