Delay abstract hash lookup (#39251)

Delay lookup for abstract hashes until concretization time, instead of
at Spec comparison time. This has a few advantages:

1. `satisfies` / `intersects` etc. don't always know where to resolve an
   abstract hash (in some cases it's wrong to look in the current env,
   db, buildcache, ...). Better to let the call site dictate it.
2. Allows searching by abstract hash without triggering a database
   lookup, which previously caused quadratic complexity (an accidental
   nested loop during search).
3. Simplifies queries against the buildcache: they can now use Spec
   instances instead of strings.

The rules are straightforward:

1. `a` satisfies `b` when `b`'s hash is a prefix of `a`'s hash.
2. `a` intersects `b` when `a`'s hash is a prefix of `b`'s hash, or vice
   versa.
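
A minimal sketch of the two rules in isolation (the helper names are
hypothetical, not Spack's API; Spack implements this inside
`Spec.satisfies` and `Spec.intersects`, as the diff below shows):

```python
def hash_satisfies(a_hash: str, b_hash: str) -> bool:
    # "a satisfies b": b's (possibly shorter) hash must be a prefix of a's
    return a_hash.startswith(b_hash)


def hash_intersects(a_hash: str, b_hash: str) -> bool:
    # "a intersects b": symmetric, either hash is a prefix of the other
    return a_hash.startswith(b_hash) or b_hash.startswith(a_hash)


assert hash_satisfies("abc123def", "abc12")      # prefix => satisfies
assert not hash_satisfies("abc12", "abc123def")  # satisfies is directional
assert hash_intersects("abc12", "abc123def")     # intersects is symmetric
```
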
Harmen Stoppels, 2023-08-17 08:08:50 +02:00 (committed by GitHub)
parent e381e166ec, commit 190a1bf523
7 changed files with 92 additions and 68 deletions

View File

@@ -2383,22 +2383,12 @@ def __init__(self, all_architectures):
         self.possible_specs = specs

-    def __call__(self, spec, **kwargs):
+    def __call__(self, spec: Spec, **kwargs):
         """
         Args:
-            spec (str): The spec being searched for in its string representation or hash.
+            spec: The spec being searched for
         """
-        matches = []
-        if spec.startswith("/"):
-            # Matching a DAG hash
-            query_hash = spec.replace("/", "")
-            for candidate_spec in self.possible_specs:
-                if candidate_spec.dag_hash().startswith(query_hash):
-                    matches.append(candidate_spec)
-        else:
-            # Matching a spec constraint
-            matches = [s for s in self.possible_specs if s.satisfies(spec)]
-        return matches
+        return [s for s in self.possible_specs if s.satisfies(spec)]


 class FetchIndexError(Exception):
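
With the typed signature, callers hand `BinaryCacheQuery` a `Spec`
(possibly carrying an abstract hash) instead of a raw string. A sketch of
the new call pattern, using a hypothetical hash prefix:

```python
import spack.binary_distribution as bindist
import spack.spec

query = bindist.BinaryCacheQuery(all_architectures=True)
# The abstract hash is part of the Spec itself now; matching is a plain
# satisfies() check against the cached index, with no extra lookups.
matches = query(spack.spec.Spec("/abc123"))  # hypothetical hash prefix
```
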

View File

@@ -1994,14 +1994,10 @@ def get_one_by_hash(self, dag_hash):
     def all_matching_specs(self, *specs: spack.spec.Spec) -> List[Spec]:
         """Returns all concretized specs in the environment satisfying any of the input specs"""
-        # Look up abstract hashes ahead of time, to avoid O(n^2) traversal.
-        specs = [s.lookup_hash() for s in specs]
-
-        # Avoid double lookup by directly calling _satisfies.
         return [
             s
             for s in traverse.traverse_nodes(self.concrete_roots(), key=traverse.by_dag_hash)
-            if any(s._satisfies(t) for t in specs)
+            if any(s.satisfies(t) for t in specs)
         ]

     @spack.repo.autospec
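
Because `satisfies` now handles abstract hashes by prefix comparison,
matching a hash against the environment is a single traversal instead of
one database lookup per node. A sketch, assuming an active environment:

```python
import spack.environment
import spack.spec

env = spack.environment.active_environment()  # assumes an env is activated
# Each node is checked against the hash prefix via satisfies(); no per-node
# database lookups, so no accidental quadratic behavior.
matches = env.all_matching_specs(spack.spec.Spec("/abc123"))  # hypothetical prefix
```
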

View File

@@ -2953,6 +2953,7 @@ def solve(self, specs, out=None, timers=False, stats=False, tests=False, setup_o
             setup_only (bool): if True, stop after setup and don't solve (default False).
         """
         # Check upfront that the variants are admissible
+        specs = [s.lookup_hash() for s in specs]
         reusable_specs = self._check_input_and_extract_concrete_specs(specs)
         reusable_specs.extend(self._reusable_specs(specs))
         setup = SpackSolverSetup(tests=tests)
@@ -2976,6 +2977,7 @@ def solve_in_rounds(self, specs, out=None, timers=False, stats=False, tests=Fals
             stats (bool): print internal statistics if set to True
             tests (bool): add test dependencies to the solve
         """
+        specs = [s.lookup_hash() for s in specs]
         reusable_specs = self._check_input_and_extract_concrete_specs(specs)
         reusable_specs.extend(self._reusable_specs(specs))
         setup = SpackSolverSetup(tests=tests)

View File

@@ -1925,19 +1925,15 @@ def _lookup_hash(self):
         store, or finally, binary caches."""
         import spack.environment

-        matches = []
         active_env = spack.environment.active_environment()
-        if active_env:
-            env_matches = active_env.get_by_hash(self.abstract_hash) or []
-            matches = [m for m in env_matches if m._satisfies(self)]
-        if not matches:
-            db_matches = spack.store.STORE.db.get_by_hash(self.abstract_hash) or []
-            matches = [m for m in db_matches if m._satisfies(self)]
-        if not matches:
-            query = spack.binary_distribution.BinaryCacheQuery(True)
-            remote_matches = query("/" + self.abstract_hash) or []
-            matches = [m for m in remote_matches if m._satisfies(self)]
+
+        # First env, then store, then binary cache
+        matches = (
+            (active_env.all_matching_specs(self) if active_env else [])
+            or spack.store.STORE.db.query(self, installed=any)
+            or spack.binary_distribution.BinaryCacheQuery(True)(self)
+        )

         if not matches:
             raise InvalidHashError(self, self.abstract_hash)
@@ -1958,19 +1954,17 @@ def lookup_hash(self):
         spec = self.copy(deps=False)
         # root spec is replaced
         if spec.abstract_hash:
-            new = self._lookup_hash()
-            spec._dup(new)
+            spec._dup(self._lookup_hash())
             return spec

         # Get dependencies that need to be replaced
         for node in self.traverse(root=False):
             if node.abstract_hash:
-                new = node._lookup_hash()
-                spec._add_dependency(new, deptypes=(), virtuals=())
+                spec._add_dependency(node._lookup_hash(), deptypes=(), virtuals=())

         # reattach nodes that were not otherwise satisfied by new dependencies
         for node in self.traverse(root=False):
-            if not any(n._satisfies(node) for n in spec.traverse()):
+            if not any(n.satisfies(node) for n in spec.traverse()):
                 spec._add_dependency(node.copy(), deptypes=(), virtuals=())

         return spec
@@ -1983,9 +1977,7 @@ def replace_hash(self):
         if not any(node for node in self.traverse(order="post") if node.abstract_hash):
             return

-        spec_by_hash = self.lookup_hash()
-
-        self._dup(spec_by_hash)
+        self._dup(self.lookup_hash())

     def to_node_dict(self, hash=ht.dag_hash):
         """Create a dictionary representing the state of this Spec.
@@ -3721,15 +3713,19 @@ def intersects(self, other: "Spec", deps: bool = True) -> bool:
         """
         other = self._autospec(other)

-        lhs = self.lookup_hash() or self
-        rhs = other.lookup_hash() or other
-
-        return lhs._intersects(rhs, deps)
-
-    def _intersects(self, other: "Spec", deps: bool = True) -> bool:
         if other.concrete and self.concrete:
             return self.dag_hash() == other.dag_hash()

+        self_hash = self.dag_hash() if self.concrete else self.abstract_hash
+        other_hash = other.dag_hash() if other.concrete else other.abstract_hash
+
+        if (
+            self_hash
+            and other_hash
+            and not (self_hash.startswith(other_hash) or other_hash.startswith(self_hash))
+        ):
+            return False
+
         # If the names are different, we need to consider virtuals
         if self.name != other.name and self.name and other.name:
             if self.virtual and other.virtual:
@@ -3789,19 +3785,8 @@ def _intersects(self, other: "Spec", deps: bool = True) -> bool:
         # If we need to descend into dependencies, do it, otherwise we're done.
         if deps:
             return self._intersects_dependencies(other)
-        else:
-            return True
-
-    def satisfies(self, other, deps=True):
-        """
-        This checks constraints on common dependencies against each other.
-        """
-        other = self._autospec(other)
-
-        lhs = self.lookup_hash() or self
-        rhs = other.lookup_hash() or other
-
-        return lhs._satisfies(rhs, deps=deps)
+
+        return True

     def _intersects_dependencies(self, other):
         if not other._dependencies or not self._dependencies:
@@ -3838,7 +3823,7 @@ def _intersects_dependencies(self, other):
         return True

-    def _satisfies(self, other: "Spec", deps: bool = True) -> bool:
+    def satisfies(self, other: "Spec", deps: bool = True) -> bool:
         """Return True if all concrete specs matching self also match other, otherwise False.

         Args:
@@ -3853,6 +3838,13 @@ def _satisfies(self, other: "Spec", deps: bool = True) -> bool:
             # objects.
             return self.concrete and self.dag_hash() == other.dag_hash()

+        # If the right-hand side has an abstract hash, make sure it's a prefix of the
+        # left-hand side's (abstract) hash.
+        if other.abstract_hash:
+            compare_hash = self.dag_hash() if self.concrete else self.abstract_hash
+            if not compare_hash or not compare_hash.startswith(other.abstract_hash):
+                return False
+
         # If the names are different, we need to consider virtuals
         if self.name != other.name and self.name and other.name:
             # A concrete provider can satisfy a virtual dependency.
@@ -4229,9 +4221,7 @@ def eq_node(self, other):
     def _cmp_iter(self):
         """Lazily yield components of self for comparison."""
-        cmp_spec = self.lookup_hash() or self
-
-        for item in cmp_spec._cmp_node():
+        for item in self._cmp_node():
             yield item

         # This needs to be in _cmp_iter so that no specs with different process hashes
@@ -4242,10 +4232,10 @@ def _cmp_iter(self):
         # TODO: they exist for speed. We should benchmark whether it's really worth
         # TODO: having two types of hashing now that we use `json` instead of `yaml` for
         # TODO: spec hashing.
-        yield cmp_spec.process_hash() if cmp_spec.concrete else None
+        yield self.process_hash() if self.concrete else None

         def deps():
-            for dep in sorted(itertools.chain.from_iterable(cmp_spec._dependencies.values())):
+            for dep in sorted(itertools.chain.from_iterable(self._dependencies.values())):
                 yield dep.spec.name
                 yield tuple(sorted(dep.deptypes))
                 yield hash(dep.spec)
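
A consequence of dropping `lookup_hash()` from `_cmp_iter` is that
ordering and equality are computed purely from a spec's own fields. A
hedged sketch of what no longer happens, with hypothetical specs:

```python
import spack.spec

a = spack.spec.Spec("zlib/abc123")  # hypothetical abstract hashes
b = spack.spec.Spec("zlib/def456")

# Sorting or comparing parsed specs no longer resolves their hashes against
# the environment, store, or build cache, so it cannot trigger I/O.
specs = sorted([a, b])
```
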

View File

@@ -197,7 +197,9 @@ def _expand_matrix_constraints(matrix_config):
     for combo in itertools.product(*expanded_rows):
         # Construct a combined spec to test against excludes
         flat_combo = [constraint for constraint_list in combo for constraint in constraint_list]
-        flat_combo = [Spec(x) for x in flat_combo]
+
+        # Resolve abstract hashes so we can exclude by their concrete properties
+        flat_combo = [Spec(x).lookup_hash() for x in flat_combo]

         test_spec = flat_combo[0].copy()
         for constraint in flat_combo[1:]:
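
Why resolve here: an `exclude` rule may reference properties (say, a
compiler) that a bare hash entry only exposes once resolved to a concrete
spec. A sketch of the exclusion test this feeds into, with hypothetical
entries:

```python
from spack.spec import Spec

# Hypothetical matrix row entries, one of them a bare abstract hash
flat_combo = [Spec("zlib/abc123").lookup_hash(), Spec("%gcc")]

# Combine into a single spec so excludes can match concrete properties
test_spec = flat_combo[0].copy()
for constraint in flat_combo[1:]:
    test_spec.constrain(constraint)
```
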

View File

@@ -1291,3 +1291,38 @@ def test_constrain(factory, lhs_str, rhs_str, result, constrained_str):
     rhs = factory(rhs_str)
     rhs.constrain(lhs)
     assert rhs == factory(constrained_str)
+
+
+def test_abstract_hash_intersects_and_satisfies(default_mock_concretization):
+    concrete: Spec = default_mock_concretization("a")
+    hash = concrete.dag_hash()
+    hash_5 = hash[:5]
+    hash_6 = hash[:6]
+    # abstract hash that doesn't have a common prefix with the others.
+    hash_other = f"{'a' if hash_5[0] == 'b' else 'b'}{hash_5[1:]}"
+
+    abstract_5 = Spec(f"a/{hash_5}")
+    abstract_6 = Spec(f"a/{hash_6}")
+    abstract_none = Spec(f"a/{hash_other}")
+    abstract = Spec("a")
+
+    def assert_subset(a: Spec, b: Spec):
+        assert a.intersects(b) and b.intersects(a) and a.satisfies(b) and not b.satisfies(a)
+
+    def assert_disjoint(a: Spec, b: Spec):
+        assert (
+            not a.intersects(b)
+            and not b.intersects(a)
+            and not a.satisfies(b)
+            and not b.satisfies(a)
+        )
+
+    # left-hand side is more constrained, so its
+    # concretization space is a subset of the right-hand side's
+    assert_subset(concrete, abstract_5)
+    assert_subset(abstract_6, abstract_5)
+    assert_subset(abstract_5, abstract)
+
+    # disjoint concretization space
+    assert_disjoint(abstract_none, concrete)
+    assert_disjoint(abstract_none, abstract_5)

View File

@@ -726,22 +726,31 @@ def test_multiple_specs_with_hash(database, config):

 @pytest.mark.db
 def test_ambiguous_hash(mutable_database, default_mock_concretization, config):
+    """Test that abstract hash ambiguity is delayed until concretization.
+
+    In the past this ambiguity error would happen during parse time."""
+    # This is very sketchy, as manually setting hashes easily breaks invariants
     x1 = default_mock_concretization("a")
     x2 = x1.copy()
     x1._hash = "xyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy"
+    x1._process_hash = "xyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy"
     x2._hash = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
-    mutable_database.add(x1, spack.store.STORE.layout)
-    mutable_database.add(x2, spack.store.STORE.layout)
+    x2._process_hash = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+
+    assert x1 != x2  # doesn't hold when only the dag hash is modified.
+
+    mutable_database.add(x1, directory_layout=None)
+    mutable_database.add(x2, directory_layout=None)

     # ambiguity in first hash character
+    s1 = SpecParser("/x").next_spec()
     with pytest.raises(spack.spec.AmbiguousHashError):
-        parsed_spec = SpecParser("/x").next_spec()
-        parsed_spec.replace_hash()
+        s1.lookup_hash()

     # ambiguity in first hash character AND spec name
+    s2 = SpecParser("a/x").next_spec()
     with pytest.raises(spack.spec.AmbiguousHashError):
-        parsed_spec = SpecParser("a/x").next_spec()
-        parsed_spec.replace_hash()
+        s2.lookup_hash()


 @pytest.mark.db