ASP-based solver: single Spec instance per dag hash (#39590)

Reused specs used to be referenced directly from the built spec.

This could cause issues like issue 39570, where two distinct objects
in memory represented the same node, because two reused specs were
loaded from different sources but referred to the same spec
by DAG hash.

The issue is solved by copying concrete specs into a dictionary keyed
by DAG hash, so each node has exactly one canonical in-memory object.
This commit is contained in:
Massimiliano Culpo 2023-10-19 16:00:45 +02:00 committed by GitHub
parent 4f49f7b9df
commit a1ca1a944a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 127 additions and 32 deletions

View File

@ -13,7 +13,7 @@
import re
import types
import warnings
from typing import List, NamedTuple, Optional, Sequence, Tuple, Union
from typing import Dict, List, NamedTuple, Optional, Sequence, Tuple, Union
import archspec.cpu
@ -971,6 +971,70 @@ def _model_has_cycles(self, models):
return cycle_result.unsatisfiable
class ConcreteSpecsByHash(collections.abc.Mapping):
    """Mapping containing concrete specs keyed by DAG hash.

    The mapping is ensured to be consistent, i.e. if a spec in the mapping has a dependency with
    hash X, it is ensured to be the same object in memory as the spec keyed by X.
    """

    def __init__(self) -> None:
        # Backing store: DAG hash -> canonical copy of the node with that hash
        self.data: Dict[str, spack.spec.Spec] = {}

    def __getitem__(self, dag_hash: str) -> spack.spec.Spec:
        return self.data[dag_hash]

    def add(self, spec: spack.spec.Spec) -> bool:
        """Adds a new concrete spec to the mapping. Returns True if the spec was just added,
        False if the spec was already in the mapping.

        Args:
            spec: spec to be added

        Raises:
            ValueError: if the spec is not concrete
        """
        if not spec.concrete:
            raise ValueError(
                f"trying to store the non-concrete spec '{spec}' in a container "
                f"that only accepts concrete"
            )

        root_hash = spec.dag_hash()
        if root_hash in self.data:
            return False

        # Copy the root without its dependencies, then walk the input DAG and rewire
        # each edge onto the canonical copies stored in this container.
        self.data[root_hash] = spec.copy(deps=False)
        pending = [spec]
        while pending:
            current = pending.pop()
            rewired_parent = self.data[current.dag_hash()]
            for edge in current.edges_to_dependencies():
                child_hash = edge.spec.dag_hash()
                rewired_child = self.data.get(child_hash)
                if rewired_child is None:
                    # First time we see this node: copy it and schedule its edges for rewiring
                    rewired_child = edge.spec.copy(deps=False)
                    self.data[child_hash] = rewired_child
                    pending.append(edge.spec)
                rewired_parent.add_dependency_edge(
                    dependency_spec=rewired_child, depflag=edge.depflag, virtuals=edge.virtuals
                )
        return True

    def __len__(self) -> int:
        return len(self.data)

    def __iter__(self):
        return iter(self.data)
class SpackSolverSetup:
"""Class to set up and run a Spack concretization solve."""
@ -994,9 +1058,7 @@ def __init__(self, tests=False):
# (ID, CompilerSpec) -> dictionary of attributes
self.compiler_info = collections.defaultdict(dict)
# hashes we've already added facts for
self.seen_hashes = set()
self.reusable_and_possible = {}
self.reusable_and_possible = ConcreteSpecsByHash()
# id for dummy variables
self._condition_id_counter = itertools.count()
@ -2318,25 +2380,29 @@ def define_variant_values(self):
for pkg, variant, value in self.variant_values_from_specs:
self.gen.fact(fn.pkg_fact(pkg, fn.variant_possible_value(variant, value)))
def _facts_from_concrete_spec(self, spec, possible):
def register_concrete_spec(self, spec, possible):
# tell the solver about any installed packages that could
# be dependencies (don't tell it about the others)
h = spec.dag_hash()
if spec.name in possible and h not in self.seen_hashes:
self.reusable_and_possible[h] = spec
try:
# Only consider installed packages for repo we know
spack.repo.PATH.get(spec)
except (spack.repo.UnknownNamespaceError, spack.repo.UnknownPackageError):
return
if spec.name not in possible:
return
try:
# Only consider installed packages for repo we know
spack.repo.PATH.get(spec)
except (spack.repo.UnknownNamespaceError, spack.repo.UnknownPackageError) as e:
tty.debug(f"[REUSE] Issues when trying to reuse {spec.short_spec}: {str(e)}")
return
self.reusable_and_possible.add(spec)
def concrete_specs(self):
    """Emit facts for reusable specs"""
    for dag_hash, reusable in self.reusable_and_possible.items():
        # Tell the solver a spec with this hash is installed...
        self.gen.fact(fn.installed_hash(reusable.name, dag_hash))
        # ...and which constraints it imposes on the solve
        self.impose(dag_hash, reusable, body=True)
        self.gen.newline()
# Declare as possible parts of specs that are not in package.py
# - Add versions to possible versions
# - Add OS to possible OS's
@ -2347,15 +2413,12 @@ def _facts_from_concrete_spec(self, spec, possible):
)
self.possible_oses.add(dep.os)
# add the hash to the one seen so far
self.seen_hashes.add(h)
def define_concrete_input_specs(self, specs, possible):
# any concrete specs in the input spec list
for input_spec in specs:
for spec in input_spec.traverse():
if spec.concrete:
self._facts_from_concrete_spec(spec, possible)
self.register_concrete_spec(spec, possible)
def setup(
self,
@ -2422,14 +2485,13 @@ def setup(
# get possible compilers
self.possible_compilers = self.generate_possible_compilers(specs)
self.gen.h1("Concrete input spec definitions")
self.gen.h1("Reusable concrete specs")
self.define_concrete_input_specs(specs, self.pkgs)
if reuse:
self.gen.h1("Reusable specs")
self.gen.fact(fn.optimize_for_reuse())
for reusable_spec in reuse:
self._facts_from_concrete_spec(reusable_spec, self.pkgs)
self.register_concrete_spec(reusable_spec, self.pkgs)
self.concrete_specs()
self.gen.h1("Generic statements on possible packages")
node_counter.possible_packages_facts(self.gen, fn)
@ -2620,7 +2682,6 @@ def __init__(self, specs, hash_lookup=None):
self._specs = {}
self._result = None
self._command_line_specs = specs
self._hash_specs = []
self._flag_sources = collections.defaultdict(lambda: set())
self._flag_compiler_defaults = set()
@ -2631,7 +2692,6 @@ def __init__(self, specs, hash_lookup=None):
def hash(self, node, h):
if node not in self._specs:
self._specs[node] = self._hash_lookup[h]
self._hash_specs.append(node)
def node(self, node):
if node not in self._specs:
@ -2869,12 +2929,10 @@ def build_specs(self, function_tuples):
# fix flags after all specs are constructed
self.reorder_flags()
# cycle detection
roots = [spec.root for spec in self._specs.values() if not spec.root.installed]
# inject patches -- note that we' can't use set() to unique the
# roots here, because the specs aren't complete, and the hash
# function will loop forever.
roots = [spec.root for spec in self._specs.values() if not spec.root.installed]
roots = dict((id(r), r) for r in roots)
for root in roots.values():
spack.spec.Spec.inject_patches_variant(root)

View File

@ -1170,7 +1170,7 @@ def test_external_package_versions(self, spec_str, is_external, expected):
)
@pytest.mark.parametrize("mock_db", [True, False])
def test_reuse_does_not_overwrite_dev_specs(
self, dev_first, spec, mock_db, tmpdir, monkeypatch
self, dev_first, spec, mock_db, tmpdir, temporary_store, monkeypatch
):
"""Test that reuse does not mix dev specs with non-dev specs.
@ -1182,8 +1182,7 @@ def test_reuse_does_not_overwrite_dev_specs(
# dev and non-dev specs that are otherwise identical
spec = Spec(spec)
dev_spec = spec.copy()
dev_constraint = "dev_path=%s" % tmpdir.strpath
dev_spec["dev-build-test-install"].constrain(dev_constraint)
dev_spec["dev-build-test-install"].constrain(f"dev_path={tmpdir.strpath}")
# run the test in both orders
first_spec = dev_spec if dev_first else spec
@ -1196,7 +1195,7 @@ def mock_fn(*args, **kwargs):
return [first_spec]
if mock_db:
monkeypatch.setattr(spack.store.STORE.db, "query", mock_fn)
temporary_store.db.add(first_spec, None)
else:
monkeypatch.setattr(spack.binary_distribution, "update_cache_and_get_specs", mock_fn)
@ -2112,6 +2111,24 @@ def test_dont_define_new_version_from_input_if_checksum_required(self, working_e
# when checksums are required
Spec("a@=3.0").concretized()
@pytest.mark.regression("39570")
@pytest.mark.db
def test_reuse_python_from_cli_and_extension_from_db(self, mutable_database):
    """Tests that reusing python with an explicit request on the command line, when the spec
    also reuses a python extension from the DB, doesn't fail.
    """
    # Install a python extension, so the python node it depends on is in the DB
    s = Spec("py-extension1").concretized()
    python_hash = s["python"].dag_hash()
    s.package.do_install(fake=True, explicit=True)

    # Request the installed python explicitly by hash while reusing from the DB
    with spack.config.override("concretizer:reuse", True):
        with_reuse = Spec(f"py-extension2 ^/{python_hash}").concretized()

    with spack.config.override("concretizer:reuse", False):
        without_reuse = Spec("py-extension2").concretized()

    # Both concretizations must agree: reuse did not duplicate the python node
    assert with_reuse.dag_hash() == without_reuse.dag_hash()
@pytest.fixture()
def duplicates_test_repository():
@ -2246,3 +2263,23 @@ def test_pure_build_virtual_dependency(self, strategy):
def test_drop_moving_targets(v_str, v_opts, checksummed):
v = Version(v_str)
assert spack.solver.asp._is_checksummed_version((v, v_opts)) == checksummed
class TestConcreteSpecsByHash:
    """Tests the container of concrete specs"""

    @pytest.mark.parametrize("input_specs", [["a"], ["a foobar=bar", "b"], ["a foobar=baz", "b"]])
    def test_adding_specs(self, input_specs, default_mock_concretization):
        """Tests that concrete specs in the container are equivalent, but stored as different
        objects in memory.
        """
        container = spack.solver.asp.ConcreteSpecsByHash()
        concrete_specs = [Spec(abstract).concretized() for abstract in input_specs]
        for concrete in concrete_specs:
            container.add(concrete)

        # Every node reachable from the inputs must have an equal, but distinct,
        # counterpart stored under its DAG hash
        for root in concrete_specs:
            for node in root.traverse(root=True):
                key = node.dag_hash()
                stored = container[key]
                assert stored == node
                assert key in container
                assert stored is not node