env: Use order of roots to resolve DAG hash conflicts in legacy lockfiles

This commit is contained in:
Scott Wittenburg 2022-03-07 13:16:33 -07:00 committed by Todd Gamblin
parent 9de61c0197
commit fd3bb5177b
2 changed files with 39 additions and 26 deletions

View File

@ -1817,31 +1817,47 @@ def _read_lockfile_dict(self, d):
roots = d['roots']
self.concretized_user_specs = [Spec(r['spec']) for r in roots]
self.concretized_order = [r['hash'] for r in roots]
json_specs_by_hash = collections.OrderedDict()
for lockfile_key in sorted(d['concrete_specs']):
json_specs_by_hash[lockfile_key] = d['concrete_specs'][lockfile_key]
json_specs_by_hash = d['concrete_specs']
specs_by_hash = collections.OrderedDict()
# Track specs by their lockfile key. Currently spack uses the finest
# grained hash as the lockfile key, while older formats used the build
# hash or a previous incarnation of the DAG hash (one that did not
# include build deps or package hash).
specs_by_hash = {}
# Track specs by their DAG hash, allows handling DAG hash collisions
first_seen = {}
# First pass: Put each spec in the map ignoring dependencies
for lockfile_key, node_dict in json_specs_by_hash.items():
specs_by_hash[lockfile_key] = Spec.from_node_dict(node_dict)
# Second pass: For each spec, get its dependencies from the node dict
# and add them to the spec
for lockfile_key, node_dict in json_specs_by_hash.items():
for _, dep_hash, deptypes, _ in (
Spec.dependencies_from_node_dict(node_dict)):
specs_by_hash[lockfile_key]._add_dependency(
specs_by_hash[dep_hash], deptypes)
# Traverse the root specs one at a time in the order they appear.
# The first time we see each DAG hash, that's the one we want to
# keep. This is only required as long as we support older lockfile
# formats where the mapping from DAG hash to lockfile key is possibly
# one-to-many.
for lockfile_key in self.concretized_order:
for s in specs_by_hash[lockfile_key].traverse():
if s.dag_hash() not in first_seen:
first_seen[s.dag_hash()] = s
# Now make sure concretized_order and our internal specs dict
# contains the keys used by modern spack (i.e. the dag_hash
# that includes build deps and package hash).
self.concretized_order = [specs_by_hash[h_key].dag_hash()
for h_key in self.concretized_order]
for _, env_spec in specs_by_hash.items():
spec_dag_hash = env_spec.dag_hash()
if spec_dag_hash in self.concretized_order:
self.specs_by_hash[spec_dag_hash] = env_spec
for spec_dag_hash in self.concretized_order:
self.specs_by_hash[spec_dag_hash] = first_seen[spec_dag_hash]
def write(self, regenerate=True):
"""Writes an in-memory environment to its location on disk.

View File

@ -2866,32 +2866,29 @@ def test_environment_query_spec_by_hash(mock_stage, mock_fetch, install_mockery)
def test_read_legacy_lockfile_and_reconcretize(mock_stage, mock_fetch, install_mockery):
"""Make sure that when we read a legacy environment lock file with a hash
conflict (one from before we switched to full hash), the behavior as to
which of the conflicting specs we pick is deterministic and reproducible.
When we read the lockfile, we (somewhat arbitrarily) process specs in
alphabetical order of their lockfile key. Consequently, when reading an
old lockfile where two specs have a dag hash conflict we expect to keep the
second one we encounter. After we force reconcretization, we should both of
the specs that originally conflicted present in the environment again."""
"""In legacy lockfiles there is possibly a one-to-many relationship between
DAG hash lockfile keys. In the case of DAG hash conflicts, we always keep
the spec associated with whichever root spec came first in "roots". After
we force reconcretization, there should no longer be conflicts, i.e. all
specs that originally conflicted should be present in the environment
again."""
legacy_lockfile_path = os.path.join(
spack.paths.test_path, 'data', 'legacy_env', 'spack.lock'
)
# In the legacy lockfile, we have two conflicting specs that differ only
# in a build-only dependency. The lockfile keys and conflicting specs
# are:
# wci7a3a -> dttop ^dtbuild1@0.5
# 5zg6wxw -> dttop ^dtbuild1@1.0
# So when we initially read the legacy lockfile, we expect to have kept
# the version of dttop that depends on dtbuild1@0.5
# The order of the root specs in this environment is:
# [
# wci7a3a -> dttop ^dtbuild1@0.5,
# 5zg6wxw -> dttop ^dtbuild1@1.0
# ]
# So in the legacy lockfile we have two versions of dttop with the same DAG
# hash (in the past DAG hash did not take build deps into account). Make
# sure we keep the correct instance of each spec, i.e. the one that appeared
# first.
env('create', 'test', legacy_lockfile_path)
test = ev.read('test')
# Before we forcefully reconcretize, we expect there will be only a
# single actual spec in the environment, and it should depend on
# dtbuild1@1.0, since that root appears last in the list.
assert len(test.specs_by_hash) == 1
single_root = next(iter(test.specs_by_hash.values()))