Fix environment reading from lockfile to trust written hashes (#25879)

#22845 revealed a long-standing bug that had never been triggered before, because the
hashing algorithm had been stable for multiple years while the bug was in production. The
bug was that when reading a concretized environment, Spack did not properly read in the
build hashes associated with the specs in the environment. Those hashes were recomputed
(and as long as we didn't change the algorithm, were recomputed identically). Spack's
policy, though, is never to recompute a hash. Once something is installed, we respect its
metadata hash forever -- even if internally Spack changes the hashing method. Put
differently, once something is concretized, it has a concrete hash, and that's it -- forever.

When we changed the hashing algorithm for performance in #22845 we exposed the bug.
This PR fixes the bug at its source, but properly reading in the cached build hash attributes
associated with the specs. I've also renamed some variables in the Environment class
methods to make a mistake of this sort more difficult to make in the future.

* ensure environment build hashes are never recomputed
* add comment clarifying reattachment of env build hashes
* bump lockfile version and include specfile version in env meta
* Fix unit-test for v1 to v2 conversion

Co-authored-by: Massimiliano Culpo <massimiliano.culpo@gmail.com>
This commit is contained in:
Greg Becker 2021-09-13 14:25:48 -07:00 committed by GitHub
parent 9956841331
commit dad69e7d7c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 56 additions and 9 deletions

View File

@ -85,7 +85,7 @@
valid_environment_name_re = r'^\w[\w-]*$'
#: version of the lockfile format. Must increase monotonically.
lockfile_format_version = 2
lockfile_format_version = 3
# Magic names
# The name of the standalone spec list in the manifest yaml
@ -1695,12 +1695,12 @@ def _to_lockfile_dict(self):
concrete_specs = {}
for spec in self.specs_by_hash.values():
for s in spec.traverse():
dag_hash_all = s.build_hash()
if dag_hash_all not in concrete_specs:
build_hash = s.build_hash()
if build_hash not in concrete_specs:
spec_dict = s.to_node_dict(hash=ht.build_hash)
# Assumes no legacy formats, since this was just created.
spec_dict[ht.dag_hash.name] = s.dag_hash()
concrete_specs[dag_hash_all] = spec_dict
concrete_specs[build_hash] = spec_dict
hash_spec_list = zip(
self.concretized_order, self.concretized_user_specs)
@ -1711,6 +1711,7 @@ def _to_lockfile_dict(self):
'_meta': {
'file-type': 'spack-lockfile',
'lockfile-version': lockfile_format_version,
'specfile-version': spack.spec.specfile_format_version
},
# users specs + hashes are the 'roots' of the environment
@ -1741,13 +1742,18 @@ def _read_lockfile_dict(self, d):
root_hashes = set(self.concretized_order)
specs_by_hash = {}
for dag_hash, node_dict in json_specs_by_hash.items():
specs_by_hash[dag_hash] = Spec.from_node_dict(node_dict)
for build_hash, node_dict in json_specs_by_hash.items():
spec = Spec.from_node_dict(node_dict)
if d['_meta']['lockfile-version'] > 1:
# Build hash is stored as a key, but not as part of the node dict
# To ensure build hashes are not recomputed, we reattach here
setattr(spec, ht.build_hash.attr, build_hash)
specs_by_hash[build_hash] = spec
for dag_hash, node_dict in json_specs_by_hash.items():
for build_hash, node_dict in json_specs_by_hash.items():
for _, dep_hash, deptypes, _ in (
Spec.dependencies_from_node_dict(node_dict)):
specs_by_hash[dag_hash]._add_dependency(
specs_by_hash[build_hash]._add_dependency(
specs_by_hash[dep_hash], deptypes)
# If we are reading an older lockfile format (which uses dag hashes

View File

@ -186,6 +186,9 @@
default_format += '{%compiler.name}{@compiler.version}{compiler_flags}'
default_format += '{variants}{arch=architecture}'
#: specfile format version. Must increase monotonically
specfile_format_version = 2
def colorize_spec(spec):
"""Returns a spec colorized according to the colors specified in
@ -1798,7 +1801,7 @@ def to_dict(self, hash=ht.dag_hash):
if node_hash not in hash_set:
node_list.append(node)
hash_set.add(node_hash)
meta_dict = syaml.syaml_dict([('version', 2)])
meta_dict = syaml.syaml_dict([('version', specfile_format_version)])
inner_dict = syaml.syaml_dict([('_meta', meta_dict), ('nodes', node_list)])
spec_dict = syaml.syaml_dict([('spec', inner_dict)])
return spec_dict

View File

@ -0,0 +1,38 @@
# Copyright 2013-2021 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
"""Test environment internals without CLI"""
import spack.environment as ev
import spack.spec
def test_hash_change_no_rehash_concrete(tmpdir, mock_packages, config):
# create an environment
env_path = tmpdir.mkdir('env_dir').strpath
env = ev.Environment(env_path)
env.write()
# add a spec with a rewritten build hash
spec = spack.spec.Spec('mpileaks')
env.add(spec)
env.concretize()
# rewrite the hash
old_hash = env.concretized_order[0]
new_hash = 'abc'
env.specs_by_hash[old_hash]._build_hash = new_hash
env.concretized_order[0] = new_hash
env.specs_by_hash[new_hash] = env.specs_by_hash[old_hash]
del env.specs_by_hash[old_hash]
env.write()
# Read environment
read_in = ev.Environment(env_path)
# Ensure read hashes are used (rewritten hash seen on read)
assert read_in.concretized_order
assert read_in.concretized_order[0] in read_in.specs_by_hash
assert read_in.specs_by_hash[read_in.concretized_order[0]]._build_hash == new_hash