package_hash: switch to using canonical source instead of AST repr

We are planning to switch to using full hashes for Spack specs, which means that the
package hash will be included in the deployment descriptor. This means we need a more
robust package hash than simply dumping the `repr` of the AST.

The AST repr that we previously used for package content is unreliable because it can
vary between python versions (Python's AST actually changes fairly frequently).

- [x] change `package_hash`, `package_ast`, and `canonical_source` to accept a string for
      alternate source instead of a filename.
- [x] consolidate package hash tests in `test/util/package_hash.py`.
- [x] remove old `package_content` method.
- [x] make `package_hash` do what `canonical_source_hash` was doing before.
- [x] modify `content_hash` in `package.py` to use the new `package_hash` function.

Co-authored-by: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com>
This commit is contained in:
Todd Gamblin 2021-12-23 11:43:55 -08:00 committed by Greg Becker
parent 39afe946a0
commit 106ae7abe6
4 changed files with 120 additions and 108 deletions

View File

@ -1554,7 +1554,8 @@ def content_hash(self, content=None):
hash_content.append(source_id.encode('utf-8'))
hash_content.extend(':'.join((p.sha256, str(p.level))).encode('utf-8')
for p in self.spec.patches)
hash_content.append(package_hash(self.spec, content))
hash_content.append(package_hash(self.spec, source=content).encode('utf-8'))
b32_hash = base64.b32encode(
hashlib.sha256(bytes().join(
sorted(hash_content))).digest()).lower()

View File

@ -13,15 +13,9 @@
from spack.paths import mock_packages_path
from spack.spec import Spec
from spack.util.naming import mod_to_class
from spack.util.package_hash import package_content
from spack.version import VersionChecksumError
def _generate_content_strip_name(spec):
content = package_content(spec)
return content.replace(spec.package.__class__.__name__, '')
@pytest.mark.usefixtures('config', 'mock_packages')
class TestPackage(object):
def test_load_package(self):
@ -58,51 +52,6 @@ def test_package_class_names(self):
assert 'Pmgrcollective' == mod_to_class('PmgrCollective')
assert '_3db' == mod_to_class('3db')
def test_content_hash_all_same_but_patch_contents(self):
spec1 = Spec("hash-test1@1.1").concretized()
spec2 = Spec("hash-test2@1.1").concretized()
content1 = _generate_content_strip_name(spec1)
content2 = _generate_content_strip_name(spec2)
assert spec1.package.content_hash(content=content1) != \
spec2.package.content_hash(content=content2)
def test_content_hash_different_variants(self):
spec1 = Spec("hash-test1@1.2 +variantx").concretized()
spec2 = Spec("hash-test2@1.2 ~variantx").concretized()
content1 = _generate_content_strip_name(spec1)
content2 = _generate_content_strip_name(spec2)
assert spec1.package.content_hash(content=content1) == \
spec2.package.content_hash(content=content2)
def test_content_hash_cannot_get_details_from_ast(self):
"""Packages hash-test1 and hash-test3 would be considered the same
except that hash-test3 conditionally executes a phase based on
a "when" directive that Spack cannot evaluate by examining the
AST. This test ensures that Spack can compute a content hash
for hash-test3. If Spack cannot determine when a phase applies,
it adds it by default, so the test also ensures that the hashes
differ where Spack includes a phase on account of AST-examination
failure.
"""
spec3 = Spec("hash-test1@1.7").concretized()
spec4 = Spec("hash-test3@1.7").concretized()
content3 = _generate_content_strip_name(spec3)
content4 = _generate_content_strip_name(spec4)
assert(spec3.package.content_hash(content=content3) !=
spec4.package.content_hash(content=content4))
def test_all_same_but_archive_hash(self):
spec1 = Spec("hash-test1@1.3").concretized()
spec2 = Spec("hash-test2@1.3").concretized()
content1 = _generate_content_strip_name(spec1)
content2 = _generate_content_strip_name(spec2)
assert spec1.package.content_hash(content=content1) != \
spec2.package.content_hash(content=content2)
def test_parse_dynamic_function_call(self):
spec = Spec("hash-test4").concretized()
spec.package.content_hash()
# Below tests target direct imports of spack packages from the
# spack.pkg namespace
def test_import_package(self):

View File

@ -17,17 +17,43 @@
datadir = os.path.join(spack.paths.test_path, "data", "unparse")
def test_hash(tmpdir, mock_packages, config):
def compare_sans_name(eq, spec1, spec2):
content1 = ph.canonical_source(spec1)
content1 = content1.replace(spec1.package.__class__.__name__, 'TestPackage')
content2 = ph.canonical_source(spec2)
content2 = content2.replace(spec2.package.__class__.__name__, 'TestPackage')
if eq:
assert content1 == content2
else:
assert content1 != content2
def compare_hash_sans_name(eq, spec1, spec2):
content1 = ph.canonical_source(spec1)
content1 = content1.replace(spec1.package.__class__.__name__, 'TestPackage')
hash1 = spec1.package.content_hash(content=content1)
content2 = ph.canonical_source(spec2)
content2 = content2.replace(spec2.package.__class__.__name__, 'TestPackage')
hash2 = spec2.package.content_hash(content=content2)
if eq:
assert hash1 == hash2
else:
assert hash1 != hash2
def test_hash(mock_packages, config):
ph.package_hash("hash-test1@1.2")
def test_different_variants(tmpdir, mock_packages, config):
def test_different_variants(mock_packages, config):
spec1 = Spec("hash-test1@1.2 +variantx")
spec2 = Spec("hash-test1@1.2 +varianty")
assert ph.package_hash(spec1) == ph.package_hash(spec2)
def test_all_same_but_name(tmpdir, mock_packages, config):
def test_all_same_but_name(mock_packages, config):
spec1 = Spec("hash-test1@1.2")
spec2 = Spec("hash-test2@1.2")
compare_sans_name(True, spec1, spec2)
@ -37,7 +63,7 @@ def test_all_same_but_name(tmpdir, mock_packages, config):
compare_sans_name(True, spec1, spec2)
def test_all_same_but_archive_hash(tmpdir, mock_packages, config):
def test_all_same_but_archive_hash(mock_packages, config):
"""
Archive hash is not intended to be reflected in Package hash.
"""
@ -46,33 +72,60 @@ def test_all_same_but_archive_hash(tmpdir, mock_packages, config):
compare_sans_name(True, spec1, spec2)
def test_all_same_but_patch_contents(tmpdir, mock_packages, config):
def test_all_same_but_patch_contents(mock_packages, config):
spec1 = Spec("hash-test1@1.1")
spec2 = Spec("hash-test2@1.1")
compare_sans_name(True, spec1, spec2)
def test_all_same_but_patches_to_apply(tmpdir, mock_packages, config):
def test_all_same_but_patches_to_apply(mock_packages, config):
spec1 = Spec("hash-test1@1.4")
spec2 = Spec("hash-test2@1.4")
compare_sans_name(True, spec1, spec2)
def test_all_same_but_install(tmpdir, mock_packages, config):
def test_all_same_but_install(mock_packages, config):
spec1 = Spec("hash-test1@1.5")
spec2 = Spec("hash-test2@1.5")
compare_sans_name(False, spec1, spec2)
def compare_sans_name(eq, spec1, spec2):
content1 = ph.package_content(spec1)
content1 = content1.replace(spec1.package.__class__.__name__, '')
content2 = ph.package_content(spec2)
content2 = content2.replace(spec2.package.__class__.__name__, '')
if eq:
assert content1 == content2
else:
assert content1 != content2
def test_content_hash_all_same_but_patch_contents(mock_packages, config):
spec1 = Spec("hash-test1@1.1").concretized()
spec2 = Spec("hash-test2@1.1").concretized()
compare_hash_sans_name(False, spec1, spec2)
def test_content_hash_different_variants(mock_packages, config):
spec1 = Spec("hash-test1@1.2 +variantx").concretized()
spec2 = Spec("hash-test2@1.2 ~variantx").concretized()
compare_hash_sans_name(True, spec1, spec2)
def test_content_hash_cannot_get_details_from_ast(mock_packages, config):
"""Packages hash-test1 and hash-test3 would be considered the same
except that hash-test3 conditionally executes a phase based on
a "when" directive that Spack cannot evaluate by examining the
AST. This test ensures that Spack can compute a content hash
for hash-test3. If Spack cannot determine when a phase applies,
it adds it by default, so the test also ensures that the hashes
differ where Spack includes a phase on account of AST-examination
failure.
"""
spec3 = Spec("hash-test1@1.7").concretized()
spec4 = Spec("hash-test3@1.7").concretized()
compare_hash_sans_name(False, spec3, spec4)
def test_content_hash_all_same_but_archive_hash(mock_packages, config):
spec1 = Spec("hash-test1@1.3").concretized()
spec2 = Spec("hash-test2@1.3").concretized()
compare_hash_sans_name(False, spec1, spec2)
def test_content_hash_parse_dynamic_function_call(mock_packages, config):
spec = Spec("hash-test4").concretized()
spec.package.content_hash()
many_strings = '''\
@ -184,8 +237,9 @@ def test_package_hash_consistency(package_spec, expected_hash):
"""
spec = Spec(package_spec)
filename = os.path.join(datadir, "%s.txt" % spec.name)
print(ph.canonical_source(spec, filename))
h = ph.canonical_source_hash(spec, filename)
with open(filename) as f:
source = f.read()
h = ph.package_hash(spec, source=source)
assert expected_hash == h
@ -214,13 +268,9 @@ def some_function(self):
"""
def test_multimethod_resolution(tmpdir):
when_pkg = tmpdir.join("pkg.py")
with when_pkg.open("w") as f:
f.write(many_multimethods)
def test_multimethod_resolution():
# all are false but the default
filtered = ph.canonical_source("pkg@4.0", str(when_pkg))
filtered = ph.canonical_source("pkg@4.0", source=many_multimethods)
assert "ONE" in filtered
assert "TWO" not in filtered
assert "THREE" not in filtered
@ -228,7 +278,7 @@ def test_multimethod_resolution(tmpdir):
assert "FIVE" in filtered
# we know first @when overrides default and others are false
filtered = ph.canonical_source("pkg@1.0", str(when_pkg))
filtered = ph.canonical_source("pkg@1.0", source=many_multimethods)
assert "ONE" not in filtered
assert "TWO" in filtered
assert "THREE" not in filtered
@ -236,7 +286,7 @@ def test_multimethod_resolution(tmpdir):
assert "FIVE" in filtered
# we know last @when overrides default and others are false
filtered = ph.canonical_source("pkg@3.0", str(when_pkg))
filtered = ph.canonical_source("pkg@3.0", source=many_multimethods)
assert "ONE" not in filtered
assert "TWO" not in filtered
assert "THREE" not in filtered
@ -244,7 +294,7 @@ def test_multimethod_resolution(tmpdir):
assert "FIVE" in filtered
# we don't know if default or THREE will win, include both
filtered = ph.canonical_source("pkg@2.0", str(when_pkg))
filtered = ph.canonical_source("pkg@2.0", source=many_multimethods)
assert "ONE" in filtered
assert "TWO" not in filtered
assert "THREE" in filtered
@ -280,13 +330,9 @@ def some_function(self):
"""
def test_more_dynamic_multimethod_resolution(tmpdir):
when_pkg = tmpdir.join("pkg.py")
with when_pkg.open("w") as f:
f.write(more_dynamic_multimethods)
def test_more_dynamic_multimethod_resolution():
# we know the first one is the only one that can win.
filtered = ph.canonical_source("pkg@4.0", str(when_pkg))
filtered = ph.canonical_source("pkg@4.0", source=more_dynamic_multimethods)
assert "ONE" in filtered
assert "TWO" not in filtered
assert "THREE" not in filtered
@ -294,7 +340,7 @@ def test_more_dynamic_multimethod_resolution(tmpdir):
assert "FIVE" in filtered
# now we have to include ONE and TWO because ONE may win dynamically.
filtered = ph.canonical_source("pkg@1.0", str(when_pkg))
filtered = ph.canonical_source("pkg@1.0", source=more_dynamic_multimethods)
assert "ONE" in filtered
assert "TWO" in filtered
assert "THREE" not in filtered
@ -303,7 +349,7 @@ def test_more_dynamic_multimethod_resolution(tmpdir):
# we know FOUR is true and TWO and THREE are false, but ONE may
# still win dynamically.
filtered = ph.canonical_source("pkg@3.0", str(when_pkg))
filtered = ph.canonical_source("pkg@3.0", source=more_dynamic_multimethods)
assert "ONE" in filtered
assert "TWO" not in filtered
assert "THREE" not in filtered
@ -311,7 +357,7 @@ def test_more_dynamic_multimethod_resolution(tmpdir):
assert "FIVE" in filtered
# TWO and FOUR can't be satisfied, but ONE or THREE could win
filtered = ph.canonical_source("pkg@2.0", str(when_pkg))
filtered = ph.canonical_source("pkg@2.0", source=more_dynamic_multimethods)
assert "ONE" in filtered
assert "TWO" not in filtered
assert "THREE" in filtered

View File

@ -4,7 +4,6 @@
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import ast
import hashlib
import spack.directives
import spack.error
@ -217,45 +216,62 @@ def visit_FunctionDef(self, func): # noqa
return func
def package_content(spec):
return ast.dump(package_ast(spec))
def canonical_source(spec, filter_multimethods=True, source=None):
"""Get canonical source for a spec's package.py by unparsing its AST.
def canonical_source(spec, filename=None, filter_multimethods=True):
Arguments:
filter_multimethods (bool): By default, filter multimethods out of the
AST if they are known statically to be unused. Supply False to disable.
source (str): Optionally provide a string to read python code from.
"""
return unparse(
package_ast(spec, filename, filter_multimethods),
package_ast(spec, filter_multimethods, source=source),
py_ver_consistent=True,
)
def canonical_source_hash(spec, filename=None):
source = canonical_source(spec, filename)
def package_hash(spec, source=None):
"""Get a hash of a package's canonical source code.
This function is used to determine whether a spec needs a rebuild when a
package's source code changes.
Arguments:
source (str): Optionally provide a string to read python code from.
"""
source = canonical_source(spec, filter_multimethods=True, source=source)
return spack.util.hash.b32_hash(source)
def package_hash(spec, content=None):
if content is None:
content = package_content(spec)
return hashlib.sha256(content.encode('utf-8')).digest().lower()
def package_ast(spec, filter_multimethods=True, source=None):
"""Get the AST for the ``package.py`` file corresponding to ``spec``.
def package_ast(spec, filename=None, filter_multimethods=True):
Arguments:
filter_multimethods (bool): By default, filter multimethods out of the
AST if they are known statically to be unused. Supply False to disable.
source (str): Optionally provide a string to read python code from.
"""
spec = spack.spec.Spec(spec)
if not filename:
if source is None:
filename = spack.repo.path.filename_for_package_name(spec.name)
with open(filename) as f:
source = f.read()
with open(filename) as f:
text = f.read()
root = ast.parse(text)
# create an AST
root = ast.parse(source)
# remove docstrings and directives from the package AST
root = RemoveDocstrings().visit(root)
RemoveDirectives(spec).visit(root)
root = RemoveDirectives(spec).visit(root)
if filter_multimethods:
# visit nodes and build up a dictionary of methods (no need to assign)
tagger = TagMultiMethods(spec)
tagger.visit(root)
# transform AST using tagged methods
root = ResolveMultiMethods(tagger.methods).visit(root)
return root