package_hash: switch to using canonical source instead of AST repr

We are planning to switch to using full hashes for Spack specs, which means that the
package hash will be included in the deployment descriptor. This means we need a more
robust package hash than simply dumping the `repr` of the AST.

The AST repr that we previously used for package content is unreliable because it can
vary between python versions (Python's AST actually changes fairly frequently).

- [x] change `package_hash`, `package_ast`, and `canonical_source` to accept a string for
      alternate source instead of a filename.
- [x] consolidate package hash tests in `test/util/package_hash.py`.
- [x] remove old `package_content` method.
- [x] make `package_hash` do what `canonical_source_hash` was doing before.
- [x] modify `content_hash` in `package.py` to use the new `package_hash` function.

Co-authored-by: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com>
This commit is contained in:
Todd Gamblin 2021-12-23 11:43:55 -08:00 committed by Greg Becker
parent 39afe946a0
commit 106ae7abe6
4 changed files with 120 additions and 108 deletions

View File

@ -1554,7 +1554,8 @@ def content_hash(self, content=None):
hash_content.append(source_id.encode('utf-8')) hash_content.append(source_id.encode('utf-8'))
hash_content.extend(':'.join((p.sha256, str(p.level))).encode('utf-8') hash_content.extend(':'.join((p.sha256, str(p.level))).encode('utf-8')
for p in self.spec.patches) for p in self.spec.patches)
hash_content.append(package_hash(self.spec, content)) hash_content.append(package_hash(self.spec, source=content).encode('utf-8'))
b32_hash = base64.b32encode( b32_hash = base64.b32encode(
hashlib.sha256(bytes().join( hashlib.sha256(bytes().join(
sorted(hash_content))).digest()).lower() sorted(hash_content))).digest()).lower()

View File

@ -13,15 +13,9 @@
from spack.paths import mock_packages_path from spack.paths import mock_packages_path
from spack.spec import Spec from spack.spec import Spec
from spack.util.naming import mod_to_class from spack.util.naming import mod_to_class
from spack.util.package_hash import package_content
from spack.version import VersionChecksumError from spack.version import VersionChecksumError
def _generate_content_strip_name(spec):
content = package_content(spec)
return content.replace(spec.package.__class__.__name__, '')
@pytest.mark.usefixtures('config', 'mock_packages') @pytest.mark.usefixtures('config', 'mock_packages')
class TestPackage(object): class TestPackage(object):
def test_load_package(self): def test_load_package(self):
@ -58,51 +52,6 @@ def test_package_class_names(self):
assert 'Pmgrcollective' == mod_to_class('PmgrCollective') assert 'Pmgrcollective' == mod_to_class('PmgrCollective')
assert '_3db' == mod_to_class('3db') assert '_3db' == mod_to_class('3db')
def test_content_hash_all_same_but_patch_contents(self):
spec1 = Spec("hash-test1@1.1").concretized()
spec2 = Spec("hash-test2@1.1").concretized()
content1 = _generate_content_strip_name(spec1)
content2 = _generate_content_strip_name(spec2)
assert spec1.package.content_hash(content=content1) != \
spec2.package.content_hash(content=content2)
def test_content_hash_different_variants(self):
spec1 = Spec("hash-test1@1.2 +variantx").concretized()
spec2 = Spec("hash-test2@1.2 ~variantx").concretized()
content1 = _generate_content_strip_name(spec1)
content2 = _generate_content_strip_name(spec2)
assert spec1.package.content_hash(content=content1) == \
spec2.package.content_hash(content=content2)
def test_content_hash_cannot_get_details_from_ast(self):
"""Packages hash-test1 and hash-test3 would be considered the same
except that hash-test3 conditionally executes a phase based on
a "when" directive that Spack cannot evaluate by examining the
AST. This test ensures that Spack can compute a content hash
for hash-test3. If Spack cannot determine when a phase applies,
it adds it by default, so the test also ensures that the hashes
differ where Spack includes a phase on account of AST-examination
failure.
"""
spec3 = Spec("hash-test1@1.7").concretized()
spec4 = Spec("hash-test3@1.7").concretized()
content3 = _generate_content_strip_name(spec3)
content4 = _generate_content_strip_name(spec4)
assert(spec3.package.content_hash(content=content3) !=
spec4.package.content_hash(content=content4))
def test_all_same_but_archive_hash(self):
spec1 = Spec("hash-test1@1.3").concretized()
spec2 = Spec("hash-test2@1.3").concretized()
content1 = _generate_content_strip_name(spec1)
content2 = _generate_content_strip_name(spec2)
assert spec1.package.content_hash(content=content1) != \
spec2.package.content_hash(content=content2)
def test_parse_dynamic_function_call(self):
spec = Spec("hash-test4").concretized()
spec.package.content_hash()
# Below tests target direct imports of spack packages from the # Below tests target direct imports of spack packages from the
# spack.pkg namespace # spack.pkg namespace
def test_import_package(self): def test_import_package(self):

View File

@ -17,17 +17,43 @@
datadir = os.path.join(spack.paths.test_path, "data", "unparse") datadir = os.path.join(spack.paths.test_path, "data", "unparse")
def test_hash(tmpdir, mock_packages, config): def compare_sans_name(eq, spec1, spec2):
content1 = ph.canonical_source(spec1)
content1 = content1.replace(spec1.package.__class__.__name__, 'TestPackage')
content2 = ph.canonical_source(spec2)
content2 = content2.replace(spec2.package.__class__.__name__, 'TestPackage')
if eq:
assert content1 == content2
else:
assert content1 != content2
def compare_hash_sans_name(eq, spec1, spec2):
content1 = ph.canonical_source(spec1)
content1 = content1.replace(spec1.package.__class__.__name__, 'TestPackage')
hash1 = spec1.package.content_hash(content=content1)
content2 = ph.canonical_source(spec2)
content2 = content2.replace(spec2.package.__class__.__name__, 'TestPackage')
hash2 = spec2.package.content_hash(content=content2)
if eq:
assert hash1 == hash2
else:
assert hash1 != hash2
def test_hash(mock_packages, config):
ph.package_hash("hash-test1@1.2") ph.package_hash("hash-test1@1.2")
def test_different_variants(tmpdir, mock_packages, config): def test_different_variants(mock_packages, config):
spec1 = Spec("hash-test1@1.2 +variantx") spec1 = Spec("hash-test1@1.2 +variantx")
spec2 = Spec("hash-test1@1.2 +varianty") spec2 = Spec("hash-test1@1.2 +varianty")
assert ph.package_hash(spec1) == ph.package_hash(spec2) assert ph.package_hash(spec1) == ph.package_hash(spec2)
def test_all_same_but_name(tmpdir, mock_packages, config): def test_all_same_but_name(mock_packages, config):
spec1 = Spec("hash-test1@1.2") spec1 = Spec("hash-test1@1.2")
spec2 = Spec("hash-test2@1.2") spec2 = Spec("hash-test2@1.2")
compare_sans_name(True, spec1, spec2) compare_sans_name(True, spec1, spec2)
@ -37,7 +63,7 @@ def test_all_same_but_name(tmpdir, mock_packages, config):
compare_sans_name(True, spec1, spec2) compare_sans_name(True, spec1, spec2)
def test_all_same_but_archive_hash(tmpdir, mock_packages, config): def test_all_same_but_archive_hash(mock_packages, config):
""" """
Archive hash is not intended to be reflected in Package hash. Archive hash is not intended to be reflected in Package hash.
""" """
@ -46,33 +72,60 @@ def test_all_same_but_archive_hash(tmpdir, mock_packages, config):
compare_sans_name(True, spec1, spec2) compare_sans_name(True, spec1, spec2)
def test_all_same_but_patch_contents(tmpdir, mock_packages, config): def test_all_same_but_patch_contents(mock_packages, config):
spec1 = Spec("hash-test1@1.1") spec1 = Spec("hash-test1@1.1")
spec2 = Spec("hash-test2@1.1") spec2 = Spec("hash-test2@1.1")
compare_sans_name(True, spec1, spec2) compare_sans_name(True, spec1, spec2)
def test_all_same_but_patches_to_apply(tmpdir, mock_packages, config): def test_all_same_but_patches_to_apply(mock_packages, config):
spec1 = Spec("hash-test1@1.4") spec1 = Spec("hash-test1@1.4")
spec2 = Spec("hash-test2@1.4") spec2 = Spec("hash-test2@1.4")
compare_sans_name(True, spec1, spec2) compare_sans_name(True, spec1, spec2)
def test_all_same_but_install(tmpdir, mock_packages, config): def test_all_same_but_install(mock_packages, config):
spec1 = Spec("hash-test1@1.5") spec1 = Spec("hash-test1@1.5")
spec2 = Spec("hash-test2@1.5") spec2 = Spec("hash-test2@1.5")
compare_sans_name(False, spec1, spec2) compare_sans_name(False, spec1, spec2)
def compare_sans_name(eq, spec1, spec2): def test_content_hash_all_same_but_patch_contents(mock_packages, config):
content1 = ph.package_content(spec1) spec1 = Spec("hash-test1@1.1").concretized()
content1 = content1.replace(spec1.package.__class__.__name__, '') spec2 = Spec("hash-test2@1.1").concretized()
content2 = ph.package_content(spec2) compare_hash_sans_name(False, spec1, spec2)
content2 = content2.replace(spec2.package.__class__.__name__, '')
if eq:
assert content1 == content2 def test_content_hash_different_variants(mock_packages, config):
else: spec1 = Spec("hash-test1@1.2 +variantx").concretized()
assert content1 != content2 spec2 = Spec("hash-test2@1.2 ~variantx").concretized()
compare_hash_sans_name(True, spec1, spec2)
def test_content_hash_cannot_get_details_from_ast(mock_packages, config):
"""Packages hash-test1 and hash-test3 would be considered the same
except that hash-test3 conditionally executes a phase based on
a "when" directive that Spack cannot evaluate by examining the
AST. This test ensures that Spack can compute a content hash
for hash-test3. If Spack cannot determine when a phase applies,
it adds it by default, so the test also ensures that the hashes
differ where Spack includes a phase on account of AST-examination
failure.
"""
spec3 = Spec("hash-test1@1.7").concretized()
spec4 = Spec("hash-test3@1.7").concretized()
compare_hash_sans_name(False, spec3, spec4)
def test_content_hash_all_same_but_archive_hash(mock_packages, config):
spec1 = Spec("hash-test1@1.3").concretized()
spec2 = Spec("hash-test2@1.3").concretized()
compare_hash_sans_name(False, spec1, spec2)
def test_content_hash_parse_dynamic_function_call(mock_packages, config):
spec = Spec("hash-test4").concretized()
spec.package.content_hash()
many_strings = '''\ many_strings = '''\
@ -184,8 +237,9 @@ def test_package_hash_consistency(package_spec, expected_hash):
""" """
spec = Spec(package_spec) spec = Spec(package_spec)
filename = os.path.join(datadir, "%s.txt" % spec.name) filename = os.path.join(datadir, "%s.txt" % spec.name)
print(ph.canonical_source(spec, filename)) with open(filename) as f:
h = ph.canonical_source_hash(spec, filename) source = f.read()
h = ph.package_hash(spec, source=source)
assert expected_hash == h assert expected_hash == h
@ -214,13 +268,9 @@ def some_function(self):
""" """
def test_multimethod_resolution(tmpdir): def test_multimethod_resolution():
when_pkg = tmpdir.join("pkg.py")
with when_pkg.open("w") as f:
f.write(many_multimethods)
# all are false but the default # all are false but the default
filtered = ph.canonical_source("pkg@4.0", str(when_pkg)) filtered = ph.canonical_source("pkg@4.0", source=many_multimethods)
assert "ONE" in filtered assert "ONE" in filtered
assert "TWO" not in filtered assert "TWO" not in filtered
assert "THREE" not in filtered assert "THREE" not in filtered
@ -228,7 +278,7 @@ def test_multimethod_resolution(tmpdir):
assert "FIVE" in filtered assert "FIVE" in filtered
# we know first @when overrides default and others are false # we know first @when overrides default and others are false
filtered = ph.canonical_source("pkg@1.0", str(when_pkg)) filtered = ph.canonical_source("pkg@1.0", source=many_multimethods)
assert "ONE" not in filtered assert "ONE" not in filtered
assert "TWO" in filtered assert "TWO" in filtered
assert "THREE" not in filtered assert "THREE" not in filtered
@ -236,7 +286,7 @@ def test_multimethod_resolution(tmpdir):
assert "FIVE" in filtered assert "FIVE" in filtered
# we know last @when overrides default and others are false # we know last @when overrides default and others are false
filtered = ph.canonical_source("pkg@3.0", str(when_pkg)) filtered = ph.canonical_source("pkg@3.0", source=many_multimethods)
assert "ONE" not in filtered assert "ONE" not in filtered
assert "TWO" not in filtered assert "TWO" not in filtered
assert "THREE" not in filtered assert "THREE" not in filtered
@ -244,7 +294,7 @@ def test_multimethod_resolution(tmpdir):
assert "FIVE" in filtered assert "FIVE" in filtered
# we don't know if default or THREE will win, include both # we don't know if default or THREE will win, include both
filtered = ph.canonical_source("pkg@2.0", str(when_pkg)) filtered = ph.canonical_source("pkg@2.0", source=many_multimethods)
assert "ONE" in filtered assert "ONE" in filtered
assert "TWO" not in filtered assert "TWO" not in filtered
assert "THREE" in filtered assert "THREE" in filtered
@ -280,13 +330,9 @@ def some_function(self):
""" """
def test_more_dynamic_multimethod_resolution(tmpdir): def test_more_dynamic_multimethod_resolution():
when_pkg = tmpdir.join("pkg.py")
with when_pkg.open("w") as f:
f.write(more_dynamic_multimethods)
# we know the first one is the only one that can win. # we know the first one is the only one that can win.
filtered = ph.canonical_source("pkg@4.0", str(when_pkg)) filtered = ph.canonical_source("pkg@4.0", source=more_dynamic_multimethods)
assert "ONE" in filtered assert "ONE" in filtered
assert "TWO" not in filtered assert "TWO" not in filtered
assert "THREE" not in filtered assert "THREE" not in filtered
@ -294,7 +340,7 @@ def test_more_dynamic_multimethod_resolution(tmpdir):
assert "FIVE" in filtered assert "FIVE" in filtered
# now we have to include ONE and TWO because ONE may win dynamically. # now we have to include ONE and TWO because ONE may win dynamically.
filtered = ph.canonical_source("pkg@1.0", str(when_pkg)) filtered = ph.canonical_source("pkg@1.0", source=more_dynamic_multimethods)
assert "ONE" in filtered assert "ONE" in filtered
assert "TWO" in filtered assert "TWO" in filtered
assert "THREE" not in filtered assert "THREE" not in filtered
@ -303,7 +349,7 @@ def test_more_dynamic_multimethod_resolution(tmpdir):
# we know FOUR is true and TWO and THREE are false, but ONE may # we know FOUR is true and TWO and THREE are false, but ONE may
# still win dynamically. # still win dynamically.
filtered = ph.canonical_source("pkg@3.0", str(when_pkg)) filtered = ph.canonical_source("pkg@3.0", source=more_dynamic_multimethods)
assert "ONE" in filtered assert "ONE" in filtered
assert "TWO" not in filtered assert "TWO" not in filtered
assert "THREE" not in filtered assert "THREE" not in filtered
@ -311,7 +357,7 @@ def test_more_dynamic_multimethod_resolution(tmpdir):
assert "FIVE" in filtered assert "FIVE" in filtered
# TWO and FOUR can't be satisfied, but ONE or THREE could win # TWO and FOUR can't be satisfied, but ONE or THREE could win
filtered = ph.canonical_source("pkg@2.0", str(when_pkg)) filtered = ph.canonical_source("pkg@2.0", source=more_dynamic_multimethods)
assert "ONE" in filtered assert "ONE" in filtered
assert "TWO" not in filtered assert "TWO" not in filtered
assert "THREE" in filtered assert "THREE" in filtered

View File

@ -4,7 +4,6 @@
# SPDX-License-Identifier: (Apache-2.0 OR MIT) # SPDX-License-Identifier: (Apache-2.0 OR MIT)
import ast import ast
import hashlib
import spack.directives import spack.directives
import spack.error import spack.error
@ -217,45 +216,62 @@ def visit_FunctionDef(self, func): # noqa
return func return func
def package_content(spec): def canonical_source(spec, filter_multimethods=True, source=None):
return ast.dump(package_ast(spec)) """Get canonical source for a spec's package.py by unparsing its AST.
Arguments:
def canonical_source(spec, filename=None, filter_multimethods=True): filter_multimethods (bool): By default, filter multimethods out of the
AST if they are known statically to be unused. Supply False to disable.
source (str): Optionally provide a string to read python code from.
"""
return unparse( return unparse(
package_ast(spec, filename, filter_multimethods), package_ast(spec, filter_multimethods, source=source),
py_ver_consistent=True, py_ver_consistent=True,
) )
def canonical_source_hash(spec, filename=None): def package_hash(spec, source=None):
source = canonical_source(spec, filename) """Get a hash of a package's canonical source code.
This function is used to determine whether a spec needs a rebuild when a
package's source code changes.
Arguments:
source (str): Optionally provide a string to read python code from.
"""
source = canonical_source(spec, filter_multimethods=True, source=source)
return spack.util.hash.b32_hash(source) return spack.util.hash.b32_hash(source)
def package_hash(spec, content=None): def package_ast(spec, filter_multimethods=True, source=None):
if content is None: """Get the AST for the ``package.py`` file corresponding to ``spec``.
content = package_content(spec)
return hashlib.sha256(content.encode('utf-8')).digest().lower()
Arguments:
def package_ast(spec, filename=None, filter_multimethods=True): filter_multimethods (bool): By default, filter multimethods out of the
AST if they are known statically to be unused. Supply False to disable.
source (str): Optionally provide a string to read python code from.
"""
spec = spack.spec.Spec(spec) spec = spack.spec.Spec(spec)
if not filename: if source is None:
filename = spack.repo.path.filename_for_package_name(spec.name) filename = spack.repo.path.filename_for_package_name(spec.name)
with open(filename) as f:
source = f.read()
with open(filename) as f: # create an AST
text = f.read() root = ast.parse(source)
root = ast.parse(text)
# remove docstrings and directives from the package AST
root = RemoveDocstrings().visit(root) root = RemoveDocstrings().visit(root)
root = RemoveDirectives(spec).visit(root)
RemoveDirectives(spec).visit(root)
if filter_multimethods: if filter_multimethods:
# visit nodes and build up a dictionary of methods (no need to assign)
tagger = TagMultiMethods(spec) tagger = TagMultiMethods(spec)
tagger.visit(root) tagger.visit(root)
# transform AST using tagged methods
root = ResolveMultiMethods(tagger.methods).visit(root) root = ResolveMultiMethods(tagger.methods).visit(root)
return root return root