package_hash: remove all unassigned strings, not just docstrings

Some packages use top-level unassigned strings instead of comments, either just after a
docstring on in the body somewhere else. Ignore those strings becasue they have no
effect on package behavior.

- [x] adjust RemoveDocstrings to remove all free-standing strings.
- [x] move tests for util/package_hash.py to test/util/package_hash.py

Co-authored-by: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com>
This commit is contained in:
Todd Gamblin 2021-12-23 00:41:45 -08:00 committed by Greg Becker
parent 572fbf4f49
commit 8880a00862
2 changed files with 77 additions and 9 deletions

View File

@ -3,18 +3,22 @@
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import ast
import spack.paths
import spack.util.package_hash as ph
from spack.spec import Spec
from spack.util.package_hash import package_content, package_hash
from spack.util.unparse import unparse
def test_hash(tmpdir, mock_packages, config):
package_hash("hash-test1@1.2")
ph.package_hash("hash-test1@1.2")
def test_different_variants(tmpdir, mock_packages, config):
spec1 = Spec("hash-test1@1.2 +variantx")
spec2 = Spec("hash-test1@1.2 +varianty")
assert package_hash(spec1) == package_hash(spec2)
assert ph.package_hash(spec1) == ph.package_hash(spec2)
def test_all_same_but_name(tmpdir, mock_packages, config):
@ -55,11 +59,67 @@ def test_all_same_but_install(tmpdir, mock_packages, config):
def compare_sans_name(eq, spec1, spec2):
content1 = package_content(spec1)
content1 = ph.package_content(spec1)
content1 = content1.replace(spec1.package.__class__.__name__, '')
content2 = package_content(spec2)
content2 = ph.package_content(spec2)
content2 = content2.replace(spec2.package.__class__.__name__, '')
if eq:
assert content1 == content2
else:
assert content1 != content2
many_strings = '''\
"""ONE"""
"""TWO"""
var = "THREE" # make sure this is not removed
"FOUR"
class ManyDocstrings:
"""FIVE"""
"""SIX"""
x = "SEVEN"
def method1():
"""EIGHT"""
print("NINE")
"TEN"
for i in range(10):
print(i)
def method2():
"""ELEVEN"""
return "TWELVE"
'''
def test_remove_docstrings():
tree = ast.parse(many_strings)
tree = ph.RemoveDocstrings().visit(tree)
unparsed = unparse(tree)
# make sure the methods are preserved
assert "method1" in unparsed
assert "method2" in unparsed
# all of these are unassigned and should be removed
assert "ONE" not in unparsed
assert "TWO" not in unparsed
assert "FOUR" not in unparsed
assert "FIVE" not in unparsed
assert "SIX" not in unparsed
assert "EIGHT" not in unparsed
assert "TEN" not in unparsed
assert "ELEVEN" not in unparsed
# these are used in legitimate expressions
assert "THREE" in unparsed
assert "SEVEN" in unparsed
assert "NINE" in unparsed
assert "TWELVE" in unparsed

View File

@ -15,12 +15,20 @@
class RemoveDocstrings(ast.NodeTransformer):
"""Transformer that removes docstrings from a Python AST."""
"""Transformer that removes docstrings from a Python AST.
This removes *all* strings that aren't on the RHS of an assignment statement from
the body of functions, classes, and modules -- even if they're not directly after
the declaration.
"""
def remove_docstring(self, node):
def unused_string(node):
"""Criteria for unassigned body strings."""
return isinstance(node, ast.Expr) and isinstance(node.value, ast.Str)
if node.body:
if isinstance(node.body[0], ast.Expr) and \
isinstance(node.body[0].value, ast.Str):
node.body.pop(0)
node.body = [child for child in node.body if not unused_string(child)]
self.generic_visit(node)
return node