spec.py: make hashing of extra_attributes order independent (#48615)

This commit is contained in:
Harmen Stoppels 2025-01-17 13:50:36 +01:00 committed by GitHub
parent 847f560a6e
commit bb43fa5444
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 102 additions and 98 deletions

View File

@ -1356,14 +1356,8 @@ def _test_detection_by_executable(pkgs, debug_log, error_cls):
def _compare_extra_attribute(_expected, _detected, *, _spec): def _compare_extra_attribute(_expected, _detected, *, _spec):
result = [] result = []
# Check items are of the same type
if not isinstance(_detected, type(_expected)):
_summary = f'{pkg_name}: error when trying to detect "{_expected}"'
_details = [f"{_detected} was detected instead"]
return [error_cls(summary=_summary, details=_details)]
# If they are string expected is a regex # If they are string expected is a regex
if isinstance(_expected, str): if isinstance(_expected, str) and isinstance(_detected, str):
try: try:
_regex = re.compile(_expected) _regex = re.compile(_expected)
except re.error: except re.error:
@ -1379,7 +1373,7 @@ def _compare_extra_attribute(_expected, _detected, *, _spec):
_details = [f"{_detected} does not match the regex"] _details = [f"{_detected} does not match the regex"]
return [error_cls(summary=_summary, details=_details)] return [error_cls(summary=_summary, details=_details)]
if isinstance(_expected, dict): elif isinstance(_expected, dict) and isinstance(_detected, dict):
_not_detected = set(_expected.keys()) - set(_detected.keys()) _not_detected = set(_expected.keys()) - set(_detected.keys())
if _not_detected: if _not_detected:
_summary = f"{pkg_name}: cannot detect some attributes for spec {_spec}" _summary = f"{pkg_name}: cannot detect some attributes for spec {_spec}"
@ -1394,6 +1388,10 @@ def _compare_extra_attribute(_expected, _detected, *, _spec):
result.extend( result.extend(
_compare_extra_attribute(_expected[_key], _detected[_key], _spec=_spec) _compare_extra_attribute(_expected[_key], _detected[_key], _spec=_spec)
) )
else:
_summary = f'{pkg_name}: error when trying to detect "{_expected}"'
_details = [f"{_detected} was detected instead"]
return [error_cls(summary=_summary, details=_details)]
return result return result

View File

@ -2332,11 +2332,16 @@ def to_node_dict(self, hash=ht.dag_hash):
) )
if self.external: if self.external:
if self.extra_attributes:
extra_attributes = syaml.sorted_dict(self.extra_attributes)
else:
extra_attributes = None
d["external"] = syaml.syaml_dict( d["external"] = syaml.syaml_dict(
[ [
("path", self.external_path), ("path", self.external_path),
("module", self.external_modules), ("module", self.external_modules),
("extra_attributes", self.extra_attributes), ("extra_attributes", extra_attributes),
] ]
) )

View File

@ -137,3 +137,19 @@ def test_round_trip_configuration(initial_content, expected_final_content, tmp_p
expected_final_content = initial_content expected_final_content = initial_content
assert final_content.getvalue() == expected_final_content assert final_content.getvalue() == expected_final_content
def test_sorted_dict():
    """Check that ``syaml.sorted_dict`` sorts dictionary keys at every nesting level.

    The input mixes dictionaries nested inside lists and tuples. Sequences must keep their
    element order and container type; only dictionary keys are reordered.
    """
    result = syaml.sorted_dict(
        {
            "z": 0,
            "y": [{"x": 0, "w": [2, 1, 0]}, 0],
            "v": ({"u": 0, "t": 0, "s": 0}, 0, {"r": 0, "q": 0}),
            "p": 0,
        }
    )
    expected = {
        "p": 0,
        "v": ({"s": 0, "t": 0, "u": 0}, 0, {"q": 0, "r": 0}),
        "y": [{"w": [2, 1, 0], "x": 0}, 0],
        "z": 0,
    }

    # Content and sequence types are preserved (list stays list, tuple stays tuple).
    assert result == expected

    # Dictionary equality ignores key order, so the comparison above would also pass for an
    # unsorted result. Pin the iteration order explicitly at every level.
    assert list(result) == ["p", "v", "y", "z"]
    assert list(result["v"][0]) == ["s", "t", "u"]
    assert list(result["v"][2]) == ["q", "r"]
    assert list(result["y"][0]) == ["w", "x"]

View File

@ -21,6 +21,7 @@
import ruamel.yaml import ruamel.yaml
import spack.concretize import spack.concretize
import spack.config
import spack.hash_types as ht import spack.hash_types as ht
import spack.paths import spack.paths
import spack.repo import spack.repo
@ -143,89 +144,83 @@ def descend_and_check(iterable, level=0):
assert level >= 5 assert level >= 5
@pytest.mark.parametrize("spec_str", ["mpileaks ^zmpi", "dttop", "dtuse"])
def test_ordered_read_not_required_for_consistent_dag_hash(
    spec_str, mutable_config: spack.config.Configuration, mock_packages
):
    """Make sure ordered serialization isn't required to preserve hashes.

    For consistent hashes, we require that YAML and JSON serializations have their keys in a
    deterministic order. However, we don't want to require them to be serialized in order. This
    ensures that is not required.
    """
    # Make sure that `extra_attributes` of externals is order independent for hashing.
    # The keys below are deliberately not in sorted order.
    extra_attributes = {
        "compilers": {"c": "/some/path/bin/cc", "cxx": "/some/path/bin/c++"},
        "foo": "bar",
        "baz": "qux",
    }
    mutable_config.set(
        "packages:dtuse",
        {
            "buildable": False,
            "externals": [
                {"spec": "dtuse@=1.0", "prefix": "/usr", "extra_attributes": extra_attributes}
            ],
        },
    )

    spec = spack.concretize.concretize_one(spec_str)

    # Only the "dtuse" root is itself the configured external.
    if spec_str == "dtuse":
        assert spec.external and spec.extra_attributes == extra_attributes

    spec_dict = spec.to_dict(hash=ht.dag_hash)
    spec_yaml = spec.to_yaml()
    spec_json = spec.to_json()

    # Make a spec with dict keys reversed recursively
    spec_dict_rev = reverse_all_dicts(spec_dict)

    # Dump to YAML and JSON
    yaml_string = syaml.dump(spec_dict, default_flow_style=False)
    yaml_string_rev = syaml.dump(spec_dict_rev, default_flow_style=False)
    json_string = sjson.dump(spec_dict)
    json_string_rev = sjson.dump(spec_dict_rev)

    # spec yaml is ordered like the spec dict
    assert yaml_string == spec_yaml
    assert json_string == spec_json

    # reversed string is different from the original, so it *would* generate a different hash
    assert yaml_string != yaml_string_rev
    assert json_string != json_string_rev

    # build specs from the "wrongly" ordered data; each string goes through its own parser
    from_yaml = Spec.from_yaml(yaml_string)
    from_json = Spec.from_json(json_string)
    from_yaml_rev = Spec.from_yaml(yaml_string_rev)
    from_json_rev = Spec.from_json(json_string_rev)

    # Strip spec if we stripped the yaml
    spec = spec.copy(deps=ht.dag_hash.depflag)

    # specs and their hashes are equal to the original
    assert (
        spec.process_hash()
        == from_yaml.process_hash()
        == from_json.process_hash()
        == from_yaml_rev.process_hash()
        == from_json_rev.process_hash()
    )
    assert (
        spec.dag_hash()
        == from_yaml.dag_hash()
        == from_json.dag_hash()
        == from_yaml_rev.dag_hash()
        == from_json_rev.dag_hash()
    )
    assert spec == from_yaml == from_json == from_yaml_rev == from_json_rev
@pytest.mark.parametrize("module", [spack.spec, spack.version]) @pytest.mark.parametrize("module", [spack.spec, spack.version])
@ -296,12 +291,9 @@ def visit_Call(self, node):
def reverse_all_dicts(data):
    """Descend into data and reverse the key order of all the dictionaries.

    Dictionaries are rebuilt with the same type and their items in reverse iteration order;
    lists and tuples keep their element order and type, but their elements are processed
    recursively. Any other value is returned unchanged.
    """
    if isinstance(data, dict):
        # Materialize the items first: reversed() needs a sequence, not a dict view.
        return type(data)((k, reverse_all_dicts(v)) for k, v in reversed(list(data.items())))
    elif isinstance(data, (list, tuple)):
        return type(data)(reverse_all_dicts(elt) for elt in data)
    return data

View File

@ -448,20 +448,13 @@ def _dump_annotated(handler, data, stream=None):
return getvalue() return getvalue()
def sorted_dict(data):
    """Descend into data and sort all dictionary keys.

    Dictionaries are rebuilt with the same type and their keys in sorted order. Lists and
    tuples (including namedtuples) keep their element order and type, but their elements are
    processed recursively. Any other value is returned unchanged.

    Args:
        data: arbitrary nesting of dictionaries, lists, tuples and scalar values

    Returns:
        a copy of ``data`` in which every dictionary iterates over its keys in sorted order
    """
    if isinstance(data, dict):
        # Order by key only, so that values are never compared with each other.
        ordered = sorted(data.items(), key=lambda item: item[0])
        return type(data)((key, sorted_dict(value)) for key, value in ordered)
    elif isinstance(data, tuple) and hasattr(data, "_fields"):
        # namedtuple constructors take one positional argument per field, not an iterable.
        return type(data)(*(sorted_dict(value) for value in data))
    elif isinstance(data, (list, tuple)):
        return type(data)(sorted_dict(value) for value in data)
    return data
def extract_comments(data): def extract_comments(data):