spack_yaml.py / spec.py: use dict instead of OrderedDict (#48616)

* for config: let `syaml_dict` inherit from `dict` instead of `OrderedDict`. `syaml_dict` now only exists as a mutable wrapper for yaml related metadata.
* for spec serialization / hashing: use `dict` directly

This is possible since we only support cpython 3.6+ in which dicts are ordered.

This improves performance of hash computation a bit. For a larger spec I'm getting 9.22ms instead of 11.9ms, so 22.5% reduction in runtime.
This commit is contained in:
Harmen Stoppels 2025-01-20 09:33:44 +01:00 committed by GitHub
parent 3bb375a47f
commit 7e41288ca6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 38 additions and 135 deletions

View File

@ -3,8 +3,6 @@
# SPDX-License-Identifier: (Apache-2.0 OR MIT) # SPDX-License-Identifier: (Apache-2.0 OR MIT)
import llnl.util.lang import llnl.util.lang
import spack.util.spack_yaml as syaml
@llnl.util.lang.lazy_lexicographic_ordering @llnl.util.lang.lazy_lexicographic_ordering
class OperatingSystem: class OperatingSystem:
@ -42,4 +40,4 @@ def _cmp_iter(self):
yield self.version yield self.version
def to_dict(self): def to_dict(self):
return syaml.syaml_dict([("name", self.name), ("version", self.version)]) return {"name": self.name, "version": self.version}

View File

@ -578,14 +578,9 @@ def to_dict(self):
target_data = str(self.target) target_data = str(self.target)
else: else:
# Get rid of compiler flag information before turning the uarch into a dict # Get rid of compiler flag information before turning the uarch into a dict
uarch_dict = self.target.to_dict() target_data = self.target.to_dict()
uarch_dict.pop("compilers", None) target_data.pop("compilers", None)
target_data = syaml.syaml_dict(uarch_dict.items()) return {"arch": {"platform": self.platform, "platform_os": self.os, "target": target_data}}
d = syaml.syaml_dict(
[("platform", self.platform), ("platform_os", self.os), ("target", target_data)]
)
return syaml.syaml_dict([("arch", d)])
@staticmethod @staticmethod
def from_dict(d): def from_dict(d):
@ -710,10 +705,7 @@ def _cmp_iter(self):
yield self.versions yield self.versions
def to_dict(self): def to_dict(self):
d = syaml.syaml_dict([("name", self.name)]) return {"compiler": {"name": self.name, **self.versions.to_dict()}}
d.update(self.versions.to_dict())
return syaml.syaml_dict([("compiler", d)])
@staticmethod @staticmethod
def from_dict(d): def from_dict(d):
@ -2290,9 +2282,7 @@ def to_node_dict(self, hash=ht.dag_hash):
Arguments: Arguments:
hash (spack.hash_types.SpecHashDescriptor) type of hash to generate. hash (spack.hash_types.SpecHashDescriptor) type of hash to generate.
""" """
d = syaml.syaml_dict() d = {"name": self.name}
d["name"] = self.name
if self.versions: if self.versions:
d.update(self.versions.to_dict()) d.update(self.versions.to_dict())
@ -2306,7 +2296,7 @@ def to_node_dict(self, hash=ht.dag_hash):
if self.namespace: if self.namespace:
d["namespace"] = self.namespace d["namespace"] = self.namespace
params = syaml.syaml_dict(sorted(v.yaml_entry() for _, v in self.variants.items())) params = dict(sorted(v.yaml_entry() for v in self.variants.values()))
# Only need the string compiler flag for yaml file # Only need the string compiler flag for yaml file
params.update( params.update(
@ -2337,13 +2327,11 @@ def to_node_dict(self, hash=ht.dag_hash):
else: else:
extra_attributes = None extra_attributes = None
d["external"] = syaml.syaml_dict( d["external"] = {
[ "path": self.external_path,
("path", self.external_path), "module": self.external_modules,
("module", self.external_modules), "extra_attributes": extra_attributes,
("extra_attributes", extra_attributes), }
]
)
if not self._concrete: if not self._concrete:
d["concrete"] = False d["concrete"] = False
@ -2374,29 +2362,25 @@ def to_node_dict(self, hash=ht.dag_hash):
# Note: Relies on sorting dict by keys later in algorithm. # Note: Relies on sorting dict by keys later in algorithm.
deps = self._dependencies_dict(depflag=hash.depflag) deps = self._dependencies_dict(depflag=hash.depflag)
if deps: if deps:
deps_list = [] d["dependencies"] = [
for name, edges_for_name in sorted(deps.items()): {
name_tuple = ("name", name) "name": name,
for dspec in edges_for_name: hash.name: dspec.spec._cached_hash(hash),
hash_tuple = (hash.name, dspec.spec._cached_hash(hash)) "parameters": {
parameters_tuple = ( "deptypes": dt.flag_to_tuple(dspec.depflag),
"parameters", "virtuals": dspec.virtuals,
syaml.syaml_dict( },
( }
("deptypes", dt.flag_to_tuple(dspec.depflag)), for name, edges_for_name in sorted(deps.items())
("virtuals", dspec.virtuals), for dspec in edges_for_name
) ]
),
)
ordered_entries = [name_tuple, hash_tuple, parameters_tuple]
deps_list.append(syaml.syaml_dict(ordered_entries))
d["dependencies"] = deps_list
# Name is included in case this is replacing a virtual. # Name is included in case this is replacing a virtual.
if self._build_spec: if self._build_spec:
d["build_spec"] = syaml.syaml_dict( d["build_spec"] = {
[("name", self.build_spec.name), (hash.name, self.build_spec._cached_hash(hash))] "name": self.build_spec.name,
) hash.name: self.build_spec._cached_hash(hash),
}
return d return d
def to_dict(self, hash=ht.dag_hash): def to_dict(self, hash=ht.dag_hash):
@ -2498,10 +2482,7 @@ def to_dict(self, hash=ht.dag_hash):
node_list.append(node) node_list.append(node)
hash_set.add(node_hash) hash_set.add(node_hash)
meta_dict = syaml.syaml_dict([("version", SPECFILE_FORMAT_VERSION)]) return {"spec": {"_meta": {"version": SPECFILE_FORMAT_VERSION}, "nodes": node_list}}
inner_dict = syaml.syaml_dict([("_meta", meta_dict), ("nodes", node_list)])
spec_dict = syaml.syaml_dict([("spec", inner_dict)])
return spec_dict
def node_dict_with_hashes(self, hash=ht.dag_hash): def node_dict_with_hashes(self, hash=ht.dag_hash):
"""Returns a node_dict of this spec with the dag hash added. If this """Returns a node_dict of this spec with the dag hash added. If this
@ -4906,9 +4887,7 @@ def from_node_dict(cls, node):
spec.external_modules = node["external"]["module"] spec.external_modules = node["external"]["module"]
if spec.external_modules is False: if spec.external_modules is False:
spec.external_modules = None spec.external_modules = None
spec.extra_attributes = node["external"].get( spec.extra_attributes = node["external"].get("extra_attributes", {})
"extra_attributes", syaml.syaml_dict()
)
# specs read in are concrete unless marked abstract # specs read in are concrete unless marked abstract
if node.get("concrete", True): if node.get("concrete", True):

View File

@ -7,11 +7,9 @@
The YAML and JSON formats preserve DAG information in the spec. The YAML and JSON formats preserve DAG information in the spec.
""" """
import ast
import collections import collections
import collections.abc import collections.abc
import gzip import gzip
import inspect
import io import io
import json import json
import os import os
@ -28,7 +26,6 @@
import spack.spec import spack.spec
import spack.util.spack_json as sjson import spack.util.spack_json as sjson
import spack.util.spack_yaml as syaml import spack.util.spack_yaml as syaml
import spack.version
from spack.spec import Spec, save_dependency_specfiles from spack.spec import Spec, save_dependency_specfiles
from spack.util.spack_yaml import SpackYAMLError, syaml_dict from spack.util.spack_yaml import SpackYAMLError, syaml_dict
@ -128,7 +125,7 @@ def test_using_ordered_dict(default_mock_concretization, spec_str):
def descend_and_check(iterable, level=0): def descend_and_check(iterable, level=0):
if isinstance(iterable, collections.abc.Mapping): if isinstance(iterable, collections.abc.Mapping):
assert isinstance(iterable, syaml_dict) assert type(iterable) in (syaml_dict, dict)
return descend_and_check(iterable.values(), level=level + 1) return descend_and_check(iterable.values(), level=level + 1)
max_level = level max_level = level
for value in iterable: for value in iterable:
@ -223,71 +220,6 @@ def test_ordered_read_not_required_for_consistent_dag_hash(
assert spec == from_yaml == from_json == from_yaml_rev == from_json_rev assert spec == from_yaml == from_json == from_yaml_rev == from_json_rev
@pytest.mark.parametrize("module", [spack.spec, spack.version])
def test_hashes_use_no_python_dicts(module):
"""Coarse check to make sure we don't use dicts in Spec.to_node_dict().
Python dicts are not guaranteed to iterate in a deterministic order
(at least not in all python versions) so we need to use lists and
syaml_dicts. syaml_dicts are ordered and ensure that hashes in Spack
are deterministic.
This test is intended to handle cases that are not covered by the
consistency checks above, or that would be missed by a dynamic check.
This test traverses the ASTs of functions that are used in our hash
algorithms, finds instances of dictionaries being constructed, and
prints out the line numbers where they occur.
"""
class FindFunctions(ast.NodeVisitor):
"""Find a function definition called to_node_dict."""
def __init__(self):
self.nodes = []
def visit_FunctionDef(self, node):
if node.name in ("to_node_dict", "to_dict", "to_dict_or_value"):
self.nodes.append(node)
class FindDicts(ast.NodeVisitor):
"""Find source locations of dicts in an AST."""
def __init__(self, filename):
self.nodes = []
self.filename = filename
def add_error(self, node):
self.nodes.append(
"Use syaml_dict instead of dict at %s:%s:%s"
% (self.filename, node.lineno, node.col_offset)
)
def visit_Dict(self, node):
self.add_error(node)
def visit_Call(self, node):
name = None
if isinstance(node.func, ast.Name):
name = node.func.id
elif isinstance(node.func, ast.Attribute):
name = node.func.attr
if name == "dict":
self.add_error(node)
find_functions = FindFunctions()
module_ast = ast.parse(inspect.getsource(module))
find_functions.visit(module_ast)
find_dicts = FindDicts(module.__file__)
for node in find_functions.nodes:
find_dicts.visit(node)
# fail with offending lines if we found some dicts.
assert [] == find_dicts.nodes
def reverse_all_dicts(data): def reverse_all_dicts(data):
"""Descend into data and reverse all the dictionaries""" """Descend into data and reverse all the dictionaries"""
if isinstance(data, dict): if isinstance(data, dict):

View File

@ -11,8 +11,6 @@
default unorderd dict. default unorderd dict.
""" """
import collections
import collections.abc
import ctypes import ctypes
import enum import enum
import functools import functools
@ -32,23 +30,20 @@
# Make new classes so we can add custom attributes. # Make new classes so we can add custom attributes.
# Also, use OrderedDict instead of just dict. class syaml_dict(dict):
class syaml_dict(collections.OrderedDict): pass
def __repr__(self):
mappings = (f"{k!r}: {v!r}" for k, v in self.items())
return "{%s}" % ", ".join(mappings)
class syaml_list(list): class syaml_list(list):
__repr__ = list.__repr__ pass
class syaml_str(str): class syaml_str(str):
__repr__ = str.__repr__ pass
class syaml_int(int): class syaml_int(int):
__repr__ = int.__repr__ pass
#: mapping from syaml type -> primitive type #: mapping from syaml type -> primitive type

View File

@ -6,7 +6,6 @@
from bisect import bisect_left from bisect import bisect_left
from typing import Dict, Iterable, Iterator, List, Optional, Tuple, Union from typing import Dict, Iterable, Iterator, List, Optional, Tuple, Union
from spack.util.spack_yaml import syaml_dict
from spack.util.typing import SupportsRichComparison from spack.util.typing import SupportsRichComparison
from .common import ( from .common import (
@ -1044,8 +1043,8 @@ def intersects(self, other: VersionType) -> bool:
def to_dict(self) -> Dict: def to_dict(self) -> Dict:
"""Generate human-readable dict for YAML.""" """Generate human-readable dict for YAML."""
if self.concrete: if self.concrete:
return syaml_dict([("version", str(self[0]))]) return {"version": str(self[0])}
return syaml_dict([("versions", [str(v) for v in self])]) return {"versions": [str(v) for v in self]}
@staticmethod @staticmethod
def from_dict(dictionary) -> "VersionList": def from_dict(dictionary) -> "VersionList":