spack_yaml.py / spec.py: use dict instead of OrderedDict (#48616)

* for config: let `syaml_dict` inherit from `dict` instead of `OrderedDict`. `syaml_dict` now only exists as a mutable wrapper for yaml related metadata.
* for spec serialization / hashing: use `dict` directly

This is possible since we only support cpython 3.6+ in which dicts are ordered.

This improves performance of hash computation a bit. For a larger spec I'm getting 9.22ms instead of 11.9ms, so 22.5% reduction in runtime.
This commit is contained in:
Harmen Stoppels 2025-01-20 09:33:44 +01:00 committed by GitHub
parent 3bb375a47f
commit 7e41288ca6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 38 additions and 135 deletions

View File

@ -3,8 +3,6 @@
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import llnl.util.lang
import spack.util.spack_yaml as syaml
@llnl.util.lang.lazy_lexicographic_ordering
class OperatingSystem:
@ -42,4 +40,4 @@ def _cmp_iter(self):
yield self.version
def to_dict(self):
return syaml.syaml_dict([("name", self.name), ("version", self.version)])
return {"name": self.name, "version": self.version}

View File

@ -578,14 +578,9 @@ def to_dict(self):
target_data = str(self.target)
else:
# Get rid of compiler flag information before turning the uarch into a dict
uarch_dict = self.target.to_dict()
uarch_dict.pop("compilers", None)
target_data = syaml.syaml_dict(uarch_dict.items())
d = syaml.syaml_dict(
[("platform", self.platform), ("platform_os", self.os), ("target", target_data)]
)
return syaml.syaml_dict([("arch", d)])
target_data = self.target.to_dict()
target_data.pop("compilers", None)
return {"arch": {"platform": self.platform, "platform_os": self.os, "target": target_data}}
@staticmethod
def from_dict(d):
@ -710,10 +705,7 @@ def _cmp_iter(self):
yield self.versions
def to_dict(self):
d = syaml.syaml_dict([("name", self.name)])
d.update(self.versions.to_dict())
return syaml.syaml_dict([("compiler", d)])
return {"compiler": {"name": self.name, **self.versions.to_dict()}}
@staticmethod
def from_dict(d):
@ -2290,9 +2282,7 @@ def to_node_dict(self, hash=ht.dag_hash):
Arguments:
hash (spack.hash_types.SpecHashDescriptor) type of hash to generate.
"""
d = syaml.syaml_dict()
d["name"] = self.name
d = {"name": self.name}
if self.versions:
d.update(self.versions.to_dict())
@ -2306,7 +2296,7 @@ def to_node_dict(self, hash=ht.dag_hash):
if self.namespace:
d["namespace"] = self.namespace
params = syaml.syaml_dict(sorted(v.yaml_entry() for _, v in self.variants.items()))
params = dict(sorted(v.yaml_entry() for v in self.variants.values()))
# Only need the string compiler flag for yaml file
params.update(
@ -2337,13 +2327,11 @@ def to_node_dict(self, hash=ht.dag_hash):
else:
extra_attributes = None
d["external"] = syaml.syaml_dict(
[
("path", self.external_path),
("module", self.external_modules),
("extra_attributes", extra_attributes),
]
)
d["external"] = {
"path": self.external_path,
"module": self.external_modules,
"extra_attributes": extra_attributes,
}
if not self._concrete:
d["concrete"] = False
@ -2374,29 +2362,25 @@ def to_node_dict(self, hash=ht.dag_hash):
# Note: Relies on sorting dict by keys later in algorithm.
deps = self._dependencies_dict(depflag=hash.depflag)
if deps:
deps_list = []
for name, edges_for_name in sorted(deps.items()):
name_tuple = ("name", name)
for dspec in edges_for_name:
hash_tuple = (hash.name, dspec.spec._cached_hash(hash))
parameters_tuple = (
"parameters",
syaml.syaml_dict(
(
("deptypes", dt.flag_to_tuple(dspec.depflag)),
("virtuals", dspec.virtuals),
)
),
)
ordered_entries = [name_tuple, hash_tuple, parameters_tuple]
deps_list.append(syaml.syaml_dict(ordered_entries))
d["dependencies"] = deps_list
d["dependencies"] = [
{
"name": name,
hash.name: dspec.spec._cached_hash(hash),
"parameters": {
"deptypes": dt.flag_to_tuple(dspec.depflag),
"virtuals": dspec.virtuals,
},
}
for name, edges_for_name in sorted(deps.items())
for dspec in edges_for_name
]
# Name is included in case this is replacing a virtual.
if self._build_spec:
d["build_spec"] = syaml.syaml_dict(
[("name", self.build_spec.name), (hash.name, self.build_spec._cached_hash(hash))]
)
d["build_spec"] = {
"name": self.build_spec.name,
hash.name: self.build_spec._cached_hash(hash),
}
return d
def to_dict(self, hash=ht.dag_hash):
@ -2498,10 +2482,7 @@ def to_dict(self, hash=ht.dag_hash):
node_list.append(node)
hash_set.add(node_hash)
meta_dict = syaml.syaml_dict([("version", SPECFILE_FORMAT_VERSION)])
inner_dict = syaml.syaml_dict([("_meta", meta_dict), ("nodes", node_list)])
spec_dict = syaml.syaml_dict([("spec", inner_dict)])
return spec_dict
return {"spec": {"_meta": {"version": SPECFILE_FORMAT_VERSION}, "nodes": node_list}}
def node_dict_with_hashes(self, hash=ht.dag_hash):
"""Returns a node_dict of this spec with the dag hash added. If this
@ -4906,9 +4887,7 @@ def from_node_dict(cls, node):
spec.external_modules = node["external"]["module"]
if spec.external_modules is False:
spec.external_modules = None
spec.extra_attributes = node["external"].get(
"extra_attributes", syaml.syaml_dict()
)
spec.extra_attributes = node["external"].get("extra_attributes", {})
# specs read in are concrete unless marked abstract
if node.get("concrete", True):

View File

@ -7,11 +7,9 @@
The YAML and JSON formats preserve DAG information in the spec.
"""
import ast
import collections
import collections.abc
import gzip
import inspect
import io
import json
import os
@ -28,7 +26,6 @@
import spack.spec
import spack.util.spack_json as sjson
import spack.util.spack_yaml as syaml
import spack.version
from spack.spec import Spec, save_dependency_specfiles
from spack.util.spack_yaml import SpackYAMLError, syaml_dict
@ -128,7 +125,7 @@ def test_using_ordered_dict(default_mock_concretization, spec_str):
def descend_and_check(iterable, level=0):
if isinstance(iterable, collections.abc.Mapping):
assert isinstance(iterable, syaml_dict)
assert type(iterable) in (syaml_dict, dict)
return descend_and_check(iterable.values(), level=level + 1)
max_level = level
for value in iterable:
@ -223,71 +220,6 @@ def test_ordered_read_not_required_for_consistent_dag_hash(
assert spec == from_yaml == from_json == from_yaml_rev == from_json_rev
@pytest.mark.parametrize("module", [spack.spec, spack.version])
def test_hashes_use_no_python_dicts(module):
"""Coarse check to make sure we don't use dicts in Spec.to_node_dict().
Python dicts are not guaranteed to iterate in a deterministic order
(at least not in all python versions) so we need to use lists and
syaml_dicts. syaml_dicts are ordered and ensure that hashes in Spack
are deterministic.
This test is intended to handle cases that are not covered by the
consistency checks above, or that would be missed by a dynamic check.
This test traverses the ASTs of functions that are used in our hash
algorithms, finds instances of dictionaries being constructed, and
prints out the line numbers where they occur.
"""
class FindFunctions(ast.NodeVisitor):
"""Find a function definition called to_node_dict."""
def __init__(self):
self.nodes = []
def visit_FunctionDef(self, node):
if node.name in ("to_node_dict", "to_dict", "to_dict_or_value"):
self.nodes.append(node)
class FindDicts(ast.NodeVisitor):
"""Find source locations of dicts in an AST."""
def __init__(self, filename):
self.nodes = []
self.filename = filename
def add_error(self, node):
self.nodes.append(
"Use syaml_dict instead of dict at %s:%s:%s"
% (self.filename, node.lineno, node.col_offset)
)
def visit_Dict(self, node):
self.add_error(node)
def visit_Call(self, node):
name = None
if isinstance(node.func, ast.Name):
name = node.func.id
elif isinstance(node.func, ast.Attribute):
name = node.func.attr
if name == "dict":
self.add_error(node)
find_functions = FindFunctions()
module_ast = ast.parse(inspect.getsource(module))
find_functions.visit(module_ast)
find_dicts = FindDicts(module.__file__)
for node in find_functions.nodes:
find_dicts.visit(node)
# fail with offending lines if we found some dicts.
assert [] == find_dicts.nodes
def reverse_all_dicts(data):
"""Descend into data and reverse all the dictionaries"""
if isinstance(data, dict):

View File

@ -11,8 +11,6 @@
default unorderd dict.
"""
import collections
import collections.abc
import ctypes
import enum
import functools
@ -32,23 +30,20 @@
# Make new classes so we can add custom attributes.
# Also, use OrderedDict instead of just dict.
class syaml_dict(collections.OrderedDict):
def __repr__(self):
mappings = (f"{k!r}: {v!r}" for k, v in self.items())
return "{%s}" % ", ".join(mappings)
class syaml_dict(dict):
pass
class syaml_list(list):
__repr__ = list.__repr__
pass
class syaml_str(str):
__repr__ = str.__repr__
pass
class syaml_int(int):
__repr__ = int.__repr__
pass
#: mapping from syaml type -> primitive type

View File

@ -6,7 +6,6 @@
from bisect import bisect_left
from typing import Dict, Iterable, Iterator, List, Optional, Tuple, Union
from spack.util.spack_yaml import syaml_dict
from spack.util.typing import SupportsRichComparison
from .common import (
@ -1044,8 +1043,8 @@ def intersects(self, other: VersionType) -> bool:
def to_dict(self) -> Dict:
"""Generate human-readable dict for YAML."""
if self.concrete:
return syaml_dict([("version", str(self[0]))])
return syaml_dict([("versions", [str(v) for v in self])])
return {"version": str(self[0])}
return {"versions": [str(v) for v in self]}
@staticmethod
def from_dict(dictionary) -> "VersionList":