clean up some comments

Signed-off-by: Todd Gamblin <tgamblin@llnl.gov>
merge three branches into one
2025-05-20 09:25:20 -07:00 · 2025-05-20 01:02:49 -07:00 · 2025-05-19 15:59:03 -07:00 · 2025-05-19 15:59:03 -07:00 · 2025-05-19 15:59:03 -07:00 · 2025-05-19 15:59:02 -07:00
3 changed files with 327 additions and 41 deletions
--- a/lib/spack/llnl/util/lang.py
+++ b/lib/spack/llnl/util/lang.py
@ -67,7 +67,7 @@ def index_by(objects, *funcs):
        }

    If any elements in funcs is a string, it is treated as the name
-    of an attribute, and acts like getattr(object, name).  So
+    of an attribute, and acts like ``getattr(object, name)``.  So
    shorthand for the above two indexes would be::

        index1 = index_by(list_of_specs, 'arch', 'compiler')
@ -77,7 +77,8 @@ def index_by(objects, *funcs):

        index1 = index_by(list_of_specs, ('target', 'compiler'))

-    Keys in the resulting dict will look like ('gcc', 'skylake').
+    Keys in the resulting dict will look like ``('gcc', 'skylake')``.
+
    """
    if not funcs:
        return objects
@ -315,7 +316,9 @@ def lazy_lexicographic_ordering(cls, set_hash=True):

    This is a lazy version of the tuple comparison used frequently to
    implement comparison in Python. Given some objects with fields, you
-    might use tuple keys to implement comparison, e.g.::
+    might use tuple keys to implement comparison, e.g.:
+
+    .. code-block:: python

        class Widget:
            def _cmp_key(self):
@ -343,7 +346,9 @@ def __lt__(self):
    Lazy lexicographic comparison maps the tuple comparison shown above
    to generator functions. Instead of comparing based on pre-constructed
    tuple keys, users of this decorator can compare using elements from a
-    generator. So, you'd write::
+    generator. So, you'd write:
+
+    .. code-block:: python

        @lazy_lexicographic_ordering
        class Widget:
@ -366,6 +371,38 @@ def cd_fun():
    only has to worry about writing ``_cmp_iter``, and making sure the
    elements in it are also comparable.

+    In some cases, you may have a fast way to determine whether two
+    objects are equal, e.g. the ``is`` function or an already-computed
+    cryptographic hash. For this, you can implement your own
+    ``_cmp_fast_eq`` function:
+
+    .. code-block:: python
+
+        @lazy_lexicographic_ordering
+        class Widget:
+            def _cmp_iter(self):
+                yield a
+                yield b
+                def cd_fun():
+                    yield c
+                    yield d
+                yield cd_fun
+                yield e
+
+            def _cmp_fast_eq(self, other):
+                return self is other or None
+
+    ``_cmp_fast_eq`` should return:
+
+        * ``True`` if ``self`` is equal to ``other``,
+        * ``False`` if ``self`` is not equal to ``other``, and
+        * ``None`` if it's not known whether they are equal, and the full
+          comparison should be done.
+
+    ``lazy_lexicographic_ordering`` uses ``_cmp_fast_eq`` to short-circuit
+    the comparison if the answer can be determined quickly. If you do not
+    implement it, it defaults to ``self is other or None``.
+
    Some things to note:

      * If a class already has ``__eq__``, ``__ne__``, ``__lt__``,
@ -386,34 +423,40 @@ def cd_fun():
    if not hasattr(cls, "_cmp_iter"):
        raise TypeError(f"'{cls.__name__}' doesn't define _cmp_iter().")

+    # get an equal operation that allows us to short-circuit comparison
+    # if it's not provided, default to `is`
+    _cmp_fast_eq = getattr(cls, "_cmp_fast_eq", lambda x, y: x is y or None)
+
    # comparison operators are implemented in terms of lazy_eq and lazy_lt
    def eq(self, other):
-        if self is other:
-            return True
+        fast_eq = _cmp_fast_eq(self, other)
+        if fast_eq is not None:
+            return fast_eq
        return (other is not None) and lazy_eq(self._cmp_iter, other._cmp_iter)

    def lt(self, other):
-        if self is other:
+        if _cmp_fast_eq(self, other) is True:
            return False
        return (other is not None) and lazy_lt(self._cmp_iter, other._cmp_iter)

    def ne(self, other):
-        if self is other:
-            return False
+        fast_eq = _cmp_fast_eq(self, other)
+        if fast_eq is not None:
+            return not fast_eq
        return (other is None) or not lazy_eq(self._cmp_iter, other._cmp_iter)

    def gt(self, other):
-        if self is other:
+        if _cmp_fast_eq(self, other) is True:
            return False
        return (other is None) or lazy_lt(other._cmp_iter, self._cmp_iter)

    def le(self, other):
-        if self is other:
+        if _cmp_fast_eq(self, other) is True:
            return True
        return (other is not None) and not lazy_lt(other._cmp_iter, self._cmp_iter)

    def ge(self, other):
-        if self is other:
+        if _cmp_fast_eq(self, other) is True:
            return True
        return (other is None) or not lazy_lt(self._cmp_iter, other._cmp_iter)

--- a/lib/spack/spack/spec.py
+++ b/lib/spack/spack/spec.py
@ -961,7 +961,6 @@ def _sort_by_dep_types(dspec: DependencySpec):
    return dspec.depflag


-@lang.lazy_lexicographic_ordering
 class _EdgeMap(collections.abc.Mapping):
    """Represent a collection of edges (DependencySpec objects) in the DAG.

@ -999,21 +998,6 @@ def add(self, edge: DependencySpec) -> None:
    def __str__(self) -> str:
        return f"{{deps: {', '.join(str(d) for d in sorted(self.values()))}}}"

-    def _cmp_iter(self):
-        for item in sorted(itertools.chain.from_iterable(self.edges.values())):
-            yield item
-
-    def copy(self):
-        """Copies this object and returns a clone"""
-        clone = type(self)()
-        clone.store_by_child = self.store_by_child
-
-        # Copy everything from this dict into it.
-        for dspec in itertools.chain.from_iterable(self.values()):
-            clone.add(dspec.copy())
-
-        return clone
-
    def select(
        self,
        *,
@ -3785,26 +3769,152 @@ def eq_node(self, other):
        """Equality with another spec, not including dependencies."""
        return (other is not None) and lang.lazy_eq(self._cmp_node, other._cmp_node)

-    def _cmp_iter(self):
-        """Lazily yield components of self for comparison."""
-
-        for item in self._cmp_node():
-            yield item
-
+    def _cmp_fast_eq(self, other) -> Optional[bool]:
+        """Short-circuit compare with other for equality, for lazy_lexicographic_ordering."""
        # If there is ever a breaking change to hash computation, whether accidental or purposeful,
        # two specs can be identical modulo DAG hash, depending on what time they were concretized
        # From the perspective of many operation in Spack (database, build cache, etc) a different
        # DAG hash means a different spec. Here we ensure that two otherwise identical specs, one
        # serialized before the hash change and one after, are considered different.
-        yield self.dag_hash() if self.concrete else None
+        if self is other:
+            return True

-        def deps():
-            for dep in sorted(itertools.chain.from_iterable(self._dependencies.values())):
-                yield dep.spec.name
-                yield dep.depflag
-                yield hash(dep.spec)
+        if self.concrete and other and other.concrete:
+            return self.dag_hash() == other.dag_hash()

-        yield deps
+        return None
+
+    def _cmp_iter(self):
+        """Lazily yield components of self for comparison."""
+
+        # Spec comparison in Spack needs to be fast, so there are several cases here for
+        # performance. The main places we care about this are:
+        #
+        #   * Abstract specs: there are lots of abstract specs in package.py files,
+        #     which are put into metadata dictionaries and sorted during concretization
+        #     setup. We want comparing abstract specs to be fast.
+        #
+        #   * Concrete specs: concrete specs are bigger and have lots of nodes and
+        #     edges. Because of the graph complexity, we need a full, linear time
+        #     traversal to compare them -- that's pretty much is unavoidable. But they
+        #     also have precoputed cryptographic hashes (dag_hash()), which we can use
+        #     to do fast equality comparison. See _cmp_fast_eq() above for the
+        #     short-circuit logic for hashes.
+        #
+        # A full traversal involves constructing data structurs, visitor objects, etc.,
+        # and it can be expensive if we have to do it to compare a bunch of tiny
+        # abstract specs. Therefore, there are 3 cases below, which avoid calling
+        # `spack.traverse.traverse_edges()` unless necessary.
+        #
+        # WARNING: the cases below need to be consistent, so don't mess with this code
+        # unless you really know what you're doing. Be sure to keep all three consistent.
+        #
+        # All cases lazily yield:
+        #
+        #   1. A generator over nodes
+        #   2. A generator over canonical edges
+        #
+        # Canonical edges have consistent ids defined by breadth-first traversal order. That is,
+        # the root is always 0, dependencies of the root are 1, 2, 3, etc., and so on.
+        #
+        # The three cases are:
+        #
+        #   1. Spec has no dependencies
+        #      * We can avoid any traversal logic and just yield this node's _cmp_node generator.
+        #
+        #   2. Spec has dependencies, but dependencies have no dependencies.
+        #      * We need to sort edges, but we don't need to track visited nodes, which
+        #        can save us the cost of setting up all the tracking data structures
+        #        `spack.traverse` uses.
+        #
+        #   3. Spec has dependencies that have dependencies.
+        #      * In this case, the spec is *probably* concrete. Equality comparisons
+        #        will be short-circuited by dag_hash(), but other comparisons will need
+        #        to lazily enumerate components of the spec. The traversal logic is
+        #        unavoidable.
+        #
+        # TODO: consider reworking `spack.traverse` to construct fewer data structures
+        # and objects, as this would make all traversals faster and could eliminate the
+        # need for the complexity here. It was not clear at the time of writing that how
+        # much optimization was possible in `spack.traverse`.
+
+        sorted_l1_edges = None
+        edge_list = None
+        node_ids = None
+
+        def nodes():
+            nonlocal sorted_l1_edges
+            nonlocal edge_list
+            nonlocal node_ids
+
+            # Level 0: root node
+            yield self._cmp_node  # always yield the root (this node)
+            if not self._dependencies:  # done if there are no dependencies
+                return
+
+            # Level 1: direct dependencies
+            # we can yield these in sorted order without tracking visited nodes
+            deps_have_deps = False
+            sorted_l1_edges = self.edges_to_dependencies(depflag=dt.ALL)
+            if len(sorted_l1_edges) > 1:
+                sorted_l1_edges = spack.traverse.sort_edges(sorted_l1_edges)
+
+            for edge in sorted_l1_edges:
+                yield edge.spec._cmp_node
+                if edge.spec._dependencies:
+                    deps_have_deps = True
+
+            if not deps_have_deps:  # done if level 1 specs have no dependencies
+                return
+
+            # Level 2: dependencies of direct dependencies
+            # now it's general; we need full traverse() to track visited nodes
+            l1_specs = [edge.spec for edge in sorted_l1_edges]
+
+            # the node_ids dict generates consistent ids based on BFS traversal order
+            # these are used to identify edges later
+            node_ids = collections.defaultdict(lambda: len(node_ids))
+            node_ids[id(self)]  # self is 0
+            for spec in l1_specs:
+                node_ids[id(spec)]  # l1 starts at 1
+
+            edge_list = []
+            for edge in spack.traverse.traverse_edges(
+                l1_specs, order="breadth", cover="edges", root=False, visited=set([0])
+            ):
+                # yield each node only once, and generate a consistent id for it the
+                # first time it's encountered.
+                if id(edge.spec) not in node_ids:
+                    yield edge.spec._cmp_node
+                    node_ids[id(edge.spec)]
+
+                if edge.parent is None:  # skip fake edge to root
+                    continue
+
+                edge_list.append(
+                    (
+                        node_ids[id(edge.parent)],
+                        node_ids[id(edge.spec)],
+                        edge.depflag,
+                        edge.virtuals,
+                    )
+                )
+
+        def edges():
+            # no edges in single-node graph
+            if not self._dependencies:
+                return
+
+            # level 1 edges all start with zero
+            for i, edge in enumerate(sorted_l1_edges, start=1):
+                yield (0, i, edge.depflag, edge.virtuals)
+
+            # yield remaining edges in the order they were encountered during traversal
+            if edge_list:
+                yield from edge_list
+
+        yield nodes
+        yield edges

    @property
    def namespace_if_anonymous(self):
--- a/lib/spack/spack/test/spec_semantics.py
+++ b/lib/spack/spack/test/spec_semantics.py
@ -6,6 +6,8 @@

 import pytest

+import llnl.util.lang
+
 import spack.concretize
 import spack.deptypes as dt
 import spack.directives
@ -2013,6 +2015,137 @@ def test_comparison_multivalued_variants():
    assert Spec("x=a") < Spec("x=a,b") < Spec("x==a,b") < Spec("x==a,b,c")


+@pytest.mark.parametrize(
+    "specs_in_expected_order",
+    [
+        ("a", "b", "c", "d", "e"),
+        ("a@1.0", "a@2.0", "b", "c@3.0", "c@4.0"),
+        ("a^d", "b^c", "c^b", "d^a"),
+        ("e^a", "e^b", "e^c", "e^d"),
+        ("e^a@1.0", "e^a@2.0", "e^a@3.0", "e^a@4.0"),
+        ("e^a@1.0 +a", "e^a@1.0 +b", "e^a@1.0 +c", "e^a@1.0 +c"),
+        ("a^b%c", "a^b%d", "a^b%e", "a^b%f"),
+        ("a^b%c@1.0", "a^b%c@2.0", "a^b%c@3.0", "a^b%c@4.0"),
+        ("a^b%c@1.0 +a", "a^b%c@1.0 +b", "a^b%c@1.0 +c", "a^b%c@1.0 +d"),
+        ("a cflags=-O1", "a cflags=-O2", "a cflags=-O3"),
+        ("a %cmake@1.0 ^b %cmake@2.0", "a %cmake@2.0 ^b %cmake@1.0"),
+        ("a^b^c^d", "a^b^c^e", "a^b^c^f"),
+        ("a^b^c^d", "a^b^c^e", "a^b^c^e", "a^b^c^f"),
+        ("a%b%c%d", "a%b%c%e", "a%b%c%e", "a%b%c%f"),
+        ("d.a", "c.b", "b.c", "a.d"),  # names before namespaces
+    ],
+)
+def test_spec_ordering(specs_in_expected_order):
+    specs_in_expected_order = [Spec(s) for s in specs_in_expected_order]
+    assert sorted(specs_in_expected_order) == specs_in_expected_order
+    assert sorted(reversed(specs_in_expected_order)) == specs_in_expected_order
+
+    for i in range(len(specs_in_expected_order) - 1):
+        lhs, rhs = specs_in_expected_order[i : i + 2]
+        assert lhs <= rhs
+        assert (lhs < rhs and lhs != rhs) or lhs == rhs
+        assert rhs >= lhs
+        assert (rhs > lhs and rhs != lhs) or rhs == lhs
+
+
+EMPTY_VER = vn.VersionList(":")
+EMPTY_VAR = Spec().variants
+EMPTY_FLG = Spec().compiler_flags
+
+
+@pytest.mark.parametrize(
+    "spec,expected_tuplified",
+    [
+        # simple, no dependencies
+        [("a"), ((("a", None, EMPTY_VER, EMPTY_VAR, EMPTY_FLG, None, None, None),), ())],
+        # with some node attributes
+        [
+            ("a@1.0 +foo cflags='-O3 -g'"),
+            (
+                (
+                    (
+                        "a",
+                        None,
+                        vn.VersionList(["1.0"]),
+                        Spec("+foo").variants,
+                        Spec("cflags='-O3 -g'").compiler_flags,
+                        None,
+                        None,
+                        None,
+                    ),
+                ),
+                (),
+            ),
+        ],
+        # single edge case
+        [
+            ("a^b"),
+            (
+                (
+                    ("a", None, EMPTY_VER, EMPTY_VAR, EMPTY_FLG, None, None, None),
+                    ("b", None, EMPTY_VER, EMPTY_VAR, EMPTY_FLG, None, None, None),
+                ),
+                ((0, 1, 0, ()),),
+            ),
+        ],
+        # root with multiple deps
+        [
+            ("a^b^c^d"),
+            (
+                (
+                    ("a", None, EMPTY_VER, EMPTY_VAR, EMPTY_FLG, None, None, None),
+                    ("b", None, EMPTY_VER, EMPTY_VAR, EMPTY_FLG, None, None, None),
+                    ("c", None, EMPTY_VER, EMPTY_VAR, EMPTY_FLG, None, None, None),
+                    ("d", None, EMPTY_VER, EMPTY_VAR, EMPTY_FLG, None, None, None),
+                ),
+                ((0, 1, 0, ()), (0, 2, 0, ()), (0, 3, 0, ())),
+            ),
+        ],
+        # root with multiple build deps
+        [
+            ("a%b%c%d"),
+            (
+                (
+                    ("a", None, EMPTY_VER, EMPTY_VAR, EMPTY_FLG, None, None, None),
+                    ("b", None, EMPTY_VER, EMPTY_VAR, EMPTY_FLG, None, None, None),
+                    ("c", None, EMPTY_VER, EMPTY_VAR, EMPTY_FLG, None, None, None),
+                    ("d", None, EMPTY_VER, EMPTY_VAR, EMPTY_FLG, None, None, None),
+                ),
+                ((0, 1, dt.BUILD, ()), (0, 2, dt.BUILD, ()), (0, 3, dt.BUILD, ())),
+            ),
+        ],
+        # dependencies with dependencies
+        [
+            ("a  ^b %c %d  ^e %f %g"),
+            (
+                (
+                    ("a", None, EMPTY_VER, EMPTY_VAR, EMPTY_FLG, None, None, None),
+                    ("b", None, EMPTY_VER, EMPTY_VAR, EMPTY_FLG, None, None, None),
+                    ("e", None, EMPTY_VER, EMPTY_VAR, EMPTY_FLG, None, None, None),
+                    ("c", None, EMPTY_VER, EMPTY_VAR, EMPTY_FLG, None, None, None),
+                    ("d", None, EMPTY_VER, EMPTY_VAR, EMPTY_FLG, None, None, None),
+                    ("f", None, EMPTY_VER, EMPTY_VAR, EMPTY_FLG, None, None, None),
+                    ("g", None, EMPTY_VER, EMPTY_VAR, EMPTY_FLG, None, None, None),
+                ),
+                (
+                    (0, 1, 0, ()),
+                    (0, 2, 0, ()),
+                    (1, 3, dt.BUILD, ()),
+                    (1, 4, dt.BUILD, ()),
+                    (2, 5, dt.BUILD, ()),
+                    (2, 6, dt.BUILD, ()),
+                ),
+            ),
+        ],
+    ],
+)
+def test_spec_canonical_comparison_form(spec, expected_tuplified):
+    print()
+    print()
+    print()
+    assert llnl.util.lang.tuplify(Spec(spec)._cmp_iter) == expected_tuplified
+
+
 def test_comparison_after_breaking_hash_change():
    # We simulate a breaking change in DAG hash computation in Spack. We have two specs that are
    # entirely equal modulo DAG hash. When deserializing these specs, we don't want them to compare
Author	SHA1	Message	Date
Todd Gamblin	37bb0843fd	clean up some comments Signed-off-by: Todd Gamblin <tgamblin@llnl.gov>	2025-05-20 09:25:20 -07:00
Todd Gamblin	07a8f6235b	merge three branches into one	2025-05-20 01:02:49 -07:00
Todd Gamblin	485291ef20	Omit fake edge to root This saves singificant time in comparison Signed-off-by: Todd Gamblin <tgamblin@llnl.gov>	2025-05-19 15:59:03 -07:00
Todd Gamblin	8363fbf40f	Spec: use short-circuiting, stable comparison ## Background Spec comparison on develop used a somewhat questionable optimization to get decent spec comparison performance -- instead of comparing entire spec DAGs, it put a `hash()` call in `_cmp_iter()` and compared specs by their runtime hashes. This gets us good performance abstract specs, which don't have complex dependencies and for which hashing is cheap. But it makes the order of specs unstable and hard to reproduce. We really need to do a full, consistent traversal over specs to compare and to get a stable ordering. Simply taking the hash out and yielding dependencies recursively (i.e. yielding `dep.spec._cmp_iter()` instead of a hash) goes exponential for concrete specs because it explores all paths. Traversal tracks visited nodes, but it's expensive to set up the data structures for that, and it can slow down comparison of simple abstract specs. Abstract spec comparison performance is important for concretization (specifically setup), so we don't want to do that. ## New comparison algorithm We can have (mostly) the best of both worlds -- it's just a bit more complicated. This changes Spec comparison to do a proper, stable graph comparison: 1. Spec comparison will now short-circuit whenever possible for concrete specs, when DAG hashes are known to be equal or not equal. This means that concrete spec `==` and `!=` comparisons will no longer have to traverse the whole DAG. 2. Spec comparison now traverses the graph consistently, comparing nodes and edges in breadth-first order. This means Spec sort order is stable, and it won't vary arbitrarily from run to run. 3. Traversal can be expensive, so we avoid it for simple specs. Specifically, if a spec has no dependencies, or if its dependencies have no dependencies, we avoid calling `traverse_edges()` by doing some special casing. The `_cmp_iter` method for `Spec` now iterates over the DAG and yields nodes in BFS order. While it does that, it generates consistent ids for each node, based on traversal order. It then outputs edges in terms of these ids, along with their depflags and virtuals, so that all parts of the Spec DAG are included. The resulting total ordering of specs keys on node attributes first, then dependency nodes, then any edge differences between graphs. Optimized cases skip the id generation and traversal, since we know the order and therefore the ids in advance. ## Performance ramifications ### Abstract specs This seems to add around 7-8% overhead to concretization setup time. It's worth the cost, because this enables concretization caching (as input to concretization was previously not stable) and setup will eventually be parallelized, at which point it will no longer be a bottleneck for solving. Together those two optimizations will cut well over 50% of the time (likely closer to 90+%) off of most solves. ### Concrete specs Comparison for concrete specs is faster than before, sometimes way faster because comparison is now guaranteed to be linear time w.r.t. DAG size. Times for comparing concrete Specs: ```python def compare(json): a = spack.spec.Spec(json) b = spack.spec.Spec(json) print(a == b) print(timeit.timeit(lambda: a == b, number=1)) compare("./py-black.json") compare("./hdf5.json") ``` * `develop` (uses our prior hash optimization): * `py-black`: 7.013e-05s * `py-hdf5`: 6.445e-05s * `develop` with full traversal and no hash: * `py-black`: 3.955s * `py-hdf5`: 0.0122s * This branch (full traversal, stable, short-circuiting, no hash) * `py-black`: 2.208e-06s * `py-hdf5`: 3.416e-06s Signed-off-by: Todd Gamblin <tgamblin@llnl.gov>	2025-05-19 15:59:03 -07:00
Todd Gamblin	e4b898f7c3	`lazy_lexicographic_ordering`: Add short-circuiting with `_cmp_fast_eq` Add a `_cmp_fast_eq` method to the `lazy_lexicographic_ordering` protocol so that implementors can short-circuit full comparison if they know (e.g. with a hash) that objects are equal (or not). `_cmp_fast_eq` can return True (known true), False (known false), or None (unknown -- do full comparison). Signed-off-by: Todd Gamblin <tgamblin@llnl.gov>	2025-05-19 15:59:03 -07:00
Todd Gamblin	a5f1a4ea81	Spec: Remove unused methods on `_EdgeMap` `_EdgeMap` implements `_cmp_iter` for `lazy_lexicographic_ordering` but it's never used or tested. Remove it. Signed-off-by: Todd Gamblin <tgamblin@llnl.gov>	2025-05-19 15:59:02 -07:00