traverse.py: ensure topo order is bfs for trees (#47720)

This commit is contained in:
Harmen Stoppels 2024-11-22 15:04:19 +01:00 committed by GitHub
parent 559c2f1eb9
commit cb3d6549c9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 96 additions and 109 deletions

View File

@ -882,6 +882,9 @@ def __init__(self, *roots: spack.spec.Spec, context: Context):
elif context == Context.RUN:
self.root_depflag = dt.RUN | dt.LINK
def accept(self, item):
return True
def neighbors(self, item):
spec = item.edge.spec
if spec.dag_hash() in self.root_hashes:
@ -919,19 +922,19 @@ def effective_deptypes(
a flag specifying in what way they do so. The list is ordered topologically
from root to leaf, meaning that environment modifications should be applied
in reverse so that dependents override dependencies, not the other way around."""
visitor = traverse.TopoVisitor(
EnvironmentVisitor(*specs, context=context),
key=lambda x: x.dag_hash(),
topo_sorted_edges = traverse.traverse_topo_edges_generator(
traverse.with_artificial_edges(specs),
visitor=EnvironmentVisitor(*specs, context=context),
key=traverse.by_dag_hash,
root=True,
all_edges=True,
)
traverse.traverse_depth_first_with_visitor(traverse.with_artificial_edges(specs), visitor)
# Dictionary with "no mode" as default value, so it's easy to write modes[x] |= flag.
use_modes = defaultdict(lambda: UseMode(0))
nodes_with_type = []
for edge in visitor.edges:
for edge in topo_sorted_edges:
parent, child, depflag = edge.parent, edge.spec, edge.depflag
# Mark the starting point

View File

@ -325,12 +325,7 @@ def write(self, spec, color=None, out=None):
self._out = llnl.util.tty.color.ColorStream(out, color=color)
# We'll traverse the spec in topological order as we graph it.
nodes_in_topological_order = [
edge.spec
for edge in spack.traverse.traverse_edges_topo(
[spec], direction="children", deptype=self.depflag
)
]
nodes_in_topological_order = list(spec.traverse(order="topo", deptype=self.depflag))
nodes_in_topological_order.reverse()
# Work on a copy to be nondestructive

View File

@ -73,5 +73,18 @@ def test_ascii_graph_mpileaks(config, mock_packages, monkeypatch):
o | libdwarf
|/
o libelf
"""
or graph_str
== r"""o mpileaks
|\
| o callpath
|/|
| o dyninst
| |\
o | | mpich
/ /
| o libdwarf
|/
o libelf
"""
)

View File

@ -431,3 +431,26 @@ def test_traverse_nodes_no_deps(abstract_specs_dtuse):
]
outputs = [x for x in traverse.traverse_nodes(inputs, deptype=dt.NONE)]
assert outputs == [abstract_specs_dtuse["dtuse"], abstract_specs_dtuse["dtlink5"]]
@pytest.mark.parametrize("cover", ["nodes", "edges"])
def test_topo_is_bfs_for_trees(cover):
"""For trees, both DFS and BFS produce a topological order, but BFS is the most sensible for
our applications, where we typically want to avoid that transitive dependencies shadow direct
depenencies in global search paths, etc. This test ensures that for trees, the default topo
order coincides with BFS."""
binary_tree = create_dag(
nodes=["A", "B", "C", "D", "E", "F", "G"],
edges=(
("A", "B", "all"),
("A", "C", "all"),
("B", "D", "all"),
("B", "E", "all"),
("C", "F", "all"),
("C", "G", "all"),
),
)
assert list(traverse.traverse_nodes([binary_tree["A"]], order="topo", cover=cover)) == list(
traverse.traverse_nodes([binary_tree["A"]], order="breadth", cover=cover)
)

View File

@ -115,70 +115,6 @@ def neighbors(self, item):
return self.visitor.neighbors(item)
class TopoVisitor:
"""Visitor that can be used in :py:func:`depth-first traversal
<spack.traverse.traverse_depth_first_with_visitor>` to generate
a topologically ordered list of specs.
Algorithm based on "Section 22.4: Topological sort", Introduction to Algorithms
(2001, 2nd edition) by Cormen, Thomas H.; Leiserson, Charles E.; Rivest, Ronald L.;
Stein, Clifford.
Summary of the algorithm: prepend each vertex to a list in depth-first post-order,
not following edges to nodes already seen. This ensures all descendants occur after
their parent, yielding a topological order.
Note: in this particular implementation we collect the *edges* through which the
vertices are discovered, meaning that a topological order of *vertices* is obtained
by taking the specs pointed to: ``map(lambda edge: edge.spec, visitor.edges)``.
Lastly, ``all_edges=True`` can be used to retrieve a list of all reachable
edges, with the property that for each vertex all in-edges precede all out-edges.
"""
def __init__(self, visitor, key=id, root=True, all_edges=False):
"""
Arguments:
visitor: visitor that implements accept(), pre(), post() and neighbors()
key: uniqueness key for nodes
root (bool): Whether to include the root node.
all_edges (bool): when ``False`` (default): Each node is reached once,
and ``map(lambda edge: edge.spec, visitor.edges)`` is topologically
ordered. When ``True``, every edge is listed, ordered such that for
each node all in-edges precede all out-edges.
"""
self.visited = set()
self.visitor = visitor
self.key = key
self.root = root
self.reverse_order = []
self.all_edges = all_edges
def accept(self, item):
if self.key(item.edge.spec) not in self.visited:
return True
if self.all_edges and (self.root or item.depth > 0):
self.reverse_order.append(item.edge)
return False
def pre(self, item):
# You could add a temporary marker for cycle detection
# that's cleared in `post`, but we assume no cycles.
pass
def post(self, item):
self.visited.add(self.key(item.edge.spec))
if self.root or item.depth > 0:
self.reverse_order.append(item.edge)
def neighbors(self, item):
return self.visitor.neighbors(item)
@property
def edges(self):
"""Return edges in topological order (in-edges precede out-edges)."""
return list(reversed(self.reverse_order))
def get_visitor_from_args(
cover, direction, depflag: Union[dt.DepFlag, dt.DepTypes], key=id, visited=None, visitor=None
):
@ -381,39 +317,54 @@ def traverse_breadth_first_tree_nodes(parent_id, edges, key=id, depth=0):
yield item
# Topologic order
def traverse_edges_topo(
specs,
direction="children",
deptype: Union[dt.DepFlag, dt.DepTypes] = "all",
key=id,
root=True,
all_edges=False,
):
def traverse_topo_edges_generator(edges, visitor, key=id, root=True, all_edges=False):
"""
Returns a list of edges in topological order, in the sense that all in-edges of a
vertex appear before all out-edges. By default with direction=children edges are
directed from dependent to dependency. With directions=parents, the edges are
directed from dependency to dependent.
Returns a list of edges in topological order, in the sense that all in-edges of a vertex appear
before all out-edges.
Arguments:
specs (list): List of root specs (considered to be depth 0)
direction (str): ``children`` (edges are directed from dependent to dependency)
or ``parents`` (edges are flipped / directed from dependency to dependent)
deptype: allowed dependency types
edges (list): List of EdgeAndDepth instances
visitor: visitor instance that defines the sub-DAG to traverse
key: function that takes a spec and outputs a key for uniqueness test.
root (bool): Yield the root nodes themselves
all_edges (bool): When ``False`` only one in-edge per node is returned, when
``True`` all reachable edges are returned.
"""
if not isinstance(deptype, dt.DepFlag):
deptype = dt.canonicalize(deptype)
visitor: Union[BaseVisitor, ReverseVisitor, TopoVisitor] = BaseVisitor(deptype)
if direction == "parents":
visitor = ReverseVisitor(visitor, deptype)
visitor = TopoVisitor(visitor, key=key, root=root, all_edges=all_edges)
traverse_depth_first_with_visitor(with_artificial_edges(specs), visitor)
return visitor.edges
# Topo order used to be implemented using a DFS visitor, which was relatively efficient in that
# it would visit nodes only once, and it was composable. In practice however it would yield a
# DFS order on DAGs that are trees, which is undesirable in many cases. For example, a list of
# search paths for trees is better in BFS order, so that direct dependencies are listed first.
# That way a transitive dependency cannot shadow a direct one. So, here we collect the sub-DAG
# of interest and then compute a topological order that is the most breadth-first possible.
# maps node identifier to the number of remaining in-edges
in_edge_count = defaultdict(int)
# maps parent identifier to a list of edges, where None is a special identifier
# for the artificial root/source.
node_to_edges = defaultdict(list)
for edge in traverse_breadth_first_edges_generator(
edges, CoverEdgesVisitor(visitor, key=key), root=True, depth=False
):
in_edge_count[key(edge.spec)] += 1
parent_id = key(edge.parent) if edge.parent is not None else None
node_to_edges[parent_id].append(edge)
queue = [None]
while queue:
for edge in node_to_edges[queue.pop(0)]:
child_id = key(edge.spec)
in_edge_count[child_id] -= 1
should_yield = root or edge.parent is not None
if all_edges and should_yield:
yield edge
if in_edge_count[child_id] == 0:
if not all_edges and should_yield:
yield edge
queue.append(key(edge.spec))
# High-level API: traverse_edges, traverse_nodes, traverse_tree.
@ -462,20 +413,20 @@ def traverse_edges(
A generator that yields ``DependencySpec`` if depth is ``False``
or a tuple of ``(depth, DependencySpec)`` if depth is ``True``.
"""
# validate input
if order == "topo":
if cover == "paths":
raise ValueError("cover=paths not supported for order=topo")
# TODO: There is no known need for topological ordering of traversals (edge or node)
# with an initialized "visited" set. Revisit if needed.
if visited is not None:
raise ValueError("visited set not implemented for order=topo")
return traverse_edges_topo(
specs, direction, deptype, key, root, all_edges=cover == "edges"
)
elif order not in ("post", "pre", "breadth"):
raise ValueError(f"Unknown order {order}")
# In topo traversal we need to construct a sub-DAG including all edges even if we are yielding
# a subset of them, hence "paths".
_cover = "paths" if order == "topo" else cover
visitor = get_visitor_from_args(_cover, direction, deptype, key, visited)
root_edges = with_artificial_edges(specs)
visitor = get_visitor_from_args(cover, direction, deptype, key, visited)
# Depth-first
if order in ("pre", "post"):
@ -484,8 +435,10 @@ def traverse_edges(
)
elif order == "breadth":
return traverse_breadth_first_edges_generator(root_edges, visitor, root, depth)
raise ValueError("Unknown order {}".format(order))
elif order == "topo":
return traverse_topo_edges_generator(
root_edges, visitor, key, root, all_edges=cover == "edges"
)
def traverse_nodes(