traverse.py: ensure topo order is bfs for trees (#47720)
This commit is contained in:
parent
559c2f1eb9
commit
cb3d6549c9
@ -882,6 +882,9 @@ def __init__(self, *roots: spack.spec.Spec, context: Context):
|
||||
elif context == Context.RUN:
|
||||
self.root_depflag = dt.RUN | dt.LINK
|
||||
|
||||
def accept(self, item):
|
||||
return True
|
||||
|
||||
def neighbors(self, item):
|
||||
spec = item.edge.spec
|
||||
if spec.dag_hash() in self.root_hashes:
|
||||
@ -919,19 +922,19 @@ def effective_deptypes(
|
||||
a flag specifying in what way they do so. The list is ordered topologically
|
||||
from root to leaf, meaning that environment modifications should be applied
|
||||
in reverse so that dependents override dependencies, not the other way around."""
|
||||
visitor = traverse.TopoVisitor(
|
||||
EnvironmentVisitor(*specs, context=context),
|
||||
key=lambda x: x.dag_hash(),
|
||||
topo_sorted_edges = traverse.traverse_topo_edges_generator(
|
||||
traverse.with_artificial_edges(specs),
|
||||
visitor=EnvironmentVisitor(*specs, context=context),
|
||||
key=traverse.by_dag_hash,
|
||||
root=True,
|
||||
all_edges=True,
|
||||
)
|
||||
traverse.traverse_depth_first_with_visitor(traverse.with_artificial_edges(specs), visitor)
|
||||
|
||||
# Dictionary with "no mode" as default value, so it's easy to write modes[x] |= flag.
|
||||
use_modes = defaultdict(lambda: UseMode(0))
|
||||
nodes_with_type = []
|
||||
|
||||
for edge in visitor.edges:
|
||||
for edge in topo_sorted_edges:
|
||||
parent, child, depflag = edge.parent, edge.spec, edge.depflag
|
||||
|
||||
# Mark the starting point
|
||||
|
@ -325,12 +325,7 @@ def write(self, spec, color=None, out=None):
|
||||
self._out = llnl.util.tty.color.ColorStream(out, color=color)
|
||||
|
||||
# We'll traverse the spec in topological order as we graph it.
|
||||
nodes_in_topological_order = [
|
||||
edge.spec
|
||||
for edge in spack.traverse.traverse_edges_topo(
|
||||
[spec], direction="children", deptype=self.depflag
|
||||
)
|
||||
]
|
||||
nodes_in_topological_order = list(spec.traverse(order="topo", deptype=self.depflag))
|
||||
nodes_in_topological_order.reverse()
|
||||
|
||||
# Work on a copy to be nondestructive
|
||||
|
@ -73,5 +73,18 @@ def test_ascii_graph_mpileaks(config, mock_packages, monkeypatch):
|
||||
o | libdwarf
|
||||
|/
|
||||
o libelf
|
||||
"""
|
||||
or graph_str
|
||||
== r"""o mpileaks
|
||||
|\
|
||||
| o callpath
|
||||
|/|
|
||||
| o dyninst
|
||||
| |\
|
||||
o | | mpich
|
||||
/ /
|
||||
| o libdwarf
|
||||
|/
|
||||
o libelf
|
||||
"""
|
||||
)
|
||||
|
@ -431,3 +431,26 @@ def test_traverse_nodes_no_deps(abstract_specs_dtuse):
|
||||
]
|
||||
outputs = [x for x in traverse.traverse_nodes(inputs, deptype=dt.NONE)]
|
||||
assert outputs == [abstract_specs_dtuse["dtuse"], abstract_specs_dtuse["dtlink5"]]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("cover", ["nodes", "edges"])
|
||||
def test_topo_is_bfs_for_trees(cover):
|
||||
"""For trees, both DFS and BFS produce a topological order, but BFS is the most sensible for
|
||||
our applications, where we typically want to avoid that transitive dependencies shadow direct
|
||||
depenencies in global search paths, etc. This test ensures that for trees, the default topo
|
||||
order coincides with BFS."""
|
||||
binary_tree = create_dag(
|
||||
nodes=["A", "B", "C", "D", "E", "F", "G"],
|
||||
edges=(
|
||||
("A", "B", "all"),
|
||||
("A", "C", "all"),
|
||||
("B", "D", "all"),
|
||||
("B", "E", "all"),
|
||||
("C", "F", "all"),
|
||||
("C", "G", "all"),
|
||||
),
|
||||
)
|
||||
|
||||
assert list(traverse.traverse_nodes([binary_tree["A"]], order="topo", cover=cover)) == list(
|
||||
traverse.traverse_nodes([binary_tree["A"]], order="breadth", cover=cover)
|
||||
)
|
||||
|
@ -115,70 +115,6 @@ def neighbors(self, item):
|
||||
return self.visitor.neighbors(item)
|
||||
|
||||
|
||||
class TopoVisitor:
|
||||
"""Visitor that can be used in :py:func:`depth-first traversal
|
||||
<spack.traverse.traverse_depth_first_with_visitor>` to generate
|
||||
a topologically ordered list of specs.
|
||||
|
||||
Algorithm based on "Section 22.4: Topological sort", Introduction to Algorithms
|
||||
(2001, 2nd edition) by Cormen, Thomas H.; Leiserson, Charles E.; Rivest, Ronald L.;
|
||||
Stein, Clifford.
|
||||
|
||||
Summary of the algorithm: prepend each vertex to a list in depth-first post-order,
|
||||
not following edges to nodes already seen. This ensures all descendants occur after
|
||||
their parent, yielding a topological order.
|
||||
|
||||
Note: in this particular implementation we collect the *edges* through which the
|
||||
vertices are discovered, meaning that a topological order of *vertices* is obtained
|
||||
by taking the specs pointed to: ``map(lambda edge: edge.spec, visitor.edges)``.
|
||||
Lastly, ``all_edges=True`` can be used to retrieve a list of all reachable
|
||||
edges, with the property that for each vertex all in-edges precede all out-edges.
|
||||
"""
|
||||
|
||||
def __init__(self, visitor, key=id, root=True, all_edges=False):
|
||||
"""
|
||||
Arguments:
|
||||
visitor: visitor that implements accept(), pre(), post() and neighbors()
|
||||
key: uniqueness key for nodes
|
||||
root (bool): Whether to include the root node.
|
||||
all_edges (bool): when ``False`` (default): Each node is reached once,
|
||||
and ``map(lambda edge: edge.spec, visitor.edges)`` is topologically
|
||||
ordered. When ``True``, every edge is listed, ordered such that for
|
||||
each node all in-edges precede all out-edges.
|
||||
"""
|
||||
self.visited = set()
|
||||
self.visitor = visitor
|
||||
self.key = key
|
||||
self.root = root
|
||||
self.reverse_order = []
|
||||
self.all_edges = all_edges
|
||||
|
||||
def accept(self, item):
|
||||
if self.key(item.edge.spec) not in self.visited:
|
||||
return True
|
||||
if self.all_edges and (self.root or item.depth > 0):
|
||||
self.reverse_order.append(item.edge)
|
||||
return False
|
||||
|
||||
def pre(self, item):
|
||||
# You could add a temporary marker for cycle detection
|
||||
# that's cleared in `post`, but we assume no cycles.
|
||||
pass
|
||||
|
||||
def post(self, item):
|
||||
self.visited.add(self.key(item.edge.spec))
|
||||
if self.root or item.depth > 0:
|
||||
self.reverse_order.append(item.edge)
|
||||
|
||||
def neighbors(self, item):
|
||||
return self.visitor.neighbors(item)
|
||||
|
||||
@property
|
||||
def edges(self):
|
||||
"""Return edges in topological order (in-edges precede out-edges)."""
|
||||
return list(reversed(self.reverse_order))
|
||||
|
||||
|
||||
def get_visitor_from_args(
|
||||
cover, direction, depflag: Union[dt.DepFlag, dt.DepTypes], key=id, visited=None, visitor=None
|
||||
):
|
||||
@ -381,39 +317,54 @@ def traverse_breadth_first_tree_nodes(parent_id, edges, key=id, depth=0):
|
||||
yield item
|
||||
|
||||
|
||||
# Topologic order
|
||||
def traverse_edges_topo(
|
||||
specs,
|
||||
direction="children",
|
||||
deptype: Union[dt.DepFlag, dt.DepTypes] = "all",
|
||||
key=id,
|
||||
root=True,
|
||||
all_edges=False,
|
||||
):
|
||||
def traverse_topo_edges_generator(edges, visitor, key=id, root=True, all_edges=False):
|
||||
"""
|
||||
Returns a list of edges in topological order, in the sense that all in-edges of a
|
||||
vertex appear before all out-edges. By default with direction=children edges are
|
||||
directed from dependent to dependency. With directions=parents, the edges are
|
||||
directed from dependency to dependent.
|
||||
Returns a list of edges in topological order, in the sense that all in-edges of a vertex appear
|
||||
before all out-edges.
|
||||
|
||||
Arguments:
|
||||
specs (list): List of root specs (considered to be depth 0)
|
||||
direction (str): ``children`` (edges are directed from dependent to dependency)
|
||||
or ``parents`` (edges are flipped / directed from dependency to dependent)
|
||||
deptype: allowed dependency types
|
||||
edges (list): List of EdgeAndDepth instances
|
||||
visitor: visitor instance that defines the sub-DAG to traverse
|
||||
key: function that takes a spec and outputs a key for uniqueness test.
|
||||
root (bool): Yield the root nodes themselves
|
||||
all_edges (bool): When ``False`` only one in-edge per node is returned, when
|
||||
``True`` all reachable edges are returned.
|
||||
"""
|
||||
if not isinstance(deptype, dt.DepFlag):
|
||||
deptype = dt.canonicalize(deptype)
|
||||
visitor: Union[BaseVisitor, ReverseVisitor, TopoVisitor] = BaseVisitor(deptype)
|
||||
if direction == "parents":
|
||||
visitor = ReverseVisitor(visitor, deptype)
|
||||
visitor = TopoVisitor(visitor, key=key, root=root, all_edges=all_edges)
|
||||
traverse_depth_first_with_visitor(with_artificial_edges(specs), visitor)
|
||||
return visitor.edges
|
||||
# Topo order used to be implemented using a DFS visitor, which was relatively efficient in that
|
||||
# it would visit nodes only once, and it was composable. In practice however it would yield a
|
||||
# DFS order on DAGs that are trees, which is undesirable in many cases. For example, a list of
|
||||
# search paths for trees is better in BFS order, so that direct dependencies are listed first.
|
||||
# That way a transitive dependency cannot shadow a direct one. So, here we collect the sub-DAG
|
||||
# of interest and then compute a topological order that is the most breadth-first possible.
|
||||
|
||||
# maps node identifier to the number of remaining in-edges
|
||||
in_edge_count = defaultdict(int)
|
||||
# maps parent identifier to a list of edges, where None is a special identifier
|
||||
# for the artificial root/source.
|
||||
node_to_edges = defaultdict(list)
|
||||
for edge in traverse_breadth_first_edges_generator(
|
||||
edges, CoverEdgesVisitor(visitor, key=key), root=True, depth=False
|
||||
):
|
||||
in_edge_count[key(edge.spec)] += 1
|
||||
parent_id = key(edge.parent) if edge.parent is not None else None
|
||||
node_to_edges[parent_id].append(edge)
|
||||
|
||||
queue = [None]
|
||||
|
||||
while queue:
|
||||
for edge in node_to_edges[queue.pop(0)]:
|
||||
child_id = key(edge.spec)
|
||||
in_edge_count[child_id] -= 1
|
||||
|
||||
should_yield = root or edge.parent is not None
|
||||
|
||||
if all_edges and should_yield:
|
||||
yield edge
|
||||
|
||||
if in_edge_count[child_id] == 0:
|
||||
if not all_edges and should_yield:
|
||||
yield edge
|
||||
queue.append(key(edge.spec))
|
||||
|
||||
|
||||
# High-level API: traverse_edges, traverse_nodes, traverse_tree.
|
||||
@ -462,20 +413,20 @@ def traverse_edges(
|
||||
A generator that yields ``DependencySpec`` if depth is ``False``
|
||||
or a tuple of ``(depth, DependencySpec)`` if depth is ``True``.
|
||||
"""
|
||||
|
||||
# validate input
|
||||
if order == "topo":
|
||||
if cover == "paths":
|
||||
raise ValueError("cover=paths not supported for order=topo")
|
||||
# TODO: There is no known need for topological ordering of traversals (edge or node)
|
||||
# with an initialized "visited" set. Revisit if needed.
|
||||
if visited is not None:
|
||||
raise ValueError("visited set not implemented for order=topo")
|
||||
return traverse_edges_topo(
|
||||
specs, direction, deptype, key, root, all_edges=cover == "edges"
|
||||
)
|
||||
elif order not in ("post", "pre", "breadth"):
|
||||
raise ValueError(f"Unknown order {order}")
|
||||
|
||||
# In topo traversal we need to construct a sub-DAG including all edges even if we are yielding
|
||||
# a subset of them, hence "paths".
|
||||
_cover = "paths" if order == "topo" else cover
|
||||
visitor = get_visitor_from_args(_cover, direction, deptype, key, visited)
|
||||
root_edges = with_artificial_edges(specs)
|
||||
visitor = get_visitor_from_args(cover, direction, deptype, key, visited)
|
||||
|
||||
# Depth-first
|
||||
if order in ("pre", "post"):
|
||||
@ -484,8 +435,10 @@ def traverse_edges(
|
||||
)
|
||||
elif order == "breadth":
|
||||
return traverse_breadth_first_edges_generator(root_edges, visitor, root, depth)
|
||||
|
||||
raise ValueError("Unknown order {}".format(order))
|
||||
elif order == "topo":
|
||||
return traverse_topo_edges_generator(
|
||||
root_edges, visitor, key, root, all_edges=cover == "edges"
|
||||
)
|
||||
|
||||
|
||||
def traverse_nodes(
|
||||
|
Loading…
Reference in New Issue
Block a user