traverse.py: ensure topo order is bfs for trees (#47720)
This commit is contained in:
parent
559c2f1eb9
commit
cb3d6549c9
@ -882,6 +882,9 @@ def __init__(self, *roots: spack.spec.Spec, context: Context):
|
|||||||
elif context == Context.RUN:
|
elif context == Context.RUN:
|
||||||
self.root_depflag = dt.RUN | dt.LINK
|
self.root_depflag = dt.RUN | dt.LINK
|
||||||
|
|
||||||
|
def accept(self, item):
|
||||||
|
return True
|
||||||
|
|
||||||
def neighbors(self, item):
|
def neighbors(self, item):
|
||||||
spec = item.edge.spec
|
spec = item.edge.spec
|
||||||
if spec.dag_hash() in self.root_hashes:
|
if spec.dag_hash() in self.root_hashes:
|
||||||
@ -919,19 +922,19 @@ def effective_deptypes(
|
|||||||
a flag specifying in what way they do so. The list is ordered topologically
|
a flag specifying in what way they do so. The list is ordered topologically
|
||||||
from root to leaf, meaning that environment modifications should be applied
|
from root to leaf, meaning that environment modifications should be applied
|
||||||
in reverse so that dependents override dependencies, not the other way around."""
|
in reverse so that dependents override dependencies, not the other way around."""
|
||||||
visitor = traverse.TopoVisitor(
|
topo_sorted_edges = traverse.traverse_topo_edges_generator(
|
||||||
EnvironmentVisitor(*specs, context=context),
|
traverse.with_artificial_edges(specs),
|
||||||
key=lambda x: x.dag_hash(),
|
visitor=EnvironmentVisitor(*specs, context=context),
|
||||||
|
key=traverse.by_dag_hash,
|
||||||
root=True,
|
root=True,
|
||||||
all_edges=True,
|
all_edges=True,
|
||||||
)
|
)
|
||||||
traverse.traverse_depth_first_with_visitor(traverse.with_artificial_edges(specs), visitor)
|
|
||||||
|
|
||||||
# Dictionary with "no mode" as default value, so it's easy to write modes[x] |= flag.
|
# Dictionary with "no mode" as default value, so it's easy to write modes[x] |= flag.
|
||||||
use_modes = defaultdict(lambda: UseMode(0))
|
use_modes = defaultdict(lambda: UseMode(0))
|
||||||
nodes_with_type = []
|
nodes_with_type = []
|
||||||
|
|
||||||
for edge in visitor.edges:
|
for edge in topo_sorted_edges:
|
||||||
parent, child, depflag = edge.parent, edge.spec, edge.depflag
|
parent, child, depflag = edge.parent, edge.spec, edge.depflag
|
||||||
|
|
||||||
# Mark the starting point
|
# Mark the starting point
|
||||||
|
@ -325,12 +325,7 @@ def write(self, spec, color=None, out=None):
|
|||||||
self._out = llnl.util.tty.color.ColorStream(out, color=color)
|
self._out = llnl.util.tty.color.ColorStream(out, color=color)
|
||||||
|
|
||||||
# We'll traverse the spec in topological order as we graph it.
|
# We'll traverse the spec in topological order as we graph it.
|
||||||
nodes_in_topological_order = [
|
nodes_in_topological_order = list(spec.traverse(order="topo", deptype=self.depflag))
|
||||||
edge.spec
|
|
||||||
for edge in spack.traverse.traverse_edges_topo(
|
|
||||||
[spec], direction="children", deptype=self.depflag
|
|
||||||
)
|
|
||||||
]
|
|
||||||
nodes_in_topological_order.reverse()
|
nodes_in_topological_order.reverse()
|
||||||
|
|
||||||
# Work on a copy to be nondestructive
|
# Work on a copy to be nondestructive
|
||||||
|
@ -73,5 +73,18 @@ def test_ascii_graph_mpileaks(config, mock_packages, monkeypatch):
|
|||||||
o | libdwarf
|
o | libdwarf
|
||||||
|/
|
|/
|
||||||
o libelf
|
o libelf
|
||||||
|
"""
|
||||||
|
or graph_str
|
||||||
|
== r"""o mpileaks
|
||||||
|
|\
|
||||||
|
| o callpath
|
||||||
|
|/|
|
||||||
|
| o dyninst
|
||||||
|
| |\
|
||||||
|
o | | mpich
|
||||||
|
/ /
|
||||||
|
| o libdwarf
|
||||||
|
|/
|
||||||
|
o libelf
|
||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
|
@ -431,3 +431,26 @@ def test_traverse_nodes_no_deps(abstract_specs_dtuse):
|
|||||||
]
|
]
|
||||||
outputs = [x for x in traverse.traverse_nodes(inputs, deptype=dt.NONE)]
|
outputs = [x for x in traverse.traverse_nodes(inputs, deptype=dt.NONE)]
|
||||||
assert outputs == [abstract_specs_dtuse["dtuse"], abstract_specs_dtuse["dtlink5"]]
|
assert outputs == [abstract_specs_dtuse["dtuse"], abstract_specs_dtuse["dtlink5"]]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("cover", ["nodes", "edges"])
|
||||||
|
def test_topo_is_bfs_for_trees(cover):
|
||||||
|
"""For trees, both DFS and BFS produce a topological order, but BFS is the most sensible for
|
||||||
|
our applications, where we typically want to prevent transitive dependencies from shadowing direct
|
||||||
|
dependencies in global search paths, etc. This test ensures that for trees, the default topo
|
||||||
|
order coincides with BFS."""
|
||||||
|
binary_tree = create_dag(
|
||||||
|
nodes=["A", "B", "C", "D", "E", "F", "G"],
|
||||||
|
edges=(
|
||||||
|
("A", "B", "all"),
|
||||||
|
("A", "C", "all"),
|
||||||
|
("B", "D", "all"),
|
||||||
|
("B", "E", "all"),
|
||||||
|
("C", "F", "all"),
|
||||||
|
("C", "G", "all"),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
assert list(traverse.traverse_nodes([binary_tree["A"]], order="topo", cover=cover)) == list(
|
||||||
|
traverse.traverse_nodes([binary_tree["A"]], order="breadth", cover=cover)
|
||||||
|
)
|
||||||
|
@ -115,70 +115,6 @@ def neighbors(self, item):
|
|||||||
return self.visitor.neighbors(item)
|
return self.visitor.neighbors(item)
|
||||||
|
|
||||||
|
|
||||||
class TopoVisitor:
|
|
||||||
"""Visitor that can be used in :py:func:`depth-first traversal
|
|
||||||
<spack.traverse.traverse_depth_first_with_visitor>` to generate
|
|
||||||
a topologically ordered list of specs.
|
|
||||||
|
|
||||||
Algorithm based on "Section 22.4: Topological sort", Introduction to Algorithms
|
|
||||||
(2001, 2nd edition) by Cormen, Thomas H.; Leiserson, Charles E.; Rivest, Ronald L.;
|
|
||||||
Stein, Clifford.
|
|
||||||
|
|
||||||
Summary of the algorithm: prepend each vertex to a list in depth-first post-order,
|
|
||||||
not following edges to nodes already seen. This ensures all descendants occur after
|
|
||||||
their parent, yielding a topological order.
|
|
||||||
|
|
||||||
Note: in this particular implementation we collect the *edges* through which the
|
|
||||||
vertices are discovered, meaning that a topological order of *vertices* is obtained
|
|
||||||
by taking the specs pointed to: ``map(lambda edge: edge.spec, visitor.edges)``.
|
|
||||||
Lastly, ``all_edges=True`` can be used to retrieve a list of all reachable
|
|
||||||
edges, with the property that for each vertex all in-edges precede all out-edges.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, visitor, key=id, root=True, all_edges=False):
|
|
||||||
"""
|
|
||||||
Arguments:
|
|
||||||
visitor: visitor that implements accept(), pre(), post() and neighbors()
|
|
||||||
key: uniqueness key for nodes
|
|
||||||
root (bool): Whether to include the root node.
|
|
||||||
all_edges (bool): when ``False`` (default): Each node is reached once,
|
|
||||||
and ``map(lambda edge: edge.spec, visitor.edges)`` is topologically
|
|
||||||
ordered. When ``True``, every edge is listed, ordered such that for
|
|
||||||
each node all in-edges precede all out-edges.
|
|
||||||
"""
|
|
||||||
self.visited = set()
|
|
||||||
self.visitor = visitor
|
|
||||||
self.key = key
|
|
||||||
self.root = root
|
|
||||||
self.reverse_order = []
|
|
||||||
self.all_edges = all_edges
|
|
||||||
|
|
||||||
def accept(self, item):
|
|
||||||
if self.key(item.edge.spec) not in self.visited:
|
|
||||||
return True
|
|
||||||
if self.all_edges and (self.root or item.depth > 0):
|
|
||||||
self.reverse_order.append(item.edge)
|
|
||||||
return False
|
|
||||||
|
|
||||||
def pre(self, item):
|
|
||||||
# You could add a temporary marker for cycle detection
|
|
||||||
# that's cleared in `post`, but we assume no cycles.
|
|
||||||
pass
|
|
||||||
|
|
||||||
def post(self, item):
|
|
||||||
self.visited.add(self.key(item.edge.spec))
|
|
||||||
if self.root or item.depth > 0:
|
|
||||||
self.reverse_order.append(item.edge)
|
|
||||||
|
|
||||||
def neighbors(self, item):
|
|
||||||
return self.visitor.neighbors(item)
|
|
||||||
|
|
||||||
@property
|
|
||||||
def edges(self):
|
|
||||||
"""Return edges in topological order (in-edges precede out-edges)."""
|
|
||||||
return list(reversed(self.reverse_order))
|
|
||||||
|
|
||||||
|
|
||||||
def get_visitor_from_args(
|
def get_visitor_from_args(
|
||||||
cover, direction, depflag: Union[dt.DepFlag, dt.DepTypes], key=id, visited=None, visitor=None
|
cover, direction, depflag: Union[dt.DepFlag, dt.DepTypes], key=id, visited=None, visitor=None
|
||||||
):
|
):
|
||||||
@ -381,39 +317,54 @@ def traverse_breadth_first_tree_nodes(parent_id, edges, key=id, depth=0):
|
|||||||
yield item
|
yield item
|
||||||
|
|
||||||
|
|
||||||
# Topologic order
|
def traverse_topo_edges_generator(edges, visitor, key=id, root=True, all_edges=False):
|
||||||
def traverse_edges_topo(
|
|
||||||
specs,
|
|
||||||
direction="children",
|
|
||||||
deptype: Union[dt.DepFlag, dt.DepTypes] = "all",
|
|
||||||
key=id,
|
|
||||||
root=True,
|
|
||||||
all_edges=False,
|
|
||||||
):
|
|
||||||
"""
|
"""
|
||||||
Returns a list of edges in topological order, in the sense that all in-edges of a
|
Returns a list of edges in topological order, in the sense that all in-edges of a vertex appear
|
||||||
vertex appear before all out-edges. By default with direction=children edges are
|
before all out-edges.
|
||||||
directed from dependent to dependency. With direction=parents, the edges are
|
|
||||||
directed from dependency to dependent.
|
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
specs (list): List of root specs (considered to be depth 0)
|
edges (list): List of EdgeAndDepth instances
|
||||||
direction (str): ``children`` (edges are directed from dependent to dependency)
|
visitor: visitor instance that defines the sub-DAG to traverse
|
||||||
or ``parents`` (edges are flipped / directed from dependency to dependent)
|
|
||||||
deptype: allowed dependency types
|
|
||||||
key: function that takes a spec and outputs a key for uniqueness test.
|
key: function that takes a spec and outputs a key for uniqueness test.
|
||||||
root (bool): Yield the root nodes themselves
|
root (bool): Yield the root nodes themselves
|
||||||
all_edges (bool): When ``False`` only one in-edge per node is returned, when
|
all_edges (bool): When ``False`` only one in-edge per node is returned, when
|
||||||
``True`` all reachable edges are returned.
|
``True`` all reachable edges are returned.
|
||||||
"""
|
"""
|
||||||
if not isinstance(deptype, dt.DepFlag):
|
# Topo order used to be implemented using a DFS visitor, which was relatively efficient in that
|
||||||
deptype = dt.canonicalize(deptype)
|
# it would visit nodes only once, and it was composable. In practice however it would yield a
|
||||||
visitor: Union[BaseVisitor, ReverseVisitor, TopoVisitor] = BaseVisitor(deptype)
|
# DFS order on DAGs that are trees, which is undesirable in many cases. For example, a list of
|
||||||
if direction == "parents":
|
# search paths for trees is better in BFS order, so that direct dependencies are listed first.
|
||||||
visitor = ReverseVisitor(visitor, deptype)
|
# That way a transitive dependency cannot shadow a direct one. So, here we collect the sub-DAG
|
||||||
visitor = TopoVisitor(visitor, key=key, root=root, all_edges=all_edges)
|
# of interest and then compute a topological order that is the most breadth-first possible.
|
||||||
traverse_depth_first_with_visitor(with_artificial_edges(specs), visitor)
|
|
||||||
return visitor.edges
|
# maps node identifier to the number of remaining in-edges
|
||||||
|
in_edge_count = defaultdict(int)
|
||||||
|
# maps parent identifier to a list of edges, where None is a special identifier
|
||||||
|
# for the artificial root/source.
|
||||||
|
node_to_edges = defaultdict(list)
|
||||||
|
for edge in traverse_breadth_first_edges_generator(
|
||||||
|
edges, CoverEdgesVisitor(visitor, key=key), root=True, depth=False
|
||||||
|
):
|
||||||
|
in_edge_count[key(edge.spec)] += 1
|
||||||
|
parent_id = key(edge.parent) if edge.parent is not None else None
|
||||||
|
node_to_edges[parent_id].append(edge)
|
||||||
|
|
||||||
|
queue = [None]
|
||||||
|
|
||||||
|
while queue:
|
||||||
|
for edge in node_to_edges[queue.pop(0)]:
|
||||||
|
child_id = key(edge.spec)
|
||||||
|
in_edge_count[child_id] -= 1
|
||||||
|
|
||||||
|
should_yield = root or edge.parent is not None
|
||||||
|
|
||||||
|
if all_edges and should_yield:
|
||||||
|
yield edge
|
||||||
|
|
||||||
|
if in_edge_count[child_id] == 0:
|
||||||
|
if not all_edges and should_yield:
|
||||||
|
yield edge
|
||||||
|
queue.append(key(edge.spec))
|
||||||
|
|
||||||
|
|
||||||
# High-level API: traverse_edges, traverse_nodes, traverse_tree.
|
# High-level API: traverse_edges, traverse_nodes, traverse_tree.
|
||||||
@ -462,20 +413,20 @@ def traverse_edges(
|
|||||||
A generator that yields ``DependencySpec`` if depth is ``False``
|
A generator that yields ``DependencySpec`` if depth is ``False``
|
||||||
or a tuple of ``(depth, DependencySpec)`` if depth is ``True``.
|
or a tuple of ``(depth, DependencySpec)`` if depth is ``True``.
|
||||||
"""
|
"""
|
||||||
|
# validate input
|
||||||
if order == "topo":
|
if order == "topo":
|
||||||
if cover == "paths":
|
if cover == "paths":
|
||||||
raise ValueError("cover=paths not supported for order=topo")
|
raise ValueError("cover=paths not supported for order=topo")
|
||||||
# TODO: There is no known need for topological ordering of traversals (edge or node)
|
|
||||||
# with an initialized "visited" set. Revisit if needed.
|
|
||||||
if visited is not None:
|
if visited is not None:
|
||||||
raise ValueError("visited set not implemented for order=topo")
|
raise ValueError("visited set not implemented for order=topo")
|
||||||
return traverse_edges_topo(
|
elif order not in ("post", "pre", "breadth"):
|
||||||
specs, direction, deptype, key, root, all_edges=cover == "edges"
|
raise ValueError(f"Unknown order {order}")
|
||||||
)
|
|
||||||
|
|
||||||
|
# In topo traversal we need to construct a sub-DAG including all edges even if we are yielding
|
||||||
|
# a subset of them, hence "paths".
|
||||||
|
_cover = "paths" if order == "topo" else cover
|
||||||
|
visitor = get_visitor_from_args(_cover, direction, deptype, key, visited)
|
||||||
root_edges = with_artificial_edges(specs)
|
root_edges = with_artificial_edges(specs)
|
||||||
visitor = get_visitor_from_args(cover, direction, deptype, key, visited)
|
|
||||||
|
|
||||||
# Depth-first
|
# Depth-first
|
||||||
if order in ("pre", "post"):
|
if order in ("pre", "post"):
|
||||||
@ -484,8 +435,10 @@ def traverse_edges(
|
|||||||
)
|
)
|
||||||
elif order == "breadth":
|
elif order == "breadth":
|
||||||
return traverse_breadth_first_edges_generator(root_edges, visitor, root, depth)
|
return traverse_breadth_first_edges_generator(root_edges, visitor, root, depth)
|
||||||
|
elif order == "topo":
|
||||||
raise ValueError("Unknown order {}".format(order))
|
return traverse_topo_edges_generator(
|
||||||
|
root_edges, visitor, key, root, all_edges=cover == "edges"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def traverse_nodes(
|
def traverse_nodes(
|
||||||
|
Loading…
Reference in New Issue
Block a user