Consolidate DAG traversal in traverse.py, support DFS/BFS (#33406)

This PR introduces breadth-first traversal, and moves depth-first traversal
logic out of Spec's member functions, into `traverse.py`.

It introduces a high-level API with three main methods:

```python
spack.traverse.traverse_edges(specs, kwargs...)
spack.traverse.traverse_nodes(specs, kwags...)
spack.traverse.traverse_tree(specs, kwargs...)
```

with the usual `root`, `order`, `cover`, `direction`, `deptype`, `depth`, `key`,
`visited` kwargs for the first two.

What's new is that `order="breadth"` is added for breadth-first traversal.

The lower level API is not exported, but is certainly useful for advanced use
cases. The lower level API includes visitor classes for direction reversal and
edge pruning, which can be used to create more advanced traversal methods,
especially useful when the `deptype` is not constant but depends on the node
or depth. 

---

There's a couple nice use-cases for breadth-first traversal:

- Sometimes roots have to be handled differently (e.g. follow build edges of
  roots but not of deps). BFS ensures that root nodes are always discovered at
  depth 0, instead of at any depth > 1 as a dep of another root.
- When printing a tree, it would be nice to reduce indent levels so it fits in the 
  terminal, and ensure that e.g. `zlib` is not printed at indent level 10 as a 
  dependency of a build dep of a build dep -- rather if it's a direct dep of my
  package, I wanna see it at depth 1. This basically requires one breadth-first
  traversal to construct a tree, which can then be printed with depth-first traversal.
- In environments in general, it's sometimes inconvenient to have a double
  loop: first over the roots then over each root's deps, and maintain your own
  `visited` set outside. With BFS, you can simply init the queue with the
  environment root specs and it Just Works. [Example here](3ec7304699/lib/spack/spack/environment/environment.py (L1815-L1816))
This commit is contained in:
Harmen Stoppels 2022-11-02 07:04:47 +01:00 committed by GitHub
parent 353e31e72a
commit 4bad9f9b13
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 798 additions and 202 deletions

View File

@ -28,6 +28,7 @@
import spack.environment.shell
import spack.schema.env
import spack.tengine
import spack.traverse as traverse
import spack.util.string as string
from spack.util.environment import EnvironmentModifications
@ -634,36 +635,6 @@ def env_depfile_setup_parser(subparser):
)
class SpecNode(object):
def __init__(self, spec, depth):
self.spec = spec
self.depth = depth
def key(self):
return self.spec.dag_hash()
class UniqueNodesQueue(object):
def __init__(self, init=[]):
self.seen = set()
self.queue = []
for item in init:
self.push(item)
def push(self, item):
key = item.key()
if key in self.seen:
return
self.queue.append(item)
self.seen.add(key)
def empty(self):
return len(self.queue) == 0
def pop(self):
return self.queue.pop()
def _deptypes(use_buildcache):
"""What edges should we follow for a given node? If it's a cache-only
node, then we can drop build type deps."""
@ -692,29 +663,27 @@ def __init__(self, target, pkg_buildcache, deps_buildcache):
def neighbors(self, node):
"""Produce a list of spec to follow from node"""
deptypes = self.deptypes_root if node.depth == 0 else self.deptypes_deps
return node.spec.dependencies(deptype=deptypes)
return traverse.sort_edges(node.edge.spec.edges_to_dependencies(deptype=deptypes))
def visit(self, node):
dag_hash = node.spec.dag_hash()
spec_str = node.spec.format("{name}{@version}{%compiler}{variants}{arch=architecture}")
buildcache = self.pkg_buildcache if node.depth == 0 else self.deps_buildcache
if buildcache == "only":
build_cache_flag = "--use-buildcache=only"
elif buildcache == "never":
build_cache_flag = "--use-buildcache=never"
else:
build_cache_flag = ""
prereqs = " ".join([self.target(dep.dag_hash()) for dep in self.neighbors(node)])
self.adjacency_list.append((dag_hash, spec_str, build_cache_flag, prereqs))
def build_cache_flag(self, depth):
setting = self.pkg_buildcache if depth == 0 else self.deps_buildcache
if setting == "only":
return "--use-buildcache=only"
elif setting == "never":
return "--use-buildcache=never"
return ""
def accept(self, node):
dag_hash = node.edge.spec.dag_hash()
spec_str = node.edge.spec.format(
"{name}{@version}{%compiler}{variants}{arch=architecture}"
)
buildcache_flag = self.build_cache_flag(node.depth)
prereqs = " ".join([self.target(dep.spec.dag_hash()) for dep in self.neighbors(node)])
self.adjacency_list.append((dag_hash, spec_str, buildcache_flag, prereqs))
def traverse_breadth_first(visitor, specs=[]):
queue = UniqueNodesQueue([SpecNode(s, 0) for s in specs])
while not queue.empty():
node = queue.pop()
visitor.visit(node)
for child in visitor.neighbors(node):
queue.push(SpecNode(child, node.depth + 1))
# We already accepted this
return True
def env_depfile(args):
@ -751,17 +720,22 @@ def get_install_deps_target(name):
else:
roots = [s for _, s in env.concretized_specs()]
# Shallow means we will drop non-direct build deps from the DAG
# We produce a sub-DAG from the DAG induced by roots, where we drop build
# edges for those specs that are installed through a binary cache.
pkg_buildcache, dep_buildcache = args.use_buildcache
visitor = MakeTargetVisitor(get_install_target, pkg_buildcache, dep_buildcache)
traverse_breadth_first(visitor, roots)
make_targets = MakeTargetVisitor(get_install_target, pkg_buildcache, dep_buildcache)
traverse.traverse_breadth_first_with_visitor(
roots, traverse.CoverNodesVisitor(make_targets, key=lambda s: s.dag_hash())
)
# Root specs without deps are the prereqs for the environment target
root_install_targets = [get_install_target(h.dag_hash()) for h in roots]
# Cleanable targets...
cleanable_targets = [get_install_target(h) for h, _, _, _ in visitor.adjacency_list]
cleanable_targets.extend([get_install_deps_target(h) for h, _, _, _ in visitor.adjacency_list])
cleanable_targets = [get_install_target(h) for h, _, _, _ in make_targets.adjacency_list]
cleanable_targets.extend(
[get_install_deps_target(h) for h, _, _, _ in make_targets.adjacency_list]
)
buf = six.StringIO()
@ -780,7 +754,7 @@ def get_install_deps_target(name):
"install_deps_target": get_target(".install-deps"),
"any_hash_target": get_target("%"),
"jobserver_support": "+" if args.jobserver else "",
"adjacency_list": visitor.adjacency_list,
"adjacency_list": make_targets.adjacency_list,
}
)

View File

@ -76,8 +76,7 @@ def setup_parser(subparser):
"-t", "--types", action="store_true", default=False, help="show dependency types"
)
arguments.add_common_arguments(subparser, ["specs"])
spack.cmd.common.arguments.add_concretizer_args(subparser)
arguments.add_concretizer_args(subparser)
def spec(parser, args):

View File

@ -77,7 +77,6 @@
"""
import collections
import itertools
import operator
import os
import re
import sys
@ -106,6 +105,7 @@
import spack.solver
import spack.store
import spack.target
import spack.traverse as traverse
import spack.util.crypto
import spack.util.executable
import spack.util.hash
@ -814,10 +814,6 @@ def _sort_by_dep_types(dspec):
return tuple(t not in dspec.deptypes for t in ("link", "run", "build", "test"))
def _sort_edges_by_pkg_name(edges):
edges.sort(key=lambda edge: edge.spec.name)
#: Enum for edge directions
EdgeDirection = lang.enum(parent=0, child=1)
@ -1601,144 +1597,12 @@ def installed_upstream(self):
return upstream
def traverse(self, **kwargs):
depth = kwargs.get("depth", False)
"""Shorthand for :meth:`~spack.traverse.traverse_nodes`"""
return traverse.traverse_nodes([self], **kwargs)
if depth:
for d, edge in self.traverse_edges(**kwargs):
yield d, edge.spec
else:
for edge in self.traverse_edges(**kwargs):
yield edge.spec
def traverse_edges(self, visited=None, d=0, deptype="all", dep_spec=None, **kwargs):
"""Generic traversal of the DAG represented by this spec.
This yields ``DependencySpec`` objects as they are traversed.
An imaginary incoming edge to the root is yielded first as
``DependencySpec(None, root, ())``.
Note: the edges are traversed from ``edge.parent`` to ``edge.spec``,
even if the direction is reversed. When ``direction="children"`` the
parent points to the dependent, and spec to the dependency. Conversely
when ``direction="parents"`` parent points to the dependency, and spec
to the dependent.
Options:
order [=pre|post]
Order to traverse spec nodes. Defaults to preorder traversal.
Options are:
'pre': Pre-order traversal; each node is yielded before its
children in the dependency DAG.
'post': Post-order traversal; each node is yielded after its
children in the dependency DAG.
cover [=nodes|edges|paths]
Determines how extensively to cover the dag. Possible values:
'nodes': Visit each node in the dag only once. Every node
yielded by this function will be unique.
'edges': If a node has been visited once but is reached along a
new path from the root, yield it but do not descend
into it. This traverses each 'edge' in the DAG once.
'paths': Explore every unique path reachable from the root.
This descends into visited subtrees and will yield
nodes twice if they're reachable by multiple paths.
depth [=False]
Defaults to False. When True, yields not just nodes in the
spec, but also their depth from the root in a (depth, node)
tuple.
key [=id]
Allow a custom key function to track the identity of nodes
in the traversal.
root [=True]
If False, this won't yield the root node, just its descendents.
direction [=children|parents]
If 'children', does a traversal of this spec's children. If
'parents', traverses upwards in the DAG towards the root.
"""
# get initial values for kwargs
depth = kwargs.get("depth", False)
key_fun = kwargs.get("key", id)
if isinstance(key_fun, six.string_types):
key_fun = operator.attrgetter(key_fun)
yield_root = kwargs.get("root", True)
cover = kwargs.get("cover", "nodes")
direction = kwargs.get("direction", "children")
order = kwargs.get("order", "pre")
# we don't want to run canonical_deptype every time through
# traverse, because it is somewhat expensive. This ensures we
# canonicalize only once.
canonical_deptype = kwargs.get("canonical_deptype", None)
if canonical_deptype is None:
deptype = dp.canonical_deptype(deptype)
kwargs["canonical_deptype"] = deptype
else:
deptype = canonical_deptype
# Make sure kwargs have legal values; raise ValueError if not.
def validate(name, val, allowed_values):
if val not in allowed_values:
raise ValueError(
"Invalid value for %s: %s. Choices are %s"
% (name, val, ",".join(allowed_values))
)
validate("cover", cover, ("nodes", "edges", "paths"))
validate("direction", direction, ("children", "parents"))
validate("order", order, ("pre", "post"))
if visited is None:
visited = set()
key = key_fun(self)
# Node traversal does not yield visited nodes.
if key in visited and cover == "nodes":
return
def return_val(dspec):
if not dspec:
# make a fake dspec for the root.
dspec = DependencySpec(None, self, ())
return (d, dspec) if depth else dspec
yield_me = yield_root or d > 0
# Preorder traversal yields before successors
if yield_me and order == "pre":
yield return_val(dep_spec)
# Edge traversal yields but skips children of visited nodes
if not (key in visited and cover == "edges"):
visited.add(key)
# This code determines direction and yields the children/parents
if direction == "children":
edges = self.edges_to_dependencies()
else:
edges = [edge.flip() for edge in self.edges_from_dependents()]
_sort_edges_by_pkg_name(edges)
for dspec in edges:
dt = dspec.deptypes
if dt and not any(d in deptype for d in dt):
continue
for child in dspec.spec.traverse_edges(visited, d + 1, deptype, dspec, **kwargs):
yield child
# Postorder traversal yields after successors
if yield_me and order == "post":
yield return_val(dep_spec)
def traverse_edges(self, **kwargs):
"""Shorthand for :meth:`~spack.traverse.traverse_edges`"""
return traverse.traverse_edges([self], **kwargs)
@property
def short_spec(self):
@ -4604,11 +4468,13 @@ def tree(self, **kwargs):
show_types = kwargs.pop("show_types", False)
deptypes = kwargs.pop("deptypes", "all")
recurse_dependencies = kwargs.pop("recurse_dependencies", True)
depth_first = kwargs.pop("depth_first", False)
lang.check_kwargs(kwargs, self.tree)
out = ""
for d, dep_spec in self.traverse_edges(
order="pre", cover=cover, depth=True, deptype=deptypes
for d, dep_spec in traverse.traverse_tree(
[self], cover=cover, deptype=deptypes, depth_first=depth_first
):
node = dep_spec.spec

View File

@ -0,0 +1,211 @@
# Copyright 2013-2022 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import spack.traverse as traverse
from spack.spec import Spec
def key_by_hash(spec):
return spec.dag_hash()
def test_breadth_first_traversal(config, mock_packages):
# That that depth of discovery is non-decreasing
s = Spec("dttop").concretized()
depths = [
depth
for (depth, _) in traverse.traverse_nodes(
[s], order="breadth", key=key_by_hash, depth=True
)
]
assert depths == sorted(depths)
def test_breadth_first_deptype_traversal(config, mock_packages):
s = Spec("dtuse").concretized()
names = [
"dtuse",
"dttop",
"dtbuild1",
"dtlink1",
"dtbuild2",
"dtlink2",
"dtlink3",
"dtlink4",
]
traversal = traverse.traverse_nodes(
[s], order="breadth", key=key_by_hash, deptype=("build", "link")
)
assert [x.name for x in traversal] == names
def test_breadth_firsrt_traversal_deptype_with_builddeps(config, mock_packages):
s = Spec("dttop").concretized()
names = ["dttop", "dtbuild1", "dtlink1", "dtbuild2", "dtlink2", "dtlink3", "dtlink4"]
traversal = traverse.traverse_nodes(
[s], order="breadth", key=key_by_hash, deptype=("build", "link")
)
assert [x.name for x in traversal] == names
def test_breadth_first_traversal_deptype_full(config, mock_packages):
s = Spec("dttop").concretized()
names = [
"dttop",
"dtbuild1",
"dtlink1",
"dtrun1",
"dtbuild2",
"dtlink2",
"dtrun2",
"dtlink3",
"dtlink5",
"dtrun3",
"dtlink4",
"dtbuild3",
]
traversal = traverse.traverse_nodes([s], order="breadth", key=key_by_hash, deptype="all")
assert [x.name for x in traversal] == names
def test_breadth_first_traversal_deptype_run(config, mock_packages):
s = Spec("dttop").concretized()
names = ["dttop", "dtrun1", "dtrun3"]
traversal = traverse.traverse_nodes([s], order="breadth", key=key_by_hash, deptype="run")
assert [x.name for x in traversal] == names
def test_breadth_first_traversal_reverse(config, mock_packages):
s = Spec("dt-diamond").concretized()
gen = traverse.traverse_nodes(
[s["dt-diamond-bottom"]], order="breadth", key=key_by_hash, direction="parents", depth=True
)
assert [(depth, spec.name) for (depth, spec) in gen] == [
(0, "dt-diamond-bottom"),
(1, "dt-diamond-left"),
(1, "dt-diamond-right"),
(2, "dt-diamond"),
]
def test_breadth_first_traversal_multiple_roots(config, mock_packages):
# With DFS, the branch dt-diamond -> dt-diamond-left -> dt-diamond-bottom
# is followed, with BFS, dt-diamond-bottom should be traced through the second
# root dt-diamond-right at depth 1 instead.
s = Spec("dt-diamond").concretized()
roots = [s["dt-diamond"], s["dt-diamond-right"]]
gen = traverse.traverse_edges(roots, order="breadth", key=key_by_hash, depth=True, root=False)
assert [(depth, edge.parent.name, edge.spec.name) for (depth, edge) in gen] == [
(1, "dt-diamond", "dt-diamond-left"), # edge from first root "to" depth 1
(1, "dt-diamond-right", "dt-diamond-bottom"), # edge from second root "to" depth 1
]
def test_breadth_first_versus_depth_first_tree(config, mock_packages):
"""
The packages chain-a, chain-b, chain-c, chain-d have the following DAG:
a --> b --> c --> d # a chain
a --> c # and "skip" connections
a --> d
Here we test at what depth the nodes are discovered when using BFS vs DFS.
"""
s = Spec("chain-a").concretized()
# BFS should find all nodes as direct deps
assert [
(depth, edge.spec.name)
for (depth, edge) in traverse.traverse_tree([s], cover="nodes", depth_first=False)
] == [
(0, "chain-a"),
(1, "chain-b"),
(1, "chain-c"),
(1, "chain-d"),
]
# DFS will disover all nodes along the chain a -> b -> c -> d.
assert [
(depth, edge.spec.name)
for (depth, edge) in traverse.traverse_tree([s], cover="nodes", depth_first=True)
] == [
(0, "chain-a"),
(1, "chain-b"),
(2, "chain-c"),
(3, "chain-d"),
]
# When covering all edges, we should never exceed depth 2 in BFS.
assert [
(depth, edge.spec.name)
for (depth, edge) in traverse.traverse_tree([s], cover="edges", depth_first=False)
] == [
(0, "chain-a"),
(1, "chain-b"),
(2, "chain-c"),
(1, "chain-c"),
(2, "chain-d"),
(1, "chain-d"),
]
# In DFS we see the chain again.
assert [
(depth, edge.spec.name)
for (depth, edge) in traverse.traverse_tree([s], cover="edges", depth_first=True)
] == [
(0, "chain-a"),
(1, "chain-b"),
(2, "chain-c"),
(3, "chain-d"),
(1, "chain-c"),
(1, "chain-d"),
]
def test_breadth_first_versus_depth_first_printing(config, mock_packages):
"""Test breadth-first versus depth-first tree printing."""
s = Spec("chain-a").concretized()
args = {"format": "{name}", "color": False}
dfs_tree_nodes = """\
chain-a
^chain-b
^chain-c
^chain-d
"""
assert s.tree(depth_first=True, **args) == dfs_tree_nodes
bfs_tree_nodes = """\
chain-a
^chain-b
^chain-c
^chain-d
"""
assert s.tree(depth_first=False, **args) == bfs_tree_nodes
dfs_tree_edges = """\
chain-a
^chain-b
^chain-c
^chain-d
^chain-c
^chain-d
"""
assert s.tree(depth_first=True, cover="edges", **args) == dfs_tree_edges
bfs_tree_edges = """\
chain-a
^chain-b
^chain-c
^chain-c
^chain-d
^chain-d
"""
assert s.tree(depth_first=False, cover="edges", **args) == bfs_tree_edges

417
lib/spack/spack/traverse.py Normal file
View File

@ -0,0 +1,417 @@
# Copyright 2013-2022 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
from collections import defaultdict, namedtuple
import spack.spec
# Export only the high-level API.
__all__ = ["traverse_edges", "traverse_nodes", "traverse_tree"]
#: Data class that stores a directed edge together with depth at
#: which the target vertex was found. It is passed to ``accept``
#: and ``neighbors`` of visitors, so they can decide whether to
#: follow the edge or not.
EdgeAndDepth = namedtuple("EdgeAndDepth", ["edge", "depth"])
def sort_edges(edges):
edges.sort(key=lambda edge: edge.spec.name)
return edges
class BaseVisitor(object):
"""A simple visitor that accepts all edges unconditionally and follows all
edges to dependencies of a given ``deptype``."""
def __init__(self, deptype="all"):
self.deptype = deptype
def accept(self, node):
"""
Arguments:
node (EdgeAndDepth): Provides the depth and the edge through which the
node was discovered
Returns:
bool: Returns ``True`` if the node is accepted. When ``False``, this
indicates that the node won't be yielded by iterators and dependencies
are not followed.
"""
return True
def neighbors(self, node):
return sort_edges(node.edge.spec.edges_to_dependencies(deptype=self.deptype))
class ReverseVisitor(object):
"""A visitor that reverses the arrows in the DAG, following dependents."""
def __init__(self, visitor, deptype="all"):
self.visitor = visitor
self.deptype = deptype
def accept(self, node):
return self.visitor.accept(node)
def neighbors(self, node):
"""Return dependents, note that we actually flip the edge direction to allow
generic programming"""
spec = node.edge.spec
return sort_edges(
[edge.flip() for edge in spec.edges_from_dependents(deptype=self.deptype)]
)
class CoverNodesVisitor(object):
"""A visitor that traverses each node once."""
def __init__(self, visitor, key=id, visited=None):
self.visitor = visitor
self.key = key
self.visited = set() if visited is None else visited
def accept(self, node):
# Covering nodes means: visit nodes once and only once.
key = self.key(node.edge.spec)
if key in self.visited:
return False
accept = self.visitor.accept(node)
self.visited.add(key)
return accept
def neighbors(self, node):
return self.visitor.neighbors(node)
class CoverEdgesVisitor(object):
"""A visitor that traverses all edges once."""
def __init__(self, visitor, key=id, visited=None):
self.visitor = visitor
self.visited = set() if visited is None else visited
self.key = key
def accept(self, node):
return self.visitor.accept(node)
def neighbors(self, node):
# Covering edges means: drop dependencies of visited nodes.
key = self.key(node.edge.spec)
if key in self.visited:
return []
self.visited.add(key)
return self.visitor.neighbors(node)
def get_visitor_from_args(cover, direction, deptype, key=id, visited=None, visitor=None):
"""
Create a visitor object from common keyword arguments.
Arguments:
cover (str): Determines how extensively to cover the dag. Possible values:
``nodes`` -- Visit each unique node in the dag only once.
``edges`` -- If a node has been visited once but is reached along a
new path, it's accepted, but not recurisvely followed. This traverses
each 'edge' in the DAG once.
``paths`` -- Explore every unique path reachable from the root.
This descends into visited subtrees and will accept nodes multiple
times if they're reachable by multiple paths.
direction (str): ``children`` or ``parents``. If ``children``, does a traversal
of this spec's children. If ``parents``, traverses upwards in the DAG
towards the root.
deptype (str or tuple): allowed dependency types
key: function that takes a spec and outputs a key for uniqueness test.
visited (set or None): a set of nodes not to follow (when using cover=nodes/edges)
visitor: An initial visitor that is used for composition.
Returns:
A visitor
"""
visitor = visitor or BaseVisitor(deptype)
if cover == "nodes":
visitor = CoverNodesVisitor(visitor, key, visited)
elif cover == "edges":
visitor = CoverEdgesVisitor(visitor, key, visited)
if direction == "parents":
visitor = ReverseVisitor(visitor, deptype)
return visitor
def root_specs(specs):
"""Initialize a list of edges from an imaginary root node to the root specs."""
return [
EdgeAndDepth(edge=spack.spec.DependencySpec(parent=None, spec=s, deptypes=()), depth=0)
for s in specs
]
def traverse_depth_first_edges_generator(nodes, visitor, post_order=False, root=True, depth=False):
# This is a somewhat non-standard implementation, but the reason to start with
# edges is that we don't have to deal with an artificial root node when doing DFS
# on multiple (root) specs.
for node in nodes:
if not visitor.accept(node):
continue
yield_me = root or node.depth > 0
# Pre
if yield_me and not post_order:
yield (node.depth, node.edge) if depth else node.edge
neighbors = [
EdgeAndDepth(edge=edge, depth=node.depth + 1) for edge in visitor.neighbors(node)
]
# This extra branch is just for efficiency.
if len(neighbors) >= 0:
for item in traverse_depth_first_edges_generator(
neighbors, visitor, post_order, root, depth
):
yield item
# Post
if yield_me and post_order:
yield (node.depth, node.edge) if depth else node.edge
def traverse_breadth_first_edges_generator(queue, visitor, root=True, depth=False):
while len(queue) > 0:
node = queue.pop(0)
# If the visitor doesn't accept the node, we don't yield it nor follow its edges.
if not visitor.accept(node):
continue
if root or node.depth > 0:
yield (node.depth, node.edge) if depth else node.edge
for edge in visitor.neighbors(node):
queue.append(EdgeAndDepth(edge, node.depth + 1))
def traverse_breadth_first_with_visitor(specs, visitor):
"""Performs breadth first traversal for a list of specs (not a generator).
Arguments:
specs (list): List of Spec instances.
visitor: object that implements accept and neighbors interface, see
for example BaseVisitor.
"""
queue = root_specs(specs)
while len(queue) > 0:
node = queue.pop(0)
# If the visitor doesn't accept the node, we don't traverse it further.
if not visitor.accept(node):
continue
for edge in visitor.neighbors(node):
queue.append(EdgeAndDepth(edge, node.depth + 1))
# Helper functions for generating a tree using breadth-first traversal
def breadth_first_to_tree_edges(roots, deptype="all", key=id):
"""This produces an adjacency list (with edges) and a map of parents.
There may be nodes that are reached through multiple edges. To print as
a tree, one should use the parents dict to verify if the path leading to
the node is through the correct parent. If not, the branch should be
truncated."""
edges = defaultdict(list)
parents = dict()
for edge in traverse_edges(roots, order="breadth", cover="edges", deptype=deptype, key=key):
parent_id = None if edge.parent is None else key(edge.parent)
child_id = key(edge.spec)
edges[parent_id].append(edge)
if child_id not in parents:
parents[child_id] = parent_id
return edges, parents
def breadth_first_to_tree_nodes(roots, deptype="all", key=id):
"""This produces a list of edges that forms a tree; every node has no more
that one incoming edge."""
edges = defaultdict(list)
for edge in traverse_edges(roots, order="breadth", cover="nodes", deptype=deptype, key=key):
parent_id = None if edge.parent is None else key(edge.parent)
edges[parent_id].append(edge)
return edges
def traverse_breadth_first_tree_edges(parent_id, edges, parents, key=id, depth=0):
"""Do a depth-first search on edges generated by bread-first traversal,
which can be used to produce a tree."""
for edge in edges[parent_id]:
yield (depth, edge)
child_id = key(edge.spec)
# Don't follow further if we're not the parent
if parents[child_id] != parent_id:
continue
# yield from ... in Python 3.
for item in traverse_breadth_first_tree_edges(child_id, edges, parents, key, depth + 1):
yield item
def traverse_breadth_first_tree_nodes(parent_id, edges, key=id, depth=0):
for edge in edges[parent_id]:
yield (depth, edge)
for item in traverse_breadth_first_tree_nodes(key(edge.spec), edges, key, depth + 1):
yield item
# High-level API: traverse_edges, traverse_nodes, traverse_tree.
def traverse_edges(
specs,
root=True,
order="pre",
cover="nodes",
direction="children",
deptype="all",
depth=False,
key=id,
visited=None,
):
"""
Generator that yields edges from the DAG, starting from a list of root specs.
Arguments:
specs (list): List of root specs (considered to be depth 0)
root (bool): Yield the root nodes themselves
order (str): What order of traversal to use in the DAG. For depth-first
search this can be ``pre`` or ``post``. For BFS this should be ``breadth``.
cover (str): Determines how extensively to cover the dag. Possible values:
``nodes`` -- Visit each unique node in the dag only once.
``edges`` -- If a node has been visited once but is reached along a
new path, it's accepted, but not recurisvely followed. This traverses
each 'edge' in the DAG once.
``paths`` -- Explore every unique path reachable from the root.
This descends into visited subtrees and will accept nodes multiple
times if they're reachable by multiple paths.
direction (str): ``children`` or ``parents``. If ``children``, does a traversal
of this spec's children. If ``parents``, traverses upwards in the DAG
towards the root.
deptype (str or tuple): allowed dependency types
depth (bool): When ``False``, yield just edges. When ``True`` yield
the tuple (depth, edge), where depth corresponds to the depth
at which edge.spec was discovered.
key: function that takes a spec and outputs a key for uniqueness test.
visited (set or None): a set of nodes not to follow
Returns:
A generator that yields ``DependencySpec`` if depth is ``False``
or a tuple of ``(depth, DependencySpec)`` if depth is ``True``.
"""
root_edges = root_specs(specs)
visitor = get_visitor_from_args(cover, direction, deptype, key, visited)
# Depth-first
if order in ("pre", "post"):
return traverse_depth_first_edges_generator(
root_edges, visitor, order == "post", root, depth
)
# Breadth-first
return traverse_breadth_first_edges_generator(root_edges, visitor, root, depth)
def traverse_nodes(
specs,
root=True,
order="pre",
cover="nodes",
direction="children",
deptype="all",
depth=False,
key=id,
visited=None,
):
"""
Generator that yields specs from the DAG, starting from a list of root specs.
Arguments:
specs (list): List of root specs (considered to be depth 0)
root (bool): Yield the root nodes themselves
order (str): What order of traversal to use in the DAG. For depth-first
search this can be ``pre`` or ``post``. For BFS this should be ``breadth``.
cover (str): Determines how extensively to cover the dag. Possible values:
``nodes`` -- Visit each unique node in the dag only once.
``edges`` -- If a node has been visited once but is reached along a
new path, it's accepted, but not recurisvely followed. This traverses
each 'edge' in the DAG once.
``paths`` -- Explore every unique path reachable from the root.
This descends into visited subtrees and will accept nodes multiple
times if they're reachable by multiple paths.
direction (str): ``children`` or ``parents``. If ``children``, does a traversal
of this spec's children. If ``parents``, traverses upwards in the DAG
towards the root.
deptype (str or tuple): allowed dependency types
depth (bool): When ``False``, yield just edges. When ``True`` yield
the tuple ``(depth, edge)``, where depth corresponds to the depth
at which ``edge.spec`` was discovered.
key: function that takes a spec and outputs a key for uniqueness test.
visited (set or None): a set of nodes not to follow
Yields:
By default :class:`~spack.spec.Spec`, or a tuple ``(depth, Spec)`` if depth is
set to ``True``.
"""
for item in traverse_edges(specs, root, order, cover, direction, deptype, depth, key, visited):
yield (item[0], item[1].spec) if depth else item.spec
def traverse_tree(specs, cover="nodes", deptype="all", key=id, depth_first=True):
"""
Generator that yields ``(depth, DependencySpec)`` tuples in the depth-first
pre-order, so that a tree can be printed from it.
Arguments:
specs (list): List of root specs (considered to be depth 0)
cover (str): Determines how extensively to cover the dag. Possible values:
``nodes`` -- Visit each unique node in the dag only once.
``edges`` -- If a node has been visited once but is reached along a
new path, it's accepted, but not recurisvely followed. This traverses
each 'edge' in the DAG once.
``paths`` -- Explore every unique path reachable from the root.
This descends into visited subtrees and will accept nodes multiple
times if they're reachable by multiple paths.
deptype (str or tuple): allowed dependency types
key: function that takes a spec and outputs a key for uniqueness test.
depth_first (bool): Explore the tree in depth-first or breadth-first order.
When setting ``depth_first=True`` and ``cover=nodes``, each spec only
occurs once at the shallowest level, which is useful when rendering
the tree in a terminal.
Returns:
A generator that yields ``(depth, DependencySpec)`` tuples in such an order
that a tree can be printed.
"""
# BFS only makes sense when going over edges and nodes, for paths the tree is
# identical to DFS, which is much more efficient then.
if not depth_first and cover == "edges":
edges, parents = breadth_first_to_tree_edges(specs, deptype, key)
return traverse_breadth_first_tree_edges(None, edges, parents)
elif not depth_first and cover == "nodes":
edges = breadth_first_to_tree_nodes(specs, deptype, key)
return traverse_breadth_first_tree_nodes(None, edges)
return traverse_edges(specs, order="pre", cover=cover, deptype=deptype, key=key, depth=True)

View File

@ -0,0 +1,34 @@
# Copyright 2013-2022 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
from spack.package import *
class ChainA(Package):
"""
Part of a collection of mock packages used for testing depth-first vs
breadth-first traversal. The DAG they form:
a --> b --> c --> d # a chain
a --> c # "skip" connection
a --> d # "skip" connection
Spack's edge order is based on the child package name.
In depth-first traversal we get a tree that looks like a chain:
a
b
c
d
In breadth-first we get the tree:
a
b
c
d
"""
homepage = "https://example.com"
has_code = False
version("1.0")
depends_on("chain-b")
depends_on("chain-c")
depends_on("chain-d")

View File

@ -0,0 +1,32 @@
# Copyright 2013-2022 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
from spack.package import *
class ChainB(Package):
"""
Part of a collection of mock packages used for testing depth-first vs
breadth-first traversal. The DAG they form:
a --> b --> c --> d # a chain
a --> c # "skip" connection
a --> d # "skip" connection
Spack's edge order is based on the child package name.
In depth-first traversal we get a tree that looks like a chain:
a
b
c
d
In breadth-first we get the tree:
a
b
c
d
"""
homepage = "https://example.com"
has_code = False
version("1.0")
depends_on("chain-c")

View File

@ -0,0 +1,32 @@
# Copyright 2013-2022 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
from spack.package import *
class ChainC(Package):
"""
Part of a collection of mock packages used for testing depth-first vs
breadth-first traversal. The DAG they form:
a --> b --> c --> d # a chain
a --> c # "skip" connection
a --> d # "skip" connection
Spack's edge order is based on the child package name.
In depth-first traversal we get a tree that looks like a chain:
a
b
c
d
In breadth-first we get the tree:
a
b
c
d
"""
homepage = "https://example.com"
has_code = False
version("1.0")
depends_on("chain-d")

View File

@ -0,0 +1,31 @@
# Copyright 2013-2022 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
from spack.package import *
class ChainD(Package):
"""
Part of a collection of mock packages used for testing depth-first vs
breadth-first traversal. The DAG they form:
a --> b --> c --> d # a chain
a --> c # "skip" connection
a --> d # "skip" connection
Spack's edge order is based on the child package name.
In depth-first traversal we get a tree that looks like a chain:
a
b
c
d
In breadth-first we get the tree:
a
b
c
d
"""
homepage = "https://example.com"
has_code = False
version("1.0")