More efficient implementation of find_dependent_nodes

New implementation of find_dependent_nodes that builds a copy of the graph
with all edges reversed and then invokes find_reachable_nodes on that copy.

Rationale:

From original comment:
TODO(thinrichs): is it equivalent/better to invert all the edges
and run depth-first-search?

I believe inverting then dfs is better.

Inverting first, then dfs: each edge is examined at most twice (once while
building the inverted graph and once during the depth-first search).

Current implementation: each edge is examined once per iteration of the
`while changed` loop. That loop usually runs at least twice, and in the worst
case runs as many times as there are nodes (e.g., a linear graph visited in
the worst-case `for node in self.edges` order).

Change-Id: I1ad772fb1f9732b89d13896dbe84278c6c4fd39c
This commit is contained in:
Eric K 2016-02-05 13:19:32 -08:00
parent 81adeb8dd3
commit ccbdc795ab
2 changed files with 43 additions and 13 deletions

View File

@ -282,29 +282,35 @@ class Graph(object):
s += "}"
return s
def _inverted_edge_graph(self):
    """Build a new Graph whose edges are the reverse of this graph's edges.

    The copy is shallow: the returned graph's ``nodes`` attribute is the very
    same object as ``self.nodes``; only the edge mapping is built afresh.
    For every edge ``source -> target`` stored in ``self.edges``, the result
    holds an edge ``target -> source``.
    """
    inverted = Graph()
    # Share (do not copy) the node table with the original graph.
    inverted.nodes = self.nodes
    for source, outgoing in self.edges.items():
        for edge in outgoing:
            # Create the target's edge set on first use, then record the
            # reversed edge pointing back at the original source.
            inverted.edges.setdefault(edge.node, set()).add(
                Graph.edge_data(source))
    return inverted
def find_dependent_nodes(self, nodes):
    """Return all nodes dependent on @nodes.

    Node T is dependent on node T.
    Node T is dependent on node R if there is an edge from node T to S,
    and S is dependent on R.

    Note that node T is dependent on node T even if T is not in the graph.

    :param nodes: iterable of the nodes whose dependents are wanted
    :returns: set containing every node in @nodes plus every node that has
        a path of edges leading to one of them
    """
    # Materialize once so a one-shot iterable can be both searched from and
    # added to the result.
    roots = list(nodes)
    # A node that can reach R in this graph is exactly a node reachable from
    # R once every edge is reversed, so a single search over the inverted
    # graph replaces the repeated fixpoint scan over all edges.
    reachable = self._inverted_edge_graph().find_reachable_nodes(roots)
    # The union keeps the requested nodes even when they are not in the
    # graph and therefore cannot be found by the search.
    return reachable | set(roots)
def find_reachable_nodes(self, roots):
"""Return all nodes reachable from @roots."""
if len(roots) == 0:
return set()
self.depth_first_search(roots)
result = [x for x in self.nodes if self.nodes[x].begin is not None]
self.reset_nodes()

View File

@ -223,6 +223,30 @@ class TestGraph(base.TestCase):
self.assertTrue(g1.dependencies(5), set([5]))
self.assertTrue(g1.dependencies(11), set([11, 12]))
def test_find_dependent_nodes(self):
    """Check find_dependent_nodes on an empty, an acyclic and a cyclic graph.

    The expected sets show that the dependents of a node are the node itself
    plus every node with a path of edges leading to it.
    """
    graph = utility.Graph()
    # A node is dependent on itself even when it is not in the graph.
    self.assertEqual(graph.find_dependent_nodes([1]), {1})

    edges = [
        (0, 1), (1, 2), (2, 3), (2, 4), (3, 5),
        (0, 6), (7, 8), (8, 9), (10, 5), (11, 12),
    ]
    for source, target in edges:
        graph.add_edge(source, target)

    expectations = [
        ([0], {0}),
        ([2], {2, 1, 0}),
        ([5], {5, 0, 1, 2, 3, 10}),
        ([12], {11, 12}),
        ([5, 6], {5, 0, 1, 2, 3, 10, 6}),
    ]
    for roots, expected in expectations:
        self.assertEqual(graph.find_dependent_nodes(roots), expected)

    graph.add_edge(5, 2)  # add cycle
    self.assertEqual(graph.find_dependent_nodes([2]),
                     {5, 0, 1, 2, 3, 10})
class TestBagGraph(base.TestCase):
def test_nodes(self):