More efficient implementation of find_dependent_nodes

New implementation of find_dependent_nodes that builds a copy of the graph with all edges reversed, then invokes find_reachable_nodes on it. Rationale, from the original comment: "TODO(thinrichs): is it equivalent/better to invert all the edges and run depth-first-search?" I believe inverting first and then running DFS is better. Inverting first, then DFS: each edge is examined twice. Current implementation: each edge is examined once per iteration of the `while changed` loop, which usually runs at least twice and up to the number of nodes in the worst case (e.g., a linear graph with the worst-case `for node in self.edges` order). Change-Id: I1ad772fb1f9732b89d13896dbe84278c6c4fd39c
2016-02-05 13:19:32 -08:00 · 2016-02-05 13:19:32 -08:00 · ccbdc795ab
commit ccbdc795ab
parent 81adeb8dd3
2 changed files with 43 additions and 13 deletions
--- a/congress/datalog/utility.py
+++ b/congress/datalog/utility.py
@ -282,29 +282,35 @@ class Graph(object):
        s += "}"
        return s

+    def _inverted_edge_graph(self):
+        """create a shallow copy of self with the edges inverted"""
+        newGraph = Graph()
+        newGraph.nodes = self.nodes
+        for source_node in self.edges:
+            for edge in self.edges[source_node]:
+                try:
+                    newGraph.edges[edge.node].add(Graph.edge_data(source_node))
+                except KeyError:
+                    newGraph.edges[edge.node] = set(
+                        [Graph.edge_data(source_node)])
+        return newGraph
+
    def find_dependent_nodes(self, nodes):
        """Return all nodes dependent on @nodes.

        Node T is dependent on node T.
        Node T is dependent on node R if there is an edge from node S to T,
            and S is dependent on R.
+
+        Note that node T is dependent on node T even if T is not in the graph
        """
-        # TODO(thinrichs): is it equivalent/better to invert all the edges
-        #   and run depth-first-search?
-        marked = set(nodes)  # copy so we can modify
-        changed = True
-        while changed:
-            changed = False
-            for node in self.edges:
-                hasmarked = any(x.node in marked for x in self.edges[node])
-                if hasmarked:
-                    if node not in marked:
-                        marked.add(node)
-                        changed = True
-        return marked
+        return (self._inverted_edge_graph().find_reachable_nodes(nodes)
+                | set(nodes))

    def find_reachable_nodes(self, roots):
        """Return all nodes reachable from @roots."""
+        if len(roots) == 0:
+            return set()
        self.depth_first_search(roots)
        result = [x for x in self.nodes if self.nodes[x].begin is not None]
        self.reset_nodes()
--- a/congress/tests/datalog/test_utility.py
+++ b/congress/tests/datalog/test_utility.py
@ -223,6 +223,30 @@ class TestGraph(base.TestCase):
        self.assertTrue(g1.dependencies(5), set([5]))
        self.assertTrue(g1.dependencies(11), set([11, 12]))

+    def test_find_dependent_nodes(self):
+        g1 = utility.Graph()
+        self.assertEqual(g1.find_dependent_nodes([1]), set([1]))
+        g1.add_edge(0, 1)
+        g1.add_edge(1, 2)
+        g1.add_edge(2, 3)
+        g1.add_edge(2, 4)
+        g1.add_edge(3, 5)
+        g1.add_edge(0, 6)
+        g1.add_edge(7, 8)
+        g1.add_edge(8, 9)
+        g1.add_edge(10, 5)
+        g1.add_edge(11, 12)
+        self.assertEqual(g1.find_dependent_nodes([0]), set([0]))
+        self.assertEqual(g1.find_dependent_nodes([2]), set([2, 1, 0]))
+        self.assertEqual(g1.find_dependent_nodes([5]),
+                         set([5, 0, 1, 2, 3, 10]))
+        self.assertEqual(g1.find_dependent_nodes([12]), set([11, 12]))
+        self.assertEqual(g1.find_dependent_nodes([5, 6]),
+                         set([5, 0, 1, 2, 3, 10, 6]))
+        g1.add_edge(5, 2)  # add cycle
+        self.assertEqual(g1.find_dependent_nodes([2]),
+                         set([5, 0, 1, 2, 3, 10]))
+

 class TestBagGraph(base.TestCase):
    def test_nodes(self):