Reintegrate parallel action
Fixes: bug 1221505
Fixes: bug 1225759
Change-Id: Id4c915d36d0da679b313dba8421ac621aeb7c818
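For context, a minimal usage sketch of the two engines this change wires up
(assuming a taskflow flow object named "flow"; the module name "engine" is an
assumption here, only the constructor signatures and compile() appear in this
diff):

    from concurrent import futures

    from taskflow.engines.action_engine import engine

    # Serial engine: runs the flattened task graph one node at a time.
    serial = engine.SingleThreadedActionEngine(flow)
    serial.compile()

    # Parallel engine: runs independent nodes concurrently on a thread pool.
    parallel = engine.MultiThreadedActionEngine(
        flow, executor=futures.ThreadPoolExecutor(max_workers=4))
    parallel.compile()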
@@ -40,6 +40,7 @@ class ActionEngine(object):
     Converts the flow to recursive structure of actions.
     """
+    _graph_action = None
 
     def __init__(self, flow, storage):
         self._failures = []
@@ -100,20 +101,23 @@ class ActionEngine(object):
         self.task_notifier.notify(state, details)
 
     def _translate_flow_to_action(self):
-        # Flatten the flow into just 1 graph.
+        assert self._graph_action is not None, ('Graph action class must be'
+                                                ' specified')
         task_graph = flow_utils.flatten(self._flow)
-        ga = graph_action.SequentialGraphAction(task_graph)
+        ga = self._graph_action(task_graph)
         for n in task_graph.nodes_iter():
             ga.add(n, task_action.TaskAction(n, self))
         return ga
 
     @decorators.locked
     def compile(self):
         if self._root is None:
             self._root = self._translate_flow_to_action()
 
 
 class SingleThreadedActionEngine(ActionEngine):
+    # This one attempts to run in a serial manner.
+    _graph_action = graph_action.SequentialGraphAction
+
     def __init__(self, flow, flow_detail=None, book=None, backend=None):
         if flow_detail is None:
             flow_detail = p_utils.create_flow_detail(flow,

@@ -124,6 +128,9 @@ class SingleThreadedActionEngine(ActionEngine):
 
 
 class MultiThreadedActionEngine(ActionEngine):
+    # This one attempts to run in a parallel manner.
+    _graph_action = graph_action.ParallelGraphAction
+
     def __init__(self, flow, flow_detail=None, book=None, backend=None,
                  executor=None):
         if flow_detail is None:
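The engine change above is a small strategy hook: the base class declares
_graph_action = None and each subclass binds it to the graph action class it
wants, so _translate_flow_to_action() stays identical for both engines. A
self-contained sketch of the same idiom (all names below are illustrative,
not taskflow's):

    class Action(object):
        def __init__(self, graph):
            self.graph = graph

    class SequentialAction(Action):
        pass

    class ParallelAction(Action):
        pass

    class Engine(object):
        _graph_action = None  # subclasses bind this to an action class

        def translate(self, graph):
            assert self._graph_action is not None, ('Graph action class must'
                                                    ' be specified')
            return self._graph_action(graph)

    class SerialEngine(Engine):
        _graph_action = SequentialAction

    class ParallelEngine(Engine):
        _graph_action = ParallelAction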
@@ -16,7 +16,17 @@
 # License for the specific language governing permissions and limitations
 # under the License.
 
+import collections
+import logging
+import threading
+
+from concurrent import futures
+
 from taskflow.engines.action_engine import base_action as base
+from taskflow import exceptions as exc
+from taskflow.utils import misc
+
+LOG = logging.getLogger(__name__)
 
 
 class GraphAction(base.Action):
@@ -79,3 +89,99 @@ class SequentialGraphAction(GraphAction):
         action = self._action_mapping[node]
         action.revert(engine)  # raises on failure
         to_revert += self._resolve_dependencies(node, deps_counter, True)
+
+
+class ParallelGraphAction(SequentialGraphAction):
+    def execute(self, engine):
+        """This action executes the provided graph in parallel by selecting
+        nodes which can run (those which have their dependencies satisfied
+        or those with no dependencies) and submitting them to the executor
+        to be run; after running, this process is repeated until no more
+        nodes can be run (or a failure has occurred and all nodes were
+        stopped from further running).
+        """
+        # A deque is a thread safe push/pop/popleft/append implementation
+        all_futures = collections.deque()
+        executor = engine.executor
+        has_failed = threading.Event()
+        deps_lock = threading.RLock()
+        deps_counter = self._get_nodes_dependencies_count()
+
+        def submit_followups(node):
+            # Mutating the deps_counter isn't thread safe.
+            with deps_lock:
+                to_execute = self._resolve_dependencies(node, deps_counter)
+            submit_count = 0
+            for n in to_execute:
+                try:
+                    all_futures.append(executor.submit(run_node, n))
+                    submit_count += 1
+                except RuntimeError:
+                    # Someone shut down the executor while we are still
+                    # using it, get out as quickly as we can...
+                    has_failed.set()
+                    break
+            return submit_count
+
+        def run_node(node):
+            if has_failed.is_set():
+                # Someone failed, don't even bother running.
+                return
+            action = self._action_mapping[node]
+            try:
+                action.execute(engine)
+            except Exception:
+                # Make sure others don't continue working (although they may
+                # be already actively working, but you can't stop that
+                # anyway).
+                has_failed.set()
+                raise
+            if has_failed.is_set():
+                # Someone else failed, don't even bother submitting any
+                # followup jobs.
+                return
+            # NOTE(harlowja): the future itself will not return until after
+            # it submits followup tasks, this keeps the parent thread waiting
+            # for more results since the all_futures deque will not be empty
+            # until everyone stops submitting followups.
+            submitted = submit_followups(node)
+            LOG.debug("After running %s, %s followup actions were submitted",
+                      node, submitted)
+
+        # Nothing to execute in the first place
+        if not deps_counter:
+            return
+
+        # Ensure that we obtain the lock just in case the functions submitted
+        # immediately themselves start submitting their own jobs (which could
+        # happen if they are very quick).
+        with deps_lock:
+            to_execute = self._browse_nodes_to_execute(deps_counter)
+            for n in to_execute:
+                try:
+                    all_futures.append(executor.submit(run_node, n))
+                except RuntimeError:
+                    # Someone shut down the executor while we are still using
+                    # it, get out as quickly as we can...
+                    break
+
+        # Keep consuming the futures until there are no more left so that we
+        # can collect their failures. Notice that results are not captured,
+        # as results of tasks go into storage and do not get returned here.
+        failures = []
+        while len(all_futures):
+            # Take in FIFO order, not in LIFO order.
+            f = all_futures.popleft()
+            try:
+                f.result()
+            except futures.CancelledError:
+                # TODO(harlowja): can we use the cancellation feature to
+                # actually achieve cancellation in taskflow??
+                pass
+            except Exception:
+                failures.append(misc.Failure())
+        if len(failures) > 1:
+            raise exc.LinkedException.link([fail.exc_info
+                                            for fail in failures])
+        elif len(failures) == 1:
+            failures[0].reraise()
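For readers following execute() above, here is a self-contained sketch of the
same pattern outside taskflow: seed the executor with dependency-free nodes,
have each finished node submit its followers before its future resolves, and
drain the futures deque in FIFO order until nobody is submitting anymore.
Everything here (run_graph, the deps/actions mappings) is hypothetical, and
unlike the code above it simply re-raises the first failure instead of
collecting them all:

    import collections
    import threading

    from concurrent import futures

    def run_graph(deps, actions, executor):
        # deps: node -> set of nodes it depends on; actions: node -> callable
        all_futures = collections.deque()
        has_failed = threading.Event()
        deps_lock = threading.RLock()
        dependents = collections.defaultdict(list)
        for node, wanted in deps.items():
            for dep in wanted:
                dependents[dep].append(node)
        counter = dict((node, len(wanted)) for node, wanted in deps.items())

        def run_node(node):
            if has_failed.is_set():
                return
            try:
                actions[node]()
            except Exception:
                has_failed.set()
                raise
            # Submit followers whose last dependency just finished. This
            # happens before this node's future resolves, so the drain loop
            # below never sees an empty deque while work is still pending.
            with deps_lock:
                ready = []
                for follower in dependents[node]:
                    counter[follower] -= 1
                    if not counter[follower]:
                        ready.append(follower)
            for n in ready:
                all_futures.append(executor.submit(run_node, n))

        # Seed the executor with every node that has no dependencies.
        with deps_lock:
            for node, count in counter.items():
                if not count:
                    all_futures.append(executor.submit(run_node, node))

        # Drain in FIFO order; result() re-raises any node's exception.
        while all_futures:
            all_futures.popleft().result()

    deps = {'a': set(), 'b': {'a'}, 'c': {'a'}, 'd': {'b', 'c'}}
    actions = dict((n, (lambda n=n: print('ran %s' % n))) for n in deps)
    with futures.ThreadPoolExecutor(max_workers=2) as executor:
        run_graph(deps, actions, executor)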