Protect storage better against external concurrent access
Lock down the various state machine action handling functions so that they are
guaranteed correct (and consistent) access to the storage layer when they are
modifying and/or reading it.

Change-Id: Ie893a44aa963ab515f19e77f9904f49c843cb4e5

Committed by: ChangBo Guo(gcb)
Parent: b3b659f38b
Commit: 22f75755b7
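
The change wires the storage layer's reader/writer lock (exposed via the new
lock property in the Storage hunk further below) into each of the builder's
state-machine reaction functions. As a rough illustration of that pattern,
here is a minimal, hypothetical sketch; it assumes the fasteners library's
ReaderWriterLock, which provides read_lock()/write_lock() context managers of
the kind used in the diff. The SketchStorage/SketchEngine names are invented
for this example and are not taskflow code.

import fasteners


class SketchStorage(object):
    """Tiny stand-in for the engine's storage layer."""

    def __init__(self):
        self.lock = fasteners.ReaderWriterLock()
        self._flow_state = 'RUNNING'
        self._results = {}

    def get_flow_state(self):
        return self._flow_state

    def save(self, atom_name, result):
        self._results[atom_name] = result

    def fetch(self, atom_name):
        return self._results.get(atom_name)


class SketchEngine(object):
    def __init__(self, storage):
        self._storage = storage

    def complete_atom(self, atom_name, result):
        # Writer: checking the flow state and saving the result must not
        # interleave with writes made by another thread sharing the storage.
        with self._storage.lock.write_lock():
            if self._storage.get_flow_state() == 'RUNNING':
                self._storage.save(atom_name, result)

    def peek_result(self, atom_name):
        # Reader: a read-only lookup only needs shared access.
        with self._storage.lock.read_lock():
            return self._storage.fetch(atom_name)


engine = SketchEngine(SketchStorage())
engine.complete_atom('boot-server', 'done')
print(engine.peek_result('boot-server'))
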
@@ -143,6 +143,7 @@ class MachineBuilder(object):
         get_atom_intention = self._storage.get_atom_intention
 
         def do_schedule(next_nodes):
+            with self._storage.lock.write_lock():
                 return self._scheduler.schedule(
                     sorted(next_nodes,
                            key=lambda node: getattr(node, 'priority', 0),
@@ -164,6 +165,7 @@ class MachineBuilder(object):
             # to include any nodes that need to be executed (from a previous
             # attempt, which may be empty if never ran before) and any nodes
             # that are now ready to be ran.
+            with self._storage.lock.write_lock():
                 memory.next_up.update(
                     iter_utils.unique_seen((self._completer.resume(),
                                             iter_next_atoms())))
@@ -176,6 +178,7 @@ class MachineBuilder(object):
             # it is *always* called before the final state is entered.
             if memory.failures:
                 return FAILED
+            with self._storage.lock.read_lock():
                 leftover_atoms = iter_utils.count(
                     # Avoid activating the deciders, since at this point
                     # the engine is finishing and there will be no more further
@@ -199,6 +202,7 @@ class MachineBuilder(object):
             # if the user of this engine has requested the engine/storage
             # that holds this information to stop or suspend); handles failures
             # that occur during this process safely...
+            with self._storage.lock.write_lock():
                 current_flow_state = self._storage.get_flow_state()
                 if current_flow_state == st.RUNNING and memory.next_up:
                     not_done, failures = do_schedule(memory.next_up)
@@ -277,6 +281,7 @@ class MachineBuilder(object):
             # nodes to be scheduled in the future); handles failures that
             # occur during this process safely...
             next_up = set()
+            with self._storage.lock.write_lock():
                 while memory.done:
                     fut = memory.done.pop()
                     # Force it to be completed so that we can ensure that
@@ -290,7 +295,8 @@ class MachineBuilder(object):
                         more_work = set(iter_next_atoms(atom=atom))
                     except Exception:
                         memory.failures.append(failure.Failure())
-                        LOG.exception("Engine '%s' atom post-completion"
-                                      " next atom searching failed", atom)
+                        LOG.exception(
+                            "Engine '%s' atom post-completion"
+                            " next atom searching failed", atom)
                     else:
                         next_up.update(more_work)
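
The handlers above take the write lock when they mutate engine/storage state
(scheduling, resuming, completing atoms) and only the read lock when they
merely inspect it (counting the leftover atoms before finishing). A small
self-contained demo, assuming the semantics of fasteners' ReaderWriterLock and
not taken from this change, of why that distinction matters: readers may
overlap with each other, while a writer excludes both readers and other
writers.

import threading

import fasteners

lock = fasteners.ReaderWriterLock()
shared = {'done': 0}


def reader(name):
    with lock.read_lock():
        # Several readers can be inside this block at the same time.
        print(name, 'sees', shared['done'])


def writer():
    with lock.write_lock():
        # Only one writer at a time, and no readers while it runs.
        shared['done'] += 1


threads = [threading.Thread(target=reader, args=('r%d' % i,))
           for i in range(3)]
threads.append(threading.Thread(target=writer))
for t in threads:
    t.start()
for t in threads:
    t.join()
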
@@ -407,6 +407,18 @@ class Storage(object):
                 self._failures.setdefault(atom_name, {})
         return atom_ids
 
+    @property
+    def lock(self):
+        """Reader/writer lock used to ensure multi-thread safety.
+
+        This does **not** protect against the **same** storage objects being
+        used by multiple engines/users across multiple processes (or
+        different machines); certain backends handle that situation better
+        than others (for example by using sequence identifiers) and it's an
+        ongoing work in progress to make that better.
+        """
+        return self._lock
+
     def ensure_atom(self, atom):
         """Ensure there is an atomdetail for the **given** atom.
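
The docstring above is explicit that this lock only guards concurrent use
within a single process; for the same storage shared across processes or
machines it defers to backend-level mechanisms such as sequence identifiers.
One common shape of that idea is optimistic concurrency: each record carries a
sequence number, and an update only succeeds if the sequence the writer read
is still current. The sketch below is illustrative only, with hypothetical
names, and is not taskflow's API.

class StaleUpdateError(Exception):
    pass


class SequencedRecord(object):
    def __init__(self, data):
        self.data = data
        self.sequence = 0

    def update(self, expected_sequence, new_data):
        # Reject the write if someone else bumped the sequence since the
        # caller read the record (a compare-and-swap on the sequence).
        if expected_sequence != self.sequence:
            raise StaleUpdateError(
                "record changed (expected sequence %s, now %s)"
                % (expected_sequence, self.sequence))
        self.data = new_data
        self.sequence += 1


record = SequencedRecord({'state': 'RUNNING'})
seen = record.sequence
record.update(seen, {'state': 'SUSPENDED'})    # succeeds
try:
    record.update(seen, {'state': 'SUCCESS'})  # stale: sequence moved on
except StaleUpdateError as exc:
    print(exc)
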