Don't keep the state/version in the task name.

Instead of having individual entries for the task that change with the state name + version, we should instead just keep the same task name but update the metadata about the states the task has gone through instead. Also store the task version in the same metadata and warn users when the versions may be incompat. This makes it easier to see what a task has done without having to know all the states it has gone through (just to find the task details about that task) as well as being able to detect version issues. Change-Id: Ia6b9400394212230905341d205d966dfdee5dfdf
2013-06-08 10:55:37 -07:00
parent b94b4f0412
commit f56086d067
3 changed files with 119 additions and 39 deletions
--- a/taskflow/backends/memory.py
+++ b/taskflow/backends/memory.py
@@ -95,32 +95,30 @@ class MemoryCatalog(catalog.Catalog):
 class MemoryFlowDetail(logbook.FlowDetail):
    def __init__(self, book, name, task_cls=logbook.TaskDetail):
        super(MemoryFlowDetail, self).__init__(book, name)
-        self._tasks = []
+        self._tasks = {}
        self._task_cls = task_cls

    def __iter__(self):
-        for t in self._tasks:
+        for t in self._tasks.values():
            yield t

    def __contains__(self, task_name):
-        for t in self:
-            if t.name == task_name:
-                return True
-        return False
+        return task_name in self._tasks

    def __getitem__(self, task_name):
-        return [t for t in self if t.name == task_name]
+        return self._tasks[task_name]

    def __len__(self):
        return len(self._tasks)

    def add_task(self, task_name, metadata=None):
        task_details = self._task_cls(task_name, metadata)
-        self._tasks.append(task_details)
+        self._tasks[task_name] = task_details
        return task_details

    def __delitem__(self, task_name):
-        self._tasks = [t for t in self if t.name != task_name]
+        self._tasks.pop(task_name, None)
+        return None


 class MemoryLogBook(logbook.LogBook):
--- a/taskflow/job.py
+++ b/taskflow/job.py
@@ -17,6 +17,8 @@
 #    under the License.

 import abc
+import logging
+import re
 import types

 from taskflow import exceptions as exc
@@ -25,11 +27,25 @@ from taskflow import utils

 from taskflow.openstack.common import uuidutils

+LOG = logging.getLogger(__name__)

-def task_and_state(task, state):
-    """Combines a task objects string representation with a state to
-    create a uniquely identifying task+state name."""

+def _get_task_version(task):
+    """Gets a tasks *string* version, whether it is a task object/function."""
+    task_version = ''
+    if isinstance(task, types.FunctionType):
+        task_version = getattr(task, '__version__', '')
+    if not task_version and hasattr(task, 'version'):
+        task_version = task.version
+    if isinstance(task_version, (list, tuple)):
+        task_version = utils.join(task_version, with_what=".")
+    if not isinstance(task_version, basestring):
+        task_version = str(task_version)
+    return task_version
+
+
+def _get_task_name(task):
+    """Gets a tasks *string* name, whether it is a task object/function."""
    task_name = ""
    if isinstance(task, types.FunctionType):
        # If its a function look for the attributes that should have been
@@ -44,14 +60,46 @@ def task_and_state(task, state):
                                                          '__name__')
                           if a is not None]
            task_name = utils.join(name_pieces, ".")
-        task_version = getattr(task, '__version__', None)
-        if isinstance(task_version, (list, tuple)):
-            task_version = utils.join(task_version, with_what=".")
-        if task_version is not None:
-            task_name += "==%s" % (task_version)
    else:
        task_name = str(task)
-    return "%s;%s" % (task_name, state)
+    return task_name
+
+
+def _is_version_compatible(version_1, version_2):
+    """Checks for major version compatibility of two *string" versions."""
+    if version_1 == version_2:
+        # Equivalent exactly, so skip the rest.
+        return True
+
+    def _convert_to_pieces(version):
+        try:
+            pieces = []
+            for p in version.split("."):
+                p = p.strip()
+                if not len(p):
+                    pieces.append(0)
+                    continue
+                # Clean off things like 1alpha, or 2b and just select the
+                # digit that starts that entry instead.
+                p_match = re.match(r"(\d+)([A-Za-z]*)(.*)", p)
+                if p_match:
+                    p = p_match.group(1)
+                pieces.append(int(p))
+        except (AttributeError, TypeError, ValueError):
+            pieces = []
+        return pieces
+
+    version_1_pieces = _convert_to_pieces(version_1)
+    version_2_pieces = _convert_to_pieces(version_2)
+    if len(version_1_pieces) == 0 or len(version_2_pieces) == 0:
+        return False
+
+    # Ensure major version compatibility to start.
+    major1 = version_1_pieces[0]
+    major2 = version_2_pieces[0]
+    if major1 != major2:
+        return False
+    return True


 class Claimer(object):
@@ -122,31 +170,65 @@ class Job(object):
    def _task_listener(self, _context, state, flow, task, result=None):
        """Store the result of the task under the given flow in the log
        book so that it can be retrieved later."""
-        metadata = None
+        metadata = {}
        flow_details = self.logbook[flow.name]
        if state in (states.SUCCESS, states.FAILURE):
-            metadata = {
-                'result': result,
-            }
-        task_state = task_and_state(task, state)
-        if task_state not in flow_details:
-            flow_details.add_task(task_state, metadata)
+            metadata['result'] = result
+
+        name = _get_task_name(task)
+        if name not in flow_details:
+            metadata['states'] = [state]
+            metadata['version'] = _get_task_version(task)
+            flow_details.add_task(name, metadata)
+        else:
+            details = flow_details[name]
+
+            # Warn about task versions possibly being incompatible
+            my_version = _get_task_version(task)
+            prev_version = details.metadata.get('version')
+            if not _is_version_compatible(my_version, prev_version):
+                LOG.warn("Updating a task with a different version than the"
+                         " one being listened to (%s != %s)",
+                         prev_version, my_version)
+
+            past_states = details.metadata.get('states', [])
+            past_states.append(state)
+            details.metadata['states'] = past_states
+            details.metadata.update(metadata)

    def _task_result_fetcher(self, _context, flow, task):
        flow_details = self.logbook[flow.name]
+
        # See if it completed before (or failed before) so that we can use its
        # results instead of having to recompute it.
-        for s in (states.SUCCESS, states.FAILURE):
-            name = task_and_state(task, s)
-            if name in flow_details:
-                # TODO(harlowja): should we be a little more cautious about
-                # duplicate task results? Maybe we shouldn't allow them to
-                # have the same name in the first place?
-                details = flow_details[name][0]
-                if details.metadata and 'result' in details.metadata:
-                    return (True, s == states.FAILURE,
-                            details.metadata['result'])
-        return (False, False, None)
+        not_found = (False, False, None)
+        name = _get_task_name(task)
+        if name not in flow_details:
+            return not_found
+
+        details = flow_details[name]
+        has_completed = False
+        was_failure = False
+        task_states = details.metadata.get('states', [])
+        for state in task_states:
+            if state in (states.SUCCESS, states.FAILURE):
+                if state == states.FAILURE:
+                    was_failure = True
+                has_completed = True
+                break
+
+        # Warn about task versions possibly being incompatible
+        my_version = _get_task_version(task)
+        prev_version = details.metadata.get('version')
+        if not _is_version_compatible(my_version, prev_version):
+            LOG.warn("Fetching task results from a task with a different"
+                     " version from the one being requested (%s != %s)",
+                     prev_version, my_version)
+
+        if has_completed:
+            return (True, was_failure, details.metadata.get('result'))
+
+        return not_found

    def associate(self, flow, parents=True):
        """Attachs the needed resumption and state change tracking listeners
--- a/taskflow/tests/unit/test_memory.py
+++ b/taskflow/tests/unit/test_memory.py
@@ -151,7 +151,7 @@ class MemoryBackendTest(unittest2.TestCase):
        wf.run(j.context)

        self.assertEquals(1, len(j.logbook))
-        self.assertEquals(4, len(j.logbook["the-int-action"]))
+        self.assertEquals(2, len(j.logbook["the-int-action"]))
        self.assertEquals(1, len(call_log))

        wf.reset()
@@ -160,7 +160,7 @@ class MemoryBackendTest(unittest2.TestCase):
        wf.run(j.context)

        self.assertEquals(1, len(j.logbook))
-        self.assertEquals(6, len(j.logbook["the-int-action"]))
+        self.assertEquals(3, len(j.logbook["the-int-action"]))
        self.assertEquals(2, len(call_log))
        self.assertEquals(states.SUCCESS, wf.state)

@@ -193,7 +193,7 @@ class MemoryBackendTest(unittest2.TestCase):
        wf.run(j.context)

        self.assertEquals(1, len(j.logbook))
-        self.assertEquals(4, len(j.logbook["the-line-action"]))
+        self.assertEquals(2, len(j.logbook["the-line-action"]))
        self.assertEquals(2, len(call_log))
        self.assertEquals(states.SUCCESS, wf.state)