From a682717f734f1488306a4266cb8e18eb07faa9d4 Mon Sep 17 00:00:00 2001
From: Joshua Harlow <harlowja@yahoo-inc.com>
Date: Fri, 11 Oct 2013 18:25:04 -0700
Subject: [PATCH] Continue adding docs to examples

Add more docs to the fake billing example and
move the utility AttrDict to utils/misc.py at
the same time (since it's a useful utility to
allow others to use).

Change-Id: Id372032ae1545e0bd666bf991c21b21d320aba4b
---
 taskflow/examples/fake_billing.py | 71 +++++++++++++++++--------------
 taskflow/utils/misc.py            | 37 ++++++++++++++++
 2 files changed, 76 insertions(+), 32 deletions(-)

diff --git a/taskflow/examples/fake_billing.py b/taskflow/examples/fake_billing.py
index 446f160ae..cee51a642 100644
--- a/taskflow/examples/fake_billing.py
+++ b/taskflow/examples/fake_billing.py
@@ -39,20 +39,15 @@ from taskflow.patterns import linear_flow as lf
 from taskflow import task
 from taskflow.utils import misc
 
-
-class AttrDict(object):
-    def __init__(self, **kwargs):
-        self._attrs = {}
-        for (k, v) in kwargs.items():
-            if ' ' in k or k in ('self',) or not len(k):
-                raise AttributeError("Invalid attribute name")
-            self._attrs[k] = v
-
-    def __getattr__(self, name):
-        try:
-            return self._attrs[name]
-        except KeyError:
-            raise AttributeError("No attributed named '%s'" % (name))
+# INTRO: This example walks through a miniature workflow which simulates
+# the reception of an API request, creation of a database entry, driver
+# activation (which invokes a 'fake' webservice) and final completion.
+#
+# This example also shows how a function/object (in this case the url sending)
+# that occurs during driver activation can update the progress of a task
+# without being aware of the internals of how to do this, by associating a
+# callback that the url sending can invoke as the sending progresses from 0.0%
+# complete to 100% complete.
 
 
 class DB(object):
@@ -63,15 +58,24 @@ class DB(object):
 class UrlCaller(object):
     def __init__(self):
         self._send_time = 0.5
+        self._chunks = 25
 
     def send(self, url, data, status_cb=None):
-        sleep_time = float(self._send_time) / 25
+        sleep_time = float(self._send_time) / self._chunks
         for i in range(0, len(data)):
             time.sleep(sleep_time)
+            # As we send the data, each chunk we 'fake' send will advance
+            # the sending progress that much further towards 100%.
             if status_cb:
                 status_cb(float(i) / len(data))
 
 
+# Since engines save the output of tasks to an optional persistent storage
+# backend, resources must be dealt with in a slightly different manner since
+# resources are transient and can not be persisted (or serialized). For tasks
+# that require access to a set of resources it is a common pattern to provide
+# an object (in this case this object) on construction of those tasks via the
+# task constructor.
 class ResourceFetcher(object):
     def __init__(self):
         self._db_handle = None
@@ -126,11 +130,17 @@ class ActivateDriver(task.Task):
     def execute(self, parsed_request):
         print("Sending billing data to %s" % (self._url))
         url_sender = self._resources.url_handle
+        # Note that here we attach our update_progress function (which is a
+        # function that the engine also 'binds' to) to the progress function
+        # that the url sending helper class uses. This allows the task progress
+        # to be tied to the url sending progress, which is very useful for
+        # downstream systems to be aware of what a task is doing at any time.
         url_sender.send(self._url, json.dumps(parsed_request),
                         status_cb=self.update_progress)
         return self._url
 
     def update_progress(self, progress, **kwargs):
+        # Override the parent method to also print out the status.
         super(ActivateDriver, self).update_progress(progress, **kwargs)
         print("%s is %0.2f%% done" % (self.name, progress * 100))
 
@@ -141,36 +151,33 @@ class DeclareSuccess(task.Task):
         print("All data processed and sent to %s" % (sent_to))
 
 
-SERIAL = False
-if SERIAL:
-    engine_conf = {
-        'engine': 'serial',
-    }
-else:
-    engine_conf = {
-        'engine': 'parallel',
-    }
-
-
 # Resources (db handles and similar) of course can't be persisted so we need
 # to make sure that we pass this resource fetcher to the tasks constructor so
-# that the tasks have access to any needed resources (lazily loaded).
+# that the tasks have access to any needed resources (the resources are
+# lazily loaded so that they are only created when they are used).
 resources = ResourceFetcher()
 flow = lf.Flow("initialize-me")
 
 # 1. First we extract the api request into a useable format.
 # 2. Then we go ahead and make a database entry for our request.
-flow.add(ExtractInputRequest(resources),
-         MakeDBEntry(resources))
+flow.add(ExtractInputRequest(resources), MakeDBEntry(resources))
 
-# 3. Then we activate our payment method and finally declare success
+# 3. Then we activate our payment method and finally declare success.
 sub_flow = gf.Flow("after-initialize")
 sub_flow.add(ActivateDriver(resources), DeclareSuccess())
 flow.add(sub_flow)
 
+# Initially populate the storage with the following request object;
+# prepopulating this allows the tasks that depend on the 'request' variable
+# to start processing (in this case this is the ExtractInputRequest task).
 store = {
-    'request': AttrDict(user="bob", id="1.35"),
+    'request': misc.AttrDict(user="bob", id="1.35"),
 }
-eng = engines.load(flow, engine_conf=engine_conf, store=store)
+eng = engines.load(flow, engine_conf='serial', store=store)
+
+# This context manager automatically adds (and automatically removes) a
+# helpful set of state transition notification printing helper utilities
+# that show you exactly what transitions the engine is going through
+# while running the various billing related tasks.
 with printing.PrintingListener(eng):
     eng.run()
diff --git a/taskflow/utils/misc.py b/taskflow/utils/misc.py
index f39c2decc..cf568b3b7 100644
--- a/taskflow/utils/misc.py
+++ b/taskflow/utils/misc.py
@@ -75,6 +75,43 @@ def get_duplicate_keys(iterable, key=None):
     return duplicates
 
 
+class AttrDict(dict):
+    """Dict whose keys double as attributes; names must start with a letter,
+    hold only letters/digits and not begin with 'self' (else AttributeError).
+    """
+    @staticmethod
+    def _is_valid_attribute_name(name):
+        if not isinstance(name, six.string_types) or len(name) == 0:
+            return False
+        if name.lower().startswith('self') or name.startswith("_"):
+            return False
+        if not name[0].isalpha():
+            return False
+        for i in range(1, len(name)):
+            if not (name[i].isalpha() or name[i].isdigit()):
+                return False
+        return True
+
+    def __init__(self, **kwargs):
+        for (k, v) in kwargs.items():
+            if not self._is_valid_attribute_name(k):
+                raise AttributeError("Invalid attribute name: '%s'" % (k))
+            self[k] = v
+
+    def __getattr__(self, name):
+        if not self._is_valid_attribute_name(name):
+            raise AttributeError("Invalid attribute name: '%s'" % (name))
+        try:
+            return self[name]
+        except KeyError:
+            raise AttributeError("No attributed named: '%s'" % (name))
+
+    def __setattr__(self, name, value):
+        if not self._is_valid_attribute_name(name):
+            raise AttributeError("Invalid attribute name: '%s'" % (name))
+        self[name] = value
+
+
 class ExponentialBackoff(object):
     def __init__(self, attempts, exponent=2):
         self.attempts = int(attempts)