From e3171f869e7ec33335d084aacf7c66901478a40e Mon Sep 17 00:00:00 2001
From: Luka Peschke <luka.peschke@objectif-libre.com>
Date: Tue, 18 Dec 2018 15:08:50 +0100
Subject: [PATCH] Add some developer documentation for collectors

This adds an entry about collectors to the developer documentation.
Some information about collectors has been moved from the admin to the
developer documentation.

Change-Id: I2423761b9f7a672fe837d5d5954473301d936ba3
Story: 2004179
Task: 28514
---
 doc/source/admin/architecture.rst  |  37 ++----
 doc/source/developer/collector.rst | 177 +++++++++++++++++++++++++++++
 doc/source/developer/index.rst     |   1 +
 3 files changed, 185 insertions(+), 30 deletions(-)
 create mode 100644 doc/source/developer/collector.rst

diff --git a/doc/source/admin/architecture.rst b/doc/source/admin/architecture.rst
index d4929663..2bf50174 100644
--- a/doc/source/admin/architecture.rst
+++ b/doc/source/admin/architecture.rst
@@ -10,11 +10,6 @@ CloudKitty can be cut in five big parts:
 * Storage
 * Report writer
 
-
-.. Graph is outdated, and needs to be modified. Skipping it.
-    .. graphviz:: graph/arch.dot
-
-
 Module loading and extensions
 =============================
 
@@ -28,16 +23,15 @@ management of its configuration.
 Collectors and storage backends are loaded with stevedore but configured in
 CloudKitty's configuration file.
 
-
 Collector
 =========
 
 **Loaded with stevedore**
 
-The name of the collector to use is specified in the configuration, only one
-collector can be loaded at once.
-This part is responsible of information gathering. It consists of a python
-class that loads data from a backend and return it in a format that CloudKitty
+The name of the collector to use is specified in the configuration. For now,
+only one collector can be loaded at once.
+This part is responsible for information gathering. It consists of a python
+class that loads data from a backend and returns it in a format that CloudKitty
 can handle.
 
 The data format of CloudKitty is the following:
@@ -64,25 +58,10 @@ The data format of CloudKitty is the following:
    }
 
 
-Example code of a basic collector:
-
-.. code-block:: python
-
-    class MyCollector(BaseCollector):
-        def __init__(self, **kwargs):
-            super(MyCollector, self).__init__(**kwargs)
-
-        def get_mydata(self, start, end=None, project_id=None, q_filter=None):
-            # Do stuff
-            return ck_data
-
-
-You'll now be able to add the gathering of mydata in CloudKitty by modifying
-the configuration and specifying the new service in collect/services.
-
-If you need to load multiple collectors, you can use the ``meta`` collector and
-use its API to enable/disable collector loading, and set priority.
+For information about how to write a custom collector, see
+the `developer documentation`_.
 
+.. _developer documentation: ../developer/collector.html
 
 Rating
 ======
@@ -126,7 +105,6 @@ Example of minimal rating module (taken from the Noop module):
                             entry['rating'] = {'price': decimal.Decimal(0)}
             return data
 
-
 Storage
 =======
 
@@ -138,7 +116,6 @@ the need of knowing the type of backend used.
 
 You can use the API to create reports on the fly for example.
 
-
 Writer
 ======
 
diff --git a/doc/source/developer/collector.rst b/doc/source/developer/collector.rst
new file mode 100644
index 00000000..5c7ea7e4
--- /dev/null
+++ b/doc/source/developer/collector.rst
@@ -0,0 +1,177 @@
+=========
+Collector
+=========
+
+Data format
+===========
+
+Internally, CloudKitty's data format is a bit more detailed than what can be
+found in the `architecture documentation`_.
+
+The internal data format is the following:
+
+.. code-block:: json
+
+   {
+       "bananas": [
+           {
+               "vol": {
+                   "unit": "banana",
+                   "qty": 1
+               },
+               "rating": {
+                   "price": 1
+               },
+               "groupby": {
+                   "xxx_id": "hello",
+                   "yyy_id": "bye"
+               },
+               "metadata": {
+                   "flavor": "chocolate",
+                   "eaten_by": "gorilla"
+               }
+           }
+       ]
+   }
+
+However, developers implementing a collector don't need to format the data
+themselves, as there are helper functions for these matters.
+
+Implementation
+==============
+
+Each collector must implement the following class:
+
+.. autoclass:: cloudkitty.collector.BaseCollector
+   :members: fetch_all, check_configuration
+
+The ``retrieve`` method of the ``BaseCollector`` class is called by the
+orchestrator. This method calls the ``fetch_all`` method of the child class.
+
+To create a collector, you need to implement at least the ``fetch_all`` method.
+
+
+Data collection
++++++++++++++++
+
+Collectors must implement a ``fetch_all`` method. This method is called for
+each metric type, for each scope, for each collect period. It has the
+following prototype:
+
+.. automethod:: cloudkitty.collector.BaseCollector.fetch_all
+   :noindex:
+
+This method is supposed to return a list of objects formatted by
+``CloudKittyFormatTransformer``.
+
+Example code of a basic collector:
+
+.. code-block:: python
+
+    from cloudkitty.collector import BaseCollector
+
+    class MyCollector(BaseCollector):
+        def __init__(self, **kwargs):
+            super(MyCollector, self).__init__(**kwargs)
+
+        def fetch_all(self, metric_name, start, end,
+                      project_id=None, q_filter=None):
+            data = []
+            for CONDITION:
+                # do stuff
+                data.append(self.t_cloudkitty.format_item(
+                    groupby, # dict
+                    metadata, # dict
+                    unit, # str
+                    qty=qty, # int / float
+                ))
+
+            return data
+
+
+``project_id`` can be misleading, as it is a legacy name. It contains the
+ID of the current scope. The attribute corresponding to the scope is specified
+in the configuration, under ``[collect]/scope_key``. Thus, all queries should
+filter based on this attribute. Example:
+
+.. code-block:: python
+
+    from oslo_config import cfg
+
+    from cloudkitty.collector import BaseCollector
+
+    CONF = cfg.CONF
+
+    class MyCollector(BaseCollector):
+        def __init__(self, **kwargs):
+            super(MyCollector, self).__init__(**kwargs)
+
+        def fetch_all(self, metric_name, start, end,
+                      project_id=None, q_filter=None):
+            scope_key = CONF.collect.scope_key
+            filters = {'start': start, 'stop': end, scope_key: project_id}
+
+            data = self.client.query(
+                filters=filters,
+                groupby=self.conf[metric_name]['groupby'])
+            # Format data etc
+            return output
+
+
+Additional configuration
+++++++++++++++++++++++++
+
+If you need to extend the metric configuration (add parameters to the
+``extra_args`` section of ``metrics.yml``), you can override the
+``check_configuration`` method of the base collector:
+
+.. automethod:: cloudkitty.collector.BaseCollector.check_configuration
+   :noindex:
+
+This method uses `voluptuous`_ for data validation. The base schema for each
+metric can be found in ``cloudkitty.collector.METRIC_BASE_SCHEMA``. This schema
+is meant to be extended by other collectors. Example taken from the gnocchi
+collector code:
+
+.. code-block:: python
+
+   from cloudkitty import collector
+
+   GNOCCHI_EXTRA_SCHEMA = {
+       Required('extra_args'): {
+           Required('resource_type'): All(str, Length(min=1)),
+           # Due to Gnocchi model, metric are grouped by resource.
+           # This parameter allows to adapt the key of the resource identifier
+           Required('resource_key', default='id'): All(str, Length(min=1)),
+           Required('aggregation_method', default='max'):
+               In(['max', 'mean', 'min']),
+       },
+   }
+
+   class GnocchiCollector(collector.BaseCollector):
+
+       collector_name = 'gnocchi'
+
+       @staticmethod
+       def check_configuration(conf):
+           conf = collector.BaseCollector.check_configuration(conf)
+           metric_schema = Schema(collector.METRIC_BASE_SCHEMA).extend(
+               GNOCCHI_EXTRA_SCHEMA)
+
+           output = {}
+           for metric_name, metric in conf.items():
+               met = output[metric_name] = metric_schema(metric)
+
+               if met['extra_args']['resource_key'] not in met['groupby']:
+                   met['groupby'].append(met['extra_args']['resource_key'])
+
+           return output
+
+
+If your collector does not need any ``extra_args``, it is not required to
+override the ``check_configuration`` method.
+
+
+.. _architecture documentation: ../admin/architecture.html
+
+.. _voluptuous: https://github.com/alecthomas/voluptuous
diff --git a/doc/source/developer/index.rst b/doc/source/developer/index.rst
index ac1de048..4004c56b 100644
--- a/doc/source/developer/index.rst
+++ b/doc/source/developer/index.rst
@@ -5,4 +5,5 @@ Developer Documentation
 .. toctree::
    :glob:
 
+   collector
    storage