From abffd1342624fd107c9a9ba7ec75cca9a821d983 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafael=20Weing=C3=A4rtner?= <rafael@apache.org>
Date: Tue, 31 Mar 2020 21:16:42 -0300
Subject: [PATCH] Create 'use_all_resource_revisions' for Gnocchi collector

This option is useful when using Gnocchi with the patch introduced in
https://github.com/gnocchixyz/gnocchi/pull/1059. That patch can
cause queries to return more than one entry per granularity
(timespan), according to the revisions a resource has. This can be
problematic when using the 'mutate' option of CloudKitty. Therefore,
we propose this option to allow operators to discard all datapoints
returned from Gnocchi except the last one in the granularity queried
by CloudKitty. The default behavior is maintained, which means that
CloudKitty uses all of the datapoints returned.

Change-Id: I051ae1fa3ef6ace9aa417f4ccdca929dab0274b2
---
 cloudkitty/collector/gnocchi.py               | 47 ++++++++++++++++++-
 cloudkitty/tests/collectors/test_gnocchi.py   | 47 +++++++++++++++++++
 .../tests/collectors/test_validation.py       |  2 +-
 doc/source/_static/cloudkitty.conf.sample     |  9 ++++
 doc/source/admin/configuration/collector.rst  | 11 +++++
 ...or-gnocchi-collector-39d29603b1f554e1.yaml |  4 ++
 6 files changed, 118 insertions(+), 2 deletions(-)
 create mode 100644 releasenotes/notes/create-use_all_entries_for_timespan-option-for-gnocchi-collector-39d29603b1f554e1.yaml

diff --git a/cloudkitty/collector/gnocchi.py b/cloudkitty/collector/gnocchi.py
index ee7d9e4a..a3a3a724 100644
--- a/cloudkitty/collector/gnocchi.py
+++ b/cloudkitty/collector/gnocchi.py
@@ -15,6 +15,7 @@
 #
 from datetime import timedelta
 import requests
+
 import six
 
 from gnocchiclient import auth as gauth
@@ -36,7 +37,6 @@ from cloudkitty import dataframe
 from cloudkitty import utils as ck_utils
 from cloudkitty.utils import tz as tzutils
 
-
 LOG = logging.getLogger(__name__)
 
 COLLECTOR_GNOCCHI_OPTS = 'collector_gnocchi'
@@ -115,6 +115,7 @@ GNOCCHI_EXTRA_SCHEMA = {
         Required('re_aggregation_method', default='max'):
             In(BASIC_AGGREGATION_METHODS),
         Required('force_granularity', default=3600): All(int, Range(min=0)),
+        Required('use_all_resource_revisions', default=True): All(bool),
     },
 }
 
@@ -413,6 +414,9 @@ class GnocchiCollector(collector.BaseCollector):
             q_filter=q_filter,
         )
 
+        data = GnocchiCollector.filter_unecessary_measurements(
+            data, met, metric_name)
+
         resources_info = None
         if met['metadata']:
             resources_info = self._fetch_resources(
@@ -422,9 +426,13 @@ class GnocchiCollector(collector.BaseCollector):
                 project_id=project_id,
                 q_filter=q_filter
             )
+
         formated_resources = list()
         for d in data:
             # Only if aggregates have been found
+            LOG.debug("Processing entry [%s] for [%s] in timestamp ["
+                      "start=%s, end=%s] and project id [%s]", d,
+                      metric_name, start, end, project_id)
             if d['measures']['measures']['aggregated']:
                 try:
                     metadata, groupby, qty = self._format_data(
@@ -444,3 +452,40 @@ class GnocchiCollector(collector.BaseCollector):
                     metadata,
                 ))
         return formated_resources
+
+    @staticmethod
+    def filter_unecessary_measurements(data, met, metric_name):
+        """Drop superseded resource revisions unless all are requested.
+
+        With the Gnocchi patch from
+        https://github.com/gnocchixyz/gnocchi/pull/1059, a query can return
+        more than one entry per granularity (timespan), one per revision of
+        a resource, which can be problematic with CloudKitty's 'mutate'
+        option. When 'use_all_resource_revisions' is False, only the entry
+        with the latest 'revision_start' is kept for each resource ID.
+        When it is True (the default), 'data' is returned untouched.
+
+        :param data: list of entries returned by Gnocchi; each entry must
+                     hold 'id' and 'revision_start' under its 'group' key.
+        :param met: metric definition; 'extra_args' is read from it.
+        :param metric_name: name of the metric, used for logging only.
+        :return: 'data' itself when nothing is filtered, else a new list.
+        """
+
+        use_all_resource_revisions = \
+            met['extra_args']['use_all_resource_revisions']
+        LOG.debug("Configuration use_all_resource_revisions set to [%s] for "
+                  "%s", use_all_resource_revisions, metric_name)
+
+        if not data or use_all_resource_revisions:
+            return data
+
+        # Sort a copy (the caller's list is left untouched) so that, for a
+        # given resource ID, later revisions overwrite earlier ones below.
+        ordered = sorted(data, key=lambda x: (x["group"]["id"],
+                                              x["group"]["revision_start"]))
+        # Materialize a list so both branches hand back the same type.
+        latest_per_id = list({d["group"]["id"]: d for d in ordered}.values())
+        LOG.debug("Replaced list of data points [%s] with [%s] for "
+                  "metric [%s]", data, latest_per_id, metric_name)
+        return latest_per_id
diff --git a/cloudkitty/tests/collectors/test_gnocchi.py b/cloudkitty/tests/collectors/test_gnocchi.py
index 4bf420e3..6600ae82 100644
--- a/cloudkitty/tests/collectors/test_gnocchi.py
+++ b/cloudkitty/tests/collectors/test_gnocchi.py
@@ -215,3 +215,50 @@ class GnocchiCollectorAggregationOperationTest(tests.TestCase):
             ["metric", "metric_one", "rate:mean"],
         ]
         self.do_test(expected_op, extra_args=extra_args)
+
+    def test_filter_unecessary_measurements_use_all_datapoints(self):
+        data = [
+            {"group":
+                {
+                    "id": "id-1",
+                    "revision_start": datetime.datetime(
+                        2020, 1, 1, tzinfo=tz.tzutc())}},
+            {"group":
+                {"id": "id-1",
+                 "revision_start": datetime.datetime(
+                     2020, 1, 1, 1, 10, 0, tzinfo=tz.tzutc())}}
+        ]
+
+        expected_data = data.copy()
+        metric_name = 'test_metric'
+        metric = {
+            'name': metric_name,
+            'extra_args': {'use_all_resource_revisions': True}}
+        # With the option enabled, both revisions must come back unchanged.
+        data_filtered = gnocchi.GnocchiCollector.\
+            filter_unecessary_measurements(data, metric, metric_name)
+
+        self.assertEqual(expected_data, data_filtered)
+
+    def test_filter_unecessary_measurements_use_only_last_datapoint(self):
+        expected_data = {"group": {"id": "id-1",
+                                   "revision_start": datetime.datetime(
+                                       2020, 1, 1, 1, 10, 0, tzinfo=tz.tzutc())
+                                   }}
+        # Same resource ID, earlier revision first; only the later one stays.
+        data = [
+            {"group": {"id": "id-1", "revision_start": datetime.datetime(
+                     2020, 1, 1, tzinfo=tz.tzutc())}},
+            expected_data
+        ]
+
+        metric_name = 'test_metric'
+        metric = {'name': metric_name, 'extra_args': {
+            'use_all_resource_revisions': False}}
+
+        data_filtered = gnocchi.GnocchiCollector.\
+            filter_unecessary_measurements(data, metric, metric_name)
+
+        data_filtered = list(data_filtered)
+        self.assertEqual(1, len(data_filtered))
+        self.assertEqual(expected_data, data_filtered[0])
diff --git a/cloudkitty/tests/collectors/test_validation.py b/cloudkitty/tests/collectors/test_validation.py
index 7a1bdce4..c62974f3 100644
--- a/cloudkitty/tests/collectors/test_validation.py
+++ b/cloudkitty/tests/collectors/test_validation.py
@@ -71,7 +71,7 @@ class MetricConfigValidationTest(tests.TestCase):
         expected_output['metric_one']['extra_args'] = {
             'aggregation_method': 'max', 're_aggregation_method': 'max',
             'force_granularity': 3600, 'resource_type': 'res',
-            'resource_key': 'id'}
+            'resource_key': 'id', 'use_all_resource_revisions': True}
 
         self.assertEqual(
             collector.gnocchi.GnocchiCollector.check_configuration(data),
diff --git a/doc/source/_static/cloudkitty.conf.sample b/doc/source/_static/cloudkitty.conf.sample
index 3d43849b..d029aa83 100644
--- a/doc/source/_static/cloudkitty.conf.sample
+++ b/doc/source/_static/cloudkitty.conf.sample
@@ -1046,6 +1046,15 @@
 # (string value)
 #ssl_cafile =
 
+# Client certificate PEM file used for authentication. (string value)
+#ssl_client_cert_file =
+
+# Client key PEM file used for authentication. (string value)
+#ssl_client_key_file =
+
+# Client key password file used for authentication. (string value)
+#ssl_client_key_password =
+
 
 [oslo_messaging_notifications]
 
diff --git a/doc/source/admin/configuration/collector.rst b/doc/source/admin/configuration/collector.rst
index ff2572be..5ea614c1 100644
--- a/doc/source/admin/configuration/collector.rst
+++ b/doc/source/admin/configuration/collector.rst
@@ -279,6 +279,17 @@ Gnocchi
   used for metric aggregations. Else, the lowest available granularity will be
   used (meaning the granularity covering the longest period).
 
+* ``use_all_resource_revisions``: Defaults to ``True``. This option is useful
+  when using Gnocchi with the patch introduced via
+  https://github.com/gnocchixyz/gnocchi/pull/1059. That patch can cause
+  queries to return more than one entry per granularity (timespan), according
+  to the revisions a resource has. This can be problematic when using the
+  'mutate' option of CloudKitty. Setting this option to ``False`` makes
+  CloudKitty discard all datapoints returned from Gnocchi for a resource ID
+  except the last one in the queried granularity. By default, the previous
+  behavior is maintained: CloudKitty uses all of the datapoints returned.
+
+
 Monasca
 ~~~~~~~
 
diff --git a/releasenotes/notes/create-use_all_entries_for_timespan-option-for-gnocchi-collector-39d29603b1f554e1.yaml b/releasenotes/notes/create-use_all_entries_for_timespan-option-for-gnocchi-collector-39d29603b1f554e1.yaml
new file mode 100644
index 00000000..d930a512
--- /dev/null
+++ b/releasenotes/notes/create-use_all_entries_for_timespan-option-for-gnocchi-collector-39d29603b1f554e1.yaml
@@ -0,0 +1,4 @@
+---
+features:
+  - |
+    Create the option 'use_all_resource_revisions' for Gnocchi collector.