Create 'use_all_resource_revisions' for Gnocchi collector

This option is useful when using Gnocchi with the patch introduced in
https://github.com/gnocchixyz/gnocchi/pull/1059. That patch can
cause queries to return more than one entry per granularity
(timespan), depending on the revisions a resource has. This can be
problematic when using the 'mutate' option of CloudKitty. Therefore,
we propose this option to allow operators to discard all data
points returned from Gnocchi except the last one in the
granularity queried by CloudKitty. The default behavior is
maintained, meaning that CloudKitty always uses all of the data
points returned.
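
For illustration, a minimal standalone sketch of the deduplication
idea (hypothetical sample data; this snippet is not part of the
change itself, which implements the logic inside the collector):

    # Keep only the latest revision of each resource id within the
    # queried granularity (hypothetical sample data).
    import datetime

    measures = [
        {"group": {"id": "res-1",
                   "revision_start": datetime.datetime(2020, 1, 1, 0, 0)}},
        {"group": {"id": "res-1",
                   "revision_start": datetime.datetime(2020, 1, 1, 1, 10)}},
        {"group": {"id": "res-2",
                   "revision_start": datetime.datetime(2020, 1, 1, 0, 30)}},
    ]

    # Sort ascending so the latest revision of each id comes last; the dict
    # comprehension then keeps exactly one entry (the last seen) per id.
    measures.sort(
        key=lambda m: (m["group"]["id"], m["group"]["revision_start"]))
    latest_per_id = list({m["group"]["id"]: m for m in measures}.values())
    # latest_per_id holds two entries: res-1 at 01:10 and res-2 at 00:30.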

Change-Id: I051ae1fa3ef6ace9aa417f4ccdca929dab0274b2
Rafael Weingärtner 2020-03-31 21:16:42 -03:00
parent 28b41c17e2
commit abffd13426
6 changed files with 118 additions and 2 deletions


@@ -15,6 +15,7 @@
#
from datetime import timedelta
import requests
import six
from gnocchiclient import auth as gauth
@@ -36,7 +37,6 @@ from cloudkitty import dataframe
from cloudkitty import utils as ck_utils
from cloudkitty.utils import tz as tzutils
LOG = logging.getLogger(__name__)
COLLECTOR_GNOCCHI_OPTS = 'collector_gnocchi'
@@ -115,6 +115,7 @@ GNOCCHI_EXTRA_SCHEMA = {
Required('re_aggregation_method', default='max'):
In(BASIC_AGGREGATION_METHODS),
Required('force_granularity', default=3600): All(int, Range(min=0)),
Required('use_all_resource_revisions', default=True): All(bool),
},
}
@@ -413,6 +414,9 @@ class GnocchiCollector(collector.BaseCollector):
q_filter=q_filter,
)
data = GnocchiCollector.filter_unecessary_measurements(
data, met, metric_name)
resources_info = None
if met['metadata']:
resources_info = self._fetch_resources(
@@ -422,9 +426,13 @@ class GnocchiCollector(collector.BaseCollector):
project_id=project_id,
q_filter=q_filter
)
formated_resources = list()
for d in data:
# Only if aggregates have been found
LOG.debug("Processing entry [%s] for [%s] in timestamp ["
"start=%s, end=%s] and project id [%s]", d,
metric_name, start, end, project_id)
if d['measures']['measures']['aggregated']:
try:
metadata, groupby, qty = self._format_data(
@@ -444,3 +452,40 @@
metadata,
))
return formated_resources
@staticmethod
def filter_unecessary_measurements(data, met, metric_name):
"""Filter unecessary measurements if not 'use_all_resource_revisions'
The option 'use_all_resource_revisions' is useful when using Gnocchi
with the patch introduced in
https://github.com/gnocchixyz/gnocchi/pull/1059.
That patch can cause queries to return more than one entry per
granularity (timespan), according to the revisions a resource has.
This can be problematic when using the 'mutate' option of Cloudkitty.
Therefore, this option ('use_all_resource_revisions') allows operators
to discard all datapoints returned from Gnocchi, but the last one in
the granularity queried by CloudKitty. The default behavior is
maintained, which means, CloudKitty always use all of the data
points returned.
"""
use_all_resource_revisions = \
met['extra_args']['use_all_resource_revisions']
LOG.debug("Configuration use_all_resource_revisions set to [%s] for "
"%s", use_all_resource_revisions, metric_name)
if data and not use_all_resource_revisions:
data.sort(
key=lambda x: (x["group"]["id"], x["group"]["revision_start"]),
reverse=False)
# We only keep the latest entry per resource ID in the given
# time slice (the granularity configured in CloudKitty).
single_entries_per_id = {d["group"]["id"]: d for d in
data}.values()
LOG.debug("Replaced list of data points [%s] with [%s] for "
"metric [%s]", data, single_entries_per_id, metric_name)
data = single_entries_per_id
return data


@@ -215,3 +215,50 @@ class GnocchiCollectorAggregationOperationTest(tests.TestCase):
["metric", "metric_one", "rate:mean"],
]
self.do_test(expected_op, extra_args=extra_args)
def test_filter_unecessary_measurements_use_all_datapoints(self):
data = [
{"group":
{
"id": "id-1",
"revision_start": datetime.datetime(
2020, 1, 1, tzinfo=tz.tzutc())}},
{"group":
{"id": "id-1",
"revision_start": datetime.datetime(
2020, 1, 1, 1, 10, 0, tzinfo=tz.tzutc())}}
]
expected_data = data.copy()
metric_name = 'test_metric'
metric = {
'name': metric_name,
'extra_args': {'use_all_resource_revisions': True}}
data_filtered = gnocchi.GnocchiCollector.\
filter_unecessary_measurements(data, metric, metric_name)
self.assertEqual(expected_data, data_filtered)
def test_filter_unecessary_measurements_use_only_last_datapoint(self):
expected_data = {"group": {"id": "id-1",
"revision_start": datetime.datetime(
2020, 1, 1, 1, 10, 0, tzinfo=tz.tzutc())
}}
data = [
{"group": {"id": "id-1", "revision_start": datetime.datetime(
2020, 1, 1, tzinfo=tz.tzutc())}},
expected_data
]
metric_name = 'test_metric'
metric = {'name': metric_name, 'extra_args': {
'use_all_resource_revisions': False}}
data_filtered = gnocchi.GnocchiCollector.\
filter_unecessary_measurements(data, metric, metric_name)
data_filtered = list(data_filtered)
self.assertEqual(1, len(data_filtered))
self.assertEqual(expected_data, data_filtered[0])


@@ -71,7 +71,7 @@ class MetricConfigValidationTest(tests.TestCase):
expected_output['metric_one']['extra_args'] = {
'aggregation_method': 'max', 're_aggregation_method': 'max',
'force_granularity': 3600, 'resource_type': 'res',
'resource_key': 'id'}
'resource_key': 'id', 'use_all_resource_revisions': True}
self.assertEqual(
collector.gnocchi.GnocchiCollector.check_configuration(data),


@@ -1046,6 +1046,15 @@
# (string value)
#ssl_cafile =
# Client certificate PEM file used for authentication. (string value)
#ssl_client_cert_file =
# Client key PEM file used for authentication. (string value)
#ssl_client_key_file =
# Client key password file used for authentication. (string value)
#ssl_client_key_password =
[oslo_messaging_notifications]


@@ -279,6 +279,17 @@ Gnocchi
used for metric aggregations. Else, the lowest available granularity will be
used (meaning the granularity covering the longest period).
* ``use_all_resource_revisions``: Defaults to ``True``. This option is useful
  when using Gnocchi with the patch introduced via
  https://github.com/gnocchixyz/gnocchi/pull/1059. That patch can cause
  queries to return more than one entry per granularity (timespan), depending
  on the revisions a resource has. This can be problematic when using the
  ``mutate`` option of CloudKitty. Setting this option to ``False`` allows
  operators to discard all data points returned from Gnocchi except the last
  one in the granularity queried by CloudKitty for a resource id. The default
  behavior is maintained, meaning that CloudKitty always uses all of the data
  points returned. A minimal sketch of how the default is applied is shown
  below.
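
A minimal sketch (simplified, not CloudKitty's actual schema layout) of how
the ``True`` default is applied when the option is not set by the operator::

    from voluptuous import All, Range, Required, Schema

    # Simplified stand-in for the Gnocchi extra_args schema; only the two
    # keys relevant to this change are shown.
    EXTRA_ARGS_SKETCH = Schema({
        Required('force_granularity', default=3600): All(int, Range(min=0)),
        Required('use_all_resource_revisions', default=True): All(bool),
    })

    print(EXTRA_ARGS_SKETCH({}))
    # {'force_granularity': 3600, 'use_all_resource_revisions': True}
    print(EXTRA_ARGS_SKETCH({'use_all_resource_revisions': False}))
    # {'force_granularity': 3600, 'use_all_resource_revisions': False}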
Monasca
~~~~~~~


@@ -0,0 +1,4 @@
---
features:
- |
Create the option 'use_all_resource_revisions' for the Gnocchi collector.