Create 'use_all_resource_revisions' for Gnocchi collector

This option is useful when using Gnocchi with the patch introduced in
https://github.com/gnocchixyz/gnocchi/pull/1059. That patch can
cause queries to return more than one entry per granularity (
timespan), according to the revisions a resource has. This can be
problematic when using the 'mutate' option of CloudKitty. Therefore,
we propose this option to allow operators to discard all
datapoints returned from Gnocchi except the last one in the
granularity queried by CloudKitty. The default behavior is
maintained, which means that CloudKitty always uses all of the
datapoints returned.

Change-Id: I051ae1fa3ef6ace9aa417f4ccdca929dab0274b2
This commit is contained in:
Rafael Weingärtner 2020-03-31 21:16:42 -03:00
parent 28b41c17e2
commit abffd13426
6 changed files with 118 additions and 2 deletions

View File

@ -15,6 +15,7 @@
# #
from datetime import timedelta from datetime import timedelta
import requests import requests
import six import six
from gnocchiclient import auth as gauth from gnocchiclient import auth as gauth
@ -36,7 +37,6 @@ from cloudkitty import dataframe
from cloudkitty import utils as ck_utils from cloudkitty import utils as ck_utils
from cloudkitty.utils import tz as tzutils from cloudkitty.utils import tz as tzutils
LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)
COLLECTOR_GNOCCHI_OPTS = 'collector_gnocchi' COLLECTOR_GNOCCHI_OPTS = 'collector_gnocchi'
@ -115,6 +115,7 @@ GNOCCHI_EXTRA_SCHEMA = {
Required('re_aggregation_method', default='max'): Required('re_aggregation_method', default='max'):
In(BASIC_AGGREGATION_METHODS), In(BASIC_AGGREGATION_METHODS),
Required('force_granularity', default=3600): All(int, Range(min=0)), Required('force_granularity', default=3600): All(int, Range(min=0)),
Required('use_all_resource_revisions', default=True): All(bool),
}, },
} }
@ -413,6 +414,9 @@ class GnocchiCollector(collector.BaseCollector):
q_filter=q_filter, q_filter=q_filter,
) )
data = GnocchiCollector.filter_unecessary_measurements(
data, met, metric_name)
resources_info = None resources_info = None
if met['metadata']: if met['metadata']:
resources_info = self._fetch_resources( resources_info = self._fetch_resources(
@ -422,9 +426,13 @@ class GnocchiCollector(collector.BaseCollector):
project_id=project_id, project_id=project_id,
q_filter=q_filter q_filter=q_filter
) )
formated_resources = list() formated_resources = list()
for d in data: for d in data:
# Only if aggregates have been found # Only if aggregates have been found
LOG.debug("Processing entry [%s] for [%s] in timestamp ["
"start=%s, end=%s] and project id [%s]", d,
metric_name, start, end, project_id)
if d['measures']['measures']['aggregated']: if d['measures']['measures']['aggregated']:
try: try:
metadata, groupby, qty = self._format_data( metadata, groupby, qty = self._format_data(
@ -444,3 +452,40 @@ class GnocchiCollector(collector.BaseCollector):
metadata, metadata,
)) ))
return formated_resources return formated_resources
@staticmethod
def filter_unecessary_measurements(data, met, metric_name):
    """Discard all but the latest revision per resource, if configured.

    The option 'use_all_resource_revisions' is useful when using Gnocchi
    with the patch introduced in
    https://github.com/gnocchixyz/gnocchi/pull/1059. That patch can cause
    queries to return more than one entry per granularity (timespan),
    according to the revisions a resource has. This can be problematic
    when using the 'mutate' option of CloudKitty. Therefore, the
    'use_all_resource_revisions' option allows operators to discard all
    datapoints returned from Gnocchi except the last one in the
    granularity queried by CloudKitty. The default behavior is
    maintained, which means that CloudKitty always uses all of the
    datapoints returned.

    NOTE: the misspelling 'unecessary' in the method name is kept on
    purpose; callers and tests reference this exact name.

    :param data: list of measurement entries returned by Gnocchi; each
                 entry is expected to have a 'group' dict carrying 'id'
                 and 'revision_start' keys.
    :param met: metric configuration dict; must contain the
                'use_all_resource_revisions' key under 'extra_args'.
    :param metric_name: metric name, used only for log messages.
    :return: the input list unchanged when the option is enabled (or the
             input is empty); otherwise a list with a single entry (the
             latest revision) per resource ID.
    """
    use_all_resource_revisions = \
        met['extra_args']['use_all_resource_revisions']
    LOG.debug("Configuration use_all_resource_revisions set to [%s] for "
              "%s", use_all_resource_revisions, metric_name)

    if data and not use_all_resource_revisions:
        # Sort ascending so that, for every resource ID, the entry with
        # the most recent 'revision_start' comes last.
        data.sort(
            key=lambda x: (x["group"]["id"], x["group"]["revision_start"]))

        # Building a dict keyed by resource ID keeps only the last-seen
        # entry per key, i.e. the latest revision of each resource in the
        # given time slice (configured granularity in CloudKitty).
        single_entries_per_id = list(
            {d["group"]["id"]: d for d in data}.values())

        LOG.debug("Replaced list of data points [%s] with [%s] for "
                  "metric [%s]", data, single_entries_per_id, metric_name)
        data = single_entries_per_id
    return data

View File

@ -215,3 +215,50 @@ class GnocchiCollectorAggregationOperationTest(tests.TestCase):
["metric", "metric_one", "rate:mean"], ["metric", "metric_one", "rate:mean"],
] ]
self.do_test(expected_op, extra_args=extra_args) self.do_test(expected_op, extra_args=extra_args)
def test_filter_unecessary_measurements_use_all_datapoints(self):
    """All entries are kept when 'use_all_resource_revisions' is True."""
    first_revision = datetime.datetime(2020, 1, 1, tzinfo=tz.tzutc())
    second_revision = datetime.datetime(
        2020, 1, 1, 1, 10, 0, tzinfo=tz.tzutc())

    measurements = [
        {"group": {"id": "id-1", "revision_start": first_revision}},
        {"group": {"id": "id-1", "revision_start": second_revision}},
    ]
    # Snapshot the input so we can verify nothing was discarded.
    original_measurements = measurements.copy()

    metric_name = 'test_metric'
    metric = {
        'name': metric_name,
        'extra_args': {'use_all_resource_revisions': True}}

    filtered = gnocchi.GnocchiCollector.filter_unecessary_measurements(
        measurements, metric, metric_name)

    self.assertEqual(original_measurements, filtered)
def test_filter_unecessary_measurements_use_only_last_datapoint(self):
    """Only the latest revision per resource ID is kept when disabled."""
    latest_entry = {"group": {
        "id": "id-1",
        "revision_start": datetime.datetime(
            2020, 1, 1, 1, 10, 0, tzinfo=tz.tzutc())}}
    older_entry = {"group": {
        "id": "id-1",
        "revision_start": datetime.datetime(
            2020, 1, 1, tzinfo=tz.tzutc())}}

    measurements = [older_entry, latest_entry]

    metric_name = 'test_metric'
    metric = {'name': metric_name,
              'extra_args': {'use_all_resource_revisions': False}}

    filtered = gnocchi.GnocchiCollector.filter_unecessary_measurements(
        measurements, metric, metric_name)
    filtered = list(filtered)

    self.assertEqual(1, len(filtered))
    self.assertEqual(latest_entry, filtered[0])

View File

@ -71,7 +71,7 @@ class MetricConfigValidationTest(tests.TestCase):
expected_output['metric_one']['extra_args'] = { expected_output['metric_one']['extra_args'] = {
'aggregation_method': 'max', 're_aggregation_method': 'max', 'aggregation_method': 'max', 're_aggregation_method': 'max',
'force_granularity': 3600, 'resource_type': 'res', 'force_granularity': 3600, 'resource_type': 'res',
'resource_key': 'id'} 'resource_key': 'id', 'use_all_resource_revisions': True}
self.assertEqual( self.assertEqual(
collector.gnocchi.GnocchiCollector.check_configuration(data), collector.gnocchi.GnocchiCollector.check_configuration(data),

View File

@ -1046,6 +1046,15 @@
# (string value) # (string value)
#ssl_cafile = #ssl_cafile =
# Client certificate PEM file used for authentication. (string value)
#ssl_client_cert_file =
# Client key PEM file used for authentication. (string value)
#ssl_client_key_file =
# Client key password file used for authentication. (string value)
#ssl_client_key_password =
[oslo_messaging_notifications] [oslo_messaging_notifications]

View File

@ -279,6 +279,17 @@ Gnocchi
used for metric aggregations. Else, the lowest available granularity will be used for metric aggregations. Else, the lowest available granularity will be
used (meaning the granularity covering the longest period). used (meaning the granularity covering the longest period).
* ``use_all_resource_revisions``: Defaults to ``True``. This option is useful
when using Gnocchi with the patch introduced via
https://github.com/gnocchixyz/gnocchi/pull/1059. That patch can cause
queries to return more than one entry per granularity (timespan), according
to the revisions a resource has. This can be problematic when using the
'mutate' option of CloudKitty. This option allows operators to discard all
datapoints returned from Gnocchi except the last one in the granularity
queried by CloudKitty for a resource id. The default behavior is
maintained, which means that CloudKitty always uses all of the data points
returned.
Monasca Monasca
~~~~~~~ ~~~~~~~

View File

@ -0,0 +1,4 @@
---
features:
- |
Create the option 'use_all_resource_revisions' for Gnocchi collector.