diff --git a/cloudkitty/collector/gnocchi.py b/cloudkitty/collector/gnocchi.py index ee7d9e4a..a3a3a724 100644 --- a/cloudkitty/collector/gnocchi.py +++ b/cloudkitty/collector/gnocchi.py @@ -15,6 +15,7 @@ # from datetime import timedelta import requests + import six from gnocchiclient import auth as gauth @@ -36,7 +37,6 @@ from cloudkitty import dataframe from cloudkitty import utils as ck_utils from cloudkitty.utils import tz as tzutils - LOG = logging.getLogger(__name__) COLLECTOR_GNOCCHI_OPTS = 'collector_gnocchi' @@ -115,6 +115,7 @@ GNOCCHI_EXTRA_SCHEMA = { Required('re_aggregation_method', default='max'): In(BASIC_AGGREGATION_METHODS), Required('force_granularity', default=3600): All(int, Range(min=0)), + Required('use_all_resource_revisions', default=True): All(bool), }, } @@ -413,6 +414,9 @@ class GnocchiCollector(collector.BaseCollector): q_filter=q_filter, ) + data = GnocchiCollector.filter_unecessary_measurements( + data, met, metric_name) + resources_info = None if met['metadata']: resources_info = self._fetch_resources( @@ -422,9 +426,13 @@ class GnocchiCollector(collector.BaseCollector): project_id=project_id, q_filter=q_filter ) + formated_resources = list() for d in data: # Only if aggregates have been found + LOG.debug("Processing entry [%s] for [%s] in timestamp [" + "start=%s, end=%s] and project id [%s]", d, + metric_name, start, end, project_id) if d['measures']['measures']['aggregated']: try: metadata, groupby, qty = self._format_data( @@ -444,3 +452,40 @@ class GnocchiCollector(collector.BaseCollector): metadata, )) return formated_resources + + @staticmethod + def filter_unecessary_measurements(data, met, metric_name): + """Filter unecessary measurements if not 'use_all_resource_revisions' + + The option 'use_all_resource_revisions' is useful when using Gnocchi + with the patch introduced in + https://github.com/gnocchixyz/gnocchi/pull/1059. 
+ + That patch can cause queries to return more than one entry per + granularity (timespan), according to the revisions a resource has. + This can be problematic when using the 'mutate' option of CloudKitty. + Therefore, this option ('use_all_resource_revisions') allows operators + to discard all datapoints returned from Gnocchi, but the last one in + the granularity queried by CloudKitty. The default behavior is + maintained, which means CloudKitty always uses all of the data + points returned. + """ + + use_all_resource_revisions = \ + met['extra_args']['use_all_resource_revisions'] + LOG.debug("Configuration use_all_resource_revisions set to [%s] for " + "%s", use_all_resource_revisions, metric_name) + + if data and not use_all_resource_revisions: + data.sort( + key=lambda x: (x["group"]["id"], x["group"]["revision_start"]), + reverse=False) + + # We just care about the latest entry per resource ID in the + # given time slice (configured granularity in CloudKitty). + single_entries_per_id = {d["group"]["id"]: d for d in + data}.values() + LOG.debug("Replaced list of data points [%s] with [%s] for " + "metric [%s]", data, single_entries_per_id, metric_name) + data = single_entries_per_id + return data diff --git a/cloudkitty/tests/collectors/test_gnocchi.py b/cloudkitty/tests/collectors/test_gnocchi.py index 4bf420e3..6600ae82 100644 --- a/cloudkitty/tests/collectors/test_gnocchi.py +++ b/cloudkitty/tests/collectors/test_gnocchi.py @@ -215,3 +215,50 @@ class GnocchiCollectorAggregationOperationTest(tests.TestCase): ["metric", "metric_one", "rate:mean"], ] self.do_test(expected_op, extra_args=extra_args) + + def test_filter_unecessary_measurements_use_all_datapoints(self): + data = [ + {"group": + { + "id": "id-1", + "revision_start": datetime.datetime( + 2020, 1, 1, tzinfo=tz.tzutc())}}, + {"group": + {"id": "id-1", + "revision_start": datetime.datetime( + 2020, 1, 1, 1, 10, 0, tzinfo=tz.tzutc())}} + ] + + expected_data = data.copy() + metric_name = 'test_metric' 
+ metric = { + 'name': metric_name, + 'extra_args': {'use_all_resource_revisions': True}} + + data_filtered = gnocchi.GnocchiCollector.\ + filter_unecessary_measurements(data, metric, metric_name) + + self.assertEqual(expected_data, data_filtered) + + def test_filter_unecessary_measurements_use_only_last_datapoint(self): + expected_data = {"group": {"id": "id-1", + "revision_start": datetime.datetime( + 2020, 1, 1, 1, 10, 0, tzinfo=tz.tzutc()) + }} + + data = [ + {"group": {"id": "id-1", "revision_start": datetime.datetime( + 2020, 1, 1, tzinfo=tz.tzutc())}}, + expected_data + ] + + metric_name = 'test_metric' + metric = {'name': metric_name, 'extra_args': { + 'use_all_resource_revisions': False}} + + data_filtered = gnocchi.GnocchiCollector.\ + filter_unecessary_measurements(data, metric, metric_name) + + data_filtered = list(data_filtered) + self.assertEqual(1, len(data_filtered)) + self.assertEqual(expected_data, data_filtered[0]) diff --git a/cloudkitty/tests/collectors/test_validation.py b/cloudkitty/tests/collectors/test_validation.py index 7a1bdce4..c62974f3 100644 --- a/cloudkitty/tests/collectors/test_validation.py +++ b/cloudkitty/tests/collectors/test_validation.py @@ -71,7 +71,7 @@ class MetricConfigValidationTest(tests.TestCase): expected_output['metric_one']['extra_args'] = { 'aggregation_method': 'max', 're_aggregation_method': 'max', 'force_granularity': 3600, 'resource_type': 'res', - 'resource_key': 'id'} + 'resource_key': 'id', 'use_all_resource_revisions': True} self.assertEqual( collector.gnocchi.GnocchiCollector.check_configuration(data), diff --git a/doc/source/admin/configuration/collector.rst b/doc/source/admin/configuration/collector.rst index ff2572be..5ea614c1 100644 --- a/doc/source/admin/configuration/collector.rst +++ b/doc/source/admin/configuration/collector.rst @@ -279,6 +279,17 @@ Gnocchi used for metric aggregations. Else, the lowest available granularity will be used (meaning the granularity covering the longest period). 
+* ``use_all_resource_revisions``: Defaults to ``True``. This option is useful + when using Gnocchi with the patch introduced via + https://github.com/gnocchixyz/gnocchi/pull/1059. That patch can cause + queries to return more than one entry per granularity (timespan), according + to the revisions a resource has. This can be problematic when using the + 'mutate' option of CloudKitty. This option allows operators to discard all + datapoints returned from Gnocchi, except the last one in the granularity + queried by CloudKitty for a resource id. The default behavior is maintained, + which means CloudKitty always uses all of the data points returned. + + Monasca ~~~~~~~ diff --git a/releasenotes/notes/create-use_all_entries_for_timespan-option-for-gnocchi-collector-39d29603b1f554e1.yaml b/releasenotes/notes/create-use_all_entries_for_timespan-option-for-gnocchi-collector-39d29603b1f554e1.yaml new file mode 100644 index 00000000..d930a512 --- /dev/null +++ b/releasenotes/notes/create-use_all_entries_for_timespan-option-for-gnocchi-collector-39d29603b1f554e1.yaml @@ -0,0 +1,4 @@ +--- +features: + - | + Create the option 'use_all_resource_revisions' for Gnocchi collector.