Add groupby options by different timeframes

Introduce new default groupby options: (i) time: to group data hourly; (ii) time-d: to group data by day of the year; (iii) time-w: to group data by week of the year; (iv) time-m: to group data by month; and, (v) time-y: to group data by year. If you have old data in CloudKitty and you wish to use these group by methods, you will need to reprocess the desired timeframe. Story: #2009839 Task: #44438 Depends-On: https://review.opendev.org/c/x/wsme/+/893677 Change-Id: Iad296f54f6701af84e168796aec9b1033a2a8a2d
2022-10-11 16:15:38 -03:00 · 2022-10-11 16:15:38 -03:00 · 45f5e72472
parent 60077a3cc4
commit 45f5e72472
14 changed files with 220 additions and 30 deletions
--- a/api-ref/source/v2/summary/summary_parameters.yml
+++ b/api-ref/source/v2/summary/summary_parameters.yml
@ -52,7 +52,19 @@ filters:
 groupby:
  in: query
  description: |
-    Optional attributes to group the summary by.
+    Optional attributes to group the summary by. The ``groupby`` elements are
    defined in the collector YML settings. Therefore, one can group the
    result using any of the ``groupby`` attributes defined in the collector
    settings of CloudKitty. Besides those attributes, by default, starting
    with the CloudKitty ``2024.1`` release, we have the following new groupby
    options: (i) time: to group data hourly; (ii) time-d: to group data
    by day of the year; (iii) time-w: to group data by week of the year;
    (iv) time-m: to group data by month; and, (v) time-y: to group data by
    year. If you have old data in CloudKitty and you wish to use these
    group by methods, you will need to reprocess the desired timeframe.
    The ``groupby`` options ``time-d``, ``time-w``, ``time-m``, and ``time-y``
    are the short versions of the ``groupby`` options ``day_of_the_year``,
    ``week_of_the_year``, ``month``, and ``year``, respectively.
  type: list of strings
  required: false
--- a/cloudkitty/collector/init.py
+++ b/cloudkitty/collector/init.py
@ -14,6 +14,7 @@
 #    under the License.
 #
 import abc
 import datetime
 import fractions
 from oslo_config import cfg
@ -30,9 +31,9 @@ from voluptuous import Optional
 from voluptuous import Required
 from voluptuous import Schema
 from cloudkitty.dataframe import DataPoint
 from cloudkitty import utils as ck_utils
 LOG = logging.getLogger(__name__)
 collect_opts = [
@ -248,6 +249,30 @@ class BaseCollector(object, metaclass=abc.ABCMeta):
        return name, data
    def _create_data_point(self, unit, qty, price, groupby, metadata, start):
        """Build a ``DataPoint`` enriched with time-based groupby attributes.

        The ``week_of_the_year``, ``day_of_the_year``, ``month`` and ``year``
        attributes are derived from ``start`` and added to the groupby of
        the returned data point, all of them as strings.

        :param unit: unit of the data point.
        :param qty: quantity of the data point.
        :param price: price of the data point.
        :param groupby: mapping of groupby attributes, or ``None``. It is
                        copied, so the caller's mapping is left untouched.
        :param metadata: metadata of the data point.
        :param start: datetime used to derive the time attributes. When it
                      is falsy, the current time is used instead.
        :return: the ``DataPoint`` built from the given values.
        """
        if not start:
            start = datetime.datetime.now()
            LOG.debug("Collector [%s]. No start datetime defined for "
                      "datapoint[unit=%s, quantity=%s, price=%s, groupby=%s, "
                      "metadata=%s]. Therefore, we use the current time as "
                      "the start time for this datapoint.",
                      self.collector_name, unit, qty, price, groupby, metadata)

        # Work on a copy so that the time attributes do not leak into the
        # dict owned by the caller.
        groupby = dict(groupby) if groupby is not None else {}

        # "%U" is the zero-padded week number, with Sunday as the first day
        # of the week.
        groupby['week_of_the_year'] = start.strftime("%U")
        # The non-padded "%-j" and "%-m" directives are a glibc extension and
        # are not available on every platform; the datetime attributes give
        # the same non-padded values portably.
        groupby['day_of_the_year'] = str(start.timetuple().tm_yday)
        groupby['month'] = str(start.month)
        groupby['year'] = start.strftime("%Y")

        return DataPoint(unit, qty, price, groupby, metadata)
 class InvalidConfiguration(Exception):
    pass
--- a/cloudkitty/collector/gnocchi.py
+++ b/cloudkitty/collector/gnocchi.py
@ -33,7 +33,6 @@ from voluptuous import Schema
 from cloudkitty import collector
 from cloudkitty.common import custom_session
 from cloudkitty import dataframe
 from cloudkitty import utils as ck_utils
 from cloudkitty.utils import tz as tzutils
@ -517,13 +516,9 @@ class GnocchiCollector(collector.BaseCollector):
                            project_id, start, end, e),
                    )
                    continue
-                formated_resources.append(dataframe.DataPoint(
+                point = self._create_data_point(
-                    met['unit'],
+                    met['unit'], qty, 0, groupby, metadata, start)
-                    qty,
+                formated_resources.append(point)
                    0,
                    groupby,
                    metadata,
                ))
        return formated_resources
    @staticmethod
--- a/cloudkitty/collector/monasca.py
+++ b/cloudkitty/collector/monasca.py
@ -24,7 +24,6 @@ from voluptuous import Schema
 from cloudkitty import collector
 from cloudkitty.common import monasca_client as mon_client_utils
 from cloudkitty import dataframe
 from cloudkitty import utils as ck_utils
 LOG = logging.getLogger(__name__)
@ -231,11 +230,8 @@ class MonascaCollector(collector.BaseCollector):
            if len(d['statistics']):
                metadata, groupby, qty = self._format_data(
                    met, d, resources_info)
-                formated_resources.append(dataframe.DataPoint(
+
-                    met['unit'],
+                point = self._create_data_point(
-                    qty,
+                    met['unit'], qty, 0, groupby, metadata, start)
-                    0,
+                formated_resources.append(point)
                    groupby,
                    metadata,
                ))
        return formated_resources
--- a/cloudkitty/collector/prometheus.py
+++ b/cloudkitty/collector/prometheus.py
@ -28,7 +28,6 @@ from cloudkitty import collector
 from cloudkitty.collector.exceptions import CollectError
 from cloudkitty.common.prometheus_client import PrometheusClient
 from cloudkitty.common.prometheus_client import PrometheusResponseError
 from cloudkitty import dataframe
 from cloudkitty import utils as ck_utils
 from cloudkitty.utils import tz as tzutils
@ -243,12 +242,8 @@ class PrometheusCollector(collector.BaseCollector):
                item,
            )
-            formatted_resources.append(dataframe.DataPoint(
+            point = self._create_data_point(self.conf[metric_name]['unit'],
-                self.conf[metric_name]['unit'],
+                                            qty, 0, groupby, metadata, start)
-                qty,
+            formatted_resources.append(point)
                0,
                groupby,
                metadata,
            ))
        return formatted_resources
--- a/cloudkitty/storage/init.py
+++ b/cloudkitty/storage/init.py
@ -177,6 +177,9 @@ class V1StorageAdapter(storage_v2.BaseStorage):
                    storage_gby.append('res_type')
                elif elem == 'project_id':
                    storage_gby.append('tenant_id')
                else:
                    LOG.warning("The groupby [%s] is not supported by MySQL "
                                "storage backend.", elem)
        return ','.join(storage_gby) if storage_gby else None
    def get_tenants(self, begin, end):
--- a/cloudkitty/storage/v2/init.py
+++ b/cloudkitty/storage/v2/init.py
@ -16,10 +16,14 @@
 import abc
 import datetime
 from oslo_log import log as logging
 from oslo_config import cfg
 from cloudkitty import storage_state
 from werkzeug import exceptions as http_exceptions
 storage_opts = [
    cfg.IntOpt(
@ -33,6 +37,8 @@ storage_opts = [
 CONF = cfg.CONF
 CONF.register_opts(storage_opts, 'storage')
 LOG = logging.getLogger(__name__)
 class BaseStorage(object, metaclass=abc.ABCMeta):
    """Abstract class for v2 storage objects."""
@ -159,3 +165,35 @@ class BaseStorage(object, metaclass=abc.ABCMeta):
    # NOTE(lpeschke): This is only kept for v1 storage backward compatibility
    def get_tenants(self, begin=None, end=None):
        return storage_state.StateManager().get_tenants(begin, end)
    TIME_COMMANDS_MAP = {"d": "day_of_the_year", "w": "week_of_the_year",
                         "m": "month", "y": "year"}
    def parse_groupby_syntax_to_groupby_elements(self, groupbys):
        """Translate API time groupby commands into internal groupby names.

        An element of the form ``time-<x>`` is replaced by the groupby
        attribute that ``TIME_COMMANDS_MAP`` associates with ``<x>``. Every
        other element, including the plain ``time`` groupby, is kept as is.

        :param groupbys: groupby elements received from the API; may be
                         ``None`` or empty.
        :return: ``groupbys`` itself when it is ``None`` or empty; otherwise
                 a new list with the time commands replaced.
        :raises werkzeug.exceptions.BadRequest: when an element has the form
            ``time-<x>`` and ``<x>`` is not a key of ``TIME_COMMANDS_MAP``.
        """
        if not groupbys:
            LOG.debug("No groupby to process syntax.")
            return groupbys

        groupbys_parsed = []
        for elem in groupbys:
            time_command = elem.split('-')
            # Only elements whose first part is exactly "time" are time
            # commands. A substring check would also capture unrelated
            # attributes such as "boot-time" (rejected with a 400) or
            # "uptime-d" (silently rewritten to "day_of_the_year").
            if time_command[0] == 'time':
                number_of_parts = len(time_command)
                if number_of_parts == 2:
                    g = self.TIME_COMMANDS_MAP.get(time_command[1])
                    if not g:
                        raise http_exceptions.BadRequest(
                            "Invalid groupby time option. There is no "
                            "groupby processing for [%s]." % elem)

                    LOG.debug("Replacing API groupby time command [%s] with "
                              "internal groupby command [%s].", elem, g)
                    elem = g
                elif number_of_parts > 2:
                    LOG.warning("The groupby [%s] command is not expected for "
                                "storage backend [%s]. Therefore, we leave it "
                                "as is.", elem, self)

            groupbys_parsed.append(elem)
        return groupbys_parsed
--- a/cloudkitty/storage/v2/elasticsearch/init.py
+++ b/cloudkitty/storage/v2/elasticsearch/init.py
@ -194,6 +194,7 @@ class ElasticsearchStorage(v2_storage.BaseStorage):
        begin, end = self._local_to_utc(begin or tzutils.get_month_start(),
                                        end or tzutils.get_next_month())
        groupby = self.parse_groupby_syntax_to_groupby_elements(groupby)
        total, docs = self._conn.total(begin, end, metric_types, filters,
                                       groupby, custom_fields=custom_fields,
                                       offset=offset, limit=limit,
--- a/cloudkitty/storage/v2/influx.py
+++ b/cloudkitty/storage/v2/influx.py
@ -394,6 +394,7 @@ class InfluxStorage(v2_storage.BaseStorage):
              custom_fields="SUM(qty) AS qty, SUM(price) AS rate"):
        begin, end = self._check_begin_end(begin, end)
        groupby = self.parse_groupby_syntax_to_groupby_elements(groupby)
        total = self._conn.get_total(metric_types, begin, end,
                                     custom_fields, groupby, filters)
--- a/cloudkitty/tests/collectors/test_prometheus.py
+++ b/cloudkitty/tests/collectors/test_prometheus.py
@ -256,14 +256,16 @@ class PrometheusCollectorTest(tests.TestCase):
    def test_format_retrieve(self):
        expected_name = 'http_requests_total'
        group_by = {'bar': '', 'foo': '', 'project_id': '',
                    'week_of_the_year': '00', 'day_of_the_year': '1',
                    'month': '1', 'year': '2015'}
        expected_data = [
            dataframe.DataPoint(
-                'instance', '7', '0',
+                'instance', '7', '0', group_by,
                {'bar': '', 'foo': '', 'project_id': ''},
                {'code': '200', 'instance': 'localhost:9090'}),
            dataframe.DataPoint(
-                'instance', '42', '0',
+                'instance', '42', '0', group_by,
                {'bar': '', 'foo': '', 'project_id': ''},
                {'code': '200', 'instance': 'localhost:9090'}),
        ]
--- a/cloudkitty/tests/gabbi/gabbits/v2-summary.yaml
+++ b/cloudkitty/tests/gabbi/gabbits/v2-summary.yaml
@ -86,3 +86,32 @@ tests:
    response_json_paths:
      $.results.`len`: 2
      $.total: 2
  - name: Get a summary grouped by time-w and project_id
    url: /v2/summary
    status: 200
    query_parameters:
      groupby: [time-w, project_id]
    response_json_paths:
      $.results.`len`: 4
      $.total: 4
  - name: Get a summary grouped by time-d
    url: /v2/summary
    status: 200
    query_parameters:
      groupby: [time-d]
    response_json_paths:
      $.results.`len`: 2
      $.total: 2
  - name: Get a summary grouped by time-y
    url: /v2/summary
    status: 200
    query_parameters:
      groupby: [time-y]
    response_json_paths:
      $.results.`len`: 3
      $.total: 3
--- a/cloudkitty/tests/storage/v2/test_storage_unit.py
+++ b/cloudkitty/tests/storage/v2/test_storage_unit.py
@ -16,8 +16,10 @@ import datetime
 from unittest import mock
 import testscenarios
 from werkzeug import exceptions as http_exceptions
 from cloudkitty import storage
 from cloudkitty.tests import samples
 from cloudkitty.tests.storage.v2 import es_utils
 from cloudkitty.tests.storage.v2 import influx_utils
@ -340,5 +342,68 @@ class StorageUnitTest(TestCase):
        self.assertEqual(expected_length, retrieved_length)
    def test_parse_groupby_syntax_to_groupby_elements_no_time_groupby(self):
        """A groupby list without time elements is returned unchanged."""
        requested = ["something"]
        parsed = self.storage.parse_groupby_syntax_to_groupby_elements(
            requested)
        self.assertEqual(requested, parsed)
    def test_parse_groupby_syntax_to_groupby_elements_time_groupby(self):
        """The plain ``time`` groupby is kept as is by the parser."""
        requested = ["something", "time"]
        parsed = self.storage.parse_groupby_syntax_to_groupby_elements(
            requested)
        self.assertEqual(requested, parsed)
    def test_parse_groupby_syntax_to_groupby_elements_odd_time(self):
        """A time command with more than two parts is logged and kept."""
        odd_element = "time-odd-time-element"
        requested = ["something", odd_element]
        expected_warning = mock.call(
            "The groupby [%s] command is not expected for "
            "storage backend [%s]. Therefore, we leave it as "
            "is.", odd_element, self.storage)

        with mock.patch.object(storage.v2.LOG, 'warning') as warning_mock:
            parsed = self.storage.parse_groupby_syntax_to_groupby_elements(
                requested)
            warning_mock.assert_has_calls([expected_warning])

        self.assertEqual(requested, parsed)
    def test_parse_groupby_syntax_to_groupby_elements_wrong_time_frame(self):
        """An unknown time suffix is rejected with a 400 Bad Request."""
        requested = ["something", "time-u"]
        message_pattern = (r"400 Bad Request: Invalid groupby time option. "
                           r"There is no groupby processing for \[time-u\].")

        with self.assertRaisesRegex(http_exceptions.BadRequest,
                                    message_pattern):
            self.storage.parse_groupby_syntax_to_groupby_elements(requested)
    def test_parse_groupby_syntax_to_groupby_elements_all_time_options(self):
        """Every supported time command is replaced by its internal name."""
        requested = ["something", "time", "time-d", "time-w", "time-m",
                     "time-y"]
        time_commands = storage.v2.BaseStorage.TIME_COMMANDS_MAP
        # One debug message is expected per replaced time command.
        expected_debug_calls = [
            mock.call("Replacing API groupby time command [%s] with "
                      "internal groupby command [%s].",
                      "time-%s" % suffix, internal_name)
            for suffix, internal_name in time_commands.items()
        ]

        with mock.patch.object(storage.v2.LOG, 'debug') as debug_mock:
            parsed = self.storage.parse_groupby_syntax_to_groupby_elements(
                requested)
            debug_mock.assert_has_calls(expected_debug_calls)

        expected = ["something", "time", "day_of_the_year",
                    "week_of_the_year", "month", "year"]
        self.assertEqual(expected, parsed)
    def test_parse_groupby_syntax_to_groupby_elements_no_groupby(self):
        """``None`` is returned untouched and a debug message is logged."""
        expected_debug = mock.call("No groupby to process syntax.")

        with mock.patch.object(storage.v2.LOG, 'debug') as debug_mock:
            parsed = self.storage.parse_groupby_syntax_to_groupby_elements(
                None)
            debug_mock.assert_has_calls([expected_debug])
            self.assertIsNone(parsed)
 StorageUnitTest.generate_scenarios()
--- a/cloudkitty/tests/utils.py
+++ b/cloudkitty/tests/utils.py
@ -39,9 +39,27 @@ def generate_v2_storage_data(min_length=10,
        for project_id in project_ids:
            data = [copy.deepcopy(sample)
                    for i in range(min_length + random.randint(1, 10))]
            first_group = data[:round(len(data)/2)]
            second_group = data[round(len(data)/2):]
            for elem in first_group:
                elem['groupby']['year'] = 2022
                elem['groupby']['week_of_the_year'] = 1
                elem['groupby']['day_of_the_year'] = 1
                elem['groupby']['month'] = 10
            for elem in second_group:
                elem['groupby']['year'] = 2023
                elem['groupby']['week_of_the_year'] = 2
                elem['groupby']['day_of_the_year'] = 2
                elem['groupby']['month'] = 12
            data[0]['groupby']['year'] = 2021
            for elem in data:
                elem['groupby']['id'] = uuidutils.generate_uuid()
                elem['groupby']['project_id'] = project_id
            datapoints += [dataframe.DataPoint(
                elem['vol']['unit'],
                elem['vol']['qty'],
--- a/releasenotes/notes/support-group-by-timeframes-1247aa336916f3b6.yaml
+++ b/releasenotes/notes/support-group-by-timeframes-1247aa336916f3b6.yaml
@ -0,0 +1,10 @@
 ---
 features:
  - |
    Introduce new default groupby options: (i) time: to group data hourly.
    The actual group by process will depend on the ``period`` parameter. The
    default value is ``3600``, which represents one hour; (ii) time-d: to
    group data by day of the year; (iii) time-w: to group data by week of
    the year; (iv) time-m: to group data by month; and, (v) time-y: to group
    data by year. If you have old data in CloudKitty and you wish to use
    these group by methods, you will need to reprocess the desired timeframe.