Support grouping by timestamp in GET /v2/summary

Work items:

* Updated the Elasticsearch and InfluxDB v2 storage backends to support
  grouping by time.

* Added a "dt_from_ts()" function to "cloudkitty.tzutils", along with unit
  tests. This function converts an epoch timestamp to a timezone-aware datetime
  object.

* Added unit tests for GET /v2/summary to validate that grouping by time
  works as expected.

* Updated the API documentation.
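
As an illustration of the new behaviour, a time-grouped summary can be
requested over HTTP. This is a minimal sketch (the endpoint host/port and the
use of python-requests are illustrative assumptions, not part of this change):

    import requests

    # Repeating the groupby parameter groups by several attributes at once;
    # with 'time', each result row carries its own begin/end window.
    resp = requests.get(
        'http://cloudkitty-api:8889/v2/summary',
        params=[('groupby', 'time'), ('groupby', 'project_id'), ('limit', 3)],
    )
    print(resp.json()['columns'])
    # ['begin', 'end', 'qty', 'rate', 'project_id']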

Change-Id: Ia01fced0bdb3a9b389a89d56f02029b9456781c2
Story: 2006730
Task: 37153
Luka Peschke 2019-10-16 11:58:22 +02:00
parent 33edfb483d
commit 0d8a636755
10 changed files with 145 additions and 15 deletions

View File

@@ -184,6 +184,9 @@ class ElasticsearchStorage(v2_storage.BaseStorage):
         # Means we had a composite aggregation
         if 'key' in doc.keys():
             for key, value in doc['key'].items():
+                if key == 'begin' or key == 'end':
+                    # Elasticsearch returns ts in milliseconds
+                    value = tzutils.dt_from_ts(value // 1000)
                 output[key] = value
         return output
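
For clarity, the floor division above converts Elasticsearch's millisecond
timestamps to the whole seconds expected by tzutils.dt_from_ts(). A standalone
sketch of the same conversion using only the standard library:

    import datetime

    # Composite-aggregation bucket keys on 'start'/'end' come back as epoch
    # milliseconds; // 1000 truncates them to seconds before conversion.
    ms = 1569902400000
    dt = datetime.datetime.fromtimestamp(ms // 1000, datetime.timezone.utc)
    print(dt.isoformat())  # 2019-10-01T04:00:00+00:00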

View File

@@ -92,14 +92,18 @@ class ElasticsearchClient(object):
                     {'term': {'metadata.' + k: v}}]
         return should

-    @staticmethod
-    def _build_composite(groupby):
+    def _build_composite(self, groupby):
         if not groupby:
             return []
         sources = []
         for elem in groupby:
             if elem == 'type':
                 sources.append({'type': {'terms': {'field': 'type'}}})
+            elif elem == 'time':
+                # Not doing a date_histogram aggregation because we don't know
+                # the period
+                sources.append({'begin': {'terms': {'field': 'start'}}})
+                sources.append({'end': {'terms': {'field': 'end'}}})
             else:
                 sources.append({elem: {'terms': {'field': 'groupby.' + elem}}})
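
For reference, with groupby=['time', 'project_id'] the method above would
produce the following sources list (the surrounding composite-aggregation
request shown in the comment is an assumption based on the Elasticsearch API,
not part of this diff):

    # What _build_composite(['time', 'project_id']) evaluates to:
    sources = [
        {'begin': {'terms': {'field': 'start'}}},
        {'end': {'terms': {'field': 'end'}}},
        {'project_id': {'terms': {'field': 'groupby.project_id'}}},
    ]
    # These entries are meant to be embedded in a composite aggregation,
    # e.g. {'composite': {'sources': sources, ...}}.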

View File

@@ -65,19 +65,27 @@ CONF.register_opts(influx_storage_opts, INFLUX_STORAGE_GROUP)

 PERIOD_FIELD_NAME = '__ck_collect_period'


+def _sanitized_groupby(groupby):
+    forbidden = ('time',)
+    return [g for g in groupby if g not in forbidden] if groupby else []
+
+
 class InfluxClient(object):
     """Class used to ease interaction with InfluxDB"""

-    def __init__(self, chunk_size=500, autocommit=True):
+    def __init__(self, chunk_size=500, autocommit=True, default_period=3600):
         """Creates an InfluxClient object.

         :param chunk_size: Size after which points should be pushed.
         :param autocommit: Set to false to disable autocommit
+        :param default_period: Placeholder for the period in case it can't
+                               be determined.
         """
         self._conn = self._get_influx_client()
         self._chunk_size = chunk_size
         self._autocommit = autocommit
         self._retention_policy = CONF.storage_influxdb.retention_policy
+        self._default_period = default_period
         self._points = []

     @staticmethod
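
The helper strips 'time' out of the groupby list, since InfluxQL expresses
time bucketing through a time(...) clause rather than a tag. A quick check of
the behaviour defined above:

    >>> _sanitized_groupby(['time', 'project_id', 'type'])
    ['project_id', 'type']
    >>> _sanitized_groupby(['time'])
    []
    >>> _sanitized_groupby(None)
    []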
@@ -185,7 +193,13 @@ class InfluxClient(object):
             query += self._get_type_query(types)

         if groupby:
-            groupby_query = '"' + '","'.join(groupby) + '"'
+            groupby_query = ''
+            if 'time' in groupby:
+                groupby_query += 'time(' + str(self._default_period) + 's)'
+                groupby_query += ',' if groupby else ''
+            if groupby:
+                groupby_query += '"' + '","'.join(
+                    _sanitized_groupby(groupby)) + '"'
             query += ' GROUP BY ' + groupby_query

         query += ';'
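
With the default period of 3600 seconds and groupby=['time', 'project_id'],
the assembly above yields the following clause. This is a sketch of the string
construction, not output captured from a live InfluxDB:

    default_period = 3600
    sanitized = ['project_id']  # _sanitized_groupby(['time', 'project_id'])

    groupby_query = 'time(' + str(default_period) + 's)' + ','
    groupby_query += '"' + '","'.join(sanitized) + '"'
    print(' GROUP BY ' + groupby_query)
    #  GROUP BY time(3600s),"project_id"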
@@ -243,7 +257,7 @@ class InfluxStorage(v2_storage.BaseStorage):
     def __init__(self, *args, **kwargs):
         super(InfluxStorage, self).__init__(*args, **kwargs)
         self._default_period = kwargs.get('period') or CONF.collect.period
-        self._conn = InfluxClient()
+        self._conn = InfluxClient(default_period=self._default_period)

     def init(self):
         policy = CONF.storage_influxdb.retention_policy
@@ -326,8 +340,11 @@ class InfluxStorage(v2_storage.BaseStorage):
     def delete(self, begin=None, end=None, filters=None):
         self._conn.delete(begin, end, filters)

-    @staticmethod
-    def _get_total_elem(begin, end, groupby, series_groupby, point):
+    def _get_total_elem(self, begin, end, groupby, series_groupby, point):
+        if groupby and 'time' in groupby:
+            begin = tzutils.dt_from_iso(point['time'])
+            period = point.get(PERIOD_FIELD_NAME) or self._default_period
+            end = tzutils.add_delta(begin, datetime.timedelta(seconds=period))
         output = {
             'begin': begin,
             'end': end,
@@ -335,7 +352,7 @@ class InfluxStorage(v2_storage.BaseStorage):
             'rate': point['price'],
         }
         if groupby:
-            for group in groupby:
+            for group in _sanitized_groupby(groupby):
                 output[group] = series_groupby.get(group, '')
         return output
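
Each point therefore gets its own window when grouping by time: begin is
parsed from the point's timestamp and end is begin plus the period. A
standalone sketch of that arithmetic with plain datetime instead of
cloudkitty.tzutils:

    import datetime

    begin = datetime.datetime.fromisoformat('2019-10-01T06:00:00+02:00')
    period = 3600  # seconds; falls back to the client's default_period
    end = begin + datetime.timedelta(seconds=period)
    print(begin.isoformat(), end.isoformat())
    # 2019-10-01T06:00:00+02:00 2019-10-01T07:00:00+02:00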
@@ -353,12 +370,18 @@ class InfluxStorage(v2_storage.BaseStorage):
         output = []
         for (series_name, series_groupby), points in total.items():
             for point in points:
-                output.append(self._get_total_elem(
-                    begin, end,
-                    groupby,
-                    series_groupby,
-                    point))
+                # NOTE(peschk_l): InfluxDB returns all timestamps for a given
+                # period and interval, even those with no data. This filters
+                # out periods with no data
+                if point['qty'] is not None and point['price'] is not None:
+                    output.append(self._get_total_elem(
+                        tzutils.utc_to_local(begin),
+                        tzutils.utc_to_local(end),
+                        groupby,
+                        series_groupby,
+                        point))

+        groupby = _sanitized_groupby(groupby)
         if groupby:
             output.sort(key=lambda x: [x[group] for group in groupby])

         return {
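
To see why the None check matters: with GROUP BY time(...), InfluxDB emits one
bucket per interval over the whole timeframe, with null fields where nothing
was collected. A toy illustration of the filter (the sample points are made
up):

    points = [
        {'time': '2019-10-01T04:00:00Z', 'qty': 3.55, 'price': 1.77},
        {'time': '2019-10-01T05:00:00Z', 'qty': None, 'price': None},
        {'time': '2019-10-01T06:00:00Z', 'qty': 3.55, 'price': 1.77},
    ]
    kept = [p for p in points
            if p['qty'] is not None and p['price'] is not None]
    print(len(kept))  # 2 -- the empty 05:00 bucket is dropped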

View File

@@ -68,3 +68,21 @@ tests:
       response_json_paths:
         $.results.`len`: 0
         $.total: 0
+
+  - name: Get a summary grouped by time
+    url: /v2/summary
+    status: 200
+    query_parameters:
+      groupby: [time]
+    response_json_paths:
+      $.results.`len`: 1
+      $.total: 1
+
+  - name: Get a summary grouped by time and project_id
+    url: /v2/summary
+    status: 200
+    query_parameters:
+      groupby: [time, project_id]
+    response_json_paths:
+      $.results.`len`: 2
+      $.total: 2

View File

@@ -17,6 +17,7 @@ import functools

 from influxdb import resultset

+from cloudkitty.storage.v2.influx import _sanitized_groupby
 from cloudkitty.storage.v2.influx import InfluxClient
@@ -63,6 +64,12 @@ class FakeInfluxClient(InfluxClient):
                     break
                 valid = True
                 for tag in serie['tags'].keys():
+                    if tag == 'time':
+                        if point['time'].isoformat() != serie['values'][0][0]:
+                            valid = False
+                            break
+                        else:
+                            continue
                     if tag not in point['tags'].keys() or \
                             point['tags'][tag] != serie['tags'][tag]:
                         valid = False
@@ -74,10 +81,11 @@ class FakeInfluxClient(InfluxClient):
         if target_serie is None:
             target_serie = copy.deepcopy(self.total_series_sample)
             if groupby:
-                target_serie['tags'] = {k: point['tags'][k] for k in groupby}
+                target_serie['tags'] = {k: point['tags'][k] for k in
+                                        _sanitized_groupby(groupby)}
             else:
                 target_serie['tags'] = {}
-            target_serie['values'] = [['1970-01-01T00:00:00Z', 0, 0]]
+            target_serie['values'] = [[point['time'].isoformat(), 0, 0]]
             series.append(target_serie)
         return target_serie

View File

@@ -16,6 +16,7 @@ import datetime
 import unittest

 from dateutil import tz
+import mock
 from oslo_utils import timeutils

 from cloudkitty import tzutils
@@ -132,3 +133,15 @@ class TestTZUtils(unittest.TestCase):
         two = datetime.datetime(2019, 3, 31, 3,
                                 tzinfo=tz.gettz('Europe/Paris'))
         self.assertEqual(tzutils.diff_seconds(two, one), 3600)
+
+    def test_cloudkitty_dt_from_ts_as_utc(self):
+        ts = 1569902400
+        dt = datetime.datetime(2019, 10, 1, 4, tzinfo=tz.UTC)
+        self.assertEqual(dt, tzutils.dt_from_ts(ts, as_utc=True))
+
+    def test_cloudkitty_dt_from_ts_local_tz(self):
+        ts = 1569902400
+        timezone = tz.gettz('Europe/Paris')
+        dt = datetime.datetime(2019, 10, 1, 6, tzinfo=timezone)
+        with mock.patch.object(tzutils, '_LOCAL_TZ', new=timezone):
+            self.assertEqual(dt, tzutils.dt_from_ts(ts))
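
Both tests use the epoch value 1569902400, which is 2019-10-01T04:00:00 UTC
(06:00 in Paris, UTC+2 under summer time). A quick standalone check:

    import calendar
    import datetime

    # timegm() interprets the time tuple as UTC, inverting time.gmtime().
    print(calendar.timegm(datetime.datetime(2019, 10, 1, 4).timetuple()))
    # 1569902400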

View File

@@ -84,6 +84,14 @@ def dt_from_iso(time_str, as_utc=False):
         tz.UTC if as_utc else _LOCAL_TZ).replace(microsecond=0)


+def dt_from_ts(ts, as_utc=False):
+    """Parses a timezone-aware datetime object from an epoch timestamp.
+
+    Returns the object as being from the local timezone.
+    """
+    return datetime.datetime.fromtimestamp(ts, tz.UTC if as_utc else _LOCAL_TZ)
+
+
 def add_delta(dt, delta):
     """Adds a timedelta to a datetime object.

View File

@@ -0,0 +1,33 @@
+{
+    "total": 232,
+    "columns": [
+        "begin",
+        "end",
+        "qty",
+        "rate",
+        "project_id"
+    ],
+    "results": [
+        [
+            "2019-10-01T06:00:00+02:00",
+            "2019-10-01T07:00:00+02:00",
+            3.5533905029296875,
+            1.7766952514648438,
+            "84631866b2d84db49b29828052bdc287"
+        ],
+        [
+            "2019-10-01T07:00:00+02:00",
+            "2019-10-01T08:00:00+02:00",
+            3.5533905029296875,
+            1.7766952514648438,
+            "84631866b2d84db49b29828052bdc287"
+        ],
+        [
+            "2019-10-01T08:00:00+02:00",
+            "2019-10-01T09:00:00+02:00",
+            3.5533905029296875,
+            1.7766952514648438,
+            "84631866b2d84db49b29828052bdc287"
+        ]
+    ]
+}

View File

@@ -65,6 +65,11 @@ the columns for each element of ``results``. The columns are the four mandatory
 (``begin``, ``end``, ``qty``, ``rate``) along with each attribute the result is
 grouped by.

+.. note:: It is also possible to group data by time in order to obtain
+          timeseries: simply group by ``time``. No extra column is added, but
+          you get one entry per collect period in the queried timeframe. See
+          the examples below.
+
 .. rest_parameters:: summary/summary_parameters.yml

    - begin: begin_resp
@@ -75,6 +80,15 @@ grouped by.
 Response Example
 ----------------

+Grouping by time and project_id:
+
+.. code-block:: shell
+
+   curl "http://cloudkitty-api:8889/v2/summary?groupby=time&groupby=project_id&limit=3"
+
+.. literalinclude:: ./api_samples/summary/summary_get_groupby_time.json
+   :language: javascript
+
 .. code-block:: shell

    curl "http://cloudkitty-api:8889/v2/summary?filters=project_id%3Afe9c35372db6420089883805b37a34af&groupby=type&groupby=project_id"

View File

@@ -0,0 +1,6 @@
+---
+upgrade:
+  - |
+    It is now possible to group v2 summaries by timestamp. To do this, specify
+    ``time`` in the ``groupby`` list:
+    ``cloudkitty summary get -g time,type``.