Support grouping by timestamp in GET /v2/summary

Work items:

* Updated the Elasticsearch and InfluxDB v2 storage backends to support
  grouping by time.

* Added a "dt_from_ts()" function to "cloudkitty.tzutils", along with unit
  tests. This function converts an epoch timestamp to a timezone-aware datetime
  object.

* Added unit tests for GET /v2/summary to validate that grouping by time
  works as expected.

* Updated the API documentation.
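
As an illustration of the new behaviour, a time-grouped summary can be
requested over HTTP. This is a minimal sketch (the endpoint host/port and the
use of python-requests are illustrative assumptions, not part of this change):

    import requests

    # Repeating the groupby parameter groups by several attributes at once;
    # with 'time', each result row carries its own begin/end window.
    resp = requests.get(
        'http://cloudkitty-api:8889/v2/summary',
        params=[('groupby', 'time'), ('groupby', 'project_id'), ('limit', 3)],
    )
    print(resp.json()['columns'])
    # ['begin', 'end', 'qty', 'rate', 'project_id']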

Change-Id: Ia01fced0bdb3a9b389a89d56f02029b9456781c2
Story: 2006730
Task: 37153
Luka Peschke 2019-10-16 11:58:22 +02:00
parent 33edfb483d
commit 0d8a636755
10 changed files with 145 additions and 15 deletions

View File

@@ -184,6 +184,9 @@ class ElasticsearchStorage(v2_storage.BaseStorage):
         # Means we had a composite aggregation
         if 'key' in doc.keys():
             for key, value in doc['key'].items():
+                if key == 'begin' or key == 'end':
+                    # Elasticsearch returns ts in milliseconds
+                    value = tzutils.dt_from_ts(value // 1000)
                 output[key] = value
         return output
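
For clarity, the floor division above converts Elasticsearch's millisecond
timestamps to the whole seconds expected by tzutils.dt_from_ts(). A standalone
sketch of the same conversion using only the standard library:

    import datetime

    # Composite-aggregation bucket keys on 'start'/'end' come back as epoch
    # milliseconds; // 1000 truncates them to seconds before conversion.
    ms = 1569902400000
    dt = datetime.datetime.fromtimestamp(ms // 1000, datetime.timezone.utc)
    print(dt.isoformat())  # 2019-10-01T04:00:00+00:00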

View File

@@ -92,14 +92,18 @@ class ElasticsearchClient(object):
                     {'term': {'metadata.' + k: v}}]
         return should

-    @staticmethod
-    def _build_composite(groupby):
+    def _build_composite(self, groupby):
         if not groupby:
             return []
         sources = []
         for elem in groupby:
             if elem == 'type':
                 sources.append({'type': {'terms': {'field': 'type'}}})
+            elif elem == 'time':
+                # Not doing a date_histogram aggregation because we don't know
+                # the period
+                sources.append({'begin': {'terms': {'field': 'start'}}})
+                sources.append({'end': {'terms': {'field': 'end'}}})
             else:
                 sources.append({elem: {'terms': {'field': 'groupby.' + elem}}})
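
For reference, with groupby=['time', 'project_id'] the method above would
produce the following sources list (the surrounding composite-aggregation
request shown in the comment is an assumption based on the Elasticsearch API,
not part of this diff):

    # What _build_composite(['time', 'project_id']) evaluates to:
    sources = [
        {'begin': {'terms': {'field': 'start'}}},
        {'end': {'terms': {'field': 'end'}}},
        {'project_id': {'terms': {'field': 'groupby.project_id'}}},
    ]
    # These entries are meant to be embedded in a composite aggregation,
    # e.g. {'composite': {'sources': sources, ...}}.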

View File

@@ -65,19 +65,27 @@ CONF.register_opts(influx_storage_opts, INFLUX_STORAGE_GROUP)

 PERIOD_FIELD_NAME = '__ck_collect_period'


+def _sanitized_groupby(groupby):
+    forbidden = ('time',)
+    return [g for g in groupby if g not in forbidden] if groupby else []
+
+
 class InfluxClient(object):
     """Class used to ease interaction with InfluxDB"""

-    def __init__(self, chunk_size=500, autocommit=True):
+    def __init__(self, chunk_size=500, autocommit=True, default_period=3600):
         """Creates an InfluxClient object.

         :param chunk_size: Size after which points should be pushed.
         :param autocommit: Set to false to disable autocommit
+        :param default_period: Placeholder for the period in case it can't
+                               be determined.
         """
         self._conn = self._get_influx_client()
         self._chunk_size = chunk_size
         self._autocommit = autocommit
         self._retention_policy = CONF.storage_influxdb.retention_policy
+        self._default_period = default_period
         self._points = []

     @staticmethod
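
The helper strips 'time' out of the groupby list, since InfluxQL expresses
time bucketing through a time(...) clause rather than a tag. A quick check of
the behaviour defined above:

    >>> _sanitized_groupby(['time', 'project_id', 'type'])
    ['project_id', 'type']
    >>> _sanitized_groupby(['time'])
    []
    >>> _sanitized_groupby(None)
    []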
@@ -185,7 +193,13 @@ class InfluxClient(object):
             query += self._get_type_query(types)

         if groupby:
-            groupby_query = '"' + '","'.join(groupby) + '"'
+            groupby_query = ''
+            if 'time' in groupby:
+                groupby_query += 'time(' + str(self._default_period) + 's)'
+                groupby_query += ',' if groupby else ''
+            if groupby:
+                groupby_query += '"' + '","'.join(
+                    _sanitized_groupby(groupby)) + '"'
             query += ' GROUP BY ' + groupby_query

         query += ';'
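
With the default period of 3600 seconds and groupby=['time', 'project_id'],
the assembly above yields the following clause. This is a sketch of the string
construction, not output captured from a live InfluxDB:

    default_period = 3600
    sanitized = ['project_id']  # _sanitized_groupby(['time', 'project_id'])

    groupby_query = 'time(' + str(default_period) + 's)' + ','
    groupby_query += '"' + '","'.join(sanitized) + '"'
    print(' GROUP BY ' + groupby_query)
    #  GROUP BY time(3600s),"project_id"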
@@ -243,7 +257,7 @@ class InfluxStorage(v2_storage.BaseStorage):
     def __init__(self, *args, **kwargs):
         super(InfluxStorage, self).__init__(*args, **kwargs)
         self._default_period = kwargs.get('period') or CONF.collect.period
-        self._conn = InfluxClient()
+        self._conn = InfluxClient(default_period=self._default_period)

     def init(self):
         policy = CONF.storage_influxdb.retention_policy
@@ -326,8 +340,11 @@ class InfluxStorage(v2_storage.BaseStorage):
     def delete(self, begin=None, end=None, filters=None):
         self._conn.delete(begin, end, filters)

-    @staticmethod
-    def _get_total_elem(begin, end, groupby, series_groupby, point):
+    def _get_total_elem(self, begin, end, groupby, series_groupby, point):
+        if groupby and 'time' in groupby:
+            begin = tzutils.dt_from_iso(point['time'])
+            period = point.get(PERIOD_FIELD_NAME) or self._default_period
+            end = tzutils.add_delta(begin, datetime.timedelta(seconds=period))
         output = {
             'begin': begin,
             'end': end,
@@ -335,7 +352,7 @@ class InfluxStorage(v2_storage.BaseStorage):
             'rate': point['price'],
         }
         if groupby:
-            for group in groupby:
+            for group in _sanitized_groupby(groupby):
                 output[group] = series_groupby.get(group, '')
         return output
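
Each point therefore gets its own window when grouping by time: begin is
parsed from the point's timestamp and end is begin plus the period. A
standalone sketch of that arithmetic with plain datetime instead of
cloudkitty.tzutils:

    import datetime

    begin = datetime.datetime.fromisoformat('2019-10-01T06:00:00+02:00')
    period = 3600  # seconds; falls back to the client's default_period
    end = begin + datetime.timedelta(seconds=period)
    print(begin.isoformat(), end.isoformat())
    # 2019-10-01T06:00:00+02:00 2019-10-01T07:00:00+02:00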
@@ -353,12 +370,18 @@ class InfluxStorage(v2_storage.BaseStorage):
         output = []
         for (series_name, series_groupby), points in total.items():
             for point in points:
-                output.append(self._get_total_elem(
-                    begin, end,
-                    groupby,
-                    series_groupby,
-                    point))
+                # NOTE(peschk_l): InfluxDB returns all timestamps for a given
+                # period and interval, even those with no data. This filters
+                # out periods with no data
+                if point['qty'] is not None and point['price'] is not None:
+                    output.append(self._get_total_elem(
+                        tzutils.utc_to_local(begin),
+                        tzutils.utc_to_local(end),
+                        groupby,
+                        series_groupby,
+                        point))

+        groupby = _sanitized_groupby(groupby)
         if groupby:
             output.sort(key=lambda x: [x[group] for group in groupby])

         return {
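
To see why the None check matters: with GROUP BY time(...), InfluxDB emits one
bucket per interval over the whole timeframe, with null fields where nothing
was collected. A toy illustration of the filter (the sample points are made
up):

    points = [
        {'time': '2019-10-01T04:00:00Z', 'qty': 3.55, 'price': 1.77},
        {'time': '2019-10-01T05:00:00Z', 'qty': None, 'price': None},
        {'time': '2019-10-01T06:00:00Z', 'qty': 3.55, 'price': 1.77},
    ]
    kept = [p for p in points
            if p['qty'] is not None and p['price'] is not None]
    print(len(kept))  # 2 -- the empty 05:00 bucket is dropped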

View File

@@ -68,3 +68,21 @@ tests:
       response_json_paths:
         $.results.`len`: 0
         $.total: 0
+
+  - name: Get a summary grouped by time
+    url: /v2/summary
+    status: 200
+    query_parameters:
+      groupby: [time]
+    response_json_paths:
+      $.results.`len`: 1
+      $.total: 1
+
+  - name: Get a summary grouped by time and project_id
+    url: /v2/summary
+    status: 200
+    query_parameters:
+      groupby: [time, project_id]
+    response_json_paths:
+      $.results.`len`: 2
+      $.total: 2

View File

@@ -17,6 +17,7 @@ import functools

 from influxdb import resultset

+from cloudkitty.storage.v2.influx import _sanitized_groupby
 from cloudkitty.storage.v2.influx import InfluxClient
@@ -63,6 +64,12 @@ class FakeInfluxClient(InfluxClient):
                     break
                 valid = True
                 for tag in serie['tags'].keys():
+                    if tag == 'time':
+                        if point['time'].isoformat() != serie['values'][0][0]:
+                            valid = False
+                            break
+                        else:
+                            continue
                     if tag not in point['tags'].keys() or \
                             point['tags'][tag] != serie['tags'][tag]:
                         valid = False
@@ -74,10 +81,11 @@ class FakeInfluxClient(InfluxClient):
         if target_serie is None:
             target_serie = copy.deepcopy(self.total_series_sample)
             if groupby:
-                target_serie['tags'] = {k: point['tags'][k] for k in groupby}
+                target_serie['tags'] = {k: point['tags'][k] for k in
+                                        _sanitized_groupby(groupby)}
             else:
                 target_serie['tags'] = {}
-            target_serie['values'] = [['1970-01-01T00:00:00Z', 0, 0]]
+            target_serie['values'] = [[point['time'].isoformat(), 0, 0]]
             series.append(target_serie)
         return target_serie

View File

@@ -16,6 +16,7 @@ import datetime
 import unittest

 from dateutil import tz
+import mock
 from oslo_utils import timeutils

 from cloudkitty import tzutils
@@ -132,3 +133,15 @@ class TestTZUtils(unittest.TestCase):
         two = datetime.datetime(2019, 3, 31, 3,
                                 tzinfo=tz.gettz('Europe/Paris'))
         self.assertEqual(tzutils.diff_seconds(two, one), 3600)
+
+    def test_cloudkitty_dt_from_ts_as_utc(self):
+        ts = 1569902400
+        dt = datetime.datetime(2019, 10, 1, 4, tzinfo=tz.UTC)
+        self.assertEqual(dt, tzutils.dt_from_ts(ts, as_utc=True))
+
+    def test_cloudkitty_dt_from_ts_local_tz(self):
+        ts = 1569902400
+        timezone = tz.gettz('Europe/Paris')
+        dt = datetime.datetime(2019, 10, 1, 6, tzinfo=timezone)
+        with mock.patch.object(tzutils, '_LOCAL_TZ', new=timezone):
+            self.assertEqual(dt, tzutils.dt_from_ts(ts))
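
Both tests use the epoch value 1569902400, which is 2019-10-01T04:00:00 UTC
(06:00 in Paris, UTC+2 under summer time). A quick standalone check:

    import calendar
    import datetime

    # timegm() interprets the time tuple as UTC, inverting time.gmtime().
    print(calendar.timegm(datetime.datetime(2019, 10, 1, 4).timetuple()))
    # 1569902400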

View File

@@ -84,6 +84,14 @@ def dt_from_iso(time_str, as_utc=False):
         tz.UTC if as_utc else _LOCAL_TZ).replace(microsecond=0)


+def dt_from_ts(ts, as_utc=False):
+    """Parses a timezone-aware datetime object from an epoch timestamp.
+
+    Returns the object as being from the local timezone.
+    """
+    return datetime.datetime.fromtimestamp(ts, tz.UTC if as_utc else _LOCAL_TZ)
+
+
 def add_delta(dt, delta):
     """Adds a timedelta to a datetime object.

View File

@@ -0,0 +1,33 @@
+{
+    "total": 232,
+    "columns": [
+        "begin",
+        "end",
+        "qty",
+        "rate",
+        "project_id"
+    ],
+    "results": [
+        [
+            "2019-10-01T06:00:00+02:00",
+            "2019-10-01T07:00:00+02:00",
+            3.5533905029296875,
+            1.7766952514648438,
+            "84631866b2d84db49b29828052bdc287"
+        ],
+        [
+            "2019-10-01T07:00:00+02:00",
+            "2019-10-01T08:00:00+02:00",
+            3.5533905029296875,
+            1.7766952514648438,
+            "84631866b2d84db49b29828052bdc287"
+        ],
+        [
+            "2019-10-01T08:00:00+02:00",
+            "2019-10-01T09:00:00+02:00",
+            3.5533905029296875,
+            1.7766952514648438,
+            "84631866b2d84db49b29828052bdc287"
+        ]
+    ]
+}

View File

@@ -65,6 +65,11 @@ the columns for each element of ``results``. The columns are the four mandatory
 (``begin``, ``end``, ``qty``, ``rate``) along with each attribute the result is
 grouped by.

+.. note:: It is also possible to group data by time in order to obtain
+          timeseries: simply group by ``time``. No extra column is added, but
+          you get one entry per collect period in the queried timeframe. See
+          the examples below.
+
 .. rest_parameters:: summary/summary_parameters.yml

    - begin: begin_resp
@@ -75,6 +80,15 @@ grouped by.
 Response Example
 ----------------

+Grouping by time and project_id:
+
+.. code-block:: shell
+
+   curl "http://cloudkitty-api:8889/v2/summary?groupby=time&groupby=project_id&limit=3"
+
+.. literalinclude:: ./api_samples/summary/summary_get_groupby_time.json
+   :language: javascript
+
 .. code-block:: shell

    curl "http://cloudkitty-api:8889/v2/summary?filters=project_id%3Afe9c35372db6420089883805b37a34af&groupby=type&groupby=project_id"

View File

@@ -0,0 +1,6 @@
+---
+upgrade:
+  - |
+    It is now possible to group v2 summaries by timestamp. To do this, specify
+    ``time`` in the ``groupby`` list:
+    ``cloudkitty summary get -g time,type``.