Selectable aggregation functions for statistics

Addresses: BP selectable-aggregates

Previously, the statistics API always computed and returned a
standard set of aggregate functions.

Now, individual aggregation functions can be selected, and even
parameterized, via a syntax of the form:

   /v2/meters/<meter_name>/statistics?aggregate.func=<name>&aggregate.param=<value>

For example, to calculate only the average CPU utilization:

  GET /v2/meters/cpu_util/statistics?aggregate.func=avg

  HTTP/1.0 200 OK
  [{"aggregate": {"avg": 0.6858829535841072},
    "avg": 0.6858829535841072,
    "duration_start": "2014-01-30T11:13:23",
    "duration_end": "2014-01-31T16:07:13",
    "duration": 104030.0,
    "period": 0,
    "period_start": "2014-01-30T11:13:23",
    "period_end": "2014-01-31T16:07:13",
    "groupby": null,
    "unit": "%"}]

In the current patch, selectable aggregates are provided by the
sqlalchemy driver only, with support in the mongodb driver to
follow in a subsequent patch.
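
At the storage-driver level, the new aggregate argument to
get_meter_statistics() is a list of descriptors exposing func and param
attributes; unknown function names are rejected with NotImplementedError.
A minimal, illustrative sketch of a caller follows (the Aggregate
namedtuple and the conn and sample_filter names are stand-ins for this
example, not objects defined by this patch):

  import collections

  # Illustrative container only; the real API layer supplies its own type
  # with 'func' and 'param' attributes.
  Aggregate = collections.namedtuple('Aggregate', ['func', 'param'])

  # 'conn' is assumed to be an initialised sqlalchemy Connection and
  # 'sample_filter' an ordinary SampleFilter for the meter of interest.
  # Request only the average and the maximum; the result is an iterable
  # of api_models.Statistics.
  stats = conn.get_meter_statistics(
      sample_filter,
      aggregate=[Aggregate(func='avg', param=None),
                 Aggregate(func='max', param=None)])

  # Anything outside avg/sum/min/max/count (STANDARD_AGGREGATES) raises:
  #   NotImplementedError: Selectable aggregate function <name> is not supported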

Change-Id: I6cc095ba5ae16dea3f6b404e72a070ab9ac49c9a
Eoghan Glynn
2014-02-25 15:54:11 +00:00
parent 4ca9550cfb
commit cdc49d6c75
10 changed files with 258 additions and 50 deletions

@@ -108,6 +108,14 @@ META_TYPE_MAP = {bool: models.MetaBool,
                  long: models.MetaBigInt,
                  float: models.MetaFloat}
 
+STANDARD_AGGREGATES = dict(
+    avg=func.avg(models.Sample.volume).label('avg'),
+    sum=func.sum(models.Sample.volume).label('sum'),
+    min=func.min(models.Sample.volume).label('min'),
+    max=func.max(models.Sample.volume).label('max'),
+    count=func.count(models.Sample.volume).label('count')
+)
+
 
 def apply_metaquery_filter(session, query, metaquery):
     """Apply provided metaquery filter to existing query.
@@ -634,18 +642,32 @@ class Connection(base.Connection):
                                       limit,
                                       models.MeterSample)
 
-    def _make_stats_query(self, sample_filter, groupby):
+    @staticmethod
+    def _get_aggregate_functions(aggregate):
+        if not aggregate:
+            return [f for f in STANDARD_AGGREGATES.values()]
+
+        functions = []
+        for a in aggregate:
+            if a.func in STANDARD_AGGREGATES:
+                functions.append(STANDARD_AGGREGATES[a.func])
+            else:
+                raise NotImplementedError(_('Selectable aggregate function %s'
+                                            ' is not supported') % a.func)
+        return functions
+
+    def _make_stats_query(self, sample_filter, groupby, aggregate):
         select = [
             models.Meter.unit,
             func.min(models.Sample.timestamp).label('tsmin'),
             func.max(models.Sample.timestamp).label('tsmax'),
-            func.avg(models.Sample.volume).label('avg'),
-            func.sum(models.Sample.volume).label('sum'),
-            func.min(models.Sample.volume).label('min'),
-            func.max(models.Sample.volume).label('max'),
-            func.count(models.Sample.volume).label('count')
         ]
+        select.extend(self._get_aggregate_functions(aggregate))
 
         session = self._get_db_session()
 
         if groupby:
@@ -659,30 +681,41 @@ class Connection(base.Connection):
         return make_query_from_filter(session, query, sample_filter)
 
+    @staticmethod
+    def _stats_result_aggregates(result, aggregate):
+        stats_args = {}
+        if isinstance(result.count, (int, long)):
+            stats_args['count'] = result.count
+        for attr in ['min', 'max', 'sum', 'avg']:
+            if hasattr(result, attr):
+                stats_args[attr] = getattr(result, attr)
+        if aggregate:
+            stats_args['aggregate'] = dict(
+                ('%s%s' % (a.func, '/%s' % a.param if a.param else ''),
+                 getattr(result, a.func)) for a in aggregate
+            )
+        return stats_args
+
     @staticmethod
     def _stats_result_to_model(result, period, period_start,
-                               period_end, groupby):
+                               period_end, groupby, aggregate):
+        stats_args = Connection._stats_result_aggregates(result, aggregate)
+        stats_args['unit'] = result.unit
         duration = (timeutils.delta_seconds(result.tsmin, result.tsmax)
                     if result.tsmin is not None and result.tsmax is not None
                     else None)
-        return api_models.Statistics(
-            unit=result.unit,
-            count=int(result.count),
-            min=result.min,
-            max=result.max,
-            avg=result.avg,
-            sum=result.sum,
-            duration_start=result.tsmin,
-            duration_end=result.tsmax,
-            duration=duration,
-            period=period,
-            period_start=period_start,
-            period_end=period_end,
-            groupby=(dict((g, getattr(result, g)) for g in groupby)
-                     if groupby else None)
-        )
+        stats_args['duration'] = duration
+        stats_args['duration_start'] = result.tsmin
+        stats_args['duration_end'] = result.tsmax
+        stats_args['period'] = period
+        stats_args['period_start'] = period_start
+        stats_args['period_end'] = period_end
+        stats_args['groupby'] = (dict(
+            (g, getattr(result, g)) for g in groupby) if groupby else None)
+        return api_models.Statistics(**stats_args)
 
-    def get_meter_statistics(self, sample_filter, period=None, groupby=None):
+    def get_meter_statistics(self, sample_filter, period=None, groupby=None,
+                             aggregate=None):
         """Return an iterable of api_models.Statistics instances containing
         meter statistics described by the query parameters.
@@ -696,17 +729,22 @@ class Connection(base.Connection):
_("Unable to group by these fields"))
if not period:
for res in self._make_stats_query(sample_filter, groupby):
for res in self._make_stats_query(sample_filter,
groupby,
aggregate):
if res.count:
yield self._stats_result_to_model(res, 0,
res.tsmin, res.tsmax,
groupby)
groupby,
aggregate)
return
if not sample_filter.start or not sample_filter.end:
res = self._make_stats_query(sample_filter, None).first()
res = self._make_stats_query(sample_filter,
None,
aggregate).first()
query = self._make_stats_query(sample_filter, groupby)
query = self._make_stats_query(sample_filter, groupby, aggregate)
# HACK(jd) This is an awful method to compute stats by period, but
# since we're trying to be SQL agnostic we have to write portable
# code, so here it is, admire! We're going to do one request to get
@@ -726,7 +764,8 @@ class Connection(base.Connection):
                                                    period_end)),
                 period_start=period_start,
                 period_end=period_end,
-                groupby=groupby
+                groupby=groupby,
+                aggregate=aggregate
             )
 
     @staticmethod
@@ -1299,7 +1338,14 @@ class QueryTransformer(object):
     'statistics': {'groupby': True,
                    'query': {'simple': True,
                              'metadata': True},
-                   'aggregation': {'standard': True}},
+                   'aggregation': {'standard': True,
+                                   'selectable': {
+                                       'max': True,
+                                       'min': True,
+                                       'sum': True,
+                                       'avg': True,
+                                       'count': True}}
+                   },
     'alarms': {'query': {'simple': True,
                          'complex': True},
                'history': {'query': {'simple': True,