# deb-gnocchi/gnocchi/aggregates/moving_stats.py
# 146 lines, 6.0 KiB, Python

# -*- encoding: utf-8 -*-
#
# Copyright 2014-2015 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import datetime
import numpy
import pandas
import six
from gnocchi import aggregates
from gnocchi import utils
class MovingAverage(aggregates.CustomAggregator):
    """Custom aggregator computing a moving aggregate over stored measures.

    Uses the finest-resolution stored data whose granularity divides
    evenly into the requested window, then applies a (possibly centered)
    moving function over that data.
    """

    @staticmethod
    def check_window_valid(window):
        """Validate the window parameter and return it as seconds.

        :param window: timespan format string (must not be None).
        :return: the window size as a float number of seconds.
        :raises CustomAggFailure: if window is missing or unparseable.
        """
        if window is None:
            msg = 'Moving aggregate must have window specified.'
            raise aggregates.CustomAggFailure(msg)
        try:
            return utils.to_timespan(six.text_type(window)).total_seconds()
        except Exception:
            # Any parse failure is reported uniformly to the caller.
            raise aggregates.CustomAggFailure('Invalid value for window')

    @staticmethod
    def retrieve_data(storage_obj, metric, start, stop, window):
        """Retrieve the finest-resolution data available from storage.

        Only rows that are full-resolution (granularity 0) or whose
        granularity divides evenly into the window are candidates; the
        smallest candidate granularity is selected.

        :param storage_obj: storage backend queried for the measures.
        :param metric: the metric to fetch.
        :param start: start timestamp.
        :param stop: stop timestamp.
        :param window: window size in seconds (float).
        :return: tuple of (granularity, pandas.Series of values indexed
            by timestamp).
        :raises CustomAggFailure: if no usable granularity exists.
        """
        all_data = storage_obj.get_measures(metric, start, stop)
        try:
            # Row layout is (timestamp, granularity, value).  The
            # row[1] == 0 test short-circuits, so full-resolution rows
            # never reach the modulo; min() raises ValueError when no
            # row qualifies, which we convert below.
            min_grain = min(row[1] for row in all_data
                            if row[1] == 0 or window % row[1] == 0)
        except Exception:
            msg = ("No data available that is either full-res or "
                   "of a granularity that factors into the window size "
                   "you specified.")
            raise aggregates.CustomAggFailure(msg)
        # Filter once instead of re-scanning all_data separately for the
        # values and for the index.
        rows = [r for r in all_data if r[1] == min_grain]
        return min_grain, pandas.Series([r[2] for r in rows],
                                        [r[0] for r in rows])

    @staticmethod
    def aggregate_data(data, func, window, min_grain, center=False,
                       min_size=1):
        """Calculate moving func of data with sampling width of window.

        :param data: pandas.Series of timestamp, value pairs.
        :param func: the function to use when aggregating.
        :param window: (float) range of data to use in each aggregation.
        :param min_grain: granularity of the data being passed in.
        :param center: whether to index the aggregated values by the first
            timestamp of the values picked up by the window or by the
            central timestamp.
        :param min_size: if the number of points in the window is less than
            min_size, the aggregate is not computed and nan is returned for
            that iteration.
        :return: list of (timestamp, window, aggregated value) tuples.
        :raises CustomAggFailure: if the pandas aggregation fails.
        """
        if center:
            # center may arrive as a truthy string from the API layer;
            # normalize it to a bool.  NOTE(review): assumes
            # utils.strtobool accepts whatever truthy value reaches
            # here — confirm against callers.
            center = utils.strtobool(center)

        def moving_window(x):
            msec = datetime.timedelta(milliseconds=1)
            zero = datetime.timedelta(seconds=0)
            half_span = datetime.timedelta(seconds=window / 2)
            start = utils.normalize_time(data.index[0])
            stop = utils.normalize_time(
                data.index[-1] + datetime.timedelta(seconds=min_grain))
            # min_grain addition necessary since each bin of rolled-up data
            # is indexed by leftmost timestamp of bin.
            left = half_span if center else zero
            right = 2 * half_span - left - msec
            # msec subtraction is so we don't include right endpoint in
            # slice.
            x = utils.normalize_time(x)
            if x - left >= start and x + right <= stop:
                dslice = data[x - left: x + right]
                if center and dslice.size % 2 == 0:
                    return func([func(data[x - msec - left: x - msec + right]),
                                 func(data[x + msec - left: x + msec + right])
                                 ])
                # (NOTE) atmalagon: the msec shift here is so that we have two
                # consecutive windows; one centered at time x - msec,
                # and one centered at time x + msec. We then average the
                # aggregates from the two windows; this result is centered
                # at time x. Doing this double average is a way to return a
                # centered average indexed by a timestamp that existed in
                # the input data (which wouldn't be the case for an even
                # number of points if we did only one centered average).
            else:
                return numpy.nan
            if dslice.size < min_size:
                return numpy.nan
            return func(dslice)

        try:
            result = pandas.Series(data.index).apply(moving_window)
            # change from integer index to timestamp index
            result.index = data.index
            # Drop the nan entries produced by moving_window for windows
            # that were out of range or too small.
            return [(t, window, r) for t, r
                    in six.iteritems(result[~result.isnull()])]
        except Exception as e:
            raise aggregates.CustomAggFailure(str(e))

    def compute(self, storage_obj, metric, start, stop, window=None,
                center=False):
        """Return list of (timestamp, window, aggregated value) tuples.

        :param storage_obj: a call is placed to the storage object to
            retrieve the stored data.
        :param metric: the metric.
        :param start: start timestamp.
        :param stop: stop timestamp.
        :param window: format string specifying the size over which to
            aggregate the retrieved data.
        :param center: how to index the aggregated data (central timestamp
            or leftmost timestamp).
        :raises CustomAggFailure: if the window is invalid, no usable data
            exists, or the aggregation fails.
        """
        window = self.check_window_valid(window)
        min_grain, data = self.retrieve_data(storage_obj, metric, start,
                                             stop, window)
        return self.aggregate_data(data, numpy.mean, window, min_grain,
                                   center, min_size=1)