Resample only data affected by new measures
Currently, we pass the full unaggregated timeserie when we update each of the aggregates for each granularity. This is incorrect because the unaggregated timeserie corresponds to the largest granularity of the metric: for smaller granularities, we end up updating and resampling points that are already aggregated and are not affected by the new incoming measures. Instead, slice the unaggregated timeserie per granularity so that only the points from the first new measure onwards, rounded down to that granularity, are resampled.

Change-Id: I687c2a18b332494f5c5cb7fdfe6f2b3d1e8de804
Closes-Bug: #1562820
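The intuition, as a small plain-Python sketch (the helper name is illustrative, not the committed code): a new measure can only change the aggregation bucket it falls into and anything after it, so each granularity only needs to resample the tail of the timeserie starting at that bucket.

    import calendar
    import datetime


    def floor_to(ts, granularity):
        # Floor a datetime to a multiple of `granularity` seconds.
        epoch = calendar.timegm(ts.timetuple())
        return datetime.datetime.utcfromtimestamp(epoch - epoch % granularity)


    new_measure = datetime.datetime(2014, 1, 6, 1, 58, 1)
    for granularity in (60, 300, 3600):  # seconds
        print(granularity, floor_to(new_measure, granularity))
    # 60   -> 2014-01-06 01:58:00 (only the last minute is recomputed)
    # 300  -> 2014-01-06 01:55:00
    # 3600 -> 2014-01-06 01:00:00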
diff --git a/gnocchi/carbonara.py b/gnocchi/carbonara.py
--- a/gnocchi/carbonara.py
+++ b/gnocchi/carbonara.py
@@ -162,7 +162,7 @@ class TimeSerie(SerializableMixin):
         return value.nanos / 10e8
 
     @staticmethod
-    def _round_timestamp(ts, freq):
+    def round_timestamp(ts, freq):
         return pandas.Timestamp(
             (pandas.Timestamp(ts).value // freq) * freq)
 
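The leading underscore is dropped so the storage driver can call the helper from outside the class (see gnocchi/storage/_carbonara.py below). Note that freq is expressed in nanoseconds (10e8 ns == 1 second), which is why callers multiply a sampling in seconds by 10e8. A quick check of the flooring behaviour, assuming only pandas (the function is copied verbatim from the hunk above):

    import pandas


    def round_timestamp(ts, freq):
        # Floor ts to a multiple of freq, where freq is in nanoseconds.
        return pandas.Timestamp((pandas.Timestamp(ts).value // freq) * freq)


    # A 300-second sampling is passed as 300 * 10e8 nanoseconds:
    print(round_timestamp("2014-01-06 01:58:30", 300 * 10e8))
    # 2014-01-06 01:55:00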
@@ -223,8 +223,8 @@ class BoundTimeSerie(TimeSerie):
 
     def set_values(self, values, before_truncate_callback=None,
                    ignore_too_old_timestamps=False):
+        # NOTE: values must be sorted when passed in.
         if self.block_size is not None and not self.ts.empty:
-            values = sorted(values, key=operator.itemgetter(0))
             first_block_timestamp = self._first_block_timestamp()
             if ignore_too_old_timestamps:
                 for index, (timestamp, value) in enumerate(values):
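With the sort removed here, ordering becomes a caller-side contract: set_values() now relies on values arriving sorted by timestamp, and the slicing added in gnocchi/storage/_carbonara.py below relies on measures[0][0] being the oldest new measure. A minimal sketch of the caller's obligation, with illustrative data:

    import datetime
    import operator

    measures = [
        (datetime.datetime(2014, 1, 6, 1, 58), 100),
        (datetime.datetime(2014, 1, 6, 0, 2), 42),
    ]
    # Sort once, up front, instead of re-sorting inside every
    # set_values() call.
    measures = sorted(measures, key=operator.itemgetter(0))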
@@ -268,8 +268,8 @@ class BoundTimeSerie(TimeSerie):
         return basic
 
     def _first_block_timestamp(self):
-        rounded = self._round_timestamp(self.ts.index[-1],
-                                        self.block_size.delta.value)
+        rounded = self.round_timestamp(self.ts.index[-1],
+                                       self.block_size.delta.value)
 
         return rounded - (self.block_size * self.back_window)
 
@@ -323,7 +323,7 @@ class AggregatedTimeSerie(TimeSerie):
 
     @classmethod
     def get_split_key_datetime(cls, timestamp, sampling):
-        return cls._round_timestamp(
+        return cls.round_timestamp(
             timestamp, freq=sampling * cls.POINTS_PER_SPLIT * 10e8)
 
     @staticmethod
@@ -447,7 +447,7 @@ class AggregatedTimeSerie(TimeSerie):
         # Group by the sampling, and then apply the aggregation method on
         # the points after `after'
         groupedby = self.ts[after:].groupby(
-            functools.partial(self._round_timestamp,
+            functools.partial(self.round_timestamp,
                               freq=self.sampling * 10e8))
         agg_func = getattr(groupedby, self.aggregation_method_func_name)
         if self.aggregation_method_func_name == 'quantile':
@@ -469,7 +469,7 @@ class AggregatedTimeSerie(TimeSerie):
         if from_timestamp is None:
             from_ = None
         else:
-            from_ = self._round_timestamp(from_timestamp, self.sampling * 10e8)
+            from_ = self.round_timestamp(from_timestamp, self.sampling * 10e8)
         points = self[from_:to_timestamp]
         try:
             # Do not include stop timestamp
diff --git a/gnocchi/storage/_carbonara.py b/gnocchi/storage/_carbonara.py
--- a/gnocchi/storage/_carbonara.py
+++ b/gnocchi/storage/_carbonara.py
@@ -18,6 +18,7 @@ import collections
 import datetime
 import logging
 import multiprocessing
+import operator
 import threading
 import time
 import uuid
@@ -342,6 +343,7 @@ class CarbonaraBasedStorage(storage.StorageDriver):
                           % metric)
                 continue
 
+            measures = sorted(measures, key=operator.itemgetter(0))
             try:
                 with timeutils.StopWatch() as sw:
                     raw_measures = (
@@ -380,9 +382,17 @@ class CarbonaraBasedStorage(storage.StorageDriver):
                 back_window=metric.archive_policy.back_window)
 
             def _map_add_measures(bound_timeserie):
+                # NOTE (gordc): bound_timeserie is entire set of
+                # unaggregated measures matching largest
+                # granularity. the following takes only the points
+                # affected by new measures for specific granularity
+                tstamp = max(bound_timeserie.first, measures[0][0])
                 self._map_in_thread(
                     self._add_measures,
-                    ((aggregation, d, metric, bound_timeserie)
+                    ((aggregation, d, metric,
+                      carbonara.TimeSerie(bound_timeserie.ts[
+                          carbonara.TimeSerie.round_timestamp(
+                              tstamp, d.granularity * 10e8):]))
                     for aggregation in agg_methods
                     for d in metric.archive_policy.definition))
 
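The NOTE above is the heart of the fix: bound_timeserie carries every unaggregated point needed for the largest granularity, so for each archive-policy definition only the tail starting at the first new measure, floored to that definition's granularity, is wrapped in a fresh TimeSerie and resampled. A minimal sketch of the slice computation, assuming pandas and illustrative values:

    import pandas

    # Unaggregated points kept for the largest granularity (illustrative):
    # two hours of data, one point per minute.
    bound_ts = pandas.Series(
        100.0, index=pandas.date_range('2014-01-06', periods=120, freq='60s'))

    # Oldest new measure; sorted input guarantees measures[0][0] is oldest.
    tstamp = pandas.Timestamp('2014-01-06 01:58:01')

    for granularity in (60.0, 300.0, 3600.0):  # seconds, per definition
        freq = granularity * 10e8  # nanoseconds
        start = pandas.Timestamp((tstamp.value // freq) * freq)
        # Only this slice is resampled for the given granularity.
        print(granularity, len(bound_ts[start:]))
    # 60.0 -> 2 points, 300.0 -> 5 points, 3600.0 -> 60 points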
diff --git a/gnocchi/tests/test_storage.py b/gnocchi/tests/test_storage.py
--- a/gnocchi/tests/test_storage.py
+++ b/gnocchi/tests/test_storage.py
@@ -21,6 +21,7 @@ from oslo_utils import timeutils
 from oslotest import base
 import six.moves
 
+from gnocchi import carbonara
 from gnocchi import storage
 from gnocchi.storage import _carbonara
 from gnocchi.storage import null
@@ -112,27 +113,47 @@ class TestStorageDriver(tests_base.TestCase):
         self.assertEqual(3661, len(self.storage.get_measures(m)))
 
     @mock.patch('gnocchi.carbonara.AggregatedTimeSerie.POINTS_PER_SPLIT', 48)
-    def test_add_measures_big_update_subset(self):
+    def test_add_measures_update_subset_split(self):
         m, m_sql = self._create_metric('medium')
         measures = [
-            storage.Measure(datetime.datetime(2014, 1, i, j, 0, 0), 100)
-            for i in six.moves.range(1, 6) for j in six.moves.range(0, 24)]
-        measures.append(
-            storage.Measure(datetime.datetime(2014, 1, 6, 0, 0, 0), 100))
+            storage.Measure(datetime.datetime(2014, 1, 6, i, j, 0), 100)
+            for i in six.moves.range(2) for j in six.moves.range(0, 60, 2)]
         self.storage.add_measures(m, measures)
         self.storage.process_background_tasks(self.index, sync=True)
 
+        # add measure to end, in same aggregate time as last point.
         self.storage.add_measures(m, [
-            storage.Measure(datetime.datetime(2014, 1, 6, 1, 0, 0), 100)])
+            storage.Measure(datetime.datetime(2014, 1, 6, 1, 58, 1), 100)])
 
         with mock.patch.object(self.storage, '_store_metric_measures') as c:
+            # should only resample last aggregate
             self.storage.process_background_tasks(self.index, sync=True)
         count = 0
         for call in c.mock_calls:
-            if mock.call(m_sql, mock.ANY, 'mean', 3600.0, mock.ANY) == call:
+            # policy is 60 points and split is 48. should only update 2nd half
+            if mock.call(m_sql, mock.ANY, 'mean', 60.0, mock.ANY) == call:
                 count += 1
         self.assertEqual(1, count)
 
+    def test_add_measures_update_subset(self):
+        m, m_sql = self._create_metric('medium')
+        measures = [
+            storage.Measure(datetime.datetime(2014, 1, 6, i, j, 0), 100)
+            for i in six.moves.range(2) for j in six.moves.range(0, 60, 2)]
+        self.storage.add_measures(m, measures)
+        self.storage.process_background_tasks(self.index, sync=True)
+
+        # add measure to end, in same aggregate time as last point.
+        new_point = datetime.datetime(2014, 1, 6, 1, 58, 1)
+        self.storage.add_measures(m, [storage.Measure(new_point, 100)])
+
+        with mock.patch.object(self.storage, '_add_measures') as c:
+            self.storage.process_background_tasks(self.index, sync=True)
+            for __, args, __ in c.mock_calls:
+                self.assertEqual(
+                    args[3].first, carbonara.TimeSerie.round_timestamp(
+                        new_point, args[1].granularity * 10e8))
+
     def test_delete_old_measures(self):
         self.storage.add_measures(self.metric, [
             storage.Measure(datetime.datetime(2014, 1, 1, 12, 0, 1), 69),
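A back-of-the-envelope check of the comment in test_add_measures_update_subset_split: with POINTS_PER_SPLIT patched to 48 and a 60s granularity, each split spans 48 * 60 s = 48 minutes, so the two hours of data cover three splits and the new measure at 01:58:01 lands only in the last one, hence exactly one _store_metric_measures call at granularity 60.0.

    # Day boundaries align with split boundaries because 86400 is a
    # multiple of 48 * 60 = 2880 seconds.
    points_per_split = 48
    granularity = 60  # seconds
    split_span = points_per_split * granularity  # 2880 s == 48 minutes

    # Offset of the new measure (01:58:01) from 2014-01-06 00:00:00.
    offset = 1 * 3600 + 58 * 60 + 1
    print(offset // split_span)  # 2 -> the third split, [01:36, 02:24)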