From 02179e0c329e307c86f4e713a0b5f9e6f4cd7945 Mon Sep 17 00:00:00 2001 From: Takashi Kajinami Date: Mon, 5 Oct 2020 08:47:40 +0900 Subject: [PATCH] Limit number of records deleted by aodh-expirer This patch introduces the same functionality as is implemented in panko recently[1], and allows us to limit the number of alarm histories deleted in a single iteration, to avoid the query takes a long time and if there are many expired records. [1] Icf83ffe089301b3782273923f18efd4d209131c2 Change-Id: Ie1d1bbb911cf56a56f712291f61ffaabfa97422f --- aodh/cmd/storage.py | 19 ++++++++++++++++--- aodh/storage/__init__.py | 5 +++++ aodh/storage/base.py | 6 +++--- aodh/storage/impl_log.py | 9 ++++----- aodh/storage/impl_sqlalchemy.py | 18 ++++++++++-------- .../storage/test_storage_scenarios.py | 2 +- aodh/tests/unit/test_bin.py | 3 ++- ...-batch-delete-events-32496f15b1169887.yaml | 7 +++++++ 8 files changed, 48 insertions(+), 21 deletions(-) create mode 100644 releasenotes/notes/support-batch-delete-events-32496f15b1169887.yaml diff --git a/aodh/cmd/storage.py b/aodh/cmd/storage.py index d736d78fe..1ce401f7e 100644 --- a/aodh/cmd/storage.py +++ b/aodh/cmd/storage.py @@ -33,9 +33,22 @@ def expirer(): if conf.database.alarm_history_time_to_live > 0: LOG.debug("Clearing expired alarm history data") - storage_conn = storage.get_connection_from_config(conf) - storage_conn.clear_expired_alarm_history_data( - conf.database.alarm_history_time_to_live) + conn = storage.get_connection_from_config(conf) + max_count = conf.database.alarm_histories_delete_batch_size + try: + if max_count > 0: + conn.clear_expired_alarm_history_data( + conf.database.alarm_history_time_to_live, + max_count) + else: + deleted = max_count = 100 + while deleted and deleted > 0: + deleted = conn.clear_expired_alarm_history_data( + conf.database.alarm_history_time_to_live, + max_count) + except TypeError: + LOG.warning("Storage driver does not support " + "'alarm_histories_delete_batch_size' config option.") else: LOG.info("Nothing to clean, database alarm history time to live " "is disabled") diff --git a/aodh/storage/__init__.py b/aodh/storage/__init__.py index f5074bc06..1dfd23242 100644 --- a/aodh/storage/__init__.py +++ b/aodh/storage/__init__.py @@ -34,6 +34,11 @@ OPTS = [ default=-1, help=("Number of seconds that alarm histories are kept " "in the database for (<= 0 means forever).")), + cfg.IntOpt('alarm_histories_delete_batch_size', + default=0, + min=0, + help=("Number of alarm histories to be deleted in one " + "iteration from the database (0 means all).")), ] diff --git a/aodh/storage/base.py b/aodh/storage/base.py index 160880351..02bf8151e 100644 --- a/aodh/storage/base.py +++ b/aodh/storage/base.py @@ -191,13 +191,13 @@ class Connection(object): return cls.STORAGE_CAPABILITIES @staticmethod - def clear_expired_alarm_history_data(alarm_history_ttl): + def clear_expired_alarm_history_data(ttl, max_count=None): """Clear expired alarm history data from the backend storage system. Clearing occurs according to the time-to-live. - :param alarm_history_ttl: Number of seconds to keep alarm history - records for. + :param ttl: Number of seconds to keep alarm history records for. + :param max_count: Number of records to delete. """ raise aodh.NotImplementedError('Clearing alarm history ' 'not implemented') diff --git a/aodh/storage/impl_log.py b/aodh/storage/impl_log.py index da578b98f..15219ddf5 100644 --- a/aodh/storage/impl_log.py +++ b/aodh/storage/impl_log.py @@ -56,13 +56,12 @@ class Connection(base.Connection): """Delete an alarm and its history data.""" @staticmethod - def clear_expired_alarm_history_data(alarm_history_ttl): + def clear_expired_alarm_history_data(ttl, max_count=None): """Clear expired alarm history data from the backend storage system. Clearing occurs according to the time-to-live. - :param alarm_history_ttl: Number of seconds to keep alarm history - records for. + :param ttl: Number of seconds to keep alarm history records for. + :param max_count: Number of records to delete. """ - LOG.info('Dropping alarm history data with TTL %d', - alarm_history_ttl) + LOG.info('Dropping alarm history %d data with TTL %d', max_count, ttl) diff --git a/aodh/storage/impl_sqlalchemy.py b/aodh/storage/impl_sqlalchemy.py index f28173e05..31b683ddd 100644 --- a/aodh/storage/impl_sqlalchemy.py +++ b/aodh/storage/impl_sqlalchemy.py @@ -398,21 +398,23 @@ class Connection(base.Connection): alarm_change_row.update(alarm_change) session.add(alarm_change_row) - def clear_expired_alarm_history_data(self, alarm_history_ttl): + def clear_expired_alarm_history_data(self, ttl, max_count=100): """Clear expired alarm history data from the backend storage system. Clearing occurs according to the time-to-live. - :param alarm_history_ttl: Number of seconds to keep alarm history - records for. + :param ttl: Number of seconds to keep alarm history records for. + :param max_count: Number of records to delete. """ session = self._engine_facade.get_session() with session.begin(): - valid_start = (timeutils.utcnow() - - datetime.timedelta(seconds=alarm_history_ttl)) - deleted_rows = (session.query(models.AlarmChange) - .filter(models.AlarmChange.timestamp < valid_start) - .delete()) + end = timeutils.utcnow() - datetime.timedelta(seconds=ttl) + alarm_history_q = (session.query(models.AlarmChange.event_id) + .filter(models.AlarmChange.timestamp < end)) + event_ids = [i[0] for i in alarm_history_q.limit(max_count)] + deleted_rows = session.query(models.AlarmChange).filter( + models.AlarmChange.event_id.in_(event_ids) + ).delete(synchronize_session="fetch") LOG.info("%d alarm histories are removed from database", deleted_rows) diff --git a/aodh/tests/functional/storage/test_storage_scenarios.py b/aodh/tests/functional/storage/test_storage_scenarios.py index 6991be5f6..7a754bac0 100644 --- a/aodh/tests/functional/storage/test_storage_scenarios.py +++ b/aodh/tests/functional/storage/test_storage_scenarios.py @@ -277,7 +277,7 @@ class AlarmHistoryTest(AlarmTestBase): def _clear_alarm_history(self, utcnow, ttl, count): self.mock_utcnow.return_value = utcnow - self.alarm_conn.clear_expired_alarm_history_data(ttl) + self.alarm_conn.clear_expired_alarm_history_data(ttl, 100) history = list(self.alarm_conn.query_alarm_history()) self.assertEqual(count, len(history)) diff --git a/aodh/tests/unit/test_bin.py b/aodh/tests/unit/test_bin.py index 68223c416..1a6cd1c42 100644 --- a/aodh/tests/unit/test_bin.py +++ b/aodh/tests/unit/test_bin.py @@ -55,6 +55,7 @@ class BinTestCase(base.BaseTestCase): def test_run_expirer_ttl_enabled(self): content = ("[database]\n" "alarm_history_time_to_live=1\n" + "alarm_histories_delete_batch_size=10\n" "connection=log://localhost\n") content = content.encode('utf-8') self.tempfile = fileutils.write_to_tempfile(content=content, @@ -67,7 +68,7 @@ class BinTestCase(base.BaseTestCase): stderr=subprocess.PIPE) out, __ = subp.communicate() self.assertEqual(0, subp.poll()) - msg = "Dropping alarm history data with TTL 1" + msg = "Dropping alarm history 10 data with TTL 1" msg = msg.encode('utf-8') self.assertIn(msg, out) diff --git a/releasenotes/notes/support-batch-delete-events-32496f15b1169887.yaml b/releasenotes/notes/support-batch-delete-events-32496f15b1169887.yaml new file mode 100644 index 000000000..ecccbb8c7 --- /dev/null +++ b/releasenotes/notes/support-batch-delete-events-32496f15b1169887.yaml @@ -0,0 +1,7 @@ +--- +features: + - | + A new ``alarm_histories_delete_bacth_size`` option has been added to limit + a number of alarm histories deleted from the database by aodh-expirer in + a single iteration. This parameter is useful when there are a lot of alarm + histories in the database.