From ae241cc68fb8261ca2805a0050171d4596ce546a Mon Sep 17 00:00:00 2001
From: Dan Smith
Date: Tue, 6 Mar 2018 09:10:27 -0800
Subject: [PATCH] Add simple db purge command

This adds a simple purge command to nova-manage. It either deletes all
shadow archived data, or data older than a date if provided.

This also adds a post-test hook to run purge after archive to validate
that it at least works on data generated by a gate run.

Related to blueprint purge-db

Change-Id: I6f87cf03d49be6bfad2c5e6f0c8accf0fab4e6ee
---
 doc/source/cli/nova-manage.rst             | 10 +++
 gate/post_test_hook.sh                     | 12 +++
 nova/cmd/manage.py                         | 31 +++++++
 nova/db/sqlalchemy/api.py                  | 59 +++++++++++++
 nova/tests/functional/db/test_archive.py   | 86 +++++++++++++++++++
 nova/tests/unit/test_nova_manage.py        | 34 ++++++++
 .../purge-db-command-d4cd9ea5400f479c.yaml |  5 ++
 requirements.txt                           |  1 +
 8 files changed, 238 insertions(+)
 create mode 100644 releasenotes/notes/purge-db-command-d4cd9ea5400f479c.yaml

diff --git a/doc/source/cli/nova-manage.rst b/doc/source/cli/nova-manage.rst
index 0545d46f61be..7556bbaaebce 100644
--- a/doc/source/cli/nova-manage.rst
+++ b/doc/source/cli/nova-manage.rst
@@ -68,6 +68,16 @@ Nova Database
     continuously until all deleted rows are archived. Use the --max_rows
     option, which defaults to 1000, as a batch size for each iteration.
 
+``nova-manage db purge [--all] [--before <date>] [--verbose]``
+
+    Delete rows from shadow tables. Specifying --all will delete all data from
+    all shadow tables. Specifying --before will delete data from all shadow tables
+    that is older than the date provided. Date strings may be fuzzy, such as
+    ``Oct 21 2015``. Returns exit code 0 if rows were deleted, 1 if required
+    arguments are not provided, 2 if an invalid date is provided, 3 if no data
+    was deleted. Specifying --verbose will cause information to be printed about
+    purged records.
+
 ``nova-manage db null_instance_uuid_scan [--delete]``
 
     Lists and optionally deletes database records where instance_uuid is NULL.
diff --git a/gate/post_test_hook.sh b/gate/post_test_hook.sh
index 320c839db554..c56e15569f4c 100755
--- a/gate/post_test_hook.sh
+++ b/gate/post_test_hook.sh
@@ -18,6 +18,17 @@ function archive_deleted_rows {
     done
 }
 
+function purge_db {
+    $MANAGE $* db purge --all --verbose
+    RET=$?
+    if [[ $RET -eq 0 ]]; then
+        echo Purge successful
+    else
+        echo Purge failed with result $RET
+        return $RET
+    fi
+}
+
 BASE=${BASE:-/opt/stack}
 source ${BASE}/new/devstack/functions-common
 source ${BASE}/new/devstack/lib/nova
@@ -29,6 +40,7 @@ cell_conf=$(conductor_conf 1)
 conf="--config-file $NOVA_CONF --config-file $cell_conf"
 
 archive_deleted_rows $conf
+purge_db $conf
 
 set -e
 # We need to get the admin credentials to run the OSC CLIs for Placement.
diff --git a/nova/cmd/manage.py b/nova/cmd/manage.py
index 7b4241ff9b68..a089582bf6c5 100644
--- a/nova/cmd/manage.py
+++ b/nova/cmd/manage.py
@@ -29,6 +29,7 @@ import re
 import sys
 import traceback
 
+from dateutil import parser as dateutil_parser
 import decorator
 import netaddr
 from oslo_config import cfg
@@ -546,6 +547,36 @@ Error: %s""") % six.text_type(e))
         # NOTE(danms): Return nonzero if we archived something
         return int(bool(table_to_rows_archived))
 
+    @args('--before', dest='before',
+          help='If specified, purge rows from shadow tables that are older '
+               'than this. Fuzzy time specs are allowed')
+    @args('--all', dest='purge_all', action='store_true',
+          help='Purge all rows in the shadow tables')
+    @args('--verbose', dest='verbose', action='store_true', default=False,
+          help='Print information about purged records')
+    def purge(self, before=None, purge_all=False, verbose=False):
+        if before is None and purge_all is False:
+            print(_('Either --before or --all is required'))
+            return 1
+        if before:
+            try:
+                before_date = dateutil_parser.parse(before, fuzzy=True)
+            except ValueError as e:
+                print(_('Invalid value for --before: %s') % e)
+                return 2
+        else:
+            before_date = None
+
+        def status(msg):
+            if verbose:
+                print(msg)
+
+        deleted = sa_db.purge_shadow_tables(before_date, status_fn=status)
+        if deleted:
+            return 0
+        else:
+            return 3
+
     @args('--delete', action='store_true', dest='delete',
           help='If specified, automatically delete any records found where '
                'instance_uuid is NULL.')
diff --git a/nova/db/sqlalchemy/api.py b/nova/db/sqlalchemy/api.py
index 0043def07406..54b28013cc5d 100644
--- a/nova/db/sqlalchemy/api.py
+++ b/nova/db/sqlalchemy/api.py
@@ -5920,6 +5920,65 @@ def archive_deleted_rows(max_rows=None):
     return table_to_rows_archived, deleted_instance_uuids
 
 
+def _purgeable_tables(metadata):
+    return [t for t in metadata.sorted_tables
+            if (t.name.startswith(_SHADOW_TABLE_PREFIX) and not
+                t.name.endswith('migrate_version'))]
+
+
+def purge_shadow_tables(before_date, status_fn=None):
+    engine = get_engine()
+    conn = engine.connect()
+    metadata = MetaData()
+    metadata.bind = engine
+    metadata.reflect()
+    total_deleted = 0
+
+    if status_fn is None:
+        status_fn = lambda m: None
+
+    # Some things never get formally deleted, and thus deleted_at
+    # is never set. So, prefer specific timestamp columns here
+    # for those special cases.
+    overrides = {
+        'shadow_instance_actions': 'created_at',
+        'shadow_instance_actions_events': 'created_at',
+    }
+
+    for table in _purgeable_tables(metadata):
+        if before_date is None:
+            col = None
+        elif table.name in overrides:
+            col = getattr(table.c, overrides[table.name])
+        elif hasattr(table.c, 'deleted_at'):
+            col = table.c.deleted_at
+        elif hasattr(table.c, 'updated_at'):
+            col = table.c.updated_at
+        elif hasattr(table.c, 'created_at'):
+            col = table.c.created_at
+        else:
+            status_fn(_('Unable to purge table %(table)s because it '
+                        'has no timestamp column') % {
+                            'table': table.name})
+            continue
+
+        if col is not None:
+            delete = table.delete().where(col < before_date)
+        else:
+            delete = table.delete()
+
+        deleted = conn.execute(delete)
+        if deleted.rowcount > 0:
+            status_fn(_('Deleted %(rows)i rows from %(table)s based on '
+                        'timestamp column %(col)s') % {
+                            'rows': deleted.rowcount,
+                            'table': table.name,
+                            'col': col is None and '(n/a)' or col.name})
+        total_deleted += deleted.rowcount
+
+    return total_deleted
+
+
 @pick_context_manager_writer
 def service_uuids_online_data_migration(context, max_count):
     from nova.objects import service
diff --git a/nova/tests/functional/db/test_archive.py b/nova/tests/functional/db/test_archive.py
index a4e896fba511..f51c0b14921e 100644
--- a/nova/tests/functional/db/test_archive.py
+++ b/nova/tests/functional/db/test_archive.py
@@ -12,7 +12,15 @@
 # License for the specific language governing permissions and limitations
 # under the License.
 
+import datetime
+import re
+
+from dateutil import parser as dateutil_parser
+from oslo_utils import timeutils
 from sqlalchemy.dialects import sqlite
+from sqlalchemy import func
+from sqlalchemy import MetaData
+from sqlalchemy import select
 
 from nova import context
 from nova import db
@@ -144,3 +152,81 @@ class TestDatabaseArchive(test_servers.ServersTestBase):
         # by the archive
         self.assertIn('instance_actions', results)
         self.assertIn('instance_actions_events', results)
+
+    def _get_table_counts(self):
+        engine = sqlalchemy_api.get_engine()
+        conn = engine.connect()
+        meta = MetaData(engine)
+        meta.reflect()
+        shadow_tables = sqlalchemy_api._purgeable_tables(meta)
+        results = {}
+        for table in shadow_tables:
+            r = conn.execute(
+                select([func.count()]).select_from(table)).fetchone()
+            results[table.name] = r[0]
+        return results
+
+    def test_archive_then_purge_all(self):
+        server = self._create_server()
+        server_id = server['id']
+        self._delete_server(server_id)
+        results, deleted_ids = db.archive_deleted_rows(max_rows=1000)
+        self.assertEqual([server_id], deleted_ids)
+
+        lines = []
+
+        def status(msg):
+            lines.append(msg)
+
+        deleted = sqlalchemy_api.purge_shadow_tables(None, status_fn=status)
+        self.assertNotEqual(0, deleted)
+        self.assertNotEqual(0, len(lines))
+        for line in lines:
+            self.assertIsNotNone(re.match(r'Deleted [1-9][0-9]* rows from .*',
+                                          line))
+
+        results = self._get_table_counts()
+        # No table should have any rows
+        self.assertFalse(any(results.values()))
+
+    def test_archive_then_purge_by_date(self):
+        server = self._create_server()
+        server_id = server['id']
+        self._delete_server(server_id)
+        results, deleted_ids = db.archive_deleted_rows(max_rows=1000)
+        self.assertEqual([server_id], deleted_ids)
+
+        pre_purge_results = self._get_table_counts()
+
+        past = timeutils.utcnow() - datetime.timedelta(hours=1)
+        deleted = sqlalchemy_api.purge_shadow_tables(past)
+        # Make sure we didn't delete anything if the marker is before
+        # we started
+        self.assertEqual(0, deleted)
+
+        results = self._get_table_counts()
+        # Nothing should be changed if we didn't purge anything
+        self.assertEqual(pre_purge_results, results)
+
+        future = timeutils.utcnow() + datetime.timedelta(hours=1)
+        deleted = sqlalchemy_api.purge_shadow_tables(future)
+        # Make sure we deleted things when the marker is after
+        # we started
+        self.assertNotEqual(0, deleted)
+
+        results = self._get_table_counts()
+        # There should be no rows in any table if we purged everything
+        self.assertFalse(any(results.values()))
+
+    def test_purge_with_real_date(self):
+        """Make sure the result of dateutil's parser works with the
+        query we're making to sqlalchemy.
+        """
+        server = self._create_server()
+        server_id = server['id']
+        self._delete_server(server_id)
+        results, deleted_ids = db.archive_deleted_rows(max_rows=1000)
+        self.assertEqual([server_id], deleted_ids)
+        date = dateutil_parser.parse('oct 21 2015', fuzzy=True)
+        deleted = sqlalchemy_api.purge_shadow_tables(date)
+        self.assertEqual(0, deleted)
diff --git a/nova/tests/unit/test_nova_manage.py b/nova/tests/unit/test_nova_manage.py
index add162887998..34942b1ccd10 100644
--- a/nova/tests/unit/test_nova_manage.py
+++ b/nova/tests/unit/test_nova_manage.py
@@ -13,6 +13,7 @@
 # License for the specific language governing permissions and limitations
 # under the License.
 
+import datetime
 import sys
 
 import ddt
@@ -534,6 +535,39 @@ Archiving.....stopped
         else:
             self.assertEqual(0, len(output))
 
+    @mock.patch('nova.db.sqlalchemy.api.purge_shadow_tables')
+    def test_purge_all(self, mock_purge):
+        mock_purge.return_value = 1
+        ret = self.commands.purge(purge_all=True)
+        self.assertEqual(0, ret)
+        mock_purge.assert_called_once_with(None, status_fn=mock.ANY)
+
+    @mock.patch('nova.db.sqlalchemy.api.purge_shadow_tables')
+    def test_purge_date(self, mock_purge):
+        mock_purge.return_value = 1
+        ret = self.commands.purge(before='oct 21 2015')
+        self.assertEqual(0, ret)
+        mock_purge.assert_called_once_with(datetime.datetime(2015, 10, 21),
+                                           status_fn=mock.ANY)
+
+    @mock.patch('nova.db.sqlalchemy.api.purge_shadow_tables')
+    def test_purge_date_fail(self, mock_purge):
+        ret = self.commands.purge(before='notadate')
+        self.assertEqual(2, ret)
+        self.assertFalse(mock_purge.called)
+
+    @mock.patch('nova.db.sqlalchemy.api.purge_shadow_tables')
+    def test_purge_no_args(self, mock_purge):
+        ret = self.commands.purge()
+        self.assertEqual(1, ret)
+        self.assertFalse(mock_purge.called)
+
+    @mock.patch('nova.db.sqlalchemy.api.purge_shadow_tables')
+    def test_purge_nothing_deleted(self, mock_purge):
+        mock_purge.return_value = 0
+        ret = self.commands.purge(purge_all=True)
+        self.assertEqual(3, ret)
+
     @mock.patch.object(migration, 'db_null_instance_uuid_scan',
                        return_value={'foo': 0})
     def test_null_instance_uuid_scan_no_records_found(self, mock_scan):
diff --git a/releasenotes/notes/purge-db-command-d4cd9ea5400f479c.yaml b/releasenotes/notes/purge-db-command-d4cd9ea5400f479c.yaml
new file mode 100644
index 000000000000..8f53be1b44e6
--- /dev/null
+++ b/releasenotes/notes/purge-db-command-d4cd9ea5400f479c.yaml
@@ -0,0 +1,5 @@
+---
+features:
+  - |
+    The nova-manage command now has a 'db purge' command that will delete data
+    from the shadow tables after 'db archive_deleted_rows' has been run.
diff --git a/requirements.txt b/requirements.txt
index a5dd6d0b802a..6beb3d718535 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -64,3 +64,4 @@ cursive>=0.2.1 # Apache-2.0
 pypowervm>=1.1.11 # Apache-2.0
 os-service-types>=1.2.0 # Apache-2.0
 taskflow>=2.16.0 # Apache-2.0
+python-dateutil>=2.5.3 # BSD
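
For reference, a minimal usage sketch of the command this patch adds, run in the
same archive-then-purge order as the gate hook above. The flag names and exit
codes come from the nova-manage.rst addition; the config-file path is an
assumption for illustration only:

    # Move soft-deleted rows into the shadow tables, then purge all of them,
    # printing per-table counts (exit code 0 on success, 3 if nothing was purged)
    nova-manage --config-file /etc/nova/nova.conf db archive_deleted_rows
    nova-manage --config-file /etc/nova/nova.conf db purge --all --verbose

    # Or purge only shadow rows older than a fuzzy date string; an unparseable
    # date returns exit code 2, and omitting both --all and --before returns 1
    nova-manage --config-file /etc/nova/nova.conf db purge --before "Oct 21 2015" --verbose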