Add simple db purge command

This adds a simple purge command to nova-manage. It either deletes all
shadow archived data, or data older than a date if provided.

This also adds a post-test hook to run purge after archive to validate
that it at least works on data generated by a gate run.

Related to blueprint purge-db

Change-Id: I6f87cf03d49be6bfad2c5e6f0c8accf0fab4e6ee
This commit is contained in:
Dan Smith 2018-03-06 09:10:27 -08:00
parent 64635ba18d
commit ae241cc68f
8 changed files with 238 additions and 0 deletions

View File

@ -68,6 +68,16 @@ Nova Database
continuously until all deleted rows are archived. Use the --max_rows option,
which defaults to 1000, as a batch size for each iteration.
``nova-manage db purge [--all] [--before <date>] [--verbose]``
Delete rows from shadow tables. Specifying --all will delete all data from
all shadow tables. Specifying --before will delete data from all shadow tables
that is older than the date provided. Date strings may be fuzzy, such as
``Oct 21 2015``. Returns exit code 0 if rows were deleted, 1 if neither
--all nor --before is provided, 2 if an invalid date is provided, 3 if no
data was deleted. Specifying --verbose will cause information to be printed
about purged records.
``nova-manage db null_instance_uuid_scan [--delete]``
Lists and optionally deletes database records where instance_uuid is NULL.

View File

@ -18,6 +18,17 @@ function archive_deleted_rows {
done
}
function purge_db {
$MANAGE $* db purge --all --verbose
RET=$?
if [[ $RET -eq 0 ]]; then
echo Purge successful
else
echo Purge failed with result $RET
return $RET
fi
}
BASE=${BASE:-/opt/stack}
source ${BASE}/new/devstack/functions-common
source ${BASE}/new/devstack/lib/nova
@ -29,6 +40,7 @@ cell_conf=$(conductor_conf 1)
conf="--config-file $NOVA_CONF --config-file $cell_conf"
archive_deleted_rows $conf
purge_db $conf
set -e
# We need to get the admin credentials to run the OSC CLIs for Placement.

View File

@ -29,6 +29,7 @@ import re
import sys
import traceback
from dateutil import parser as dateutil_parser
import decorator
import netaddr
from oslo_config import cfg
@ -546,6 +547,36 @@ Error: %s""") % six.text_type(e))
# NOTE(danms): Return nonzero if we archived something
return int(bool(table_to_rows_archived))
@args('--before', dest='before',
help='If specified, purge rows from shadow tables that are older '
'than this. Fuzzy time specs are allowed')
@args('--all', dest='purge_all', action='store_true',
help='Purge all rows in the shadow tables')
@args('--verbose', dest='verbose', action='store_true', default=False,
help='Print information about purged records')
def purge(self, before=None, purge_all=False, verbose=False):
if before is None and purge_all is False:
print(_('Either --before or --all is required'))
return 1
if before:
try:
before_date = dateutil_parser.parse(before, fuzzy=True)
except ValueError as e:
print(_('Invalid value for --before: %s') % e)
return 2
else:
before_date = None
def status(msg):
if verbose:
print(msg)
deleted = sa_db.purge_shadow_tables(before_date, status_fn=status)
if deleted:
return 0
else:
return 3
@args('--delete', action='store_true', dest='delete',
help='If specified, automatically delete any records found where '
'instance_uuid is NULL.')

View File

@ -5920,6 +5920,65 @@ def archive_deleted_rows(max_rows=None):
return table_to_rows_archived, deleted_instance_uuids
def _purgeable_tables(metadata):
return [t for t in metadata.sorted_tables
if (t.name.startswith(_SHADOW_TABLE_PREFIX) and not
t.name.endswith('migrate_version'))]
def purge_shadow_tables(before_date, status_fn=None):
engine = get_engine()
conn = engine.connect()
metadata = MetaData()
metadata.bind = engine
metadata.reflect()
total_deleted = 0
if status_fn is None:
status_fn = lambda m: None
# Some things never get formally deleted, and thus deleted_at
# is never set. So, prefer specific timestamp columns here
# for those special cases.
overrides = {
'shadow_instance_actions': 'created_at',
'shadow_instance_actions_events': 'created_at',
}
for table in _purgeable_tables(metadata):
if before_date is None:
col = None
elif table.name in overrides:
col = getattr(table.c, overrides[table.name])
elif hasattr(table.c, 'deleted_at'):
col = table.c.deleted_at
elif hasattr(table.c, 'updated_at'):
col = table.c.updated_at
elif hasattr(table.c, 'created_at'):
col = table.c.created_at
else:
status_fn(_('Unable to purge table %(table)s because it '
'has no timestamp column') % {
'table': table.name})
continue
if col is not None:
delete = table.delete().where(col < before_date)
else:
delete = table.delete()
deleted = conn.execute(delete)
if deleted.rowcount > 0:
status_fn(_('Deleted %(rows)i rows from %(table)s based on '
'timestamp column %(col)s') % {
'rows': deleted.rowcount,
'table': table.name,
'col': col is None and '(n/a)' or col.name})
total_deleted += deleted.rowcount
return total_deleted
@pick_context_manager_writer
def service_uuids_online_data_migration(context, max_count):
from nova.objects import service

View File

@ -12,7 +12,15 @@
# License for the specific language governing permissions and limitations
# under the License.
import datetime
import re
from dateutil import parser as dateutil_parser
from oslo_utils import timeutils
from sqlalchemy.dialects import sqlite
from sqlalchemy import func
from sqlalchemy import MetaData
from sqlalchemy import select
from nova import context
from nova import db
@ -144,3 +152,81 @@ class TestDatabaseArchive(test_servers.ServersTestBase):
# by the archive
self.assertIn('instance_actions', results)
self.assertIn('instance_actions_events', results)
def _get_table_counts(self):
engine = sqlalchemy_api.get_engine()
conn = engine.connect()
meta = MetaData(engine)
meta.reflect()
shadow_tables = sqlalchemy_api._purgeable_tables(meta)
results = {}
for table in shadow_tables:
r = conn.execute(
select([func.count()]).select_from(table)).fetchone()
results[table.name] = r[0]
return results
def test_archive_then_purge_all(self):
server = self._create_server()
server_id = server['id']
self._delete_server(server_id)
results, deleted_ids = db.archive_deleted_rows(max_rows=1000)
self.assertEqual([server_id], deleted_ids)
lines = []
def status(msg):
lines.append(msg)
deleted = sqlalchemy_api.purge_shadow_tables(None, status_fn=status)
self.assertNotEqual(0, deleted)
self.assertNotEqual(0, len(lines))
for line in lines:
self.assertIsNotNone(re.match(r'Deleted [1-9][0-9]* rows from .*',
line))
results = self._get_table_counts()
# No table should have any rows
self.assertFalse(any(results.values()))
def test_archive_then_purge_by_date(self):
server = self._create_server()
server_id = server['id']
self._delete_server(server_id)
results, deleted_ids = db.archive_deleted_rows(max_rows=1000)
self.assertEqual([server_id], deleted_ids)
pre_purge_results = self._get_table_counts()
past = timeutils.utcnow() - datetime.timedelta(hours=1)
deleted = sqlalchemy_api.purge_shadow_tables(past)
# Make sure we didn't delete anything if the marker is before
# we started
self.assertEqual(0, deleted)
results = self._get_table_counts()
# Nothing should be changed if we didn't purge anything
self.assertEqual(pre_purge_results, results)
future = timeutils.utcnow() + datetime.timedelta(hours=1)
deleted = sqlalchemy_api.purge_shadow_tables(future)
# Make sure we deleted things when the marker is after
# we started
self.assertNotEqual(0, deleted)
results = self._get_table_counts()
# There should be no rows in any table if we purged everything
self.assertFalse(any(results.values()))
def test_purge_with_real_date(self):
"""Make sure the result of dateutil's parser works with the
query we're making to sqlalchemy.
"""
server = self._create_server()
server_id = server['id']
self._delete_server(server_id)
results, deleted_ids = db.archive_deleted_rows(max_rows=1000)
self.assertEqual([server_id], deleted_ids)
date = dateutil_parser.parse('oct 21 2015', fuzzy=True)
deleted = sqlalchemy_api.purge_shadow_tables(date)
self.assertEqual(0, deleted)

View File

@ -13,6 +13,7 @@
# License for the specific language governing permissions and limitations
# under the License.
import datetime
import sys
import ddt
@ -534,6 +535,39 @@ Archiving.....stopped
else:
self.assertEqual(0, len(output))
@mock.patch('nova.db.sqlalchemy.api.purge_shadow_tables')
def test_purge_all(self, mock_purge):
mock_purge.return_value = 1
ret = self.commands.purge(purge_all=True)
self.assertEqual(0, ret)
mock_purge.assert_called_once_with(None, status_fn=mock.ANY)
@mock.patch('nova.db.sqlalchemy.api.purge_shadow_tables')
def test_purge_date(self, mock_purge):
mock_purge.return_value = 1
ret = self.commands.purge(before='oct 21 2015')
self.assertEqual(0, ret)
mock_purge.assert_called_once_with(datetime.datetime(2015, 10, 21),
status_fn=mock.ANY)
@mock.patch('nova.db.sqlalchemy.api.purge_shadow_tables')
def test_purge_date_fail(self, mock_purge):
ret = self.commands.purge(before='notadate')
self.assertEqual(2, ret)
self.assertFalse(mock_purge.called)
@mock.patch('nova.db.sqlalchemy.api.purge_shadow_tables')
def test_purge_no_args(self, mock_purge):
ret = self.commands.purge()
self.assertEqual(1, ret)
self.assertFalse(mock_purge.called)
@mock.patch('nova.db.sqlalchemy.api.purge_shadow_tables')
def test_purge_nothing_deleted(self, mock_purge):
mock_purge.return_value = 0
ret = self.commands.purge(purge_all=True)
self.assertEqual(3, ret)
@mock.patch.object(migration, 'db_null_instance_uuid_scan',
return_value={'foo': 0})
def test_null_instance_uuid_scan_no_records_found(self, mock_scan):

View File

@ -0,0 +1,5 @@
---
features:
- |
The nova-manage command now has a 'db purge' command that will delete data
from the shadow tables after 'db archive_deleted_rows' has been run.

View File

@ -64,3 +64,4 @@ cursive>=0.2.1 # Apache-2.0
pypowervm>=1.1.11 # Apache-2.0
os-service-types>=1.2.0 # Apache-2.0
taskflow>=2.16.0 # Apache-2.0
python-dateutil>=2.5.3 # BSD