database: Archive parent and child rows "trees" one at a time

Previously, we archived deleted rows in batches of max_rows parents +
their child rows in a single database transaction. Doing it that way
limited how high a value of max_rows could be specified by the caller
because of the size of the database transaction it could generate.

For example, in a large scale deployment with hundreds of thousands of
deleted rows and constant server creation and deletion activity, a
value of max_rows=1000 might exceed the database's configured maximum
packet size or timeout due to a database deadlock, forcing the operator
to use a much lower max_rows value like 100 or 50.

And when the operator has e.g. 500,000 deleted instances rows (and
millions of deleted rows total) they are trying to archive, being
forced to use a max_rows value several orders of magnitude lower than
the number of rows they need to archive was a poor user experience.

This changes the logic to archive one parent row and its foreign key
related child rows one at a time in a single database transaction
while limiting the total number of rows per table as soon as it reaches
>= max_rows. Doing this will allow operators to choose more predictable
values for max_rows and get more progress per invocation of
archive_deleted_rows.

Closes-Bug: #2024258

Change-Id: I2209bf1b3320901cf603ec39163cf923b25b0359
This commit is contained in:
melanie witt 2023-02-22 23:58:33 +00:00
parent f6620d48c8
commit 697fa3c000
5 changed files with 202 additions and 47 deletions

View File

@ -229,10 +229,10 @@ class DbCommands(object):
print(migration.db_version())
@args('--max_rows', type=int, metavar='<number>', dest='max_rows',
help='Maximum number of deleted rows to archive. Defaults to 1000. '
'Note that this number does not include the corresponding '
'rows, if any, that are removed from the API database for '
'deleted instances.')
help='Maximum number of deleted rows to archive per table. Defaults '
'to 1000. Note that this number is a soft limit and does not '
'include the corresponding rows, if any, that are removed '
'from the API database for deleted instances.')
@args('--before', metavar='<date>',
help=('Archive rows that have been deleted before this date. '
'Accepts date strings in the default format output by the '
@ -404,7 +404,10 @@ class DbCommands(object):
'cell1.instances': 5}
:param cctxt: Cell-targeted nova.context.RequestContext if archiving
across all cells
:param max_rows: Maximum number of deleted rows to archive
:param max_rows: Maximum number of deleted rows to archive per table.
Note that this number is a soft limit and does not include the
corresponding rows, if any, that are removed from the API database
for deleted instances.
:param until_complete: Whether to run continuously until all deleted
rows are archived
:param verbose: Whether to print how many rows were archived per table
@ -417,15 +420,26 @@ class DbCommands(object):
"""
ctxt = context.get_admin_context()
while True:
run, deleted_instance_uuids, total_rows_archived = \
# table_to_rows = {table_name: number_of_rows_archived}
# deleted_instance_uuids = ['uuid1', 'uuid2', ...]
table_to_rows, deleted_instance_uuids, total_rows_archived = \
db.archive_deleted_rows(
cctxt, max_rows, before=before_date, task_log=task_log)
for table_name, rows_archived in run.items():
for table_name, rows_archived in table_to_rows.items():
if cell_name:
table_name = cell_name + '.' + table_name
table_to_rows_archived.setdefault(table_name, 0)
table_to_rows_archived[table_name] += rows_archived
if deleted_instance_uuids:
# deleted_instance_uuids does not necessarily mean that any
# instances rows were archived because it is obtained by a query
# separate from the archive queries. For example, if a
# DBReferenceError was raised while processing the instances table,
# we would have skipped the table and had 0 rows archived even
# though deleted instances rows were found.
instances_archived = table_to_rows.get('instances', 0)
if deleted_instance_uuids and instances_archived:
table_to_rows_archived.setdefault(
'API_DB.instance_mappings', 0)
table_to_rows_archived.setdefault(
@ -448,8 +462,9 @@ class DbCommands(object):
# If we're not archiving until there is nothing more to archive, we
# have reached max_rows in this cell DB or there was nothing to
# archive.
if not until_complete or not run:
# archive. We check the values() in case we get something like
# table_to_rows = {'instances': 0} back somehow.
if not until_complete or not any(table_to_rows.values()):
break
if verbose:
sys.stdout.write('.')

View File

@ -4407,6 +4407,9 @@ def _archive_deleted_rows_for_table(
select = select.order_by(column).limit(max_rows)
with conn.begin():
rows = conn.execute(select).fetchall()
# This is a list of IDs of rows that should be archived from this table,
# limited to a length of max_rows.
records = [r[0] for r in rows]
# We will archive deleted rows for this table and also generate insert and
@ -4419,48 +4422,100 @@ def _archive_deleted_rows_for_table(
# Keep track of any extra tablenames to number of rows that we archive by
# following FK relationships.
# {tablename: extra_rows_archived}
#
# extras = {tablename: number_of_extra_rows_archived}
extras = collections.defaultdict(int)
if records:
insert = shadow_table.insert().from_select(
columns, sql.select(table).where(column.in_(records))
).inline()
delete = table.delete().where(column.in_(records))
if not records:
# Nothing to archive, so return.
return rows_archived, deleted_instance_uuids, extras
# Keep track of how many rows we accumulate for the insert+delete database
# transaction and cap it as soon as it is >= max_rows. Because we will
# archive all child rows of a parent row along with the parent at the same
# time, we end up with extra rows to archive in addition to len(records).
num_rows_in_batch = 0
# The sequence of query statements we will execute in a batch. These are
# ordered: [child1, child1, parent1, child2, child2, child2, parent2, ...]
# Parent + child "trees" are kept together to avoid FK constraint
# violations.
statements_in_batch = []
# The list of records in the batch. This is used for collecting deleted
# instance UUIDs in the case of the 'instances' table.
records_in_batch = []
# NOTE(melwitt): We will gather rows related by foreign key relationship for
# each deleted row, one at a time. We do it this way to keep track of and
# limit the total number of rows that will be archived in a single database
# transaction. In a large scale database with potentially hundreds of
# thousands of deleted rows, if we don't limit the size of the transaction
# based on max_rows, we can get into a situation where we get stuck not
# able to make much progress. The value of max_rows has to be 1) small
# enough to not exceed the database's max packet size limit or timeout with
# a deadlock but 2) large enough to make progress in an environment with a
# constant high volume of create and delete traffic. By archiving each
# parent + child rows tree one at a time, we can ensure meaningful progress
# can be made while allowing the caller to predictably control the size of
# the database transaction with max_rows.
for record in records:
# Walk FK relationships and add insert/delete statements for rows that
# refer to this table via FK constraints. fk_inserts and fk_deletes
# will be prepended to by _get_fk_stmts if referring rows are found by
# FK constraints.
fk_inserts, fk_deletes = _get_fk_stmts(
metadata, conn, table, column, records)
metadata, conn, table, column, [record])
statements_in_batch.extend(fk_inserts + fk_deletes)
# statement to add parent row to shadow table
insert = shadow_table.insert().from_select(
columns, sql.select(table).where(column.in_([record]))).inline()
statements_in_batch.append(insert)
# statement to remove parent row from main table
delete = table.delete().where(column.in_([record]))
statements_in_batch.append(delete)
records_in_batch.append(record)
# Check whether we have a full batch >= max_rows. Rows are counted as
# the number of rows that will be moved in the database transaction.
# So each insert+delete pair represents one row that will be moved.
# 1 parent + its fks
num_rows_in_batch += 1 + len(fk_inserts)
if max_rows is not None and num_rows_in_batch >= max_rows:
break
# NOTE(tssurya): In order to facilitate the deletion of records from
# instance_mappings, request_specs and instance_group_member tables in
# the nova_api DB, the rows of deleted instances from the instances
# table are stored prior to their deletion. Basically the uuids of the
# archived instances are queried and returned.
# instance_mappings, request_specs and instance_group_member tables in the
# nova_api DB, the rows of deleted instances from the instances table are
# stored prior to their deletion. Basically the uuids of the archived
# instances are queried and returned.
if tablename == "instances":
query_select = sql.select(table.c.uuid).where(
table.c.id.in_(records)
)
table.c.id.in_(records_in_batch))
with conn.begin():
rows = conn.execute(query_select).fetchall()
# deleted_instance_uuids = ['uuid1', 'uuid2', ...]
deleted_instance_uuids = [r[0] for r in rows]
try:
# Group the insert and delete in a transaction.
with conn.begin():
for fk_insert in fk_inserts:
conn.execute(fk_insert)
for fk_delete in fk_deletes:
result_fk_delete = conn.execute(fk_delete)
extras[fk_delete.table.name] += result_fk_delete.rowcount
conn.execute(insert)
result_delete = conn.execute(delete)
rows_archived += result_delete.rowcount
for statement in statements_in_batch:
result = conn.execute(statement)
result_tablename = statement.table.name
# Add to archived row counts if not a shadow table.
if not result_tablename.startswith(_SHADOW_TABLE_PREFIX):
if result_tablename == tablename:
# Number of tablename (parent) rows archived.
rows_archived += result.rowcount
else:
# Number(s) of child rows archived.
extras[result_tablename] += result.rowcount
except db_exc.DBReferenceError as ex:
# A foreign key constraint keeps us from deleting some of
# these rows until we clean up a dependent table. Just
# skip this table for now; we'll come back to it later.
# A foreign key constraint keeps us from deleting some of these rows
# until we clean up a dependent table. Just skip this table for now;
# we'll come back to it later.
LOG.warning("IntegrityError detected when archiving table "
"%(tablename)s: %(error)s",
{'tablename': tablename, 'error': str(ex)})

View File

@ -174,6 +174,50 @@ class TestDatabaseArchive(integrated_helpers._IntegratedTestBase):
break
self.assertFalse(exceptions)
def test_archive_deleted_rows_parent_child_trees_one_at_time(self):
    """Verify archiving moves one parent + child row "tree" at a time.

    Archiving batches are capped per table as soon as the number of
    rows accumulated for the database transaction reaches >= max_rows,
    with each deleted parent row kept together with its foreign key
    related child rows. So with a small max_rows, each invocation
    should archive only one complete instance "tree" even when more
    deleted instances are available.
    """
    # Create and then delete two servers so two deleted parent rows
    # (plus their child rows) are available to archive.
    for _ in range(2):
        self._delete_server(self._create_server())
    # With max_rows=5, a single parent + its child rows already reach
    # the soft limit, so only one instance should get archived.
    results, _, _ = db.archive_deleted_rows(max_rows=5)
    self.assertEqual(1, results['instances'])
    # A second invocation archives the remaining instance tree.
    results, _, _ = db.archive_deleted_rows(max_rows=5)
    self.assertEqual(1, results['instances'])
    # At this point everything has been archived.
    _, _, total_rows_archived = db.archive_deleted_rows(max_rows=100)
    self.assertEqual(0, total_rows_archived)
def _get_table_counts(self):
engine = db.get_engine()
conn = engine.connect()

View File

@ -501,6 +501,26 @@ Archiving....stopped
self.assertIn('Nothing was archived.', output)
self.assertEqual(0, result)
@mock.patch.object(db, 'archive_deleted_rows')
@mock.patch.object(objects.CellMappingList, 'get_all')
def test_archive_deleted_rows_deleted_instances_no_rows(self, mock_get_all,
                                                        mock_db_archive):
    # Simulate the case where no rows were archived (for example, a
    # DBReferenceError was raised while processing the instances table)
    # even though the separate deleted_instance_uuids query found rows.
    mock_db_archive.return_value = (
        {}, [uuidsentinel.instance1, uuidsentinel.instance2], 0)
    exit_code = self.commands.archive_deleted_rows(20, verbose=True)
    mock_db_archive.assert_called_once_with(
        test.MatchType(context.RequestContext), 20, before=None,
        task_log=False)
    # Since nothing was archived, no purge messages should be printed.
    self.assertIn('Nothing was archived.', self.output.getvalue())
    self.assertEqual(0, exit_code)
@mock.patch.object(db, 'archive_deleted_rows')
@mock.patch.object(objects.RequestSpec, 'destroy_bulk')
@mock.patch.object(objects.InstanceGroup, 'destroy_members_bulk')

View File

@ -0,0 +1,21 @@
---
fixes:
- |
`Bug #2024258`_: Fixes an issue with performance degradation archiving
databases with large numbers of foreign key related records.
Previously, deleted rows were archived in batches of max_rows parents +
their child rows in a single database transaction. It limited how high
a value of max_rows could be specified by the user because of the size of
the database transaction it could generate. Symptoms of the behavior were
exceeding the maximum configured packet size of the database or timing out
due to a deadlock.
The behavior has been changed to archive batches of complete parent +
child rows trees while limiting each batch when it has reached >= max_rows
records. This allows the size of the database transaction to be controlled
by the user and enables more rows to be archived per invocation of
``nova-manage db archive_deleted_rows`` when there are a large number of
foreign key related records.
.. _Bug #2024258: https://bugs.launchpad.net/nova/+bug/2024258