From 79df36fecf8c8be5ae9d59397882ac844852043e Mon Sep 17 00:00:00 2001 From: melanie witt Date: Mon, 12 Oct 2020 22:27:52 +0000 Subject: [PATCH] Prevent archiving of pci_devices records because of 'instance_uuid' Currently in the archive_deleted_rows code, we will attempt to clean up "residue" of deleted instance records by assuming any table with a 'instance_uuid' column represents data tied to an instance's lifecycle and delete such records. This behavior poses a problem in the case where an instance has a PCI device allocated and someone deletes the instance. The 'instance_uuid' column in the pci_devices table is used to track the allocation association of a PCI with an instance. There is a small time window during which the instance record has been deleted but the PCI device has not yet been freed from a database record perspective as PCI devices are freed during the _complete_deletion method in the compute manager as part of the resource tracker update call. Records in the pci_devices table are anyway not related to the lifecycle of instances so they should not be considered residue to clean up if an instance is deleted. This adds a condition to avoid archiving pci_devices on the basis of an instance association. Closes-Bug: #1899541 Change-Id: Ie62d3566230aa3e2786d129adbb2e3570b06e4c6 (cherry picked from commit 1c256cf774693e2395ae8fe4a7a2f416a7aeb03a) (cherry picked from commit 09784db62fcd01124a101c4c69cab6e71e1ac781) --- nova/db/sqlalchemy/api.py | 6 +++++- nova/tests/functional/db/test_archive.py | 15 +++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/nova/db/sqlalchemy/api.py b/nova/db/sqlalchemy/api.py index a7f500dc9033..3a43ff40790e 100644 --- a/nova/db/sqlalchemy/api.py +++ b/nova/db/sqlalchemy/api.py @@ -4192,7 +4192,11 @@ def _archive_deleted_rows_for_table(metadata, tablename, max_rows, before): # NOTE(jake): instance_actions_events doesn't have a instance_uuid column # but still needs to be archived as it is a FK constraint if ((max_rows is None or rows_archived < max_rows) and - ('instance_uuid' in columns or + # NOTE(melwitt): The pci_devices table uses the 'instance_uuid' + # column to track the allocated association of a PCI device and its + # records are not tied to the lifecycles of instance records. + (tablename != 'pci_devices' and + 'instance_uuid' in columns or tablename == 'instance_actions_events')): instances = models.BASE.metadata.tables['instances'] limit = max_rows - rows_archived if max_rows is not None else None diff --git a/nova/tests/functional/db/test_archive.py b/nova/tests/functional/db/test_archive.py index e2c53deac498..8a6211cdcff9 100644 --- a/nova/tests/functional/db/test_archive.py +++ b/nova/tests/functional/db/test_archive.py @@ -114,6 +114,19 @@ class TestDatabaseArchive(test_servers.ServersTestBase): # Verify we have some system_metadata since we'll check that later. self.assertTrue(len(instance.system_metadata), 'No system_metadata for instance: %s' % server_id) + # Create a pci_devices record to simulate an instance that had a PCI + # device allocated at the time it was deleted. There is a window of + # time between deletion of the instance record and freeing of the PCI + # device in nova-compute's _complete_deletion method during RT update. + db.pci_device_update(admin_context, 1, 'fake-address', + {'compute_node_id': 1, + 'address': 'fake-address', + 'vendor_id': 'fake', + 'product_id': 'fake', + 'dev_type': 'fake', + 'label': 'fake', + 'status': 'allocated', + 'instance_uuid': instance.uuid}) # Now try and archive the soft deleted records. results, deleted_instance_uuids, archived = \ db.archive_deleted_rows(max_rows=100) @@ -128,6 +141,8 @@ class TestDatabaseArchive(test_servers.ServersTestBase): self.assertIn('instance_actions', results) self.assertIn('instance_actions_events', results) self.assertEqual(sum(results.values()), archived) + # Verify that the pci_devices record has not been dropped + self.assertNotIn('pci_devices', results) def _get_table_counts(self): engine = sqlalchemy_api.get_engine()