From f6fefb6298cdaeaae18756055f7264af6cb1f8e0 Mon Sep 17 00:00:00 2001 From: melanie witt Date: Fri, 12 Feb 2021 01:50:19 +0000 Subject: [PATCH] Handle instance = None in _local_delete_cleanup Change I4d3193d8401614311010ed0e055fcb3aaeeebaed added some additional local delete cleanup to prevent leaking of placement allocations. The change introduced a regression in our "delete while booting" handling as the _local_delete_cleanup required a valid instance object to do its work and in two cases, we could have instance = None from _lookup_instance if we are racing with a create request and the conductor has deleted the instance record while we are in the middle of processing the delete request. This handles those scenarios by doing two things: (1) Changing the _local_delete_cleanup and _update_queued_for_deletion methods to take an instance UUID instead of a full instance object as they really only need the UUID to do their work (2) Saving a copy of the instance UUID before doing another instance lookup which might return None and passing that UUID to the _local_delete_cleanup and _update_queued_for_deletion methods Closes-Bug: #1914777 Change-Id: I03cf285ad83e09d88cdb702a88dfed53c01610f8 (cherry picked from commit 123f6262f63477d3f50dfad09688978e044bd9e0) (cherry picked from commit 35112d7667cee9fc33db660ee241164b38468c31) --- nova/compute/api.py | 35 +++++++++++-------- .../regressions/test_bug_1914777.py | 23 +++--------- nova/tests/unit/compute/test_compute_api.py | 7 ++-- 3 files changed, 29 insertions(+), 36 deletions(-) diff --git a/nova/compute/api.py b/nova/compute/api.py index e2d85ff0eacb..b188fd2427cf 100644 --- a/nova/compute/api.py +++ b/nova/compute/api.py @@ -2116,22 +2116,22 @@ class API(base.Base): return True return False - def _local_delete_cleanup(self, context, instance): + def _local_delete_cleanup(self, context, instance_uuid): # NOTE(aarents) Ensure instance allocation is cleared and instance # mapping queued as deleted before _delete() return try: self.placementclient.delete_allocation_for_instance( - context, instance.uuid) + context, instance_uuid) except exception.AllocationDeleteFailed: LOG.info("Allocation delete failed during local delete cleanup.", - instance=instance) + instance_uuid=instance_uuid) try: - self._update_queued_for_deletion(context, instance, True) + self._update_queued_for_deletion(context, instance_uuid, True) except exception.InstanceMappingNotFound: LOG.info("Instance Mapping does not exist while attempting " "local delete cleanup.", - instance=instance) + instance_uuid=instance_uuid) def _attempt_delete_of_buildrequest(self, context, instance): # If there is a BuildRequest then the instance may not have been @@ -2168,7 +2168,7 @@ class API(base.Base): if not instance.host and not may_have_ports_or_volumes: try: if self._delete_while_booting(context, instance): - self._local_delete_cleanup(context, instance) + self._local_delete_cleanup(context, instance.uuid) return # If instance.host was not set it's possible that the Instance # object here was pulled from a BuildRequest object and is not @@ -2177,6 +2177,11 @@ class API(base.Base): # properly. A lookup is attempted which will either return a # full Instance or None if not found. If not found then it's # acceptable to skip the rest of the delete processing. + + # Save a copy of the instance UUID early, in case + # _lookup_instance returns instance = None, to pass to + # _local_delete_cleanup if needed. + instance_uuid = instance.uuid cell, instance = self._lookup_instance(context, instance.uuid) if cell and instance: try: @@ -2187,11 +2192,11 @@ class API(base.Base): except exception.InstanceNotFound: pass # The instance was deleted or is already gone. - self._local_delete_cleanup(context, instance) + self._local_delete_cleanup(context, instance.uuid) return if not instance: # Instance is already deleted. - self._local_delete_cleanup(context, instance) + self._local_delete_cleanup(context, instance_uuid) return except exception.ObjectActionError: # NOTE(melwitt): This means the instance.host changed @@ -2204,7 +2209,7 @@ class API(base.Base): cell, instance = self._lookup_instance(context, instance.uuid) if not instance: # Instance is already deleted - self._local_delete_cleanup(context, instance) + self._local_delete_cleanup(context, instance_uuid) return bdms = objects.BlockDeviceMappingList.get_by_instance_uuid( @@ -2248,7 +2253,7 @@ class API(base.Base): 'field, its vm_state is %(state)s.', {'state': instance.vm_state}, instance=instance) - self._local_delete_cleanup(context, instance) + self._local_delete_cleanup(context, instance.uuid) return except exception.ObjectActionError as ex: # The instance's host likely changed under us as @@ -2433,7 +2438,7 @@ class API(base.Base): instance.destroy() @staticmethod - def _update_queued_for_deletion(context, instance, qfd): + def _update_queued_for_deletion(context, instance_uuid, qfd): # NOTE(tssurya): We query the instance_mapping record of this instance # and update the queued_for_delete flag to True (or False according to # the state of the instance). This just means that the instance is @@ -2442,7 +2447,7 @@ class API(base.Base): # value could be stale which is fine, considering its use is only # during down cell (desperate) situation. im = objects.InstanceMapping.get_by_instance_uuid(context, - instance.uuid) + instance_uuid) im.queued_for_delete = qfd im.save() @@ -2454,7 +2459,7 @@ class API(base.Base): instance.save() else: self.compute_rpcapi.terminate_instance(context, instance, bdms) - self._update_queued_for_deletion(context, instance, True) + self._update_queued_for_deletion(context, instance.uuid, True) def _do_soft_delete(self, context, instance, bdms, local=False): if local: @@ -2464,7 +2469,7 @@ class API(base.Base): instance.save() else: self.compute_rpcapi.soft_delete_instance(context, instance) - self._update_queued_for_deletion(context, instance, True) + self._update_queued_for_deletion(context, instance.uuid, True) # NOTE(maoy): we allow delete to be called no matter what vm_state says. @check_instance_lock @@ -2517,7 +2522,7 @@ class API(base.Base): instance.task_state = None instance.deleted_at = None instance.save(expected_task_state=[None]) - self._update_queued_for_deletion(context, instance, False) + self._update_queued_for_deletion(context, instance.uuid, False) @check_instance_lock @check_instance_state(task_state=None, diff --git a/nova/tests/functional/regressions/test_bug_1914777.py b/nova/tests/functional/regressions/test_bug_1914777.py index 6f5cd92de69c..88c40ab802cc 100644 --- a/nova/tests/functional/regressions/test_bug_1914777.py +++ b/nova/tests/functional/regressions/test_bug_1914777.py @@ -17,7 +17,6 @@ from nova import exception from nova import objects from nova import test from nova.tests import fixtures as nova_fixtures -from nova.tests.functional.api import client from nova.tests.functional import integrated_helpers import nova.tests.unit.image.fake from nova.tests.unit import policy_fixture @@ -80,14 +79,8 @@ class TestDeleteWhileBooting(test.TestCase, # while booting" in compute/api fails after a different request has # deleted it. br_not_found = exception.BuildRequestNotFound(uuid=self.server['id']) - mock_get_br.side_effect = [self.br, br_not_found] - # FIXME(melwitt): Delete request fails due to the AttributeError. - ex = self.assertRaises( - client.OpenStackApiException, self._delete_server, self.server) - self.assertEqual(500, ex.response.status_code) - self.assertIn('AttributeError', str(ex)) - # FIXME(melwitt): Uncomment when the bug is fixed. - # self._delete_server(self.server) + mock_get_br.side_effect = [self.br, br_not_found, br_not_found] + self._delete_server(self.server) @mock.patch('nova.objects.build_request.BuildRequest.get_by_instance_uuid') @mock.patch('nova.objects.InstanceMapping.get_by_instance_uuid') @@ -120,7 +113,7 @@ class TestDeleteWhileBooting(test.TestCase, context=self.ctxt, instance_uuid=self.server['id'], cell_mapping=self.cell_mappings['cell1']) mock_get_im.side_effect = [ - no_cell_im, has_cell_im, has_cell_im, has_cell_im] + no_cell_im, has_cell_im, has_cell_im, has_cell_im, has_cell_im] # Simulate that the instance object has been created by the conductor # in the create path while the delete request is being processed. # First lookups are before the instance has been deleted and the last @@ -128,7 +121,7 @@ class TestDeleteWhileBooting(test.TestCase, # request to make an instance object for testing. i = self.br.get_new_instance(self.ctxt) i_not_found = exception.InstanceNotFound(instance_id=self.server['id']) - mock_get_i.side_effect = [i, i, i, i_not_found] + mock_get_i.side_effect = [i, i, i, i_not_found, i_not_found] # Simulate that the conductor is running instance_destroy at the same # time as we are. @@ -143,10 +136,4 @@ class TestDeleteWhileBooting(test.TestCase, self.stub_out( 'nova.objects.instance.Instance.destroy', fake_instance_destroy) - # FIXME(melwitt): Delete request fails due to the AttributeError. - ex = self.assertRaises( - client.OpenStackApiException, self._delete_server, self.server) - self.assertEqual(500, ex.response.status_code) - self.assertIn('AttributeError', str(ex)) - # FIXME(melwitt): Uncomment when the bug is fixed. - # self._delete_server(self.server) + self._delete_server(self.server) diff --git a/nova/tests/unit/compute/test_compute_api.py b/nova/tests/unit/compute/test_compute_api.py index 21a30033445a..364a6078e6dc 100644 --- a/nova/tests/unit/compute/test_compute_api.py +++ b/nova/tests/unit/compute/test_compute_api.py @@ -4237,7 +4237,7 @@ class _ComputeAPIUnitTestMixIn(object): 'cores': 1 + instance.flavor.vcpus, 'ram': 512 + instance.flavor.memory_mb}, project_id=instance.project_id) - update_qfd.assert_called_once_with(admin_context, instance, False) + update_qfd.assert_called_once_with(admin_context, instance.uuid, False) @mock.patch('nova.objects.Quotas.get_all_by_project_and_user', new=mock.MagicMock()) @@ -4278,7 +4278,7 @@ class _ComputeAPIUnitTestMixIn(object): 'cores': 1 + instance.flavor.vcpus, 'ram': 512 + instance.flavor.memory_mb}, project_id=instance.project_id) - update_qfd.assert_called_once_with(self.context, instance, False) + update_qfd.assert_called_once_with(self.context, instance.uuid, False) @mock.patch.object(objects.InstanceAction, 'action_start') def test_external_instance_event(self, mock_action_start): @@ -6016,7 +6016,8 @@ class _ComputeAPIUnitTestMixIn(object): inst = objects.Instance(uuid=uuid) im = objects.InstanceMapping(instance_uuid=uuid) mock_get.return_value = im - self.compute_api._update_queued_for_deletion(self.context, inst, True) + self.compute_api._update_queued_for_deletion( + self.context, inst.uuid, True) self.assertTrue(im.queued_for_delete) mock_get.assert_called_once_with(self.context, inst.uuid) mock_save.assert_called_once_with()