Handle cells race condition deleting unscheduled instance

In cells, if a delete request comes in before an instance has been
scheduled to a cell, the API cell will broadcast a message to
delete the instance "everywhere" (in all cells) followed by a local
delete at the top. This causes a race between cells who have a
record for the instance syncing to the top via instance_destroy_at_top
and the local delete at the top (double delete is possible). If a
second delete is attempted, InstanceNotFound will be raised and
propagated up to the API, resulting in a 404 response to the caller
who requested to delete the instance.

This change catches and ignores InstanceNotFound during local delete
at the API cell to prevent propagating an error when the instance
has already been deleted.

Change-Id: Ic67f0f6edb0d38722787a6b88b39eae218110dc9
This commit is contained in:
melanie witt 2015-06-03 17:44:50 +00:00
parent e8b4eaa2a6
commit c38c1f8277
2 changed files with 66 additions and 4 deletions

View File

@ -232,11 +232,22 @@ class ComputeCellsAPI(compute_api.API):
# that an update came up from a child cell and cell_name is
# set now. If so try the delete again.
with excutils.save_and_reraise_exception() as exc:
instance.refresh()
if instance.cell_name:
try:
instance.refresh()
except exception.InstanceNotFound:
# NOTE(melwitt): If the instance has already been
# deleted by instance_destroy_at_top from a cell,
# instance.refresh() will raise InstanceNotFound.
exc.reraise = False
self._handle_cell_delete(context, instance,
method_name)
else:
if instance.cell_name:
exc.reraise = False
self._handle_cell_delete(context, instance,
method_name)
except exception.InstanceNotFound:
# NOTE(melwitt): We can get here if anything tries to
# lookup the instance after a instance_destroy_at_top hits.
pass
return
method = getattr(super(ComputeCellsAPI, self), method_name)

View File

@ -189,6 +189,57 @@ class CellsComputeAPITestCase(test_compute.ComputeAPITestCase):
_test()
def test_delete_instance_no_cell_destroy_fails_already_deleted(self):
# If the instance.destroy() is reached during _local_delete,
# it will raise ObjectActionError if the instance has already
# been deleted by a instance_destroy_at_top, and instance.refresh()
# will raise InstanceNotFound
instance = objects.Instance(uuid='fake-uuid', cell_name=None)
actionerror = exception.ObjectActionError(action='destroy', reason='')
notfound = exception.InstanceNotFound(instance_id=instance.uuid)
@mock.patch.object(compute_api.API, 'delete')
@mock.patch.object(self.compute_api.cells_rpcapi,
'instance_delete_everywhere')
@mock.patch.object(compute_api.API, '_local_delete',
side_effect=actionerror)
@mock.patch.object(instance, 'refresh', side_effect=notfound)
def _test(mock_refresh, mock_local_delete, mock_delete_everywhere,
mock_compute_delete):
self.compute_api.delete(self.context, instance)
mock_delete_everywhere.assert_called_once_with(self.context,
instance, 'hard')
mock_local_delete.assert_called_once_with(self.context,
instance, mock.ANY, 'delete', self.compute_api._do_delete)
mock_refresh.assert_called_once_with()
self.assertFalse(mock_compute_delete.called)
_test()
def test_delete_instance_no_cell_instance_not_found_already_deleted(self):
# If anything in _local_delete accesses the instance causing a db
# lookup before instance.destroy() is reached, if the instance has
# already been deleted by a instance_destroy_at_top,
# InstanceNotFound will be raised
instance = objects.Instance(uuid='fake-uuid', cell_name=None)
notfound = exception.InstanceNotFound(instance_id=instance.uuid)
@mock.patch.object(compute_api.API, 'delete')
@mock.patch.object(self.compute_api.cells_rpcapi,
'instance_delete_everywhere')
@mock.patch.object(compute_api.API, '_local_delete',
side_effect=notfound)
def _test(mock_local_delete, mock_delete_everywhere,
mock_compute_delete):
self.compute_api.delete(self.context, instance)
mock_delete_everywhere.assert_called_once_with(self.context,
instance, 'hard')
mock_local_delete.assert_called_once_with(self.context,
instance, mock.ANY, 'delete', self.compute_api._do_delete)
self.assertFalse(mock_compute_delete.called)
_test()
def test_soft_delete_instance_no_cell(self):
cells_rpcapi = self.compute_api.cells_rpcapi
self.mox.StubOutWithMock(cells_rpcapi,