Clean up ports and volumes when deleting ERROR instance
Usually, when instance.host = None, it means the instance was never
scheduled. However, the exception handling routine in compute manager
[1] will set instance.host = None and set instance.vm_state = ERROR
if the instance fails to build on the compute host. If that happens, we
end up with an instance with host = None and vm_state = ERROR which may
have ports and volumes still allocated.
This adds some logic around deleting the instance when it may have
ports or volumes allocated.
1. If the instance is not in ERROR or SHELVED_OFFLOADED state, we
expect instance.host to be set to a compute host. So, if we find
instance.host = None in states other than ERROR or
SHELVED_OFFLOADED, we consider the instance to have failed
scheduling and not require ports or volumes to be freed, and we
simply destroy the instance database record and return. This is
the "delete while booting" scenario.
2. If the instance is in ERROR because of a failed build or is
SHELVED_OFFLOADED, we expect instance.host to be None even though
there could be ports or volumes allocated. In this case, run the
_local_delete routine to clean up ports and volumes and delete the
instance database record.
Co-Authored-By: Ankit Agrawal <ankit11.agrawal@nttdata.com>
Co-Authored-By: Samuel Matzek <smatzek@us.ibm.com>
Co-Authored-By: melanie witt <melwittt@gmail.com>
Closes-Bug: 1404867
Closes-Bug: 1408527
Conflicts:
nova/tests/unit/compute/test_compute_api.py
[1] https://github.com/openstack/nova/blob/55ea961/nova/compute/manager.py#L1927-L1929
Change-Id: I4dc6c8bd3bb6c135f8a698af41f5d0e026c39117
(cherry picked from commit b3f39244a3
)
This commit is contained in:
parent
72dcdecdb2
commit
1e59ed99c7
@ -1788,12 +1788,12 @@ class API(base.Base):
|
|||||||
return
|
return
|
||||||
|
|
||||||
cell = None
|
cell = None
|
||||||
# If there is an instance.host (or the instance is shelved-offloaded),
|
# If there is an instance.host (or the instance is shelved-offloaded or
|
||||||
# the instance has been scheduled and sent to a cell/compute which
|
# in error state), the instance has been scheduled and sent to a
|
||||||
# means it was pulled from the cell db.
|
# cell/compute which means it was pulled from the cell db.
|
||||||
# Normal delete should be attempted.
|
# Normal delete should be attempted.
|
||||||
if not (instance.host or
|
may_have_ports_or_volumes = self._may_have_ports_or_volumes(instance)
|
||||||
instance.vm_state == vm_states.SHELVED_OFFLOADED):
|
if not instance.host and not may_have_ports_or_volumes:
|
||||||
try:
|
try:
|
||||||
if self._delete_while_booting(context, instance):
|
if self._delete_while_booting(context, instance):
|
||||||
return
|
return
|
||||||
@ -1872,9 +1872,7 @@ class API(base.Base):
|
|||||||
# which will cause a cast to the child cell.
|
# which will cause a cast to the child cell.
|
||||||
cb(context, instance, bdms)
|
cb(context, instance, bdms)
|
||||||
return
|
return
|
||||||
shelved_offloaded = (instance.vm_state
|
if not instance.host and not may_have_ports_or_volumes:
|
||||||
== vm_states.SHELVED_OFFLOADED)
|
|
||||||
if not instance.host and not shelved_offloaded:
|
|
||||||
try:
|
try:
|
||||||
compute_utils.notify_about_instance_usage(
|
compute_utils.notify_about_instance_usage(
|
||||||
self.notifier, context, instance,
|
self.notifier, context, instance,
|
||||||
@ -1889,7 +1887,12 @@ class API(base.Base):
|
|||||||
{'state': instance.vm_state},
|
{'state': instance.vm_state},
|
||||||
instance=instance)
|
instance=instance)
|
||||||
return
|
return
|
||||||
except exception.ObjectActionError:
|
except exception.ObjectActionError as ex:
|
||||||
|
# The instance's host likely changed under us as
|
||||||
|
# this instance could be building and has since been
|
||||||
|
# scheduled. Continue with attempts to delete it.
|
||||||
|
LOG.debug('Refreshing instance because: %s', ex,
|
||||||
|
instance=instance)
|
||||||
instance.refresh()
|
instance.refresh()
|
||||||
|
|
||||||
if instance.vm_state == vm_states.RESIZED:
|
if instance.vm_state == vm_states.RESIZED:
|
||||||
@ -1897,7 +1900,8 @@ class API(base.Base):
|
|||||||
|
|
||||||
is_local_delete = True
|
is_local_delete = True
|
||||||
try:
|
try:
|
||||||
if not shelved_offloaded:
|
# instance.host must be set in order to look up the service.
|
||||||
|
if instance.host is not None:
|
||||||
service = objects.Service.get_by_compute_host(
|
service = objects.Service.get_by_compute_host(
|
||||||
context.elevated(), instance.host)
|
context.elevated(), instance.host)
|
||||||
is_local_delete = not self.servicegroup_api.service_is_up(
|
is_local_delete = not self.servicegroup_api.service_is_up(
|
||||||
@ -1914,7 +1918,9 @@ class API(base.Base):
|
|||||||
|
|
||||||
cb(context, instance, bdms)
|
cb(context, instance, bdms)
|
||||||
except exception.ComputeHostNotFound:
|
except exception.ComputeHostNotFound:
|
||||||
pass
|
LOG.debug('Compute host %s not found during service up check, '
|
||||||
|
'going to local delete instance', instance.host,
|
||||||
|
instance=instance)
|
||||||
|
|
||||||
if is_local_delete:
|
if is_local_delete:
|
||||||
# If instance is in shelved_offloaded state or compute node
|
# If instance is in shelved_offloaded state or compute node
|
||||||
@ -1941,6 +1947,16 @@ class API(base.Base):
|
|||||||
# NOTE(comstud): Race condition. Instance already gone.
|
# NOTE(comstud): Race condition. Instance already gone.
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def _may_have_ports_or_volumes(self, instance):
|
||||||
|
# NOTE(melwitt): When an instance build fails in the compute manager,
|
||||||
|
# the instance host and node are set to None and the vm_state is set
|
||||||
|
# to ERROR. In the case, the instance with host = None has actually
|
||||||
|
# been scheduled and may have ports and/or volumes allocated on the
|
||||||
|
# compute node.
|
||||||
|
if instance.vm_state in (vm_states.SHELVED_OFFLOADED, vm_states.ERROR):
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
def _confirm_resize_on_deleting(self, context, instance):
|
def _confirm_resize_on_deleting(self, context, instance):
|
||||||
# If in the middle of a resize, use confirm_resize to
|
# If in the middle of a resize, use confirm_resize to
|
||||||
# ensure the original instance is cleaned up too
|
# ensure the original instance is cleaned up too
|
||||||
@ -1996,6 +2012,14 @@ class API(base.Base):
|
|||||||
'the instance host %(instance_host)s.',
|
'the instance host %(instance_host)s.',
|
||||||
{'connector_host': connector.get('host'),
|
{'connector_host': connector.get('host'),
|
||||||
'instance_host': instance.host}, instance=instance)
|
'instance_host': instance.host}, instance=instance)
|
||||||
|
if (instance.host is None and
|
||||||
|
self._may_have_ports_or_volumes(instance)):
|
||||||
|
LOG.debug('Allowing use of stashed volume connector with '
|
||||||
|
'instance host None because instance with '
|
||||||
|
'vm_state %(vm_state)s has been scheduled in '
|
||||||
|
'the past.', {'vm_state': instance.vm_state},
|
||||||
|
instance=instance)
|
||||||
|
return connector
|
||||||
|
|
||||||
def _local_cleanup_bdm_volumes(self, bdms, instance, context):
|
def _local_cleanup_bdm_volumes(self, bdms, instance, context):
|
||||||
"""The method deletes the bdm records and, if a bdm is a volume, call
|
"""The method deletes the bdm records and, if a bdm is a volume, call
|
||||||
|
@ -80,9 +80,4 @@ class DeleteWithReservedVolumes(integrated_helpers._IntegratedTestBase,
|
|||||||
|
|
||||||
# The volume should no longer be reserved as the deletion of the
|
# The volume should no longer be reserved as the deletion of the
|
||||||
# server should have released all the resources.
|
# server should have released all the resources.
|
||||||
# TODO(mnaser): Uncomment this in patch resolving the issue
|
self.assertNotIn(volume_id, self.cinder.reserved_volumes)
|
||||||
# self.assertNotIn(volume_id, self.cinder.reserved_volumes)
|
|
||||||
|
|
||||||
# The volume is still reserved at this point, which it should not be.
|
|
||||||
# TODO(mnaser): Remove this in patch resolving the issue
|
|
||||||
self.assertIn(volume_id, self.cinder.reserved_volumes)
|
|
||||||
|
@ -59,6 +59,7 @@ class TestDeleteFromCell0CheckQuota(test.TestCase):
|
|||||||
|
|
||||||
self.start_service('conductor')
|
self.start_service('conductor')
|
||||||
self.start_service('scheduler')
|
self.start_service('scheduler')
|
||||||
|
self.start_service('consoleauth')
|
||||||
|
|
||||||
# We don't actually start a compute service; this way we don't have any
|
# We don't actually start a compute service; this way we don't have any
|
||||||
# compute hosts to schedule the instance to and will go into error and
|
# compute hosts to schedule the instance to and will go into error and
|
||||||
|
@ -54,6 +54,7 @@ class ServerListLimitMarkerCell0Test(test.TestCase,
|
|||||||
self.start_service('conductor')
|
self.start_service('conductor')
|
||||||
self.flags(driver='chance_scheduler', group='scheduler')
|
self.flags(driver='chance_scheduler', group='scheduler')
|
||||||
self.start_service('scheduler')
|
self.start_service('scheduler')
|
||||||
|
self.start_service('consoleauth')
|
||||||
# We don't start the compute service because we want NoValidHost so
|
# We don't start the compute service because we want NoValidHost so
|
||||||
# all of the instances go into ERROR state and get put into cell0.
|
# all of the instances go into ERROR state and get put into cell0.
|
||||||
self.useFixture(cast_as_call.CastAsCall(self))
|
self.useFixture(cast_as_call.CastAsCall(self))
|
||||||
|
@ -952,7 +952,7 @@ class _ComputeAPIUnitTestMixIn(object):
|
|||||||
if self.cell_type != 'api':
|
if self.cell_type != 'api':
|
||||||
if inst.vm_state == vm_states.RESIZED:
|
if inst.vm_state == vm_states.RESIZED:
|
||||||
self._test_delete_resized_part(inst)
|
self._test_delete_resized_part(inst)
|
||||||
if inst.vm_state != vm_states.SHELVED_OFFLOADED:
|
if inst.host is not None:
|
||||||
self.context.elevated().AndReturn(self.context)
|
self.context.elevated().AndReturn(self.context)
|
||||||
objects.Service.get_by_compute_host(self.context,
|
objects.Service.get_by_compute_host(self.context,
|
||||||
inst.host).AndReturn(objects.Service())
|
inst.host).AndReturn(objects.Service())
|
||||||
@ -960,9 +960,7 @@ class _ComputeAPIUnitTestMixIn(object):
|
|||||||
mox.IsA(objects.Service)).AndReturn(
|
mox.IsA(objects.Service)).AndReturn(
|
||||||
inst.host != 'down-host')
|
inst.host != 'down-host')
|
||||||
|
|
||||||
if (inst.host == 'down-host' or
|
if inst.host == 'down-host' or inst.host is None:
|
||||||
inst.vm_state == vm_states.SHELVED_OFFLOADED):
|
|
||||||
|
|
||||||
self._test_downed_host_part(inst, updates, delete_time,
|
self._test_downed_host_part(inst, updates, delete_time,
|
||||||
delete_type)
|
delete_type)
|
||||||
cast = False
|
cast = False
|
||||||
@ -1052,6 +1050,76 @@ class _ComputeAPIUnitTestMixIn(object):
|
|||||||
system_metadata=fake_sys_meta)
|
system_metadata=fake_sys_meta)
|
||||||
self._test_delete('force_delete', vm_state=vm_state)
|
self._test_delete('force_delete', vm_state=vm_state)
|
||||||
|
|
||||||
|
@mock.patch('nova.compute.api.API._delete_while_booting',
|
||||||
|
return_value=False)
|
||||||
|
@mock.patch('nova.compute.api.API._lookup_instance')
|
||||||
|
@mock.patch('nova.objects.BlockDeviceMappingList.get_by_instance_uuid')
|
||||||
|
@mock.patch('nova.objects.Instance.save')
|
||||||
|
@mock.patch('nova.compute.utils.notify_about_instance_usage')
|
||||||
|
@mock.patch('nova.objects.Service.get_by_compute_host')
|
||||||
|
@mock.patch('nova.compute.api.API._local_delete')
|
||||||
|
def test_delete_error_state_with_no_host(
|
||||||
|
self, mock_local_delete, mock_service_get, _mock_notify,
|
||||||
|
_mock_save, mock_bdm_get, mock_lookup, _mock_del_booting):
|
||||||
|
# Instance in error state with no host should be a local delete
|
||||||
|
# for non API cells
|
||||||
|
inst = self._create_instance_obj(params=dict(vm_state=vm_states.ERROR,
|
||||||
|
host=None))
|
||||||
|
mock_lookup.return_value = None, inst
|
||||||
|
with mock.patch.object(self.compute_api.compute_rpcapi,
|
||||||
|
'terminate_instance') as mock_terminate:
|
||||||
|
self.compute_api.delete(self.context, inst)
|
||||||
|
if self.cell_type == 'api':
|
||||||
|
mock_terminate.assert_called_once_with(
|
||||||
|
self.context, inst, mock_bdm_get.return_value,
|
||||||
|
delete_type='delete')
|
||||||
|
mock_local_delete.assert_not_called()
|
||||||
|
else:
|
||||||
|
mock_local_delete.assert_called_once_with(
|
||||||
|
self.context, inst, mock_bdm_get.return_value,
|
||||||
|
'delete', self.compute_api._do_delete)
|
||||||
|
mock_terminate.assert_not_called()
|
||||||
|
mock_service_get.assert_not_called()
|
||||||
|
|
||||||
|
@mock.patch('nova.compute.api.API._delete_while_booting',
|
||||||
|
return_value=False)
|
||||||
|
@mock.patch('nova.compute.api.API._lookup_instance')
|
||||||
|
@mock.patch('nova.objects.BlockDeviceMappingList.get_by_instance_uuid')
|
||||||
|
@mock.patch('nova.objects.Instance.save')
|
||||||
|
@mock.patch('nova.compute.utils.notify_about_instance_usage')
|
||||||
|
@mock.patch('nova.objects.Service.get_by_compute_host')
|
||||||
|
@mock.patch('nova.context.RequestContext.elevated')
|
||||||
|
@mock.patch('nova.servicegroup.api.API.service_is_up', return_value=True)
|
||||||
|
@mock.patch('nova.compute.api.API._record_action_start')
|
||||||
|
@mock.patch('nova.compute.api.API._local_delete')
|
||||||
|
def test_delete_error_state_with_host_set(
|
||||||
|
self, mock_local_delete, _mock_record, mock_service_up,
|
||||||
|
mock_elevated, mock_service_get, _mock_notify, _mock_save,
|
||||||
|
mock_bdm_get, mock_lookup, _mock_del_booting):
|
||||||
|
# Instance in error state with host set should be a non-local delete
|
||||||
|
# for non API cells if the service is up
|
||||||
|
inst = self._create_instance_obj(params=dict(vm_state=vm_states.ERROR,
|
||||||
|
host='fake-host'))
|
||||||
|
mock_lookup.return_value = inst
|
||||||
|
with mock.patch.object(self.compute_api.compute_rpcapi,
|
||||||
|
'terminate_instance') as mock_terminate:
|
||||||
|
self.compute_api.delete(self.context, inst)
|
||||||
|
if self.cell_type == 'api':
|
||||||
|
mock_terminate.assert_called_once_with(
|
||||||
|
self.context, inst, mock_bdm_get.return_value,
|
||||||
|
delete_type='delete')
|
||||||
|
mock_local_delete.assert_not_called()
|
||||||
|
mock_service_get.assert_not_called()
|
||||||
|
else:
|
||||||
|
mock_service_get.assert_called_once_with(
|
||||||
|
mock_elevated.return_value, 'fake-host')
|
||||||
|
mock_service_up.assert_called_once_with(
|
||||||
|
mock_service_get.return_value)
|
||||||
|
mock_terminate.assert_called_once_with(
|
||||||
|
self.context, inst, mock_bdm_get.return_value,
|
||||||
|
delete_type='delete')
|
||||||
|
mock_local_delete.assert_not_called()
|
||||||
|
|
||||||
def test_delete_fast_if_host_not_set(self):
|
def test_delete_fast_if_host_not_set(self):
|
||||||
self.useFixture(fixtures.AllServicesCurrent())
|
self.useFixture(fixtures.AllServicesCurrent())
|
||||||
inst = self._create_instance_obj()
|
inst = self._create_instance_obj()
|
||||||
@ -1257,6 +1325,52 @@ class _ComputeAPIUnitTestMixIn(object):
|
|||||||
self.assertIsNone(
|
self.assertIsNone(
|
||||||
self.compute_api._get_stashed_volume_connector(bdm, inst))
|
self.compute_api._get_stashed_volume_connector(bdm, inst))
|
||||||
|
|
||||||
|
@mock.patch.object(objects.BlockDeviceMapping, 'destroy')
|
||||||
|
def test_local_cleanup_bdm_volumes_stashed_connector_host_none(
|
||||||
|
self, mock_destroy):
|
||||||
|
"""Tests that we call volume_api.terminate_connection when we found
|
||||||
|
a stashed connector in the bdm.connection_info dict.
|
||||||
|
|
||||||
|
This tests the case where:
|
||||||
|
|
||||||
|
1) the instance host is None
|
||||||
|
2) the instance vm_state is one where we expect host to be None
|
||||||
|
|
||||||
|
We allow a mismatch of the host in this situation if the instance is
|
||||||
|
in a state where we expect its host to have been set to None, such
|
||||||
|
as ERROR or SHELVED_OFFLOADED.
|
||||||
|
"""
|
||||||
|
params = dict(host=None, vm_state=vm_states.ERROR)
|
||||||
|
inst = self._create_instance_obj(params=params)
|
||||||
|
conn_info = {'connector': {'host': 'orig-host'}}
|
||||||
|
vol_bdm = objects.BlockDeviceMapping(self.context, id=1,
|
||||||
|
instance_uuid=inst.uuid,
|
||||||
|
volume_id=uuids.volume_id,
|
||||||
|
source_type='volume',
|
||||||
|
destination_type='volume',
|
||||||
|
delete_on_termination=True,
|
||||||
|
connection_info=jsonutils.dumps(
|
||||||
|
conn_info),
|
||||||
|
attachment_id=None)
|
||||||
|
bdms = objects.BlockDeviceMappingList(objects=[vol_bdm])
|
||||||
|
|
||||||
|
@mock.patch.object(self.compute_api.volume_api, 'terminate_connection')
|
||||||
|
@mock.patch.object(self.compute_api.volume_api, 'detach')
|
||||||
|
@mock.patch.object(self.compute_api.volume_api, 'delete')
|
||||||
|
@mock.patch.object(self.context, 'elevated', return_value=self.context)
|
||||||
|
def do_test(self, mock_elevated, mock_delete,
|
||||||
|
mock_detach, mock_terminate):
|
||||||
|
self.compute_api._local_cleanup_bdm_volumes(
|
||||||
|
bdms, inst, self.context)
|
||||||
|
mock_terminate.assert_called_once_with(
|
||||||
|
self.context, uuids.volume_id, conn_info['connector'])
|
||||||
|
mock_detach.assert_called_once_with(
|
||||||
|
self.context, uuids.volume_id, inst.uuid)
|
||||||
|
mock_delete.assert_called_once_with(self.context, uuids.volume_id)
|
||||||
|
mock_destroy.assert_called_once_with()
|
||||||
|
|
||||||
|
do_test(self)
|
||||||
|
|
||||||
def test_local_delete_without_info_cache(self):
|
def test_local_delete_without_info_cache(self):
|
||||||
inst = self._create_instance_obj()
|
inst = self._create_instance_obj()
|
||||||
|
|
||||||
@ -5242,6 +5356,57 @@ class ComputeAPIUnitTestCase(_ComputeAPIUnitTestMixIn, test.NoDBTestCase):
|
|||||||
network_id=None, port_id=None,
|
network_id=None, port_id=None,
|
||||||
requested_ip=None, tag='foo')
|
requested_ip=None, tag='foo')
|
||||||
|
|
||||||
|
@mock.patch('nova.compute.api.API._delete_while_booting',
|
||||||
|
return_value=False)
|
||||||
|
@mock.patch('nova.compute.api.API._lookup_instance')
|
||||||
|
@mock.patch.object(objects.BlockDeviceMappingList, 'get_by_instance_uuid')
|
||||||
|
@mock.patch('nova.context.RequestContext.elevated')
|
||||||
|
@mock.patch.object(objects.Instance, 'save')
|
||||||
|
@mock.patch.object(compute_utils, 'notify_about_instance_usage')
|
||||||
|
@mock.patch.object(objects.BlockDeviceMapping, 'destroy')
|
||||||
|
@mock.patch.object(objects.Instance, 'destroy')
|
||||||
|
def _test_delete_volume_backed_instance(
|
||||||
|
self, vm_state, mock_instance_destroy, bdm_destroy,
|
||||||
|
notify_about_instance_usage, mock_save, mock_elevated,
|
||||||
|
bdm_get_by_instance_uuid, mock_lookup, _mock_del_booting):
|
||||||
|
volume_id = uuidutils.generate_uuid()
|
||||||
|
conn_info = {'connector': {'host': 'orig-host'}}
|
||||||
|
bdms = [objects.BlockDeviceMapping(
|
||||||
|
**fake_block_device.FakeDbBlockDeviceDict(
|
||||||
|
{'id': 42, 'volume_id': volume_id,
|
||||||
|
'source_type': 'volume', 'destination_type': 'volume',
|
||||||
|
'delete_on_termination': False,
|
||||||
|
'connection_info': jsonutils.dumps(conn_info)}))]
|
||||||
|
|
||||||
|
bdm_get_by_instance_uuid.return_value = bdms
|
||||||
|
mock_elevated.return_value = self.context
|
||||||
|
|
||||||
|
params = {'host': None, 'vm_state': vm_state}
|
||||||
|
inst = self._create_instance_obj(params=params)
|
||||||
|
mock_lookup.return_value = None, inst
|
||||||
|
connector = conn_info['connector']
|
||||||
|
|
||||||
|
with mock.patch.object(self.compute_api.network_api,
|
||||||
|
'deallocate_for_instance') as mock_deallocate, \
|
||||||
|
mock.patch.object(self.compute_api.volume_api,
|
||||||
|
'terminate_connection') as mock_terminate_conn, \
|
||||||
|
mock.patch.object(self.compute_api.volume_api,
|
||||||
|
'detach') as mock_detach:
|
||||||
|
self.compute_api.delete(self.context, inst)
|
||||||
|
|
||||||
|
mock_deallocate.assert_called_once_with(self.context, inst)
|
||||||
|
mock_detach.assert_called_once_with(self.context, volume_id,
|
||||||
|
inst.uuid)
|
||||||
|
mock_terminate_conn.assert_called_once_with(self.context,
|
||||||
|
volume_id, connector)
|
||||||
|
bdm_destroy.assert_called_once_with()
|
||||||
|
|
||||||
|
def test_delete_volume_backed_instance_in_error(self):
|
||||||
|
self._test_delete_volume_backed_instance(vm_states.ERROR)
|
||||||
|
|
||||||
|
def test_delete_volume_backed_instance_in_shelved_offloaded(self):
|
||||||
|
self._test_delete_volume_backed_instance(vm_states.SHELVED_OFFLOADED)
|
||||||
|
|
||||||
|
|
||||||
class ComputeAPIAPICellUnitTestCase(_ComputeAPIUnitTestMixIn,
|
class ComputeAPIAPICellUnitTestCase(_ComputeAPIUnitTestMixIn,
|
||||||
test.NoDBTestCase):
|
test.NoDBTestCase):
|
||||||
|
Loading…
Reference in New Issue
Block a user