Merge "Clean up ports and volumes when deleting ERROR instance" into stable/pike

This commit is contained in:
Zuul 2018-03-01 14:39:23 +00:00 committed by Gerrit Code Review
commit 18d9e99e6e
5 changed files with 207 additions and 21 deletions

View File

@ -1788,12 +1788,12 @@ class API(base.Base):
return
cell = None
# If there is an instance.host (or the instance is shelved-offloaded),
# the instance has been scheduled and sent to a cell/compute which
# means it was pulled from the cell db.
# If there is an instance.host (or the instance is shelved-offloaded or
# in error state), the instance has been scheduled and sent to a
# cell/compute which means it was pulled from the cell db.
# Normal delete should be attempted.
if not (instance.host or
instance.vm_state == vm_states.SHELVED_OFFLOADED):
may_have_ports_or_volumes = self._may_have_ports_or_volumes(instance)
if not instance.host and not may_have_ports_or_volumes:
try:
if self._delete_while_booting(context, instance):
return
@ -1872,9 +1872,7 @@ class API(base.Base):
# which will cause a cast to the child cell.
cb(context, instance, bdms)
return
shelved_offloaded = (instance.vm_state
== vm_states.SHELVED_OFFLOADED)
if not instance.host and not shelved_offloaded:
if not instance.host and not may_have_ports_or_volumes:
try:
compute_utils.notify_about_instance_usage(
self.notifier, context, instance,
@ -1889,7 +1887,12 @@ class API(base.Base):
{'state': instance.vm_state},
instance=instance)
return
except exception.ObjectActionError:
except exception.ObjectActionError as ex:
# The instance's host likely changed under us as
# this instance could be building and has since been
# scheduled. Continue with attempts to delete it.
LOG.debug('Refreshing instance because: %s', ex,
instance=instance)
instance.refresh()
if instance.vm_state == vm_states.RESIZED:
@ -1897,7 +1900,8 @@ class API(base.Base):
is_local_delete = True
try:
if not shelved_offloaded:
# instance.host must be set in order to look up the service.
if instance.host is not None:
service = objects.Service.get_by_compute_host(
context.elevated(), instance.host)
is_local_delete = not self.servicegroup_api.service_is_up(
@ -1914,7 +1918,9 @@ class API(base.Base):
cb(context, instance, bdms)
except exception.ComputeHostNotFound:
pass
LOG.debug('Compute host %s not found during service up check, '
'going to local delete instance', instance.host,
instance=instance)
if is_local_delete:
# If instance is in shelved_offloaded state or compute node
@ -1941,6 +1947,16 @@ class API(base.Base):
# NOTE(comstud): Race condition. Instance already gone.
pass
def _may_have_ports_or_volumes(self, instance):
# NOTE(melwitt): When an instance build fails in the compute manager,
# the instance host and node are set to None and the vm_state is set
# to ERROR. In the case, the instance with host = None has actually
# been scheduled and may have ports and/or volumes allocated on the
# compute node.
if instance.vm_state in (vm_states.SHELVED_OFFLOADED, vm_states.ERROR):
return True
return False
def _confirm_resize_on_deleting(self, context, instance):
# If in the middle of a resize, use confirm_resize to
# ensure the original instance is cleaned up too
@ -1996,6 +2012,14 @@ class API(base.Base):
'the instance host %(instance_host)s.',
{'connector_host': connector.get('host'),
'instance_host': instance.host}, instance=instance)
if (instance.host is None and
self._may_have_ports_or_volumes(instance)):
LOG.debug('Allowing use of stashed volume connector with '
'instance host None because instance with '
'vm_state %(vm_state)s has been scheduled in '
'the past.', {'vm_state': instance.vm_state},
instance=instance)
return connector
def _local_cleanup_bdm_volumes(self, bdms, instance, context):
"""The method deletes the bdm records and, if a bdm is a volume, call

View File

@ -80,9 +80,4 @@ class DeleteWithReservedVolumes(integrated_helpers._IntegratedTestBase,
# The volume should no longer be reserved as the deletion of the
# server should have released all the resources.
# TODO(mnaser): Uncomment this in patch resolving the issue
# self.assertNotIn(volume_id, self.cinder.reserved_volumes)
# The volume is still reserved at this point, which it should not be.
# TODO(mnaser): Remove this in patch resolving the issue
self.assertIn(volume_id, self.cinder.reserved_volumes)
self.assertNotIn(volume_id, self.cinder.reserved_volumes)

View File

@ -59,6 +59,7 @@ class TestDeleteFromCell0CheckQuota(test.TestCase):
self.start_service('conductor')
self.start_service('scheduler')
self.start_service('consoleauth')
# We don't actually start a compute service; this way we don't have any
# compute hosts to schedule the instance to and will go into error and

View File

@ -54,6 +54,7 @@ class ServerListLimitMarkerCell0Test(test.TestCase,
self.start_service('conductor')
self.flags(driver='chance_scheduler', group='scheduler')
self.start_service('scheduler')
self.start_service('consoleauth')
# We don't start the compute service because we want NoValidHost so
# all of the instances go into ERROR state and get put into cell0.
self.useFixture(cast_as_call.CastAsCall(self))

View File

@ -952,7 +952,7 @@ class _ComputeAPIUnitTestMixIn(object):
if self.cell_type != 'api':
if inst.vm_state == vm_states.RESIZED:
self._test_delete_resized_part(inst)
if inst.vm_state != vm_states.SHELVED_OFFLOADED:
if inst.host is not None:
self.context.elevated().AndReturn(self.context)
objects.Service.get_by_compute_host(self.context,
inst.host).AndReturn(objects.Service())
@ -960,9 +960,7 @@ class _ComputeAPIUnitTestMixIn(object):
mox.IsA(objects.Service)).AndReturn(
inst.host != 'down-host')
if (inst.host == 'down-host' or
inst.vm_state == vm_states.SHELVED_OFFLOADED):
if inst.host == 'down-host' or inst.host is None:
self._test_downed_host_part(inst, updates, delete_time,
delete_type)
cast = False
@ -1052,6 +1050,76 @@ class _ComputeAPIUnitTestMixIn(object):
system_metadata=fake_sys_meta)
self._test_delete('force_delete', vm_state=vm_state)
@mock.patch('nova.compute.api.API._delete_while_booting',
return_value=False)
@mock.patch('nova.compute.api.API._lookup_instance')
@mock.patch('nova.objects.BlockDeviceMappingList.get_by_instance_uuid')
@mock.patch('nova.objects.Instance.save')
@mock.patch('nova.compute.utils.notify_about_instance_usage')
@mock.patch('nova.objects.Service.get_by_compute_host')
@mock.patch('nova.compute.api.API._local_delete')
def test_delete_error_state_with_no_host(
self, mock_local_delete, mock_service_get, _mock_notify,
_mock_save, mock_bdm_get, mock_lookup, _mock_del_booting):
# Instance in error state with no host should be a local delete
# for non API cells
inst = self._create_instance_obj(params=dict(vm_state=vm_states.ERROR,
host=None))
mock_lookup.return_value = None, inst
with mock.patch.object(self.compute_api.compute_rpcapi,
'terminate_instance') as mock_terminate:
self.compute_api.delete(self.context, inst)
if self.cell_type == 'api':
mock_terminate.assert_called_once_with(
self.context, inst, mock_bdm_get.return_value,
delete_type='delete')
mock_local_delete.assert_not_called()
else:
mock_local_delete.assert_called_once_with(
self.context, inst, mock_bdm_get.return_value,
'delete', self.compute_api._do_delete)
mock_terminate.assert_not_called()
mock_service_get.assert_not_called()
@mock.patch('nova.compute.api.API._delete_while_booting',
return_value=False)
@mock.patch('nova.compute.api.API._lookup_instance')
@mock.patch('nova.objects.BlockDeviceMappingList.get_by_instance_uuid')
@mock.patch('nova.objects.Instance.save')
@mock.patch('nova.compute.utils.notify_about_instance_usage')
@mock.patch('nova.objects.Service.get_by_compute_host')
@mock.patch('nova.context.RequestContext.elevated')
@mock.patch('nova.servicegroup.api.API.service_is_up', return_value=True)
@mock.patch('nova.compute.api.API._record_action_start')
@mock.patch('nova.compute.api.API._local_delete')
def test_delete_error_state_with_host_set(
self, mock_local_delete, _mock_record, mock_service_up,
mock_elevated, mock_service_get, _mock_notify, _mock_save,
mock_bdm_get, mock_lookup, _mock_del_booting):
# Instance in error state with host set should be a non-local delete
# for non API cells if the service is up
inst = self._create_instance_obj(params=dict(vm_state=vm_states.ERROR,
host='fake-host'))
mock_lookup.return_value = inst
with mock.patch.object(self.compute_api.compute_rpcapi,
'terminate_instance') as mock_terminate:
self.compute_api.delete(self.context, inst)
if self.cell_type == 'api':
mock_terminate.assert_called_once_with(
self.context, inst, mock_bdm_get.return_value,
delete_type='delete')
mock_local_delete.assert_not_called()
mock_service_get.assert_not_called()
else:
mock_service_get.assert_called_once_with(
mock_elevated.return_value, 'fake-host')
mock_service_up.assert_called_once_with(
mock_service_get.return_value)
mock_terminate.assert_called_once_with(
self.context, inst, mock_bdm_get.return_value,
delete_type='delete')
mock_local_delete.assert_not_called()
def test_delete_fast_if_host_not_set(self):
self.useFixture(fixtures.AllServicesCurrent())
inst = self._create_instance_obj()
@ -1257,6 +1325,52 @@ class _ComputeAPIUnitTestMixIn(object):
self.assertIsNone(
self.compute_api._get_stashed_volume_connector(bdm, inst))
@mock.patch.object(objects.BlockDeviceMapping, 'destroy')
def test_local_cleanup_bdm_volumes_stashed_connector_host_none(
self, mock_destroy):
"""Tests that we call volume_api.terminate_connection when we found
a stashed connector in the bdm.connection_info dict.
This tests the case where:
1) the instance host is None
2) the instance vm_state is one where we expect host to be None
We allow a mismatch of the host in this situation if the instance is
in a state where we expect its host to have been set to None, such
as ERROR or SHELVED_OFFLOADED.
"""
params = dict(host=None, vm_state=vm_states.ERROR)
inst = self._create_instance_obj(params=params)
conn_info = {'connector': {'host': 'orig-host'}}
vol_bdm = objects.BlockDeviceMapping(self.context, id=1,
instance_uuid=inst.uuid,
volume_id=uuids.volume_id,
source_type='volume',
destination_type='volume',
delete_on_termination=True,
connection_info=jsonutils.dumps(
conn_info),
attachment_id=None)
bdms = objects.BlockDeviceMappingList(objects=[vol_bdm])
@mock.patch.object(self.compute_api.volume_api, 'terminate_connection')
@mock.patch.object(self.compute_api.volume_api, 'detach')
@mock.patch.object(self.compute_api.volume_api, 'delete')
@mock.patch.object(self.context, 'elevated', return_value=self.context)
def do_test(self, mock_elevated, mock_delete,
mock_detach, mock_terminate):
self.compute_api._local_cleanup_bdm_volumes(
bdms, inst, self.context)
mock_terminate.assert_called_once_with(
self.context, uuids.volume_id, conn_info['connector'])
mock_detach.assert_called_once_with(
self.context, uuids.volume_id, inst.uuid)
mock_delete.assert_called_once_with(self.context, uuids.volume_id)
mock_destroy.assert_called_once_with()
do_test(self)
def test_local_delete_without_info_cache(self):
inst = self._create_instance_obj()
@ -5248,6 +5362,57 @@ class ComputeAPIUnitTestCase(_ComputeAPIUnitTestMixIn, test.NoDBTestCase):
network_id=None, port_id=None,
requested_ip=None, tag='foo')
@mock.patch('nova.compute.api.API._delete_while_booting',
return_value=False)
@mock.patch('nova.compute.api.API._lookup_instance')
@mock.patch.object(objects.BlockDeviceMappingList, 'get_by_instance_uuid')
@mock.patch('nova.context.RequestContext.elevated')
@mock.patch.object(objects.Instance, 'save')
@mock.patch.object(compute_utils, 'notify_about_instance_usage')
@mock.patch.object(objects.BlockDeviceMapping, 'destroy')
@mock.patch.object(objects.Instance, 'destroy')
def _test_delete_volume_backed_instance(
self, vm_state, mock_instance_destroy, bdm_destroy,
notify_about_instance_usage, mock_save, mock_elevated,
bdm_get_by_instance_uuid, mock_lookup, _mock_del_booting):
volume_id = uuidutils.generate_uuid()
conn_info = {'connector': {'host': 'orig-host'}}
bdms = [objects.BlockDeviceMapping(
**fake_block_device.FakeDbBlockDeviceDict(
{'id': 42, 'volume_id': volume_id,
'source_type': 'volume', 'destination_type': 'volume',
'delete_on_termination': False,
'connection_info': jsonutils.dumps(conn_info)}))]
bdm_get_by_instance_uuid.return_value = bdms
mock_elevated.return_value = self.context
params = {'host': None, 'vm_state': vm_state}
inst = self._create_instance_obj(params=params)
mock_lookup.return_value = None, inst
connector = conn_info['connector']
with mock.patch.object(self.compute_api.network_api,
'deallocate_for_instance') as mock_deallocate, \
mock.patch.object(self.compute_api.volume_api,
'terminate_connection') as mock_terminate_conn, \
mock.patch.object(self.compute_api.volume_api,
'detach') as mock_detach:
self.compute_api.delete(self.context, inst)
mock_deallocate.assert_called_once_with(self.context, inst)
mock_detach.assert_called_once_with(self.context, volume_id,
inst.uuid)
mock_terminate_conn.assert_called_once_with(self.context,
volume_id, connector)
bdm_destroy.assert_called_once_with()
def test_delete_volume_backed_instance_in_error(self):
self._test_delete_volume_backed_instance(vm_states.ERROR)
def test_delete_volume_backed_instance_in_shelved_offloaded(self):
self._test_delete_volume_backed_instance(vm_states.SHELVED_OFFLOADED)
class ComputeAPIAPICellUnitTestCase(_ComputeAPIUnitTestMixIn,
test.NoDBTestCase):