Ensure source compute is up when confirming a resize
When _poll_unconfirmed_resizes runs or a user tries to confirm a resize in the API, if the source compute service is down the migration will be stuck in "confirming" status if the request never reached the source compute. Subsequent runs of _poll_unconfirmed_resizes will not be able to auto-confirm the resize, nor will the user be able to manually confirm the resize. An admin could reset the status on the server to ACTIVE or ERROR, but that means the source compute never gets cleaned up since you can only confirm or revert a resize on a server with VERIFY_RESIZE status. This adds a check in the API before updating the migration record such that if the source compute service is down the API returns a 409 response as an indication to try again later. SingleCellSimple._fake_target_cell is updated so that tests using it can assert when a context was targeted without having to stub nova.context.target_cell. As a result some HostManager unit tests needed to be updated. Change-Id: I33aa5e32cb321e5a16da51e227af2f67ed9e6713 Closes-Bug: #1855927
This commit is contained in:
parent
8ca9b7254b
commit
e4601c77fb
|
@ -885,7 +885,10 @@ class ServersController(wsgi.Controller):
|
|||
except exception.MigrationNotFound:
|
||||
msg = _("Instance has not been resized.")
|
||||
raise exc.HTTPBadRequest(explanation=msg)
|
||||
except exception.InstanceIsLocked as e:
|
||||
except (
|
||||
exception.InstanceIsLocked,
|
||||
exception.ServiceUnavailable,
|
||||
) as e:
|
||||
raise exc.HTTPConflict(explanation=e.format_message())
|
||||
except exception.InstanceInvalidState as state_error:
|
||||
common.raise_http_conflict_for_instance_invalid_state(state_error,
|
||||
|
|
|
@ -3775,10 +3775,40 @@ class API(base.Base):
|
|||
migration.dest_compute,
|
||||
reqspec)
|
||||
|
||||
@staticmethod
|
||||
def _get_source_compute_service(context, migration):
|
||||
"""Find the source compute Service object given the Migration.
|
||||
|
||||
:param context: nova auth RequestContext target at the destination
|
||||
compute cell
|
||||
:param migration: Migration object for the move operation
|
||||
:return: Service object representing the source host nova-compute
|
||||
"""
|
||||
if migration.cross_cell_move:
|
||||
# The source compute could be in another cell so look up the
|
||||
# HostMapping to determine the source cell.
|
||||
hm = objects.HostMapping.get_by_host(
|
||||
context, migration.source_compute)
|
||||
with nova_context.target_cell(context, hm.cell_mapping) as cctxt:
|
||||
return objects.Service.get_by_compute_host(
|
||||
cctxt, migration.source_compute)
|
||||
# Same-cell migration so just use the context we have.
|
||||
return objects.Service.get_by_compute_host(
|
||||
context, migration.source_compute)
|
||||
|
||||
@check_instance_lock
|
||||
@check_instance_state(vm_state=[vm_states.RESIZED])
|
||||
def confirm_resize(self, context, instance, migration=None):
|
||||
"""Confirms a migration/resize and deletes the 'old' instance."""
|
||||
"""Confirms a migration/resize and deletes the 'old' instance.
|
||||
|
||||
:param context: nova auth RequestContext
|
||||
:param instance: Instance object to confirm the resize
|
||||
:param migration: Migration object; provided if called from the
|
||||
_poll_unconfirmed_resizes periodic task on the dest compute.
|
||||
:raises: MigrationNotFound if migration is not provided and a migration
|
||||
cannot be found for the instance with status "finished".
|
||||
:raises: ServiceUnavailable if the source compute service is down.
|
||||
"""
|
||||
elevated = context.elevated()
|
||||
# NOTE(melwitt): We're not checking quota here because there isn't a
|
||||
# change in resource usage when confirming a resize. Resource
|
||||
|
@ -3789,6 +3819,13 @@ class API(base.Base):
|
|||
migration = objects.Migration.get_by_instance_and_status(
|
||||
elevated, instance.uuid, 'finished')
|
||||
|
||||
# Check if the source compute service is up before modifying the
|
||||
# migration record because once we do we cannot come back through this
|
||||
# method since it will be looking for a "finished" status migration.
|
||||
source_svc = self._get_source_compute_service(context, migration)
|
||||
if not self.servicegroup_api.service_is_up(source_svc):
|
||||
raise exception.ServiceUnavailable()
|
||||
|
||||
migration.status = 'confirming'
|
||||
migration.save()
|
||||
|
||||
|
|
|
@ -324,8 +324,12 @@ class SingleCellSimple(fixtures.Fixture):
|
|||
|
||||
@contextmanager
|
||||
def _fake_target_cell(self, context, target_cell):
|
||||
# NOTE(danms): Just pass through the context without actually
|
||||
# targeting anything.
|
||||
# Just do something simple and set/unset the cell_uuid on the context.
|
||||
if target_cell:
|
||||
context.cell_uuid = getattr(target_cell, 'uuid',
|
||||
uuidsentinel.cell1)
|
||||
else:
|
||||
context.cell_uuid = None
|
||||
yield context
|
||||
|
||||
def _fake_set_target_cell(self, context, cell_mapping):
|
||||
|
|
|
@ -963,6 +963,10 @@ class TestMultiCellMigrate(integrated_helpers.ProviderUsageBaseTestCase):
|
|||
server2['id'], {'migrate': {'host': 'host2'}})
|
||||
self._wait_for_migration_status(server2, ['error'])
|
||||
|
||||
@mock.patch('nova.compute.api.API._get_source_compute_service',
|
||||
new=mock.Mock())
|
||||
@mock.patch('nova.servicegroup.api.API.service_is_up',
|
||||
new=mock.Mock(return_value=True))
|
||||
def test_poll_unconfirmed_resizes_with_upcall(self):
|
||||
"""Tests the _poll_unconfirmed_resizes periodic task with a cross-cell
|
||||
resize once the instance is in VERIFY_RESIZE status on the dest host.
|
||||
|
@ -993,6 +997,10 @@ class TestMultiCellMigrate(integrated_helpers.ProviderUsageBaseTestCase):
|
|||
self._assert_confirm(
|
||||
server, source_rp_uuid, target_rp_uuid, new_flavor)
|
||||
|
||||
@mock.patch('nova.compute.api.API._get_source_compute_service',
|
||||
new=mock.Mock())
|
||||
@mock.patch('nova.servicegroup.api.API.service_is_up',
|
||||
new=mock.Mock(return_value=True))
|
||||
def test_poll_unconfirmed_resizes_with_no_upcall(self):
|
||||
"""Tests the _poll_unconfirmed_resizes periodic task with a cross-cell
|
||||
resize once the instance is in VERIFY_RESIZE status on the dest host.
|
||||
|
|
|
@ -46,7 +46,6 @@ from nova.tests import fixtures as nova_fixtures
|
|||
from nova.tests.functional.api import client
|
||||
from nova.tests.functional import integrated_helpers
|
||||
from nova.tests.unit.api.openstack import fakes
|
||||
from nova.tests.unit import cast_as_call
|
||||
from nova.tests.unit import fake_block_device
|
||||
from nova.tests.unit import fake_notifier
|
||||
from nova.tests.unit import fake_requests
|
||||
|
@ -3359,34 +3358,37 @@ class PollUnconfirmedResizesTest(integrated_helpers.ProviderUsageBaseTestCase):
|
|||
self.api.put_service(
|
||||
source_service['id'], {'status': 'disabled', 'forced_down': True})
|
||||
# Now configure auto-confirm and call the method on the target compute
|
||||
# so we do not have to wait for the periodic to run. Also configure
|
||||
# the RPC call from the API to the source compute to timeout after 1
|
||||
# second.
|
||||
self.flags(resize_confirm_window=1, rpc_response_timeout=1)
|
||||
# so we do not have to wait for the periodic to run.
|
||||
self.flags(resize_confirm_window=1)
|
||||
# Stub timeutils so the DB API query finds the unconfirmed migration.
|
||||
future = timeutils.utcnow() + datetime.timedelta(hours=1)
|
||||
ctxt = context.get_admin_context()
|
||||
with osloutils_fixture.TimeFixture(future):
|
||||
# Use the CastAsCall fixture since the fake rpc is not going to
|
||||
# simulate a failure from the service being down.
|
||||
with cast_as_call.CastAsCall(self):
|
||||
self.computes['host2'].manager._poll_unconfirmed_resizes(ctxt)
|
||||
self.assertIn('Error auto-confirming resize',
|
||||
self.stdlog.logger.output)
|
||||
self.assertIn('No reply on topic compute', self.stdlog.logger.output)
|
||||
# FIXME(mriedem): This is bug 1855927 where the migration status is
|
||||
# left in "confirming" so the _poll_unconfirmed_resizes task will not
|
||||
# process it again nor can the user confirm the resize in the API since
|
||||
# the migration status is not "finished".
|
||||
self._wait_for_migration_status(server, ['confirming'])
|
||||
self.assertIn('Service is unavailable at this time',
|
||||
self.stdlog.logger.output)
|
||||
# The source compute service check should have been done before the
|
||||
# migration status was updated so it should still be "finished".
|
||||
self._wait_for_migration_status(server, ['finished'])
|
||||
# Try to confirm in the API while the source compute service is still
|
||||
# down to assert the 409 (rather than a 500) error.
|
||||
ex = self.assertRaises(client.OpenStackApiException,
|
||||
self.api.post_server_action,
|
||||
server['id'], {'confirmResize': None})
|
||||
self.assertEqual(400, ex.response.status_code)
|
||||
self.assertIn('Instance has not been resized', six.text_type(ex))
|
||||
# That error message is bogus because the server is resized.
|
||||
server = self.api.get_server(server['id'])
|
||||
self.assertEqual('VERIFY_RESIZE', server['status'])
|
||||
self.assertEqual(409, ex.response.status_code)
|
||||
self.assertIn('Service is unavailable at this time', six.text_type(ex))
|
||||
# Bring the source compute back up and try to confirm the resize which
|
||||
# should work since the migration status is still "finished".
|
||||
self.restart_compute_service(self.computes['host1'])
|
||||
self.api.put_service(
|
||||
source_service['id'], {'status': 'enabled', 'forced_down': False})
|
||||
# Use the API to confirm the resize because _poll_unconfirmed_resizes
|
||||
# requires mucking with the current time which causes problems with
|
||||
# the service_is_up check in the API.
|
||||
self.api.post_server_action(server['id'], {'confirmResize': None})
|
||||
self._wait_for_state_change(server, 'ACTIVE')
|
||||
|
||||
|
||||
class ServerLiveMigrateForceAndAbort(
|
||||
|
|
|
@ -1644,10 +1644,13 @@ class _ComputeAPIUnitTestMixIn(object):
|
|||
|
||||
test()
|
||||
|
||||
@mock.patch('nova.compute.api.API._get_source_compute_service')
|
||||
@mock.patch('nova.servicegroup.api.API.service_is_up', return_value=True)
|
||||
@mock.patch.object(objects.Migration, 'save')
|
||||
@mock.patch.object(objects.Migration, 'get_by_instance_and_status')
|
||||
@mock.patch.object(context.RequestContext, 'elevated')
|
||||
def _test_confirm_resize(self, mock_elevated, mock_get, mock_save,
|
||||
mock_service_is_up, mock_get_service,
|
||||
mig_ref_passed=False):
|
||||
params = dict(vm_state=vm_states.RESIZED)
|
||||
fake_inst = self._create_instance_obj(params=params)
|
||||
|
@ -1678,6 +1681,8 @@ class _ComputeAPIUnitTestMixIn(object):
|
|||
self.compute_api.confirm_resize(self.context, fake_inst)
|
||||
|
||||
mock_elevated.assert_called_once_with()
|
||||
mock_service_is_up.assert_called_once_with(
|
||||
mock_get_service.return_value)
|
||||
mock_save.assert_called_once_with()
|
||||
mock_record.assert_called_once_with(self.context, fake_inst,
|
||||
'confirmResize')
|
||||
|
@ -1694,6 +1699,34 @@ class _ComputeAPIUnitTestMixIn(object):
|
|||
def test_confirm_resize_with_migration_ref(self):
|
||||
self._test_confirm_resize(mig_ref_passed=True)
|
||||
|
||||
@mock.patch('nova.objects.HostMapping.get_by_host',
|
||||
return_value=objects.HostMapping(
|
||||
cell_mapping=objects.CellMapping(
|
||||
database_connection='fake://', transport_url='none://',
|
||||
uuid=uuids.cell_uuid)))
|
||||
@mock.patch('nova.objects.Service.get_by_compute_host')
|
||||
def test_get_source_compute_service(self, mock_service_get, mock_hm_get):
|
||||
# First start with a same-cell migration.
|
||||
migration = objects.Migration(source_compute='source.host',
|
||||
cross_cell_move=False)
|
||||
self.compute_api._get_source_compute_service(self.context, migration)
|
||||
mock_hm_get.assert_not_called()
|
||||
mock_service_get.assert_called_once_with(self.context, 'source.host')
|
||||
# Make sure the context was not targeted.
|
||||
ctxt = mock_service_get.call_args[0][0]
|
||||
self.assertIsNone(ctxt.cell_uuid)
|
||||
|
||||
# Now test with a cross-cell migration.
|
||||
mock_service_get.reset_mock()
|
||||
migration.cross_cell_move = True
|
||||
self.compute_api._get_source_compute_service(self.context, migration)
|
||||
mock_hm_get.assert_called_once_with(self.context, 'source.host')
|
||||
mock_service_get.assert_called_once_with(
|
||||
test.MatchType(context.RequestContext), 'source.host')
|
||||
# Make sure the context was targeted.
|
||||
ctxt = mock_service_get.call_args[0][0]
|
||||
self.assertEqual(uuids.cell_uuid, ctxt.cell_uuid)
|
||||
|
||||
@mock.patch('nova.virt.hardware.numa_get_constraints')
|
||||
@mock.patch('nova.network.neutron.API.get_requested_resource_for_instance',
|
||||
return_value=[])
|
||||
|
@ -7460,8 +7493,10 @@ class ComputeAPIUnitTestCase(_ComputeAPIUnitTestMixIn, test.NoDBTestCase):
|
|||
self._test_get_migrations_sorted_filter_duplicates(
|
||||
[older, newer], newer)
|
||||
|
||||
@mock.patch('nova.servicegroup.api.API.service_is_up', return_value=True)
|
||||
@mock.patch('nova.objects.Migration.get_by_instance_and_status')
|
||||
def test_confirm_resize_cross_cell_move_true(self, mock_migration_get):
|
||||
def test_confirm_resize_cross_cell_move_true(self, mock_migration_get,
|
||||
mock_service_is_up):
|
||||
"""Tests confirm_resize where Migration.cross_cell_move is True"""
|
||||
instance = fake_instance.fake_instance_obj(
|
||||
self.context, vm_state=vm_states.RESIZED, task_state=None,
|
||||
|
@ -7475,12 +7510,15 @@ class ComputeAPIUnitTestCase(_ComputeAPIUnitTestMixIn, test.NoDBTestCase):
|
|||
mock.patch.object(self.compute_api, '_record_action_start'),
|
||||
mock.patch.object(self.compute_api.compute_task_api,
|
||||
'confirm_snapshot_based_resize'),
|
||||
mock.patch.object(self.compute_api, '_get_source_compute_service'),
|
||||
) as (
|
||||
mock_elevated, mock_migration_save, mock_record_action,
|
||||
mock_conductor_confirm
|
||||
mock_conductor_confirm, mock_get_service
|
||||
):
|
||||
self.compute_api.confirm_resize(self.context, instance)
|
||||
mock_elevated.assert_called_once_with()
|
||||
mock_service_is_up.assert_called_once_with(
|
||||
mock_get_service.return_value)
|
||||
mock_migration_save.assert_called_once_with()
|
||||
self.assertEqual('confirming', migration.status)
|
||||
mock_record_action.assert_called_once_with(
|
||||
|
|
|
@ -219,7 +219,8 @@ class HostManagerTestCase(test.NoDBTestCase):
|
|||
inst2 = objects.Instance(host='host1', uuid=uuids.instance_2)
|
||||
inst3 = objects.Instance(host='host2', uuid=uuids.instance_3)
|
||||
cell = objects.CellMapping(database_connection='',
|
||||
target_url='')
|
||||
target_url='',
|
||||
uuid=uuids.cell_uuid)
|
||||
mock_get_by_filters.return_value = objects.InstanceList(
|
||||
objects=[inst1, inst2, inst3])
|
||||
hm = self.host_manager
|
||||
|
@ -589,7 +590,7 @@ class HostManagerTestCase(test.NoDBTestCase):
|
|||
mock_get_by_host.return_value = []
|
||||
mock_get_all.return_value = fakes.COMPUTE_NODES
|
||||
mock_get_by_binary.return_value = fakes.SERVICES
|
||||
context = 'fake_context'
|
||||
context = nova_context.get_admin_context()
|
||||
compute_nodes, services = self.host_manager._get_computes_for_cells(
|
||||
context, self.host_manager.enabled_cells)
|
||||
|
||||
|
@ -786,7 +787,7 @@ class HostManagerTestCase(test.NoDBTestCase):
|
|||
|
||||
@mock.patch('nova.objects.InstanceList.get_uuids_by_host')
|
||||
def test_host_state_not_updated(self, mock_get_by_host):
|
||||
context = 'fake_context'
|
||||
context = nova_context.get_admin_context()
|
||||
hm = self.host_manager
|
||||
inst1 = objects.Instance(uuid=uuids.instance)
|
||||
cn1 = objects.ComputeNode(host='host1')
|
||||
|
@ -806,10 +807,11 @@ class HostManagerTestCase(test.NoDBTestCase):
|
|||
|
||||
@mock.patch('nova.objects.InstanceList.get_uuids_by_host')
|
||||
def test_recreate_instance_info(self, mock_get_by_host):
|
||||
context = nova_context.get_admin_context()
|
||||
host_name = 'fake_host'
|
||||
inst1 = fake_instance.fake_instance_obj('fake_context',
|
||||
inst1 = fake_instance.fake_instance_obj(context,
|
||||
uuid=uuids.instance_1)
|
||||
inst2 = fake_instance.fake_instance_obj('fake_context',
|
||||
inst2 = fake_instance.fake_instance_obj(context,
|
||||
uuid=uuids.instance_2)
|
||||
orig_inst_dict = {inst1.uuid: inst1, inst2.uuid: inst2}
|
||||
mock_get_by_host.return_value = [uuids.instance_1, uuids.instance_2]
|
||||
|
@ -818,7 +820,7 @@ class HostManagerTestCase(test.NoDBTestCase):
|
|||
'instances': orig_inst_dict,
|
||||
'updated': True,
|
||||
}}
|
||||
self.host_manager._recreate_instance_info('fake_context', host_name)
|
||||
self.host_manager._recreate_instance_info(context, host_name)
|
||||
new_info = self.host_manager._instance_info[host_name]
|
||||
self.assertEqual(len(new_info['instances']),
|
||||
len(mock_get_by_host.return_value))
|
||||
|
@ -1231,7 +1233,7 @@ class HostManagerChangedNodesTestCase(test.NoDBTestCase):
|
|||
mock_get_by_host.return_value = []
|
||||
mock_get_all.return_value = fakes.COMPUTE_NODES
|
||||
mock_get_by_binary.return_value = fakes.SERVICES
|
||||
context = 'fake_context'
|
||||
context = nova_context.get_admin_context()
|
||||
|
||||
compute_nodes, services = self.host_manager._get_computes_for_cells(
|
||||
context, self.host_manager.enabled_cells)
|
||||
|
@ -1256,7 +1258,7 @@ class HostManagerChangedNodesTestCase(test.NoDBTestCase):
|
|||
mock_get_by_host.return_value = []
|
||||
mock_get_all.side_effect = [fakes.COMPUTE_NODES, running_nodes]
|
||||
mock_get_by_binary.side_effect = [fakes.SERVICES, fakes.SERVICES]
|
||||
context = 'fake_context'
|
||||
context = nova_context.get_admin_context()
|
||||
|
||||
# first call: all nodes
|
||||
compute_nodes, services = self.host_manager._get_computes_for_cells(
|
||||
|
@ -1286,7 +1288,7 @@ class HostManagerChangedNodesTestCase(test.NoDBTestCase):
|
|||
mock_get_by_host.return_value = []
|
||||
mock_get_all.side_effect = [fakes.COMPUTE_NODES, []]
|
||||
mock_get_by_binary.side_effect = [fakes.SERVICES, fakes.SERVICES]
|
||||
context = 'fake_context'
|
||||
context = nova_context.get_admin_context()
|
||||
|
||||
# first call: all nodes
|
||||
compute_nodes, services = self.host_manager._get_computes_for_cells(
|
||||
|
|
Loading…
Reference in New Issue