Merge "Move revert resize under semaphore"

This commit is contained in:
Zuul 2020-09-11 16:17:42 +00:00 committed by Gerrit Code Review
commit 648ac72818
7 changed files with 69 additions and 62 deletions

View File

@ -4670,10 +4670,7 @@ class ComputeManager(manager.Manager):
self._delete_volume_attachments(ctxt, bdms) self._delete_volume_attachments(ctxt, bdms)
# Free up the new_flavor usage from the resource tracker for this host. # Free up the new_flavor usage from the resource tracker for this host.
instance.revert_migration_context() self.rt.drop_move_claim_at_dest(ctxt, instance, migration)
instance.save(expected_task_state=task_states.RESIZE_REVERTING)
self.rt.drop_move_claim(ctxt, instance, instance.node,
instance_type=instance.new_flavor)
def _revert_instance_flavor_host_node(self, instance, migration): def _revert_instance_flavor_host_node(self, instance, migration):
"""Revert host, node and flavor fields after a resize-revert.""" """Revert host, node and flavor fields after a resize-revert."""
@ -4870,20 +4867,9 @@ class ComputeManager(manager.Manager):
self._terminate_volume_connections(context, instance, bdms) self._terminate_volume_connections(context, instance, bdms)
migration.status = 'reverted' # Free up the new_flavor usage from the resource tracker for this
migration.save() # host.
self.rt.drop_move_claim_at_dest(context, instance, migration)
# NOTE(ndipanov): We need to do this here because dropping the
# claim means we lose the migration_context data. We really should
# fix this by moving the drop_move_claim call to the
# finish_revert_resize method as this is racy (revert is dropped,
# but instance resources will be tracked with the new flavor until
# it gets rolled back in finish_revert_resize, which is
# potentially wrong for a period of time).
instance.revert_migration_context()
instance.save()
self.rt.drop_move_claim(context, instance, instance.node)
# RPC cast back to the source host to finish the revert there. # RPC cast back to the source host to finish the revert there.
self.compute_rpcapi.finish_revert_resize(context, instance, self.compute_rpcapi.finish_revert_resize(context, instance,

View File

@ -567,6 +567,24 @@ class ResourceTracker(object):
# though. # though.
instance.drop_migration_context() instance.drop_migration_context()
@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE, fair=True)
def drop_move_claim_at_dest(self, context, instance, migration):
    """Drop a move claim after reverting a resize or cold migration.

    Holds COMPUTE_RESOURCE_SEMAPHORE so the migration status change and
    the usage drop are not interleaved with other resource tracker
    operations on this host.

    :param context: The request context.
    :param instance: The instance whose new_flavor usage is being
        dropped on this (destination) host.
    :param migration: The migration being reverted; its status is set
        to 'reverted' and saved here.
    """
    # NOTE(stephenfin): This runs on the destination, before we return to
    # the source and resume the instance there. As such, the migration
    # isn't really reverted yet, but this status is what we use to
    # indicate that we no longer need to account for usage on this host
    migration.status = 'reverted'
    migration.save()
    # Drop the 'new_'-prefixed (new_flavor) usage that the resize claim
    # recorded against the destination node.
    self._drop_move_claim(
        context, instance, migration.dest_node, instance.new_flavor,
        prefix='new_')
    # Restore the pre-resize instance data and persist it;
    # expected_task_state guards against a racing task-state change.
    instance.revert_migration_context()
    instance.save(expected_task_state=[task_states.RESIZE_REVERTING])
@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE, fair=True) @utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE, fair=True)
def drop_move_claim(self, context, instance, nodename, def drop_move_claim(self, context, instance, nodename,
instance_type=None, prefix='new_'): instance_type=None, prefix='new_'):

View File

@ -200,15 +200,22 @@ class InstanceHelperMixin:
api = getattr(self, 'admin_api', self.api) api = getattr(self, 'admin_api', self.api)
statuses = [status.lower() for status in expected_statuses] statuses = [status.lower() for status in expected_statuses]
actual_status = None
for attempt in range(10): for attempt in range(10):
migrations = api.api_get('/os-migrations').body['migrations'] migrations = api.api_get('/os-migrations').body['migrations']
for migration in migrations: for migration in migrations:
if (migration['instance_uuid'] == server['id'] and if migration['instance_uuid'] == server['id']:
migration['status'].lower() in statuses): actual_status = migration['status']
return migration if migration['status'].lower() in statuses:
return migration
time.sleep(0.5) time.sleep(0.5)
self.fail(
    'Timed out waiting for migration with status for instance %s '
    '(expected "%s", got "%s")' % (
        server['id'], expected_statuses, actual_status,
    ))
def _wait_for_log(self, log_line): def _wait_for_log(self, log_line):
for i in range(10): for i in range(10):

View File

@ -123,7 +123,7 @@ class TestSameCell(integrated_helpers._IntegratedTestBase):
self.assertUsage(src_host, 1) self.assertUsage(src_host, 1)
self.assertUsage(dst_host, 0) self.assertUsage(dst_host, 0)
orig_drop_claim = rt.ResourceTracker.drop_move_claim orig_drop_claim = rt.ResourceTracker.drop_move_claim_at_dest
def fake_drop_move_claim(*args, **kwargs): def fake_drop_move_claim(*args, **kwargs):
# run periodics after marking the migration reverted, simulating a # run periodics after marking the migration reverted, simulating a
@ -136,15 +136,14 @@ class TestSameCell(integrated_helpers._IntegratedTestBase):
if drop_race: if drop_race:
self._run_periodics() self._run_periodics()
# FIXME(stephenfin): the periodic should not have dropped the self.assertUsage(src_host, 1)
# records for the src self.assertUsage(dst_host, 1)
self.assertUsage(src_host, 0)
self.assertUsage(dst_host, 1)
return orig_drop_claim(*args, **kwargs) return orig_drop_claim(*args, **kwargs)
self.stub_out( self.stub_out(
'nova.compute.resource_tracker.ResourceTracker.drop_move_claim', 'nova.compute.resource_tracker.ResourceTracker.'
'drop_move_claim_at_dest',
fake_drop_move_claim, fake_drop_move_claim,
) )
@ -160,11 +159,8 @@ class TestSameCell(integrated_helpers._IntegratedTestBase):
# migration is now reverted so we should once again only have usage on # migration is now reverted so we should once again only have usage on
# one host # one host
# FIXME(stephenfin): Our usage here should always be 1 and 0 for source self.assertUsage(src_host, 1)
# and dest respectively when reverting, but that won't happen until we self.assertUsage(dst_host, 0)
# run the periodic and rebuild our inventory from scratch
self.assertUsage(src_host, 0 if drop_race else 1)
self.assertUsage(dst_host, 1 if drop_race else 0)
# running periodics shouldn't change things # running periodics shouldn't change things
self._run_periodics() self._run_periodics()

View File

@ -5772,6 +5772,7 @@ class ComputeTestCase(BaseTestCase,
migration.status = 'finished' migration.status = 'finished'
migration.migration_type = 'migration' migration.migration_type = 'migration'
migration.source_node = NODENAME migration.source_node = NODENAME
migration.dest_node = NODENAME
migration.create() migration.create()
migration_context = objects.MigrationContext() migration_context = objects.MigrationContext()

View File

@ -8613,7 +8613,7 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
request_spec = objects.RequestSpec() request_spec = objects.RequestSpec()
@mock.patch('nova.compute.resource_tracker.ResourceTracker.' @mock.patch('nova.compute.resource_tracker.ResourceTracker.'
'drop_move_claim') 'drop_move_claim_at_dest')
@mock.patch('nova.compute.rpcapi.ComputeAPI.finish_revert_resize') @mock.patch('nova.compute.rpcapi.ComputeAPI.finish_revert_resize')
@mock.patch.object(self.instance, 'revert_migration_context') @mock.patch.object(self.instance, 'revert_migration_context')
@mock.patch.object(self.compute.network_api, 'get_instance_nw_info') @mock.patch.object(self.compute.network_api, 'get_instance_nw_info')
@ -8651,9 +8651,9 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
migration=self.migration, migration=self.migration,
instance=self.instance, instance=self.instance,
request_spec=request_spec) request_spec=request_spec)
mock_drop_move_claim.assert_called_once_with(self.context,
self.instance, self.instance.node) mock_drop_move_claim.assert_called_once_with(
self.assertIsNotNone(self.instance.migration_context) self.context, self.instance, self.migration)
# Three fake BDMs: # Three fake BDMs:
# 1. volume BDM with an attachment_id which will be updated/completed # 1. volume BDM with an attachment_id which will be updated/completed
@ -11784,11 +11784,8 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
# Assert _error_out_instance_on_exception wasn't tripped somehow. # Assert _error_out_instance_on_exception wasn't tripped somehow.
self.assertNotEqual(vm_states.ERROR, self.instance.vm_state) self.assertNotEqual(vm_states.ERROR, self.instance.vm_state)
@mock.patch('nova.objects.Instance.save')
@mock.patch('nova.objects.Instance.revert_migration_context')
@mock.patch('nova.objects.BlockDeviceMappingList.get_by_instance_uuid') @mock.patch('nova.objects.BlockDeviceMappingList.get_by_instance_uuid')
def test_revert_snapshot_based_resize_at_dest( def test_revert_snapshot_based_resize_at_dest(self, mock_get_bdms):
self, mock_get_bdms, mock_revert_mig_ctxt, mock_inst_save):
"""Happy path test for _revert_snapshot_based_resize_at_dest""" """Happy path test for _revert_snapshot_based_resize_at_dest"""
# Setup more mocks. # Setup more mocks.
def stub_migrate_instance_start(ctxt, instance, migration): def stub_migrate_instance_start(ctxt, instance, migration):
@ -11801,7 +11798,7 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
mock.patch.object(self.compute, '_get_instance_block_device_info'), mock.patch.object(self.compute, '_get_instance_block_device_info'),
mock.patch.object(self.compute.driver, 'destroy'), mock.patch.object(self.compute.driver, 'destroy'),
mock.patch.object(self.compute, '_delete_volume_attachments'), mock.patch.object(self.compute, '_delete_volume_attachments'),
mock.patch.object(self.compute.rt, 'drop_move_claim') mock.patch.object(self.compute.rt, 'drop_move_claim_at_dest')
) as ( ) as (
mock_network_api, mock_get_bdi, mock_destroy, mock_network_api, mock_get_bdi, mock_destroy,
mock_delete_attachments, mock_drop_claim mock_delete_attachments, mock_drop_claim
@ -11816,6 +11813,7 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
# Run the code. # Run the code.
self.compute._revert_snapshot_based_resize_at_dest( self.compute._revert_snapshot_based_resize_at_dest(
self.context, self.instance, self.migration) self.context, self.instance, self.migration)
# Assert the calls. # Assert the calls.
mock_network_api.get_instance_nw_info.assert_called_once_with( mock_network_api.get_instance_nw_info.assert_called_once_with(
self.context, self.instance) self.context, self.instance)
@ -11836,12 +11834,8 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
self.stdlog.logger.output) self.stdlog.logger.output)
mock_delete_attachments.assert_called_once_with( mock_delete_attachments.assert_called_once_with(
self.context, mock_get_bdms.return_value) self.context, mock_get_bdms.return_value)
mock_revert_mig_ctxt.assert_called_once_with()
mock_inst_save.assert_called_once_with(
expected_task_state=task_states.RESIZE_REVERTING)
mock_drop_claim.assert_called_once_with( mock_drop_claim.assert_called_once_with(
self.context, self.instance, self.instance.node, self.context, self.instance, self.migration)
instance_type=self.instance.new_flavor)
@mock.patch('nova.compute.manager.ComputeManager.' @mock.patch('nova.compute.manager.ComputeManager.'
'_finish_revert_snapshot_based_resize_at_source') '_finish_revert_snapshot_based_resize_at_source')
@ -11940,9 +11934,7 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
# Run the code. # Run the code.
self.compute.finish_revert_snapshot_based_resize_at_source( self.compute.finish_revert_snapshot_based_resize_at_source(
self.context, self.instance, self.migration) self.context, self.instance, self.migration)
# Assert the migration status was updated.
self.migration.save.assert_called_once_with()
self.assertEqual('reverted', self.migration.status)
# Assert the instance host/node and flavor info was updated. # Assert the instance host/node and flavor info was updated.
self.assertEqual(self.migration.source_compute, self.instance.host) self.assertEqual(self.migration.source_compute, self.instance.host)
self.assertEqual(self.migration.source_node, self.instance.node) self.assertEqual(self.migration.source_node, self.instance.node)

View File

@ -2649,12 +2649,11 @@ class TestResize(BaseTestCase):
with test.nested( with test.nested(
mock.patch('nova.objects.Migration.save'), mock.patch('nova.objects.Migration.save'),
mock.patch('nova.objects.Instance.drop_migration_context'), mock.patch('nova.objects.Instance.drop_migration_context'),
mock.patch('nova.objects.Instance.save'),
): ):
if revert: if revert:
flavor = new_flavor flavor = new_flavor
prefix = 'new_' self.rt.drop_move_claim_at_dest(ctx, instance, migration)
self.rt.drop_move_claim(
ctx, instance, _NODENAME, flavor, prefix=prefix)
else: # confirm else: # confirm
flavor = old_flavor flavor = old_flavor
self.rt.drop_move_claim_at_source(ctx, instance, migration) self.rt.drop_move_claim_at_source(ctx, instance, migration)
@ -2821,32 +2820,40 @@ class TestResize(BaseTestCase):
instance.migration_context.new_pci_devices = objects.PciDeviceList( instance.migration_context.new_pci_devices = objects.PciDeviceList(
objects=pci_devs) objects=pci_devs)
# When reverting a resize and dropping the move claim, the # When reverting a resize and dropping the move claim, the destination
# destination compute calls drop_move_claim to drop the new_flavor # compute calls drop_move_claim_at_dest to drop the new_flavor
# usage and the instance should be in tracked_migrations from when # usage and the instance should be in tracked_migrations from when
# the resize_claim was made on the dest during prep_resize. # the resize_claim was made on the dest during prep_resize.
self.rt.tracked_migrations = { migration = objects.Migration(
instance.uuid: objects.Migration(migration_type='resize')} dest_node=cn.hypervisor_hostname,
migration_type='resize',
)
self.rt.tracked_migrations = {instance.uuid: migration}
# not using mock.sentinel.ctx because drop_move_claim calls elevated # not using mock.sentinel.ctx because _drop_move_claim calls elevated
ctx = mock.MagicMock() ctx = mock.MagicMock()
with test.nested( with test.nested(
mock.patch.object(self.rt, '_update'), mock.patch.object(self.rt, '_update'),
mock.patch.object(self.rt.pci_tracker, 'free_device'), mock.patch.object(self.rt.pci_tracker, 'free_device'),
mock.patch.object(self.rt, '_get_usage_dict'), mock.patch.object(self.rt, '_get_usage_dict'),
mock.patch.object(self.rt, '_update_usage') mock.patch.object(self.rt, '_update_usage'),
mock.patch.object(migration, 'save'),
mock.patch.object(instance, 'save'),
) as ( ) as (
update_mock, mock_pci_free_device, mock_get_usage, update_mock, mock_pci_free_device, mock_get_usage,
mock_update_usage, mock_update_usage, mock_migrate_save, mock_instance_save,
): ):
self.rt.drop_move_claim(ctx, instance, _NODENAME) self.rt.drop_move_claim_at_dest(ctx, instance, migration)
mock_pci_free_device.assert_called_once_with( mock_pci_free_device.assert_called_once_with(
pci_dev, mock.ANY) pci_dev, mock.ANY)
mock_get_usage.assert_called_once_with( mock_get_usage.assert_called_once_with(
instance.new_flavor, instance, numa_topology=None) instance.new_flavor, instance, numa_topology=None)
mock_update_usage.assert_called_once_with( mock_update_usage.assert_called_once_with(
mock_get_usage.return_value, _NODENAME, sign=-1) mock_get_usage.return_value, _NODENAME, sign=-1)
mock_migrate_save.assert_called_once()
mock_instance_save.assert_called_once()
@mock.patch('nova.compute.resource_tracker.ResourceTracker.' @mock.patch('nova.compute.resource_tracker.ResourceTracker.'
'_sync_compute_service_disabled_trait', new=mock.Mock()) '_sync_compute_service_disabled_trait', new=mock.Mock())