Merge "Drop source node allocations if finish_resize fails" into stable/queens
This commit is contained in:
commit
60d4442512
|
@ -4562,12 +4562,50 @@ class ComputeManager(manager.Manager):
|
|||
new host machine.
|
||||
|
||||
"""
|
||||
# _finish_resize sets instance.old_flavor to instance.flavor and
|
||||
# changes instance.flavor to instance.new_flavor (if doing a resize
|
||||
# rather than a cold migration). We save off the old_flavor here in
|
||||
# case we need it for error handling below.
|
||||
old_flavor = instance.flavor
|
||||
try:
|
||||
self._finish_resize_helper(context, disk_info, image, instance,
|
||||
migration)
|
||||
except Exception:
|
||||
with excutils.save_and_reraise_exception():
|
||||
self._revert_allocation(context, instance, migration)
|
||||
# At this point, resize_instance (which runs on the source) has
|
||||
# already updated the instance host/node values to point to
|
||||
# this (the dest) compute, so we need to leave the allocations
|
||||
# against the dest node resource provider intact and drop the
|
||||
# allocations against the source node resource provider. If the
|
||||
# user tries to recover the server by hard rebooting it, it
|
||||
# will happen on this host so that's where the allocations
|
||||
# should go.
|
||||
LOG.info('Deleting allocations for old flavor on source node '
|
||||
'%s after finish_resize failure. You may be able to '
|
||||
'recover the instance by hard rebooting it.',
|
||||
migration.source_compute, instance=instance)
|
||||
# NOTE(mriedem): We can't use _delete_allocation_after_move
|
||||
# because it relies on the resource tracker to look up the
|
||||
# node uuid and since we are on the dest host, passing the
|
||||
# source nodename won't work since the RT isn't tracking that
|
||||
# node here. So we just try to remove the migration-based
|
||||
# allocations directly and handle the case they don't exist.
|
||||
if not self.reportclient.delete_allocation_for_instance(
|
||||
context, migration.uuid):
|
||||
# No migration-based allocation. Try to cleanup directly.
|
||||
cn = objects.ComputeNode.get_by_host_and_nodename(
|
||||
context, migration.source_compute,
|
||||
migration.source_node)
|
||||
if not scheduler_utils.remove_allocation_from_compute(
|
||||
context, instance, cn.uuid, self.reportclient,
|
||||
flavor=old_flavor):
|
||||
LOG.error('Failed to delete allocations for old '
|
||||
'flavor %s against source node %s. The '
|
||||
'instance is now on the dest node %s. The '
|
||||
'allocations against the source node need '
|
||||
'to be manually cleaned up in Placement.',
|
||||
old_flavor.flavorid, migration.source_node,
|
||||
migration.dest_node, instance=instance)
|
||||
|
||||
def _finish_resize_helper(self, context, disk_info, image, instance,
|
||||
migration):
|
||||
|
|
|
@ -24,6 +24,13 @@ class FinishResizeErrorAllocationCleanupTestCase(
|
|||
|
||||
compute_driver = 'fake.FakeFinishMigrationFailDriver'
|
||||
|
||||
# ProviderUsageBaseTestCase uses the AllServicesCurrent fixture which
|
||||
# means we'll use migration-based allocations by default. This flag allows
|
||||
# us to control the logic in conductor to handle legacy allocations where
|
||||
# the source (old flavor) and dest (new flavor) node allocations are
|
||||
# doubled up on the instance.
|
||||
migration_based_allocations = True
|
||||
|
||||
def setUp(self):
|
||||
super(FinishResizeErrorAllocationCleanupTestCase, self).setUp()
|
||||
# Get the flavors we're going to use.
|
||||
|
@ -31,6 +38,10 @@ class FinishResizeErrorAllocationCleanupTestCase(
|
|||
self.flavor1 = flavors[0]
|
||||
self.flavor2 = flavors[1]
|
||||
|
||||
self.stub_out('nova.conductor.tasks.migrate.'
|
||||
'should_do_migration_allocation',
|
||||
lambda *args, **kwargs: self.migration_based_allocations)
|
||||
|
||||
def _resize_and_assert_error(self, server, dest_host):
|
||||
# Now resize the server and wait for it to go to ERROR status because
|
||||
# the finish_migration virt driver method in host2 should fail.
|
||||
|
@ -67,16 +78,20 @@ class FinishResizeErrorAllocationCleanupTestCase(
|
|||
# allocations should still exist with the new flavor.
|
||||
source_rp_uuid = self._get_provider_uuid_by_host('host1')
|
||||
dest_rp_uuid = self._get_provider_uuid_by_host('host2')
|
||||
# FIXME(mriedem): This is bug 1825537 where the allocations are
|
||||
# reverted when finish_resize fails so the dest node resource provider
|
||||
# does not have any allocations and the instance allocations are for
|
||||
# the old flavor on the source node resource provider even though the
|
||||
# instance is not running on the source host nor pointed at the source
|
||||
# host in the DB.
|
||||
# self.assertFlavorMatchesAllocation(
|
||||
# self.flavor2, server['id'], dest_rp_uuid)
|
||||
dest_rp_usages = self._get_provider_usages(dest_rp_uuid)
|
||||
self.assertFlavorMatchesAllocation(self.flavor2, dest_rp_usages)
|
||||
# And the source node provider should not have any usage.
|
||||
source_rp_usages = self._get_provider_usages(source_rp_uuid)
|
||||
no_usage = {'VCPU': 0, 'MEMORY_MB': 0, 'DISK_GB': 0}
|
||||
self.assertEqual(no_usage, dest_rp_usages)
|
||||
source_usages = self._get_provider_usages(source_rp_uuid)
|
||||
self.assertFlavorMatchesAllocation(self.flavor1, source_usages)
|
||||
self.assertEqual(no_usage, source_rp_usages)
|
||||
|
||||
|
||||
class FinishResizeErrorAllocationCleanupLegacyTestCase(
|
||||
FinishResizeErrorAllocationCleanupTestCase):
|
||||
"""Variant of FinishResizeErrorAllocationCleanupTestCase which does not
|
||||
use migration-based allocations, e.g. tests the scenario that there are
|
||||
older computes in the deployment so the source and dest node allocations
|
||||
are doubled up on the instance consumer record rather than the migration
|
||||
record.
|
||||
"""
|
||||
migration_based_allocations = False
|
||||
|
|
|
@ -2941,10 +2941,13 @@ class ServerMovingTests(ProviderUsageBaseTestCase):
|
|||
# Ensure the allocation records still exist on the host.
|
||||
source_rp_uuid = self._get_provider_uuid_by_host(hostname)
|
||||
source_usages = self._get_provider_usages(source_rp_uuid)
|
||||
# FIXME(mriedem): This is wrong for the _finish_resize case.
|
||||
# The new_flavor should have been subtracted from the doubled
|
||||
# allocation which just leaves us with the original flavor.
|
||||
self.assertFlavorMatchesAllocation(self.flavor1, source_usages)
|
||||
if failing_method == '_finish_resize':
|
||||
# finish_resize will drop the old flavor allocations.
|
||||
self.assertFlavorMatchesAllocation(self.flavor2, source_usages)
|
||||
else:
|
||||
# The new_flavor should have been subtracted from the doubled
|
||||
# allocation which just leaves us with the original flavor.
|
||||
self.assertFlavorMatchesAllocation(self.flavor1, source_usages)
|
||||
|
||||
def test_resize_to_same_host_prep_resize_fails(self):
|
||||
self._test_resize_to_same_host_instance_fails(
|
||||
|
|
|
@ -6290,7 +6290,9 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase):
|
|||
test_instance_fault.fake_faults['fake-uuid'][0])
|
||||
yield _finish_resize
|
||||
|
||||
def test_finish_resize_failure(self):
|
||||
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
|
||||
'delete_allocation_for_instance')
|
||||
def test_finish_resize_failure(self, mock_del_allocs):
|
||||
self.migration.status = 'post-migrating'
|
||||
|
||||
with self._mock_finish_resize() as _finish_resize:
|
||||
|
@ -6304,10 +6306,14 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase):
|
|||
|
||||
# Assert that we set the migration to an error state
|
||||
self.assertEqual("error", self.migration.status)
|
||||
mock_del_allocs.assert_called_once_with(
|
||||
self.context, self.migration.uuid)
|
||||
|
||||
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
|
||||
'delete_allocation_for_instance')
|
||||
@mock.patch('nova.compute.manager.ComputeManager.'
|
||||
'_notify_about_instance_usage')
|
||||
def test_finish_resize_notify_failure(self, notify):
|
||||
def test_finish_resize_notify_failure(self, notify, mock_del_allocs):
|
||||
self.migration.status = 'post-migrating'
|
||||
|
||||
with self._mock_finish_resize():
|
||||
|
@ -6321,6 +6327,8 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase):
|
|||
|
||||
# Assert that we did not set the migration to an error state
|
||||
self.assertEqual('post-migrating', self.migration.status)
|
||||
mock_del_allocs.assert_called_once_with(
|
||||
self.context, self.migration.uuid)
|
||||
|
||||
@contextlib.contextmanager
|
||||
def _mock_resize_instance(self):
|
||||
|
|
Loading…
Reference in New Issue