Merge "Drop source node allocations if finish_resize fails" into stable/rocky
This commit is contained in:
commit
069bda35c1
|
@ -4727,12 +4727,50 @@ class ComputeManager(manager.Manager):
|
||||||
new host machine.
|
new host machine.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
# _finish_resize sets instance.old_flavor to instance.flavor and
|
||||||
|
# changes instance.flavor to instance.new_flavor (if doing a resize
|
||||||
|
# rather than a cold migration). We save off the old_flavor here in
|
||||||
|
# case we need it for error handling below.
|
||||||
|
old_flavor = instance.flavor
|
||||||
try:
|
try:
|
||||||
self._finish_resize_helper(context, disk_info, image, instance,
|
self._finish_resize_helper(context, disk_info, image, instance,
|
||||||
migration)
|
migration)
|
||||||
except Exception:
|
except Exception:
|
||||||
with excutils.save_and_reraise_exception():
|
with excutils.save_and_reraise_exception():
|
||||||
self._revert_allocation(context, instance, migration)
|
# At this point, resize_instance (which runs on the source) has
|
||||||
|
# already updated the instance host/node values to point to
|
||||||
|
# this (the dest) compute, so we need to leave the allocations
|
||||||
|
# against the dest node resource provider intact and drop the
|
||||||
|
# allocations against the source node resource provider. If the
|
||||||
|
# user tries to recover the server by hard rebooting it, it
|
||||||
|
# will happen on this host so that's where the allocations
|
||||||
|
# should go.
|
||||||
|
LOG.info('Deleting allocations for old flavor on source node '
|
||||||
|
'%s after finish_resize failure. You may be able to '
|
||||||
|
'recover the instance by hard rebooting it.',
|
||||||
|
migration.source_compute, instance=instance)
|
||||||
|
# NOTE(mriedem): We can't use _delete_allocation_after_move
|
||||||
|
# because it relies on the resource tracker to look up the
|
||||||
|
# node uuid and since we are on the dest host, passing the
|
||||||
|
# source nodename won't work since the RT isn't tracking that
|
||||||
|
# node here. So we just try to remove the migration-based
|
||||||
|
# allocations directly and handle the case they don't exist.
|
||||||
|
if not self.reportclient.delete_allocation_for_instance(
|
||||||
|
context, migration.uuid):
|
||||||
|
# No migration-based allocation. Try to cleanup directly.
|
||||||
|
cn = objects.ComputeNode.get_by_host_and_nodename(
|
||||||
|
context, migration.source_compute,
|
||||||
|
migration.source_node)
|
||||||
|
if not scheduler_utils.remove_allocation_from_compute(
|
||||||
|
context, instance, cn.uuid, self.reportclient,
|
||||||
|
flavor=old_flavor):
|
||||||
|
LOG.error('Failed to delete allocations for old '
|
||||||
|
'flavor %s against source node %s. The '
|
||||||
|
'instance is now on the dest node %s. The '
|
||||||
|
'allocations against the source node need '
|
||||||
|
'to be manually cleaned up in Placement.',
|
||||||
|
old_flavor.flavorid, migration.source_node,
|
||||||
|
migration.dest_node, instance=instance)
|
||||||
|
|
||||||
def _finish_resize_helper(self, context, disk_info, image, instance,
|
def _finish_resize_helper(self, context, disk_info, image, instance,
|
||||||
migration):
|
migration):
|
||||||
|
|
|
@ -24,6 +24,13 @@ class FinishResizeErrorAllocationCleanupTestCase(
|
||||||
|
|
||||||
compute_driver = 'fake.FakeFinishMigrationFailDriver'
|
compute_driver = 'fake.FakeFinishMigrationFailDriver'
|
||||||
|
|
||||||
|
# ProviderUsageBaseTestCase uses the AllServicesCurrent fixture which
|
||||||
|
# means we'll use migration-based allocations by default. This flag allows
|
||||||
|
# us to control the logic in conductor to handle legacy allocations where
|
||||||
|
# the source (old flavor) and dest (new flavor) node allocations are
|
||||||
|
# doubled up on the instance.
|
||||||
|
migration_based_allocations = True
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
super(FinishResizeErrorAllocationCleanupTestCase, self).setUp()
|
super(FinishResizeErrorAllocationCleanupTestCase, self).setUp()
|
||||||
# Get the flavors we're going to use.
|
# Get the flavors we're going to use.
|
||||||
|
@ -31,6 +38,10 @@ class FinishResizeErrorAllocationCleanupTestCase(
|
||||||
self.flavor1 = flavors[0]
|
self.flavor1 = flavors[0]
|
||||||
self.flavor2 = flavors[1]
|
self.flavor2 = flavors[1]
|
||||||
|
|
||||||
|
self.stub_out('nova.conductor.tasks.migrate.'
|
||||||
|
'should_do_migration_allocation',
|
||||||
|
lambda *args, **kwargs: self.migration_based_allocations)
|
||||||
|
|
||||||
def _resize_and_assert_error(self, server, dest_host):
|
def _resize_and_assert_error(self, server, dest_host):
|
||||||
# Now resize the server and wait for it to go to ERROR status because
|
# Now resize the server and wait for it to go to ERROR status because
|
||||||
# the finish_migration virt driver method in host2 should fail.
|
# the finish_migration virt driver method in host2 should fail.
|
||||||
|
@ -67,16 +78,20 @@ class FinishResizeErrorAllocationCleanupTestCase(
|
||||||
# allocations should still exist with the new flavor.
|
# allocations should still exist with the new flavor.
|
||||||
source_rp_uuid = self._get_provider_uuid_by_host('host1')
|
source_rp_uuid = self._get_provider_uuid_by_host('host1')
|
||||||
dest_rp_uuid = self._get_provider_uuid_by_host('host2')
|
dest_rp_uuid = self._get_provider_uuid_by_host('host2')
|
||||||
# FIXME(mriedem): This is bug 1825537 where the allocations are
|
|
||||||
# reverted when finish_resize fails so the dest node resource provider
|
|
||||||
# does not have any allocations and the instance allocations are for
|
|
||||||
# the old flavor on the source node resource provider even though the
|
|
||||||
# instance is not running on the source host nor pointed at the source
|
|
||||||
# host in the DB.
|
|
||||||
# self.assertFlavorMatchesAllocation(
|
|
||||||
# self.flavor2, server['id'], dest_rp_uuid)
|
|
||||||
dest_rp_usages = self._get_provider_usages(dest_rp_uuid)
|
dest_rp_usages = self._get_provider_usages(dest_rp_uuid)
|
||||||
|
self.assertFlavorMatchesAllocation(self.flavor2, dest_rp_usages)
|
||||||
|
# And the source node provider should not have any usage.
|
||||||
|
source_rp_usages = self._get_provider_usages(source_rp_uuid)
|
||||||
no_usage = {'VCPU': 0, 'MEMORY_MB': 0, 'DISK_GB': 0}
|
no_usage = {'VCPU': 0, 'MEMORY_MB': 0, 'DISK_GB': 0}
|
||||||
self.assertEqual(no_usage, dest_rp_usages)
|
self.assertEqual(no_usage, source_rp_usages)
|
||||||
source_usages = self._get_provider_usages(source_rp_uuid)
|
|
||||||
self.assertFlavorMatchesAllocation(self.flavor1, source_usages)
|
|
||||||
|
class FinishResizeErrorAllocationCleanupLegacyTestCase(
|
||||||
|
FinishResizeErrorAllocationCleanupTestCase):
|
||||||
|
"""Variant of FinishResizeErrorAllocationCleanupTestCase which does not
|
||||||
|
use migration-based allocations, e.g. tests the scenario that there are
|
||||||
|
older computes in the deployment so the source and dest node allocations
|
||||||
|
are doubled up on the instance consumer record rather than the migration
|
||||||
|
record.
|
||||||
|
"""
|
||||||
|
migration_based_allocations = False
|
||||||
|
|
|
@ -3088,10 +3088,13 @@ class ServerMovingTests(integrated_helpers.ProviderUsageBaseTestCase):
|
||||||
# Ensure the allocation records still exist on the host.
|
# Ensure the allocation records still exist on the host.
|
||||||
source_rp_uuid = self._get_provider_uuid_by_host(hostname)
|
source_rp_uuid = self._get_provider_uuid_by_host(hostname)
|
||||||
source_usages = self._get_provider_usages(source_rp_uuid)
|
source_usages = self._get_provider_usages(source_rp_uuid)
|
||||||
# FIXME(mriedem): This is wrong for the _finish_resize case.
|
if failing_method == '_finish_resize':
|
||||||
# The new_flavor should have been subtracted from the doubled
|
# finish_resize will drop the old flavor allocations.
|
||||||
# allocation which just leaves us with the original flavor.
|
self.assertFlavorMatchesAllocation(self.flavor2, source_usages)
|
||||||
self.assertFlavorMatchesAllocation(self.flavor1, source_usages)
|
else:
|
||||||
|
# The new_flavor should have been subtracted from the doubled
|
||||||
|
# allocation which just leaves us with the original flavor.
|
||||||
|
self.assertFlavorMatchesAllocation(self.flavor1, source_usages)
|
||||||
|
|
||||||
def test_resize_to_same_host_prep_resize_fails(self):
|
def test_resize_to_same_host_prep_resize_fails(self):
|
||||||
self._test_resize_to_same_host_instance_fails(
|
self._test_resize_to_same_host_instance_fails(
|
||||||
|
|
|
@ -6766,7 +6766,9 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase):
|
||||||
test_instance_fault.fake_faults['fake-uuid'][0])
|
test_instance_fault.fake_faults['fake-uuid'][0])
|
||||||
yield _finish_resize
|
yield _finish_resize
|
||||||
|
|
||||||
def test_finish_resize_failure(self):
|
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
|
||||||
|
'delete_allocation_for_instance')
|
||||||
|
def test_finish_resize_failure(self, mock_del_allocs):
|
||||||
self.migration.status = 'post-migrating'
|
self.migration.status = 'post-migrating'
|
||||||
|
|
||||||
with self._mock_finish_resize() as _finish_resize:
|
with self._mock_finish_resize() as _finish_resize:
|
||||||
|
@ -6780,10 +6782,14 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase):
|
||||||
|
|
||||||
# Assert that we set the migration to an error state
|
# Assert that we set the migration to an error state
|
||||||
self.assertEqual("error", self.migration.status)
|
self.assertEqual("error", self.migration.status)
|
||||||
|
mock_del_allocs.assert_called_once_with(
|
||||||
|
self.context, self.migration.uuid)
|
||||||
|
|
||||||
|
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
|
||||||
|
'delete_allocation_for_instance')
|
||||||
@mock.patch('nova.compute.manager.ComputeManager.'
|
@mock.patch('nova.compute.manager.ComputeManager.'
|
||||||
'_notify_about_instance_usage')
|
'_notify_about_instance_usage')
|
||||||
def test_finish_resize_notify_failure(self, notify):
|
def test_finish_resize_notify_failure(self, notify, mock_del_allocs):
|
||||||
self.migration.status = 'post-migrating'
|
self.migration.status = 'post-migrating'
|
||||||
|
|
||||||
with self._mock_finish_resize():
|
with self._mock_finish_resize():
|
||||||
|
@ -6797,6 +6803,8 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase):
|
||||||
|
|
||||||
# Assert that we did not set the migration to an error state
|
# Assert that we did not set the migration to an error state
|
||||||
self.assertEqual('post-migrating', self.migration.status)
|
self.assertEqual('post-migrating', self.migration.status)
|
||||||
|
mock_del_allocs.assert_called_once_with(
|
||||||
|
self.context, self.migration.uuid)
|
||||||
|
|
||||||
@contextlib.contextmanager
|
@contextlib.contextmanager
|
||||||
def _mock_resize_instance(self):
|
def _mock_resize_instance(self):
|
||||||
|
|
Loading…
Reference in New Issue