Remove dest node allocation if evacuate MoveClaim fails

If the MoveClaim during an evacuate fails we need to
remove the destination node allocation since the
ResourceTracker isn't going to do it.

This also fixes test_rebuild_server_exc which was faking
a rebuild failure by raising an error that would not
actually happen during a rebuild. ComputeResourcesUnavailable
only happens if a claim fails, and a rebuild — which happens
on the same host that the instance is already on — does not
involve a claim attempt.

Change-Id: I59ff47bf773c9831d0f44e1caf7877fe08c911c3
Closes-Bug: #1713786
This commit is contained in:
Matt Riedemann 2017-09-01 09:58:03 -04:00
parent 30946f9a5e
commit 5bc137f7eb
5 changed files with 25 additions and 27 deletions

View File

@@ -82,9 +82,9 @@
"nova_object.name": "ExceptionPayload",
"nova_object.data": {
"module_name": "nova.tests.functional.notification_sample_tests.test_instance",
"exception_message": "Insufficient compute resources: fake-resource.",
"function_name": "_compute_resources_unavailable",
"exception": "ComputeResourcesUnavailable"
"exception_message": "Virtual Interface creation failed",
"function_name": "_virtual_interface_create_failed",
"exception": "VirtualInterfaceCreateException"
},
"nova_object.version": "1.0",
"nova_object.namespace": "nova"

View File

@@ -2799,6 +2799,13 @@ class ComputeManager(manager.Manager):
# NOTE(ndipanov): We just abort the build for now and leave a
# migration record for potential cleanup later
self._set_migration_status(migration, 'failed')
# Since the claim failed, we need to remove the allocation
# created against the destination node. Note that we can only
# get here when evacuating to a destination node. Rebuilding
# on the same host (not evacuate) uses the NopClaim which will
# not raise ComputeResourcesUnavailable.
rt.delete_allocation_for_evacuated_instance(
instance, scheduled_node, node_type='destination')
self._notify_instance_rebuild_error(context, instance, e)
raise exception.BuildAbortException(

View File

@@ -1210,14 +1210,16 @@ class ResourceTracker(object):
"host that might need to be removed: %s.",
instance_uuid, instance.host, instance.node, alloc)
def delete_allocation_for_evacuated_instance(self, instance, node):
self._delete_allocation_for_moved_instance(instance, node, 'evacuated')
def delete_allocation_for_evacuated_instance(self, instance, node,
node_type='source'):
self._delete_allocation_for_moved_instance(
instance, node, 'evacuated', node_type)
def delete_allocation_for_migrated_instance(self, instance, node):
self._delete_allocation_for_moved_instance(instance, node, 'migrated')
def _delete_allocation_for_moved_instance(
self, instance, node, move_type):
self, instance, node, move_type, node_type='source'):
# Clean up the instance allocation from this node in placement
my_resources = scheduler_utils.resources_from_flavor(
instance, instance.flavor)
@@ -1229,8 +1231,8 @@
instance.project_id, my_resources)
if not res:
LOG.error("Failed to clean allocation of %s "
"instance on the source node %s",
move_type, cn_uuid, instance=instance)
"instance on the %s node %s",
move_type, node_type, cn_uuid, instance=instance)
def delete_allocation_for_failed_resize(self, instance, node, flavor):
"""Delete instance allocations for the node during a failed resize

View File

@@ -725,9 +725,9 @@ class TestInstanceNotificationSample(
@mock.patch('nova.compute.manager.ComputeManager.'
'_do_rebuild_instance_with_claim')
def test_rebuild_server_exc(self, mock_rebuild):
def _compute_resources_unavailable(*args, **kwargs):
raise exception.ComputeResourcesUnavailable(
reason="fake-resource")
def _virtual_interface_create_failed(*args, **kwargs):
# A real error that could come out of driver.spawn() during rebuild
raise exception.VirtualInterfaceCreateException()
server = self._boot_a_server(
extra_params={'networks': [{'port': self.neutron.port_1['id']}]})
@@ -742,7 +742,7 @@ class TestInstanceNotificationSample(
}
}
self.api.post_server_action(server['id'], post)
mock_rebuild.side_effect = _compute_resources_unavailable
mock_rebuild.side_effect = _virtual_interface_create_failed
self._wait_for_state_change(self.api, server, expected_status='ERROR')
notification = self._get_notifications('instance.rebuild.error')
self.assertEqual(1, len(notification))

View File

@@ -1800,20 +1800,13 @@ class ServerMovingTests(ProviderUsageBaseTestCase):
self.assertFlavorMatchesAllocation(self.flavor1, source_usages)
dest_usages = self._get_provider_usages(dest_rp_uuid)
# FIXME(mriedem): This is bug 1713786 where the claim fails and the
# dest node allocation isn't cleaned up. Uncomment when fixed.
# self.assertFlavorMatchesAllocation(
# {'vcpus': 0, 'ram': 0, 'disk': 0}, dest_usages)
self.assertFlavorMatchesAllocation(self.flavor1, dest_usages)
self.assertFlavorMatchesAllocation(
{'vcpus': 0, 'ram': 0, 'disk': 0}, dest_usages)
allocations = self._get_allocations_by_server_uuid(server['id'])
# FIXME(mriedem): Uncomment when bug 1713786 is fixed.
# self.assertEqual(1, len(allocations))
self.assertEqual(2, len(allocations))
self.assertEqual(1, len(allocations))
source_allocation = allocations[source_rp_uuid]['resources']
self.assertFlavorMatchesAllocation(self.flavor1, source_allocation)
dest_allocation = allocations[dest_rp_uuid]['resources']
self.assertFlavorMatchesAllocation(self.flavor1, dest_allocation)
# start up the source compute
self.compute1.start()
@@ -1829,13 +1822,9 @@ class ServerMovingTests(ProviderUsageBaseTestCase):
self.assertFlavorMatchesAllocation(self.flavor1, source_usages)
allocations = self._get_allocations_by_server_uuid(server['id'])
# FIXME(mriedem): Uncomment when bug 1713786 is fixed.
# self.assertEqual(1, len(allocations))
self.assertEqual(2, len(allocations))
self.assertEqual(1, len(allocations))
source_allocation = allocations[source_rp_uuid]['resources']
self.assertFlavorMatchesAllocation(self.flavor1, source_allocation)
dest_allocation = allocations[dest_rp_uuid]['resources']
self.assertFlavorMatchesAllocation(self.flavor1, dest_allocation)
def test_evacuate_rebuild_on_dest_fails(self):
"""Tests that the allocations on the destination node are cleaned up