From 5bc137f7ebc266f8a73d6febc7c10d8d648924e0 Mon Sep 17 00:00:00 2001 From: Matt Riedemann Date: Fri, 1 Sep 2017 09:58:03 -0400 Subject: [PATCH] Remove dest node allocation if evacuate MoveClaim fails If the MoveClaim during an evacuate fails we need to remove the destination node allocation since the ResourceTracker isn't going to do it. This also fixes test_rebuild_server_exc which was faking a rebuild failure by raising an error that would not actually happen during a rebuild. ComputeResourcesUnavailable only happens if a claim fails, and for a rebuild, which is on the same host that the instance is already on, doesn't involve a claim attempt. Change-Id: I59ff47bf773c9831d0f44e1caf7877fe08c911c3 Closes-Bug: #1713786 --- .../instance-rebuild-error.json | 6 +++--- nova/compute/manager.py | 7 +++++++ nova/compute/resource_tracker.py | 12 +++++++----- .../test_instance.py | 8 ++++---- nova/tests/functional/test_servers.py | 19 ++++--------------- 5 files changed, 25 insertions(+), 27 deletions(-) diff --git a/doc/notification_samples/instance-rebuild-error.json b/doc/notification_samples/instance-rebuild-error.json index 9a9ea19ba6dd..c840b05da96e 100644 --- a/doc/notification_samples/instance-rebuild-error.json +++ b/doc/notification_samples/instance-rebuild-error.json @@ -82,9 +82,9 @@ "nova_object.name": "ExceptionPayload", "nova_object.data": { "module_name": "nova.tests.functional.notification_sample_tests.test_instance", - "exception_message": "Insufficient compute resources: fake-resource.", - "function_name": "_compute_resources_unavailable", - "exception": "ComputeResourcesUnavailable" + "exception_message": "Virtual Interface creation failed", + "function_name": "_virtual_interface_create_failed", + "exception": "VirtualInterfaceCreateException" }, "nova_object.version": "1.0", "nova_object.namespace": "nova" diff --git a/nova/compute/manager.py b/nova/compute/manager.py index f5443c48f0bb..7011e3172e7d 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -2799,6 +2799,13 @@ class ComputeManager(manager.Manager): # NOTE(ndipanov): We just abort the build for now and leave a # migration record for potential cleanup later self._set_migration_status(migration, 'failed') + # Since the claim failed, we need to remove the allocation + # created against the destination node. Note that we can only + # get here when evacuating to a destination node. Rebuilding + # on the same host (not evacuate) uses the NopClaim which will + # not raise ComputeResourcesUnavailable. + rt.delete_allocation_for_evacuated_instance( + instance, scheduled_node, node_type='destination') self._notify_instance_rebuild_error(context, instance, e) raise exception.BuildAbortException( diff --git a/nova/compute/resource_tracker.py b/nova/compute/resource_tracker.py index 5cfc724fa3e0..109fddf637f5 100644 --- a/nova/compute/resource_tracker.py +++ b/nova/compute/resource_tracker.py @@ -1210,14 +1210,16 @@ class ResourceTracker(object): "host that might need to be removed: %s.", instance_uuid, instance.host, instance.node, alloc) - def delete_allocation_for_evacuated_instance(self, instance, node): - self._delete_allocation_for_moved_instance(instance, node, 'evacuated') + def delete_allocation_for_evacuated_instance(self, instance, node, + node_type='source'): + self._delete_allocation_for_moved_instance( + instance, node, 'evacuated', node_type) def delete_allocation_for_migrated_instance(self, instance, node): self._delete_allocation_for_moved_instance(instance, node, 'migrated') def _delete_allocation_for_moved_instance( - self, instance, node, move_type): + self, instance, node, move_type, node_type='source'): # Clean up the instance allocation from this node in placement my_resources = scheduler_utils.resources_from_flavor( instance, instance.flavor) @@ -1229,8 +1231,8 @@ class ResourceTracker(object): instance.project_id, my_resources) if not res: LOG.error("Failed to clean allocation of %s " - "instance on the source node %s", - move_type, cn_uuid, instance=instance) + "instance on the %s node %s", + move_type, node_type, cn_uuid, instance=instance) def delete_allocation_for_failed_resize(self, instance, node, flavor): """Delete instance allocations for the node during a failed resize diff --git a/nova/tests/functional/notification_sample_tests/test_instance.py b/nova/tests/functional/notification_sample_tests/test_instance.py index d8b7ff7cbf17..a7145eb62fe7 100644 --- a/nova/tests/functional/notification_sample_tests/test_instance.py +++ b/nova/tests/functional/notification_sample_tests/test_instance.py @@ -725,9 +725,9 @@ class TestInstanceNotificationSample( @mock.patch('nova.compute.manager.ComputeManager.' '_do_rebuild_instance_with_claim') def test_rebuild_server_exc(self, mock_rebuild): - def _compute_resources_unavailable(*args, **kwargs): - raise exception.ComputeResourcesUnavailable( - reason="fake-resource") + def _virtual_interface_create_failed(*args, **kwargs): + # A real error that could come out of driver.spawn() during rebuild + raise exception.VirtualInterfaceCreateException() server = self._boot_a_server( extra_params={'networks': [{'port': self.neutron.port_1['id']}]}) @@ -742,7 +742,7 @@ class TestInstanceNotificationSample( } } self.api.post_server_action(server['id'], post) - mock_rebuild.side_effect = _compute_resources_unavailable + mock_rebuild.side_effect = _virtual_interface_create_failed self._wait_for_state_change(self.api, server, expected_status='ERROR') notification = self._get_notifications('instance.rebuild.error') self.assertEqual(1, len(notification)) diff --git a/nova/tests/functional/test_servers.py b/nova/tests/functional/test_servers.py index ef07de72c2e7..9bd57b5e4336 100644 --- a/nova/tests/functional/test_servers.py +++ b/nova/tests/functional/test_servers.py @@ -1800,20 +1800,13 @@ class ServerMovingTests(ProviderUsageBaseTestCase): self.assertFlavorMatchesAllocation(self.flavor1, source_usages) dest_usages = self._get_provider_usages(dest_rp_uuid) - # FIXME(mriedem): This is bug 1713786 where the claim fails and the - # dest node allocation isn't cleaned up. Uncomment when fixed. - # self.assertFlavorMatchesAllocation( - # {'vcpus': 0, 'ram': 0, 'disk': 0}, dest_usages) - self.assertFlavorMatchesAllocation(self.flavor1, dest_usages) + self.assertFlavorMatchesAllocation( + {'vcpus': 0, 'ram': 0, 'disk': 0}, dest_usages) allocations = self._get_allocations_by_server_uuid(server['id']) - # FIXME(mriedem): Uncomment when bug 1713786 is fixed. - # self.assertEqual(1, len(allocations)) - self.assertEqual(2, len(allocations)) + self.assertEqual(1, len(allocations)) source_allocation = allocations[source_rp_uuid]['resources'] self.assertFlavorMatchesAllocation(self.flavor1, source_allocation) - dest_allocation = allocations[dest_rp_uuid]['resources'] - self.assertFlavorMatchesAllocation(self.flavor1, dest_allocation) # start up the source compute self.compute1.start() @@ -1829,13 +1822,9 @@ class ServerMovingTests(ProviderUsageBaseTestCase): self.assertFlavorMatchesAllocation(self.flavor1, source_usages) allocations = self._get_allocations_by_server_uuid(server['id']) - # FIXME(mriedem): Uncomment when bug 1713786 is fixed. - # self.assertEqual(1, len(allocations)) - self.assertEqual(2, len(allocations)) + self.assertEqual(1, len(allocations)) source_allocation = allocations[source_rp_uuid]['resources'] self.assertFlavorMatchesAllocation(self.flavor1, source_allocation) - dest_allocation = allocations[dest_rp_uuid]['resources'] - self.assertFlavorMatchesAllocation(self.flavor1, dest_allocation) def test_evacuate_rebuild_on_dest_fails(self): """Tests that the allocations on the destination node are cleaned up