diff --git a/nova/conductor/manager.py b/nova/conductor/manager.py index 1714126f2432..dd1e41efff23 100644 --- a/nova/conductor/manager.py +++ b/nova/conductor/manager.py @@ -736,6 +736,67 @@ class ComputeTaskManager(base.Base): instance.save() return + def _allocate_for_evacuate_dest_host(self, context, instance, host, + request_spec=None): + # The user is forcing the destination host and bypassing the + # scheduler. We need to copy the source compute node + # allocations in Placement to the destination compute node. + # Normally select_destinations() in the scheduler would do this + # for us, but when forcing the target host we don't call the + # scheduler. + source_node = None # This is used for error handling below. + try: + source_node = objects.ComputeNode.get_by_host_and_nodename( + context, instance.host, instance.node) + dest_node = ( + objects.ComputeNode.get_first_node_by_host_for_old_compat( + context, host, use_slave=True)) + except exception.ComputeHostNotFound as ex: + with excutils.save_and_reraise_exception(): + # TODO(mriedem): This ugly RequestSpec handling should be + # tucked away in _set_vm_state_and_notify. + if request_spec: + request_spec = \ + request_spec.to_legacy_request_spec_dict() + else: + request_spec = {} + self._set_vm_state_and_notify( + context, instance.uuid, 'rebuild_server', + {'vm_state': instance.vm_state, + 'task_state': None}, ex, request_spec) + if source_node: + LOG.warning('Specified host %s for evacuate was not ' + 'found.', host, instance=instance) + else: + LOG.warning('Source host %s and node %s for evacuate was ' + 'not found.', instance.host, instance.node, + instance=instance) + + # TODO(mriedem): In Queens, call select_destinations() with a + # skip_filters=True flag so the scheduler does the work of + # claiming resources on the destination in Placement but still + # bypass the scheduler filters, which honors the 'force' flag + # in the API. + try: + scheduler_utils.claim_resources_on_destination( + self.scheduler_client.reportclient, instance, + source_node, dest_node) + except exception.NoValidHost as ex: + with excutils.save_and_reraise_exception(): + # TODO(mriedem): This ugly RequestSpec handling should be + # tucked away in _set_vm_state_and_notify. + if request_spec: + request_spec = \ + request_spec.to_legacy_request_spec_dict() + else: + request_spec = {} + self._set_vm_state_and_notify( + context, instance.uuid, 'rebuild_server', + {'vm_state': instance.vm_state, + 'task_state': None}, ex, request_spec) + LOG.warning('Specified host %s for evacuate is ' + 'invalid.', host, instance=instance) + @targets_cell def rebuild_instance(self, context, instance, orig_image_ref, image_ref, injected_files, new_pass, orig_sys_metadata, @@ -746,7 +807,28 @@ class ComputeTaskManager(base.Base): with compute_utils.EventReporter(context, 'rebuild_server', instance.uuid): node = limits = None - if not host: + # The host variable is passed in two cases: + # 1. rebuild - the instance.host is passed to rebuild on the + # same host and bypass the scheduler. + # 2. evacuate with specified host and force=True - the specified + # host is passed and is meant to bypass the scheduler. + # NOTE(mriedem): This could be a lot more straight-forward if we + # had separate methods for rebuild and evacuate... + if host: + # We only create a new allocation on the specified host if + # we're doing an evacuate since that is a move operation. + if host != instance.host: + # If a destination host is forced for evacuate, create + # allocations against it in Placement. + self._allocate_for_evacuate_dest_host( + context, instance, host, request_spec) + else: + # At this point, either the user is doing a rebuild on the + # same host (not evacuate), or they are evacuating and + # specified a host but are not forcing it. The API passes + # host=None in this case but sets up the + # RequestSpec.requested_destination field for the specified + # host. if not request_spec: # NOTE(sbauza): We were unable to find an original # RequestSpec object - probably because the instance is old diff --git a/nova/tests/functional/test_servers.py b/nova/tests/functional/test_servers.py index 6347c6bbe6be..14e0457d1942 100644 --- a/nova/tests/functional/test_servers.py +++ b/nova/tests/functional/test_servers.py @@ -1712,20 +1712,14 @@ class ServerMovingTests(ProviderUsageBaseTestCase): self.assertFlavorMatchesAllocation(self.flavor1, source_usages) dest_usages = self._get_provider_usages(dest_rp_uuid) - # FIXME(mriedem): Due to bug 1713786 the dest node won't have - # allocations. Uncomment when fixed. - # self.assertFlavorMatchesAllocation(self.flavor1, dest_usages) - self.assertFlavorMatchesAllocation( - {'vcpus': 0, 'ram': 0, 'disk': 0}, dest_usages) + self.assertFlavorMatchesAllocation(self.flavor1, dest_usages) allocations = self._get_allocations_by_server_uuid(server['id']) - # FIXME(mriedem): Uncomment when bug 1713786 is fixed - # self.assertEqual(2, len(allocations)) - self.assertEqual(1, len(allocations)) + self.assertEqual(2, len(allocations)) source_allocation = allocations[source_rp_uuid]['resources'] self.assertFlavorMatchesAllocation(self.flavor1, source_allocation) - # dest_allocation = allocations[dest_rp_uuid]['resources'] - # self.assertFlavorMatchesAllocation(self.flavor1, dest_allocation) + dest_allocation = allocations[dest_rp_uuid]['resources'] + self.assertFlavorMatchesAllocation(self.flavor1, dest_allocation) # start up the source compute self.compute1.start() @@ -1738,30 +1732,18 @@ class ServerMovingTests(ProviderUsageBaseTestCase): # When the source node starts up, the instance has moved so the # ResourceTracker should cleanup allocations for the source node. source_usages = self._get_provider_usages(source_rp_uuid) - # FIXME(mriedem): Due to bug 1713786, the source node fails to - # remove the allocation because the dest allocation doesn't exist - # yet and Placement won't allow an empty allocation body. Uncomment - # when fixed. - # self.assertEqual( - # {'VCPU': 0, 'MEMORY_MB': 0, 'DISK_GB': 0}, source_usages) - self.assertFlavorMatchesAllocation(self.flavor1, source_usages) + self.assertEqual( + {'VCPU': 0, 'MEMORY_MB': 0, 'DISK_GB': 0}, source_usages) # The usages/allocations should still exist on the destination node # after the source node starts back up. dest_usages = self._get_provider_usages(dest_rp_uuid) - # FIXME(mriedem): Uncomment when bug 1713786 is fixed. - # self.assertFlavorMatchesAllocation(self.flavor1, dest_usages) - self.assertFlavorMatchesAllocation( - {'vcpus': 0, 'ram': 0, 'disk': 0}, dest_usages) + self.assertFlavorMatchesAllocation(self.flavor1, dest_usages) allocations = self._get_allocations_by_server_uuid(server['id']) self.assertEqual(1, len(allocations)) - # FIXME(mriedem): Due to bug 1713786 it's just the source for now. - # Uncomment when fixed. - # dest_allocation = allocations[dest_rp_uuid]['resources'] - # self.assertFlavorMatchesAllocation(self.flavor1, dest_allocation) - source_allocation = allocations[source_rp_uuid]['resources'] - self.assertFlavorMatchesAllocation(self.flavor1, source_allocation) + dest_allocation = allocations[dest_rp_uuid]['resources'] + self.assertFlavorMatchesAllocation(self.flavor1, dest_allocation) self._delete_and_check_allocations( server, source_rp_uuid, dest_rp_uuid) diff --git a/nova/tests/unit/conductor/test_conductor.py b/nova/tests/unit/conductor/test_conductor.py index cd4cfd99a495..ad96d1c1282b 100644 --- a/nova/tests/unit/conductor/test_conductor.py +++ b/nova/tests/unit/conductor/test_conductor.py @@ -2517,6 +2517,91 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase): fake_inst.system_metadata) mock_save.assert_called_once_with() + @mock.patch.object(objects.ComputeNode, 'get_by_host_and_nodename', + side_effect=exc.ComputeHostNotFound('source-host')) + def test_allocate_for_evacuate_dest_host_source_node_not_found_no_reqspec( + self, get_compute_node): + """Tests that the source node for the instance isn't found. In this + case there is no request spec provided. + """ + instance = self.params['build_requests'][0].instance + instance.host = 'source-host' + with mock.patch.object(self.conductor, + '_set_vm_state_and_notify') as notify: + ex = self.assertRaises( + exc.ComputeHostNotFound, + self.conductor._allocate_for_evacuate_dest_host, + self.ctxt, instance, 'dest-host') + get_compute_node.assert_called_once_with( + self.ctxt, instance.host, instance.node) + notify.assert_called_once_with( + self.ctxt, instance.uuid, 'rebuild_server', + {'vm_state': instance.vm_state, 'task_state': None}, ex, {}) + + @mock.patch.object(objects.ComputeNode, 'get_by_host_and_nodename', + return_value=objects.ComputeNode(host='source-host')) + @mock.patch.object(objects.ComputeNode, + 'get_first_node_by_host_for_old_compat', + side_effect=exc.ComputeHostNotFound(host='dest-host')) + def test_allocate_for_evacuate_dest_host_dest_node_not_found_reqspec( + self, get_dest_node, get_source_node): + """Tests that the destination node for the request isn't found. In this + case there is a request spec provided. + """ + instance = self.params['build_requests'][0].instance + instance.host = 'source-host' + reqspec = self.params['request_specs'][0] + with mock.patch.object(self.conductor, + '_set_vm_state_and_notify') as notify: + ex = self.assertRaises( + exc.ComputeHostNotFound, + self.conductor._allocate_for_evacuate_dest_host, + self.ctxt, instance, 'dest-host', reqspec) + get_source_node.assert_called_once_with( + self.ctxt, instance.host, instance.node) + get_dest_node.assert_called_once_with( + self.ctxt, 'dest-host', use_slave=True) + notify.assert_called_once_with( + self.ctxt, instance.uuid, 'rebuild_server', + {'vm_state': instance.vm_state, 'task_state': None}, ex, + reqspec.to_legacy_request_spec_dict()) + + @mock.patch.object(objects.ComputeNode, 'get_by_host_and_nodename', + return_value=objects.ComputeNode(host='source-host')) + @mock.patch.object(objects.ComputeNode, + 'get_first_node_by_host_for_old_compat', + return_value=objects.ComputeNode(host='dest-host')) + def test_allocate_for_evacuate_dest_host_claim_fails( + self, get_dest_node, get_source_node): + """Tests that the allocation claim fails.""" + instance = self.params['build_requests'][0].instance + instance.host = 'source-host' + reqspec = self.params['request_specs'][0] + with test.nested( + mock.patch.object(self.conductor, + '_set_vm_state_and_notify'), + mock.patch.object(scheduler_utils, + 'claim_resources_on_destination', + side_effect=exc.NoValidHost(reason='I am full')) + ) as ( + notify, claim + ): + ex = self.assertRaises( + exc.NoValidHost, + self.conductor._allocate_for_evacuate_dest_host, + self.ctxt, instance, 'dest-host', reqspec) + get_source_node.assert_called_once_with( + self.ctxt, instance.host, instance.node) + get_dest_node.assert_called_once_with( + self.ctxt, 'dest-host', use_slave=True) + claim.assert_called_once_with( + self.conductor.scheduler_client.reportclient, instance, + get_source_node.return_value, get_dest_node.return_value) + notify.assert_called_once_with( + self.ctxt, instance.uuid, 'rebuild_server', + {'vm_state': instance.vm_state, 'task_state': None}, ex, + reqspec.to_legacy_request_spec_dict()) + class ConductorTaskRPCAPITestCase(_BaseTaskTestCase, test_compute.BaseTestCase): diff --git a/releasenotes/notes/bug-1713786-0ee9e543683dafa4.yaml b/releasenotes/notes/bug-1713786-0ee9e543683dafa4.yaml new file mode 100644 index 000000000000..1ff00d0dbb1c --- /dev/null +++ b/releasenotes/notes/bug-1713786-0ee9e543683dafa4.yaml @@ -0,0 +1,12 @@ +--- +fixes: + - | + When forcing a specified destination host during evacuate, the + scheduler is bypassed but resource allocations will still be made in the + Placement service against the forced destination host. If the resource + allocation against the destination host fails, the evacuate operation + will fail, regardless of the ``force`` flag being specified in the API. + The guest will be unchanged on the source host. For more details, see + `bug 1713786`_. + + .. _bug 1713786: https://bugs.launchpad.net/nova/+bug/1713786 \ No newline at end of file