Merge "Create allocations against forced dest host during evacuate" into stable/pike

2017-09-15 18:04:07 +00:00 · 2017-09-15 18:04:07 +00:00 · 9a34826f6d
parent ca5400ebcf 2115094824
commit 9a34826f6d
4 changed files with 189 additions and 28 deletions
--- a/nova/conductor/manager.py
+++ b/nova/conductor/manager.py
@ -736,6 +736,67 @@ class ComputeTaskManager(base.Base):
            instance.save()
            return

+    def _allocate_for_evacuate_dest_host(self, context, instance, host,
+                                         request_spec=None):
+        # The user is forcing the destination host and bypassing the
+        # scheduler. We need to copy the source compute node
+        # allocations in Placement to the destination compute node.
+        # Normally select_destinations() in the scheduler would do this
+        # for us, but when forcing the target host we don't call the
+        # scheduler.
+        source_node = None  # This is used for error handling below.
+        try:
+            source_node = objects.ComputeNode.get_by_host_and_nodename(
+                context, instance.host, instance.node)
+            dest_node = (
+                objects.ComputeNode.get_first_node_by_host_for_old_compat(
+                    context, host, use_slave=True))
+        except exception.ComputeHostNotFound as ex:
+            with excutils.save_and_reraise_exception():
+                # TODO(mriedem): This ugly RequestSpec handling should be
+                # tucked away in _set_vm_state_and_notify.
+                if request_spec:
+                    request_spec = \
+                        request_spec.to_legacy_request_spec_dict()
+                else:
+                    request_spec = {}
+                self._set_vm_state_and_notify(
+                    context, instance.uuid, 'rebuild_server',
+                    {'vm_state': instance.vm_state,
+                     'task_state': None}, ex, request_spec)
+                if source_node:
+                    LOG.warning('Specified host %s for evacuate was not '
+                                'found.', host, instance=instance)
+                else:
+                    LOG.warning('Source host %s and node %s for evacuate was '
+                                'not found.', instance.host, instance.node,
+                                instance=instance)
+
+        # TODO(mriedem): In Queens, call select_destinations() with a
+        # skip_filters=True flag so the scheduler does the work of
+        # claiming resources on the destination in Placement but still
+        # bypass the scheduler filters, which honors the 'force' flag
+        # in the API.
+        try:
+            scheduler_utils.claim_resources_on_destination(
+                self.scheduler_client.reportclient, instance,
+                source_node, dest_node)
+        except exception.NoValidHost as ex:
+            with excutils.save_and_reraise_exception():
+                # TODO(mriedem): This ugly RequestSpec handling should be
+                # tucked away in _set_vm_state_and_notify.
+                if request_spec:
+                    request_spec = \
+                        request_spec.to_legacy_request_spec_dict()
+                else:
+                    request_spec = {}
+                self._set_vm_state_and_notify(
+                    context, instance.uuid, 'rebuild_server',
+                    {'vm_state': instance.vm_state,
+                     'task_state': None}, ex, request_spec)
+                LOG.warning('Specified host %s for evacuate is '
+                            'invalid.', host, instance=instance)
+
    @targets_cell
    def rebuild_instance(self, context, instance, orig_image_ref, image_ref,
                         injected_files, new_pass, orig_sys_metadata,
@ -746,7 +807,28 @@ class ComputeTaskManager(base.Base):
        with compute_utils.EventReporter(context, 'rebuild_server',
                                          instance.uuid):
            node = limits = None
-            if not host:
+            # The host variable is passed in two cases:
+            # 1. rebuild - the instance.host is passed to rebuild on the
+            #       same host and bypass the scheduler.
+            # 2. evacuate with specified host and force=True - the specified
+            #       host is passed and is meant to bypass the scheduler.
+            # NOTE(mriedem): This could be a lot more straight-forward if we
+            # had separate methods for rebuild and evacuate...
+            if host:
+                # We only create a new allocation on the specified host if
+                # we're doing an evacuate since that is a move operation.
+                if host != instance.host:
+                    # If a destination host is forced for evacuate, create
+                    # allocations against it in Placement.
+                    self._allocate_for_evacuate_dest_host(
+                        context, instance, host, request_spec)
+            else:
+                # At this point, either the user is doing a rebuild on the
+                # same host (not evacuate), or they are evacuating and
+                # specified a host but are not forcing it. The API passes
+                # host=None in this case but sets up the
+                # RequestSpec.requested_destination field for the specified
+                # host.
                if not request_spec:
                    # NOTE(sbauza): We were unable to find an original
                    # RequestSpec object - probably because the instance is old
--- a/nova/tests/functional/test_servers.py
+++ b/nova/tests/functional/test_servers.py
@ -1704,20 +1704,14 @@ class ServerMovingTests(test.TestCase, integrated_helpers.InstanceHelperMixin):
        self.assertFlavorMatchesAllocation(self.flavor1, source_usages)

        dest_usages = self._get_provider_usages(dest_rp_uuid)
-        # FIXME(mriedem): Due to bug 1713786 the dest node won't have
-        # allocations. Uncomment when fixed.
-        # self.assertFlavorMatchesAllocation(self.flavor1, dest_usages)
-        self.assertFlavorMatchesAllocation(
-            {'vcpus': 0, 'ram': 0, 'disk': 0}, dest_usages)
+        self.assertFlavorMatchesAllocation(self.flavor1, dest_usages)

        allocations = self._get_allocations_by_server_uuid(server['id'])
-        # FIXME(mriedem): Uncomment when bug 1713786 is fixed
-        # self.assertEqual(2, len(allocations))
-        self.assertEqual(1, len(allocations))
+        self.assertEqual(2, len(allocations))
        source_allocation = allocations[source_rp_uuid]['resources']
        self.assertFlavorMatchesAllocation(self.flavor1, source_allocation)
-        # dest_allocation = allocations[dest_rp_uuid]['resources']
-        # self.assertFlavorMatchesAllocation(self.flavor1, dest_allocation)
+        dest_allocation = allocations[dest_rp_uuid]['resources']
+        self.assertFlavorMatchesAllocation(self.flavor1, dest_allocation)

        # start up the source compute
        self.compute1.start()
@ -1730,30 +1724,18 @@ class ServerMovingTests(test.TestCase, integrated_helpers.InstanceHelperMixin):
        # When the source node starts up, the instance has moved so the
        # ResourceTracker should cleanup allocations for the source node.
        source_usages = self._get_provider_usages(source_rp_uuid)
-        # FIXME(mriedem): Due to bug 1713786, the source node fails to
-        # remove the allocation because the dest allocation doesn't exist
-        # yet and Placement won't allow an empty allocation body. Uncomment
-        # when fixed.
-        # self.assertEqual(
-        #     {'VCPU': 0, 'MEMORY_MB': 0, 'DISK_GB': 0}, source_usages)
-        self.assertFlavorMatchesAllocation(self.flavor1, source_usages)
+        self.assertEqual(
+            {'VCPU': 0, 'MEMORY_MB': 0, 'DISK_GB': 0}, source_usages)

        # The usages/allocations should still exist on the destination node
        # after the source node starts back up.
        dest_usages = self._get_provider_usages(dest_rp_uuid)
-        # FIXME(mriedem): Uncomment when bug 1713786 is fixed.
-        # self.assertFlavorMatchesAllocation(self.flavor1, dest_usages)
-        self.assertFlavorMatchesAllocation(
-            {'vcpus': 0, 'ram': 0, 'disk': 0}, dest_usages)
+        self.assertFlavorMatchesAllocation(self.flavor1, dest_usages)

        allocations = self._get_allocations_by_server_uuid(server['id'])
        self.assertEqual(1, len(allocations))
-        # FIXME(mriedem): Due to bug 1713786 it's just the source for now.
-        # Uncomment when fixed.
-        # dest_allocation = allocations[dest_rp_uuid]['resources']
-        # self.assertFlavorMatchesAllocation(self.flavor1, dest_allocation)
-        source_allocation = allocations[source_rp_uuid]['resources']
-        self.assertFlavorMatchesAllocation(self.flavor1, source_allocation)
+        dest_allocation = allocations[dest_rp_uuid]['resources']
+        self.assertFlavorMatchesAllocation(self.flavor1, dest_allocation)

        self._delete_and_check_allocations(
            server, source_rp_uuid, dest_rp_uuid)
--- a/nova/tests/unit/conductor/test_conductor.py
+++ b/nova/tests/unit/conductor/test_conductor.py
@ -2517,6 +2517,91 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase):
                         fake_inst.system_metadata)
        mock_save.assert_called_once_with()

+    @mock.patch.object(objects.ComputeNode, 'get_by_host_and_nodename',
+                       side_effect=exc.ComputeHostNotFound('source-host'))
+    def test_allocate_for_evacuate_dest_host_source_node_not_found_no_reqspec(
+            self, get_compute_node):
+        """Tests that the source node for the instance isn't found. In this
+        case there is no request spec provided.
+        """
+        instance = self.params['build_requests'][0].instance
+        instance.host = 'source-host'
+        with mock.patch.object(self.conductor,
+                               '_set_vm_state_and_notify') as notify:
+            ex = self.assertRaises(
+                exc.ComputeHostNotFound,
+                self.conductor._allocate_for_evacuate_dest_host,
+                self.ctxt, instance, 'dest-host')
+        get_compute_node.assert_called_once_with(
+            self.ctxt, instance.host, instance.node)
+        notify.assert_called_once_with(
+            self.ctxt, instance.uuid, 'rebuild_server',
+            {'vm_state': instance.vm_state, 'task_state': None}, ex, {})
+
+    @mock.patch.object(objects.ComputeNode, 'get_by_host_and_nodename',
+                       return_value=objects.ComputeNode(host='source-host'))
+    @mock.patch.object(objects.ComputeNode,
+                       'get_first_node_by_host_for_old_compat',
+                       side_effect=exc.ComputeHostNotFound(host='dest-host'))
+    def test_allocate_for_evacuate_dest_host_dest_node_not_found_reqspec(
+            self, get_dest_node, get_source_node):
+        """Tests that the destination node for the request isn't found. In this
+        case there is a request spec provided.
+        """
+        instance = self.params['build_requests'][0].instance
+        instance.host = 'source-host'
+        reqspec = self.params['request_specs'][0]
+        with mock.patch.object(self.conductor,
+                               '_set_vm_state_and_notify') as notify:
+            ex = self.assertRaises(
+                exc.ComputeHostNotFound,
+                self.conductor._allocate_for_evacuate_dest_host,
+                self.ctxt, instance, 'dest-host', reqspec)
+        get_source_node.assert_called_once_with(
+            self.ctxt, instance.host, instance.node)
+        get_dest_node.assert_called_once_with(
+            self.ctxt, 'dest-host', use_slave=True)
+        notify.assert_called_once_with(
+            self.ctxt, instance.uuid, 'rebuild_server',
+            {'vm_state': instance.vm_state, 'task_state': None}, ex,
+            reqspec.to_legacy_request_spec_dict())
+
+    @mock.patch.object(objects.ComputeNode, 'get_by_host_and_nodename',
+                       return_value=objects.ComputeNode(host='source-host'))
+    @mock.patch.object(objects.ComputeNode,
+                       'get_first_node_by_host_for_old_compat',
+                       return_value=objects.ComputeNode(host='dest-host'))
+    def test_allocate_for_evacuate_dest_host_claim_fails(
+            self, get_dest_node, get_source_node):
+        """Tests that the allocation claim fails."""
+        instance = self.params['build_requests'][0].instance
+        instance.host = 'source-host'
+        reqspec = self.params['request_specs'][0]
+        with test.nested(
+            mock.patch.object(self.conductor,
+                              '_set_vm_state_and_notify'),
+            mock.patch.object(scheduler_utils,
+                              'claim_resources_on_destination',
+                              side_effect=exc.NoValidHost(reason='I am full'))
+        ) as (
+            notify, claim
+        ):
+            ex = self.assertRaises(
+                exc.NoValidHost,
+                self.conductor._allocate_for_evacuate_dest_host,
+                self.ctxt, instance, 'dest-host', reqspec)
+        get_source_node.assert_called_once_with(
+            self.ctxt, instance.host, instance.node)
+        get_dest_node.assert_called_once_with(
+            self.ctxt, 'dest-host', use_slave=True)
+        claim.assert_called_once_with(
+            self.conductor.scheduler_client.reportclient, instance,
+            get_source_node.return_value, get_dest_node.return_value)
+        notify.assert_called_once_with(
+            self.ctxt, instance.uuid, 'rebuild_server',
+            {'vm_state': instance.vm_state, 'task_state': None}, ex,
+            reqspec.to_legacy_request_spec_dict())
+

 class ConductorTaskRPCAPITestCase(_BaseTaskTestCase,
        test_compute.BaseTestCase):
--- a/releasenotes/notes/bug-1713786-0ee9e543683dafa4.yaml
+++ b/releasenotes/notes/bug-1713786-0ee9e543683dafa4.yaml
@ -0,0 +1,12 @@
+---
+fixes:
+  - |
+    When forcing a specified destination host during evacuate, the
+    scheduler is bypassed but resource allocations will still be made in the
+    Placement service against the forced destination host. If the resource
+    allocation against the destination host fails, the evacuate operation
+    will fail, regardless of the ``force`` flag being specified in the API.
+    The guest will be unchanged on the source host. For more details, see
+    `bug 1713786`_.
+
+    .. _bug 1713786: https://bugs.launchpad.net/nova/+bug/1713786