Update RequestSpec.instance_uuid during scheduling

Before change I4b67ec9dd4ce846a704d0f75ad64c41e693de0fb
the ServerGroupAntiAffinityFilter did not rely on the
HostState.instances dict to determine **within the same
request** how many members of the same anti-affinity
group were on a given host. In fact, at that time, the
HostState.instances dict wasn't updated between filter
runs in the same multi-create request. That was fixed with
change Iacc636fa8a59a9e8670a8d683c10bdbb0dc8237b so that
as we select a host for each group member being created
within a single request, we also update the HostState.instances
dict so the ServerGroupAntiAffinityFilter would have
accurate tracking of which group members were on which
hosts.
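
For illustration only, a minimal sketch (toy classes, not the actual Nova
HostState or filter code) of the bookkeeping described above: as each group
member in the request is assigned a host, that host's instances dict is
updated so the next filter run can count the group members already placed
there.

    class FakeHostState(object):
        def __init__(self, host):
            self.host = host
            self.instances = {}  # instance_uuid -> instance

    def consume_selected_host(host_state, instance_uuid):
        # Record the placement so later filter runs in the same
        # multi-create request see this group member on the host.
        host_state.instances[instance_uuid] = object()

    def group_members_on_host(host_state, group_member_uuids):
        # What an anti-affinity check conceptually counts per host.
        return set(host_state.instances) & set(group_member_uuids)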

However, that did not account for a wrinkle in the filter
added in change Ie016f59f5b98bb9c70b3e33556bd747f79fc77bd,
which is needed to allow resizing a server to the same host
when that server is in an anti-affinity group. That wrinkle,
combined with the fact that the RequestSpec the filter is
acting on for a given instance in a multi-create request
might not actually carry that instance's uuid, can cause the
filter to think it's in a resize situation and accept a host
which already has another member of the same anti-affinity
group on it, breaking the anti-affinity policy.
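
To make the failure mode concrete, here is a simplified sketch of that
resize allowance (paraphrased logic, not the exact
ServerGroupAntiAffinityFilter code): if the instance being scheduled already
appears among the group members on the host, the filter assumes a same-host
resize and passes the host.

    def host_passes(host_state, spec_obj, group_member_uuids):
        members_on_host = set(host_state.instances) & set(group_member_uuids)
        if not members_on_host:
            # No other group members here; anti-affinity is satisfied.
            return True
        # The "wrinkle": allow a same-host resize of an existing member.
        # If spec_obj.instance_uuid is stale (it still names the first
        # instance of the multi-create request, which was just placed on
        # this host), this passes when it should not.
        return spec_obj.instance_uuid in members_on_host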

For background, during a multi-create request, we create a
RequestSpec per instance being created, but conductor only
sends the first RequestSpec for the first instance to the
scheduler. This means RequestSpec.num_instances can be >1
and we can be processing the Nth instance in the list during
scheduling but working on a RequestSpec for the first instance.
That is what breaks the resize check in ServerGroupAntiAffinityFilter
with regard to multi-create.
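
Roughly, the shape of the problem (illustrative values only, not real code):
one RequestSpec, built for the first instance, is reused for every instance
in the request, so its instance_uuid only matches the first iteration.

    instance_uuids = ['uuid-0', 'uuid-1', 'uuid-2']  # 3 servers requested
    spec_instance_uuid = instance_uuids[0]  # the only spec sent to the scheduler

    for num, instance_uuid in enumerate(instance_uuids):
        # For uuid-1 and uuid-2 the spec's instance_uuid is stale.
        print(num, instance_uuid, spec_instance_uuid == instance_uuid)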

To resolve this, we update the RequestSpec.instance_uuid when
filtering hosts for a given instance but we don't persist that
change to the RequestSpec.
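
The "don't persist" part relies on the standard oslo.versionedobjects
changed-field tracking. A hedged sketch with a toy object (not nova's
RequestSpec; assumes oslo.versionedobjects is installed) showing that
obj_reset_changes() clears the dirty flag, so a save based on
obj_what_changed() would not write the value back:

    from oslo_versionedobjects import base as ovo_base
    from oslo_versionedobjects import fields as ovo_fields

    @ovo_base.VersionedObjectRegistry.register
    class FakeSpec(ovo_base.VersionedObject):
        VERSION = '1.0'
        fields = {'instance_uuid': ovo_fields.StringField(nullable=True)}

    spec = FakeSpec()
    spec.instance_uuid = 'some-uuid'
    assert 'instance_uuid' in spec.obj_what_changed()

    # Clearing the change tracking means this field is no longer "dirty",
    # so it would not be included if the object were saved.
    spec.obj_reset_changes(['instance_uuid'])
    assert 'instance_uuid' not in spec.obj_what_changed()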

This is a bit clunky, but it kind of comes with the territory of
how we hack scheduling requests together using a single RequestSpec
for multi-create requests. An alternative to this is found in change
I0dd1fa5a70ac169efd509a50b5d69ee5deb8deb7 where we rely on the
RequestSpec.num_instances field to determine if we're in a multi-create
situation, but that has its own flaws because num_instances is
persisted with the RequestSpec which might cause us to re-introduce
bug 1558532 if we're not careful. In that case we'd have to either
(1) stop persisting RequestSpec.num_instances or (2) temporarily
unset it like we do using RequestSpec.reset_forced_destinations()
during move operations.
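
For comparison, the rejected alternative would look roughly like the
hypothetical check below (not code from
I0dd1fa5a70ac169efd509a50b5d69ee5deb8deb7): use num_instances to decide
whether we're in a multi-create request rather than fixing instance_uuid.

    def looks_like_multi_create(spec_obj):
        # Hypothetical heuristic: num_instances is set at boot time and is
        # persisted, so it stays > 1 for later moves of any of those
        # servers, which is why this approach is fragile.
        return spec_obj.num_instances > 1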

Co-Authored-By: Sean Mooney <work@seanmooney.info>

Closes-Bug: #1781710

Change-Id: Icba22060cb379ebd5e906981ec283667350b8c5a
commit c22b53c248 (parent 5e262d673b)
Author: Matt Riedemann
Date: 2018-07-17 17:43:37 -04:00

3 changed files with 57 additions and 20 deletions


@@ -186,7 +186,18 @@ class FilterScheduler(driver.Scheduler):
         # The list of hosts that have been selected (and claimed).
         claimed_hosts = []
-        for num in range(num_instances):
+        for num, instance_uuid in enumerate(instance_uuids):
+            # In a multi-create request, the first request spec from the list
+            # is passed to the scheduler and that request spec's instance_uuid
+            # might not be the same as the instance we're processing, so we
+            # update the instance_uuid in that case before passing the request
+            # spec to filters since at least one filter
+            # (ServerGroupAntiAffinityFilter) depends on that information being
+            # accurate.
+            spec_obj.instance_uuid = instance_uuid
+            # Reset the field so it's not persisted accidentally.
+            spec_obj.obj_reset_changes(['instance_uuid'])
             hosts = self._get_sorted_hosts(spec_obj, hosts, num)
             if not hosts:
                 # NOTE(jaypipes): If we get here, that means not all instances
@@ -195,7 +206,6 @@ class FilterScheduler(driver.Scheduler):
                 # _ensure_sufficient_hosts() call.
                 break
-            instance_uuid = instance_uuids[num]
             # Attempt to claim the resources against one or more resource
             # providers, looping over the sorted list of possible hosts
             # looking for an allocation_request that contains that host's
@@ -305,6 +315,13 @@ class FilterScheduler(driver.Scheduler):
         selections_to_return = []
         for num in range(num_instances):
             instance_uuid = instance_uuids[num] if instance_uuids else None
+            if instance_uuid:
+                # Update the RequestSpec.instance_uuid before sending it to
+                # the filters in case we're doing a multi-create request, but
+                # don't persist the change.
+                spec_obj.instance_uuid = instance_uuid
+                spec_obj.obj_reset_changes(['instance_uuid'])
             hosts = self._get_sorted_hosts(spec_obj, hosts, num)
             if not hosts:
                 # No hosts left, so break here, and the
@@ -312,7 +329,6 @@ class FilterScheduler(driver.Scheduler):
                 break
             selected_host = hosts[0]
             selected_hosts.append(selected_host)
-            instance_uuid = instance_uuids[num] if instance_uuids else None
             self._consume_selected_host(selected_host, spec_obj,
                                         instance_uuid=instance_uuid)


@@ -108,14 +108,9 @@ class AntiAffinityMultiCreateRequest(test.TestCase,
                              'anti-affinity group.')
             hosts = set([])
             for selection_list in selections_to_return:
-                # FIXME(mriedem): selection_list should be length 1 when bug
-                # 1781710 is fixed since we shouldn't have alternates with
-                # 2 servers and only 2 hosts.
-                self.assertEqual(2, len(selection_list))
+                self.assertEqual(1, len(selection_list), selection_list)
                 hosts.add(selection_list[0].service_host)
-            # FIXME(mriedem): Similarly, this should be 2 once the bug is
-            # fixed.
-            self.assertEqual(1, len(hosts), hosts)
+            self.assertEqual(2, len(hosts), hosts)
             return selections_to_return
         self.stub_out('nova.scheduler.filter_scheduler.FilterScheduler.'
                       '_get_alternate_hosts', stub_get_alternate_hosts)
@@ -139,8 +134,4 @@ class AntiAffinityMultiCreateRequest(test.TestCase,
             selected_hosts.add(server['OS-EXT-SRV-ATTR:host'])
         # Assert that each server is on a separate host.
-        # FIXME(sean-k-mooney): we should be asserting len is 2
-        # however until bug 1781710 is resolved we assert incorrect
-        # behavior to match reality. This assertion and fixme should
-        # be corrected as part of closing the bug.
-        self.assertEqual(1, len(selected_hosts))
+        self.assertEqual(2, len(selected_hosts))


@@ -85,14 +85,25 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
                 swap=0,
                 vcpus=1),
             project_id=uuids.project_id,
-            instance_group=None)
+            instance_group=None, instance_uuid=uuids.instance)
+        # Reset the RequestSpec changes so they don't interfere with the
+        # assertion at the end of the test.
+        spec_obj.obj_reset_changes(recursive=True)
         host_state = mock.Mock(spec=host_manager.HostState, host="fake_host",
                 uuid=uuids.cn1, cell_uuid=uuids.cell, nodename="fake_node",
                 limits={})
         all_host_states = [host_state]
         mock_get_all_states.return_value = all_host_states
-        mock_get_hosts.return_value = all_host_states
+        visited_instances = set([])
+
+        def fake_get_sorted_hosts(_spec_obj, host_states, index):
+            # Keep track of which instances are passed to the filters.
+            visited_instances.add(_spec_obj.instance_uuid)
+            return all_host_states
+        mock_get_hosts.side_effect = fake_get_sorted_hosts
         instance_uuids = [uuids.instance]
         ctx = mock.Mock()
@@ -114,6 +125,11 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
         # And ensure we never called claim_resources()
         self.assertFalse(mock_claim.called)
+        # Make sure that the RequestSpec.instance_uuid is not dirty.
+        self.assertEqual(sorted(instance_uuids), sorted(visited_instances))
+        self.assertEqual(0, len(spec_obj.obj_what_changed()),
+                         spec_obj.obj_what_changed())
     @mock.patch('nova.scheduler.utils.claim_resources')
     @mock.patch('nova.scheduler.filter_scheduler.FilterScheduler.'
                 '_get_all_host_states')
@@ -486,7 +502,10 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
                 swap=0,
                 vcpus=1),
             project_id=uuids.project_id,
-            instance_group=ig)
+            instance_group=ig, instance_uuid=uuids.instance0)
+        # Reset the RequestSpec changes so they don't interfere with the
+        # assertion at the end of the test.
+        spec_obj.obj_reset_changes(recursive=True)
         hs1 = mock.Mock(spec=host_manager.HostState, host='host1',
                         nodename="node1", limits={}, uuid=uuids.cn1,
@@ -506,8 +525,15 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
         # Simulate host 1 and host 2 being randomly returned first by
         # _get_sorted_hosts() in the two iterations for each instance in
         # num_instances
-        mock_get_hosts.side_effect = ([hs2, hs1], [hs1, hs2],
-                                      [hs2, hs1], [hs1, hs2])
+        visited_instances = set([])
+
+        def fake_get_sorted_hosts(_spec_obj, host_states, index):
+            # Keep track of which instances are passed to the filters.
+            visited_instances.add(_spec_obj.instance_uuid)
+            if index % 2:
+                return [hs1, hs2]
+            return [hs2, hs1]
+        mock_get_hosts.side_effect = fake_get_sorted_hosts
         instance_uuids = [
             getattr(uuids, 'instance%d' % x) for x in range(num_instances)
         ]
@@ -546,6 +572,10 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
         # Assert that we updated HostState.instances for each host.
         self.assertIn(uuids.instance0, hs2.instances)
         self.assertIn(uuids.instance1, hs1.instances)
+        # Make sure that the RequestSpec.instance_uuid is not dirty.
+        self.assertEqual(sorted(instance_uuids), sorted(visited_instances))
+        self.assertEqual(0, len(spec_obj.obj_what_changed()),
+                         spec_obj.obj_what_changed())
     @mock.patch('random.choice', side_effect=lambda x: x[1])
     @mock.patch('nova.scheduler.host_manager.HostManager.get_weighed_hosts')