Update RequestSpec.instance_uuid during scheduling
Before change I4b67ec9dd4ce846a704d0f75ad64c41e693de0fb the ServerGroupAntiAffinityFilter did not rely on the HostState.instances dict to determine **within the same request** how many members of the same anti-affinity group were on a given host. In fact, at that time, the HostState.instances dict wasn't updated between filter runs in the same multi-create request. That was fixed with change Iacc636fa8a59a9e8670a8d683c10bdbb0dc8237b so that as we select a host for each group member being created within a single request, we also update the HostState.instances dict so the ServerGroupAntiAffinityFilter would have accurate tracking of which group members were on which hosts. However, that did not account for a wrinkle in the filter added in change Ie016f59f5b98bb9c70b3e33556bd747f79fc77bd which is needed to allow resizing a server to the same host when that server is in an anti-affinity group. That wrinkle, combined with the fact that the RequestSpec the filter is acting upon for a given instance in a multi-create request might not actually have the same instance_uuid as the instance being processed, can cause the filter to think it's in a resize situation and accept a host which already has another member from the same anti-affinity group on it, which breaks the anti-affinity policy. For background, during a multi-create request, we create a RequestSpec per instance being created, but conductor only sends the first RequestSpec for the first instance to the scheduler. This means RequestSpec.num_instances can be >1 and we can be processing the Nth instance in the list during scheduling but working on a RequestSpec for the first instance. That is what breaks the resize check in ServerGroupAntiAffinityFilter with regard to multi-create. To resolve this, we update the RequestSpec.instance_uuid when filtering hosts for a given instance, but we don't persist that change to the RequestSpec.
This is a bit clunky, but it kind of comes with the territory of how we hack scheduling requests together using a single RequestSpec for multi-create requests. An alternative to this is found in change I0dd1fa5a70ac169efd509a50b5d69ee5deb8deb7 where we rely on the RequestSpec.num_instances field to determine if we're in a multi-create situation, but that has its own flaws because num_instances is persisted with the RequestSpec, which might cause us to re-introduce bug 1558532 if we're not careful. In that case we'd have to either (1) stop persisting RequestSpec.num_instances or (2) temporarily unset it like we do using RequestSpec.reset_forced_destinations() during move operations.

Co-Authored-By: Sean Mooney <work@seanmooney.info>
Closes-Bug: #1781710
Change-Id: Icba22060cb379ebd5e906981ec283667350b8c5a
This commit is contained in:
parent
5e262d673b
commit
c22b53c248
|
@ -186,7 +186,18 @@ class FilterScheduler(driver.Scheduler):
|
|||
# The list of hosts that have been selected (and claimed).
|
||||
claimed_hosts = []
|
||||
|
||||
for num in range(num_instances):
|
||||
for num, instance_uuid in enumerate(instance_uuids):
|
||||
# In a multi-create request, the first request spec from the list
|
||||
# is passed to the scheduler and that request spec's instance_uuid
|
||||
# might not be the same as the instance we're processing, so we
|
||||
# update the instance_uuid in that case before passing the request
|
||||
# spec to filters since at least one filter
|
||||
# (ServerGroupAntiAffinityFilter) depends on that information being
|
||||
# accurate.
|
||||
spec_obj.instance_uuid = instance_uuid
|
||||
# Reset the field so it's not persisted accidentally.
|
||||
spec_obj.obj_reset_changes(['instance_uuid'])
|
||||
|
||||
hosts = self._get_sorted_hosts(spec_obj, hosts, num)
|
||||
if not hosts:
|
||||
# NOTE(jaypipes): If we get here, that means not all instances
|
||||
|
@ -195,7 +206,6 @@ class FilterScheduler(driver.Scheduler):
|
|||
# _ensure_sufficient_hosts() call.
|
||||
break
|
||||
|
||||
instance_uuid = instance_uuids[num]
|
||||
# Attempt to claim the resources against one or more resource
|
||||
# providers, looping over the sorted list of possible hosts
|
||||
# looking for an allocation_request that contains that host's
|
||||
|
@ -305,6 +315,13 @@ class FilterScheduler(driver.Scheduler):
|
|||
selections_to_return = []
|
||||
|
||||
for num in range(num_instances):
|
||||
instance_uuid = instance_uuids[num] if instance_uuids else None
|
||||
if instance_uuid:
|
||||
# Update the RequestSpec.instance_uuid before sending it to
|
||||
# the filters in case we're doing a multi-create request, but
|
||||
# don't persist the change.
|
||||
spec_obj.instance_uuid = instance_uuid
|
||||
spec_obj.obj_reset_changes(['instance_uuid'])
|
||||
hosts = self._get_sorted_hosts(spec_obj, hosts, num)
|
||||
if not hosts:
|
||||
# No hosts left, so break here, and the
|
||||
|
@ -312,7 +329,6 @@ class FilterScheduler(driver.Scheduler):
|
|||
break
|
||||
selected_host = hosts[0]
|
||||
selected_hosts.append(selected_host)
|
||||
instance_uuid = instance_uuids[num] if instance_uuids else None
|
||||
self._consume_selected_host(selected_host, spec_obj,
|
||||
instance_uuid=instance_uuid)
|
||||
|
||||
|
|
|
@ -108,14 +108,9 @@ class AntiAffinityMultiCreateRequest(test.TestCase,
|
|||
'anti-affinity group.')
|
||||
hosts = set([])
|
||||
for selection_list in selections_to_return:
|
||||
# FIXME(mriedem): selection_list should be length 1 when bug
|
||||
# 1781710 is fixed since we shouldn't have alternates with
|
||||
# 2 servers and only 2 hosts.
|
||||
self.assertEqual(2, len(selection_list))
|
||||
self.assertEqual(1, len(selection_list), selection_list)
|
||||
hosts.add(selection_list[0].service_host)
|
||||
# FIXME(mriedem): Similarly, this should be 2 once the bug is
|
||||
# fixed.
|
||||
self.assertEqual(1, len(hosts), hosts)
|
||||
self.assertEqual(2, len(hosts), hosts)
|
||||
return selections_to_return
|
||||
self.stub_out('nova.scheduler.filter_scheduler.FilterScheduler.'
|
||||
'_get_alternate_hosts', stub_get_alternate_hosts)
|
||||
|
@ -139,8 +134,4 @@ class AntiAffinityMultiCreateRequest(test.TestCase,
|
|||
selected_hosts.add(server['OS-EXT-SRV-ATTR:host'])
|
||||
|
||||
# Assert that each server is on a separate host.
|
||||
# FIXME(sean-k-mooney): we should be asserting len is 2
|
||||
# however until bug 1781710 is resolved we assert incorrect
|
||||
# behavior to match reality. This assertion and fixme should
|
||||
# be corrected as part of closing the bug.
|
||||
self.assertEqual(1, len(selected_hosts))
|
||||
self.assertEqual(2, len(selected_hosts))
|
||||
|
|
|
@ -85,14 +85,25 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
|
|||
swap=0,
|
||||
vcpus=1),
|
||||
project_id=uuids.project_id,
|
||||
instance_group=None)
|
||||
instance_group=None, instance_uuid=uuids.instance)
|
||||
# Reset the RequestSpec changes so they don't interfere with the
|
||||
# assertion at the end of the test.
|
||||
spec_obj.obj_reset_changes(recursive=True)
|
||||
|
||||
host_state = mock.Mock(spec=host_manager.HostState, host="fake_host",
|
||||
uuid=uuids.cn1, cell_uuid=uuids.cell, nodename="fake_node",
|
||||
limits={})
|
||||
all_host_states = [host_state]
|
||||
mock_get_all_states.return_value = all_host_states
|
||||
mock_get_hosts.return_value = all_host_states
|
||||
|
||||
visited_instances = set([])
|
||||
|
||||
def fake_get_sorted_hosts(_spec_obj, host_states, index):
|
||||
# Keep track of which instances are passed to the filters.
|
||||
visited_instances.add(_spec_obj.instance_uuid)
|
||||
return all_host_states
|
||||
|
||||
mock_get_hosts.side_effect = fake_get_sorted_hosts
|
||||
|
||||
instance_uuids = [uuids.instance]
|
||||
ctx = mock.Mock()
|
||||
|
@ -114,6 +125,11 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
|
|||
# And ensure we never called claim_resources()
|
||||
self.assertFalse(mock_claim.called)
|
||||
|
||||
# Make sure that the RequestSpec.instance_uuid is not dirty.
|
||||
self.assertEqual(sorted(instance_uuids), sorted(visited_instances))
|
||||
self.assertEqual(0, len(spec_obj.obj_what_changed()),
|
||||
spec_obj.obj_what_changed())
|
||||
|
||||
@mock.patch('nova.scheduler.utils.claim_resources')
|
||||
@mock.patch('nova.scheduler.filter_scheduler.FilterScheduler.'
|
||||
'_get_all_host_states')
|
||||
|
@ -486,7 +502,10 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
|
|||
swap=0,
|
||||
vcpus=1),
|
||||
project_id=uuids.project_id,
|
||||
instance_group=ig)
|
||||
instance_group=ig, instance_uuid=uuids.instance0)
|
||||
# Reset the RequestSpec changes so they don't interfere with the
|
||||
# assertion at the end of the test.
|
||||
spec_obj.obj_reset_changes(recursive=True)
|
||||
|
||||
hs1 = mock.Mock(spec=host_manager.HostState, host='host1',
|
||||
nodename="node1", limits={}, uuid=uuids.cn1,
|
||||
|
@ -506,8 +525,15 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
|
|||
# Simulate host 1 and host 2 being randomly returned first by
|
||||
# _get_sorted_hosts() in the two iterations for each instance in
|
||||
# num_instances
|
||||
mock_get_hosts.side_effect = ([hs2, hs1], [hs1, hs2],
|
||||
[hs2, hs1], [hs1, hs2])
|
||||
visited_instances = set([])
|
||||
|
||||
def fake_get_sorted_hosts(_spec_obj, host_states, index):
|
||||
# Keep track of which instances are passed to the filters.
|
||||
visited_instances.add(_spec_obj.instance_uuid)
|
||||
if index % 2:
|
||||
return [hs1, hs2]
|
||||
return [hs2, hs1]
|
||||
mock_get_hosts.side_effect = fake_get_sorted_hosts
|
||||
instance_uuids = [
|
||||
getattr(uuids, 'instance%d' % x) for x in range(num_instances)
|
||||
]
|
||||
|
@ -546,6 +572,10 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
|
|||
# Assert that we updated HostState.instances for each host.
|
||||
self.assertIn(uuids.instance0, hs2.instances)
|
||||
self.assertIn(uuids.instance1, hs1.instances)
|
||||
# Make sure that the RequestSpec.instance_uuid is not dirty.
|
||||
self.assertEqual(sorted(instance_uuids), sorted(visited_instances))
|
||||
self.assertEqual(0, len(spec_obj.obj_what_changed()),
|
||||
spec_obj.obj_what_changed())
|
||||
|
||||
@mock.patch('random.choice', side_effect=lambda x: x[1])
|
||||
@mock.patch('nova.scheduler.host_manager.HostManager.get_weighed_hosts')
|
||||
|
|
Loading…
Reference in New Issue