Update RequestSpec.instance_uuid during scheduling

Before change I4b67ec9dd4ce846a704d0f75ad64c41e693de0fb
the ServerGroupAntiAffinityFilter did not rely on the
HostState.instances dict to determine **within the same
request** how many members of the same anti-affinity
group were on a given host. In fact, at that time, the
HostState.instances dict wasn't updated between filter
runs in the same multi-create request. That was fixed with
change Iacc636fa8a59a9e8670a8d683c10bdbb0dc8237b so that
as we select a host for each group member being created
within a single request, we also update the HostState.instances
dict so the ServerGroupAntiAffinityFilter would have
accurate tracking of which group members were on which
hosts.
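
For illustration only, a minimal sketch (toy classes, not the actual Nova
HostState or filter code) of the bookkeeping described above: as each group
member in the request is assigned a host, that host's instances dict is
updated so the next filter run can count the group members already placed
there.

    class FakeHostState(object):
        def __init__(self, host):
            self.host = host
            self.instances = {}  # instance_uuid -> instance

    def consume_selected_host(host_state, instance_uuid):
        # Record the placement so later filter runs in the same
        # multi-create request see this group member on the host.
        host_state.instances[instance_uuid] = object()

    def group_members_on_host(host_state, group_member_uuids):
        # What an anti-affinity check conceptually counts per host.
        return set(host_state.instances) & set(group_member_uuids)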

However, that did not account for a wrinkle in the filter
added in change Ie016f59f5b98bb9c70b3e33556bd747f79fc77bd,
which is needed to allow resizing a server to the same host
when that server is in an anti-affinity group. That wrinkle,
combined with the fact that the RequestSpec the filter is
acting on for a given instance in a multi-create request
might not actually carry that instance's uuid, can cause the
filter to think it's in a resize situation and accept a host
which already has another member of the same anti-affinity
group on it, breaking the anti-affinity policy.
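
To make the failure mode concrete, here is a simplified sketch of that
resize allowance (paraphrased logic, not the exact
ServerGroupAntiAffinityFilter code): if the instance being scheduled already
appears among the group members on the host, the filter assumes a same-host
resize and passes the host.

    def host_passes(host_state, spec_obj, group_member_uuids):
        members_on_host = set(host_state.instances) & set(group_member_uuids)
        if not members_on_host:
            # No other group members here; anti-affinity is satisfied.
            return True
        # The "wrinkle": allow a same-host resize of an existing member.
        # If spec_obj.instance_uuid is stale (it still names the first
        # instance of the multi-create request, which was just placed on
        # this host), this passes when it should not.
        return spec_obj.instance_uuid in members_on_host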

For background, during a multi-create request, we create a
RequestSpec per instance being created, but conductor only
sends the first RequestSpec for the first instance to the
scheduler. This means RequestSpec.num_instances can be >1
and we can be processing the Nth instance in the list during
scheduling but working on a RequestSpec for the first instance.
That is what breaks the resize check in ServerGroupAntiAffinityFilter
with regard to multi-create.
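
Roughly, the shape of the problem (illustrative values only, not real code):
one RequestSpec, built for the first instance, is reused for every instance
in the request, so its instance_uuid only matches the first iteration.

    instance_uuids = ['uuid-0', 'uuid-1', 'uuid-2']  # 3 servers requested
    spec_instance_uuid = instance_uuids[0]  # the only spec sent to the scheduler

    for num, instance_uuid in enumerate(instance_uuids):
        # For uuid-1 and uuid-2 the spec's instance_uuid is stale.
        print(num, instance_uuid, spec_instance_uuid == instance_uuid)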

To resolve this, we update the RequestSpec.instance_uuid when
filtering hosts for a given instance but we don't persist that
change to the RequestSpec.
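
The "don't persist" part relies on the standard oslo.versionedobjects
changed-field tracking. A hedged sketch with a toy object (not nova's
RequestSpec; assumes oslo.versionedobjects is installed) showing that
obj_reset_changes() clears the dirty flag, so a save based on
obj_what_changed() would not write the value back:

    from oslo_versionedobjects import base as ovo_base
    from oslo_versionedobjects import fields as ovo_fields

    @ovo_base.VersionedObjectRegistry.register
    class FakeSpec(ovo_base.VersionedObject):
        VERSION = '1.0'
        fields = {'instance_uuid': ovo_fields.StringField(nullable=True)}

    spec = FakeSpec()
    spec.instance_uuid = 'some-uuid'
    assert 'instance_uuid' in spec.obj_what_changed()

    # Clearing the change tracking means this field is no longer "dirty",
    # so it would not be included if the object were saved.
    spec.obj_reset_changes(['instance_uuid'])
    assert 'instance_uuid' not in spec.obj_what_changed()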

This is a bit clunky, but it kind of comes with the territory of
how we hack scheduling requests together using a single RequestSpec
for multi-create requests. An alternative to this is found in change
I0dd1fa5a70ac169efd509a50b5d69ee5deb8deb7 where we rely on the
RequestSpec.num_instances field to determine if we're in a multi-create
situation, but that has its own flaws because num_instances is
persisted with the RequestSpec which might cause us to re-introduce
bug 1558532 if we're not careful. In that case we'd have to either
(1) stop persisting RequestSpec.num_instances or (2) temporarily
unset it like we do using RequestSpec.reset_forced_destinations()
during move operations.
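
For comparison, the rejected alternative would look roughly like the
hypothetical check below (not code from
I0dd1fa5a70ac169efd509a50b5d69ee5deb8deb7): use num_instances to decide
whether we're in a multi-create request rather than fixing instance_uuid.

    def looks_like_multi_create(spec_obj):
        # Hypothetical heuristic: num_instances is set at boot time and is
        # persisted, so it stays > 1 for later moves of any of those
        # servers, which is why this approach is fragile.
        return spec_obj.num_instances > 1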

Co-Authored-By: Sean Mooney <work@seanmooney.info>

Closes-Bug: #1781710

Change-Id: Icba22060cb379ebd5e906981ec283667350b8c5a
commit c22b53c248 (parent 5e262d673b)
Author: Matt Riedemann
Date: 2018-07-17 17:43:37 -04:00

3 changed files with 57 additions and 20 deletions


@@ -186,7 +186,18 @@ class FilterScheduler(driver.Scheduler):
         # The list of hosts that have been selected (and claimed).
         claimed_hosts = []
-        for num in range(num_instances):
+        for num, instance_uuid in enumerate(instance_uuids):
+            # In a multi-create request, the first request spec from the list
+            # is passed to the scheduler and that request spec's instance_uuid
+            # might not be the same as the instance we're processing, so we
+            # update the instance_uuid in that case before passing the request
+            # spec to filters since at least one filter
+            # (ServerGroupAntiAffinityFilter) depends on that information being
+            # accurate.
+            spec_obj.instance_uuid = instance_uuid
+            # Reset the field so it's not persisted accidentally.
+            spec_obj.obj_reset_changes(['instance_uuid'])
             hosts = self._get_sorted_hosts(spec_obj, hosts, num)
             if not hosts:
                 # NOTE(jaypipes): If we get here, that means not all instances
@@ -195,7 +206,6 @@ class FilterScheduler(driver.Scheduler):
                 # _ensure_sufficient_hosts() call.
                 break
-            instance_uuid = instance_uuids[num]
             # Attempt to claim the resources against one or more resource
             # providers, looping over the sorted list of possible hosts
             # looking for an allocation_request that contains that host's
@@ -305,6 +315,13 @@ class FilterScheduler(driver.Scheduler):
         selections_to_return = []
         for num in range(num_instances):
             instance_uuid = instance_uuids[num] if instance_uuids else None
+            if instance_uuid:
+                # Update the RequestSpec.instance_uuid before sending it to
+                # the filters in case we're doing a multi-create request, but
+                # don't persist the change.
+                spec_obj.instance_uuid = instance_uuid
+                spec_obj.obj_reset_changes(['instance_uuid'])
             hosts = self._get_sorted_hosts(spec_obj, hosts, num)
             if not hosts:
                 # No hosts left, so break here, and the
@@ -312,7 +329,6 @@ class FilterScheduler(driver.Scheduler):
                 break
             selected_host = hosts[0]
             selected_hosts.append(selected_host)
-            instance_uuid = instance_uuids[num] if instance_uuids else None
             self._consume_selected_host(selected_host, spec_obj,
                                         instance_uuid=instance_uuid)


@@ -108,14 +108,9 @@ class AntiAffinityMultiCreateRequest(test.TestCase,
                              'anti-affinity group.')
             hosts = set([])
             for selection_list in selections_to_return:
-                # FIXME(mriedem): selection_list should be length 1 when bug
-                # 1781710 is fixed since we shouldn't have alternates with
-                # 2 servers and only 2 hosts.
-                self.assertEqual(2, len(selection_list))
+                self.assertEqual(1, len(selection_list), selection_list)
                 hosts.add(selection_list[0].service_host)
-            # FIXME(mriedem): Similarly, this should be 2 once the bug is
-            # fixed.
-            self.assertEqual(1, len(hosts), hosts)
+            self.assertEqual(2, len(hosts), hosts)
             return selections_to_return
         self.stub_out('nova.scheduler.filter_scheduler.FilterScheduler.'
                       '_get_alternate_hosts', stub_get_alternate_hosts)
@@ -139,8 +134,4 @@ class AntiAffinityMultiCreateRequest(test.TestCase,
             selected_hosts.add(server['OS-EXT-SRV-ATTR:host'])
         # Assert that each server is on a separate host.
-        # FIXME(sean-k-mooney): we should be asserting len is 2
-        # however until bug 1781710 is resolved we assert incorrect
-        # behavior to match reality. This assertion and fixme should
-        # be corrected as part of closing the bug.
-        self.assertEqual(1, len(selected_hosts))
+        self.assertEqual(2, len(selected_hosts))


@@ -85,14 +85,25 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
                 swap=0,
                 vcpus=1),
             project_id=uuids.project_id,
-            instance_group=None)
+            instance_group=None, instance_uuid=uuids.instance)
+        # Reset the RequestSpec changes so they don't interfere with the
+        # assertion at the end of the test.
+        spec_obj.obj_reset_changes(recursive=True)
         host_state = mock.Mock(spec=host_manager.HostState, host="fake_host",
                 uuid=uuids.cn1, cell_uuid=uuids.cell, nodename="fake_node",
                 limits={})
         all_host_states = [host_state]
         mock_get_all_states.return_value = all_host_states
-        mock_get_hosts.return_value = all_host_states
+        visited_instances = set([])
+
+        def fake_get_sorted_hosts(_spec_obj, host_states, index):
+            # Keep track of which instances are passed to the filters.
+            visited_instances.add(_spec_obj.instance_uuid)
+            return all_host_states
+        mock_get_hosts.side_effect = fake_get_sorted_hosts
         instance_uuids = [uuids.instance]
         ctx = mock.Mock()
@@ -114,6 +125,11 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
         # And ensure we never called claim_resources()
         self.assertFalse(mock_claim.called)
+        # Make sure that the RequestSpec.instance_uuid is not dirty.
+        self.assertEqual(sorted(instance_uuids), sorted(visited_instances))
+        self.assertEqual(0, len(spec_obj.obj_what_changed()),
+                         spec_obj.obj_what_changed())
     @mock.patch('nova.scheduler.utils.claim_resources')
     @mock.patch('nova.scheduler.filter_scheduler.FilterScheduler.'
                 '_get_all_host_states')
@@ -486,7 +502,10 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
                 swap=0,
                 vcpus=1),
             project_id=uuids.project_id,
-            instance_group=ig)
+            instance_group=ig, instance_uuid=uuids.instance0)
+        # Reset the RequestSpec changes so they don't interfere with the
+        # assertion at the end of the test.
+        spec_obj.obj_reset_changes(recursive=True)
         hs1 = mock.Mock(spec=host_manager.HostState, host='host1',
                         nodename="node1", limits={}, uuid=uuids.cn1,
@@ -506,8 +525,15 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
         # Simulate host 1 and host 2 being randomly returned first by
         # _get_sorted_hosts() in the two iterations for each instance in
         # num_instances
-        mock_get_hosts.side_effect = ([hs2, hs1], [hs1, hs2],
-                                      [hs2, hs1], [hs1, hs2])
+        visited_instances = set([])
+
+        def fake_get_sorted_hosts(_spec_obj, host_states, index):
+            # Keep track of which instances are passed to the filters.
+            visited_instances.add(_spec_obj.instance_uuid)
+            if index % 2:
+                return [hs1, hs2]
+            return [hs2, hs1]
+        mock_get_hosts.side_effect = fake_get_sorted_hosts
         instance_uuids = [
             getattr(uuids, 'instance%d' % x) for x in range(num_instances)
         ]
@@ -546,6 +572,10 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
         # Assert that we updated HostState.instances for each host.
         self.assertIn(uuids.instance0, hs2.instances)
         self.assertIn(uuids.instance1, hs1.instances)
+        # Make sure that the RequestSpec.instance_uuid is not dirty.
+        self.assertEqual(sorted(instance_uuids), sorted(visited_instances))
+        self.assertEqual(0, len(spec_obj.obj_what_changed()),
+                         spec_obj.obj_what_changed())
     @mock.patch('random.choice', side_effect=lambda x: x[1])
     @mock.patch('nova.scheduler.host_manager.HostManager.get_weighed_hosts')