scheduler: Remove 'USES_ALLOCATION_CANDIDATES'

There is only one scheduler driver now. This variable is no longer
necessary.

We remove a couple of tests that validated behavior for scheduler drivers
that didn't support allocation candidates (there are none) and update
the test for the 'nova-manage placement heal_allocations' command to
drop allocations instead of relying on them not being created.

Change-Id: I64dc67e2bacd7a6c86153db5ae983dfb54bd40eb
Signed-off-by: Stephen Finucane <stephenfin@redhat.com>
Stephen Finucane 2020-09-25 14:46:10 +01:00
parent 1f6c351712
commit 97d25371b3
8 changed files with 42 additions and 184 deletions
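
For quick orientation before the per-file hunks, here is a hedged sketch (not verbatim from the tree) of the check this change collapses in the filter scheduler. The function names are illustrative; the variable and flag names follow the second hunk below, and everything around the guard is elided.

    # Condensed sketch of the guard in FilterScheduler._schedule(), before and
    # after this change (illustrative function names, not real nova code).
    def skip_claiming_before(driver, instance_uuids, alloc_reqs_by_rp_uuid):
        # Pre-change: an out-of-tree driver could opt out of Placement claiming
        # by setting USES_ALLOCATION_CANDIDATES = False.
        return (instance_uuids is None
                or not driver.USES_ALLOCATION_CANDIDATES
                or alloc_reqs_by_rp_uuid is None)

    def skip_claiming_after(instance_uuids, alloc_reqs_by_rp_uuid):
        # Post-change: only an older conductor (instance_uuids is None) or a
        # Placement communication failure (alloc_reqs_by_rp_uuid is None)
        # sends us down the _legacy_find_hosts() path.
        return instance_uuids is None or alloc_reqs_by_rp_uuid is None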


@@ -29,18 +29,6 @@ from nova import servicegroup
class Scheduler(metaclass=abc.ABCMeta):
"""The base class that all Scheduler classes should inherit from."""
# TODO(mriedem): We should remove this flag now so that all scheduler
# drivers, both in-tree and out-of-tree, must rely on placement for
# scheduling decisions. We're likely going to have more and more code
# over time that relies on the scheduler creating allocations and it
# will not be sustainable to try and keep compatibility code around for
# scheduler drivers that do not create allocations in Placement.
USES_ALLOCATION_CANDIDATES = True
"""Indicates that the scheduler driver calls the Placement API for
allocation candidates and uses those allocation candidates in its
decision-making.
"""
def __init__(self):
self.host_manager = host_manager.HostManager()
self.servicegroup_api = servicegroup.API()


@@ -169,18 +169,15 @@ class FilterScheduler(driver.Scheduler):
num_alts = (CONF.scheduler.max_attempts - 1
if return_alternates else 0)
if (instance_uuids is None or
not self.USES_ALLOCATION_CANDIDATES or
alloc_reqs_by_rp_uuid is None):
# We still support external scheduler drivers that don't use the
# placement API (and set USES_ALLOCATION_CANDIDATE = False) and
# therefore we skip all the claiming logic for those scheduler
# drivers. Also, if there was a problem communicating with the
if instance_uuids is None or alloc_reqs_by_rp_uuid is None:
# If there was a problem communicating with the
# placement API, alloc_reqs_by_rp_uuid will be None, so we skip
# claiming in that case as well. In the case where instance_uuids
# is None, that indicates an older conductor, so we need to return
# the objects without alternates. They will be converted back to
# the older dict format representing HostState objects.
# TODO(stephenfin): Remove this when we bump the scheduler RPC API
# version to 5.0
return self._legacy_find_hosts(context, num_instances, spec_obj,
hosts, num_alts,
instance_uuids=instance_uuids)
@@ -310,9 +307,11 @@ class FilterScheduler(driver.Scheduler):
def _legacy_find_hosts(self, context, num_instances, spec_obj, hosts,
num_alts, instance_uuids=None):
"""Some schedulers do not do claiming, or we can sometimes not be able
to if the Placement service is not reachable. Additionally, we may be
working with older conductors that don't pass in instance_uuids.
"""Find hosts without invoking placement.
We may not be able to claim if the Placement service is not reachable.
Additionally, we may be working with older conductors that don't pass
in instance_uuids.
"""
# The list of hosts selected for each instance
selected_hosts = []
@@ -476,10 +475,6 @@ class FilterScheduler(driver.Scheduler):
def _get_all_host_states(self, context, spec_obj, provider_summaries):
"""Template method, so a subclass can implement caching."""
# NOTE(jaypipes): provider_summaries being None is treated differently
# from an empty dict. provider_summaries is None when we want to grab
# all compute nodes, for instance when using a scheduler driver that
# sets USES_ALLOCATION_CANDIDATES=False.
# The provider_summaries variable will be an empty dict when the
# Placement API found no providers that match the requested
# constraints, which in turn makes compute_uuids an empty list and

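The NOTE in the hunk above leans on a distinction that survives this change: provider_summaries is None when Placement was not consulted at all (for example on a rebuild, where the manager hunk below leaves it unset), while an empty dict means Placement answered but no provider matched. A minimal sketch of that branch, using a hypothetical helper name:

    def select_compute_uuids(provider_summaries):
        # Sketch only; 'provider_summaries' has the meaning described in the
        # NOTE above, and this function name is hypothetical.
        if provider_summaries is None:
            # Placement was not consulted: fall back to every compute node
            # known to the host manager.
            return None
        # Placement was consulted. An empty dict means nothing matched the
        # requested constraints, so the candidate list is legitimately empty.
        return list(provider_summaries)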

@@ -133,7 +133,7 @@ class SchedulerManager(manager.Manager):
is_rebuild = utils.request_is_rebuild(spec_obj)
alloc_reqs_by_rp_uuid, provider_summaries, allocation_request_version \
= None, None, None
if self.driver.USES_ALLOCATION_CANDIDATES and not is_rebuild:
if not is_rebuild:
# Only process the Placement request spec filters when Placement
# is used.
try:


@@ -787,6 +787,9 @@ class PlacementInstanceHelperMixin(InstanceHelperMixin, PlacementHelperMixin):
'/allocations/%s' % server_uuid
).body['allocations']
def _delete_server_allocations(self, server_uuid):
self.placement.delete(f'/allocations/{server_uuid}')
def _wait_for_server_allocations(self, consumer_id, max_retries=20):
retry_count = 0
while True:


@@ -135,16 +135,3 @@ class TestRetryBetweenComputeNodeBuilds(test.TestCase):
# Assert that we retried.
self.assertEqual(2, self.attempts)
class TestRetryBetweenComputeNodeBuildsNoAllocations(
TestRetryBetweenComputeNodeBuilds):
"""Tests the reschedule scenario using a scheduler driver which does
not use Placement.
"""
def setUp(self):
super(TestRetryBetweenComputeNodeBuildsNoAllocations, self).setUp()
# We need to mock the FilterScheduler to not use Placement so that
# allocations won't be created during scheduling.
self.scheduler_service.manager.driver.USES_ALLOCATION_CANDIDATES = \
False


@@ -78,17 +78,3 @@ class TestServerResizeReschedule(integrated_helpers.ProviderUsageBaseTestCase):
'VERIFY_RESIZE')
self.assertEqual(self.flavor2['name'],
server['flavor']['original_name'])
class TestServerResizeRescheduleWithNoAllocations(
TestServerResizeReschedule):
"""Tests the reschedule scenario using a scheduler driver which does not
use Placement.
"""
def setUp(self):
super(TestServerResizeRescheduleWithNoAllocations, self).setUp()
# We need to mock the FilterScheduler to not use Placement so that
# allocations won't be created during scheduling.
self.scheduler_service.manager.driver.USES_ALLOCATION_CANDIDATES = \
False


@@ -1,100 +0,0 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from nova import test
from nova.tests import fixtures as nova_fixtures
from nova.tests.functional import fixtures as func_fixtures
from nova.tests.functional import integrated_helpers
class TestResizeWithNoAllocationScheduler(
test.TestCase, integrated_helpers.InstanceHelperMixin):
"""Regression tests for bug #1741307
Some scheduler drivers, like the old CachingScheduler driver, do not use
Placement to make claims (allocations) against compute nodes during
scheduling like the FilterScheduler does.
During a cold migrate / resize, the FilterScheduler will "double up" the
instance allocations so the instance has resource allocations made against
both the source node and the chosen destination node. Conductor will then
attempt to "swap" the source node allocation to the migration record. If
using a non-Placement driver, there are no allocations for the instance on
the source node and conductor fails. Note that if the compute running the
instance was running Ocata code or older, then the compute itself would
create the allocations in Placement via the ResourceTracker, but once all
computes are upgraded to Pike or newer, the compute no longer creates
allocations in Placement because it assumes the scheduler is doing that,
which is not the case with these outlier scheduler drivers.
This is a regression test to show the failure before it's fixed and then
can be used to confirm the fix.
"""
microversion = 'latest'
def setUp(self):
super(TestResizeWithNoAllocationScheduler, self).setUp()
self.useFixture(nova_fixtures.RealPolicyFixture())
self.useFixture(nova_fixtures.GlanceFixture(self))
self.useFixture(nova_fixtures.NeutronFixture(self))
self.useFixture(func_fixtures.PlacementFixture())
api_fixture = self.useFixture(nova_fixtures.OSAPIFixture(
api_version='v2.1'))
self.api = api_fixture.admin_api
self.api.microversion = self.microversion
self.start_service('conductor')
# Create two compute nodes/services.
for host in ('host1', 'host2'):
self.start_service('compute', host=host)
scheduler_service = self.start_service('scheduler')
# We need to mock the FilterScheduler to not use Placement so that
# allocations won't be created during scheduling.
scheduler_service.manager.driver.USES_ALLOCATION_CANDIDATES = False
flavors = self.api.get_flavors()
self.old_flavor = flavors[0]
self.new_flavor = flavors[1]
def test_resize(self):
# Create our server without networking just to keep things simple.
server_req = self._build_server(
image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
flavor_id=self.old_flavor['id'],
networks='none')
server = self.api.post_server({'server': server_req})
server = self._wait_for_state_change(server, 'ACTIVE')
original_host = server['OS-EXT-SRV-ATTR:host']
target_host = 'host1' if original_host == 'host2' else 'host2'
# Issue the resize request.
post = {
'resize': {
'flavorRef': self.new_flavor['id']
}
}
self.api.post_server_action(server['id'], post)
# Poll the server until the resize is done.
server = self._wait_for_state_change(server, 'VERIFY_RESIZE')
# Assert that the server was migrated to the other host.
self.assertEqual(target_host, server['OS-EXT-SRV-ATTR:host'])
# Confirm the resize.
post = {'confirmResize': None}
self.api.post_server_action(server['id'], post,
check_response_status=[204])


@@ -278,28 +278,24 @@ class TestNovaManagePlacementHealAllocations(
self.flavor = self.api.get_flavors()[0]
self.output = StringIO()
self.useFixture(fixtures.MonkeyPatch('sys.stdout', self.output))
# We need to mock the FilterScheduler to not use Placement so that
# allocations won't be created during scheduling and then we can heal
# them in the CLI.
self.scheduler_service.manager.driver.USES_ALLOCATION_CANDIDATES = \
False
def _boot_and_assert_no_allocations(self, flavor, hostname,
volume_backed=False):
"""Creates a server on the given host and asserts neither have usage
def _boot_and_remove_allocations(
self, flavor, hostname, volume_backed=False,
):
"""Creates a server on the given host and remove all allocations.
:param flavor: the flavor used to create the server
:param hostname: the host on which to create the server
:param volume_backed: True if the server should be volume-backed and
as a result not have any DISK_GB allocation
:returns: two-item tuple of the server and the compute node resource
provider uuid
provider UUID
"""
server_req = self._build_server(
image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
flavor_id=flavor['id'],
networks='none')
server_req['availability_zone'] = 'nova:%s' % hostname
networks='none',
az=f'nova:{hostname}')
if volume_backed:
vol_id = nova_fixtures.CinderFixture.IMAGE_BACKED_VOL
server_req['block_device_mapping_v2'] = [{
@@ -315,6 +311,9 @@ class TestNovaManagePlacementHealAllocations(
# Verify that our source host is what the server ended up on
self.assertEqual(hostname, server['OS-EXT-SRV-ATTR:host'])
# Delete the server's allocations
self._delete_server_allocations(server['id'])
# Check that the compute node resource provider has no allocations.
rp_uuid = self._get_provider_uuid_by_host(hostname)
provider_usages = self._get_provider_usages(rp_uuid)
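
Putting the pieces together, a hedged sketch of the updated test flow: boot, drop the allocations with the helper above, heal through the nova-manage CLI object these tests use, and confirm the provider holds allocations again. Method names are those visible in this diff, except the allocation-reading helper, whose name is assumed from the truncated hunk near the top.

    def test_heal_single_server_sketch(self):
        # Boot on a known host and strip the allocations the scheduler created.
        server, rp_uuid = self._boot_and_remove_allocations(self.flavor, 'cell1')
        # rc 0 means heal_allocations processed at least one instance.
        result = self.cli.heal_allocations(verbose=True)
        self.assertEqual(0, result, self.output.getvalue())
        # The server should once again hold allocations against its compute
        # node resource provider (helper name assumed from the truncated
        # hunk near the top of this diff).
        allocations = self._get_allocations_by_server_uuid(server['id'])
        self.assertIn(rp_uuid, allocations)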
@@ -351,9 +350,9 @@ class TestNovaManagePlacementHealAllocations(
both instances now have allocations against their respective compute
node resource providers.
"""
server1, rp_uuid1 = self._boot_and_assert_no_allocations(
server1, rp_uuid1 = self._boot_and_remove_allocations(
self.flavor, 'cell1')
server2, rp_uuid2 = self._boot_and_assert_no_allocations(
server2, rp_uuid2 = self._boot_and_remove_allocations(
self.flavor, 'cell2')
# heal server1 and server2 in separate calls
@@ -383,9 +382,9 @@ class TestNovaManagePlacementHealAllocations(
servers = [] # This is really a list of 2-item tuples.
for x in range(2):
servers.append(
self._boot_and_assert_no_allocations(self.flavor, 'cell1'))
self._boot_and_remove_allocations(self.flavor, 'cell1'))
servers.append(
self._boot_and_assert_no_allocations(self.flavor, 'cell2'))
self._boot_and_remove_allocations(self.flavor, 'cell2'))
result = self.cli.heal_allocations(max_count=10, verbose=True)
self.assertEqual(0, result, self.output.getvalue())
self.assertIn('Processed 3 instances.', self.output.getvalue())
@@ -399,10 +398,10 @@ class TestNovaManagePlacementHealAllocations(
"""
servers = [] # This is really a list of 2-item tuples.
servers.append(
self._boot_and_assert_no_allocations(self.flavor, 'cell1'))
self._boot_and_remove_allocations(self.flavor, 'cell1'))
for x in range(2):
servers.append(
self._boot_and_assert_no_allocations(self.flavor, 'cell2'))
self._boot_and_remove_allocations(self.flavor, 'cell2'))
result = self.cli.heal_allocations(max_count=2, verbose=True)
self.assertEqual(1, result, self.output.getvalue())
self.assertIn('Max count reached. Processed 2 instances.',
@@ -426,9 +425,9 @@ class TestNovaManagePlacementHealAllocations(
servers = [] # This is really a list of 2-item tuples.
for x in range(2):
servers.append(
self._boot_and_assert_no_allocations(self.flavor, 'cell1'))
self._boot_and_remove_allocations(self.flavor, 'cell1'))
servers.append(
self._boot_and_assert_no_allocations(self.flavor, 'cell2'))
self._boot_and_remove_allocations(self.flavor, 'cell2'))
result = self.cli.heal_allocations(verbose=True)
self.assertEqual(0, result, self.output.getvalue())
self.assertIn('Processed 3 instances.', self.output.getvalue())
@@ -439,7 +438,7 @@ class TestNovaManagePlacementHealAllocations(
"""Tests the scenario that an instance with no allocations is shelved
so heal_allocations skips it (since the instance is not on a host).
"""
server, rp_uuid = self._boot_and_assert_no_allocations(
server, rp_uuid = self._boot_and_remove_allocations(
self.flavor, 'cell1')
self.api.post_server_action(server['id'], {'shelve': None})
# The server status goes to SHELVED_OFFLOADED before the host/node
@@ -461,7 +460,7 @@ class TestNovaManagePlacementHealAllocations(
"""Tests the case that heal_allocations skips over an instance which
is undergoing a task state transition (in this case pausing).
"""
server, rp_uuid = self._boot_and_assert_no_allocations(
server, rp_uuid = self._boot_and_remove_allocations(
self.flavor, 'cell1')
def fake_pause_instance(_self, ctxt, instance, *a, **kw):
@@ -487,9 +486,9 @@ class TestNovaManagePlacementHealAllocations(
to make sure deleted servers are filtered out.
"""
# Create a server that we'll leave alive
self._boot_and_assert_no_allocations(self.flavor, 'cell1')
self._boot_and_remove_allocations(self.flavor, 'cell1')
# and another that we'll delete
server, _ = self._boot_and_assert_no_allocations(self.flavor, 'cell1')
server, _ = self._boot_and_remove_allocations(self.flavor, 'cell1')
self._delete_server(server)
result = self.cli.heal_allocations(verbose=True)
self.assertEqual(0, result, self.output.getvalue())
@@ -505,7 +504,7 @@ class TestNovaManagePlacementHealAllocations(
out-of-band and then run our heal routine to see they get updated with
the instance project and user information.
"""
server, rp_uuid = self._boot_and_assert_no_allocations(
server, rp_uuid = self._boot_and_remove_allocations(
self.flavor, 'cell1')
# Now we'll create allocations using microversion < 1.8 so that
# placement creates the consumer record with the config-based project
@@ -558,7 +557,7 @@ class TestNovaManagePlacementHealAllocations(
def test_heal_allocations_dry_run(self):
"""Tests to make sure the --dry-run option does not commit changes."""
# Create a server with no allocations.
server, rp_uuid = self._boot_and_assert_no_allocations(
server, rp_uuid = self._boot_and_remove_allocations(
self.flavor, 'cell1')
result = self.cli.heal_allocations(verbose=True, dry_run=True)
# Nothing changed so the return code should be 4.
@@ -574,10 +573,10 @@ class TestNovaManagePlacementHealAllocations(
instance even though there are two which require processing.
"""
# Create one that we won't process.
self._boot_and_assert_no_allocations(
self._boot_and_remove_allocations(
self.flavor, 'cell1')
# Create another that we will process specifically.
server, rp_uuid = self._boot_and_assert_no_allocations(
server, rp_uuid = self._boot_and_remove_allocations(
self.flavor, 'cell1', volume_backed=True)
# First do a dry run to make sure two instances need processing.
result = self.cli.heal_allocations(
@@ -633,10 +632,10 @@ class TestNovaManagePlacementHealAllocations(
cell even though there are two which require processing.
"""
# Create one that we won't process.
server1, rp_uuid1 = self._boot_and_assert_no_allocations(
server1, rp_uuid1 = self._boot_and_remove_allocations(
self.flavor, 'cell1')
# Create another that we will process specifically.
server2, rp_uuid2 = self._boot_and_assert_no_allocations(
server2, rp_uuid2 = self._boot_and_remove_allocations(
self.flavor, 'cell2')
# Get Cell_id of cell2
@@ -683,7 +682,7 @@ class TestNovaManagePlacementHealAllocations(
You should get rc=4 back since nothing changed.
"""
# 1. Create server that we will forcefully heal specifically.
server, rp_uuid = self._boot_and_assert_no_allocations(
server, rp_uuid = self._boot_and_remove_allocations(
self.flavor, 'cell1', volume_backed=True)
# 2. heal allocations without forcing them