Add placement request pre-filter compute_status_filter

This adds a new mandatory placement request pre-filter
which is used to exclude compute node resource providers
with the COMPUTE_STATUS_DISABLED trait. The trait is
managed by the nova-compute service when the service's
disabled status changes.

Change I3005b46221ac3c0e559e1072131a7e4846c9867c makes
the compute service sync the trait during the
update_available_resource flow (either on start of the
compute service or during the periodic task run).

Change Ifabbb543aab62b917394eefe48126231df7cd503 makes
the libvirt driver's _set_host_enabled callback reflect
the trait when the hypervisor goes up or down out of band.

Change If32bca070185937ef83f689b7163d965a89ec10a will add
the final piece which is the os-services API calling the
compute service to add/remove the trait when a compute
service is disabled or enabled.

Since this series technically functions without the API
change, the docs and release note are added here.

Part of blueprint pre-filter-disabled-computes

Change-Id: I317cabbe49a337848325f96df79d478fd65811d9
This commit is contained in:
Matt Riedemann 2019-07-02 16:01:02 -04:00
parent 099b490c2f
commit 168d34c8d1
5 changed files with 86 additions and 5 deletions

View File

@ -75,6 +75,27 @@ non-ceph backed computes), enabling this feature will ensure that the
scheduler does not send requests to boot a ``qcow2`` image to computes
backed by ceph.
Compute Disabled Status Support
-------------------------------
Starting in the Train release, there is a mandatory `pre-filter
<https://specs.openstack.org/openstack/nova-specs/specs/train/approved/pre-filter-disabled-computes.html>`_
which will exclude disabled compute nodes, similarly to the `ComputeFilter`_.
Compute node resource providers with the ``COMPUTE_STATUS_DISABLED`` trait will
be excluded as scheduling candidates. The trait is managed by the
``nova-compute`` service and should mirror the ``disabled`` status on the
related compute service record in the `os-services`_ API. For example, if a
compute service's status is ``disabled``, the related compute node resource
provider(s) for that service should have the ``COMPUTE_STATUS_DISABLED`` trait.
When the service status is ``enabled`` the ``COMPUTE_STATUS_DISABLED`` trait
shall be removed.
If the compute service is down when the status is changed, the trait will be
synchronized by the compute service when it is restarted.
.. _os-services: https://developer.openstack.org/api-ref/compute/#compute-services-os-services
Filter scheduler
~~~~~~~~~~~~~~~~

View File

@ -154,10 +154,32 @@ def require_image_type_support(ctxt, request_spec):
return True
@trace_request_filter
def compute_status_filter(ctxt, request_spec):
    """Exclude disabled compute node providers via a forbidden trait.

    Compute node resource providers managed by a disabled compute service
    are expected to have the COMPUTE_STATUS_DISABLED trait set on them.
    Marking that trait as forbidden on the request excludes those providers
    from the allocation candidates, mirroring for placement what the
    ComputeFilter does for disabled compute services. This pre-filter is
    mandatory (always enabled).
    """
    # This runs before scheduler utils resources_from_request_spec builds
    # the RequestGroups used for the GET /allocation_candidates call, so
    # mutating the flavor extra_specs here affects that call; resetting the
    # change afterwards keeps the mutation from being persisted.
    forbidden = os_traits.COMPUTE_STATUS_DISABLED
    request_spec.flavor.extra_specs['trait:%s' % forbidden] = 'forbidden'
    request_spec.obj_reset_changes(fields=['flavor'], recursive=True)
    LOG.debug('compute_status_filter request filter added forbidden '
              'trait %s', forbidden)
    return True
# All known request (pre-)filters, including the mandatory
# compute_status_filter added for excluding disabled compute node
# resource providers.
ALL_REQUEST_FILTERS = [
    require_tenant_aggregate,
    map_az_to_placement_aggregate,
    require_image_type_support,
    compute_status_filter,
]

View File

@ -170,6 +170,8 @@ class TestRequestFilter(test.NoDBTestCase):
]
reqspec = objects.RequestSpec(project_id='owner',
availability_zone='myaz')
# flavor is needed for the compute_status_filter
reqspec.flavor = objects.Flavor(extra_specs={})
request_filter.process_reqspec(self.context, reqspec)
self.assertEqual(
','.join(sorted([uuids.agg1, uuids.agg2])),
@ -231,3 +233,18 @@ class TestRequestFilter(test.NoDBTestCase):
log_lines = [c[0][0] for c in mock_log.debug.call_args_list]
self.assertIn('added required trait', log_lines[0])
self.assertIn('took %.1f seconds', log_lines[1])
@mock.patch.object(request_filter, 'LOG')
def test_compute_status_filter(self, mock_log):
    """Ensure the mandatory pre-filter adds the forbidden trait."""
    spec = objects.RequestSpec(flavor=objects.Flavor(extra_specs={}))
    request_filter.compute_status_filter(self.context, spec)
    # The filter must have mutated the flavor with the forbidden trait.
    self.assertEqual(
        {'trait:COMPUTE_STATUS_DISABLED': 'forbidden'},
        spec.flavor.extra_specs)
    # ...and reset the tracked flavor changes so the mutation is not
    # persisted.
    self.assertEqual(set(), spec.flavor.obj_what_changed())
    # Two debug messages are expected: one from the filter body and one
    # from the trace_request_filter decorator timing the call.
    debug_messages = [call[0][0] for call in mock_log.debug.call_args_list]
    self.assertIn('added forbidden trait', debug_messages[0])
    self.assertIn('took %.1f seconds', debug_messages[1])

View File

@ -81,10 +81,11 @@ class SchedulerManagerTestCase(test.NoDBTestCase):
manager = self.manager
self.assertIsInstance(manager.driver, self.driver_cls)
@mock.patch('nova.scheduler.request_filter.process_reqspec')
@mock.patch('nova.scheduler.utils.resources_from_request_spec')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'get_allocation_candidates')
def test_select_destination(self, mock_get_ac, mock_rfrs):
def test_select_destination(self, mock_get_ac, mock_rfrs, mock_process):
fake_spec = objects.RequestSpec()
fake_spec.instance_uuid = uuids.instance
fake_version = "9.42"
@ -98,6 +99,7 @@ class SchedulerManagerTestCase(test.NoDBTestCase):
) as select_destinations:
self.manager.select_destinations(self.context, spec_obj=fake_spec,
instance_uuids=[fake_spec.instance_uuid])
mock_process.assert_called_once_with(self.context, fake_spec)
select_destinations.assert_called_once_with(
self.context, fake_spec,
[fake_spec.instance_uuid], expected_alloc_reqs_by_rp_uuid,
@ -115,11 +117,12 @@ class SchedulerManagerTestCase(test.NoDBTestCase):
[fake_spec.instance_uuid], expected_alloc_reqs_by_rp_uuid,
mock.sentinel.p_sums, fake_version, True)
@mock.patch('nova.scheduler.request_filter.process_reqspec')
@mock.patch('nova.scheduler.utils.resources_from_request_spec')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'get_allocation_candidates')
def test_select_destination_return_objects(self, mock_get_ac,
mock_rfrs):
mock_rfrs, mock_process):
fake_spec = objects.RequestSpec()
fake_spec.instance_uuid = uuids.instance
fake_version = "9.42"
@ -141,6 +144,7 @@ class SchedulerManagerTestCase(test.NoDBTestCase):
return_objects=True, return_alternates=True)
sel_host = dests[0][0]
self.assertIsInstance(sel_host, objects.Selection)
mock_process.assert_called_once_with(None, fake_spec)
# Since both return_objects and return_alternates are True, the
# driver should have been called with True for return_alternates.
select_destinations.assert_called_once_with(None, fake_spec,
@ -163,11 +167,12 @@ class SchedulerManagerTestCase(test.NoDBTestCase):
[fake_spec.instance_uuid], expected_alloc_reqs_by_rp_uuid,
mock.sentinel.p_sums, fake_version, False)
@mock.patch('nova.scheduler.request_filter.process_reqspec')
@mock.patch('nova.scheduler.utils.resources_from_request_spec')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'get_allocation_candidates')
def _test_select_destination(self, get_allocation_candidates_response,
mock_get_ac, mock_rfrs):
mock_get_ac, mock_rfrs, mock_process):
fake_spec = objects.RequestSpec()
fake_spec.instance_uuid = uuids.instance
place_res = get_allocation_candidates_response
@ -179,6 +184,7 @@ class SchedulerManagerTestCase(test.NoDBTestCase):
spec_obj=fake_spec,
instance_uuids=[fake_spec.instance_uuid])
select_destinations.assert_not_called()
mock_process.assert_called_once_with(self.context, fake_spec)
mock_get_ac.assert_called_once_with(
self.context, mock_rfrs.return_value)
@ -227,10 +233,12 @@ class SchedulerManagerTestCase(test.NoDBTestCase):
mock_get_ac.assert_not_called()
mock_process.assert_not_called()
@mock.patch('nova.scheduler.request_filter.process_reqspec')
@mock.patch('nova.scheduler.utils.resources_from_request_spec')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'get_allocation_candidates')
def test_select_destination_with_4_3_client(self, mock_get_ac, mock_rfrs):
def test_select_destination_with_4_3_client(self, mock_get_ac, mock_rfrs,
mock_process):
fake_spec = objects.RequestSpec()
place_res = (fakes.ALLOC_REQS, mock.sentinel.p_sums, "42.0")
mock_get_ac.return_value = place_res
@ -241,6 +249,7 @@ class SchedulerManagerTestCase(test.NoDBTestCase):
with mock.patch.object(self.manager.driver, 'select_destinations'
) as select_destinations:
self.manager.select_destinations(self.context, spec_obj=fake_spec)
mock_process.assert_called_once_with(self.context, fake_spec)
select_destinations.assert_called_once_with(self.context,
fake_spec, None, expected_alloc_reqs_by_rp_uuid,
mock.sentinel.p_sums, "42.0", False)
@ -248,12 +257,13 @@ class SchedulerManagerTestCase(test.NoDBTestCase):
self.context, mock_rfrs.return_value)
# TODO(sbauza): Remove that test once the API v4 is removed
@mock.patch('nova.scheduler.request_filter.process_reqspec')
@mock.patch('nova.scheduler.utils.resources_from_request_spec')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'get_allocation_candidates')
@mock.patch.object(objects.RequestSpec, 'from_primitives')
def test_select_destination_with_old_client(self, from_primitives,
mock_get_ac, mock_rfrs):
mock_get_ac, mock_rfrs, mock_process):
fake_spec = objects.RequestSpec()
fake_spec.instance_uuid = uuids.instance
from_primitives.return_value = fake_spec
@ -269,6 +279,7 @@ class SchedulerManagerTestCase(test.NoDBTestCase):
self.context, request_spec='fake_spec',
filter_properties='fake_props',
instance_uuids=[fake_spec.instance_uuid])
mock_process.assert_called_once_with(self.context, fake_spec)
select_destinations.assert_called_once_with(
self.context, fake_spec,
[fake_spec.instance_uuid], expected_alloc_reqs_by_rp_uuid,

View File

@ -0,0 +1,10 @@
---
features:
- |
A mandatory scheduling pre-filter has been added which will exclude
disabled compute nodes where the related ``nova-compute`` service status
is mirrored with a ``COMPUTE_STATUS_DISABLED`` trait on the compute node
resource provider(s) for that service in Placement. See the
`admin scheduler configuration docs`__ for details.
__ https://docs.openstack.org/nova/latest/admin/configuration/schedulers.html#compute-disabled-status-support