From 168d34c8d1161dc4d62493e194819297e079bb51 Mon Sep 17 00:00:00 2001
From: Matt Riedemann
Date: Tue, 2 Jul 2019 16:01:02 -0400
Subject: [PATCH] Add placement request pre-filter compute_status_filter

This adds a new mandatory placement request pre-filter which is used
to exclude compute node resource providers with the
COMPUTE_STATUS_DISABLED trait. The trait is managed by the
nova-compute service when the service's disabled status changes.

Change I3005b46221ac3c0e559e1072131a7e4846c9867c makes the compute
service sync the trait during the update_available_resource flow
(either on start of the compute service or during the periodic task
run).

Change Ifabbb543aab62b917394eefe48126231df7cd503 makes the libvirt
driver's _set_host_enabled callback reflect the trait when the
hypervisor goes up or down out of band.

Change If32bca070185937ef83f689b7163d965a89ec10a will add the final
piece, which is the os-services API calling the compute service to
add/remove the trait when a compute service is disabled or enabled.

Since this series technically functions without the API change, the
docs and release note are added here.

Part of blueprint pre-filter-disabled-computes

Change-Id: I317cabbe49a337848325f96df79d478fd65811d9
---
 doc/source/admin/configuration/schedulers.rst | 21 ++++++++++++++++++
 nova/scheduler/request_filter.py              | 22 +++++++++++++++++++
 .../unit/scheduler/test_request_filter.py     | 17 ++++++++++++++
 nova/tests/unit/scheduler/test_scheduler.py   | 21 +++++++++++++-----
 ...er-disabled-computes-0b15d2cad19398e4.yaml | 10 +++++++++
 5 files changed, 86 insertions(+), 5 deletions(-)
 create mode 100644 releasenotes/notes/pre-filter-disabled-computes-0b15d2cad19398e4.yaml

diff --git a/doc/source/admin/configuration/schedulers.rst b/doc/source/admin/configuration/schedulers.rst
index e00a900c4642..b66c9c8cf2c9 100644
--- a/doc/source/admin/configuration/schedulers.rst
+++ b/doc/source/admin/configuration/schedulers.rst
@@ -75,6 +75,27 @@ non-ceph backed computes), enabling this feature will ensure that the scheduler
 does not send requests to boot a ``qcow2`` image to computes backed by ceph.
 
+Compute Disabled Status Support
+-------------------------------
+
+Starting in the Train release, there is a mandatory `pre-filter
+`_
+which will exclude disabled compute nodes similar to the `ComputeFilter`_.
+Compute node resource providers with the ``COMPUTE_STATUS_DISABLED`` trait will
+be excluded as scheduling candidates. The trait is managed by the
+``nova-compute`` service and should mirror the ``disabled`` status on the
+related compute service record in the `os-services`_ API. For example, if a
+compute service's status is ``disabled``, the related compute node resource
+provider(s) for that service should have the ``COMPUTE_STATUS_DISABLED`` trait.
+When the service status is ``enabled``, the ``COMPUTE_STATUS_DISABLED`` trait
+shall be removed.
+
+If the compute service is down when the status is changed, the trait will be
+synchronized by the compute service when it is restarted.
+
+.. _os-services: https://developer.openstack.org/api-ref/compute/#compute-services-os-services
+
+
 Filter scheduler
 ~~~~~~~~~~~~~~~~

diff --git a/nova/scheduler/request_filter.py b/nova/scheduler/request_filter.py
index f2f41c0ac10c..f8990a552766 100644
--- a/nova/scheduler/request_filter.py
+++ b/nova/scheduler/request_filter.py
@@ -154,10 +154,32 @@ def require_image_type_support(ctxt, request_spec):
     return True
 
 
+@trace_request_filter
+def compute_status_filter(ctxt, request_spec):
+    """Pre-filter compute node resource providers using COMPUTE_STATUS_DISABLED
+
+    The ComputeFilter filters out hosts for compute services that are
+    disabled. Compute node resource providers managed by a disabled compute
+    service should have the COMPUTE_STATUS_DISABLED trait set and be excluded
+    by this mandatory pre-filter.
+    """
+    # We're called before scheduler utils resources_from_request_spec builds
+    # the RequestGroup stuff which gets used to form the
+    # GET /allocation_candidates call, so mutate the flavor for that call but
+    # don't persist the change.
+    trait_name = os_traits.COMPUTE_STATUS_DISABLED
+    request_spec.flavor.extra_specs['trait:%s' % trait_name] = 'forbidden'
+    request_spec.obj_reset_changes(fields=['flavor'], recursive=True)
+    LOG.debug('compute_status_filter request filter added forbidden '
+              'trait %s', trait_name)
+    return True
+
+
 ALL_REQUEST_FILTERS = [
     require_tenant_aggregate,
     map_az_to_placement_aggregate,
     require_image_type_support,
+    compute_status_filter,
 ]

diff --git a/nova/tests/unit/scheduler/test_request_filter.py b/nova/tests/unit/scheduler/test_request_filter.py
index 6107d9fe9918..681479646bc3 100644
--- a/nova/tests/unit/scheduler/test_request_filter.py
+++ b/nova/tests/unit/scheduler/test_request_filter.py
@@ -170,6 +170,8 @@ class TestRequestFilter(test.NoDBTestCase):
         ]
         reqspec = objects.RequestSpec(project_id='owner',
                                       availability_zone='myaz')
+        # flavor is needed for the compute_status_filter
+        reqspec.flavor = objects.Flavor(extra_specs={})
         request_filter.process_reqspec(self.context, reqspec)
         self.assertEqual(
             ','.join(sorted([uuids.agg1, uuids.agg2])),
@@ -231,3 +233,18 @@ class TestRequestFilter(test.NoDBTestCase):
         log_lines = [c[0][0] for c in mock_log.debug.call_args_list]
         self.assertIn('added required trait', log_lines[0])
         self.assertIn('took %.1f seconds', log_lines[1])
+
+    @mock.patch.object(request_filter, 'LOG')
+    def test_compute_status_filter(self, mock_log):
+        reqspec = objects.RequestSpec(flavor=objects.Flavor(extra_specs={}))
+        request_filter.compute_status_filter(self.context, reqspec)
+        # The forbidden trait should be added to the RequestSpec.flavor.
+        self.assertEqual({'trait:COMPUTE_STATUS_DISABLED': 'forbidden'},
+                         reqspec.flavor.extra_specs)
+        # The RequestSpec.flavor changes should be reset so they are not
+        # persisted.
+        self.assertEqual(set(), reqspec.flavor.obj_what_changed())
+        # Assert both the in-method logging and trace decorator.
+        log_lines = [c[0][0] for c in mock_log.debug.call_args_list]
+        self.assertIn('added forbidden trait', log_lines[0])
+        self.assertIn('took %.1f seconds', log_lines[1])

diff --git a/nova/tests/unit/scheduler/test_scheduler.py b/nova/tests/unit/scheduler/test_scheduler.py
index 29351db3db70..678eb8513efb 100644
--- a/nova/tests/unit/scheduler/test_scheduler.py
+++ b/nova/tests/unit/scheduler/test_scheduler.py
@@ -81,10 +81,11 @@ class SchedulerManagerTestCase(test.NoDBTestCase):
         manager = self.manager
         self.assertIsInstance(manager.driver, self.driver_cls)
 
+    @mock.patch('nova.scheduler.request_filter.process_reqspec')
     @mock.patch('nova.scheduler.utils.resources_from_request_spec')
     @mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
                 'get_allocation_candidates')
-    def test_select_destination(self, mock_get_ac, mock_rfrs):
+    def test_select_destination(self, mock_get_ac, mock_rfrs, mock_process):
         fake_spec = objects.RequestSpec()
         fake_spec.instance_uuid = uuids.instance
         fake_version = "9.42"
@@ -98,6 +99,7 @@ class SchedulerManagerTestCase(test.NoDBTestCase):
         ) as select_destinations:
             self.manager.select_destinations(self.context, spec_obj=fake_spec,
                     instance_uuids=[fake_spec.instance_uuid])
+            mock_process.assert_called_once_with(self.context, fake_spec)
             select_destinations.assert_called_once_with(
                 self.context, fake_spec,
                 [fake_spec.instance_uuid], expected_alloc_reqs_by_rp_uuid,
@@ -115,11 +117,12 @@ class SchedulerManagerTestCase(test.NoDBTestCase):
                 [fake_spec.instance_uuid], expected_alloc_reqs_by_rp_uuid,
                 mock.sentinel.p_sums, fake_version, True)
 
+    @mock.patch('nova.scheduler.request_filter.process_reqspec')
     @mock.patch('nova.scheduler.utils.resources_from_request_spec')
     @mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
                 'get_allocation_candidates')
     def test_select_destination_return_objects(self, mock_get_ac,
-                                               mock_rfrs):
+                                               mock_rfrs, mock_process):
         fake_spec = objects.RequestSpec()
         fake_spec.instance_uuid = uuids.instance
         fake_version = "9.42"
@@ -141,6 +144,7 @@ class SchedulerManagerTestCase(test.NoDBTestCase):
                     return_objects=True, return_alternates=True)
             sel_host = dests[0][0]
             self.assertIsInstance(sel_host, objects.Selection)
+            mock_process.assert_called_once_with(None, fake_spec)
             # Since both return_objects and return_alternates are True, the
             # driver should have been called with True for return_alternates.
             select_destinations.assert_called_once_with(None, fake_spec,
@@ -163,11 +167,12 @@ class SchedulerManagerTestCase(test.NoDBTestCase):
                 [fake_spec.instance_uuid], expected_alloc_reqs_by_rp_uuid,
                 mock.sentinel.p_sums, fake_version, False)
 
+    @mock.patch('nova.scheduler.request_filter.process_reqspec')
     @mock.patch('nova.scheduler.utils.resources_from_request_spec')
     @mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
                'get_allocation_candidates')
     def _test_select_destination(self, get_allocation_candidates_response,
-                                 mock_get_ac, mock_rfrs):
+                                 mock_get_ac, mock_rfrs, mock_process):
         fake_spec = objects.RequestSpec()
         fake_spec.instance_uuid = uuids.instance
         place_res = get_allocation_candidates_response
@@ -179,6 +184,7 @@ class SchedulerManagerTestCase(test.NoDBTestCase):
                 spec_obj=fake_spec,
                 instance_uuids=[fake_spec.instance_uuid])
             select_destinations.assert_not_called()
+        mock_process.assert_called_once_with(self.context, fake_spec)
         mock_get_ac.assert_called_once_with(
             self.context, mock_rfrs.return_value)
 
@@ -227,10 +233,12 @@ class SchedulerManagerTestCase(test.NoDBTestCase):
         mock_get_ac.assert_not_called()
         mock_process.assert_not_called()
 
+    @mock.patch('nova.scheduler.request_filter.process_reqspec')
     @mock.patch('nova.scheduler.utils.resources_from_request_spec')
     @mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
                 'get_allocation_candidates')
-    def test_select_destination_with_4_3_client(self, mock_get_ac, mock_rfrs):
+    def test_select_destination_with_4_3_client(self, mock_get_ac, mock_rfrs,
+                                                mock_process):
         fake_spec = objects.RequestSpec()
         place_res = (fakes.ALLOC_REQS, mock.sentinel.p_sums, "42.0")
         mock_get_ac.return_value = place_res
@@ -241,6 +249,7 @@ class SchedulerManagerTestCase(test.NoDBTestCase):
         with mock.patch.object(self.manager.driver, 'select_destinations'
                                ) as select_destinations:
             self.manager.select_destinations(self.context, spec_obj=fake_spec)
+            mock_process.assert_called_once_with(self.context, fake_spec)
             select_destinations.assert_called_once_with(self.context,
                     fake_spec, None, expected_alloc_reqs_by_rp_uuid,
                     mock.sentinel.p_sums, "42.0", False)
@@ -248,12 +257,13 @@ class SchedulerManagerTestCase(test.NoDBTestCase):
             self.context, mock_rfrs.return_value)
 
     # TODO(sbauza): Remove that test once the API v4 is removed
+    @mock.patch('nova.scheduler.request_filter.process_reqspec')
     @mock.patch('nova.scheduler.utils.resources_from_request_spec')
     @mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
                 'get_allocation_candidates')
     @mock.patch.object(objects.RequestSpec, 'from_primitives')
     def test_select_destination_with_old_client(self, from_primitives,
-                                                mock_get_ac, mock_rfrs):
+                                                mock_get_ac, mock_rfrs, mock_process):
         fake_spec = objects.RequestSpec()
         fake_spec.instance_uuid = uuids.instance
         from_primitives.return_value = fake_spec
@@ -269,6 +279,7 @@ class SchedulerManagerTestCase(test.NoDBTestCase):
                 self.context, request_spec='fake_spec',
                 filter_properties='fake_props',
                 instance_uuids=[fake_spec.instance_uuid])
+            mock_process.assert_called_once_with(self.context, fake_spec)
             select_destinations.assert_called_once_with(
                 self.context, fake_spec,
                 [fake_spec.instance_uuid], expected_alloc_reqs_by_rp_uuid,

diff --git a/releasenotes/notes/pre-filter-disabled-computes-0b15d2cad19398e4.yaml b/releasenotes/notes/pre-filter-disabled-computes-0b15d2cad19398e4.yaml
new file mode 100644
index 000000000000..30ee7efb4df0
--- /dev/null
+++ b/releasenotes/notes/pre-filter-disabled-computes-0b15d2cad19398e4.yaml
@@ -0,0 +1,10 @@
+---
+features:
+  - |
+    A mandatory scheduling pre-filter has been added which will exclude
+    disabled compute nodes where the related ``nova-compute`` service status
+    is mirrored with a ``COMPUTE_STATUS_DISABLED`` trait on the compute node
+    resource provider(s) for that service in Placement. See the
+    `admin scheduler configuration docs`__ for details.
+
+    __ https://docs.openstack.org/nova/latest/admin/configuration/schedulers.html#compute-disabled-status-support
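
Reviewer note (illustration only, not part of the patch to apply): the
request_filter.py hunk works by marking COMPUTE_STATUS_DISABLED as a
forbidden trait in the in-flight flavor extra specs; scheduler utils later
translate that extra spec into a forbidden-trait term
(required=!COMPUTE_STATUS_DISABLED, supported by placement since
microversion 1.22) on the GET /allocation_candidates call. Below is a
minimal runnable sketch of that translation, assuming only the os-traits
library is installed; FakeFlavor and forbidden_trait_query are hypothetical
stand-ins for illustration, not nova code.

    import os_traits


    class FakeFlavor(object):
        """Simplified stand-in for nova.objects.Flavor (hypothetical)."""

        def __init__(self, extra_specs=None):
            self.extra_specs = extra_specs or {}


    def forbidden_trait_query(extra_specs):
        # Collect 'trait:<T>': 'forbidden' extra specs and render them the
        # way placement expresses forbidden traits in the "required" query
        # parameter: required=!<T>
        traits = [key[len('trait:'):] for key, value in extra_specs.items()
                  if key.startswith('trait:') and value == 'forbidden']
        return 'required=' + ','.join('!%s' % t for t in sorted(traits))


    flavor = FakeFlavor()
    # What compute_status_filter adds to the in-flight RequestSpec.flavor:
    trait = os_traits.COMPUTE_STATUS_DISABLED
    flavor.extra_specs['trait:%s' % trait] = 'forbidden'
    print(forbidden_trait_query(flavor.extra_specs))
    # required=!COMPUTE_STATUS_DISABLED

The obj_reset_changes(fields=['flavor']) call in the patch is what keeps
this mutation scoped to the placement query: the forbidden trait is used to
form the allocation candidates request but is never persisted back to the
instance's flavor.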