Add placement request pre-filter compute_status_filter

This adds a new mandatory placement request pre-filter
which is used to exclude compute node resource providers
with the COMPUTE_STATUS_DISABLED trait. The trait is
managed by the nova-compute service when the service's
disabled status changes.

Change I3005b46221ac3c0e559e1072131a7e4846c9867c makes
the compute service sync the trait during the
update_available_resource flow (either on start of the
compute service or during the periodic task run).

Change Ifabbb543aab62b917394eefe48126231df7cd503 makes
the libvirt driver's _set_host_enabled callback reflect
the trait when the hypervisor goes up or down out of band.

Change If32bca070185937ef83f689b7163d965a89ec10a will add
the final piece which is the os-services API calling the
compute service to add/remove the trait when a compute
service is disabled or enabled.

Since this series technically functions without the API
change, the docs and release note are added here.

Part of blueprint pre-filter-disabled-computes

Change-Id: I317cabbe49a337848325f96df79d478fd65811d9
This commit is contained in:
Matt Riedemann 2019-07-02 16:01:02 -04:00
parent 099b490c2f
commit 168d34c8d1
5 changed files with 86 additions and 5 deletions

View File

@ -75,6 +75,27 @@ non-ceph backed computes), enabling this feature will ensure that the
scheduler does not send requests to boot a ``qcow2`` image to computes
backed by ceph.
Compute Disabled Status Support
-------------------------------
Starting in the Train release, there is a mandatory `pre-filter
<https://specs.openstack.org/openstack/nova-specs/specs/train/approved/pre-filter-disabled-computes.html>`_
which will exclude disabled compute nodes, similarly to the `ComputeFilter`_.
Compute node resource providers with the ``COMPUTE_STATUS_DISABLED`` trait will
be excluded as scheduling candidates. The trait is managed by the
``nova-compute`` service and should mirror the ``disabled`` status on the
related compute service record in the `os-services`_ API. For example, if a
compute service's status is ``disabled``, the related compute node resource
provider(s) for that service should have the ``COMPUTE_STATUS_DISABLED`` trait.
When the service status is ``enabled`` the ``COMPUTE_STATUS_DISABLED`` trait
shall be removed.
If the compute service is down when the status is changed, the trait will be
synchronized by the compute service when it is restarted.
.. _os-services: https://developer.openstack.org/api-ref/compute/#compute-services-os-services
Filter scheduler
~~~~~~~~~~~~~~~~

View File

@ -154,10 +154,32 @@ def require_image_type_support(ctxt, request_spec):
return True
@trace_request_filter
def compute_status_filter(ctxt, request_spec):
    """Exclude disabled compute node providers via a forbidden trait.

    Compute node resource providers managed by a disabled compute service
    are expected to have the COMPUTE_STATUS_DISABLED trait set on them.
    Marking that trait as forbidden on the request excludes those providers
    from the allocation candidates, mirroring for placement what the
    ComputeFilter does for disabled compute services. This pre-filter is
    mandatory (always enabled).
    """
    # This runs before scheduler utils resources_from_request_spec builds
    # the RequestGroups used for the GET /allocation_candidates call, so
    # mutating the flavor extra_specs here affects that call; resetting the
    # change afterwards keeps the mutation from being persisted.
    forbidden = os_traits.COMPUTE_STATUS_DISABLED
    request_spec.flavor.extra_specs['trait:%s' % forbidden] = 'forbidden'
    request_spec.obj_reset_changes(fields=['flavor'], recursive=True)
    LOG.debug('compute_status_filter request filter added forbidden '
              'trait %s', forbidden)
    return True
# All known request (pre-)filters, including the mandatory
# compute_status_filter added for excluding disabled compute node
# resource providers.
ALL_REQUEST_FILTERS = [
    require_tenant_aggregate,
    map_az_to_placement_aggregate,
    require_image_type_support,
    compute_status_filter,
]

View File

@ -170,6 +170,8 @@ class TestRequestFilter(test.NoDBTestCase):
]
reqspec = objects.RequestSpec(project_id='owner',
availability_zone='myaz')
# flavor is needed for the compute_status_filter
reqspec.flavor = objects.Flavor(extra_specs={})
request_filter.process_reqspec(self.context, reqspec)
self.assertEqual(
','.join(sorted([uuids.agg1, uuids.agg2])),
@ -231,3 +233,18 @@ class TestRequestFilter(test.NoDBTestCase):
log_lines = [c[0][0] for c in mock_log.debug.call_args_list]
self.assertIn('added required trait', log_lines[0])
self.assertIn('took %.1f seconds', log_lines[1])
@mock.patch.object(request_filter, 'LOG')
def test_compute_status_filter(self, mock_log):
    """Ensure the mandatory pre-filter adds the forbidden trait."""
    spec = objects.RequestSpec(flavor=objects.Flavor(extra_specs={}))
    request_filter.compute_status_filter(self.context, spec)
    # The filter must have mutated the flavor with the forbidden trait.
    self.assertEqual(
        {'trait:COMPUTE_STATUS_DISABLED': 'forbidden'},
        spec.flavor.extra_specs)
    # ...and reset the tracked flavor changes so the mutation is not
    # persisted.
    self.assertEqual(set(), spec.flavor.obj_what_changed())
    # Two debug messages are expected: one from the filter body and one
    # from the trace_request_filter decorator timing the call.
    debug_messages = [call[0][0] for call in mock_log.debug.call_args_list]
    self.assertIn('added forbidden trait', debug_messages[0])
    self.assertIn('took %.1f seconds', debug_messages[1])

View File

@ -81,10 +81,11 @@ class SchedulerManagerTestCase(test.NoDBTestCase):
manager = self.manager
self.assertIsInstance(manager.driver, self.driver_cls)
@mock.patch('nova.scheduler.request_filter.process_reqspec')
@mock.patch('nova.scheduler.utils.resources_from_request_spec')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'get_allocation_candidates')
def test_select_destination(self, mock_get_ac, mock_rfrs):
def test_select_destination(self, mock_get_ac, mock_rfrs, mock_process):
fake_spec = objects.RequestSpec()
fake_spec.instance_uuid = uuids.instance
fake_version = "9.42"
@ -98,6 +99,7 @@ class SchedulerManagerTestCase(test.NoDBTestCase):
) as select_destinations:
self.manager.select_destinations(self.context, spec_obj=fake_spec,
instance_uuids=[fake_spec.instance_uuid])
mock_process.assert_called_once_with(self.context, fake_spec)
select_destinations.assert_called_once_with(
self.context, fake_spec,
[fake_spec.instance_uuid], expected_alloc_reqs_by_rp_uuid,
@ -115,11 +117,12 @@ class SchedulerManagerTestCase(test.NoDBTestCase):
[fake_spec.instance_uuid], expected_alloc_reqs_by_rp_uuid,
mock.sentinel.p_sums, fake_version, True)
@mock.patch('nova.scheduler.request_filter.process_reqspec')
@mock.patch('nova.scheduler.utils.resources_from_request_spec')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'get_allocation_candidates')
def test_select_destination_return_objects(self, mock_get_ac,
mock_rfrs):
mock_rfrs, mock_process):
fake_spec = objects.RequestSpec()
fake_spec.instance_uuid = uuids.instance
fake_version = "9.42"
@ -141,6 +144,7 @@ class SchedulerManagerTestCase(test.NoDBTestCase):
return_objects=True, return_alternates=True)
sel_host = dests[0][0]
self.assertIsInstance(sel_host, objects.Selection)
mock_process.assert_called_once_with(None, fake_spec)
# Since both return_objects and return_alternates are True, the
# driver should have been called with True for return_alternates.
select_destinations.assert_called_once_with(None, fake_spec,
@ -163,11 +167,12 @@ class SchedulerManagerTestCase(test.NoDBTestCase):
[fake_spec.instance_uuid], expected_alloc_reqs_by_rp_uuid,
mock.sentinel.p_sums, fake_version, False)
@mock.patch('nova.scheduler.request_filter.process_reqspec')
@mock.patch('nova.scheduler.utils.resources_from_request_spec')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'get_allocation_candidates')
def _test_select_destination(self, get_allocation_candidates_response,
mock_get_ac, mock_rfrs):
mock_get_ac, mock_rfrs, mock_process):
fake_spec = objects.RequestSpec()
fake_spec.instance_uuid = uuids.instance
place_res = get_allocation_candidates_response
@ -179,6 +184,7 @@ class SchedulerManagerTestCase(test.NoDBTestCase):
spec_obj=fake_spec,
instance_uuids=[fake_spec.instance_uuid])
select_destinations.assert_not_called()
mock_process.assert_called_once_with(self.context, fake_spec)
mock_get_ac.assert_called_once_with(
self.context, mock_rfrs.return_value)
@ -227,10 +233,12 @@ class SchedulerManagerTestCase(test.NoDBTestCase):
mock_get_ac.assert_not_called()
mock_process.assert_not_called()
@mock.patch('nova.scheduler.request_filter.process_reqspec')
@mock.patch('nova.scheduler.utils.resources_from_request_spec')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'get_allocation_candidates')
def test_select_destination_with_4_3_client(self, mock_get_ac, mock_rfrs):
def test_select_destination_with_4_3_client(self, mock_get_ac, mock_rfrs,
mock_process):
fake_spec = objects.RequestSpec()
place_res = (fakes.ALLOC_REQS, mock.sentinel.p_sums, "42.0")
mock_get_ac.return_value = place_res
@ -241,6 +249,7 @@ class SchedulerManagerTestCase(test.NoDBTestCase):
with mock.patch.object(self.manager.driver, 'select_destinations'
) as select_destinations:
self.manager.select_destinations(self.context, spec_obj=fake_spec)
mock_process.assert_called_once_with(self.context, fake_spec)
select_destinations.assert_called_once_with(self.context,
fake_spec, None, expected_alloc_reqs_by_rp_uuid,
mock.sentinel.p_sums, "42.0", False)
@ -248,12 +257,13 @@ class SchedulerManagerTestCase(test.NoDBTestCase):
self.context, mock_rfrs.return_value)
# TODO(sbauza): Remove that test once the API v4 is removed
@mock.patch('nova.scheduler.request_filter.process_reqspec')
@mock.patch('nova.scheduler.utils.resources_from_request_spec')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'get_allocation_candidates')
@mock.patch.object(objects.RequestSpec, 'from_primitives')
def test_select_destination_with_old_client(self, from_primitives,
mock_get_ac, mock_rfrs):
mock_get_ac, mock_rfrs, mock_process):
fake_spec = objects.RequestSpec()
fake_spec.instance_uuid = uuids.instance
from_primitives.return_value = fake_spec
@ -269,6 +279,7 @@ class SchedulerManagerTestCase(test.NoDBTestCase):
self.context, request_spec='fake_spec',
filter_properties='fake_props',
instance_uuids=[fake_spec.instance_uuid])
mock_process.assert_called_once_with(self.context, fake_spec)
select_destinations.assert_called_once_with(
self.context, fake_spec,
[fake_spec.instance_uuid], expected_alloc_reqs_by_rp_uuid,

View File

@ -0,0 +1,10 @@
---
features:
- |
A mandatory scheduling pre-filter has been added which will exclude
disabled compute nodes where the related ``nova-compute`` service status
is mirrored with a ``COMPUTE_STATUS_DISABLED`` trait on the compute node
resource provider(s) for that service in Placement. See the
`admin scheduler configuration docs`__ for details.
__ https://docs.openstack.org/nova/latest/admin/configuration/schedulers.html#compute-disabled-status-support