From 099b490c2f5201732b97fbcc2560892086f32038 Mon Sep 17 00:00:00 2001 From: Matt Riedemann Date: Tue, 2 Jul 2019 15:05:08 -0400 Subject: [PATCH] Update COMPUTE_STATUS_DISABLED from set_host_enabled compute call This adds code to the ComputeManager.set_host_enabled method, which will be called by the API in a subsequent change when a compute service is enabled or disabled, so that it adds or removes the COMPUTE_STATUS_DISABLED trait to/from the related compute node resource providers managed by the compute service. The set_host_enabled compute RPC API is a synchronous RPC call and the only (non-fake) in-tree compute driver that implements the set_host_enabled driver interface is XenAPIDriver. As such, the method is changed to handle NotImplementedError so an error is not returned to the API if a driver does not implement the interface. Before this series, only the PUT /os-hosts/{host_name} API would call this compute service method and that API was deprecated in 2.43. In other words, most users (admins) are likely not using that API and the only ones that could before were running with XenAPIDriver. The change to update the COMPUTE_STATUS_DISABLED trait usage is best effort so errors, expected or unexpected, from the ComputeVirtAPI are logged but not raised back to the caller. With change I3005b46221ac3c0e559e1072131a7e4846c9867c the ResourceTracker update_available_resource flow will sync the trait based on the current compute service disabled status. The compute service RPC API version is incremented so that the API will be able to determine if the compute service is new enough for this new behavior when disabling/enabling a compute service in the os-services API. 
Part of blueprint pre-filter-disabled-computes Change-Id: Ia95de2f23f12b002b2189c9294ec190569a628ab --- nova/compute/manager.py | 63 +++++++- nova/objects/service.py | 4 +- nova/tests/unit/compute/test_compute_mgr.py | 171 ++++++++++++++++++++ 3 files changed, 235 insertions(+), 3 deletions(-) diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 163559d8cd57..b9d4f7a4b3a6 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -4982,10 +4982,69 @@ class ComputeManager(manager.Manager): """ return self.driver.host_maintenance_mode(host, mode) + def _update_compute_provider_status(self, context, enabled): + """Adds or removes the COMPUTE_STATUS_DISABLED trait for this host. + + For each ComputeNode managed by this service, adds or removes the + COMPUTE_STATUS_DISABLED traits to/from the associated resource provider + in Placement. + + :param context: nova auth RequestContext + :param enabled: True if the node is enabled in which case the trait + would be removed, False if the node is disabled in which case + the trait would be added. + :raises: ComputeHostNotFound if there are no compute nodes found in + the ResourceTracker for this service. + """ + # Get the compute node(s) on this host. Remember that ironic can be + # managing more than one compute node. + nodes = self.rt.compute_nodes.values() + if not nodes: + raise exception.ComputeHostNotFound(host=self.host) + # For each node, we want to add (or remove) the COMPUTE_STATUS_DISABLED + # trait on the related resource provider in placement so the scheduler + # (pre-)filters the provider based on its status. + for node in nodes: + try: + self.virtapi.update_compute_provider_status( + context, node.uuid, enabled) + except (exception.ResourceProviderTraitRetrievalFailed, + exception.ResourceProviderUpdateConflict, + exception.ResourceProviderUpdateFailed, + exception.TraitRetrievalFailed) as e: + # This is best effort so just log a warning and continue. 
The + # update_available_resource periodic task will sync the trait. + LOG.warning('An error occurred while updating ' + 'COMPUTE_STATUS_DISABLED trait on compute node ' + 'resource provider %s. Error: %s', + node.uuid, e.format_message()) + except Exception: + LOG.exception('An error occurred while updating ' + 'COMPUTE_STATUS_DISABLED trait on compute node ' + 'resource provider %s.', node.uuid) + @wrap_exception() def set_host_enabled(self, context, enabled): - """Sets the specified host's ability to accept new instances.""" - return self.driver.set_host_enabled(enabled) + """Sets the specified host's ability to accept new instances. + + This method will add or remove the COMPUTE_STATUS_DISABLED trait + to/from the associated compute node resource provider(s) for this + compute service. + """ + try: + self._update_compute_provider_status(context, enabled) + except exception.ComputeHostNotFound: + LOG.warning('Unable to add/remove trait COMPUTE_STATUS_DISABLED. ' + 'No ComputeNode(s) found for host: %s', self.host) + + try: + return self.driver.set_host_enabled(enabled) + except NotImplementedError: + # Only the xenapi driver implements set_host_enabled but we don't + # want NotImplementedError to get raised back to the API. We still + # need to honor the compute RPC API contract and return 'enabled' + # or 'disabled' though. + return 'enabled' if enabled else 'disabled' @wrap_exception() def get_host_uptime(self, context): diff --git a/nova/objects/service.py b/nova/objects/service.py index 0bc82e7b50c9..b8b4d2f7ff7e 100644 --- a/nova/objects/service.py +++ b/nova/objects/service.py @@ -31,7 +31,7 @@ LOG = logging.getLogger(__name__) # NOTE(danms): This is the global service version counter -SERVICE_VERSION = 37 +SERVICE_VERSION = 38 # NOTE(danms): This is our SERVICE_VERSION history. 
The idea is that any @@ -151,6 +151,8 @@ SERVICE_VERSION_HISTORY = ( {'compute_rpc': '5.0'}, # Version 37: prep_resize takes a RequestSpec object {'compute_rpc': '5.1'}, + # Version 38: set_host_enabled reflects COMPUTE_STATUS_DISABLED trait + {'compute_rpc': '5.1'}, ) diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py index a8bea88a207e..93266c89d7ad 100644 --- a/nova/tests/unit/compute/test_compute_mgr.py +++ b/nova/tests/unit/compute/test_compute_mgr.py @@ -9082,3 +9082,174 @@ class ComputeManagerInstanceUsageAuditTestCase(test.TestCase): self.assertEqual(0, mock_task_log().errors, 'an error was encountered processing the deleted test' ' instance') + + +@ddt.ddt +class ComputeManagerSetHostEnabledTestCase(test.NoDBTestCase): + + def setUp(self): + super(ComputeManagerSetHostEnabledTestCase, self).setUp() + self.compute = manager.ComputeManager() + self.context = context.RequestContext(user_id=fakes.FAKE_USER_ID, + project_id=fakes.FAKE_PROJECT_ID) + + @ddt.data(True, False) + def test_set_host_enabled(self, enabled): + """Happy path test for set_host_enabled""" + with mock.patch.object(self.compute, + '_update_compute_provider_status') as ucpt: + retval = self.compute.set_host_enabled(self.context, enabled) + expected_retval = 'enabled' if enabled else 'disabled' + self.assertEqual(expected_retval, retval) + ucpt.assert_called_once_with(self.context, enabled) + + @mock.patch('nova.compute.manager.LOG.warning') + def test_set_host_enabled_compute_host_not_found(self, mock_warning): + """Tests _update_compute_provider_status raising ComputeHostNotFound""" + error = exception.ComputeHostNotFound(host=self.compute.host) + with mock.patch.object(self.compute, + '_update_compute_provider_status', + side_effect=error) as ucps: + retval = self.compute.set_host_enabled(self.context, False) + self.assertEqual('disabled', retval) + ucps.assert_called_once_with(self.context, False) + # A warning should have been logged for 
the ComputeHostNotFound error. + mock_warning.assert_called_once() + self.assertIn('Unable to add/remove trait COMPUTE_STATUS_DISABLED. ' + 'No ComputeNode(s) found for host', + mock_warning.call_args[0][0]) + + def test_set_host_enabled_update_provider_status_error(self): + """Tests _update_compute_provider_status raising some unexpected error + """ + error = messaging.MessagingTimeout + with test.nested( + mock.patch.object(self.compute, + '_update_compute_provider_status', + side_effect=error), + mock.patch.object(self.compute.driver, 'set_host_enabled', + # The driver is not called in this case. + new_callable=mock.NonCallableMock), + ) as ( + ucps, driver_set_host_enabled, + ): + self.assertRaises(error, + self.compute.set_host_enabled, + self.context, False) + ucps.assert_called_once_with(self.context, False) + + @ddt.data(True, False) + def test_set_host_enabled_not_implemented_error(self, enabled): + """Tests the driver raising NotImplementedError""" + with test.nested( + mock.patch.object(self.compute, '_update_compute_provider_status'), + mock.patch.object(self.compute.driver, 'set_host_enabled', + side_effect=NotImplementedError), + ) as ( + ucps, driver_set_host_enabled, + ): + retval = self.compute.set_host_enabled(self.context, enabled) + expected_retval = 'enabled' if enabled else 'disabled' + self.assertEqual(expected_retval, retval) + ucps.assert_called_once_with(self.context, enabled) + driver_set_host_enabled.assert_called_once_with(enabled) + + def test_set_host_enabled_driver_error(self): + """Tests the driver raising some unexpected error""" + error = exception.HypervisorUnavailable(host=self.compute.host) + with test.nested( + mock.patch.object(self.compute, '_update_compute_provider_status'), + mock.patch.object(self.compute.driver, 'set_host_enabled', + side_effect=error), + ) as ( + ucps, driver_set_host_enabled, + ): + self.assertRaises(exception.HypervisorUnavailable, + self.compute.set_host_enabled, + self.context, False) + 
ucps.assert_called_once_with(self.context, False) + driver_set_host_enabled.assert_called_once_with(False) + + @ddt.data(True, False) + def test_update_compute_provider_status(self, enabled): + """Happy path test for _update_compute_provider_status""" + # Fake out some fake compute nodes (ironic driver case). + self.compute.rt.compute_nodes = { + uuids.node1: objects.ComputeNode(uuid=uuids.node1), + uuids.node2: objects.ComputeNode(uuid=uuids.node2), + } + with mock.patch.object(self.compute.virtapi, + 'update_compute_provider_status') as ucps: + self.compute._update_compute_provider_status( + self.context, enabled=enabled) + self.assertEqual(2, ucps.call_count) + ucps.assert_has_calls([ + mock.call(self.context, uuids.node1, enabled), + mock.call(self.context, uuids.node2, enabled), + ], any_order=True) + + def test_update_compute_provider_status_no_nodes(self): + """Tests the case that _update_compute_provider_status will raise + ComputeHostNotFound if there are no nodes in the resource tracker. + """ + self.assertRaises(exception.ComputeHostNotFound, + self.compute._update_compute_provider_status, + self.context, enabled=True) + + @mock.patch('nova.compute.manager.LOG.warning') + def test_update_compute_provider_status_expected_errors(self, m_warn): + """Tests _update_compute_provider_status handling a set of expected + errors from the ComputeVirtAPI and logging a warning. + """ + # Setup a fake compute in the resource tracker. 
+ self.compute.rt.compute_nodes = { + uuids.node: objects.ComputeNode(uuid=uuids.node) + } + errors = ( + exception.ResourceProviderTraitRetrievalFailed(uuid=uuids.node), + exception.ResourceProviderUpdateConflict( + uuid=uuids.node, generation=1, error='conflict'), + exception.ResourceProviderUpdateFailed( + url='https://placement', error='dogs'), + exception.TraitRetrievalFailed(error='cats'), + ) + for error in errors: + with mock.patch.object( + self.compute.virtapi, 'update_compute_provider_status', + side_effect=error) as ucps: + self.compute._update_compute_provider_status( + self.context, enabled=False) + ucps.assert_called_once_with(self.context, uuids.node, False) + # The expected errors are logged as a warning. + m_warn.assert_called_once() + self.assertIn('An error occurred while updating ' + 'COMPUTE_STATUS_DISABLED trait on compute node', + m_warn.call_args[0][0]) + m_warn.reset_mock() + + @mock.patch('nova.compute.manager.LOG.exception') + def test_update_compute_provider_status_unexpected_error(self, m_exc): + """Tests _update_compute_provider_status handling an unexpected + exception from the ComputeVirtAPI and logging it. + """ + # Use two fake nodes here to make sure we try updating each even when + # an error occurs. + self.compute.rt.compute_nodes = { + uuids.node1: objects.ComputeNode(uuid=uuids.node1), + uuids.node2: objects.ComputeNode(uuid=uuids.node2), + } + with mock.patch.object( + self.compute.virtapi, 'update_compute_provider_status', + side_effect=(TypeError, AttributeError)) as ucps: + self.compute._update_compute_provider_status( + self.context, enabled=False) + self.assertEqual(2, ucps.call_count) + ucps.assert_has_calls([ + mock.call(self.context, uuids.node1, False), + mock.call(self.context, uuids.node2, False), + ], any_order=True) + # Each exception should have been logged. 
+ self.assertEqual(2, m_exc.call_count) + self.assertIn('An error occurred while updating ' + 'COMPUTE_STATUS_DISABLED trait', + m_exc.call_args_list[0][0][0])