Update COMPUTE_STATUS_DISABLED from set_host_enabled compute call

This adds code to the ComputeManager.set_host_enabled method, which
will be called by the API in a subsequent change when a compute
service is enabled or disabled. The new code adds or removes the
COMPUTE_STATUS_DISABLED trait to/from the resource providers of the
compute nodes managed by the compute service.
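
The API-side change is not part of this commit, but roughly it will
drive this behavior over the compute RPC API. A minimal sketch, in
which the surrounding os-services API plumbing (and the helper name)
is assumed for illustration:

    from nova.compute import rpcapi as compute_rpcapi
    from nova import context as nova_context

    def disable_compute_host(host_name):
        ctxt = nova_context.get_admin_context()
        rpcapi = compute_rpcapi.ComputeAPI()
        # Synchronous call to the target compute host; the manager adds
        # the COMPUTE_STATUS_DISABLED trait to its resource provider(s).
        return rpcapi.set_host_enabled(ctxt, host=host_name, enabled=False)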

The set_host_enabled compute RPC API is a synchronous RPC call, and
the only (non-fake) in-tree compute driver that implements the
set_host_enabled driver interface is XenAPIDriver. As such, the
method is changed to handle NotImplementedError so an error is not
returned to the API if a driver does not implement the interface.
Before this series, only the PUT /os-hosts/{host_name} API would call
this compute service method, and that API was deprecated in
microversion 2.43. In other words, most users (admins) are likely not
using that API, and the only ones for whom it ever did anything were
running the XenAPIDriver.
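
For reference, the driver contract the manager relies on is the base
ComputeDriver.set_host_enabled interface, which takes the enabled
boolean and raises NotImplementedError unless a driver overrides it.
A hypothetical (not in-tree) driver override might look like:

    from nova.virt import driver

    class MyDriver(driver.ComputeDriver):
        def set_host_enabled(self, enabled):
            # A real driver would tell the hypervisor to stop or start
            # accepting new instances here (only XenAPIDriver does so
            # in-tree). The RPC contract is to return 'enabled' or
            # 'disabled'.
            return 'enabled' if enabled else 'disabled'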

The change to update the COMPUTE_STATUS_DISABLED trait is best
effort, so errors, expected or unexpected, from the ComputeVirtAPI
are logged but not raised back to the caller. With change
I3005b46221ac3c0e559e1072131a7e4846c9867c the ResourceTracker
update_available_resource flow will sync the trait based on the
current compute service disabled status.
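
The actual trait manipulation lives behind the ComputeVirtAPI (in the
related change), not in this diff. Conceptually it boils down to the
following hypothetical helper; the reportclient attribute and method
names here are assumptions for illustration only:

    def update_compute_provider_status(reportclient, context, rp_uuid,
                                       enabled):
        trait = 'COMPUTE_STATUS_DISABLED'
        if enabled:
            # Service enabled: ensure the trait is absent so the
            # provider is schedulable again.
            reportclient.remove_trait(context, rp_uuid, trait)  # assumed API
        else:
            # Service disabled: add the trait so the scheduler
            # pre-filter excludes this provider.
            reportclient.add_trait(context, rp_uuid, trait)  # assumed API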

The compute service RPC API version is incremented so that the API
will be able to determine if the compute service is new enough for
this new behavior when disabling/enabling a compute service in the
os-services API.
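
A rough sketch of how the API side could use that service version to
decide whether the computes are new enough; the constant and helper
names are assumptions, only Service.get_minimum_version is existing
nova API:

    from nova import objects

    # Service version added by this commit.
    MIN_COMPUTE_SET_HOST_ENABLED_SYNC = 38

    def computes_sync_disabled_trait(ctxt):
        # Lowest nova-compute service version in the deployment; older
        # computes will not sync the COMPUTE_STATUS_DISABLED trait.
        minver = objects.Service.get_minimum_version(ctxt, 'nova-compute')
        return minver >= MIN_COMPUTE_SET_HOST_ENABLED_SYNC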

Part of blueprint pre-filter-disabled-computes

Change-Id: Ia95de2f23f12b002b2189c9294ec190569a628ab
Author: Matt Riedemann
Date:   2019-07-02 15:05:08 -04:00
Commit: 099b490c2f
Parent: 9b99be44cd
3 changed files with 235 additions and 3 deletions


@@ -4982,10 +4982,69 @@ class ComputeManager(manager.Manager):
         """
         return self.driver.host_maintenance_mode(host, mode)
 
+    def _update_compute_provider_status(self, context, enabled):
+        """Adds or removes the COMPUTE_STATUS_DISABLED trait for this host.
+
+        For each ComputeNode managed by this service, adds or removes the
+        COMPUTE_STATUS_DISABLED trait to/from the associated resource provider
+        in Placement.
+
+        :param context: nova auth RequestContext
+        :param enabled: True if the node is enabled in which case the trait
+            would be removed, False if the node is disabled in which case
+            the trait would be added.
+        :raises: ComputeHostNotFound if there are no compute nodes found in
+            the ResourceTracker for this service.
+        """
+        # Get the compute node(s) on this host. Remember that ironic can be
+        # managing more than one compute node.
+        nodes = self.rt.compute_nodes.values()
+        if not nodes:
+            raise exception.ComputeHostNotFound(host=self.host)
+        # For each node, we want to add (or remove) the COMPUTE_STATUS_DISABLED
+        # trait on the related resource provider in placement so the scheduler
+        # (pre-)filters the provider based on its status.
+        for node in nodes:
+            try:
+                self.virtapi.update_compute_provider_status(
+                    context, node.uuid, enabled)
+            except (exception.ResourceProviderTraitRetrievalFailed,
+                    exception.ResourceProviderUpdateConflict,
+                    exception.ResourceProviderUpdateFailed,
+                    exception.TraitRetrievalFailed) as e:
+                # This is best effort so just log a warning and continue. The
+                # update_available_resource periodic task will sync the trait.
+                LOG.warning('An error occurred while updating '
+                            'COMPUTE_STATUS_DISABLED trait on compute node '
+                            'resource provider %s. Error: %s',
+                            node.uuid, e.format_message())
+            except Exception:
+                LOG.exception('An error occurred while updating '
+                              'COMPUTE_STATUS_DISABLED trait on compute node '
+                              'resource provider %s.', node.uuid)
+
     @wrap_exception()
     def set_host_enabled(self, context, enabled):
-        """Sets the specified host's ability to accept new instances."""
-        return self.driver.set_host_enabled(enabled)
+        """Sets the specified host's ability to accept new instances.
+
+        This method will add or remove the COMPUTE_STATUS_DISABLED trait
+        to/from the associated compute node resource provider(s) for this
+        compute service.
+        """
+        try:
+            self._update_compute_provider_status(context, enabled)
+        except exception.ComputeHostNotFound:
+            LOG.warning('Unable to add/remove trait COMPUTE_STATUS_DISABLED. '
+                        'No ComputeNode(s) found for host: %s', self.host)
+
+        try:
+            return self.driver.set_host_enabled(enabled)
+        except NotImplementedError:
+            # Only the xenapi driver implements set_host_enabled but we don't
+            # want NotImplementedError to get raised back to the API. We still
+            # need to honor the compute RPC API contract and return 'enabled'
+            # or 'disabled' though.
+            return 'enabled' if enabled else 'disabled'
 
     @wrap_exception()
     def get_host_uptime(self, context):


@@ -31,7 +31,7 @@ LOG = logging.getLogger(__name__)
 # NOTE(danms): This is the global service version counter
-SERVICE_VERSION = 37
+SERVICE_VERSION = 38
 
 # NOTE(danms): This is our SERVICE_VERSION history. The idea is that any
@@ -151,6 +151,8 @@ SERVICE_VERSION_HISTORY = (
     {'compute_rpc': '5.0'},
     # Version 37: prep_resize takes a RequestSpec object
    {'compute_rpc': '5.1'},
+    # Version 38: set_host_enabled reflects COMPUTE_STATUS_DISABLED trait
+    {'compute_rpc': '5.1'},
 )


@@ -9082,3 +9082,174 @@ class ComputeManagerInstanceUsageAuditTestCase(test.TestCase):
         self.assertEqual(0, mock_task_log().errors,
                          'an error was encountered processing the deleted test'
                          ' instance')
+
+
+@ddt.ddt
+class ComputeManagerSetHostEnabledTestCase(test.NoDBTestCase):
+
+    def setUp(self):
+        super(ComputeManagerSetHostEnabledTestCase, self).setUp()
+        self.compute = manager.ComputeManager()
+        self.context = context.RequestContext(user_id=fakes.FAKE_USER_ID,
+                                              project_id=fakes.FAKE_PROJECT_ID)
+
+    @ddt.data(True, False)
+    def test_set_host_enabled(self, enabled):
+        """Happy path test for set_host_enabled"""
+        with mock.patch.object(self.compute,
+                               '_update_compute_provider_status') as ucpt:
+            retval = self.compute.set_host_enabled(self.context, enabled)
+            expected_retval = 'enabled' if enabled else 'disabled'
+            self.assertEqual(expected_retval, retval)
+            ucpt.assert_called_once_with(self.context, enabled)
+
+    @mock.patch('nova.compute.manager.LOG.warning')
+    def test_set_host_enabled_compute_host_not_found(self, mock_warning):
+        """Tests _update_compute_provider_status raising ComputeHostNotFound"""
+        error = exception.ComputeHostNotFound(host=self.compute.host)
+        with mock.patch.object(self.compute,
+                               '_update_compute_provider_status',
+                               side_effect=error) as ucps:
+            retval = self.compute.set_host_enabled(self.context, False)
+            self.assertEqual('disabled', retval)
+            ucps.assert_called_once_with(self.context, False)
+        # A warning should have been logged for the ComputeHostNotFound error.
+        mock_warning.assert_called_once()
+        self.assertIn('Unable to add/remove trait COMPUTE_STATUS_DISABLED. '
+                      'No ComputeNode(s) found for host',
+                      mock_warning.call_args[0][0])
+
+    def test_set_host_enabled_update_provider_status_error(self):
+        """Tests _update_compute_provider_status raising some unexpected error
+        """
+        error = messaging.MessagingTimeout
+        with test.nested(
+                mock.patch.object(self.compute,
+                                  '_update_compute_provider_status',
+                                  side_effect=error),
+                mock.patch.object(self.compute.driver, 'set_host_enabled',
+                                  # The driver is not called in this case.
+                                  new_callable=mock.NonCallableMock),
+        ) as (
+            ucps, driver_set_host_enabled,
+        ):
+            self.assertRaises(error,
+                              self.compute.set_host_enabled,
+                              self.context, False)
+            ucps.assert_called_once_with(self.context, False)
+
+    @ddt.data(True, False)
+    def test_set_host_enabled_not_implemented_error(self, enabled):
+        """Tests the driver raising NotImplementedError"""
+        with test.nested(
+                mock.patch.object(self.compute,
+                                  '_update_compute_provider_status'),
+                mock.patch.object(self.compute.driver, 'set_host_enabled',
+                                  side_effect=NotImplementedError),
+        ) as (
+            ucps, driver_set_host_enabled,
+        ):
+            retval = self.compute.set_host_enabled(self.context, enabled)
+            expected_retval = 'enabled' if enabled else 'disabled'
+            self.assertEqual(expected_retval, retval)
+            ucps.assert_called_once_with(self.context, enabled)
+            driver_set_host_enabled.assert_called_once_with(enabled)
+
+    def test_set_host_enabled_driver_error(self):
+        """Tests the driver raising some unexpected error"""
+        error = exception.HypervisorUnavailable(host=self.compute.host)
+        with test.nested(
+                mock.patch.object(self.compute,
+                                  '_update_compute_provider_status'),
+                mock.patch.object(self.compute.driver, 'set_host_enabled',
+                                  side_effect=error),
+        ) as (
+            ucps, driver_set_host_enabled,
+        ):
+            self.assertRaises(exception.HypervisorUnavailable,
+                              self.compute.set_host_enabled,
+                              self.context, False)
+            ucps.assert_called_once_with(self.context, False)
+            driver_set_host_enabled.assert_called_once_with(False)
+
+    @ddt.data(True, False)
+    def test_update_compute_provider_status(self, enabled):
+        """Happy path test for _update_compute_provider_status"""
+        # Fake out some fake compute nodes (ironic driver case).
+        self.compute.rt.compute_nodes = {
+            uuids.node1: objects.ComputeNode(uuid=uuids.node1),
+            uuids.node2: objects.ComputeNode(uuid=uuids.node2),
+        }
+        with mock.patch.object(self.compute.virtapi,
+                               'update_compute_provider_status') as ucps:
+            self.compute._update_compute_provider_status(
+                self.context, enabled=enabled)
+            self.assertEqual(2, ucps.call_count)
+            ucps.assert_has_calls([
+                mock.call(self.context, uuids.node1, enabled),
+                mock.call(self.context, uuids.node2, enabled),
+            ], any_order=True)
+
+    def test_update_compute_provider_status_no_nodes(self):
+        """Tests the case that _update_compute_provider_status will raise
+        ComputeHostNotFound if there are no nodes in the resource tracker.
+        """
+        self.assertRaises(exception.ComputeHostNotFound,
+                          self.compute._update_compute_provider_status,
+                          self.context, enabled=True)
+
+    @mock.patch('nova.compute.manager.LOG.warning')
+    def test_update_compute_provider_status_expected_errors(self, m_warn):
+        """Tests _update_compute_provider_status handling a set of expected
+        errors from the ComputeVirtAPI and logging a warning.
+        """
+        # Setup a fake compute in the resource tracker.
+        self.compute.rt.compute_nodes = {
+            uuids.node: objects.ComputeNode(uuid=uuids.node)
+        }
+        errors = (
+            exception.ResourceProviderTraitRetrievalFailed(uuid=uuids.node),
+            exception.ResourceProviderUpdateConflict(
+                uuid=uuids.node, generation=1, error='conflict'),
+            exception.ResourceProviderUpdateFailed(
+                url='https://placement', error='dogs'),
+            exception.TraitRetrievalFailed(error='cats'),
+        )
+        for error in errors:
+            with mock.patch.object(
+                    self.compute.virtapi, 'update_compute_provider_status',
+                    side_effect=error) as ucps:
+                self.compute._update_compute_provider_status(
+                    self.context, enabled=False)
+                ucps.assert_called_once_with(self.context, uuids.node, False)
+            # The expected errors are logged as a warning.
+            m_warn.assert_called_once()
+            self.assertIn('An error occurred while updating '
+                          'COMPUTE_STATUS_DISABLED trait on compute node',
+                          m_warn.call_args[0][0])
+            m_warn.reset_mock()
+
+    @mock.patch('nova.compute.manager.LOG.exception')
+    def test_update_compute_provider_status_unexpected_error(self, m_exc):
+        """Tests _update_compute_provider_status handling an unexpected
+        exception from the ComputeVirtAPI and logging it.
+        """
+        # Use two fake nodes here to make sure we try updating each even when
+        # an error occurs.
+        self.compute.rt.compute_nodes = {
+            uuids.node1: objects.ComputeNode(uuid=uuids.node1),
+            uuids.node2: objects.ComputeNode(uuid=uuids.node2),
+        }
+        with mock.patch.object(
+                self.compute.virtapi, 'update_compute_provider_status',
+                side_effect=(TypeError, AttributeError)) as ucps:
+            self.compute._update_compute_provider_status(
+                self.context, enabled=False)
+            self.assertEqual(2, ucps.call_count)
+            ucps.assert_has_calls([
+                mock.call(self.context, uuids.node1, False),
+                mock.call(self.context, uuids.node2, False),
+            ], any_order=True)
+        # Each exception should have been logged.
+        self.assertEqual(2, m_exc.call_count)
+        self.assertIn('An error occurred while updating '
+                      'COMPUTE_STATUS_DISABLED trait',
+                      m_exc.call_args_list[0][0][0])