Follow up for pre-filter-disabled-computes series

This is a collection of follow-up changes from the
patches in the series.

* https://review.opendev.org/#/c/668565/1/nova/tests/unit/compute/test_resource_tracker.py
* https://review.opendev.org/#/c/668707/1/nova/virt/libvirt/driver.py
* https://review.opendev.org/#/c/668743/1/nova/compute/manager.py@5020
* https://review.opendev.org/#/c/668752/1/doc/source/admin/configuration/schedulers.rst
* https://review.opendev.org/#/c/654596/8/nova/compute/api.py

Related to blueprint pre-filter-disabled-computes

Change-Id: I2efb833c6f18b13f86abc6c62bcf2c6f77aa15c1
This commit is contained in:
Matt Riedemann 2019-07-03 15:53:06 -04:00
parent 3f0605c289
commit 12a6fe57b9
6 changed files with 37 additions and 24 deletions

View File

@ -80,18 +80,22 @@ Compute Disabled Status Support
Starting in the Train release, there is a mandatory `pre-filter
<https://specs.openstack.org/openstack/nova-specs/specs/train/approved/pre-filter-disabled-computes.html>`_
which will exclude disabled compute nodes similar to the `ComputeFilter`_.
Compute node resource providers with the ``COMPUTE_STATUS_DISABLED`` trait will
be excluded as scheduling candidates. The trait is managed by the
``nova-compute`` service and should mirror the ``disabled`` status on the
related compute service record in the `os-services`_ API. For example, if a
compute service's status is ``disabled``, the related compute node resource
provider(s) for that service should have the ``COMPUTE_STATUS_DISABLED`` trait.
When the service status is ``enabled`` the ``COMPUTE_STATUS_DISABLED`` trait
shall be removed.
which will exclude disabled compute nodes, similar to (but not fully
replacing) the `ComputeFilter`_. Compute node resource providers with the
``COMPUTE_STATUS_DISABLED`` trait will be excluded as scheduling candidates.
The trait is managed by the ``nova-compute`` service and should mirror the
``disabled`` status on the related compute service record in the
`os-services`_ API. For example, if a compute service's status is ``disabled``,
the related compute node resource provider(s) for that service should have the
``COMPUTE_STATUS_DISABLED`` trait. When the service status is ``enabled`` the
``COMPUTE_STATUS_DISABLED`` trait shall be removed.
If the compute service is down when the status is changed, the trait will be
synchronized by the compute service when it is restarted.
synchronized by the compute service when it is restarted. Similarly, if an
error occurs when trying to add or remove the trait on a given resource
provider, the trait will be synchronized when the ``update_available_resource``
periodic task runs, which is controlled by the
:oslo.config:option:`update_resources_interval` configuration option.
.. _os-services: https://developer.openstack.org/api-ref/compute/#compute-services-os-services

View File

@ -5129,10 +5129,12 @@ class HostAPI(base.Base):
'COMPUTE_STATUS_DISABLED trait.', service.host)
self.rpcapi.set_host_enabled(context, service.host, enabled)
except Exception:
LOG.exception('An error occurred while updating host enabled '
'status to "%s" for compute host: %s',
'enabled' if enabled else 'disabled',
service.host)
LOG.exception('An error occurred while updating the '
'COMPUTE_STATUS_DISABLED trait on compute node '
'resource providers managed by host %s. The trait '
'will be synchronized automatically by the compute '
'service when the update_available_resource '
'periodic task runs.', service.host)
def service_update(self, context, service):
"""Performs the actual service update operation.

View File

@ -5012,16 +5012,20 @@ class ComputeManager(manager.Manager):
exception.ResourceProviderUpdateConflict,
exception.ResourceProviderUpdateFailed,
exception.TraitRetrievalFailed) as e:
# This is best effort so just log a warning and continue. The
# update_available_resource periodic task will sync the trait.
# This is best effort so just log a warning and continue.
LOG.warning('An error occurred while updating '
'COMPUTE_STATUS_DISABLED trait on compute node '
'resource provider %s. Error: %s',
'resource provider %s. The trait will be '
'synchronized when the update_available_resource '
'periodic task runs. Error: %s',
node.uuid, e.format_message())
except Exception:
LOG.exception('An error occurred while updating '
'COMPUTE_STATUS_DISABLED trait on compute node '
'resource provider %s.', node.uuid)
'resource provider %s. The trait will be '
'synchronized when the '
'update_available_resource periodic task runs.',
node.uuid)
@wrap_exception()
def set_host_enabled(self, context, enabled):

View File

@ -390,8 +390,9 @@ class ComputeHostAPITestCase(test.TestCase):
service_is_up.assert_called_once_with(service)
mock_she.assert_called_once_with(self.ctxt, 'fake-host', False)
log_output = self.stdlog.logger.output
self.assertIn('An error occurred while updating host enabled '
'status to "disabled" for compute host: fake-host',
self.assertIn('An error occurred while updating the '
'COMPUTE_STATUS_DISABLED trait on compute node '
'resource providers managed by host fake-host.',
log_output)
self.assertIn('MessagingTimeout', log_output)

View File

@ -1450,8 +1450,8 @@ class TestUpdateComputeNode(BaseTestCase):
@mock.patch('nova.compute.resource_tracker.ResourceTracker.'
'_sync_compute_service_disabled_trait')
@mock.patch('nova.objects.ComputeNode.save')
def test_existing_node_update_provider_tree_implemented(self, save_mock,
mock_sync_disable):
def test_existing_node_update_provider_tree_implemented(
self, save_mock, mock_sync_disabled):
"""The update_provider_tree() virt driver method is only implemented
for some virt drivers. This method returns inventory, trait, and
aggregate information for resource providers in a tree associated with
@ -1534,7 +1534,7 @@ class TestUpdateComputeNode(BaseTestCase):
# 1024MB in GB
exp_inv[orc.DISK_GB]['reserved'] = 1
self.assertEqual(exp_inv, ptree.data(new_compute.uuid).inventory)
mock_sync_disable.assert_called_once()
mock_sync_disabled.assert_called_once()
@mock.patch('nova.compute.resource_tracker.ResourceTracker.'
'_sync_compute_service_disabled_trait')
@ -1622,7 +1622,7 @@ class TestUpdateComputeNode(BaseTestCase):
# First test with the trait actually in the set.
traits = {os_traits.COMPUTE_STATUS_DISABLED}
self.rt._sync_compute_service_disabled_trait(ctxt, traits)
self.assertEqual(0, len(traits))
self.assertEqual(set(), traits)
mock_get_by_host.assert_called_once_with(ctxt, self.rt.host)
# Now run it again with the empty set to make sure the method handles
# the trait not already being in the set (idempotency).

View File

@ -3915,6 +3915,8 @@ class LibvirtDriver(driver.ComputeDriver):
self.virtapi.update_compute_provider_status(
context, rp_uuid, enabled=not service.disabled)
except Exception:
# This is best effort so just log the exception but don't fail.
# The update_available_resource periodic task will sync the trait.
LOG.warning(
'An error occurred while updating compute node '
'resource provider status to "%s" for provider: %s',