libvirt: manage COMPUTE_STATUS_DISABLED for hypervisor connection

The libvirt driver has a callback method for when the hypervisor
connection drops and is re-connected which mirrors those changes
in the related compute Service record's "disabled" field.

This builds on that by hooking back into the ComputeVirtAPI
update_compute_provider_status method when the Service.disabled
value changes so the COMPUTE_STATUS_DISABLED trait can be added
to or removed from the related compute node resource provider.

Part of blueprint pre-filter-disabled-computes

Change-Id: Ifabbb543aab62b917394eefe48126231df7cd503
This commit is contained in:
Matt Riedemann 2019-07-02 12:33:03 -04:00
parent b7ad974723
commit 9b99be44cd
2 changed files with 80 additions and 4 deletions

View File

@ -1583,7 +1583,11 @@ class LibvirtConnTestCase(test.NoDBTestCase,
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True)
svc = self._create_service(host='fake-mini')
mock_svc.return_value = svc
drvr._set_host_enabled(False)
with mock.patch.object(
drvr, '_update_compute_provider_status') as ucps:
drvr._set_host_enabled(False)
ucps.assert_called_once_with(
test.MatchType(context.RequestContext), svc)
self.assertTrue(svc.disabled)
mock_save.assert_called_once_with()
@ -1594,7 +1598,10 @@ class LibvirtConnTestCase(test.NoDBTestCase,
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True)
svc = self._create_service(disabled=True, host='fake-mini')
mock_svc.return_value = svc
drvr._set_host_enabled(True)
with mock.patch.object(
drvr, '_update_compute_provider_status') as ucps:
drvr._set_host_enabled(True)
ucps.assert_not_called()
# since disabled_reason is not set and not prefixed with "AUTO:",
# service should not be enabled.
mock_save.assert_not_called()
@ -1608,7 +1615,10 @@ class LibvirtConnTestCase(test.NoDBTestCase,
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True)
svc = self._create_service(disabled=False, host='fake-mini')
mock_svc.return_value = svc
drvr._set_host_enabled(True)
with mock.patch.object(
drvr, '_update_compute_provider_status') as ucps:
drvr._set_host_enabled(True)
ucps.assert_not_called()
self.assertFalse(svc.disabled)
mock_save.assert_not_called()
@ -1620,7 +1630,10 @@ class LibvirtConnTestCase(test.NoDBTestCase,
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True)
svc = self._create_service(disabled=True, host='fake-mini')
mock_svc.return_value = svc
drvr._set_host_enabled(False)
with mock.patch.object(
drvr, '_update_compute_provider_status') as ucps:
drvr._set_host_enabled(False)
ucps.assert_not_called()
mock_save.assert_not_called()
self.assertTrue(svc.disabled)
@ -1635,6 +1648,47 @@ class LibvirtConnTestCase(test.NoDBTestCase,
db_mock.side_effect = exception.NovaException
drvr._set_host_enabled(False)
def test_update_compute_provider_status(self):
"""Tests happy path of calling _update_compute_provider_status"""
virtapi = mock.Mock()
drvr = libvirt_driver.LibvirtDriver(virtapi, read_only=True)
ctxt = context.get_admin_context()
service = self._create_service()
service.compute_node = objects.ComputeNode(uuid=uuids.rp_uuid)
drvr._update_compute_provider_status(ctxt, service)
virtapi.update_compute_provider_status.assert_called_once_with(
ctxt, uuids.rp_uuid, enabled=not service.disabled)
def test_update_compute_provider_status_swallows_exceptions(self):
"""Tests error path handling in _update_compute_provider_status"""
# First we'll make Service.compute_node loading raise an exception
# by not setting the field and we cannot lazy-load it from an orphaned
# Service object.
virtapi = mock.Mock()
drvr = libvirt_driver.LibvirtDriver(virtapi, read_only=True)
ctxt = context.get_admin_context()
service = self._create_service(host='fake-host', disabled=True)
drvr._update_compute_provider_status(ctxt, service)
virtapi.update_compute_provider_status.assert_not_called()
self.assertIn('An error occurred while updating compute node resource '
'provider status to "disabled" for provider: fake-host',
self.stdlog.logger.output)
# Now fix Service.compute_node loading but make the VirtAPI call fail.
service.compute_node = objects.ComputeNode(uuid=uuids.rp_uuid)
service.disabled = False # make sure the log message logic works
error = exception.TraitRetrievalFailed(error='oops')
virtapi.update_compute_provider_status.side_effect = error
drvr._update_compute_provider_status(ctxt, service)
virtapi.update_compute_provider_status.assert_called_once_with(
ctxt, uuids.rp_uuid, enabled=True)
log_output = self.stdlog.logger.output
self.assertIn('An error occurred while updating compute node resource '
'provider status to "enabled" for provider: %s' %
uuids.rp_uuid, log_output)
# The error should have been logged as well.
self.assertIn(six.text_type(error), log_output)
@mock.patch.object(fakelibvirt.virConnect, "nodeDeviceLookupByName")
def test_prepare_pci_device(self, mock_lookup):

View File

@ -3902,6 +3902,25 @@ class LibvirtDriver(driver.ComputeDriver):
if pci_dev.address in direct_passthrough_pci_addresses])
self._detach_pci_devices(guest, direct_passthrough_pci_addresses)
def _update_compute_provider_status(self, context, service):
"""Calls the ComputeVirtAPI.update_compute_provider_status method
:param context: nova auth RequestContext
:param service: nova.objects.Service record for this host which is
expected to only manage a single ComputeNode
"""
rp_uuid = None
try:
rp_uuid = service.compute_node.uuid
self.virtapi.update_compute_provider_status(
context, rp_uuid, enabled=not service.disabled)
except Exception:
LOG.warning(
'An error occurred while updating compute node '
'resource provider status to "%s" for provider: %s',
'disabled' if service.disabled else 'enabled',
rp_uuid or service.host, exc_info=True)
def _set_host_enabled(self, enabled,
disable_reason=DISABLE_REASON_UNDEFINED):
"""Enables / Disables the compute service on this host.
@ -3938,6 +3957,9 @@ class LibvirtDriver(driver.ComputeDriver):
service.save()
LOG.debug('Updating compute service status to %s',
status_name[disable_service])
# Update the disabled trait status on the corresponding
# compute node resource provider in placement.
self._update_compute_provider_status(ctx, service)
else:
LOG.debug('Not overriding manual compute service '
'status with: %s',