Add VirtAPI.update_compute_provider_status

This adds the VirtAPI interface to add/remove the
COMPUTE_STATUS_DISABLED trait on a compute node
resource provider based on whether or not the related
compute service is disabled. This will be used in future
patches by both the ComputeManager's set_host_enabled method,
which will be called from the API when a compute service is
enabled/disabled, and from the libvirt driver when the
hypervisor connection is dropped/re-connected.

Note that some refactoring was required to make sure the
ComputeManager, ResourceTracker and ComputeVirtAPI are all
using the same SchedulerReportClient instance with the same
ProviderTree cache since the set_traits_for_provider method
relies on the cache to determine if updates are flushed back
to Placement.

Part of blueprint pre-filter-disabled-computes

Change-Id: Ie756ba5e405ad988667c75a723f1c9b9ff3e4a93
This commit is contained in:
Matt Riedemann 2019-07-01 19:30:35 -04:00
parent 13df416fa7
commit b7ad974723
6 changed files with 118 additions and 4 deletions

View File

@@ -43,6 +43,7 @@ import eventlet.semaphore
import eventlet.timeout
import futurist
from keystoneauth1 import exceptions as keystone_exception
import os_traits
from oslo_log import log as logging
import oslo_messaging as messaging
from oslo_serialization import jsonutils
@@ -90,6 +91,7 @@ from nova.pci import whitelist
from nova import rpc
from nova import safe_utils
from nova.scheduler.client import query
from nova.scheduler.client import report
from nova import utils
from nova.virt import block_device as driver_block_device
from nova.virt import configdrive
@@ -420,6 +422,7 @@ class ComputeVirtAPI(virtapi.VirtAPI):
def __init__(self, compute):
    # Reuse the compute manager's SchedulerReportClient so placement
    # updates made through this VirtAPI share the same ProviderTree
    # cache as the ResourceTracker (see the commit message rationale).
    super(ComputeVirtAPI, self).__init__()
    self._compute = compute
    self.reportclient = compute.reportclient
def _default_error_callback(self, event_name, instance):
    # Default error callback for wait_for_instance_event: abort the
    # wait by raising; the event name and instance args are unused here.
    raise exception.NovaException(_('Instance event failed'))
@@ -484,6 +487,41 @@ class ComputeVirtAPI(virtapi.VirtAPI):
if decision is False:
break
def update_compute_provider_status(self, context, rp_uuid, enabled):
    """Add or remove the COMPUTE_STATUS_DISABLED trait on the provider.

    :param context: nova auth RequestContext
    :param rp_uuid: UUID of a compute node resource provider in Placement
    :param enabled: True if the node is enabled, in which case the trait
        is removed; False if the node is disabled, in which case the
        trait is added.
    :raises: ResourceProviderTraitRetrievalFailed
    :raises: ResourceProviderUpdateConflict
    :raises: ResourceProviderUpdateFailed
    :raises: TraitRetrievalFailed
    :raises: keystoneauth1.exceptions.ClientException
    """
    trait = os_traits.COMPUTE_STATUS_DISABLED
    # Look up the provider's current traits (and generation).
    # TODO(mriedem): Leverage the ProviderTree cache in get_provider_traits
    current = self.reportclient.get_provider_traits(context, rp_uuid).traits
    has_trait = trait in current
    # If the trait already reflects the service status, there is
    # nothing to flush back to placement.
    if enabled == (not has_trait):
        return
    if enabled:
        # Service re-enabled: drop the disabled trait.
        updated = current - {trait}
        LOG.debug('Removing trait %s from compute node resource '
                  'provider %s in placement.', trait, rp_uuid)
    else:
        # Service disabled: add the trait.
        updated = current | {trait}
        LOG.debug('Adding trait %s to compute node resource '
                  'provider %s in placement.', trait, rp_uuid)
    self.reportclient.set_traits_for_provider(context, rp_uuid, updated)
class ComputeManager(manager.Manager):
"""Manages the running instances from creation to destruction."""
@@ -492,6 +530,10 @@ class ComputeManager(manager.Manager):
def __init__(self, compute_driver=None, *args, **kwargs):
"""Load configuration options and connect to the hypervisor."""
# We want the ComputeManager, ResourceTracker and ComputeVirtAPI all
# using the same instance of SchedulerReportClient which has the
# ProviderTree cache for this compute service.
self.reportclient = report.SchedulerReportClient()
self.virtapi = ComputeVirtAPI(self)
self.network_api = network.API()
self.volume_api = cinder.API()
@@ -535,8 +577,8 @@ class ComputeManager(manager.Manager):
self.driver = driver.load_compute_driver(self.virtapi, compute_driver)
self.use_legacy_block_device_info = \
self.driver.need_legacy_block_device_info
self.rt = resource_tracker.ResourceTracker(self.host, self.driver)
self.reportclient = self.rt.reportclient
self.rt = resource_tracker.ResourceTracker(
self.host, self.driver, reportclient=self.reportclient)
def reset(self):
LOG.info('Reloading compute RPC API')

View File

@@ -131,7 +131,7 @@ class ResourceTracker(object):
are built and destroyed.
"""
def __init__(self, host, driver):
def __init__(self, host, driver, reportclient=None):
self.host = host
self.driver = driver
self.pci_tracker = None
@@ -146,7 +146,7 @@
monitor_handler = monitors.MonitorHandler(self)
self.monitors = monitor_handler.monitors
self.old_resources = collections.defaultdict(objects.ComputeNode)
self.reportclient = report.SchedulerReportClient()
self.reportclient = reportclient or report.SchedulerReportClient()
self.ram_allocation_ratio = CONF.ram_allocation_ratio
self.cpu_allocation_ratio = CONF.cpu_allocation_ratio
self.disk_allocation_ratio = CONF.disk_allocation_ratio

View File

@@ -3531,3 +3531,12 @@ class TestPciTrackerDelegationMethods(BaseTestCase):
self.context,
self.instance)
self.assertTrue(self.rt.pci_tracker.save.called)
class ResourceTrackerTestCase(test.NoDBTestCase):

    def test_init_ensure_provided_reportclient_is_used(self):
        """Simple test to make sure if a reportclient is provided it is used"""
        report_client = mock.sentinel.reportclient
        rt = resource_tracker.ResourceTracker(
            _HOSTNAME, mock.sentinel.driver, reportclient=report_client)
        self.assertIs(rt.reportclient, report_client)

View File

@@ -12,11 +12,17 @@
# License for the specific language governing permissions and limitations
# under the License.
import collections
import mock
import os_traits
from oslo_utils.fixture import uuidsentinel as uuids
from nova.compute import manager as compute_manager
from nova import context as nova_context
from nova import exception
from nova import objects
from nova.scheduler.client import report
from nova import test
from nova.virt import fake
from nova.virt import virtapi
@@ -42,6 +48,11 @@ class VirtAPIBaseTest(test.NoDBTestCase, test.APICoverage):
self.assertExpected('wait_for_instance_event',
'instance', ['event'])
def test_update_compute_provider_status(self):
    # Ensure the VirtAPI implementation under test exposes
    # update_compute_provider_status with this call signature.
    self.assertExpected('update_compute_provider_status',
                        nova_context.get_admin_context(), uuids.rp_uuid,
                        enabled=False)
class FakeVirtAPITest(VirtAPIBaseTest):
@@ -56,6 +67,8 @@ class FakeVirtAPITest(VirtAPIBaseTest):
with self.virtapi.wait_for_instance_event(*args, **kwargs):
run = True
self.assertTrue(run)
elif method == 'update_compute_provider_status':
self.virtapi.update_compute_provider_status(*args, **kwargs)
else:
self.fail("Unhandled FakeVirtAPI method: %s" % method)
@@ -69,6 +82,20 @@ class FakeCompute(object):
self.instance_events.prepare_for_instance_event.side_effect = \
self._prepare_for_instance_event
self.reportclient = mock.Mock(spec=report.SchedulerReportClient)
# Keep track of the traits set on each provider in the test.
self.provider_traits = collections.defaultdict(set)
self.reportclient.get_provider_traits.side_effect = (
self._get_provider_traits)
self.reportclient.set_traits_for_provider.side_effect = (
self._set_traits_for_provider)
def _get_provider_traits(self, context, rp_uuid):
return mock.Mock(traits=self.provider_traits[rp_uuid])
def _set_traits_for_provider(self, context, rp_uuid, traits):
self.provider_traits[rp_uuid] = traits
def _event_waiter(self):
event = mock.MagicMock()
event.status = 'completed'
@@ -154,3 +181,25 @@ class ComputeVirtAPITest(VirtAPIBaseTest):
pass
self.assertRaises(test.TestingException, do_test)
def test_update_compute_provider_status(self):
    """Tests scenarios for adding/removing the COMPUTE_STATUS_DISABLED
    trait on a given compute node resource provider.
    """
    ctx = nova_context.get_admin_context()
    disabled_trait = os_traits.COMPUTE_STATUS_DISABLED
    # The fake compute starts with no traits tracked for the provider.
    self.assertNotIn(uuids.rp_uuid, self.compute.provider_traits)
    # Disabling the service should add the trait.
    self.virtapi.update_compute_provider_status(
        ctx, uuids.rp_uuid, enabled=False)
    self.assertEqual({disabled_trait},
                     self.compute.provider_traits[uuids.rp_uuid])
    # Disabling again must be a no-op: set_traits_for_provider is
    # replaced with a NonCallableMock so any call would blow up.
    with mock.patch.object(self.compute.reportclient,
                           'set_traits_for_provider',
                           new_callable=mock.NonCallableMock):
        self.virtapi.update_compute_provider_status(
            ctx, uuids.rp_uuid, enabled=False)
    # Re-enabling the service should remove the trait.
    self.virtapi.update_compute_provider_status(
        ctx, uuids.rp_uuid, enabled=True)
    self.assertEqual(set(), self.compute.provider_traits[uuids.rp_uuid])

View File

@@ -661,6 +661,9 @@ class FakeVirtAPI(virtapi.VirtAPI):
# fall through
yield
def update_compute_provider_status(self, context, rp_uuid, enabled):
    # Intentionally a no-op for the fake driver; it exists only to
    # satisfy the VirtAPI interface.
    pass
class SmallFakeDriver(FakeDriver):
# The api samples expect specific cpu memory and disk sizes. In order to

View File

@@ -20,3 +20,14 @@ class VirtAPI(object):
def wait_for_instance_event(self, instance, event_names, deadline=300,
                            error_callback=None):
    # Abstract interface method; concrete VirtAPI implementations
    # (e.g. the compute manager's) must override this.
    raise NotImplementedError()
def update_compute_provider_status(self, context, rp_uuid, enabled):
    """Used to add/remove the COMPUTE_STATUS_DISABLED trait on the provider

    :param context: nova auth RequestContext
    :param rp_uuid: UUID of a compute node resource provider in Placement
    :param enabled: True if the node is enabled in which case the trait
        would be removed, False if the node is disabled in which case
        the trait would be added.
    :raises: NotImplementedError unless overridden by a concrete
        VirtAPI implementation
    """
    raise NotImplementedError()