Unify placement client singleton implementations

We implement singleton behavior for the placement client in many
places. This change unifies them into a single implementation in
one place. Not only does this DRY things up, it may also cause us
to initialize the client fewer times, and it allows us to emit a
common set of error messages about expected failures for better
troubleshooting.

Change-Id: Iab8a791f64323f996e1d6e6d5a7e7a7c34eb4fb3
Related-Bug: #1846820
Dan Smith 2022-08-11 09:50:30 -07:00
parent 627af67f5e
commit c178d93606
16 changed files with 115 additions and 44 deletions
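In outline, the change swaps many copies of a per-object lazy-caching
property for one module-level accessor. A minimal sketch of the two
patterns, simplified from the diffs below (the stub class stands in for
the real SchedulerReportClient, and the accessor's error logging is
elided here):

class SchedulerReportClient:
    """Stand-in for nova.scheduler.client.report.SchedulerReportClient."""

# Before: each consumer lazily builds and caches its own client.
class AnyConsumer:
    def __init__(self):
        self._placementclient = None  # Lazy-load on first access.

    @property
    def placementclient(self):
        if self._placementclient is None:
            self._placementclient = SchedulerReportClient()
        return self._placementclient

# After: one module-global instance, built on first use and shared.
PLACEMENTCLIENT = None

def report_client_singleton():
    global PLACEMENTCLIENT
    if PLACEMENTCLIENT is None:
        PLACEMENTCLIENT = SchedulerReportClient()
    return PLACEMENTCLIENT

assert report_client_singleton() is report_client_singleton()

Besides removing the boilerplate, the single accessor is the one place
where construction failures can be caught and logged consistently, as
the new code in report.py below shows.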

nova/api/openstack/compute/services.py

@@ -48,13 +48,10 @@ class ServiceController(wsgi.Controller):
         self.actions = {"enable": self._enable,
                         "disable": self._disable,
                         "disable-log-reason": self._disable_log_reason}
-        self._placementclient = None  # Lazy-load on first access.
 
     @property
     def placementclient(self):
-        if self._placementclient is None:
-            self._placementclient = report.SchedulerReportClient()
-        return self._placementclient
+        return report.report_client_singleton()
 
     def _get_services(self, req):
         # The API services are filtered out since they are not RPC services
@@ -328,7 +325,7 @@ class ServiceController(wsgi.Controller):
                     "Failed to delete compute node resource provider "
                     "for compute node %s: %s",
                     compute_node.uuid, str(e))
-        # remove the host_mapping of this host.
+        # Remove the host_mapping of this host.
         try:
             hm = objects.HostMapping.get_by_host(context, service.host)
             hm.destroy()

nova/cmd/manage.py

@@ -2209,7 +2209,7 @@ class PlacementCommands(object):
             output(_('No cells to process.'))
             return 4
 
-        placement = report.SchedulerReportClient()
+        placement = report.report_client_singleton()
 
         neutron = None
         if heal_port_allocations:
@@ -2710,7 +2710,7 @@ class PlacementCommands(object):
         if verbose:
             output = lambda msg: print(msg)
 
-        placement = report.SchedulerReportClient()
+        placement = report.report_client_singleton()
         # Resets two in-memory dicts for knowing instances per compute node
         self.cn_uuid_mapping = collections.defaultdict(tuple)
         self.instances_mapping = collections.defaultdict(list)

nova/compute/api.py

@@ -386,7 +386,6 @@ class API:
         self.image_api = image_api or glance.API()
         self.network_api = network_api or neutron.API()
         self.volume_api = volume_api or cinder.API()
-        self._placementclient = None  # Lazy-load on first access.
         self.compute_rpcapi = compute_rpcapi.ComputeAPI()
         self.compute_task_api = conductor.ComputeTaskAPI()
         self.servicegroup_api = servicegroup.API()
@@ -2575,9 +2574,7 @@ class API:
 
     @property
     def placementclient(self):
-        if self._placementclient is None:
-            self._placementclient = report.SchedulerReportClient()
-        return self._placementclient
+        return report.report_client_singleton()
 
     def _local_delete(self, context, instance, bdms, delete_type, cb):
         if instance.vm_state == vm_states.SHELVED_OFFLOADED:
@@ -6405,13 +6402,10 @@ class AggregateAPI:
     def __init__(self):
         self.compute_rpcapi = compute_rpcapi.ComputeAPI()
         self.query_client = query.SchedulerQueryClient()
-        self._placement_client = None  # Lazy-load on first access.
 
     @property
     def placement_client(self):
-        if self._placement_client is None:
-            self._placement_client = report.SchedulerReportClient()
-        return self._placement_client
+        return report.report_client_singleton()
 
     @wrap_exception()
     def create_aggregate(self, context, aggregate_name, availability_zone):

nova/compute/manager.py

@@ -622,6 +622,11 @@ class ComputeManager(manager.Manager):
         # We want the ComputeManager, ResourceTracker and ComputeVirtAPI all
         # using the same instance of SchedulerReportClient which has the
         # ProviderTree cache for this compute service.
+        # NOTE(danms): We do not use the global placement client
+        # singleton here, because the above-mentioned stack of objects
+        # maintains local state in the client. Thus, keeping our own
+        # private object for that stack avoids any potential conflict
+        # with other users in our process outside of the above.
         self.reportclient = report.SchedulerReportClient()
         self.virtapi = ComputeVirtAPI(self)
         self.network_api = neutron.API()

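The NOTE above is the one deliberate exception to the new singleton: the
ComputeManager/ResourceTracker/ComputeVirtAPI stack relies on client-local
state (the ProviderTree cache), which a process-wide client would share
with unrelated users. A toy sketch of the hazard, under the assumption
that the client caches per-instance state (FakeReportClient and its
cache attribute are illustrative, not nova's real API):

class FakeReportClient:
    def __init__(self):
        self.provider_tree_cache = {}  # per-instance state

_SINGLETON = FakeReportClient()

compute_stack_client = _SINGLETON   # hypothetical compute-manager user
other_user_client = _SINGLETON      # hypothetical other in-process user

# A mutation by the other user is visible to the compute stack:
other_user_client.provider_tree_cache['rp-uuid'] = 'stale'
assert compute_stack_client.provider_tree_cache['rp-uuid'] == 'stale'

# A private instance, as ComputeManager keeps, stays isolated:
private_client = FakeReportClient()
assert 'rp-uuid' not in private_client.provider_tree_cache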
nova/compute/resource_tracker.py

@@ -103,7 +103,7 @@ class ResourceTracker(object):
         monitor_handler = monitors.MonitorHandler(self)
         self.monitors = monitor_handler.monitors
         self.old_resources = collections.defaultdict(objects.ComputeNode)
-        self.reportclient = reportclient or report.SchedulerReportClient()
+        self.reportclient = reportclient or report.report_client_singleton()
         self.ram_allocation_ratio = CONF.ram_allocation_ratio
         self.cpu_allocation_ratio = CONF.cpu_allocation_ratio
         self.disk_allocation_ratio = CONF.disk_allocation_ratio

nova/conductor/manager.py

@@ -243,7 +243,7 @@ class ComputeTaskManager:
         self.network_api = neutron.API()
         self.servicegroup_api = servicegroup.API()
         self.query_client = query.SchedulerQueryClient()
-        self.report_client = report.SchedulerReportClient()
+        self.report_client = report.report_client_singleton()
         self.notifier = rpc.get_notifier('compute')
         # Help us to record host in EventReporter
         self.host = CONF.host

nova/conductor/tasks/migrate.py

@@ -54,7 +54,7 @@ def replace_allocation_with_migration(context, instance, migration):
         # and do any rollback required
         raise
 
-    reportclient = report.SchedulerReportClient()
+    reportclient = report.report_client_singleton()
 
     orig_alloc = reportclient.get_allocs_for_consumer(
         context, instance.uuid)['allocations']
@@ -94,7 +94,7 @@ def replace_allocation_with_migration(context, instance, migration):
 
 def revert_allocation_for_migration(context, source_cn, instance, migration):
     """Revert an allocation made for a migration back to the instance."""
-    reportclient = report.SchedulerReportClient()
+    reportclient = report.report_client_singleton()
 
     # FIXME(gibi): This method is flawed in that it does not handle allocations
     # against sharing providers in any special way. This leads to duplicate

nova/limit/placement.py

@@ -43,10 +43,8 @@ LEGACY_LIMITS = {
 def _get_placement_usages(
     context: 'nova.context.RequestContext', project_id: str
 ) -> ty.Dict[str, int]:
-    global PLACEMENT_CLIENT
-    if not PLACEMENT_CLIENT:
-        PLACEMENT_CLIENT = report.SchedulerReportClient()
-    return PLACEMENT_CLIENT.get_usages_counts_for_limits(context, project_id)
+    return report.report_client_singleton().get_usages_counts_for_limits(
+        context, project_id)
 
 
 def _get_usage(

nova/quota.py

@@ -1348,11 +1348,8 @@ def _instances_cores_ram_count_legacy(context, project_id, user_id=None):
 
 
 def _cores_ram_count_placement(context, project_id, user_id=None):
-    global PLACEMENT_CLIENT
-    if not PLACEMENT_CLIENT:
-        PLACEMENT_CLIENT = report.SchedulerReportClient()
-    return PLACEMENT_CLIENT.get_usages_counts_for_quota(context, project_id,
-                                                        user_id=user_id)
+    return report.report_client_singleton().get_usages_counts_for_quota(
+        context, project_id, user_id=user_id)
 
 
 def _instances_cores_ram_count_api_db_placement(context, project_id,

nova/scheduler/client/report.py

@@ -52,6 +52,7 @@ AGGREGATE_GENERATION_VERSION = '1.19'
 NESTED_PROVIDER_API_VERSION = '1.14'
 POST_ALLOCATIONS_API_VERSION = '1.13'
 GET_USAGES_VERSION = '1.9'
+PLACEMENTCLIENT = None
 
 AggInfo = collections.namedtuple('AggInfo', ['aggregates', 'generation'])
 TraitInfo = collections.namedtuple('TraitInfo', ['traits', 'generation'])
@@ -67,6 +68,51 @@ def warn_limit(self, msg):
         LOG.warning(msg)
 
 
+def report_client_singleton():
+    """Return a reference to the global placement client singleton.
+
+    This initializes the placement client once and returns a reference
+    to that singleton on subsequent calls. Errors are raised
+    (particularly ks_exc.*) but context-specific error messages are
+    logged for consistency.
+    """
+    # NOTE(danms): The report client maintains internal state in the
+    # form of the provider tree, which will be shared across all users
+    # of this global client. That is not a problem now, but in the
+    # future it may be beneficial to fix that. One idea would be to
+    # change the behavior of the client such that the static-config
+    # pieces of the actual keystone client are separate from the
+    # internal state, so that we can return a new object here with a
+    # context-specific local state object, but with the client bits
+    # shared.
+    global PLACEMENTCLIENT
+    if PLACEMENTCLIENT is None:
+        try:
+            PLACEMENTCLIENT = SchedulerReportClient()
+        except ks_exc.EndpointNotFound:
+            LOG.error('The placement API endpoint was not found.')
+            raise
+        except ks_exc.MissingAuthPlugin:
+            LOG.error('No authentication information found for placement API.')
+            raise
+        except ks_exc.Unauthorized:
+            LOG.error('Placement service credentials do not work.')
+            raise
+        except ks_exc.DiscoveryFailure:
+            LOG.error('Discovering suitable URL for placement API failed.')
+            raise
+        except (ks_exc.ConnectFailure,
+                ks_exc.RequestTimeout,
+                ks_exc.GatewayTimeout):
+            LOG.error('Placement API service is not responding.')
+            raise
+        except Exception:
+            LOG.error('Failed to initialize placement client '
+                      '(is keystone available?)')
+            raise
+    return PLACEMENTCLIENT
+
+
 def safe_connect(f):
     @functools.wraps(f)
     def wrapper(self, *a, **k):

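Because report_client_singleton() logs a specific, operator-friendly
message for each expected keystoneauth failure before re-raising, call
sites no longer need their own messages. A sketch of how a caller might
lean on that (the wrapper function and its fallback policy are
illustrative, not part of this change; the exception types are the ones
handled above):

from keystoneauth1 import exceptions as ks_exc

from nova.scheduler.client import report

PLACEMENT_ERRORS = (
    ks_exc.EndpointNotFound,
    ks_exc.MissingAuthPlugin,
    ks_exc.Unauthorized,
    ks_exc.DiscoveryFailure,
    ks_exc.ConnectFailure,
    ks_exc.RequestTimeout,
    ks_exc.GatewayTimeout,
)

def get_limit_usages_or_none(context, project_id):
    """Illustrative caller: the singleton already logged the specifics."""
    try:
        client = report.report_client_singleton()
    except PLACEMENT_ERRORS:
        return None  # degrade gracefully; the root cause is in the log
    return client.get_usages_counts_for_limits(context, project_id)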
nova/scheduler/manager.py

@@ -66,7 +66,7 @@ class SchedulerManager(manager.Manager):
         self.host_manager = host_manager.HostManager()
         self.servicegroup_api = servicegroup.API()
         self.notifier = rpc.get_notifier('scheduler')
-        self.placement_client = report.SchedulerReportClient()
+        self.placement_client = report.report_client_singleton()
 
         super().__init__(service_name='scheduler', *args, **kwargs)
 

nova/scheduler/request_filter.py

@@ -311,7 +311,7 @@ def routed_networks_filter(
 
     # Get the clients we need
     network_api = neutron.API()
-    report_api = report.SchedulerReportClient()
+    report_api = report.report_client_singleton()
 
     for requested_network in requested_networks:
         network_id = None

nova/test.py

@@ -61,6 +61,7 @@ from nova import exception
 from nova import objects
 from nova.objects import base as objects_base
 from nova import quota
+from nova.scheduler.client import report
 from nova.tests import fixtures as nova_fixtures
 from nova.tests.unit import matchers
 from nova import utils
@@ -291,6 +292,9 @@ class TestCase(base.BaseTestCase):
         # instead of only once initialized for test worker
         wsgi_app.init_global_data.reset()
 
+        # Reset the placement client singleton
+        report.PLACEMENTCLIENT = None
+
     def _setup_cells(self):
         """Setup a normal cellsv2 environment.
 

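Resetting report.PLACEMENTCLIENT in setUp matters because the singleton
now outlives any one test: without the reset, a client constructed (or
mocked) by an earlier test would leak into later ones. A sketch of a
test relying on that isolation (the test class itself is hypothetical):

import unittest
from unittest import mock

from nova.scheduler.client import report

class PlacementUsingTest(unittest.TestCase):
    def setUp(self):
        super().setUp()
        # Mirrors nova.test.TestCase: forget any client a prior test
        # created, possibly under different config or with mocks attached.
        report.PLACEMENTCLIENT = None

    def test_client_is_built_fresh(self):
        with mock.patch.object(report, 'SchedulerReportClient') as fake:
            # With the global reset, the singleton must construct anew,
            # so it picks up the patched class rather than a stale object.
            self.assertIs(report.report_client_singleton(),
                          fake.return_value)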
nova/tests/unit/compute/test_compute_api.py

@@ -7764,16 +7764,13 @@ class ComputeAPIUnitTestCase(_ComputeAPIUnitTestMixIn, test.NoDBTestCase):
         self.assertTrue(hasattr(self.compute_api, 'host'))
         self.assertEqual(CONF.host, self.compute_api.host)
 
-    @mock.patch('nova.scheduler.client.report.SchedulerReportClient')
+    @mock.patch('nova.scheduler.client.report.report_client_singleton')
     def test_placement_client_init(self, mock_report_client):
         """Tests to make sure that the construction of the placement client
-        only happens once per API class instance.
+        uses the singleton helper, and happens only when needed.
         """
-        self.assertIsNone(self.compute_api._placementclient)
-        # Access the property twice to make sure SchedulerReportClient is
-        # only loaded once.
-        for x in range(2):
-            self.compute_api.placementclient
+        self.assertFalse(mock_report_client.called)
+        self.compute_api.placementclient
         mock_report_client.assert_called_once_with()
 
     def test_validate_host_for_cold_migrate_same_host_fails(self):

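Note that these tests now patch report_client_singleton rather than
SchedulerReportClient: since the property simply defers to the helper,
the useful assertions are that the helper is not called until the
property is accessed, and is called exactly once when it is.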
nova/tests/unit/compute/test_compute.py

@@ -13091,16 +13091,13 @@ class ComputeAPIAggrTestCase(BaseTestCase):
         hosts = aggregate.hosts if 'hosts' in aggregate else None
         self.assertIn(values[0][1][0], hosts)
 
-    @mock.patch('nova.scheduler.client.report.SchedulerReportClient')
+    @mock.patch('nova.scheduler.client.report.report_client_singleton')
     def test_placement_client_init(self, mock_report_client):
         """Tests to make sure that the construction of the placement client
-        only happens once per AggregateAPI class instance.
+        uses the singleton helper, and happens only when needed.
         """
-        self.assertIsNone(self.api._placement_client)
-        # Access the property twice to make sure SchedulerReportClient is
-        # only loaded once.
-        for x in range(2):
-            self.api.placement_client
+        self.assertFalse(mock_report_client.called)
+        self.api.placement_client
         mock_report_client.assert_called_once_with()
 

nova/tests/unit/scheduler/client/test_report.py

@@ -11,6 +11,7 @@
 # under the License.
 
 import copy
+import ddt
 import time
 from unittest import mock
 from urllib import parse
@@ -157,6 +158,41 @@ class SafeConnectedTestCase(test.NoDBTestCase):
         self.assertTrue(req.called)
 
 
+@ddt.ddt
+class TestSingleton(test.NoDBTestCase):
+    def test_singleton(self):
+        # Make sure we start with a clean slate
+        self.assertIsNone(report.PLACEMENTCLIENT)
+
+        # Make sure the first call creates the singleton, sets it
+        # globally, and returns it
+        client = report.report_client_singleton()
+        self.assertEqual(client, report.PLACEMENTCLIENT)
+
+        # Make sure that a subsequent call returns the same thing
+        # again and that the global is unchanged
+        self.assertEqual(client, report.report_client_singleton())
+        self.assertEqual(client, report.PLACEMENTCLIENT)
+
+    @ddt.data(ks_exc.EndpointNotFound,
+              ks_exc.MissingAuthPlugin,
+              ks_exc.Unauthorized,
+              ks_exc.DiscoveryFailure,
+              ks_exc.ConnectFailure,
+              ks_exc.RequestTimeout,
+              ks_exc.GatewayTimeout,
+              test.TestingException)
+    def test_errors(self, exc):
+        self._test_error(exc)
+
+    @mock.patch.object(report, 'LOG')
+    def _test_error(self, exc, mock_log):
+        with mock.patch.object(report.SchedulerReportClient, '_create_client',
+                               side_effect=exc):
+            self.assertRaises(exc, report.report_client_singleton)
+        mock_log.error.assert_called_once()
+
+
 class TestConstructor(test.NoDBTestCase):
     def setUp(self):
         super(TestConstructor, self).setUp()