From c178d9360665c219cbcc71c9f37b9e6e3055a5e5 Mon Sep 17 00:00:00 2001 From: Dan Smith Date: Thu, 11 Aug 2022 09:50:30 -0700 Subject: [PATCH] Unify placement client singleton implementations We have many places where we implement singleton behavior for the placement client. This unifies them into a single place and implementation. Not only does this DRY things up, but may cause us to initialize it fewer times and also allows for emitting a common set of error messages about expected failures for better troubleshooting. Change-Id: Iab8a791f64323f996e1d6e6d5a7e7a7c34eb4fb3 Related-Bug: #1846820 --- nova/api/openstack/compute/services.py | 7 +-- nova/cmd/manage.py | 4 +- nova/compute/api.py | 10 +--- nova/compute/manager.py | 5 ++ nova/compute/resource_tracker.py | 2 +- nova/conductor/manager.py | 2 +- nova/conductor/tasks/migrate.py | 4 +- nova/limit/placement.py | 6 +-- nova/quota.py | 7 +-- nova/scheduler/client/report.py | 46 +++++++++++++++++++ nova/scheduler/manager.py | 2 +- nova/scheduler/request_filter.py | 2 +- nova/test.py | 4 ++ nova/tests/unit/compute/test_api.py | 11 ++--- nova/tests/unit/compute/test_compute.py | 11 ++--- .../unit/scheduler/client/test_report.py | 36 +++++++++++++++ 16 files changed, 115 insertions(+), 44 deletions(-) diff --git a/nova/api/openstack/compute/services.py b/nova/api/openstack/compute/services.py index 6deb84a7f1ab..e9d51d4d0c8e 100644 --- a/nova/api/openstack/compute/services.py +++ b/nova/api/openstack/compute/services.py @@ -48,13 +48,10 @@ class ServiceController(wsgi.Controller): self.actions = {"enable": self._enable, "disable": self._disable, "disable-log-reason": self._disable_log_reason} - self._placementclient = None # Lazy-load on first access. @property def placementclient(self): - if self._placementclient is None: - self._placementclient = report.SchedulerReportClient() - return self._placementclient + return report.report_client_singleton() def _get_services(self, req): # The API services are filtered out since they are not RPC services @@ -328,7 +325,7 @@ class ServiceController(wsgi.Controller): "Failed to delete compute node resource provider " "for compute node %s: %s", compute_node.uuid, str(e)) - # remove the host_mapping of this host. + # Remove the host_mapping of this host. try: hm = objects.HostMapping.get_by_host(context, service.host) hm.destroy() diff --git a/nova/cmd/manage.py b/nova/cmd/manage.py index 32b0dd57f808..08b8ebb31047 100644 --- a/nova/cmd/manage.py +++ b/nova/cmd/manage.py @@ -2209,7 +2209,7 @@ class PlacementCommands(object): output(_('No cells to process.')) return 4 - placement = report.SchedulerReportClient() + placement = report.report_client_singleton() neutron = None if heal_port_allocations: @@ -2710,7 +2710,7 @@ class PlacementCommands(object): if verbose: output = lambda msg: print(msg) - placement = report.SchedulerReportClient() + placement = report.report_client_singleton() # Resets two in-memory dicts for knowing instances per compute node self.cn_uuid_mapping = collections.defaultdict(tuple) self.instances_mapping = collections.defaultdict(list) diff --git a/nova/compute/api.py b/nova/compute/api.py index 66543f57dc0a..cc400cfa7ac0 100644 --- a/nova/compute/api.py +++ b/nova/compute/api.py @@ -386,7 +386,6 @@ class API: self.image_api = image_api or glance.API() self.network_api = network_api or neutron.API() self.volume_api = volume_api or cinder.API() - self._placementclient = None # Lazy-load on first access. self.compute_rpcapi = compute_rpcapi.ComputeAPI() self.compute_task_api = conductor.ComputeTaskAPI() self.servicegroup_api = servicegroup.API() @@ -2575,9 +2574,7 @@ class API: @property def placementclient(self): - if self._placementclient is None: - self._placementclient = report.SchedulerReportClient() - return self._placementclient + return report.report_client_singleton() def _local_delete(self, context, instance, bdms, delete_type, cb): if instance.vm_state == vm_states.SHELVED_OFFLOADED: @@ -6405,13 +6402,10 @@ class AggregateAPI: def __init__(self): self.compute_rpcapi = compute_rpcapi.ComputeAPI() self.query_client = query.SchedulerQueryClient() - self._placement_client = None # Lazy-load on first access. @property def placement_client(self): - if self._placement_client is None: - self._placement_client = report.SchedulerReportClient() - return self._placement_client + return report.report_client_singleton() @wrap_exception() def create_aggregate(self, context, aggregate_name, availability_zone): diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 5e39b1b76645..6a42e1769600 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -622,6 +622,11 @@ class ComputeManager(manager.Manager): # We want the ComputeManager, ResourceTracker and ComputeVirtAPI all # using the same instance of SchedulerReportClient which has the # ProviderTree cache for this compute service. + # NOTE(danms): We do not use the global placement client + # singleton here, because the above-mentioned stack of objects + # maintain local state in the client. Thus, keeping our own + # private object for that stack avoids any potential conflict + # with other users in our process outside of the above. self.reportclient = report.SchedulerReportClient() self.virtapi = ComputeVirtAPI(self) self.network_api = neutron.API() diff --git a/nova/compute/resource_tracker.py b/nova/compute/resource_tracker.py index 0b801f7ddf99..058777d1ed08 100644 --- a/nova/compute/resource_tracker.py +++ b/nova/compute/resource_tracker.py @@ -103,7 +103,7 @@ class ResourceTracker(object): monitor_handler = monitors.MonitorHandler(self) self.monitors = monitor_handler.monitors self.old_resources = collections.defaultdict(objects.ComputeNode) - self.reportclient = reportclient or report.SchedulerReportClient() + self.reportclient = reportclient or report.report_client_singleton() self.ram_allocation_ratio = CONF.ram_allocation_ratio self.cpu_allocation_ratio = CONF.cpu_allocation_ratio self.disk_allocation_ratio = CONF.disk_allocation_ratio diff --git a/nova/conductor/manager.py b/nova/conductor/manager.py index f6b0815d1b71..5b1b9f9c6055 100644 --- a/nova/conductor/manager.py +++ b/nova/conductor/manager.py @@ -243,7 +243,7 @@ class ComputeTaskManager: self.network_api = neutron.API() self.servicegroup_api = servicegroup.API() self.query_client = query.SchedulerQueryClient() - self.report_client = report.SchedulerReportClient() + self.report_client = report.report_client_singleton() self.notifier = rpc.get_notifier('compute') # Help us to record host in EventReporter self.host = CONF.host diff --git a/nova/conductor/tasks/migrate.py b/nova/conductor/tasks/migrate.py index 6ff6206f6599..8838d0240a6c 100644 --- a/nova/conductor/tasks/migrate.py +++ b/nova/conductor/tasks/migrate.py @@ -54,7 +54,7 @@ def replace_allocation_with_migration(context, instance, migration): # and do any rollback required raise - reportclient = report.SchedulerReportClient() + reportclient = report.report_client_singleton() orig_alloc = reportclient.get_allocs_for_consumer( context, instance.uuid)['allocations'] @@ -94,7 +94,7 @@ def replace_allocation_with_migration(context, instance, migration): def revert_allocation_for_migration(context, source_cn, instance, migration): """Revert an allocation made for a migration back to the instance.""" - reportclient = report.SchedulerReportClient() + reportclient = report.report_client_singleton() # FIXME(gibi): This method is flawed in that it does not handle allocations # against sharing providers in any special way. This leads to duplicate diff --git a/nova/limit/placement.py b/nova/limit/placement.py index 497986c4ab87..eedf7d69e19f 100644 --- a/nova/limit/placement.py +++ b/nova/limit/placement.py @@ -43,10 +43,8 @@ LEGACY_LIMITS = { def _get_placement_usages( context: 'nova.context.RequestContext', project_id: str ) -> ty.Dict[str, int]: - global PLACEMENT_CLIENT - if not PLACEMENT_CLIENT: - PLACEMENT_CLIENT = report.SchedulerReportClient() - return PLACEMENT_CLIENT.get_usages_counts_for_limits(context, project_id) + return report.report_client_singleton().get_usages_counts_for_limits( + context, project_id) def _get_usage( diff --git a/nova/quota.py b/nova/quota.py index b9dd76301276..eafad4cd23d5 100644 --- a/nova/quota.py +++ b/nova/quota.py @@ -1348,11 +1348,8 @@ def _instances_cores_ram_count_legacy(context, project_id, user_id=None): def _cores_ram_count_placement(context, project_id, user_id=None): - global PLACEMENT_CLIENT - if not PLACEMENT_CLIENT: - PLACEMENT_CLIENT = report.SchedulerReportClient() - return PLACEMENT_CLIENT.get_usages_counts_for_quota(context, project_id, - user_id=user_id) + return report.report_client_singleton().get_usages_counts_for_quota( + context, project_id, user_id=user_id) def _instances_cores_ram_count_api_db_placement(context, project_id, diff --git a/nova/scheduler/client/report.py b/nova/scheduler/client/report.py index e4d0c8e3db62..ff86527cf55c 100644 --- a/nova/scheduler/client/report.py +++ b/nova/scheduler/client/report.py @@ -52,6 +52,7 @@ AGGREGATE_GENERATION_VERSION = '1.19' NESTED_PROVIDER_API_VERSION = '1.14' POST_ALLOCATIONS_API_VERSION = '1.13' GET_USAGES_VERSION = '1.9' +PLACEMENTCLIENT = None AggInfo = collections.namedtuple('AggInfo', ['aggregates', 'generation']) TraitInfo = collections.namedtuple('TraitInfo', ['traits', 'generation']) @@ -67,6 +68,51 @@ def warn_limit(self, msg): LOG.warning(msg) +def report_client_singleton(): + """Return a reference to the global placement client singleton. + + This initializes the placement client once and returns a reference + to that singleton on subsequent calls. Errors are raised + (particularly ks_exc.*) but context-specific error messages are + logged for consistency. + """ + # NOTE(danms): The report client maintains internal state in the + # form of the provider tree, which will be shared across all users + # of this global client. That is not a problem now, but in the + # future it may be beneficial to fix that. One idea would be to + # change the behavior of the client such that the static-config + # pieces of the actual keystone client are separate from the + # internal state, so that we can return a new object here with a + # context-specific local state object, but with the client bits + # shared. + global PLACEMENTCLIENT + if PLACEMENTCLIENT is None: + try: + PLACEMENTCLIENT = SchedulerReportClient() + except ks_exc.EndpointNotFound: + LOG.error('The placement API endpoint was not found.') + raise + except ks_exc.MissingAuthPlugin: + LOG.error('No authentication information found for placement API.') + raise + except ks_exc.Unauthorized: + LOG.error('Placement service credentials do not work.') + raise + except ks_exc.DiscoveryFailure: + LOG.error('Discovering suitable URL for placement API failed.') + raise + except (ks_exc.ConnectFailure, + ks_exc.RequestTimeout, + ks_exc.GatewayTimeout): + LOG.error('Placement API service is not responding.') + raise + except Exception: + LOG.error('Failed to initialize placement client ' + '(is keystone available?)') + raise + return PLACEMENTCLIENT + + def safe_connect(f): @functools.wraps(f) def wrapper(self, *a, **k): diff --git a/nova/scheduler/manager.py b/nova/scheduler/manager.py index 03df615f6a6c..10b330653deb 100644 --- a/nova/scheduler/manager.py +++ b/nova/scheduler/manager.py @@ -66,7 +66,7 @@ class SchedulerManager(manager.Manager): self.host_manager = host_manager.HostManager() self.servicegroup_api = servicegroup.API() self.notifier = rpc.get_notifier('scheduler') - self.placement_client = report.SchedulerReportClient() + self.placement_client = report.report_client_singleton() super().__init__(service_name='scheduler', *args, **kwargs) diff --git a/nova/scheduler/request_filter.py b/nova/scheduler/request_filter.py index bd237b06cac0..3f96b7a88063 100644 --- a/nova/scheduler/request_filter.py +++ b/nova/scheduler/request_filter.py @@ -311,7 +311,7 @@ def routed_networks_filter( # Get the clients we need network_api = neutron.API() - report_api = report.SchedulerReportClient() + report_api = report.report_client_singleton() for requested_network in requested_networks: network_id = None diff --git a/nova/test.py b/nova/test.py index d645d06cfa5c..689d5ba29164 100644 --- a/nova/test.py +++ b/nova/test.py @@ -61,6 +61,7 @@ from nova import exception from nova import objects from nova.objects import base as objects_base from nova import quota +from nova.scheduler.client import report from nova.tests import fixtures as nova_fixtures from nova.tests.unit import matchers from nova import utils @@ -291,6 +292,9 @@ class TestCase(base.BaseTestCase): # instead of only once initialized for test worker wsgi_app.init_global_data.reset() + # Reset the placement client singleton + report.PLACEMENTCLIENT = None + def _setup_cells(self): """Setup a normal cellsv2 environment. diff --git a/nova/tests/unit/compute/test_api.py b/nova/tests/unit/compute/test_api.py index f62f8c7477ca..b0442afb90f8 100644 --- a/nova/tests/unit/compute/test_api.py +++ b/nova/tests/unit/compute/test_api.py @@ -7764,16 +7764,13 @@ class ComputeAPIUnitTestCase(_ComputeAPIUnitTestMixIn, test.NoDBTestCase): self.assertTrue(hasattr(self.compute_api, 'host')) self.assertEqual(CONF.host, self.compute_api.host) - @mock.patch('nova.scheduler.client.report.SchedulerReportClient') + @mock.patch('nova.scheduler.client.report.report_client_singleton') def test_placement_client_init(self, mock_report_client): """Tests to make sure that the construction of the placement client - only happens once per API class instance. + uses the singleton helper, and happens only when needed. """ - self.assertIsNone(self.compute_api._placementclient) - # Access the property twice to make sure SchedulerReportClient is - # only loaded once. - for x in range(2): - self.compute_api.placementclient + self.assertFalse(mock_report_client.called) + self.compute_api.placementclient mock_report_client.assert_called_once_with() def test_validate_host_for_cold_migrate_same_host_fails(self): diff --git a/nova/tests/unit/compute/test_compute.py b/nova/tests/unit/compute/test_compute.py index 672e5c894ef2..b5754226a30b 100644 --- a/nova/tests/unit/compute/test_compute.py +++ b/nova/tests/unit/compute/test_compute.py @@ -13091,16 +13091,13 @@ class ComputeAPIAggrTestCase(BaseTestCase): hosts = aggregate.hosts if 'hosts' in aggregate else None self.assertIn(values[0][1][0], hosts) - @mock.patch('nova.scheduler.client.report.SchedulerReportClient') + @mock.patch('nova.scheduler.client.report.report_client_singleton') def test_placement_client_init(self, mock_report_client): """Tests to make sure that the construction of the placement client - only happens once per AggregateAPI class instance. + uses the singleton helper, and happens only when needed. """ - self.assertIsNone(self.api._placement_client) - # Access the property twice to make sure SchedulerReportClient is - # only loaded once. - for x in range(2): - self.api.placement_client + self.assertFalse(mock_report_client.called) + self.api.placement_client mock_report_client.assert_called_once_with() diff --git a/nova/tests/unit/scheduler/client/test_report.py b/nova/tests/unit/scheduler/client/test_report.py index 9a1766927c3e..2829c160340d 100644 --- a/nova/tests/unit/scheduler/client/test_report.py +++ b/nova/tests/unit/scheduler/client/test_report.py @@ -11,6 +11,7 @@ # under the License. import copy +import ddt import time from unittest import mock from urllib import parse @@ -157,6 +158,41 @@ class SafeConnectedTestCase(test.NoDBTestCase): self.assertTrue(req.called) +@ddt.ddt +class TestSingleton(test.NoDBTestCase): + def test_singleton(self): + # Make sure we start with a clean slate + self.assertIsNone(report.PLACEMENTCLIENT) + + # Make sure the first call creates the singleton, sets it + # globally, and returns it + client = report.report_client_singleton() + self.assertEqual(client, report.PLACEMENTCLIENT) + + # Make sure that a subsequent call returns the same thing + # again and that the global is unchanged + self.assertEqual(client, report.report_client_singleton()) + self.assertEqual(client, report.PLACEMENTCLIENT) + + @ddt.data(ks_exc.EndpointNotFound, + ks_exc.MissingAuthPlugin, + ks_exc.Unauthorized, + ks_exc.DiscoveryFailure, + ks_exc.ConnectFailure, + ks_exc.RequestTimeout, + ks_exc.GatewayTimeout, + test.TestingException) + def test_errors(self, exc): + self._test_error(exc) + + @mock.patch.object(report, 'LOG') + def _test_error(self, exc, mock_log): + with mock.patch.object(report.SchedulerReportClient, '_create_client', + side_effect=exc): + self.assertRaises(exc, report.report_client_singleton) + mock_log.error.assert_called_once() + + class TestConstructor(test.NoDBTestCase): def setUp(self): super(TestConstructor, self).setUp()