From c280d747fb23f1abaaf91eea7f6d11e716c6db42 Mon Sep 17 00:00:00 2001 From: Andrey Volkov Date: Thu, 14 Feb 2019 15:39:45 +0300 Subject: [PATCH] AZ list performance optimization: avoid double service list DB fetch Assume number of services can be large (10000 as in the bug description), this patch removes second service_get_all call. zone_hosts changed from dict of lists to dict of sets. The HostAPI instance from the API controller is also passed to the get_availability_zones method so it does not have to recreate it per call (this is both for a slight performance gain but mostly also for test sanity). On devstack with 10000 services patch decreased response time twice. openstack availability zone list --long --timing ... Before: +-------------------------------------------------------------------------------------------+-----------+ | URL | Seconds | +-------------------------------------------------------------------------------------------+-----------+ | GET http://192.168.0.45/identity | 0.006816 | | POST http://192.168.0.45/identity/v3/auth/tokens | 0.456708 | | POST http://192.168.0.45/identity/v3/auth/tokens | 0.087485 | | GET http://172.18.237.203/compute/v2.1/os-availability-zone/detail | 95.667192 | | GET http://172.18.237.203/volume/v2/e2671d37ee2c4374bd1533645261f1d4/os-availability-zone | 0.036528 | | Total | 96.254729 | +-------------------------------------------------------------------------------------------+-----------+ After: +-------------------------------------------------------------------------------------------+-----------+ | URL | Seconds | +-------------------------------------------------------------------------------------------+-----------+ | GET http://192.168.0.45/identity | 0.020215 | | POST http://192.168.0.45/identity/v3/auth/tokens | 0.102987 | | POST http://192.168.0.45/identity/v3/auth/tokens | 0.111899 | | GET http://172.18.237.203/compute/v2.1/os-availability-zone/detail | 39.346657 | | GET http://172.18.237.203/volume/v2/e2671d37ee2c4374bd1533645261f1d4/os-availability-zone | 0.026403 | | Total | 39.608161 | +-------------------------------------------------------------------------------------------+-----------+ The test_availability_zone_detail unit test is updated to assert that services are only retrieved twice (once for enabled, once for disabled). While in there, the expected response dict is formatted for readability and a duplicate zone/host is added to make sure duplicates are handled for available services. To ensure service_get_all is called only twice, the low-level DB API service_get_all stub is replaced with a mock and the mock is changed to be on the HostAPI.service_get_all method which is (1) what the API controller code is actually using and (2) allows the test to only mock the instance of the HostAPI being tested - trying to mock the DB API service_get_all method causes intermittent failures in unrelated tests because of the global nature of that mock. There is another opportunity for optimizing get_availability_zones which is marked with a TODO but left for a separate patch. Co-Authored-By: Matt Riedemann Partial-Bug: #1801897 Change-Id: Ib9a9a9a79499272d740a64cc0b909f0299a237d1 (cherry picked from commit 74cefe4266a613d4c2afbb0c791e16eb7789aef4) --- .../openstack/compute/availability_zone.py | 16 ++-- nova/availability_zones.py | 23 +++-- .../compute/test_availability_zone.py | 92 ++++++++++++++----- 3 files changed, 95 insertions(+), 36 deletions(-) diff --git a/nova/api/openstack/compute/availability_zone.py b/nova/api/openstack/compute/availability_zone.py index b9fa5ebf111f..514e78eb9ed7 100644 --- a/nova/api/openstack/compute/availability_zone.py +++ b/nova/api/openstack/compute/availability_zone.py @@ -45,7 +45,8 @@ class AvailabilityZoneController(wsgi.Controller): def _describe_availability_zones(self, context, **kwargs): ctxt = context.elevated() available_zones, not_available_zones = \ - availability_zones.get_availability_zones(ctxt) + availability_zones.get_availability_zones( + ctxt, hostapi=self.host_api) filtered_available_zones = \ self._get_filtered_availability_zones(available_zones, True) @@ -56,13 +57,16 @@ class AvailabilityZoneController(wsgi.Controller): def _describe_availability_zones_verbose(self, context, **kwargs): ctxt = context.elevated() - available_zones, not_available_zones = \ - availability_zones.get_availability_zones(ctxt) # Available services enabled_services = self.host_api.service_get_all( context, {'disabled': False}, set_zones=True, all_cells=True) + available_zones, not_available_zones = ( + availability_zones.get_availability_zones( + ctxt, enabled_services=enabled_services, + hostapi=self.host_api)) + zone_hosts = {} host_services = {} api_services = ('nova-osapi_compute', 'nova-metadata') @@ -71,10 +75,8 @@ class AvailabilityZoneController(wsgi.Controller): # Skip API services in the listing since they are not # maintained in the same way as other services continue - zone_hosts.setdefault(service['availability_zone'], []) - if service['host'] not in zone_hosts[service['availability_zone']]: - zone_hosts[service['availability_zone']].append( - service['host']) + zone_hosts.setdefault(service['availability_zone'], set()) + zone_hosts[service['availability_zone']].add(service['host']) host_services.setdefault(service['availability_zone'] + service['host'], []) diff --git a/nova/availability_zones.py b/nova/availability_zones.py index 7c8d94890c4e..579efacb6cd7 100644 --- a/nova/availability_zones.py +++ b/nova/availability_zones.py @@ -110,7 +110,8 @@ def update_host_availability_zone_cache(context, host, availability_zone=None): def get_availability_zones(context, get_only_available=False, - with_hosts=False): + with_hosts=False, enabled_services=None, + hostapi=None): """Return available and unavailable zones on demand. :param get_only_available: flag to determine whether to return @@ -119,13 +120,19 @@ def get_availability_zones(context, get_only_available=False, available zones only :param with_hosts: whether to return hosts part of the AZs :type with_hosts: bool + :param enabled_services: list of enabled services to use; if None + enabled services will be retrieved from all cells with zones set + :param hostapi: nova.compute.api.HostAPI instance """ - # NOTE(danms): Avoid circular import - from nova import compute - hostapi = compute.HostAPI() + # TODO(mriedem): Make hostapi a required arg in a non-backportable FUP. + if hostapi is None: + # NOTE(danms): Avoid circular import + from nova import compute + hostapi = compute.HostAPI() - enabled_services = hostapi.service_get_all( - context, {'disabled': False}, set_zones=True, all_cells=True) + if enabled_services is None: + enabled_services = hostapi.service_get_all( + context, {'disabled': False}, set_zones=True, all_cells=True) available_zones = [] for (zone, host) in [(service['availability_zone'], service['host']) @@ -140,6 +147,10 @@ def get_availability_zones(context, get_only_available=False, available_zones = list(_available_zones.items()) if not get_only_available: + # TODO(mriedem): We could probably optimize if we know that we're going + # to get both enabled and disabled services and just pull them all from + # the cell DBs at the same time and then filter into enabled/disabled + # lists in python. disabled_services = hostapi.service_get_all( context, {'disabled': True}, set_zones=True, all_cells=True) not_available_zones = [] diff --git a/nova/tests/unit/api/openstack/compute/test_availability_zone.py b/nova/tests/unit/api/openstack/compute/test_availability_zone.py index 982c4b1fdb28..bb6a2b0a2b2c 100644 --- a/nova/tests/unit/api/openstack/compute/test_availability_zone.py +++ b/nova/tests/unit/api/openstack/compute/test_availability_zone.py @@ -36,10 +36,12 @@ from nova.tests.unit.objects import test_service FAKE_UUID = fakes.FAKE_UUID -def fake_service_get_all(context, disabled=None): +def fake_service_get_all(context, filters=None, **kwargs): + disabled = filters.get('disabled') if filters else None + def __fake_service(binary, availability_zone, created_at, updated_at, host, disabled): - return dict(test_service.fake_service, + db_s = dict(test_service.fake_service, binary=binary, availability_zone=availability_zone, available_zones=availability_zone, @@ -47,9 +49,12 @@ def fake_service_get_all(context, disabled=None): updated_at=updated_at, host=host, disabled=disabled) + # The version field is immutable so remove that before creating the obj + db_s.pop('version', None) + return objects.Service(context, **db_s) if disabled: - return [__fake_service("nova-compute", "zone-2", + svcs = [__fake_service("nova-compute", "zone-2", datetime.datetime(2012, 11, 14, 9, 53, 25, 0), datetime.datetime(2012, 12, 26, 14, 45, 25, 0), "fake_host-1", True), @@ -62,7 +67,7 @@ def fake_service_get_all(context, disabled=None): datetime.datetime(2012, 12, 26, 14, 45, 24, 0), "fake_host-2", True)] else: - return [__fake_service("nova-compute", "zone-1", + svcs = [__fake_service("nova-compute", "zone-1", datetime.datetime(2012, 11, 14, 9, 53, 25, 0), datetime.datetime(2012, 12, 26, 14, 45, 25, 0), "fake_host-1", False), @@ -70,10 +75,17 @@ def fake_service_get_all(context, disabled=None): datetime.datetime(2012, 11, 14, 9, 57, 3, 0), datetime.datetime(2012, 12, 26, 14, 45, 25, 0), "fake_host-1", False), + # nova-conductor is in the same zone and host as nova-sched + # and is here to make sure /detail filters out duplicates. + __fake_service("nova-conductor", "internal", + datetime.datetime(2012, 11, 14, 9, 57, 3, 0), + datetime.datetime(2012, 12, 26, 14, 45, 25, 0), + "fake_host-1", False), __fake_service("nova-network", "internal", datetime.datetime(2012, 11, 16, 7, 25, 46, 0), datetime.datetime(2012, 12, 26, 14, 45, 24, 0), "fake_host-2", False)] + return objects.ServiceList(objects=svcs) class AvailabilityZoneApiTestV21(test.NoDBTestCase): @@ -83,12 +95,15 @@ class AvailabilityZoneApiTestV21(test.NoDBTestCase): super(AvailabilityZoneApiTestV21, self).setUp() availability_zones.reset_cache() fakes.stub_out_nw_api(self) - self.stub_out('nova.db.api.service_get_all', fake_service_get_all) self.stub_out('nova.availability_zones.set_availability_zones', lambda c, services: services) self.stub_out('nova.servicegroup.API.service_is_up', lambda s, service: service['binary'] != u"nova-network") self.controller = self.availability_zone.AvailabilityZoneController() + self.mock_service_get_all = mock.patch.object( + self.controller.host_api, 'service_get_all', + side_effect=fake_service_get_all).start() + self.addCleanup(self.mock_service_get_all.stop) self.req = fakes.HTTPRequest.blank('') def test_filtered_availability_zones(self): @@ -127,25 +142,56 @@ class AvailabilityZoneApiTestV21(test.NoDBTestCase): self.assertEqual(len(zones), 3) timestamp = iso8601.parse_date("2012-12-26T14:45:25Z") nova_network_timestamp = iso8601.parse_date("2012-12-26T14:45:24Z") - expected = [{'zoneName': 'zone-1', - 'zoneState': {'available': True}, - 'hosts': {'fake_host-1': { - 'nova-compute': {'active': True, 'available': True, - 'updated_at': timestamp}}}}, - {'zoneName': 'internal', - 'zoneState': {'available': True}, - 'hosts': {'fake_host-1': { - 'nova-sched': {'active': True, 'available': True, - 'updated_at': timestamp}}, - 'fake_host-2': { - 'nova-network': { - 'active': True, - 'available': False, - 'updated_at': nova_network_timestamp}}}}, - {'zoneName': 'zone-2', - 'zoneState': {'available': False}, - 'hosts': None}] + expected = [ + { + 'zoneName': 'zone-1', + 'zoneState': {'available': True}, + 'hosts': { + 'fake_host-1': { + 'nova-compute': { + 'active': True, + 'available': True, + 'updated_at': timestamp + } + } + } + }, + { + 'zoneName': 'internal', + 'zoneState': {'available': True}, + 'hosts': { + 'fake_host-1': { + 'nova-sched': { + 'active': True, + 'available': True, + 'updated_at': timestamp + }, + 'nova-conductor': { + 'active': True, + 'available': True, + 'updated_at': timestamp + } + }, + 'fake_host-2': { + 'nova-network': { + 'active': True, + 'available': False, + 'updated_at': nova_network_timestamp + } + } + } + }, + { + 'zoneName': 'zone-2', + 'zoneState': {'available': False}, + 'hosts': None + } + ] self.assertEqual(expected, zones) + # We get both enabled and disabled services per cell (just one in this + # test case) so we'll query the services table twice. + self.assertEqual(2, self.mock_service_get_all.call_count, + self.mock_service_get_all.call_args_list) @mock.patch.object(availability_zones, 'get_availability_zones', return_value=[['nova'], []])