AZ list performance optimization: avoid double service list DB fetch

Assume number of services can be large (10000 as in the bug description),
this patch removes second service_get_all call.

zone_hosts changed from dict of lists to dict of sets.

The HostAPI instance from the API controller is also passed to the
get_availability_zones method so it does not have to recreate it
per call (this is both for a slight performance gain but mostly also
for test sanity).

On devstack with 10000 services patch decreased response time twice.

openstack availability zone list --long --timing

...

Before:

+-------------------------------------------------------------------------------------------+-----------+
| URL                                                                                       |   Seconds |
+-------------------------------------------------------------------------------------------+-----------+
| GET http://192.168.0.45/identity                                                          |  0.006816 |
| POST http://192.168.0.45/identity/v3/auth/tokens                                          |  0.456708 |
| POST http://192.168.0.45/identity/v3/auth/tokens                                          |  0.087485 |
| GET http://172.18.237.203/compute/v2.1/os-availability-zone/detail                        | 95.667192 |
| GET http://172.18.237.203/volume/v2/e2671d37ee2c4374bd1533645261f1d4/os-availability-zone |  0.036528 |
| Total                                                                                     | 96.254729 |
+-------------------------------------------------------------------------------------------+-----------+

After:

+-------------------------------------------------------------------------------------------+-----------+
| URL                                                                                       |   Seconds |
+-------------------------------------------------------------------------------------------+-----------+
| GET http://192.168.0.45/identity                                                          |  0.020215 |
| POST http://192.168.0.45/identity/v3/auth/tokens                                          |  0.102987 |
| POST http://192.168.0.45/identity/v3/auth/tokens                                          |  0.111899 |
| GET http://172.18.237.203/compute/v2.1/os-availability-zone/detail                        | 39.346657 |
| GET http://172.18.237.203/volume/v2/e2671d37ee2c4374bd1533645261f1d4/os-availability-zone |  0.026403 |
| Total                                                                                     | 39.608161 |
+-------------------------------------------------------------------------------------------+-----------+

The test_availability_zone_detail unit test is updated to assert that
services are only retrieved twice (once for enabled, once for disabled).
While in there, the expected response dict is formatted for readability
and a duplicate zone/host is added to make sure duplicates are handled
for available services. To ensure service_get_all is called only twice,
the low-level DB API service_get_all stub is replaced with a mock and
the mock is changed to be on the HostAPI.service_get_all method which
is (1) what the API controller code is actually using and (2) allows the
test to only mock the instance of the HostAPI being tested - trying to
mock the DB API service_get_all method causes intermittent failures
in unrelated tests because of the global nature of that mock.

There is another opportunity for optimizing get_availability_zones which
is marked with a TODO but left for a separate patch.

Co-Authored-By: Matt Riedemann <mriedem.os@gmail.com>

Partial-Bug: #1801897
Change-Id: Ib9a9a9a79499272d740a64cc0b909f0299a237d1
(cherry picked from commit 74cefe4266)
(cherry picked from commit c280d747fb)
This commit is contained in:
Andrey Volkov 2019-02-14 15:39:45 +03:00 committed by Matt Riedemann
parent 75af081868
commit fa275544c2
3 changed files with 95 additions and 36 deletions

View File

@ -45,7 +45,8 @@ class AvailabilityZoneController(wsgi.Controller):
def _describe_availability_zones(self, context, **kwargs): def _describe_availability_zones(self, context, **kwargs):
ctxt = context.elevated() ctxt = context.elevated()
available_zones, not_available_zones = \ available_zones, not_available_zones = \
availability_zones.get_availability_zones(ctxt) availability_zones.get_availability_zones(
ctxt, hostapi=self.host_api)
filtered_available_zones = \ filtered_available_zones = \
self._get_filtered_availability_zones(available_zones, True) self._get_filtered_availability_zones(available_zones, True)
@ -56,13 +57,16 @@ class AvailabilityZoneController(wsgi.Controller):
def _describe_availability_zones_verbose(self, context, **kwargs): def _describe_availability_zones_verbose(self, context, **kwargs):
ctxt = context.elevated() ctxt = context.elevated()
available_zones, not_available_zones = \
availability_zones.get_availability_zones(ctxt)
# Available services # Available services
enabled_services = self.host_api.service_get_all( enabled_services = self.host_api.service_get_all(
context, {'disabled': False}, set_zones=True, all_cells=True) context, {'disabled': False}, set_zones=True, all_cells=True)
available_zones, not_available_zones = (
availability_zones.get_availability_zones(
ctxt, enabled_services=enabled_services,
hostapi=self.host_api))
zone_hosts = {} zone_hosts = {}
host_services = {} host_services = {}
api_services = ('nova-osapi_compute', 'nova-metadata') api_services = ('nova-osapi_compute', 'nova-metadata')
@ -71,10 +75,8 @@ class AvailabilityZoneController(wsgi.Controller):
# Skip API services in the listing since they are not # Skip API services in the listing since they are not
# maintained in the same way as other services # maintained in the same way as other services
continue continue
zone_hosts.setdefault(service['availability_zone'], []) zone_hosts.setdefault(service['availability_zone'], set())
if service['host'] not in zone_hosts[service['availability_zone']]: zone_hosts[service['availability_zone']].add(service['host'])
zone_hosts[service['availability_zone']].append(
service['host'])
host_services.setdefault(service['availability_zone'] + host_services.setdefault(service['availability_zone'] +
service['host'], []) service['host'], [])

View File

@ -110,7 +110,8 @@ def update_host_availability_zone_cache(context, host, availability_zone=None):
def get_availability_zones(context, get_only_available=False, def get_availability_zones(context, get_only_available=False,
with_hosts=False): with_hosts=False, enabled_services=None,
hostapi=None):
"""Return available and unavailable zones on demand. """Return available and unavailable zones on demand.
:param get_only_available: flag to determine whether to return :param get_only_available: flag to determine whether to return
@ -119,13 +120,19 @@ def get_availability_zones(context, get_only_available=False,
available zones only available zones only
:param with_hosts: whether to return hosts part of the AZs :param with_hosts: whether to return hosts part of the AZs
:type with_hosts: bool :type with_hosts: bool
:param enabled_services: list of enabled services to use; if None
enabled services will be retrieved from all cells with zones set
:param hostapi: nova.compute.api.HostAPI instance
""" """
# NOTE(danms): Avoid circular import # TODO(mriedem): Make hostapi a required arg in a non-backportable FUP.
from nova import compute if hostapi is None:
hostapi = compute.HostAPI() # NOTE(danms): Avoid circular import
from nova import compute
hostapi = compute.HostAPI()
enabled_services = hostapi.service_get_all( if enabled_services is None:
context, {'disabled': False}, set_zones=True, all_cells=True) enabled_services = hostapi.service_get_all(
context, {'disabled': False}, set_zones=True, all_cells=True)
available_zones = [] available_zones = []
for (zone, host) in [(service['availability_zone'], service['host']) for (zone, host) in [(service['availability_zone'], service['host'])
@ -140,6 +147,10 @@ def get_availability_zones(context, get_only_available=False,
available_zones = list(_available_zones.items()) available_zones = list(_available_zones.items())
if not get_only_available: if not get_only_available:
# TODO(mriedem): We could probably optimize if we know that we're going
# to get both enabled and disabled services and just pull them all from
# the cell DBs at the same time and then filter into enabled/disabled
# lists in python.
disabled_services = hostapi.service_get_all( disabled_services = hostapi.service_get_all(
context, {'disabled': True}, set_zones=True, all_cells=True) context, {'disabled': True}, set_zones=True, all_cells=True)
not_available_zones = [] not_available_zones = []

View File

@ -36,10 +36,12 @@ from nova.tests import uuidsentinel
FAKE_UUID = fakes.FAKE_UUID FAKE_UUID = fakes.FAKE_UUID
def fake_service_get_all(context, disabled=None): def fake_service_get_all(context, filters=None, **kwargs):
disabled = filters.get('disabled') if filters else None
def __fake_service(binary, availability_zone, def __fake_service(binary, availability_zone,
created_at, updated_at, host, disabled): created_at, updated_at, host, disabled):
return dict(test_service.fake_service, db_s = dict(test_service.fake_service,
binary=binary, binary=binary,
availability_zone=availability_zone, availability_zone=availability_zone,
available_zones=availability_zone, available_zones=availability_zone,
@ -47,9 +49,12 @@ def fake_service_get_all(context, disabled=None):
updated_at=updated_at, updated_at=updated_at,
host=host, host=host,
disabled=disabled) disabled=disabled)
# The version field is immutable so remove that before creating the obj
db_s.pop('version', None)
return objects.Service(context, **db_s)
if disabled: if disabled:
return [__fake_service("nova-compute", "zone-2", svcs = [__fake_service("nova-compute", "zone-2",
datetime.datetime(2012, 11, 14, 9, 53, 25, 0), datetime.datetime(2012, 11, 14, 9, 53, 25, 0),
datetime.datetime(2012, 12, 26, 14, 45, 25, 0), datetime.datetime(2012, 12, 26, 14, 45, 25, 0),
"fake_host-1", True), "fake_host-1", True),
@ -62,7 +67,7 @@ def fake_service_get_all(context, disabled=None):
datetime.datetime(2012, 12, 26, 14, 45, 24, 0), datetime.datetime(2012, 12, 26, 14, 45, 24, 0),
"fake_host-2", True)] "fake_host-2", True)]
else: else:
return [__fake_service("nova-compute", "zone-1", svcs = [__fake_service("nova-compute", "zone-1",
datetime.datetime(2012, 11, 14, 9, 53, 25, 0), datetime.datetime(2012, 11, 14, 9, 53, 25, 0),
datetime.datetime(2012, 12, 26, 14, 45, 25, 0), datetime.datetime(2012, 12, 26, 14, 45, 25, 0),
"fake_host-1", False), "fake_host-1", False),
@ -70,10 +75,17 @@ def fake_service_get_all(context, disabled=None):
datetime.datetime(2012, 11, 14, 9, 57, 3, 0), datetime.datetime(2012, 11, 14, 9, 57, 3, 0),
datetime.datetime(2012, 12, 26, 14, 45, 25, 0), datetime.datetime(2012, 12, 26, 14, 45, 25, 0),
"fake_host-1", False), "fake_host-1", False),
# nova-conductor is in the same zone and host as nova-sched
# and is here to make sure /detail filters out duplicates.
__fake_service("nova-conductor", "internal",
datetime.datetime(2012, 11, 14, 9, 57, 3, 0),
datetime.datetime(2012, 12, 26, 14, 45, 25, 0),
"fake_host-1", False),
__fake_service("nova-network", "internal", __fake_service("nova-network", "internal",
datetime.datetime(2012, 11, 16, 7, 25, 46, 0), datetime.datetime(2012, 11, 16, 7, 25, 46, 0),
datetime.datetime(2012, 12, 26, 14, 45, 24, 0), datetime.datetime(2012, 12, 26, 14, 45, 24, 0),
"fake_host-2", False)] "fake_host-2", False)]
return objects.ServiceList(objects=svcs)
class AvailabilityZoneApiTestV21(test.NoDBTestCase): class AvailabilityZoneApiTestV21(test.NoDBTestCase):
@ -83,12 +95,15 @@ class AvailabilityZoneApiTestV21(test.NoDBTestCase):
super(AvailabilityZoneApiTestV21, self).setUp() super(AvailabilityZoneApiTestV21, self).setUp()
availability_zones.reset_cache() availability_zones.reset_cache()
fakes.stub_out_nw_api(self) fakes.stub_out_nw_api(self)
self.stub_out('nova.db.api.service_get_all', fake_service_get_all)
self.stub_out('nova.availability_zones.set_availability_zones', self.stub_out('nova.availability_zones.set_availability_zones',
lambda c, services: services) lambda c, services: services)
self.stub_out('nova.servicegroup.API.service_is_up', self.stub_out('nova.servicegroup.API.service_is_up',
lambda s, service: service['binary'] != u"nova-network") lambda s, service: service['binary'] != u"nova-network")
self.controller = self.availability_zone.AvailabilityZoneController() self.controller = self.availability_zone.AvailabilityZoneController()
self.mock_service_get_all = mock.patch.object(
self.controller.host_api, 'service_get_all',
side_effect=fake_service_get_all).start()
self.addCleanup(self.mock_service_get_all.stop)
self.req = fakes.HTTPRequest.blank('') self.req = fakes.HTTPRequest.blank('')
def test_filtered_availability_zones(self): def test_filtered_availability_zones(self):
@ -127,25 +142,56 @@ class AvailabilityZoneApiTestV21(test.NoDBTestCase):
self.assertEqual(len(zones), 3) self.assertEqual(len(zones), 3)
timestamp = iso8601.parse_date("2012-12-26T14:45:25Z") timestamp = iso8601.parse_date("2012-12-26T14:45:25Z")
nova_network_timestamp = iso8601.parse_date("2012-12-26T14:45:24Z") nova_network_timestamp = iso8601.parse_date("2012-12-26T14:45:24Z")
expected = [{'zoneName': 'zone-1', expected = [
'zoneState': {'available': True}, {
'hosts': {'fake_host-1': { 'zoneName': 'zone-1',
'nova-compute': {'active': True, 'available': True, 'zoneState': {'available': True},
'updated_at': timestamp}}}}, 'hosts': {
{'zoneName': 'internal', 'fake_host-1': {
'zoneState': {'available': True}, 'nova-compute': {
'hosts': {'fake_host-1': { 'active': True,
'nova-sched': {'active': True, 'available': True, 'available': True,
'updated_at': timestamp}}, 'updated_at': timestamp
'fake_host-2': { }
'nova-network': { }
'active': True, }
'available': False, },
'updated_at': nova_network_timestamp}}}}, {
{'zoneName': 'zone-2', 'zoneName': 'internal',
'zoneState': {'available': False}, 'zoneState': {'available': True},
'hosts': None}] 'hosts': {
'fake_host-1': {
'nova-sched': {
'active': True,
'available': True,
'updated_at': timestamp
},
'nova-conductor': {
'active': True,
'available': True,
'updated_at': timestamp
}
},
'fake_host-2': {
'nova-network': {
'active': True,
'available': False,
'updated_at': nova_network_timestamp
}
}
}
},
{
'zoneName': 'zone-2',
'zoneState': {'available': False},
'hosts': None
}
]
self.assertEqual(expected, zones) self.assertEqual(expected, zones)
# We get both enabled and disabled services per cell (just one in this
# test case) so we'll query the services table twice.
self.assertEqual(2, self.mock_service_get_all.call_count,
self.mock_service_get_all.call_args_list)
@mock.patch.object(availability_zones, 'get_availability_zones', @mock.patch.object(availability_zones, 'get_availability_zones',
return_value=[['nova'], []]) return_value=[['nova'], []])