AZ list performance optimization: avoid double service list DB fetch

Assume number of services can be large (10000 as in the bug description),
this patch removes second service_get_all call.

zone_hosts changed from dict of lists to dict of sets.

The HostAPI instance from the API controller is also passed to the
get_availability_zones method so it does not have to recreate it
per call (this is both for a slight performance gain but mostly also
for test sanity).

On devstack with 10000 services patch decreased response time twice.

openstack availability zone list --long --timing

...

Before:

+-------------------------------------------------------------------------------------------+-----------+
| URL                                                                                       |   Seconds |
+-------------------------------------------------------------------------------------------+-----------+
| GET http://192.168.0.45/identity                                                          |  0.006816 |
| POST http://192.168.0.45/identity/v3/auth/tokens                                          |  0.456708 |
| POST http://192.168.0.45/identity/v3/auth/tokens                                          |  0.087485 |
| GET http://172.18.237.203/compute/v2.1/os-availability-zone/detail                        | 95.667192 |
| GET http://172.18.237.203/volume/v2/e2671d37ee2c4374bd1533645261f1d4/os-availability-zone |  0.036528 |
| Total                                                                                     | 96.254729 |
+-------------------------------------------------------------------------------------------+-----------+

After:

+-------------------------------------------------------------------------------------------+-----------+
| URL                                                                                       |   Seconds |
+-------------------------------------------------------------------------------------------+-----------+
| GET http://192.168.0.45/identity                                                          |  0.020215 |
| POST http://192.168.0.45/identity/v3/auth/tokens                                          |  0.102987 |
| POST http://192.168.0.45/identity/v3/auth/tokens                                          |  0.111899 |
| GET http://172.18.237.203/compute/v2.1/os-availability-zone/detail                        | 39.346657 |
| GET http://172.18.237.203/volume/v2/e2671d37ee2c4374bd1533645261f1d4/os-availability-zone |  0.026403 |
| Total                                                                                     | 39.608161 |
+-------------------------------------------------------------------------------------------+-----------+

The test_availability_zone_detail unit test is updated to assert that
services are only retrieved twice (once for enabled, once for disabled).
While in there, the expected response dict is formatted for readability
and a duplicate zone/host is added to make sure duplicates are handled
for available services. To ensure service_get_all is called only twice,
the low-level DB API service_get_all stub is replaced with a mock and
the mock is changed to be on the HostAPI.service_get_all method which
is (1) what the API controller code is actually using and (2) allows the
test to only mock the instance of the HostAPI being tested - trying to
mock the DB API service_get_all method causes intermittent failures
in unrelated tests because of the global nature of that mock.

There is another opportunity for optimizing get_availability_zones which
is marked with a TODO but left for a separate patch.

Co-Authored-By: Matt Riedemann <mriedem.os@gmail.com>

Partial-Bug: #1801897
Change-Id: Ib9a9a9a79499272d740a64cc0b909f0299a237d1
(cherry picked from commit 74cefe4266)
This commit is contained in:
Andrey Volkov 2019-02-14 15:39:45 +03:00 committed by Matt Riedemann
parent 870e5bcfb6
commit c280d747fb
3 changed files with 95 additions and 36 deletions

View File

@ -45,7 +45,8 @@ class AvailabilityZoneController(wsgi.Controller):
def _describe_availability_zones(self, context, **kwargs):
ctxt = context.elevated()
available_zones, not_available_zones = \
availability_zones.get_availability_zones(ctxt)
availability_zones.get_availability_zones(
ctxt, hostapi=self.host_api)
filtered_available_zones = \
self._get_filtered_availability_zones(available_zones, True)
@ -56,13 +57,16 @@ class AvailabilityZoneController(wsgi.Controller):
def _describe_availability_zones_verbose(self, context, **kwargs):
ctxt = context.elevated()
available_zones, not_available_zones = \
availability_zones.get_availability_zones(ctxt)
# Available services
enabled_services = self.host_api.service_get_all(
context, {'disabled': False}, set_zones=True, all_cells=True)
available_zones, not_available_zones = (
availability_zones.get_availability_zones(
ctxt, enabled_services=enabled_services,
hostapi=self.host_api))
zone_hosts = {}
host_services = {}
api_services = ('nova-osapi_compute', 'nova-metadata')
@ -71,10 +75,8 @@ class AvailabilityZoneController(wsgi.Controller):
# Skip API services in the listing since they are not
# maintained in the same way as other services
continue
zone_hosts.setdefault(service['availability_zone'], [])
if service['host'] not in zone_hosts[service['availability_zone']]:
zone_hosts[service['availability_zone']].append(
service['host'])
zone_hosts.setdefault(service['availability_zone'], set())
zone_hosts[service['availability_zone']].add(service['host'])
host_services.setdefault(service['availability_zone'] +
service['host'], [])

View File

@ -110,7 +110,8 @@ def update_host_availability_zone_cache(context, host, availability_zone=None):
def get_availability_zones(context, get_only_available=False,
with_hosts=False):
with_hosts=False, enabled_services=None,
hostapi=None):
"""Return available and unavailable zones on demand.
:param get_only_available: flag to determine whether to return
@ -119,13 +120,19 @@ def get_availability_zones(context, get_only_available=False,
available zones only
:param with_hosts: whether to return hosts part of the AZs
:type with_hosts: bool
:param enabled_services: list of enabled services to use; if None
enabled services will be retrieved from all cells with zones set
:param hostapi: nova.compute.api.HostAPI instance
"""
# NOTE(danms): Avoid circular import
from nova import compute
hostapi = compute.HostAPI()
# TODO(mriedem): Make hostapi a required arg in a non-backportable FUP.
if hostapi is None:
# NOTE(danms): Avoid circular import
from nova import compute
hostapi = compute.HostAPI()
enabled_services = hostapi.service_get_all(
context, {'disabled': False}, set_zones=True, all_cells=True)
if enabled_services is None:
enabled_services = hostapi.service_get_all(
context, {'disabled': False}, set_zones=True, all_cells=True)
available_zones = []
for (zone, host) in [(service['availability_zone'], service['host'])
@ -140,6 +147,10 @@ def get_availability_zones(context, get_only_available=False,
available_zones = list(_available_zones.items())
if not get_only_available:
# TODO(mriedem): We could probably optimize if we know that we're going
# to get both enabled and disabled services and just pull them all from
# the cell DBs at the same time and then filter into enabled/disabled
# lists in python.
disabled_services = hostapi.service_get_all(
context, {'disabled': True}, set_zones=True, all_cells=True)
not_available_zones = []

View File

@ -36,10 +36,12 @@ from nova.tests.unit.objects import test_service
FAKE_UUID = fakes.FAKE_UUID
def fake_service_get_all(context, disabled=None):
def fake_service_get_all(context, filters=None, **kwargs):
disabled = filters.get('disabled') if filters else None
def __fake_service(binary, availability_zone,
created_at, updated_at, host, disabled):
return dict(test_service.fake_service,
db_s = dict(test_service.fake_service,
binary=binary,
availability_zone=availability_zone,
available_zones=availability_zone,
@ -47,9 +49,12 @@ def fake_service_get_all(context, disabled=None):
updated_at=updated_at,
host=host,
disabled=disabled)
# The version field is immutable so remove that before creating the obj
db_s.pop('version', None)
return objects.Service(context, **db_s)
if disabled:
return [__fake_service("nova-compute", "zone-2",
svcs = [__fake_service("nova-compute", "zone-2",
datetime.datetime(2012, 11, 14, 9, 53, 25, 0),
datetime.datetime(2012, 12, 26, 14, 45, 25, 0),
"fake_host-1", True),
@ -62,7 +67,7 @@ def fake_service_get_all(context, disabled=None):
datetime.datetime(2012, 12, 26, 14, 45, 24, 0),
"fake_host-2", True)]
else:
return [__fake_service("nova-compute", "zone-1",
svcs = [__fake_service("nova-compute", "zone-1",
datetime.datetime(2012, 11, 14, 9, 53, 25, 0),
datetime.datetime(2012, 12, 26, 14, 45, 25, 0),
"fake_host-1", False),
@ -70,10 +75,17 @@ def fake_service_get_all(context, disabled=None):
datetime.datetime(2012, 11, 14, 9, 57, 3, 0),
datetime.datetime(2012, 12, 26, 14, 45, 25, 0),
"fake_host-1", False),
# nova-conductor is in the same zone and host as nova-sched
# and is here to make sure /detail filters out duplicates.
__fake_service("nova-conductor", "internal",
datetime.datetime(2012, 11, 14, 9, 57, 3, 0),
datetime.datetime(2012, 12, 26, 14, 45, 25, 0),
"fake_host-1", False),
__fake_service("nova-network", "internal",
datetime.datetime(2012, 11, 16, 7, 25, 46, 0),
datetime.datetime(2012, 12, 26, 14, 45, 24, 0),
"fake_host-2", False)]
return objects.ServiceList(objects=svcs)
class AvailabilityZoneApiTestV21(test.NoDBTestCase):
@ -83,12 +95,15 @@ class AvailabilityZoneApiTestV21(test.NoDBTestCase):
super(AvailabilityZoneApiTestV21, self).setUp()
availability_zones.reset_cache()
fakes.stub_out_nw_api(self)
self.stub_out('nova.db.api.service_get_all', fake_service_get_all)
self.stub_out('nova.availability_zones.set_availability_zones',
lambda c, services: services)
self.stub_out('nova.servicegroup.API.service_is_up',
lambda s, service: service['binary'] != u"nova-network")
self.controller = self.availability_zone.AvailabilityZoneController()
self.mock_service_get_all = mock.patch.object(
self.controller.host_api, 'service_get_all',
side_effect=fake_service_get_all).start()
self.addCleanup(self.mock_service_get_all.stop)
self.req = fakes.HTTPRequest.blank('')
def test_filtered_availability_zones(self):
@ -127,25 +142,56 @@ class AvailabilityZoneApiTestV21(test.NoDBTestCase):
self.assertEqual(len(zones), 3)
timestamp = iso8601.parse_date("2012-12-26T14:45:25Z")
nova_network_timestamp = iso8601.parse_date("2012-12-26T14:45:24Z")
expected = [{'zoneName': 'zone-1',
'zoneState': {'available': True},
'hosts': {'fake_host-1': {
'nova-compute': {'active': True, 'available': True,
'updated_at': timestamp}}}},
{'zoneName': 'internal',
'zoneState': {'available': True},
'hosts': {'fake_host-1': {
'nova-sched': {'active': True, 'available': True,
'updated_at': timestamp}},
'fake_host-2': {
'nova-network': {
'active': True,
'available': False,
'updated_at': nova_network_timestamp}}}},
{'zoneName': 'zone-2',
'zoneState': {'available': False},
'hosts': None}]
expected = [
{
'zoneName': 'zone-1',
'zoneState': {'available': True},
'hosts': {
'fake_host-1': {
'nova-compute': {
'active': True,
'available': True,
'updated_at': timestamp
}
}
}
},
{
'zoneName': 'internal',
'zoneState': {'available': True},
'hosts': {
'fake_host-1': {
'nova-sched': {
'active': True,
'available': True,
'updated_at': timestamp
},
'nova-conductor': {
'active': True,
'available': True,
'updated_at': timestamp
}
},
'fake_host-2': {
'nova-network': {
'active': True,
'available': False,
'updated_at': nova_network_timestamp
}
}
}
},
{
'zoneName': 'zone-2',
'zoneState': {'available': False},
'hosts': None
}
]
self.assertEqual(expected, zones)
# We get both enabled and disabled services per cell (just one in this
# test case) so we'll query the services table twice.
self.assertEqual(2, self.mock_service_get_all.call_count,
self.mock_service_get_all.call_args_list)
@mock.patch.object(availability_zones, 'get_availability_zones',
return_value=[['nova'], []])