Browse Source

AZ list performance optimization: avoid double service list DB fetch

Assuming the number of services can be large (10000 as in the bug description),
this patch removes the second service_get_all call.

zone_hosts changed from dict of lists to dict of sets.

The HostAPI instance from the API controller is also passed to the
get_availability_zones method so it does not have to recreate it
per call (this is partly for a slight performance gain, but mostly
for test sanity).

On devstack with 10000 services, this patch cut the response time by more than half.

openstack availability zone list --long --timing

...

Before:

+-------------------------------------------------------------------------------------------+-----------+
| URL                                                                                       |   Seconds |
+-------------------------------------------------------------------------------------------+-----------+
| GET http://192.168.0.45/identity                                                          |  0.006816 |
| POST http://192.168.0.45/identity/v3/auth/tokens                                          |  0.456708 |
| POST http://192.168.0.45/identity/v3/auth/tokens                                          |  0.087485 |
| GET http://172.18.237.203/compute/v2.1/os-availability-zone/detail                        | 95.667192 |
| GET http://172.18.237.203/volume/v2/e2671d37ee2c4374bd1533645261f1d4/os-availability-zone |  0.036528 |
| Total                                                                                     | 96.254729 |
+-------------------------------------------------------------------------------------------+-----------+

After:

+-------------------------------------------------------------------------------------------+-----------+
| URL                                                                                       |   Seconds |
+-------------------------------------------------------------------------------------------+-----------+
| GET http://192.168.0.45/identity                                                          |  0.020215 |
| POST http://192.168.0.45/identity/v3/auth/tokens                                          |  0.102987 |
| POST http://192.168.0.45/identity/v3/auth/tokens                                          |  0.111899 |
| GET http://172.18.237.203/compute/v2.1/os-availability-zone/detail                        | 39.346657 |
| GET http://172.18.237.203/volume/v2/e2671d37ee2c4374bd1533645261f1d4/os-availability-zone |  0.026403 |
| Total                                                                                     | 39.608161 |
+-------------------------------------------------------------------------------------------+-----------+

The test_availability_zone_detail unit test is updated to assert that
services are only retrieved twice (once for enabled, once for disabled).
While in there, the expected response dict is formatted for readability
and a duplicate zone/host is added to make sure duplicates are handled
for available services. To ensure service_get_all is called only twice,
the low-level DB API service_get_all stub is replaced with a mock and
the mock is changed to be on the HostAPI.service_get_all method which
is (1) what the API controller code is actually using and (2) allows the
test to only mock the instance of the HostAPI being tested - trying to
mock the DB API service_get_all method causes intermittent failures
in unrelated tests because of the global nature of that mock.

There is another opportunity for optimizing get_availability_zones which
is marked with a TODO but left for a separate patch.

Co-Authored-By: Matt Riedemann <mriedem.os@gmail.com>

Partial-Bug: #1801897
Change-Id: Ib9a9a9a79499272d740a64cc0b909f0299a237d1
(cherry picked from commit 74cefe4266)
(cherry picked from commit c280d747fb)
tags/18.2.1
Andrey Volkov 8 months ago
parent
commit
fa275544c2

+ 9
- 7
nova/api/openstack/compute/availability_zone.py View File

@@ -45,7 +45,8 @@ class AvailabilityZoneController(wsgi.Controller):
45 45
     def _describe_availability_zones(self, context, **kwargs):
46 46
         ctxt = context.elevated()
47 47
         available_zones, not_available_zones = \
48
-            availability_zones.get_availability_zones(ctxt)
48
+            availability_zones.get_availability_zones(
49
+                ctxt, hostapi=self.host_api)
49 50
 
50 51
         filtered_available_zones = \
51 52
             self._get_filtered_availability_zones(available_zones, True)
@@ -56,13 +57,16 @@ class AvailabilityZoneController(wsgi.Controller):
56 57
 
57 58
     def _describe_availability_zones_verbose(self, context, **kwargs):
58 59
         ctxt = context.elevated()
59
-        available_zones, not_available_zones = \
60
-            availability_zones.get_availability_zones(ctxt)
61 60
 
62 61
         # Available services
63 62
         enabled_services = self.host_api.service_get_all(
64 63
             context, {'disabled': False}, set_zones=True, all_cells=True)
65 64
 
65
+        available_zones, not_available_zones = (
66
+            availability_zones.get_availability_zones(
67
+                ctxt, enabled_services=enabled_services,
68
+                hostapi=self.host_api))
69
+
66 70
         zone_hosts = {}
67 71
         host_services = {}
68 72
         api_services = ('nova-osapi_compute', 'nova-metadata')
@@ -71,10 +75,8 @@ class AvailabilityZoneController(wsgi.Controller):
71 75
                 # Skip API services in the listing since they are not
72 76
                 # maintained in the same way as other services
73 77
                 continue
74
-            zone_hosts.setdefault(service['availability_zone'], [])
75
-            if service['host'] not in zone_hosts[service['availability_zone']]:
76
-                zone_hosts[service['availability_zone']].append(
77
-                    service['host'])
78
+            zone_hosts.setdefault(service['availability_zone'], set())
79
+            zone_hosts[service['availability_zone']].add(service['host'])
78 80
 
79 81
             host_services.setdefault(service['availability_zone'] +
80 82
                     service['host'], [])

+ 17
- 6
nova/availability_zones.py View File

@@ -110,7 +110,8 @@ def update_host_availability_zone_cache(context, host, availability_zone=None):
110 110
 
111 111
 
112 112
 def get_availability_zones(context, get_only_available=False,
113
-                           with_hosts=False):
113
+                           with_hosts=False, enabled_services=None,
114
+                           hostapi=None):
114 115
     """Return available and unavailable zones on demand.
115 116
 
116 117
         :param get_only_available: flag to determine whether to return
@@ -119,13 +120,19 @@ def get_availability_zones(context, get_only_available=False,
119 120
             available zones only
120 121
         :param with_hosts: whether to return hosts part of the AZs
121 122
         :type with_hosts: bool
123
+        :param enabled_services: list of enabled services to use; if None
124
+            enabled services will be retrieved from all cells with zones set
125
+        :param hostapi: nova.compute.api.HostAPI instance
122 126
     """
123
-    # NOTE(danms): Avoid circular import
124
-    from nova import compute
125
-    hostapi = compute.HostAPI()
127
+    # TODO(mriedem): Make hostapi a required arg in a non-backportable FUP.
128
+    if hostapi is None:
129
+        # NOTE(danms): Avoid circular import
130
+        from nova import compute
131
+        hostapi = compute.HostAPI()
126 132
 
127
-    enabled_services = hostapi.service_get_all(
128
-        context, {'disabled': False}, set_zones=True, all_cells=True)
133
+    if enabled_services is None:
134
+        enabled_services = hostapi.service_get_all(
135
+            context, {'disabled': False}, set_zones=True, all_cells=True)
129 136
 
130 137
     available_zones = []
131 138
     for (zone, host) in [(service['availability_zone'], service['host'])
@@ -140,6 +147,10 @@ def get_availability_zones(context, get_only_available=False,
140 147
             available_zones = list(_available_zones.items())
141 148
 
142 149
     if not get_only_available:
150
+        # TODO(mriedem): We could probably optimize if we know that we're going
151
+        # to get both enabled and disabled services and just pull them all from
152
+        # the cell DBs at the same time and then filter into enabled/disabled
153
+        # lists in python.
143 154
         disabled_services = hostapi.service_get_all(
144 155
             context, {'disabled': True}, set_zones=True, all_cells=True)
145 156
         not_available_zones = []

+ 69
- 23
nova/tests/unit/api/openstack/compute/test_availability_zone.py View File

@@ -36,10 +36,12 @@ from nova.tests import uuidsentinel
36 36
 FAKE_UUID = fakes.FAKE_UUID
37 37
 
38 38
 
39
-def fake_service_get_all(context, disabled=None):
39
+def fake_service_get_all(context, filters=None, **kwargs):
40
+    disabled = filters.get('disabled') if filters else None
41
+
40 42
     def __fake_service(binary, availability_zone,
41 43
                        created_at, updated_at, host, disabled):
42
-        return dict(test_service.fake_service,
44
+        db_s = dict(test_service.fake_service,
43 45
                     binary=binary,
44 46
                     availability_zone=availability_zone,
45 47
                     available_zones=availability_zone,
@@ -47,9 +49,12 @@ def fake_service_get_all(context, disabled=None):
47 49
                     updated_at=updated_at,
48 50
                     host=host,
49 51
                     disabled=disabled)
52
+        # The version field is immutable so remove that before creating the obj
53
+        db_s.pop('version', None)
54
+        return objects.Service(context, **db_s)
50 55
 
51 56
     if disabled:
52
-        return [__fake_service("nova-compute", "zone-2",
57
+        svcs = [__fake_service("nova-compute", "zone-2",
53 58
                                datetime.datetime(2012, 11, 14, 9, 53, 25, 0),
54 59
                                datetime.datetime(2012, 12, 26, 14, 45, 25, 0),
55 60
                                "fake_host-1", True),
@@ -62,7 +67,7 @@ def fake_service_get_all(context, disabled=None):
62 67
                                datetime.datetime(2012, 12, 26, 14, 45, 24, 0),
63 68
                                "fake_host-2", True)]
64 69
     else:
65
-        return [__fake_service("nova-compute", "zone-1",
70
+        svcs = [__fake_service("nova-compute", "zone-1",
66 71
                                datetime.datetime(2012, 11, 14, 9, 53, 25, 0),
67 72
                                datetime.datetime(2012, 12, 26, 14, 45, 25, 0),
68 73
                                "fake_host-1", False),
@@ -70,10 +75,17 @@ def fake_service_get_all(context, disabled=None):
70 75
                                datetime.datetime(2012, 11, 14, 9, 57, 3, 0),
71 76
                                datetime.datetime(2012, 12, 26, 14, 45, 25, 0),
72 77
                                "fake_host-1", False),
78
+                # nova-conductor is in the same zone and host as nova-sched
79
+                # and is here to make sure /detail filters out duplicates.
80
+                __fake_service("nova-conductor", "internal",
81
+                               datetime.datetime(2012, 11, 14, 9, 57, 3, 0),
82
+                               datetime.datetime(2012, 12, 26, 14, 45, 25, 0),
83
+                               "fake_host-1", False),
73 84
                 __fake_service("nova-network", "internal",
74 85
                                datetime.datetime(2012, 11, 16, 7, 25, 46, 0),
75 86
                                datetime.datetime(2012, 12, 26, 14, 45, 24, 0),
76 87
                                "fake_host-2", False)]
88
+    return objects.ServiceList(objects=svcs)
77 89
 
78 90
 
79 91
 class AvailabilityZoneApiTestV21(test.NoDBTestCase):
@@ -83,12 +95,15 @@ class AvailabilityZoneApiTestV21(test.NoDBTestCase):
83 95
         super(AvailabilityZoneApiTestV21, self).setUp()
84 96
         availability_zones.reset_cache()
85 97
         fakes.stub_out_nw_api(self)
86
-        self.stub_out('nova.db.api.service_get_all', fake_service_get_all)
87 98
         self.stub_out('nova.availability_zones.set_availability_zones',
88 99
                       lambda c, services: services)
89 100
         self.stub_out('nova.servicegroup.API.service_is_up',
90 101
                       lambda s, service: service['binary'] != u"nova-network")
91 102
         self.controller = self.availability_zone.AvailabilityZoneController()
103
+        self.mock_service_get_all = mock.patch.object(
104
+            self.controller.host_api, 'service_get_all',
105
+            side_effect=fake_service_get_all).start()
106
+        self.addCleanup(self.mock_service_get_all.stop)
92 107
         self.req = fakes.HTTPRequest.blank('')
93 108
 
94 109
     def test_filtered_availability_zones(self):
@@ -127,25 +142,56 @@ class AvailabilityZoneApiTestV21(test.NoDBTestCase):
127 142
         self.assertEqual(len(zones), 3)
128 143
         timestamp = iso8601.parse_date("2012-12-26T14:45:25Z")
129 144
         nova_network_timestamp = iso8601.parse_date("2012-12-26T14:45:24Z")
130
-        expected = [{'zoneName': 'zone-1',
131
-                    'zoneState': {'available': True},
132
-                    'hosts': {'fake_host-1': {
133
-                        'nova-compute': {'active': True, 'available': True,
134
-                                         'updated_at': timestamp}}}},
135
-                   {'zoneName': 'internal',
136
-                    'zoneState': {'available': True},
137
-                    'hosts': {'fake_host-1': {
138
-                        'nova-sched': {'active': True, 'available': True,
139
-                                       'updated_at': timestamp}},
140
-                              'fake_host-2': {
141
-                                  'nova-network': {
142
-                                      'active': True,
143
-                                      'available': False,
144
-                                      'updated_at': nova_network_timestamp}}}},
145
-                   {'zoneName': 'zone-2',
146
-                    'zoneState': {'available': False},
147
-                    'hosts': None}]
145
+        expected = [
146
+            {
147
+                'zoneName': 'zone-1',
148
+                'zoneState': {'available': True},
149
+                'hosts': {
150
+                    'fake_host-1': {
151
+                        'nova-compute': {
152
+                            'active': True,
153
+                            'available': True,
154
+                            'updated_at': timestamp
155
+                        }
156
+                    }
157
+                }
158
+            },
159
+            {
160
+                'zoneName': 'internal',
161
+                'zoneState': {'available': True},
162
+                'hosts': {
163
+                    'fake_host-1': {
164
+                        'nova-sched': {
165
+                            'active': True,
166
+                            'available': True,
167
+                            'updated_at': timestamp
168
+                        },
169
+                        'nova-conductor': {
170
+                            'active': True,
171
+                            'available': True,
172
+                            'updated_at': timestamp
173
+                        }
174
+                    },
175
+                    'fake_host-2': {
176
+                        'nova-network': {
177
+                            'active': True,
178
+                            'available': False,
179
+                            'updated_at': nova_network_timestamp
180
+                        }
181
+                    }
182
+                }
183
+            },
184
+            {
185
+                'zoneName': 'zone-2',
186
+                'zoneState': {'available': False},
187
+                'hosts': None
188
+            }
189
+        ]
148 190
         self.assertEqual(expected, zones)
191
+        # We get both enabled and disabled services per cell (just one in this
192
+        # test case) so we'll query the services table twice.
193
+        self.assertEqual(2, self.mock_service_get_all.call_count,
194
+                         self.mock_service_get_all.call_args_list)
149 195
 
150 196
     @mock.patch.object(availability_zones, 'get_availability_zones',
151 197
                        return_value=[['nova'], []])

Loading…
Cancel
Save