From bb2734b0d524aef348b69ae02988449f9dd63c56 Mon Sep 17 00:00:00 2001 From: LIU Yulong Date: Thu, 21 Feb 2019 16:39:50 +0800 Subject: [PATCH] Do not call update_device_list in large sets Ovs-agent can process ports in large sets, and all of these ports then need their DB status or attributes updated. But the neutron server is centralized: it may be busy with other work, or the database processing itself can be time-consuming. Because of this, ovs-agent sometimes gets an RPC timeout exception, and a fullsync is then triggered in the next rpc loop, so the restart time becomes longer and longer. Add a default step so that the ports are updated in smaller batches, reducing the probability of an RPC timeout. Related-Bug: #1813703 Related-Bug: #1813704 Related-Bug: #1813706 Related-Bug: #1813707 Conflicts: neutron/common/constants.py neutron/agent/rpc.py neutron/tests/unit/plugins/ml2/test_rpc.py Change-Id: Ie37f4a4869969e235ce16b73cdfcbdc98626823e (cherry picked from commit 8408af4f173a0ffde354599e26c49bf9e17e8bef) --- neutron/agent/rpc.py | 29 +++++++++++++++++++--- neutron/common/constants.py | 4 +++ neutron/tests/unit/plugins/ml2/test_rpc.py | 10 +++++++- 3 files changed, 38 insertions(+), 5 deletions(-) diff --git a/neutron/agent/rpc.py b/neutron/agent/rpc.py index f4f30ecc4c1..e08ace5b464 100644 --- a/neutron/agent/rpc.py +++ b/neutron/agent/rpc.py @@ -141,10 +141,31 @@ class PluginApi(object): def update_device_list(self, context, devices_up, devices_down, agent_id, host, agent_restarted=False): cctxt = self.client.prepare(version='1.5') - return cctxt.call(context, 'update_device_list', - devices_up=devices_up, devices_down=devices_down, - agent_id=agent_id, host=host, - agent_restarted=agent_restarted) + + ret_devices_up = [] + failed_devices_up = [] + ret_devices_down = [] + failed_devices_down = [] + + step = n_const.RPC_RES_PROCESSING_STEP + devices_up = list(devices_up) + devices_down = list(devices_down) + for i in range(0, max(len(devices_up), len(devices_down)), step): + # 
Divide-and-conquer RPC timeout + ret = cctxt.call(context, 'update_device_list', + devices_up=devices_up[i:i + step], + devices_down=devices_down[i:i + step], + agent_id=agent_id, host=host, + agent_restarted=agent_restarted) + ret_devices_up.extend(ret.get("devices_up", [])) + failed_devices_up.extend(ret.get("failed_devices_up", [])) + ret_devices_down.extend(ret.get("devices_down", [])) + failed_devices_down.extend(ret.get("failed_devices_down", [])) + + return {'devices_up': ret_devices_up, + 'failed_devices_up': failed_devices_up, + 'devices_down': ret_devices_down, + 'failed_devices_down': failed_devices_down} def tunnel_sync(self, context, tunnel_ip, tunnel_type=None, host=None): cctxt = self.client.prepare(version='1.4') diff --git a/neutron/common/constants.py b/neutron/common/constants.py index c43300d5307..f3550aa93af 100644 --- a/neutron/common/constants.py +++ b/neutron/common/constants.py @@ -246,3 +246,7 @@ AGENT_RES_PROCESSING_STEP = 100 # IPtables version to support --random-fully option. # Do not move this constant to neutron-lib, since it is temporary IPTABLES_RANDOM_FULLY_VERSION = '1.6.2' + +# Number of resources for neutron to divide the large RPC +# call data sets. 
+RPC_RES_PROCESSING_STEP = 20 diff --git a/neutron/tests/unit/plugins/ml2/test_rpc.py b/neutron/tests/unit/plugins/ml2/test_rpc.py index c471fd3358b..8e85fff784d 100644 --- a/neutron/tests/unit/plugins/ml2/test_rpc.py +++ b/neutron/tests/unit/plugins/ml2/test_rpc.py @@ -317,9 +317,17 @@ class RpcCallbacksTestCase(base.BaseTestCase): class RpcApiTestCase(base.BaseTestCase): def _test_rpc_api(self, rpcapi, topic, method, rpc_method, **kwargs): + if method == "update_device_list": + expected = {'devices_up': [], + 'failed_devices_up': [], + 'devices_down': [], + 'failed_devices_down': []} + else: + expected = 'foo' + ctxt = oslo_context.RequestContext(user='fake_user', tenant='fake_project') - expected_retval = 'foo' if rpc_method == 'call' else None + expected_retval = expected if rpc_method == 'call' else None expected_version = kwargs.pop('version', None) fanout = kwargs.pop('fanout', False)