Merge "Reconcile quitting_rpc_timeout with backoff RPC client"
This commit is contained in:
commit
f6546bcb1d
@ -89,6 +89,14 @@ def get_allowed_exmods():
|
|||||||
return ALLOWED_EXMODS + EXTRA_EXMODS
|
return ALLOWED_EXMODS + EXTRA_EXMODS
|
||||||
|
|
||||||
|
|
||||||
|
def _get_default_method_timeout():
|
||||||
|
return TRANSPORT.conf.rpc_response_timeout
|
||||||
|
|
||||||
|
|
||||||
|
def _get_default_method_timeouts():
|
||||||
|
return collections.defaultdict(_get_default_method_timeout)
|
||||||
|
|
||||||
|
|
||||||
class _ContextWrapper(object):
|
class _ContextWrapper(object):
|
||||||
"""Wraps oslo messaging contexts to set the timeout for calls.
|
"""Wraps oslo messaging contexts to set the timeout for calls.
|
||||||
|
|
||||||
@ -99,12 +107,28 @@ class _ContextWrapper(object):
|
|||||||
servers are more frequently the cause of timeouts rather than lost
|
servers are more frequently the cause of timeouts rather than lost
|
||||||
messages.
|
messages.
|
||||||
"""
|
"""
|
||||||
_METHOD_TIMEOUTS = collections.defaultdict(
|
_METHOD_TIMEOUTS = _get_default_method_timeouts()
|
||||||
lambda: TRANSPORT.conf.rpc_response_timeout)
|
_max_timeout = None
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def reset_timeouts(cls):
|
def reset_timeouts(cls):
|
||||||
cls._METHOD_TIMEOUTS.clear()
|
# restore the original default timeout factory
|
||||||
|
cls._METHOD_TIMEOUTS = _get_default_method_timeouts()
|
||||||
|
cls._max_timeout = None
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_max_timeout(cls):
|
||||||
|
return cls._max_timeout or _get_default_method_timeout() * 10
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def set_max_timeout(cls, max_timeout):
|
||||||
|
if max_timeout < cls.get_max_timeout():
|
||||||
|
cls._METHOD_TIMEOUTS = collections.defaultdict(
|
||||||
|
lambda: max_timeout, **{
|
||||||
|
k: min(v, max_timeout)
|
||||||
|
for k, v in cls._METHOD_TIMEOUTS.items()
|
||||||
|
})
|
||||||
|
cls._max_timeout = max_timeout
|
||||||
|
|
||||||
def __init__(self, original_context):
|
def __init__(self, original_context):
|
||||||
self._original_context = original_context
|
self._original_context = original_context
|
||||||
@ -127,7 +151,11 @@ class _ContextWrapper(object):
|
|||||||
return self._original_context.call(ctxt, method, **kwargs)
|
return self._original_context.call(ctxt, method, **kwargs)
|
||||||
except oslo_messaging.MessagingTimeout:
|
except oslo_messaging.MessagingTimeout:
|
||||||
with excutils.save_and_reraise_exception():
|
with excutils.save_and_reraise_exception():
|
||||||
wait = random.uniform(0, TRANSPORT.conf.rpc_response_timeout)
|
wait = random.uniform(
|
||||||
|
0,
|
||||||
|
min(self._METHOD_TIMEOUTS[scoped_method],
|
||||||
|
TRANSPORT.conf.rpc_response_timeout)
|
||||||
|
)
|
||||||
LOG.error(_LE("Timeout in RPC method %(method)s. Waiting for "
|
LOG.error(_LE("Timeout in RPC method %(method)s. Waiting for "
|
||||||
"%(wait)s seconds before next attempt. If the "
|
"%(wait)s seconds before next attempt. If the "
|
||||||
"server is not down, consider increasing the "
|
"server is not down, consider increasing the "
|
||||||
@ -135,8 +163,8 @@ class _ContextWrapper(object):
|
|||||||
"server(s) may be overloaded and unable to "
|
"server(s) may be overloaded and unable to "
|
||||||
"respond quickly enough."),
|
"respond quickly enough."),
|
||||||
{'wait': int(round(wait)), 'method': scoped_method})
|
{'wait': int(round(wait)), 'method': scoped_method})
|
||||||
ceiling = TRANSPORT.conf.rpc_response_timeout * 10
|
new_timeout = min(
|
||||||
new_timeout = min(self._original_context.timeout * 2, ceiling)
|
self._original_context.timeout * 2, self.get_max_timeout())
|
||||||
if new_timeout > self._METHOD_TIMEOUTS[scoped_method]:
|
if new_timeout > self._METHOD_TIMEOUTS[scoped_method]:
|
||||||
LOG.warning(_LW("Increasing timeout for %(method)s calls "
|
LOG.warning(_LW("Increasing timeout for %(method)s calls "
|
||||||
"to %(new)s seconds. Restart the agent to "
|
"to %(new)s seconds. Restart the agent to "
|
||||||
@ -159,6 +187,11 @@ class BackingOffClient(oslo_messaging.RPCClient):
|
|||||||
# don't enclose Contexts that explicitly set a timeout
|
# don't enclose Contexts that explicitly set a timeout
|
||||||
return _ContextWrapper(ctx) if 'timeout' not in kwargs else ctx
|
return _ContextWrapper(ctx) if 'timeout' not in kwargs else ctx
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def set_max_timeout(max_timeout):
|
||||||
|
'''Set RPC timeout ceiling for all backing-off RPC clients.'''
|
||||||
|
_ContextWrapper.set_max_timeout(max_timeout)
|
||||||
|
|
||||||
|
|
||||||
def get_client(target, version_cap=None, serializer=None):
|
def get_client(target, version_cap=None, serializer=None):
|
||||||
assert TRANSPORT is not None
|
assert TRANSPORT is not None
|
||||||
|
@ -2090,6 +2090,9 @@ class OVSNeutronAgent(sg_rpc.SecurityGroupAgentRpcCallbackMixin,
|
|||||||
def _handle_sigterm(self, signum, frame):
|
def _handle_sigterm(self, signum, frame):
|
||||||
self.catch_sigterm = True
|
self.catch_sigterm = True
|
||||||
if self.quitting_rpc_timeout:
|
if self.quitting_rpc_timeout:
|
||||||
|
LOG.info(
|
||||||
|
_LI('SIGTERM received, capping RPC timeout by %d seconds.'),
|
||||||
|
self.quitting_rpc_timeout)
|
||||||
self.set_rpc_timeout(self.quitting_rpc_timeout)
|
self.set_rpc_timeout(self.quitting_rpc_timeout)
|
||||||
|
|
||||||
def _handle_sighup(self, signum, frame):
|
def _handle_sighup(self, signum, frame):
|
||||||
@ -2112,7 +2115,7 @@ class OVSNeutronAgent(sg_rpc.SecurityGroupAgentRpcCallbackMixin,
|
|||||||
def set_rpc_timeout(self, timeout):
|
def set_rpc_timeout(self, timeout):
|
||||||
for rpc_api in (self.plugin_rpc, self.sg_plugin_rpc,
|
for rpc_api in (self.plugin_rpc, self.sg_plugin_rpc,
|
||||||
self.dvr_plugin_rpc, self.state_rpc):
|
self.dvr_plugin_rpc, self.state_rpc):
|
||||||
rpc_api.client.timeout = timeout
|
rpc_api.client.set_max_timeout(timeout)
|
||||||
|
|
||||||
def _check_agent_configurations(self):
|
def _check_agent_configurations(self):
|
||||||
if (self.enable_distributed_routing and self.enable_tunneling
|
if (self.enable_distributed_routing and self.enable_tunneling
|
||||||
|
@ -433,6 +433,26 @@ class TimeoutTestCase(base.DietTestCase):
|
|||||||
for call in rpc.TRANSPORT._send.call_args_list]
|
for call in rpc.TRANSPORT._send.call_args_list]
|
||||||
self.assertEqual([1, 2], timeouts)
|
self.assertEqual([1, 2], timeouts)
|
||||||
|
|
||||||
|
def test_set_max_timeout_caps_all_methods(self):
|
||||||
|
rpc.TRANSPORT.conf.rpc_response_timeout = 300
|
||||||
|
rpc._ContextWrapper._METHOD_TIMEOUTS['method_1'] = 100
|
||||||
|
rpc.BackingOffClient.set_max_timeout(50)
|
||||||
|
# both explicitly tracked
|
||||||
|
self.assertEqual(50, rpc._ContextWrapper._METHOD_TIMEOUTS['method_1'])
|
||||||
|
# as well as new methods
|
||||||
|
self.assertEqual(50, rpc._ContextWrapper._METHOD_TIMEOUTS['method_2'])
|
||||||
|
|
||||||
|
def test_set_max_timeout_retains_lower_timeouts(self):
|
||||||
|
rpc._ContextWrapper._METHOD_TIMEOUTS['method_1'] = 10
|
||||||
|
rpc.BackingOffClient.set_max_timeout(50)
|
||||||
|
self.assertEqual(10, rpc._ContextWrapper._METHOD_TIMEOUTS['method_1'])
|
||||||
|
|
||||||
|
def test_set_max_timeout_overrides_default_timeout(self):
|
||||||
|
rpc.TRANSPORT.conf.rpc_response_timeout = 10
|
||||||
|
self.assertEqual(10 * 10, rpc._ContextWrapper.get_max_timeout())
|
||||||
|
rpc._ContextWrapper.set_max_timeout(10)
|
||||||
|
self.assertEqual(10, rpc._ContextWrapper.get_max_timeout())
|
||||||
|
|
||||||
|
|
||||||
class TestConnection(base.DietTestCase):
|
class TestConnection(base.DietTestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
|
@ -28,6 +28,7 @@ from neutron.agent.common import utils
|
|||||||
from neutron.agent.linux import async_process
|
from neutron.agent.linux import async_process
|
||||||
from neutron.agent.linux import ip_lib
|
from neutron.agent.linux import ip_lib
|
||||||
from neutron.common import constants as c_const
|
from neutron.common import constants as c_const
|
||||||
|
from neutron.common import rpc as n_rpc
|
||||||
from neutron.plugins.common import constants as p_const
|
from neutron.plugins.common import constants as p_const
|
||||||
from neutron.plugins.ml2.drivers.l2pop import rpc as l2pop_rpc
|
from neutron.plugins.ml2.drivers.l2pop import rpc as l2pop_rpc
|
||||||
from neutron.plugins.ml2.drivers.openvswitch.agent.common import constants
|
from neutron.plugins.ml2.drivers.openvswitch.agent.common import constants
|
||||||
@ -1929,12 +1930,14 @@ class TestOvsNeutronAgent(object):
|
|||||||
self.assertFalse(cleanup.called)
|
self.assertFalse(cleanup.called)
|
||||||
|
|
||||||
def test_set_rpc_timeout(self):
|
def test_set_rpc_timeout(self):
|
||||||
self.agent._handle_sigterm(None, None)
|
with mock.patch.object(
|
||||||
for rpc_client in (self.agent.plugin_rpc.client,
|
n_rpc.BackingOffClient, 'set_max_timeout') as smt:
|
||||||
self.agent.sg_plugin_rpc.client,
|
self.agent._handle_sigterm(None, None)
|
||||||
self.agent.dvr_plugin_rpc.client,
|
for rpc_client in (self.agent.plugin_rpc.client,
|
||||||
self.agent.state_rpc.client):
|
self.agent.sg_plugin_rpc.client,
|
||||||
self.assertEqual(10, rpc_client.timeout)
|
self.agent.dvr_plugin_rpc.client,
|
||||||
|
self.agent.state_rpc.client):
|
||||||
|
smt.assert_called_with(10)
|
||||||
|
|
||||||
def test_set_rpc_timeout_no_value(self):
|
def test_set_rpc_timeout_no_value(self):
|
||||||
self.agent.quitting_rpc_timeout = None
|
self.agent.quitting_rpc_timeout = None
|
||||||
|
Loading…
Reference in New Issue
Block a user