Retry getting the list of service plugins
On systems that start both neutron-server and neutron-l3-agent together, there is a chance that the first call to neutron will time out. Retry up to 4 more times to avoid the l3 agent exiting on startup. This should make the l3 agent a little more robust on startup, but it is still not ideal: ideally the agent would not exit at all and would instead keep retrying periodically. Change-Id: I2171a164f3f77bccd89895d73c1c8d67f7190488 Closes-Bug: #1353953 Closes-Bug: #1368152 Closes-Bug: #1368795
This commit is contained in:
@@ -22,6 +22,7 @@ eventlet.monkey_patch()
|
|||||||
import netaddr
|
import netaddr
|
||||||
import os
|
import os
|
||||||
from oslo.config import cfg
|
from oslo.config import cfg
|
||||||
|
from oslo import messaging
|
||||||
import Queue
|
import Queue
|
||||||
|
|
||||||
from neutron.agent.common import config
|
from neutron.agent.common import config
|
||||||
@@ -40,6 +41,7 @@ from neutron.common import utils as common_utils
|
|||||||
from neutron import context
|
from neutron import context
|
||||||
from neutron import manager
|
from neutron import manager
|
||||||
from neutron.openstack.common import excutils
|
from neutron.openstack.common import excutils
|
||||||
|
from neutron.openstack.common.gettextutils import _LW
|
||||||
from neutron.openstack.common import importutils
|
from neutron.openstack.common import importutils
|
||||||
from neutron.openstack.common import log as logging
|
from neutron.openstack.common import log as logging
|
||||||
from neutron.openstack.common import loopingcall
|
from neutron.openstack.common import loopingcall
|
||||||
@@ -521,17 +523,35 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback, manager.Manager):
|
|||||||
self.sync_progress = False
|
self.sync_progress = False
|
||||||
|
|
||||||
# Get the list of service plugins from Neutron Server
|
# Get the list of service plugins from Neutron Server
|
||||||
try:
|
# This is the first place where we contact neutron-server on startup
|
||||||
self.neutron_service_plugins = (
|
# so retry in case it's not ready to respond.
|
||||||
self.plugin_rpc.get_service_plugin_list(self.context))
|
retry_count = 5
|
||||||
except n_rpc.RemoteError as e:
|
while True:
|
||||||
LOG.warning(_('l3-agent cannot check service plugins '
|
retry_count = retry_count - 1
|
||||||
'enabled at the neutron server when startup '
|
try:
|
||||||
'due to RPC error. It happens when the server '
|
self.neutron_service_plugins = (
|
||||||
'does not support this RPC API. If the error '
|
self.plugin_rpc.get_service_plugin_list(self.context))
|
||||||
'is UnsupportedVersion you can ignore '
|
except n_rpc.RemoteError as e:
|
||||||
'this warning. Detail message: %s'), e)
|
with excutils.save_and_reraise_exception() as ctx:
|
||||||
self.neutron_service_plugins = None
|
ctx.reraise = False
|
||||||
|
LOG.warning(_LW('l3-agent cannot check service plugins '
|
||||||
|
'enabled at the neutron server when '
|
||||||
|
'startup due to RPC error. It happens '
|
||||||
|
'when the server does not support this '
|
||||||
|
'RPC API. If the error is '
|
||||||
|
'UnsupportedVersion you can ignore this '
|
||||||
|
'warning. Detail message: %s'), e)
|
||||||
|
self.neutron_service_plugins = None
|
||||||
|
except messaging.MessagingTimeout as e:
|
||||||
|
with excutils.save_and_reraise_exception() as ctx:
|
||||||
|
if retry_count > 0:
|
||||||
|
ctx.reraise = False
|
||||||
|
LOG.warning(_LW('l3-agent cannot check service '
|
||||||
|
'plugins enabled on the neutron '
|
||||||
|
'server. Retrying. '
|
||||||
|
'Detail message: %s'), e)
|
||||||
|
continue
|
||||||
|
break
|
||||||
|
|
||||||
self._clean_stale_namespaces = self.conf.use_namespaces
|
self._clean_stale_namespaces = self.conf.use_namespaces
|
||||||
|
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ import datetime
|
|||||||
import mock
|
import mock
|
||||||
import netaddr
|
import netaddr
|
||||||
from oslo.config import cfg
|
from oslo.config import cfg
|
||||||
|
from oslo import messaging
|
||||||
from testtools import matchers
|
from testtools import matchers
|
||||||
|
|
||||||
from neutron.agent.common import config as agent_config
|
from neutron.agent.common import config as agent_config
|
||||||
@@ -2140,6 +2141,26 @@ class TestBasicRouterOperations(base.BaseTestCase):
|
|||||||
self.assertIsNone(agent.neutron_service_plugins)
|
self.assertIsNone(agent.neutron_service_plugins)
|
||||||
self.assertTrue(self.plugin_api.get_service_plugin_list.called)
|
self.assertTrue(self.plugin_api.get_service_plugin_list.called)
|
||||||
|
|
||||||
|
def test_get_service_plugin_list_retried(self):
|
||||||
|
raise_timeout = messaging.MessagingTimeout()
|
||||||
|
# Raise a timeout the first 2 times it calls
|
||||||
|
# get_service_plugin_list then return an empty tuple
|
||||||
|
self.plugin_api.get_service_plugin_list.side_effect = (
|
||||||
|
raise_timeout, raise_timeout, tuple()
|
||||||
|
)
|
||||||
|
agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
|
||||||
|
|
||||||
|
self.assertEqual(agent.neutron_service_plugins, tuple())
|
||||||
|
|
||||||
|
def test_get_service_plugin_list_retried_max(self):
|
||||||
|
raise_timeout = messaging.MessagingTimeout()
|
||||||
|
# Raise a timeout 5 times
|
||||||
|
self.plugin_api.get_service_plugin_list.side_effect = (
|
||||||
|
(raise_timeout, ) * 5
|
||||||
|
)
|
||||||
|
self.assertRaises(messaging.MessagingTimeout, l3_agent.L3NATAgent,
|
||||||
|
HOSTNAME, self.conf)
|
||||||
|
|
||||||
|
|
||||||
class TestL3AgentEventHandler(base.BaseTestCase):
|
class TestL3AgentEventHandler(base.BaseTestCase):
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user