Check if agent can reach neutron server

During initialization, the OVS agent first installs some basic drop
flows for the physical bridge mappings. If the message queue is not
connected, or all neutron-servers are down, the real traffic flows
are never refreshed afterwards. This brings the data plane down if
tenant networks and provider networks share the same physical NICs.

This patch adds an RPC check to L2 agent initialization. When the
ovs-agent is restarted, it proceeds to the next step only if the MQ
is OK and a neutron-server is available. Otherwise, an RPC timeout is
raised, the L2 agent fails to start, and the physical bridge mapping
drop flows are not installed. The original flows are not replaced, so
traffic can still work properly.

Closes-Bug: #1803919
Change-Id: Ie15cf625b3710eaf290d6aafecb3f65df664b9df
This commit is contained in:
LIU Yulong 2018-11-15 17:49:12 +08:00
parent 9b357c11e1
commit 0385868848
5 changed files with 54 additions and 8 deletions

View File

@ -78,10 +78,14 @@ class PluginReportStateAPI(object):
doc/source/contributor/internals/rpc_api.rst.
"""
def __init__(self, topic):
target = oslo_messaging.Target(topic=topic, version='1.0',
target = oslo_messaging.Target(topic=topic, version='1.2',
namespace=n_const.RPC_NAMESPACE_STATE)
self.client = n_rpc.get_client(target)
def has_alive_neutron_server(self, context, **kwargs):
# Issue an RPC `call` named 'has_alive_neutron_server' through this
# API's client and return its result; **kwargs are forwarded as the
# call arguments. prepare() is invoked without overrides here, unlike
# report_state, which passes an explicit timeout.
cctxt = self.client.prepare()
return cctxt.call(context, 'has_alive_neutron_server', **kwargs)
def report_state(self, context, agent_state, use_call=False):
cctxt = self.client.prepare(
timeout=lib_rpc.TRANSPORT.conf.rpc_response_timeout)

View File

@ -461,9 +461,10 @@ class AgentExtRpcCallback(object):
API version history:
1.0 - Initial version.
1.1 - report_state now returns agent state.
1.2 - add method has_alive_neutron_server.
"""
target = oslo_messaging.Target(version='1.1',
target = oslo_messaging.Target(version='1.2',
namespace=n_const.RPC_NAMESPACE_STATE)
START_TIME = timeutils.utcnow()
@ -477,6 +478,9 @@ class AgentExtRpcCallback(object):
# Initialize RPC api directed to other neutron-servers
self.server_versions_rpc = resources_rpc.ResourcesPushToServersRpcApi()
def has_alive_neutron_server(self, context, **kwargs):
# RPC endpoint added in API version 1.2. It ignores its arguments
# and always returns True.
# NOTE(review): presumably the reply itself is the liveness signal
# (the OVS agent discards the return value and only handles a
# messaging timeout) - confirm no caller depends on the value.
return True
@db_api.retry_if_session_inactive()
def report_state(self, context, **kwargs):
"""Report state from agent to server.

View File

@ -399,6 +399,18 @@ class OVSNeutronAgent(l2population_rpc.L2populationRpcCallBackTunnelMixin,
# RPC network init
self.context = context.get_admin_context_without_session()
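# Make a simple RPC call to the Neutron server and retry on
# messaging timeout, so that the steps below only run once a
# server has answered.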
while True:
try:
self.state_rpc.has_alive_neutron_server(self.context)
except oslo_messaging.MessagingTimeout as e:
LOG.warning('l2-agent cannot contact neutron server. '
'Check connectivity to neutron server. '
'Retrying... '
'Detailed message: %(msg)s.', {'msg': e})
continue
break
# Define the listening consumers for the agent
consumers = [[constants.TUNNEL, topics.UPDATE],
[constants.TUNNEL, topics.DELETE],
@ -1221,6 +1233,17 @@ class OVSNeutronAgent(l2population_rpc.L2populationRpcCallBackTunnelMixin,
self.int_ofports[physical_network] = int_ofport
self.phys_ofports[physical_network] = phys_ofport
# These two drop flows are the root cause of bug #1803919.
# An RPC check is now done during the agent start procedure: if
# the ovs agent cannot reach any neutron server, or all neutron
# servers are down, these flows are not installed.
# That is how bug #1803919 was fixed.
# As a reminder, not much more can be done here, because the
# bridge mappings can vary. A provider (external) network can be
# implicitly set on any physical bridge due to the basic NORMAL
# flow. Different vlan range networks can also have many bridge
# mapping settings, so their tenant network traffic can also be
# blocked by the following drop flows.
# block all untranslated traffic between bridges
self.int_br.drop_port(in_port=int_ofport)
br.drop_port(in_port=phys_ofport)

View File

@ -143,7 +143,9 @@ class TestOvsNeutronAgent(object):
new=MockFixedIntervalLoopingCall),\
mock.patch(
'neutron.agent.common.ovs_lib.OVSBridge.' 'get_vif_ports',
return_value=[]):
return_value=[]),\
mock.patch('neutron.agent.rpc.PluginReportStateAPI.'
'has_alive_neutron_server'):
ext_manager = mock.Mock()
agent = self.mod_agent.OVSNeutronAgent(self._bridge_classes(),
ext_manager, cfg.CONF)
@ -200,7 +202,9 @@ class TestOvsNeutronAgent(object):
return_value=[]), \
mock.patch('neutron.agent.common.ovs_lib.BaseOVS.config',
new_callable=mock.PropertyMock,
return_value={'datapath_types': ['netdev']}):
return_value={'datapath_types': ['netdev']}),\
mock.patch('neutron.agent.rpc.PluginReportStateAPI.'
'has_alive_neutron_server'):
# validate setting non default datapath
expected = constants.OVS_DATAPATH_NETDEV
cfg.CONF.set_override('datapath_type',
@ -245,7 +249,9 @@ class TestOvsNeutronAgent(object):
new=MockFixedIntervalLoopingCall), \
mock.patch(
'neutron.agent.common.ovs_lib.OVSBridge.' 'get_vif_ports',
return_value=[]):
return_value=[]),\
mock.patch('neutron.agent.rpc.PluginReportStateAPI.'
'has_alive_neutron_server'):
# validate setting non default agent_type
expected = 'alt agent type'
cfg.CONF.set_override('agent_type',
@ -2434,7 +2440,9 @@ class AncillaryBridgesTest(object):
return_value=[]),\
mock.patch(
'neutron.agent.common.ovs_lib.OVSBridge.' 'get_vif_ports',
return_value=[]):
return_value=[]),\
mock.patch('neutron.agent.rpc.PluginReportStateAPI.'
'has_alive_neutron_server'):
ext_manager = mock.Mock()
self.agent = self.mod_agent.OVSNeutronAgent(self._bridge_classes(),
ext_manager, cfg.CONF)
@ -2472,7 +2480,9 @@ class AncillaryBridgesTest(object):
side_effect=ancillary), \
mock.patch('neutron.agent.common.ovs_lib.OVSBridge.'
'get_vif_port_set',
return_value=vif_port_set):
return_value=vif_port_set),\
mock.patch('neutron.agent.rpc.PluginReportStateAPI.'
'has_alive_neutron_server'):
ext_manager = mock.Mock()
self.agent = self.mod_agent.OVSNeutronAgent(self._bridge_classes(),
ext_manager, cfg.CONF)
@ -2549,7 +2559,9 @@ class TestOvsDvrNeutronAgent(object):
return_value=[]),\
mock.patch(
'neutron.agent.common.ovs_lib.OVSBridge.' 'get_vif_ports',
return_value=[]):
return_value=[]),\
mock.patch('neutron.agent.rpc.PluginReportStateAPI.'
'has_alive_neutron_server'):
ext_manager = mock.Mock()
self.agent = self.mod_agent.OVSNeutronAgent(self._bridge_classes(),
ext_manager, cfg.CONF)

View File

@ -129,6 +129,9 @@ class TunnelTest(object):
'int-%s' % self.MAP_TUN_BRIDGE: self.MAP_TUN_INT_OFPORT
}
mock.patch('neutron.agent.rpc.PluginReportStateAPI.'
'has_alive_neutron_server').start()
def lookup_br(br_name, *args, **kwargs):
return self.ovs_bridges[br_name]