ovs-agent: React to DB down just like to server down
When neutron-server is down, ovs-agent waits for it to become available during agent startup. When neutron-server is up but cannot reach the DB, it is just as unable to do anything useful. However, ovs-agent reacted differently to this failure. With this patch it reacts the same way and delays its startup until neutron-server is up together with its DB. Change-Id: Ia55e82540aedc236e9b016bb58047d0b437eeb99 Closes-Bug: #2025341
This commit is contained in:
parent
0c5d4b8728
commit
6c513217c2
@ -81,7 +81,7 @@ class PluginReportStateAPI(object):
|
||||
doc/source/contributor/internals/rpc_api.rst.
|
||||
"""
|
||||
def __init__(self, topic):
|
||||
target = oslo_messaging.Target(topic=topic, version='1.2',
|
||||
target = oslo_messaging.Target(topic=topic, version='1.3',
|
||||
namespace=constants.RPC_NAMESPACE_STATE)
|
||||
self.client = lib_rpc.get_client(target)
|
||||
self.timeout = cfg.CONF.AGENT.report_interval
|
||||
|
@ -33,6 +33,7 @@ from neutron_lib.exceptions import agent as agent_exc
|
||||
from neutron_lib.exceptions import availability_zone as az_exc
|
||||
from neutron_lib.plugins import directory
|
||||
from oslo_config import cfg
|
||||
from oslo_db import exception as db_exc
|
||||
from oslo_log import log as logging
|
||||
import oslo_messaging
|
||||
from oslo_serialization import jsonutils
|
||||
@ -472,9 +473,10 @@ class AgentExtRpcCallback(object):
|
||||
1.0 - Initial version.
|
||||
1.1 - report_state now returns agent state.
|
||||
1.2 - add method has_alive_neutron_server.
|
||||
1.3 - has_alive_neutron_server tests db connection.
|
||||
"""
|
||||
|
||||
target = oslo_messaging.Target(version='1.2',
|
||||
target = oslo_messaging.Target(version='1.3',
|
||||
namespace=constants.RPC_NAMESPACE_STATE)
|
||||
START_TIME = timeutils.utcnow()
|
||||
|
||||
@ -488,8 +490,23 @@ class AgentExtRpcCallback(object):
|
||||
# Initialize RPC api directed to other neutron-servers
|
||||
self.server_versions_rpc = resources_rpc.ResourcesPushToServersRpcApi()
|
||||
|
||||
@db_api.CONTEXT_READER
|
||||
def has_alive_neutron_server(self, context, **kwargs):
|
||||
return True
|
||||
"""Give basic server status to agents.
|
||||
|
||||
Method for agents to check basic server status. Version 1.2 always
|
||||
returned True, so only MQ connectivity could be checked. Version 1.3
|
||||
returns True or False according to a DB connection liveness check, so
|
||||
both MQ and DB connectivity can be checked from the agent.
|
||||
"""
|
||||
try:
|
||||
context.session.execute('SELECT 1;')
|
||||
return True
|
||||
except db_exc.DBConnectionError:
|
||||
return False
|
||||
except Exception:
|
||||
LOG.exception('Unexpected exception')
|
||||
return False
|
||||
|
||||
@db_api.retry_if_session_inactive()
|
||||
def report_state(self, context, **kwargs):
|
||||
|
@ -553,7 +553,14 @@ class OVSNeutronAgent(l2population_rpc.L2populationRpcCallBackTunnelMixin,
|
||||
# Made a simple RPC call to Neutron Server.
|
||||
while True:
|
||||
try:
|
||||
self.state_rpc.has_alive_neutron_server(self.context)
|
||||
alive = self.state_rpc.has_alive_neutron_server(self.context)
|
||||
if not alive:
|
||||
LOG.warning(
|
||||
'neutron server cannot contact the database or the '
|
||||
'database is slow to respond. Check connectivity '
|
||||
'from neutron server to db. Retrying... ')
|
||||
time.sleep(self.conf.AGENT.report_interval)
|
||||
continue
|
||||
except oslo_messaging.MessagingTimeout as e:
|
||||
LOG.warning('l2-agent cannot contact neutron server. '
|
||||
'Check connectivity to neutron server. '
|
||||
|
@ -382,3 +382,7 @@ class TestAgentExtRpcCallback(TestAgentsDbBase):
|
||||
agent_objs[0].heartbeat_timestamp - datetime.timedelta(
|
||||
hours=1))
|
||||
agent_objs[0].update()
|
||||
|
||||
def test_has_alive_neutron_server(self):
|
||||
alive = self.callback.has_alive_neutron_server(self.context)
|
||||
self.assertTrue(alive)
|
||||
|
@ -3008,6 +3008,20 @@ class TestOvsNeutronAgent(object):
|
||||
{"invalid": "thread"}),
|
||||
)
|
||||
|
||||
def test_setup_rpc_waits_for_alive_neutron_server(self):
|
||||
with mock.patch(
|
||||
'neutron.plugins.ml2.drivers.openvswitch.agent.'
|
||||
'ovs_neutron_agent.OVSPluginApi'),\
|
||||
mock.patch(
|
||||
'neutron.agent.rpc.PluginReportStateAPI.'
|
||||
'has_alive_neutron_server') as mock_has_alive:
|
||||
mock_has_alive.side_effect = [
|
||||
oslo_messaging.MessagingTimeout,
|
||||
False,
|
||||
True
|
||||
]
|
||||
self.agent.setup_rpc()
|
||||
|
||||
|
||||
class TestOvsNeutronAgentOSKen(TestOvsNeutronAgent,
|
||||
ovs_test_base.OVSOSKenTestBase):
|
||||
|
@ -0,0 +1,7 @@
|
||||
---
|
||||
other:
|
||||
- |
|
||||
PluginReportStateAPI has a new version (1.3) in which
|
||||
has_alive_neutron_server() no longer always returns True, but performs
|
||||
a DB connection check and returns True/False accordingly. Using this, an
|
||||
agent can check not just MQ but the server's DB connectivity too.
|
Loading…
Reference in New Issue
Block a user