ovs-agent: React to DB down just like to server down

When neutron-server is down, ovs-agent waits for it to become available
during agent startup. When neutron-server is up but cannot reach the
DB, it is just as unable to do anything useful for the agent. However,
ovs-agent reacted differently to this failure. With this patch it reacts
the same way and delays its startup until neutron-server is up together
with its DB.

Change-Id: Ia55e82540aedc236e9b016bb58047d0b437eeb99
Closes-Bug: #2025341
This commit is contained in:
Bence Romsics 2023-06-27 13:24:43 +02:00
parent 0c5d4b8728
commit 6c513217c2
6 changed files with 53 additions and 4 deletions

View File

@ -81,7 +81,7 @@ class PluginReportStateAPI(object):
doc/source/contributor/internals/rpc_api.rst.
"""
def __init__(self, topic):
target = oslo_messaging.Target(topic=topic, version='1.2',
target = oslo_messaging.Target(topic=topic, version='1.3',
namespace=constants.RPC_NAMESPACE_STATE)
self.client = lib_rpc.get_client(target)
self.timeout = cfg.CONF.AGENT.report_interval

View File

@ -33,6 +33,7 @@ from neutron_lib.exceptions import agent as agent_exc
from neutron_lib.exceptions import availability_zone as az_exc
from neutron_lib.plugins import directory
from oslo_config import cfg
from oslo_db import exception as db_exc
from oslo_log import log as logging
import oslo_messaging
from oslo_serialization import jsonutils
@ -472,9 +473,10 @@ class AgentExtRpcCallback(object):
1.0 - Initial version.
1.1 - report_state now returns agent state.
1.2 - add method has_alive_neutron_server.
1.3 - has_alive_neutron_server tests db connection.
"""
target = oslo_messaging.Target(version='1.2',
target = oslo_messaging.Target(version='1.3',
namespace=constants.RPC_NAMESPACE_STATE)
START_TIME = timeutils.utcnow()
@ -488,8 +490,23 @@ class AgentExtRpcCallback(object):
# Initialize RPC api directed to other neutron-servers
self.server_versions_rpc = resources_rpc.ResourcesPushToServersRpcApi()
@db_api.CONTEXT_READER
def has_alive_neutron_server(self, context, **kwargs):
    """Report basic server liveness to agents.

    Agents use this method to check basic server status. Up to version
    1.2 it always returned True, so a reply only proved MQ connectivity.
    Since version 1.3 the reply reflects a DB connection liveness check,
    so an agent can verify both MQ and DB connectivity with one call.

    :param context: request context; its ``session`` runs the probe query.
    :param kwargs: accepted but not used by this method.
    :returns: True if the probe query executed, False if it raised.
    """
    try:
        # Trivial query: only whether it can be executed matters.
        # NOTE(review): SQLAlchemy 2.0 is understood to require textual
        # SQL to be wrapped in text() -- confirm against the SQLAlchemy
        # version this tree is pinned to.
        context.session.execute('SELECT 1;')
    except db_exc.DBConnectionError:
        # Expected failure mode: the database cannot be reached.
        db_reachable = False
    except Exception:
        # Anything else is unexpected; record the traceback but still
        # answer the agent instead of propagating the error.
        LOG.exception('Unexpected exception')
        db_reachable = False
    else:
        db_reachable = True
    return db_reachable
@db_api.retry_if_session_inactive()
def report_state(self, context, **kwargs):

View File

@ -553,7 +553,14 @@ class OVSNeutronAgent(l2population_rpc.L2populationRpcCallBackTunnelMixin,
# Made a simple RPC call to Neutron Server.
while True:
try:
self.state_rpc.has_alive_neutron_server(self.context)
alive = self.state_rpc.has_alive_neutron_server(self.context)
if not alive:
LOG.warning(
'neutron server cannot contact the database or the '
'database is slow to respond. Check connectivity '
'from neutron server to db. Retrying... ')
time.sleep(self.conf.AGENT.report_interval)
continue
except oslo_messaging.MessagingTimeout as e:
LOG.warning('l2-agent cannot contact neutron server. '
'Check connectivity to neutron server. '

View File

@ -382,3 +382,7 @@ class TestAgentExtRpcCallback(TestAgentsDbBase):
agent_objs[0].heartbeat_timestamp - datetime.timedelta(
hours=1))
agent_objs[0].update()
def test_has_alive_neutron_server(self):
    """The callback reports the server as alive on the happy path."""
    self.assertTrue(
        self.callback.has_alive_neutron_server(self.context))

View File

@ -3008,6 +3008,20 @@ class TestOvsNeutronAgent(object):
{"invalid": "thread"}),
)
def test_setup_rpc_waits_for_alive_neutron_server(self):
    """setup_rpc() keeps polling until the server reports itself alive.

    The mocked liveness check first times out on the message bus, then
    answers False (server reachable, DB not), and finally answers True.
    setup_rpc() must consume all three replies before returning.
    """
    with mock.patch(
            'neutron.plugins.ml2.drivers.openvswitch.agent.'
            'ovs_neutron_agent.OVSPluginApi'),\
        mock.patch(
            'neutron.agent.rpc.PluginReportStateAPI.'
            'has_alive_neutron_server') as mock_has_alive:
        mock_has_alive.side_effect = [
            oslo_messaging.MessagingTimeout,
            False,
            True
        ]
        self.agent.setup_rpc()
        # Without this check the test would also pass if setup_rpc()
        # stopped polling on the False reply, i.e. after two calls.
        self.assertEqual(3, mock_has_alive.call_count)
class TestOvsNeutronAgentOSKen(TestOvsNeutronAgent,
ovs_test_base.OVSOSKenTestBase):

View File

@ -0,0 +1,7 @@
---
other:
- |
PluginReportStateAPI has a new version (1.3) in which
has_alive_neutron_server() no longer always returns True, but performs
a DB connection check and returns True/False accordingly. Using this, an
agent can check not just MQ connectivity but the server's DB connectivity too.