Do not report ovs agent state if ovs is dead

Do not report OVS agent state when OVS is dead,
and let neutron-server mark the service as down, so
that the cluster admin can determine that there is a
problem with the given OVS agent.

Change-Id: Ib4b06c7877a7343f4204d4f4f5863931717ff507
Closes-Bug: #1910946
This commit is contained in:
shenjiatong 2021-01-11 11:57:28 +08:00
parent 607f15c1ac
commit 5d8f3fd614
3 changed files with 24 additions and 4 deletions

View File

@ -306,6 +306,7 @@ class OVSNeutronAgent(l2population_rpc.L2populationRpcCallBackTunnelMixin,
self.prevent_arp_spoofing = (
not self.sg_agent.firewall.provides_arp_spoofing_protection)
self.ovs_status = None
self.failed_report_state = False
# TODO(mangelajo): optimize resource_versions to only report
# versions about resources which are common,
@ -372,6 +373,11 @@ class OVSNeutronAgent(l2population_rpc.L2populationRpcCallBackTunnelMixin,
raise ValueError(_("Parsing bridge_mappings failed: %s.") % e)
def _report_state(self):
# return and skip reporting agent state if OVS is dead
if self.ovs_status == constants.OVS_DEAD:
LOG.error("OVS is down, not reporting state to server")
return
# How many devices are likely used by a VM
self.agent_state.get('configurations')['devices'] = (
self.int_br_device_count)
@ -2554,12 +2560,12 @@ class OVSNeutronAgent(l2population_rpc.L2populationRpcCallBackTunnelMixin,
start = time.time()
LOG.info("Agent rpc_loop - iteration:%d started",
self.iter_num)
ovs_status = self.check_ovs_status()
self.ovs_status = self.check_ovs_status()
bridges_recreated = False
if ovs_status == constants.OVS_RESTARTED:
if self.ovs_status == constants.OVS_RESTARTED:
self._handle_ovs_restart(polling_manager)
tunnel_sync = self.enable_tunneling or tunnel_sync
elif ovs_status == constants.OVS_DEAD:
elif self.ovs_status == constants.OVS_DEAD:
# Agent doesn't apply any operations when ovs is dead, to
# prevent unexpected failure or crash. Sleep and continue
# loop in which ovs status will be checked periodically.
@ -2584,7 +2590,7 @@ class OVSNeutronAgent(l2population_rpc.L2populationRpcCallBackTunnelMixin,
except Exception:
LOG.exception("Error while configuring tunnel endpoints")
tunnel_sync = True
ovs_restarted |= (ovs_status == constants.OVS_RESTARTED)
ovs_restarted |= (self.ovs_status == constants.OVS_RESTARTED)
devices_need_retry = (any(failed_devices.values()) or
any(failed_ancillary_devices.values()) or
ports_not_ready_yet)

View File

@ -1256,6 +1256,14 @@ class TestOvsNeutronAgent(object):
self.agent.agent_state, True)
self.systemd_notify.assert_not_called()
def test_not_report_state_when_ovs_dead(self):
# With ovs_status set to OVS_DEAD, _report_state() is expected to return
# without calling state_rpc.report_state() or systemd_notify.
with mock.patch.object(self.agent.state_rpc,
"report_state") as report_st:
self.agent.ovs_status = constants.OVS_DEAD
self.agent._report_state()
report_st.assert_not_called()
self.systemd_notify.assert_not_called()
def test_report_state_revived(self):
with mock.patch.object(self.agent.state_rpc,
"report_state") as report_st:

View File

@ -0,0 +1,6 @@
---
fixes:
- |
Stop sending agent heartbeats from the OVS agent when it detects
that OVS is dead. This helps alert cloud operators that something
is wrong on the given node.