Do not report ovs agent state if ovs is dead
Do not report the OVS agent state when OVS is dead, and let neutron-server mark the service as down, so that the cluster admin can determine that there is a problem with the given OVS agent. Change-Id: Ib4b06c7877a7343f4204d4f4f5863931717ff507 Closes-Bug: #1910946
This commit is contained in:
parent
607f15c1ac
commit
5d8f3fd614
@ -306,6 +306,7 @@ class OVSNeutronAgent(l2population_rpc.L2populationRpcCallBackTunnelMixin,
|
||||
self.prevent_arp_spoofing = (
|
||||
not self.sg_agent.firewall.provides_arp_spoofing_protection)
|
||||
|
||||
self.ovs_status = None
|
||||
self.failed_report_state = False
|
||||
# TODO(mangelajo): optimize resource_versions to only report
|
||||
# versions about resources which are common,
|
||||
@ -372,6 +373,11 @@ class OVSNeutronAgent(l2population_rpc.L2populationRpcCallBackTunnelMixin,
|
||||
raise ValueError(_("Parsing bridge_mappings failed: %s.") % e)
|
||||
|
||||
def _report_state(self):
|
||||
# return and skip reporting agent state if OVS is dead
|
||||
if self.ovs_status == constants.OVS_DEAD:
|
||||
LOG.error("OVS is down, not reporting state to server")
|
||||
return
|
||||
|
||||
# How many devices are likely used by a VM
|
||||
self.agent_state.get('configurations')['devices'] = (
|
||||
self.int_br_device_count)
|
||||
@ -2554,12 +2560,12 @@ class OVSNeutronAgent(l2population_rpc.L2populationRpcCallBackTunnelMixin,
|
||||
start = time.time()
|
||||
LOG.info("Agent rpc_loop - iteration:%d started",
|
||||
self.iter_num)
|
||||
ovs_status = self.check_ovs_status()
|
||||
self.ovs_status = self.check_ovs_status()
|
||||
bridges_recreated = False
|
||||
if ovs_status == constants.OVS_RESTARTED:
|
||||
if self.ovs_status == constants.OVS_RESTARTED:
|
||||
self._handle_ovs_restart(polling_manager)
|
||||
tunnel_sync = self.enable_tunneling or tunnel_sync
|
||||
elif ovs_status == constants.OVS_DEAD:
|
||||
elif self.ovs_status == constants.OVS_DEAD:
|
||||
# Agent doesn't apply any operations when ovs is dead, to
|
||||
# prevent unexpected failure or crash. Sleep and continue
|
||||
# loop in which ovs status will be checked periodically.
|
||||
@ -2584,7 +2590,7 @@ class OVSNeutronAgent(l2population_rpc.L2populationRpcCallBackTunnelMixin,
|
||||
except Exception:
|
||||
LOG.exception("Error while configuring tunnel endpoints")
|
||||
tunnel_sync = True
|
||||
ovs_restarted |= (ovs_status == constants.OVS_RESTARTED)
|
||||
ovs_restarted |= (self.ovs_status == constants.OVS_RESTARTED)
|
||||
devices_need_retry = (any(failed_devices.values()) or
|
||||
any(failed_ancillary_devices.values()) or
|
||||
ports_not_ready_yet)
|
||||
|
@ -1256,6 +1256,14 @@ class TestOvsNeutronAgent(object):
|
||||
self.agent.agent_state, True)
|
||||
self.systemd_notify.assert_not_called()
|
||||
|
||||
def test_not_report_state_when_ovs_dead(self):
|
||||
with mock.patch.object(self.agent.state_rpc,
|
||||
"report_state") as report_st:
|
||||
self.agent.ovs_status = constants.OVS_DEAD
|
||||
self.agent._report_state()
|
||||
report_st.assert_not_called()
|
||||
self.systemd_notify.assert_not_called()
|
||||
|
||||
def test_report_state_revived(self):
|
||||
with mock.patch.object(self.agent.state_rpc,
|
||||
"report_state") as report_st:
|
||||
|
6
releasenotes/notes/bug-7dc8245da8e0e571.yaml
Normal file
6
releasenotes/notes/bug-7dc8245da8e0e571.yaml
Normal file
@ -0,0 +1,6 @@
|
||||
---
|
||||
fixes:
|
||||
- |
|
||||
Stop sending the agent heartbeat from the OVS agent when it detects
|
||||
OVS is dead. This helps alert cloud operators that there
|
||||
is something wrong on the given node.
|
Loading…
x
Reference in New Issue
Block a user