diff --git a/neutron/plugins/ml2/drivers/openvswitch/agent/ovs_neutron_agent.py b/neutron/plugins/ml2/drivers/openvswitch/agent/ovs_neutron_agent.py index 758af784343..16915f9eb70 100644 --- a/neutron/plugins/ml2/drivers/openvswitch/agent/ovs_neutron_agent.py +++ b/neutron/plugins/ml2/drivers/openvswitch/agent/ovs_neutron_agent.py @@ -306,6 +306,7 @@ class OVSNeutronAgent(l2population_rpc.L2populationRpcCallBackTunnelMixin, self.prevent_arp_spoofing = ( not self.sg_agent.firewall.provides_arp_spoofing_protection) + self.ovs_status = None self.failed_report_state = False # TODO(mangelajo): optimize resource_versions to only report # versions about resources which are common, @@ -372,6 +373,11 @@ class OVSNeutronAgent(l2population_rpc.L2populationRpcCallBackTunnelMixin, raise ValueError(_("Parsing bridge_mappings failed: %s.") % e) def _report_state(self): + # return and skip reporting agent state if OVS is dead + if self.ovs_status == constants.OVS_DEAD: + LOG.error("OVS is down, not reporting state to server") + return + # How many devices are likely used by a VM self.agent_state.get('configurations')['devices'] = ( self.int_br_device_count) @@ -2554,12 +2560,12 @@ class OVSNeutronAgent(l2population_rpc.L2populationRpcCallBackTunnelMixin, start = time.time() LOG.info("Agent rpc_loop - iteration:%d started", self.iter_num) - ovs_status = self.check_ovs_status() + self.ovs_status = self.check_ovs_status() bridges_recreated = False - if ovs_status == constants.OVS_RESTARTED: + if self.ovs_status == constants.OVS_RESTARTED: self._handle_ovs_restart(polling_manager) tunnel_sync = self.enable_tunneling or tunnel_sync - elif ovs_status == constants.OVS_DEAD: + elif self.ovs_status == constants.OVS_DEAD: # Agent doesn't apply any operations when ovs is dead, to # prevent unexpected failure or crash. Sleep and continue # loop in which ovs status will be checked periodically. 
@@ -2584,7 +2590,7 @@ class OVSNeutronAgent(l2population_rpc.L2populationRpcCallBackTunnelMixin, except Exception: LOG.exception("Error while configuring tunnel endpoints") tunnel_sync = True - ovs_restarted |= (ovs_status == constants.OVS_RESTARTED) + ovs_restarted |= (self.ovs_status == constants.OVS_RESTARTED) devices_need_retry = (any(failed_devices.values()) or any(failed_ancillary_devices.values()) or ports_not_ready_yet) diff --git a/neutron/tests/unit/plugins/ml2/drivers/openvswitch/agent/test_ovs_neutron_agent.py b/neutron/tests/unit/plugins/ml2/drivers/openvswitch/agent/test_ovs_neutron_agent.py index 7d059d7e6be..97c8cf75066 100644 --- a/neutron/tests/unit/plugins/ml2/drivers/openvswitch/agent/test_ovs_neutron_agent.py +++ b/neutron/tests/unit/plugins/ml2/drivers/openvswitch/agent/test_ovs_neutron_agent.py @@ -1256,6 +1256,14 @@ class TestOvsNeutronAgent(object): self.agent.agent_state, True) self.systemd_notify.assert_not_called() + def test_not_report_state_when_ovs_dead(self): + with mock.patch.object(self.agent.state_rpc, + "report_state") as report_st: + self.agent.ovs_status = constants.OVS_DEAD + self.agent._report_state() + report_st.assert_not_called() + self.systemd_notify.assert_not_called() + def test_report_state_revived(self): with mock.patch.object(self.agent.state_rpc, "report_state") as report_st: diff --git a/releasenotes/notes/bug-7dc8245da8e0e571.yaml b/releasenotes/notes/bug-7dc8245da8e0e571.yaml new file mode 100644 index 00000000000..707987b681c --- /dev/null +++ b/releasenotes/notes/bug-7dc8245da8e0e571.yaml @@ -0,0 +1,6 @@ +--- +fixes: + - | + Stop sending the agent heartbeat from the OVS agent when it detects + that OVS is dead. This helps alert cloud operators that + something is wrong on the given node.