From 598c05ab6a2db24f9cbb899517b7daffc7c73dd1 Mon Sep 17 00:00:00 2001 From: Arnaud Morin Date: Thu, 13 Apr 2023 16:54:14 +0200 Subject: [PATCH] Do not dispose local_vlan_hints After first iteration, neutron ovs agent was releasing the vlan hints back into available vlan. This could be an issue if neutron agent failed to configure a device during that first loop. Example of failure: a RPC call like get_device_details. When such failure occur, the vlan is not removed from local_vlan_hints when configuring the device, but the hint is anyway restored in available vlans at the end of the iteration. In such situation, a new port can be plugged into br-int reusing a vlan that is still used by a previous port. Not disposing the local_vlan_hints after first iteration allow neutron to keep looking into the hints on next iteration, until it succeed. Anyway, having a local_vlan_hints not empty after an iteration is an error and should be logged so operator can eventually fix ovs manually on some edge cases. Change-Id: Id2e8aca7c52359eee7c4fd8006d93c97c84f817a Signed-off-by: Arnaud Morin --- .../openvswitch/agent/ovs_neutron_agent.py | 18 ++-- .../agent/test_ovs_neutron_agent.py | 90 +++++++++++++++++++ 2 files changed, 103 insertions(+), 5 deletions(-) diff --git a/neutron/plugins/ml2/drivers/openvswitch/agent/ovs_neutron_agent.py b/neutron/plugins/ml2/drivers/openvswitch/agent/ovs_neutron_agent.py index eeae09abedd..fbec145e848 100644 --- a/neutron/plugins/ml2/drivers/openvswitch/agent/ovs_neutron_agent.py +++ b/neutron/plugins/ml2/drivers/openvswitch/agent/ovs_neutron_agent.py @@ -491,10 +491,6 @@ class OVSNeutronAgent(l2population_rpc.L2populationRpcCallBackTunnelMixin, self.available_local_vlans.remove(local_vlan) self._local_vlan_hints[key] = local_vlan - def _dispose_local_vlan_hints(self): - self.available_local_vlans.update(self._local_vlan_hints.values()) - self._local_vlan_hints = {} - def _reset_tunnel_ofports(self): self.tun_br_ofports = {n_const.TYPE_GENEVE: {}, n_const.TYPE_GRE: {}, @@ -2888,7 +2884,19 @@ class OVSNeutronAgent(l2population_rpc.L2populationRpcCallBackTunnelMixin, self.update_retries_map_and_remove_devs_not_to_retry( failed_devices, failed_ancillary_devices, failed_devices_retries_map)) - self._dispose_local_vlan_hints() + + if len(self._local_vlan_hints): + LOG.warning( + "Agent rpc_loop - iteration:%(iter_num)d - " + "the process was not able to clean local vlans " + "(%(nb_hints)s failed). Please ensure that you " + "do not have dead ports in the integration " + "bridge.", + {'iter_num': self.iter_num, + 'nb_hints': len(self._local_vlan_hints)}) + LOG.debug( + "local_vlan_hints: %s", self._local_vlan_hints + ) except Exception: LOG.exception("Error while processing VIF ports") # Put the ports back in self.updated_port diff --git a/neutron/tests/unit/plugins/ml2/drivers/openvswitch/agent/test_ovs_neutron_agent.py b/neutron/tests/unit/plugins/ml2/drivers/openvswitch/agent/test_ovs_neutron_agent.py index 4ad2460b456..b4d9efd77c8 100644 --- a/neutron/tests/unit/plugins/ml2/drivers/openvswitch/agent/test_ovs_neutron_agent.py +++ b/neutron/tests/unit/plugins/ml2/drivers/openvswitch/agent/test_ovs_neutron_agent.py @@ -83,6 +83,7 @@ class FakeVif(object): ofport = 99 port_name = 'name' vif_mac = 'aa:bb:cc:11:22:33' + vif_id = 'dead-beaf' class MockFixedIntervalLoopingCall(object): @@ -2486,6 +2487,95 @@ class TestOvsNeutronAgent(object): cleanup.assert_not_called() rpc_stop.assert_called_once() + def _test_rpc_loop_hints(self, devices, failed_devices, hints): + devices_details = [] + vifs = {} + for device in devices: + details = { + 'admin_state_up': True, + 'port_id': mock.Mock(), + 'device': device, + 'network_id': device + 'net', + 'physical_network': 'physnet42', + 'segmentation_id': '4242', + 'network_type': n_const.TYPE_LOCAL, + 'fixed_ips': [{ + 'subnet_id': '8a66d0cc-2950-dead-beef-d090140d3607', + 'ip_address': '1.1.1.1'}], + 'device_owner': DEVICE_OWNER_COMPUTE, + } + devices_details.append(details) + vifs[device] = FakeVif() + port_info = {'current': set(), + 'added': set(devices + failed_devices)} + + with mock.patch.object( + self.agent, + 'process_port_info') as process_port_info, \ + mock.patch.object( + self.agent, + '_check_and_handle_signal', side_effect=[True, False]), \ + mock.patch.object( + self.agent, + 'check_ovs_status', + return_value=ovs_constants.OVS_RESTARTED), \ + mock.patch.object( + self.agent, + '_handle_ovs_restart'), \ + mock.patch.object( + self.agent, + 'cleanup_stale_flows'), \ + mock.patch.object( + self.agent.plugin_rpc, + 'get_devices_details_list_and_failed_devices') as gddl, \ + mock.patch.object( + self.agent.int_br, + 'get_vifs_by_ids') as get_vifs_by_ids: + # Simulate a restart + self.agent.fullsync = True + self.agent._local_vlan_hints = hints + + # Populate fake info + process_port_info.return_value = (port_info, set(), 0, set()) + gddl.return_value = { + 'devices': devices_details, + 'failed_devices': failed_devices, + } + get_vifs_by_ids.return_value = vifs + + # Run the rpc_loop + self.agent.rpc_loop(polling_manager=mock.Mock()) + + def test_rpc_loop_hints_all_used(self): + devices = ['tap1234', 'tap2345'] + failed_devices = [] + hints = {'tap1234net/4242': 42, 'tap2345net/4242': 43} + self._test_rpc_loop_hints( + devices=devices, failed_devices=failed_devices, hints=hints) + + # Assert that we do not have any value in the hints + self.assertEqual(0, len(self.agent._local_vlan_hints)) + + def test_rpc_loop_hints_one_left(self): + devices = ['tap1234'] + failed_devices = [] + hints = {'tap1234net/4242': 42, 'tap2345net/4242': 43} + self._test_rpc_loop_hints( + devices=devices, failed_devices=failed_devices, hints=hints) + + # Assert that we have one value left in the hints + self.assertEqual(1, len(self.agent._local_vlan_hints)) + + def test_rpc_loop_hints_with_failed_devices(self): + devices = ['tap1234'] + failed_devices = ['tap2345'] + hints = {'tap1234net/4242': 42, 'tap2345net/4242': 43} + self._test_rpc_loop_hints( + devices=devices, failed_devices=failed_devices, hints=hints) + + # Assert that we have one value left in the hints + self.assertEqual(1, len(self.agent._local_vlan_hints)) + def test_set_rpc_timeout(self): with mock.patch.object(n_rpc.BackingOffClient, 'set_max_timeout') as smt: