Do not dispose local_vlan_hints
After first iteration, neutron ovs agent was releasing the vlan hints back into available vlan. This could be an issue if neutron agent failed to configure a device during that first loop. Example of failure: a RPC call like get_device_details. When such failure occur, the vlan is not removed from local_vlan_hints when configuring the device, but the hint is anyway restored in available vlans at the end of the iteration. In such situation, a new port can be plugged into br-int reusing a vlan that is still used by a previous port. Not disposing the local_vlan_hints after first iteration allow neutron to keep looking into the hints on next iteration, until it succeed. Anyway, having a local_vlan_hints not empty after an iteration is an error and should be logged so operator can eventually fix ovs manually on some edge cases. Change-Id: Id2e8aca7c52359eee7c4fd8006d93c97c84f817a Signed-off-by: Arnaud Morin <arnaud.morin@ovhcloud.com>
This commit is contained in:
parent
8c6ad0f49e
commit
598c05ab6a
@ -491,10 +491,6 @@ class OVSNeutronAgent(l2population_rpc.L2populationRpcCallBackTunnelMixin,
|
||||
self.available_local_vlans.remove(local_vlan)
|
||||
self._local_vlan_hints[key] = local_vlan
|
||||
|
||||
def _dispose_local_vlan_hints(self):
|
||||
self.available_local_vlans.update(self._local_vlan_hints.values())
|
||||
self._local_vlan_hints = {}
|
||||
|
||||
def _reset_tunnel_ofports(self):
|
||||
self.tun_br_ofports = {n_const.TYPE_GENEVE: {},
|
||||
n_const.TYPE_GRE: {},
|
||||
@ -2888,7 +2884,19 @@ class OVSNeutronAgent(l2population_rpc.L2populationRpcCallBackTunnelMixin,
|
||||
self.update_retries_map_and_remove_devs_not_to_retry(
|
||||
failed_devices, failed_ancillary_devices,
|
||||
failed_devices_retries_map))
|
||||
self._dispose_local_vlan_hints()
|
||||
|
||||
if len(self._local_vlan_hints):
|
||||
LOG.warning(
|
||||
"Agent rpc_loop - iteration:%(iter_num)d - "
|
||||
"the process was not able to clean local vlans "
|
||||
"(%(nb_hints)s failed). Please ensure that you "
|
||||
"do not have dead ports in the integration "
|
||||
"bridge.",
|
||||
{'iter_num': self.iter_num,
|
||||
'nb_hints': len(self._local_vlan_hints)})
|
||||
LOG.debug(
|
||||
"local_vlan_hints: %s", self._local_vlan_hints
|
||||
)
|
||||
except Exception:
|
||||
LOG.exception("Error while processing VIF ports")
|
||||
# Put the ports back in self.updated_port
|
||||
|
@ -83,6 +83,7 @@ class FakeVif(object):
|
||||
ofport = 99
|
||||
port_name = 'name'
|
||||
vif_mac = 'aa:bb:cc:11:22:33'
|
||||
vif_id = 'dead-beaf'
|
||||
|
||||
|
||||
class MockFixedIntervalLoopingCall(object):
|
||||
@ -2486,6 +2487,95 @@ class TestOvsNeutronAgent(object):
|
||||
cleanup.assert_not_called()
|
||||
rpc_stop.assert_called_once()
|
||||
|
||||
def _test_rpc_loop_hints(self, devices, failed_devices, hints):
|
||||
devices_details = []
|
||||
vifs = {}
|
||||
for device in devices:
|
||||
details = {
|
||||
'admin_state_up': True,
|
||||
'port_id': mock.Mock(),
|
||||
'device': device,
|
||||
'network_id': device + 'net',
|
||||
'physical_network': 'physnet42',
|
||||
'segmentation_id': '4242',
|
||||
'network_type': n_const.TYPE_LOCAL,
|
||||
'fixed_ips': [{
|
||||
'subnet_id': '8a66d0cc-2950-dead-beef-d090140d3607',
|
||||
'ip_address': '1.1.1.1'}],
|
||||
'device_owner': DEVICE_OWNER_COMPUTE,
|
||||
}
|
||||
devices_details.append(details)
|
||||
vifs[device] = FakeVif()
|
||||
port_info = {'current': set(),
|
||||
'added': set(devices + failed_devices)}
|
||||
|
||||
with mock.patch.object(
|
||||
self.agent,
|
||||
'process_port_info') as process_port_info, \
|
||||
mock.patch.object(
|
||||
self.agent,
|
||||
'_check_and_handle_signal', side_effect=[True, False]), \
|
||||
mock.patch.object(
|
||||
self.agent,
|
||||
'check_ovs_status',
|
||||
return_value=ovs_constants.OVS_RESTARTED), \
|
||||
mock.patch.object(
|
||||
self.agent,
|
||||
'_handle_ovs_restart'), \
|
||||
mock.patch.object(
|
||||
self.agent,
|
||||
'cleanup_stale_flows'), \
|
||||
mock.patch.object(
|
||||
self.agent.plugin_rpc,
|
||||
'get_devices_details_list_and_failed_devices') as gddl, \
|
||||
mock.patch.object(
|
||||
self.agent.int_br,
|
||||
'get_vifs_by_ids') as get_vifs_by_ids:
|
||||
# Simulate a restart
|
||||
self.agent.fullsync = True
|
||||
self.agent._local_vlan_hints = hints
|
||||
|
||||
# Populate fake info
|
||||
process_port_info.return_value = (port_info, set(), 0, set())
|
||||
gddl.return_value = {
|
||||
'devices': devices_details,
|
||||
'failed_devices': failed_devices,
|
||||
}
|
||||
get_vifs_by_ids.return_value = vifs
|
||||
|
||||
# Run the rpc_loop
|
||||
self.agent.rpc_loop(polling_manager=mock.Mock())
|
||||
|
||||
def test_rpc_loop_hints_all_used(self):
|
||||
devices = ['tap1234', 'tap2345']
|
||||
failed_devices = []
|
||||
hints = {'tap1234net/4242': 42, 'tap2345net/4242': 43}
|
||||
self._test_rpc_loop_hints(
|
||||
devices=devices, failed_devices=failed_devices, hints=hints)
|
||||
|
||||
# Assert that we do not have any value in the hints
|
||||
self.assertEqual(0, len(self.agent._local_vlan_hints))
|
||||
|
||||
def test_rpc_loop_hints_one_left(self):
|
||||
devices = ['tap1234']
|
||||
failed_devices = []
|
||||
hints = {'tap1234net/4242': 42, 'tap2345net/4242': 43}
|
||||
self._test_rpc_loop_hints(
|
||||
devices=devices, failed_devices=failed_devices, hints=hints)
|
||||
|
||||
# Assert that we have one value left in the hints
|
||||
self.assertEqual(1, len(self.agent._local_vlan_hints))
|
||||
|
||||
def test_rpc_loop_hints_with_failed_devices(self):
|
||||
devices = ['tap1234']
|
||||
failed_devices = ['tap2345']
|
||||
hints = {'tap1234net/4242': 42, 'tap2345net/4242': 43}
|
||||
self._test_rpc_loop_hints(
|
||||
devices=devices, failed_devices=failed_devices, hints=hints)
|
||||
|
||||
# Assert that we have one value left in the hints
|
||||
self.assertEqual(1, len(self.agent._local_vlan_hints))
|
||||
|
||||
def test_set_rpc_timeout(self):
|
||||
with mock.patch.object(n_rpc.BackingOffClient,
|
||||
'set_max_timeout') as smt:
|
||||
|
Loading…
Reference in New Issue
Block a user