Browse Source

Do not delete trunk bridges if service port attached

When a deployment has instance ports that are neutron trunk ports with
DPDK vhu interfaces in vhostuserclient mode, nova will delete the OVS
port when the instance reboots and then recreate it when the host comes
back from the reboot.  This quick transition can trigger a race
condition that causes the tbr trunk bridge to be deleted after the port
has been recreated.  See the bug report for more details.

This change mitigates the race condition by adding a check for active
service ports within the trunk port deletion function.

Change-Id: I70b9c26990e6902f8888449bfd7483c25e5bff46
Closes-Bug: #1807239
(cherry picked from commit bd2a1bc6c3)
tags/11.0.7
Nate Johnston 1 year ago
parent
commit
d36cb19813
3 changed files with 25 additions and 1 deletions
  1. +14
    -0
      neutron/services/trunk/drivers/openvswitch/agent/ovsdb_handler.py
  2. +8
    -0
      neutron/tests/functional/services/trunk/drivers/openvswitch/agent/test_ovsdb_handler.py
  3. +3
    -1
      neutron/tests/unit/services/trunk/drivers/openvswitch/agent/test_ovsdb_handler.py

+ 14
- 0
neutron/services/trunk/drivers/openvswitch/agent/ovsdb_handler.py View File

@@ -205,6 +205,20 @@ class OVSDBHandler(object):
:param bridge_name: Name of the bridge used for locking purposes.
:param port: Parent port dict.
"""
# TODO(njohnston): In the case of DPDK with trunk ports, if nova
# deletes an interface and then re-adds it we can get a race
# condition where the port is re-added and then the bridge is
# deleted because we did not properly catch the re-addition. To
# solve this would require transitioning to ordered event
# resolution, like the L3 agent does with the
# ResourceProcessingQueue class. Until we can make that happen, we
# try to mitigate the issue by checking if there is a port on the
# bridge and if so then do not remove it.
bridge = ovs_lib.OVSBridge(bridge_name)
if bridge_has_instance_port(bridge):
LOG.debug("The bridge %s has instances attached so it will not "
"be deleted.", bridge_name)
return
try:
# TODO(jlibosva): Investigate how to proceed during removal of
# trunk bridge that doesn't have metadata stored.

+ 8
- 0
neutron/tests/functional/services/trunk/drivers/openvswitch/agent/test_ovsdb_handler.py View File

@@ -193,3 +193,11 @@ class OVSDBHandlerTestCase(base.OVSAgentTestFramework):
# Check no resources are left behind.
self.assertFalse(self.trunk_br.exists())
self.assertFalse(ovsdb_handler.bridge_has_service_port(br_int))

def test_do_not_delete_trunk_bridge_with_instance_ports(self):
ports = self._fill_trunk_dict()
self.setup_agent_and_ports(port_dicts=ports)
self.wait_until_ports_state(self.ports, up=True)
self.ovsdb_handler.handle_trunk_remove(self.trunk_br.br_name,
ports.pop())
self.assertTrue(self.trunk_br.exists())

+ 3
- 1
neutron/tests/unit/services/trunk/drivers/openvswitch/agent/test_ovsdb_handler.py View File

@@ -182,7 +182,9 @@ class TestOVSDBHandler(base.BaseTestCase):
def test_handle_trunk_remove_trunk_manager_failure(self):
# Metadata retrieval fails with TrunkManagerError; handle_trunk_remove
# must absorb the error rather than propagate it to the caller.
with mock.patch.object(self.ovsdb_handler, '_get_trunk_metadata',
side_effect=trunk_manager.TrunkManagerError(error='error')):
self.ovsdb_handler.handle_trunk_remove('foo', self.fake_port)
# NOTE(review): indentation and diff markers were lost in this capture,
# so it is unclear whether this second patch context nests inside the
# first one (and whether the call above was the removed pre-change
# line). Confirm the real layout against the repository. The intent of
# the added lines is to exercise handle_trunk_remove() with
# bridge_has_instance_port patched, per the commit message.
with mock.patch.object(ovsdb_handler, 'bridge_has_instance_port',
return_value=True):
self.ovsdb_handler.handle_trunk_remove('foo', self.fake_port)

@mock.patch('neutron.agent.common.ovs_lib.OVSBridge')
def test_handle_trunk_remove_rpc_failure(self, br):

Loading…
Cancel
Save