Do not delete trunk bridges if service port attached

When a deployment has instance ports that are neutron trunk ports with
DPDK vhu in vhostuserclient mode, when the instance reboots nova will
delete the ovs port and then recreate when the host comes back from
reboot.  This quick transition change can trigger a race condition that
causes the tbr trunk bridge to be deleted after the port has been
recreated.  See the bug for more details.

This change mitigates the race condition by adding a check for active
service ports within the trunk port deletion function.

Change-Id: I70b9c26990e6902f8888449bfd7483c25e5bff46
Closes-Bug: #1807239
(cherry picked from commit bd2a1bc6c3d68ddd27fc0b77ec767b11ec0fcb54)
This commit is contained in:
Nate Johnston 2018-12-06 12:39:49 -05:00
parent fea4365500
commit 25ab89f7d3
3 changed files with 25 additions and 1 deletions
neutron
services/trunk/drivers/openvswitch/agent
tests
functional/services/trunk/drivers/openvswitch/agent
unit/services/trunk/drivers/openvswitch/agent

@ -205,6 +205,20 @@ class OVSDBHandler(object):
:param bridge_name: Name of the bridge used for locking purposes.
:param port: Parent port dict.
"""
# TODO(njohnston): In the case of DPDK with trunk ports, if nova
# deletes an interface and then re-adds it we can get a race
# condition where the port is re-added and then the bridge is
# deleted because we did not properly catch the re-addition. To
# solve this would require transitioning to ordered event
# resolution, like the L3 agent does with the
# ResourceProcessingQueue class. Until we can make that happen, we
# try to mitigate the issue by checking if there is a port on the
# bridge and if so then do not remove it.
bridge = ovs_lib.OVSBridge(bridge_name)
if bridge_has_instance_port(bridge):
LOG.debug("The bridge %s has instances attached so it will not "
"be deleted.", bridge_name)
return
try:
# TODO(jlibosva): Investigate how to proceed during removal of
# trunk bridge that doesn't have metadata stored.

@ -193,3 +193,11 @@ class OVSDBHandlerTestCase(base.OVSAgentTestFramework):
# Check no resources are left behind.
self.assertFalse(self.trunk_br.exists())
self.assertFalse(ovsdb_handler.bridge_has_service_port(br_int))
def test_do_not_delete_trunk_bridge_with_instance_ports(self):
ports = self._fill_trunk_dict()
self.setup_agent_and_ports(port_dicts=ports)
self.wait_until_ports_state(self.ports, up=True)
self.ovsdb_handler.handle_trunk_remove(self.trunk_br.br_name,
ports.pop())
self.assertTrue(self.trunk_br.exists())

@ -182,7 +182,9 @@ class TestOVSDBHandler(base.BaseTestCase):
def test_handle_trunk_remove_trunk_manager_failure(self):
with mock.patch.object(self.ovsdb_handler, '_get_trunk_metadata',
side_effect=trunk_manager.TrunkManagerError(error='error')):
self.ovsdb_handler.handle_trunk_remove('foo', self.fake_port)
with mock.patch.object(ovsdb_handler, 'bridge_has_instance_port',
return_value=True):
self.ovsdb_handler.handle_trunk_remove('foo', self.fake_port)
@mock.patch('neutron.agent.common.ovs_lib.OVSBridge')
def test_handle_trunk_remove_rpc_failure(self, br):