From 33de608f04dcc8117eeba63876598dc2ae93013a Mon Sep 17 00:00:00 2001 From: Miguel Lavalle Date: Wed, 13 Apr 2022 18:00:12 -0500 Subject: [PATCH] Avoid race condition when deleting trunk bridges Prior to this change, trunk bridges are created by os-vif but deleted by Neutron when the last vif is removed from it. This creates race conditions in some use cases, like DPDK with vhostuserclient mode, when VMs are rebooted. To avoid these races, Neutron will not delete trunk bridges anymore. Their creation and deletion will be os-vif's responsiblity. Since [1], Nova uses the os-vif version that contains this functionality. This patch also changes the trunk status change event. During a live migration, when the trunk parent port has been bound to the destination host (that means there is only one port binding associated) and the status has changed to ACTIVE, the method triggers the subport binding to the new host too. This is because there could be a race condition between the subport binding, triggered by the OVS agent, and the parent port binding, triggered by Nova. If when the OVS agent tries to bind the subports, the parent port is still bound to the source host, the subport binding remains in the source host too, instead of changing to the destination. This patch also reverts [2] and [3]. As commented in the previous paragraph, this patch fixes the issue reported in LP#1997025. The trunk port live migration with ML2/OVS must be fixed with this patch. [1]https://review.opendev.org/c/openstack/nova/+/865031 [2]https://review.opendev.org/c/openstack/neutron/+/865295 [3]https://review.opendev.org/c/openstack/neutron/+/865424 Closes-Bug: #1869244 Closes-Bug: #1997025 Change-Id: I4e16357f3ff214fcf41e418982806c24088a2665 --- .../openvswitch/agent/ovsdb_handler.py | 25 ++----- .../openvswitch/agent/trunk_manager.py | 16 ++--- .../trunk/drivers/openvswitch/driver.py | 69 +++++++++++++++++++ neutron/tests/common/net_helpers.py | 13 ++++ neutron/tests/fullstack/test_trunk.py | 17 +---- .../openvswitch/agent/test_ovsdb_handler.py | 10 --- .../openvswitch/agent/test_trunk_manager.py | 8 +-- .../openvswitch/agent/test_ovsdb_handler.py | 4 +- .../trunk/drivers/openvswitch/test_driver.py | 23 +++++++ requirements.txt | 2 +- zuul.d/tempest-multinode.yaml | 8 --- 11 files changed, 121 insertions(+), 74 deletions(-) diff --git a/neutron/services/trunk/drivers/openvswitch/agent/ovsdb_handler.py b/neutron/services/trunk/drivers/openvswitch/agent/ovsdb_handler.py index 4f72cf0a06f..79bc7ac0437 100644 --- a/neutron/services/trunk/drivers/openvswitch/agent/ovsdb_handler.py +++ b/neutron/services/trunk/drivers/openvswitch/agent/ovsdb_handler.py @@ -14,7 +14,6 @@ # under the License. import functools -import time import eventlet from neutron_lib.callbacks import events @@ -43,7 +42,6 @@ from neutron.services.trunk.rpc import agent LOG = logging.getLogger(__name__) DEFAULT_WAIT_FOR_PORT_TIMEOUT = 60 -WAIT_BEFORE_TRUNK_DELETE = 6 def lock_on_bridge_name(required_parameter): @@ -208,21 +206,6 @@ class OVSDBHandler(object): :param bridge_name: Name of the bridge used for locking purposes. :param port: Parent port dict. """ - # TODO(njohnston): In the case of DPDK with trunk ports, if nova - # deletes an interface and then re-adds it we can get a race - # condition where the port is re-added and then the bridge is - # deleted because we did not properly catch the re-addition. To - # solve this would require transitioning to ordered event - # resolution, like the L3 agent does with the - # ResourceProcessingQueue class. Until we can make that happen, we - # try to mitigate the issue by checking if there is a port on the - # bridge and if so then do not remove it. - bridge = ovs_lib.OVSBridge(bridge_name) - time.sleep(WAIT_BEFORE_TRUNK_DELETE) - if bridge_has_instance_port(bridge): - LOG.debug("The bridge %s has instances attached so it will not " - "be deleted.", bridge_name) - return try: # TODO(jlibosva): Investigate how to proceed during removal of # trunk bridge that doesn't have metadata stored. @@ -304,8 +287,7 @@ class OVSDBHandler(object): LOG.debug("Added trunk: %s", trunk_id) return self._get_current_status(subports, subport_ids) - def unwire_subports_for_trunk(self, trunk_id, subport_ids): - """Destroy OVS ports associated to the logical subports.""" + def _remove_sub_ports(self, trunk_id, subport_ids): ids = [] for subport_id in subport_ids: try: @@ -317,6 +299,11 @@ class OVSDBHandler(object): {'subport_id': subport_id, 'trunk_id': trunk_id, 'err': te}) + return ids + + def unwire_subports_for_trunk(self, trunk_id, subport_ids): + """Destroy OVS ports associated to the logical subports.""" + ids = self._remove_sub_ports(trunk_id, subport_ids) try: # OVS bridge and port to be determined by _update_trunk_metadata bridge = None diff --git a/neutron/services/trunk/drivers/openvswitch/agent/trunk_manager.py b/neutron/services/trunk/drivers/openvswitch/agent/trunk_manager.py index 3dfd4580129..ff37fc946f3 100644 --- a/neutron/services/trunk/drivers/openvswitch/agent/trunk_manager.py +++ b/neutron/services/trunk/drivers/openvswitch/agent/trunk_manager.py @@ -136,7 +136,6 @@ class TrunkParentPort(object): """ ovsdb = self.bridge.ovsdb with ovsdb.transaction() as txn: - txn.add(ovsdb.del_br(self.bridge.br_name)) txn.add(ovsdb.del_port(self.patch_port_int_name, bridge.br_name)) @@ -186,8 +185,9 @@ class SubPort(TrunkParentPort): """ ovsdb = self.bridge.ovsdb with ovsdb.transaction() as txn: - txn.add(ovsdb.del_port(self.patch_port_trunk_name, - self.bridge.br_name)) + if self.bridge.exists(): + txn.add(ovsdb.del_port(self.patch_port_trunk_name, + self.bridge.br_name)) txn.add(ovsdb.del_port(self.patch_port_int_name, bridge.br_name)) @@ -225,10 +225,7 @@ class TrunkManager(object): """Remove the trunk bridge.""" trunk = TrunkParentPort(trunk_id, port_id) try: - if trunk.bridge.exists(): - trunk.unplug(self.br_int) - else: - LOG.debug("Trunk bridge with ID %s does not exist.", trunk_id) + trunk.unplug(self.br_int) except RuntimeError as e: raise TrunkManagerError(error=e) @@ -283,10 +280,7 @@ class TrunkManager(object): # Trunk bridge might have been deleted by calling delete_trunk() before # remove_sub_port(). try: - if sub_port.bridge.exists(): - sub_port.unplug(self.br_int) - else: - LOG.debug("Trunk bridge with ID %s does not exist.", trunk_id) + sub_port.unplug(self.br_int) except RuntimeError as e: raise TrunkManagerError(error=e) diff --git a/neutron/services/trunk/drivers/openvswitch/driver.py b/neutron/services/trunk/drivers/openvswitch/driver.py index 46a33abb117..9bc753402e4 100644 --- a/neutron/services/trunk/drivers/openvswitch/driver.py +++ b/neutron/services/trunk/drivers/openvswitch/driver.py @@ -14,14 +14,18 @@ from neutron_lib.api.definitions import portbindings from neutron_lib.callbacks import events from neutron_lib.callbacks import registry +from neutron_lib.callbacks import resources from neutron_lib import constants +from neutron_lib.plugins import directory from neutron_lib.plugins.ml2 import ovs_constants as agent_consts from neutron_lib.services.trunk import constants as trunk_consts from oslo_config import cfg from oslo_log import log as logging +from neutron.objects import trunk as trunk_obj from neutron.services.trunk.drivers import base from neutron.services.trunk.drivers.openvswitch import utils +from neutron.services.trunk import exceptions as trunk_exc LOG = logging.getLogger(__name__) @@ -41,6 +45,16 @@ DRIVER = None class OVSDriver(base.DriverBase): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._core_plugin = None + + @property + def core_plugin(self): + if not self._core_plugin: + self._core_plugin = directory.get_plugin() + return self._core_plugin + @property def is_loaded(self): try: @@ -55,6 +69,61 @@ class OVSDriver(base.DriverBase): SUPPORTED_SEGMENTATION_TYPES, constants.AGENT_TYPE_OVS) + @staticmethod + def _get_trunk(context, trunk_id): + """Return the trunk object or raise if not found.""" + obj = trunk_obj.Trunk.get_object(context, id=trunk_id) + if obj is None: + raise trunk_exc.TrunkNotFound(trunk_id=trunk_id) + + return obj + + def _update_subport_binding(self, context, trunk_id): + """Update the subport binding host""" + trunk_obj = self._get_trunk(context, trunk_id) + trunk_port = self.core_plugin.get_port(context, trunk_obj.port_id) + trunk_host = trunk_port.get(portbindings.HOST_ID) + for subport in trunk_obj.sub_ports: + port = self.core_plugin.update_port( + context, subport.port_id, + {'port': {portbindings.HOST_ID: trunk_host, + 'device_owner': trunk_consts.TRUNK_SUBPORT_OWNER}}) + vif_type = port.get(portbindings.VIF_TYPE) + if vif_type == portbindings.VIF_TYPE_BINDING_FAILED: + raise trunk_exc.SubPortBindingError( + port_id=subport.port_id, trunk_id=trunk_obj.id) + + @registry.receives(resources.PORT, [events.AFTER_UPDATE]) + def _subport_binding(self, resource, event, trigger, payload=None): + """Bind the subports to the parent port host + + This method listen to the port after update events. If the parent port + is updated and transitions from inactive to active, this method + retrieves the port host ID and binds the subports to this host. + + :param resource: neutron_lib.callbacks.resources.PORT + :param event: neutron_lib.callbacks.events.AFTER_UPDATE + :param trigger: the specific driver plugin + + """ + updated_port = payload.latest_state + trunk_details = updated_port.get('trunk_details') + vif_details = updated_port.get(portbindings.VIF_DETAILS) + driver = vif_details.get(portbindings.VIF_DETAILS_BOUND_DRIVERS, + {}).get('0') + # If no trunk_details, the port is not the parent of a trunk. + # If this port is not bound to ML2/OVS, we skip this method. + if not trunk_details or not driver == NAME: + return + + context = payload.context + orig_status = payload.states[0].get('status') + new_status = updated_port.get('status') + trunk_id = trunk_details['trunk_id'] + if (new_status == constants.PORT_STATUS_ACTIVE and + new_status != orig_status): + self._update_subport_binding(context, trunk_id) + def register(): """Register the driver.""" diff --git a/neutron/tests/common/net_helpers.py b/neutron/tests/common/net_helpers.py index 0fbfa1f923c..db131c789d0 100644 --- a/neutron/tests/common/net_helpers.py +++ b/neutron/tests/common/net_helpers.py @@ -44,6 +44,7 @@ from neutron.conf.agent import common as config from neutron.db import db_base_plugin_common as db_base from neutron.plugins.ml2.drivers.linuxbridge.agent import \ linuxbridge_neutron_agent as linuxbridge_agent +from neutron.services.trunk.drivers.openvswitch.agent import trunk_manager from neutron.tests.common import base as common_base from neutron.tests.common import helpers from neutron.tests import tools @@ -812,6 +813,18 @@ class OVSTrunkBridgeFixture(OVSBridgeFixture): self.addCleanup(self.bridge.destroy) +class OVSTrunkBridgeFixtureTrunkBridge(fixtures.Fixture): + + def __init__(self, trunk_id): + super(OVSTrunkBridgeFixtureTrunkBridge, self).__init__() + self.trunk_id = trunk_id + + def _setUp(self): + self.bridge = trunk_manager.TrunkBridge(self.trunk_id) + self.bridge.create() + self.addCleanup(self.bridge.destroy) + + class OVSPortFixture(PortFixture): NIC_NAME_LEN = 14 diff --git a/neutron/tests/fullstack/test_trunk.py b/neutron/tests/fullstack/test_trunk.py index ace42a36520..f13e48fcc16 100644 --- a/neutron/tests/fullstack/test_trunk.py +++ b/neutron/tests/fullstack/test_trunk.py @@ -20,19 +20,12 @@ from oslo_utils import uuidutils from neutron.common import utils from neutron.services.trunk.drivers.openvswitch.agent import ovsdb_handler -from neutron.services.trunk.drivers.openvswitch.agent import trunk_manager from neutron.services.trunk.drivers.openvswitch import utils as trunk_ovs_utils from neutron.tests.fullstack import base from neutron.tests.fullstack.resources import environment from neutron.tests.fullstack.resources import machine -def trunk_bridge_does_not_exist(trunk_id): - """Return true if trunk bridge for given ID does not exists.""" - bridge = trunk_manager.TrunkBridge(trunk_id) - return not bridge.exists() - - def make_ip_network(port, network): """Make an IPNetwork object from port and network. @@ -282,16 +275,8 @@ class TestTrunkPlugin(base.BaseFullStackTestCase): vlan_aware_vm.block_until_ping(trunk_network_vm.ip) vlan_aware_vm.block_until_ping(vlan2_network_vm.ip) - # Delete vm and check that patch ports and trunk bridge are gone + # Delete vm and check that patch ports are gone vlan_aware_vm.destroy() - bridge_doesnt_exist_predicate = functools.partial( - trunk_bridge_does_not_exist, trunk_id) - utils.wait_until_true( - bridge_doesnt_exist_predicate, - exception=TrunkTestException( - 'Trunk bridge with ID %s has not been removed' % - trunk_id) - ) integration_bridge = self.host.get_bridge(None) no_patch_ports_predicate = functools.partial( diff --git a/neutron/tests/functional/services/trunk/drivers/openvswitch/agent/test_ovsdb_handler.py b/neutron/tests/functional/services/trunk/drivers/openvswitch/agent/test_ovsdb_handler.py index 2e5edea593d..73aea398bd2 100644 --- a/neutron/tests/functional/services/trunk/drivers/openvswitch/agent/test_ovsdb_handler.py +++ b/neutron/tests/functional/services/trunk/drivers/openvswitch/agent/test_ovsdb_handler.py @@ -140,8 +140,6 @@ class OVSDBHandlerTestCase(base.OVSAgentTestFramework): self.wait_until_ports_state(self.ports, up=True) self.trunk_br.delete_port(self.trunk_port_name) self.wait_until_ports_state(self.ports, up=False) - common_utils.wait_until_true(lambda: - not self.trunk_br.bridge_exists(self.trunk_br.br_name)) def test_trunk_creation_with_subports(self): ports = self._fill_trunk_dict() @@ -194,11 +192,3 @@ class OVSDBHandlerTestCase(base.OVSAgentTestFramework): # Check no resources are left behind. self.assertFalse(self.trunk_br.exists()) self.assertFalse(ovsdb_handler.bridge_has_service_port(br_int)) - - def test_do_not_delete_trunk_bridge_with_instance_ports(self): - ports = self._fill_trunk_dict() - self.setup_agent_and_ports(port_dicts=ports) - self.wait_until_ports_state(self.ports, up=True) - self.ovsdb_handler.handle_trunk_remove(self.trunk_br.br_name, - ports.pop()) - self.assertTrue(self.trunk_br.exists()) diff --git a/neutron/tests/functional/services/trunk/drivers/openvswitch/agent/test_trunk_manager.py b/neutron/tests/functional/services/trunk/drivers/openvswitch/agent/test_trunk_manager.py index 01fd692c8a0..b3e14a6c32d 100644 --- a/neutron/tests/functional/services/trunk/drivers/openvswitch/agent/test_trunk_manager.py +++ b/neutron/tests/functional/services/trunk/drivers/openvswitch/agent/test_trunk_manager.py @@ -45,8 +45,7 @@ class TrunkParentPortTestCase(base.BaseSudoTestCase): port_mac = net.get_random_mac('fa:16:3e:00:00:00'.split(':')) self.trunk = trunk_manager.TrunkParentPort(trunk_id, port_id, port_mac) self.trunk.bridge = self.useFixture( - net_helpers.OVSTrunkBridgeFixture( - self.trunk.bridge.br_name)).bridge + net_helpers.OVSTrunkBridgeFixtureTrunkBridge(trunk_id)).bridge self.br_int = self.useFixture(net_helpers.OVSBridgeFixture()).bridge def test_plug(self): @@ -70,8 +69,6 @@ class TrunkParentPortTestCase(base.BaseSudoTestCase): def test_unplug(self): self.trunk.plug(self.br_int) self.trunk.unplug(self.br_int) - self.assertFalse( - self.trunk.bridge.bridge_exists(self.trunk.bridge.br_name)) self.assertNotIn(self.trunk.patch_port_int_name, self.br_int.get_port_name_list()) @@ -96,9 +93,8 @@ class SubPortTestCase(base.BaseSudoTestCase): trunk_id = uuidutils.generate_uuid() port_id = uuidutils.generate_uuid() port_mac = net.get_random_mac('fa:16:3e:00:00:00'.split(':')) - trunk_bridge_name = utils.gen_trunk_br_name(trunk_id) trunk_bridge = self.useFixture( - net_helpers.OVSTrunkBridgeFixture(trunk_bridge_name)).bridge + net_helpers.OVSTrunkBridgeFixtureTrunkBridge(trunk_id)).bridge segmentation_id = helpers.get_not_used_vlan( trunk_bridge, VLAN_RANGE) self.subport = trunk_manager.SubPort( diff --git a/neutron/tests/unit/services/trunk/drivers/openvswitch/agent/test_ovsdb_handler.py b/neutron/tests/unit/services/trunk/drivers/openvswitch/agent/test_ovsdb_handler.py index 850a04adf51..32b0ab7e44a 100644 --- a/neutron/tests/unit/services/trunk/drivers/openvswitch/agent/test_ovsdb_handler.py +++ b/neutron/tests/unit/services/trunk/drivers/openvswitch/agent/test_ovsdb_handler.py @@ -183,9 +183,7 @@ class TestOVSDBHandler(base.BaseTestCase): def test_handle_trunk_remove_trunk_manager_failure(self, br): with mock.patch.object(self.ovsdb_handler, '_get_trunk_metadata', side_effect=trunk_manager.TrunkManagerError(error='error')): - with mock.patch.object(ovsdb_handler, 'bridge_has_instance_port', - return_value=True): - self.ovsdb_handler.handle_trunk_remove('foo', self.fake_port) + self.ovsdb_handler.handle_trunk_remove('foo', self.fake_port) @mock.patch('neutron.agent.common.ovs_lib.OVSBridge') def test_handle_trunk_remove_rpc_failure(self, br): diff --git a/neutron/tests/unit/services/trunk/drivers/openvswitch/test_driver.py b/neutron/tests/unit/services/trunk/drivers/openvswitch/test_driver.py index 6819266cba9..557b913edbf 100644 --- a/neutron/tests/unit/services/trunk/drivers/openvswitch/test_driver.py +++ b/neutron/tests/unit/services/trunk/drivers/openvswitch/test_driver.py @@ -13,10 +13,13 @@ from unittest import mock +from neutron_lib.api.definitions import portbindings from neutron_lib.callbacks import events from neutron_lib.callbacks import registry from neutron_lib import constants +from neutron_lib import context from neutron_lib.plugins.ml2 import ovs_constants as agent_consts +from neutron_lib.services.trunk import constants as trunk_consts from oslo_config import cfg from neutron.services.trunk.drivers.openvswitch import driver @@ -70,3 +73,23 @@ class OVSDriverTestCase(base.BaseTestCase): } })) test_trigger.assert_called_once_with('fake-trunk-br-name') + + def test__update_subport_binding(self): + ovs_driver = driver.OVSDriver.create() + core_plugin = mock.Mock() + core_plugin.get_port.return_value = {portbindings.HOST_ID: 'host_id'} + ovs_driver._core_plugin = core_plugin + ctx = context.get_admin_context() + mock_subports = [mock.Mock(port_id='sport_id1'), + mock.Mock(port_id='sport_id2')] + mock_trunk_obj = mock.Mock(sub_ports=mock_subports, port_id='port_id', + id='trunk_id') + with mock.patch.object(ovs_driver, '_get_trunk') as mock_get_trunk: + mock_get_trunk.return_value = mock_trunk_obj + updated_port = { + 'port': {portbindings.HOST_ID: 'host_id', + 'device_owner': trunk_consts.TRUNK_SUBPORT_OWNER}} + ovs_driver._update_subport_binding(ctx, 'trunk_id') + core_plugin.update_port.assert_has_calls( + [mock.call(ctx, 'sport_id1', updated_port), + mock.call(ctx, 'sport_id2', updated_port)], any_order=True) diff --git a/requirements.txt b/requirements.txt index 1a6755da7dd..39c6490404f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -59,7 +59,7 @@ pyOpenSSL>=17.1.0 # Apache-2.0 python-novaclient>=9.1.0 # Apache-2.0 openstacksdk>=0.31.2 # Apache-2.0 python-designateclient>=2.7.0 # Apache-2.0 -os-vif>=1.15.1 # Apache-2.0 +os-vif>=3.1.0 # Apache-2.0 futurist>=1.2.0 # Apache-2.0 tooz>=1.58.0 # Apache-2.0 wmi>=1.4.9;sys_platform=='win32' # MIT diff --git a/zuul.d/tempest-multinode.yaml b/zuul.d/tempest-multinode.yaml index 7010966839d..ce1d49454fc 100644 --- a/zuul.d/tempest-multinode.yaml +++ b/zuul.d/tempest-multinode.yaml @@ -41,10 +41,6 @@ - ^zuul.d/(?!(project)).*\.yaml vars: tox_envlist: integrated-network - # NOTE(ralonsoh): to be removed once LP#1997025 is fixed. - tempest_exclude_regex: "\ - (^tempest.api.compute.admin.test_live_migration.LiveAutoBlockMigrationV225Test.test_live_migration_with_trunk)|\ - (^tempest.api.compute.admin.test_live_migration.LiveMigrationTest.test_live_migration_with_trunk)" devstack_localrc: CIRROS_VERSION: 0.5.1 DEFAULT_IMAGE_NAME: cirros-0.5.1-x86_64-uec @@ -171,10 +167,6 @@ timeout: 10800 irrelevant-files: *openvswitch-irrelevant-files vars: - # NOTE(ralonsoh): to be removed once LP#1997025 is fixed. - tempest_exclude_regex: "\ - (^tempest.api.compute.admin.test_live_migration.LiveAutoBlockMigrationV225Test.test_live_migration_with_trunk)|\ - (^tempest.api.compute.admin.test_live_migration.LiveMigrationTest.test_live_migration_with_trunk)" tox_envlist: integrated-network devstack_localrc: CIRROS_VERSION: 0.5.1