From f26a30a03e61c51c0ed1cadba77d0e6a4443b8ae Mon Sep 17 00:00:00 2001 From: Lucas Alvares Gomes Date: Thu, 20 Jul 2023 10:25:23 +0100 Subject: [PATCH] [OVN] Retry retrieving LSP hosting information There's a sync issue while trying to fetch the hosting information for the LSP before we write it to the OVN database, sometimes the information is not yet present and we end up with an empty string ("") for the host attribute of portbindings. This patch adds a retry mechanism to solve this sync issue. Conflicts: neutron/tests/unit/plugins/ml2/drivers/ovn/mech_driver/ovsdb/test_ovn_client.py Change-Id: I52ec4b346271889ebaa7b7f84981eae5503d02d3 Related-Bug: #2020058 Signed-off-by: Lucas Alvares Gomes (cherry picked from commit 3044b938b9fa7dfef126db9acc18782978238dc6) --- .../ovn/mech_driver/ovsdb/ovn_client.py | 38 +++++++- .../ovn/mech_driver/ovsdb/test_ovn_client.py | 86 +++++++++++++++++++ 2 files changed, 123 insertions(+), 1 deletion(-) diff --git a/neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/ovn_client.py b/neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/ovn_client.py index 2c0bdc20d3a..6319b433be0 100644 --- a/neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/ovn_client.py +++ b/neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/ovn_client.py @@ -42,6 +42,7 @@ from oslo_utils import timeutils from ovsdbapp.backend.ovs_idl import idlutils import tenacity +from neutron._i18n import _ from neutron.common.ovn import acl as ovn_acl from neutron.common.ovn import constants as ovn_const from neutron.common.ovn import utils @@ -49,6 +50,7 @@ from neutron.common import utils as common_utils from neutron.conf.plugins.ml2.drivers.ovn import ovn_conf from neutron.db import ovn_revision_numbers_db as db_rev from neutron.db import segments_db +from neutron.plugins.ml2 import db as ml2_db from neutron.plugins.ml2.drivers.ovn.mech_driver.ovsdb.extensions \ import qos as qos_extension from neutron.scheduler import l3_ovn_scheduler @@ -231,6 +233,29 @@ class OVNClient(object): external_ids=subnet_dhcp_options['external_ids']) return {'cmd': add_dhcp_opts_cmd} + @tenacity.retry(retry=tenacity.retry_if_exception_type(RuntimeError), + wait=tenacity.wait_random(min=2, max=3), + stop=tenacity.stop_after_attempt(3), + reraise=True) + def _wait_for_port_bindings_host(self, context, port_id): + db_port = ml2_db.get_port(context, port_id) + # This is already checked previously but, just to stay on + # the safe side in case the port is deleted mid-operation + if not db_port: + raise RuntimeError( + _('No port found with ID %s') % port_id) + + if not db_port.port_bindings: + raise RuntimeError( + _('No port bindings information found for ' + 'port %s') % port_id) + + if not db_port.port_bindings[0].host: + raise RuntimeError( + _('No hosting information found for port %s') % port_id) + + return db_port + def update_lsp_host_info(self, context, db_port, up=True): """Update the binding hosting information for the LSP. @@ -246,8 +271,19 @@ class OVNClient(object): if up: if not db_port.port_bindings: return - host = db_port.port_bindings[0].host + if not db_port.port_bindings[0].host: + # NOTE(lucasgomes): There might be a sync issue between + # the moment that this port was fetched from the database + # and the hosting information being set, retry a few times + try: + db_port = self._wait_for_port_bindings_host( + context, db_port.id) + except RuntimeError as e: + LOG.warning(e) + return + + host = db_port.port_bindings[0].host ext_ids = ('external_ids', {ovn_const.OVN_HOST_ID_EXT_ID_KEY: host}) cmd.append( diff --git a/neutron/tests/unit/plugins/ml2/drivers/ovn/mech_driver/ovsdb/test_ovn_client.py b/neutron/tests/unit/plugins/ml2/drivers/ovn/mech_driver/ovsdb/test_ovn_client.py index 5e39d34f439..3887b141bd4 100644 --- a/neutron/tests/unit/plugins/ml2/drivers/ovn/mech_driver/ovsdb/test_ovn_client.py +++ b/neutron/tests/unit/plugins/ml2/drivers/ovn/mech_driver/ovsdb/test_ovn_client.py @@ -17,12 +17,15 @@ from unittest import mock from neutron.common.ovn import constants from neutron.conf.plugins.ml2.drivers.ovn import ovn_conf +from neutron.plugins.ml2 import db as ml2_db from neutron.plugins.ml2.drivers.ovn.mech_driver.ovsdb import ovn_client from neutron.tests import base from neutron.tests.unit.services.logapi.drivers.ovn \ import test_driver as test_log_driver from neutron_lib.services.logapi import constants as log_const +from tenacity import wait_none + class TestOVNClientBase(base.BaseTestCase): @@ -41,6 +44,9 @@ class TestOVNClient(TestOVNClientBase): self.get_plugin = mock.patch( 'neutron_lib.plugins.directory.get_plugin').start() + # Disable tenacity wait for UT + self.ovn_client._wait_for_port_bindings_host.retry.wait = wait_none() + def test_update_lsp_host_info_up(self): context = mock.MagicMock() host_id = 'fake-binding-host-id' @@ -54,6 +60,45 @@ class TestOVNClient(TestOVNClientBase): 'Logical_Switch_Port', port_id, ('external_ids', {constants.OVN_HOST_ID_EXT_ID_KEY: host_id})) + def test_update_lsp_host_info_up_retry(self): + context = mock.MagicMock() + host_id = 'fake-binding-host-id' + port_id = 'fake-port-id' + db_port_no_host = mock.Mock( + id=port_id, port_bindings=[mock.Mock(host="")]) + db_port = mock.Mock( + id=port_id, port_bindings=[mock.Mock(host=host_id)]) + + with mock.patch.object( + self.ovn_client, '_wait_for_port_bindings_host') as mock_wait: + mock_wait.return_value = db_port + self.ovn_client.update_lsp_host_info(context, db_port_no_host) + + # Assert _wait_for_port_bindings_host was called + mock_wait.assert_called_once_with(context, port_id) + + # Assert host_id was set + self.nb_idl.db_set.assert_called_once_with( + 'Logical_Switch_Port', port_id, + ('external_ids', {constants.OVN_HOST_ID_EXT_ID_KEY: host_id})) + + def test_update_lsp_host_info_up_retry_fail(self): + context = mock.MagicMock() + port_id = 'fake-port-id' + db_port_no_host = mock.Mock( + id=port_id, port_bindings=[mock.Mock(host="")]) + + with mock.patch.object( + self.ovn_client, '_wait_for_port_bindings_host') as mock_wait: + mock_wait.side_effect = RuntimeError("boom") + self.ovn_client.update_lsp_host_info(context, db_port_no_host) + + # Assert _wait_for_port_bindings_host was called + mock_wait.assert_called_once_with(context, port_id) + + # Assert host_id was NOT set + self.assertFalse(self.nb_idl.db_set.called) + def test_update_lsp_host_info_down(self): context = mock.MagicMock() port_id = 'fake-port-id' @@ -65,6 +110,47 @@ class TestOVNClient(TestOVNClientBase): 'Logical_Switch_Port', port_id, 'external_ids', constants.OVN_HOST_ID_EXT_ID_KEY, if_exists=True) + @mock.patch.object(ml2_db, 'get_port') + def test__wait_for_port_bindings_host(self, mock_get_port): + context = mock.MagicMock() + host_id = 'fake-binding-host-id' + port_id = 'fake-port-id' + db_port_no_host = mock.Mock( + id=port_id, port_bindings=[mock.Mock(host="")]) + db_port = mock.Mock( + id=port_id, port_bindings=[mock.Mock(host=host_id)]) + + mock_get_port.side_effect = (db_port_no_host, db_port) + + ret = self.ovn_client._wait_for_port_bindings_host( + context, port_id) + + self.assertEqual(ret, db_port) + + expected_calls = [mock.call(context, port_id), + mock.call(context, port_id)] + mock_get_port.assert_has_calls(expected_calls) + + @mock.patch.object(ml2_db, 'get_port') + def test__wait_for_port_bindings_host_fail(self, mock_get_port): + context = mock.MagicMock() + port_id = 'fake-port-id' + db_port_no_pb = mock.Mock(id=port_id, port_bindings=[]) + db_port_no_host = mock.Mock( + id=port_id, port_bindings=[mock.Mock(host="")]) + + mock_get_port.side_effect = ( + db_port_no_pb, db_port_no_host, db_port_no_host) + + self.assertRaises( + RuntimeError, self.ovn_client._wait_for_port_bindings_host, + context, port_id) + + expected_calls = [mock.call(context, port_id), + mock.call(context, port_id), + mock.call(context, port_id)] + mock_get_port.assert_has_calls(expected_calls) + class TestOVNClientFairMeter(TestOVNClientBase, test_log_driver.TestOVNDriverBase):