From e6fb32e27d1141ca4b00c3758ee44bfe2f060ccf Mon Sep 17 00:00:00 2001 From: Rodolfo Alonso Hernandez Date: Fri, 28 Apr 2023 13:56:02 +0000 Subject: [PATCH] Fix race condition when creating two routers without HA network When a HA router is created and the HA is not yet, before creating the router, the Neutron server creates the HA network and the corresponding subnet. The HA network cannot be duplicated (see previous patches related to this bug). But the subnet, that is created in another database transaction, cannot be present when the router creation call tries to create the HA port. This patch adds a HA subnet check before creating the router and the HA port. Even if the subnet check fails and the worker tries to create this subnet, if the process fails with ``InvalidInput``, that means other worker created the subnet before and the current one fails because tries to create the same subnet with the same CIDR. In this case, we dismiss the exception and continue with the router creation. Closes-Bug: #2016198 Change-Id: I82225fcc6248bb0fd68959ceb1daabff423d81ff --- neutron/db/l3_hamode_db.py | 27 +++++++++++++++--- neutron/tests/unit/db/test_l3_hamode_db.py | 32 +++++++++++++++++++++- 2 files changed, 54 insertions(+), 5 deletions(-) diff --git a/neutron/db/l3_hamode_db.py b/neutron/db/l3_hamode_db.py index 1b6412a3d27..b59b74b07fa 100644 --- a/neutron/db/l3_hamode_db.py +++ b/neutron/db/l3_hamode_db.py @@ -53,6 +53,7 @@ from neutron.db import l3_dvr_db from neutron.objects import base from neutron.objects import l3_hamode from neutron.objects import router as l3_obj +from neutron.objects import subnet as subnet_obj VR_ID_RANGE = set(range(1, 255)) @@ -222,8 +223,7 @@ class L3_HA_NAT_db_mixin(l3_dvr_db.L3_NAT_with_dvr_db_mixin, network[providernet.PHYSICAL_NETWORK] = ( cfg.CONF.l3_ha_network_physical_name) - def _create_ha_network(self, context, tenant_id): - admin_ctx = context.elevated() + def _create_ha_network(self, admin_ctx, tenant_id): # The project ID is needed to create the ``L3HARouterNetwork`` # resource; the project ID cannot be retrieved from the network because # is explicitly created without it. @@ -371,13 +371,32 @@ class L3_HA_NAT_db_mixin(l3_dvr_db.L3_NAT_with_dvr_db_mixin, """Event handler to create HA resources before router creation.""" if not self._is_ha(router): return + + admin_ctx = context.elevated() # ensure the HA network exists before we start router creation so # we can provide meaningful errors back to the user if no network # can be allocated # TODO(ralonsoh): remove once bp/keystone-v3 migration finishes. project_id = router.get('project_id') or router['tenant_id'] - if not self.get_ha_network(context, project_id): - self._create_ha_network(context, project_id) + ha_network = self.get_ha_network(admin_ctx, project_id) + if not ha_network: + self._create_ha_network(admin_ctx, project_id) + else: + # Check the HA network subnet. As reported in LP#2016198, two + # consecutive router creation can try to create the HA network at + # the same time. Because the network and subnet creation operations + # cannot be executed in the same transaction, it is needed to make + # this check before leaving this callback. + network_id = ha_network['network_id'] + if subnet_obj.Subnet.count(admin_ctx, network_id=network_id) > 0: + return + try: + self._create_ha_subnet(admin_ctx, network_id, project_id) + except n_exc.InvalidInput: + # That could happen when the previous method tries to create + # the HA subnet with the same CIDR. In this case, dismiss the + # exception. + pass @registry.receives(resources.ROUTER, [events.PRECOMMIT_CREATE], priority_group.PRIORITY_ROUTER_EXTENDED_ATTRIBUTE) diff --git a/neutron/tests/unit/db/test_l3_hamode_db.py b/neutron/tests/unit/db/test_l3_hamode_db.py index e44ee223805..af3cec45b36 100644 --- a/neutron/tests/unit/db/test_l3_hamode_db.py +++ b/neutron/tests/unit/db/test_l3_hamode_db.py @@ -48,6 +48,7 @@ from neutron.db import l3_agentschedulers_db from neutron.db import l3_hamode_db from neutron.objects import l3_hamode from neutron.objects import network as network_obj +from neutron.objects import subnet as subnet_obj from neutron import quota from neutron.scheduler import l3_agent_scheduler from neutron.services.revisions import revision_plugin @@ -1399,7 +1400,7 @@ class L3HAModeDbTestCase(L3HATestFramework): self.assertEqual(self.agent2['host'], routers[0]['gw_port_host']) def test__before_router_create_no_network(self): - project_id = 'project1' + project_id = uuidutils.generate_uuid() ha_network = self.plugin.get_ha_network(self.admin_ctx, project_id) self.assertIsNone(ha_network) @@ -1413,6 +1414,35 @@ class L3HAModeDbTestCase(L3HATestFramework): ha_network = self.plugin.get_ha_network(self.admin_ctx, project_id) self.assertEqual(project_id, ha_network.project_id) + def test__before_router_create_no_subnet(self): + project_id = uuidutils.generate_uuid() + admin_ctx = self.admin_ctx + admin_ctx.project_id = project_id + self._create_network(self.core_plugin, admin_ctx, + tenant_id=project_id, ha=True) + ha_network = self.plugin.get_ha_network(self.admin_ctx, project_id) + self.assertEqual(project_id, ha_network.project_id) + subnets = subnet_obj.Subnet.get_objects( + admin_ctx, network_id=ha_network.network_id) + self.assertEqual([], subnets) + + router = {'ha': True, 'project_id': project_id} + self.plugin._before_router_create(mock.ANY, admin_ctx, router) + ha_network = self.plugin.get_ha_network(admin_ctx, project_id) + self.assertEqual(project_id, ha_network.project_id) + subnets = subnet_obj.Subnet.get_objects( + admin_ctx, network_id=ha_network.network_id) + self.assertEqual(1, len(subnets)) + + # This test is simulating a concurrent update. The "Subnet.count" + # method returns 0 and "_before_router_create" tries to create the + # subnet again. The exception is catch and dismissed. + with mock.patch.object(subnet_obj.Subnet, 'count', return_value=0): + self.plugin._before_router_create(mock.ANY, admin_ctx, router) + subnets = subnet_obj.Subnet.get_objects( + admin_ctx, network_id=ha_network.network_id) + self.assertEqual(1, len(subnets)) + class L3HAUserTestCase(L3HATestFramework):