From b993ebb407208bf04a7957a3d15c9ac482a1dd5c Mon Sep 17 00:00:00 2001 From: Sebastian Lohff Date: Wed, 28 Apr 2021 14:53:46 +0200 Subject: [PATCH] Fix dynamic segment allocation race condition When two segments are concurrently created this could have resulted in both threads creating a segment, thus resulting in two segments with different segmentation ids. To prevent this we now introduce a new unique constraint onto the networksegments table, which requires (network_id, network_type, physical_network) to be unique, which allows only a single segment with a single segmentation id to exist per combination of these three values. With the constraint in place a DB error will be thrown, which will cause allocate_dynamic_segment() to be executed again and this time it will find the already existing segment. To make sure that no additional DB objects are created when segment creation failed we need to put all of the allocation code into a DB transaction. Change-Id: I407ae88d69ed971bf8d9a9b79120366f33bb56fd Closes-Bug: #1791233 --- neutron/cmd/upgrade_checks/checks.py | 46 +++++++++++ .../alembic_migrations/versions/EXPAND_HEAD | 2 +- ...add_networksegments_database_constraint.py | 37 +++++++++ neutron/db/models/segment.py | 10 +++ neutron/plugins/ml2/managers.py | 41 +++++----- .../unit/cmd/upgrade_checks/test_checks.py | 10 +++ neutron/tests/unit/extensions/test_segment.py | 2 +- neutron/tests/unit/plugins/ml2/test_db.py | 6 +- neutron/tests/unit/plugins/ml2/test_plugin.py | 79 ++++++++++++++++--- .../scheduler/test_dhcp_agent_scheduler.py | 6 +- ...ents-uniq-constraint-89e52b42ca2f7ec2.yaml | 8 ++ 11 files changed, 212 insertions(+), 35 deletions(-) create mode 100644 neutron/db/migration/alembic_migrations/versions/yoga/expand/e981acd076d3_add_networksegments_database_constraint.py create mode 100644 releasenotes/notes/add-networksegments-uniq-constraint-89e52b42ca2f7ec2.yaml diff --git a/neutron/cmd/upgrade_checks/checks.py b/neutron/cmd/upgrade_checks/checks.py index 4645b398ce2..3b7bf3a2d26 100644 --- a/neutron/cmd/upgrade_checks/checks.py +++ b/neutron/cmd/upgrade_checks/checks.py @@ -20,12 +20,14 @@ from oslo_config import cfg from oslo_serialization import jsonutils from oslo_upgradecheck import common_checks from oslo_upgradecheck import upgradecheck +from sqlalchemy import func from sqlalchemy import or_ from neutron._i18n import _ from neutron.cmd.upgrade_checks import base from neutron.db.models import agent as agent_model from neutron.db.models.plugins.ml2 import vlanallocation +from neutron.db.models import segment from neutron.db import models_v2 @@ -90,6 +92,20 @@ def port_mac_addresses(): ctx.session.query(models_v2.Port.mac_address).all()] +def get_duplicate_network_segment_count(): + ctx = context.get_admin_context() + query = ctx.session.query(segment.NetworkSegment.network_id) + # for a unique constraint it's always NULL != NULL --> we filter them out + query = query.filter(segment.NetworkSegment.physical_network.isnot(None)) + query = query.group_by( + segment.NetworkSegment.network_id, + segment.NetworkSegment.network_type, + segment.NetworkSegment.physical_network + ) + query = query.having(func.count() > 1) + return query.count() + + class CoreChecks(base.BaseChecks): def get_checks(self): @@ -109,6 +125,8 @@ class CoreChecks(base.BaseChecks): (common_checks.check_policy_json, {'conf': cfg.CONF})), (_('Port MAC address sanity check'), self.port_mac_address_sanity), + (_('NetworkSegments unique constraint check'), + self.networksegments_unique_constraint_check), ] @staticmethod @@ -319,3 +337,31 @@ class CoreChecks(base.BaseChecks): upgradecheck.Code.SUCCESS, _("All port MAC addresses are correctly formated in the " "database.")) + + @staticmethod + def networksegments_unique_constraint_check(checker): + """Checks that there are no duplicate networksegments + + No two networksegments should never share the same network_id, + network_type and physical_network. Two NULL values are not regarded + as equal for a unique constraint, so networksegments with NULL as + physical_network are ignored by this check. + """ + if not cfg.CONF.database.connection: + return upgradecheck.Result( + upgradecheck.Code.WARNING, + _("Database connection string is not set. Check for port MAC " + "sanity can't be done.")) + + count = get_duplicate_network_segment_count() + if count: + return upgradecheck.Result( + upgradecheck.Code.WARNING, + _("There are %d instances of networksegments sharing the same " + "combination of network_id, network_type and " + "physical_network.") % count) + + return upgradecheck.Result( + upgradecheck.Code.SUCCESS, + _("No networksegments sharing the same network_id, network_type " + "and physical_network found.")) diff --git a/neutron/db/migration/alembic_migrations/versions/EXPAND_HEAD b/neutron/db/migration/alembic_migrations/versions/EXPAND_HEAD index e2ec2347a47..a167b34ef58 100644 --- a/neutron/db/migration/alembic_migrations/versions/EXPAND_HEAD +++ b/neutron/db/migration/alembic_migrations/versions/EXPAND_HEAD @@ -1 +1 @@ -ba859d649675 +e981acd076d3 diff --git a/neutron/db/migration/alembic_migrations/versions/yoga/expand/e981acd076d3_add_networksegments_database_constraint.py b/neutron/db/migration/alembic_migrations/versions/yoga/expand/e981acd076d3_add_networksegments_database_constraint.py new file mode 100644 index 00000000000..65fa20e55fa --- /dev/null +++ b/neutron/db/migration/alembic_migrations/versions/yoga/expand/e981acd076d3_add_networksegments_database_constraint.py @@ -0,0 +1,37 @@ +# Copyright 2021 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + +from alembic import op + + +"""Add NetworkSegments database constraint + +Revision ID: e981acd076d3 +Revises: ba859d649675 +Create Date: 2021-04-23 16:01:01.320910 + +""" + +# revision identifiers, used by Alembic. +revision = 'e981acd076d3' +down_revision = 'ba859d649675' + + +def upgrade(): + op.create_unique_constraint( + 'uniq_networksegment0network_id0network_type0physical_network', + 'networksegments', + ['network_id', 'network_type', 'physical_network'] + ) diff --git a/neutron/db/models/segment.py b/neutron/db/models/segment.py index 5d1b2b6abc8..ec353932e26 100644 --- a/neutron/db/models/segment.py +++ b/neutron/db/models/segment.py @@ -53,6 +53,16 @@ class NetworkSegment(standard_attr.HasStandardAttributes, cascade='delete')) api_collections = [segment.SEGMENTS] + __table_args__ = ( + sa.UniqueConstraint( + network_id, + network_type, + physical_network, + name='uniq_networksegment0network_id0' + 'network_type0physical_network'), + model_base.BASEV2.__table_args__ + ) + class SegmentHostMapping(model_base.BASEV2): diff --git a/neutron/plugins/ml2/managers.py b/neutron/plugins/ml2/managers.py index a20992ba1d7..a549aba60b6 100644 --- a/neutron/plugins/ml2/managers.py +++ b/neutron/plugins/ml2/managers.py @@ -340,6 +340,7 @@ class TypeManager(stevedore.named.NamedExtensionManager): LOG.error("Failed to release segment '%s' because " "network type is not supported.", segment) + @db_api.retry_if_session_inactive() def allocate_dynamic_segment(self, context, network_id, segment): """Allocate a dynamic segment using a partial or full segment dict.""" dynamic_segment = segments_db.get_dynamic_segment( @@ -349,31 +350,35 @@ class TypeManager(stevedore.named.NamedExtensionManager): if dynamic_segment: return dynamic_segment - driver = self.drivers.get(segment.get(api.NETWORK_TYPE)) - if isinstance(driver.obj, api.TypeDriver): - dynamic_segment = driver.obj.reserve_provider_segment( - context.session, segment) - else: - dynamic_segment = driver.obj.reserve_provider_segment( - context, segment) - segments_db.add_network_segment(context, network_id, dynamic_segment, - is_dynamic=True) + with db_api.CONTEXT_WRITER.using(context): + driver = self.drivers.get(segment.get(api.NETWORK_TYPE)) + if isinstance(driver.obj, api.TypeDriver): + dynamic_segment = driver.obj.reserve_provider_segment( + context.session, segment) + else: + dynamic_segment = driver.obj.reserve_provider_segment( + context, segment) + segments_db.add_network_segment(context, network_id, + dynamic_segment, + is_dynamic=True) return dynamic_segment + @db_api.retry_if_session_inactive() def release_dynamic_segment(self, context, segment_id): """Delete a dynamic segment.""" segment = segments_db.get_segment_by_id(context, segment_id) if segment: - driver = self.drivers.get(segment.get(api.NETWORK_TYPE)) - if driver: - if isinstance(driver.obj, api.TypeDriver): - driver.obj.release_segment(context.session, segment) + with db_api.CONTEXT_WRITER.using(context): + driver = self.drivers.get(segment.get(api.NETWORK_TYPE)) + if driver: + if isinstance(driver.obj, api.TypeDriver): + driver.obj.release_segment(context.session, segment) + else: + driver.obj.release_segment(context, segment) + segments_db.delete_network_segment(context, segment_id) else: - driver.obj.release_segment(context, segment) - segments_db.delete_network_segment(context, segment_id) - else: - LOG.error("Failed to release segment '%s' because " - "network type is not supported.", segment) + LOG.error("Failed to release segment '%s' because " + "network type is not supported.", segment) else: LOG.debug("No segment found with id %(segment_id)s", segment_id) diff --git a/neutron/tests/unit/cmd/upgrade_checks/test_checks.py b/neutron/tests/unit/cmd/upgrade_checks/test_checks.py index dd33f6c1eee..91e3bd7e39a 100644 --- a/neutron/tests/unit/cmd/upgrade_checks/test_checks.py +++ b/neutron/tests/unit/cmd/upgrade_checks/test_checks.py @@ -201,3 +201,13 @@ class TestChecks(base.BaseTestCase): mock_port_mac_addresses.return_value = mac_addresses result = checks.CoreChecks.port_mac_address_sanity(mock.ANY) self.assertEqual(returned_code, result.code) + + def test_networksegments_unique_constraint_check(self): + cases = ([0, Code.SUCCESS], [1, Code.WARNING]) + with mock.patch.object( + checks, 'get_duplicate_network_segment_count') as mock_count: + for count, returned_code in cases: + mock_count.return_value = count + result = checks.CoreChecks.\ + networksegments_unique_constraint_check(mock.ANY) + self.assertEqual(returned_code, result.code) diff --git a/neutron/tests/unit/extensions/test_segment.py b/neutron/tests/unit/extensions/test_segment.py index c9c4aaddb85..e04a671939f 100644 --- a/neutron/tests/unit/extensions/test_segment.py +++ b/neutron/tests/unit/extensions/test_segment.py @@ -636,7 +636,7 @@ class TestSegmentSubnetAssociation(SegmentTestCase): subnet = subnet['subnet'] segment2 = self._test_create_segment(network_id=net['id'], - physical_network='phys_net2', + physical_network='phys_net3', segmentation_id=202)['segment'] data = {'subnet': {'segment_id': segment2['id']}} diff --git a/neutron/tests/unit/plugins/ml2/test_db.py b/neutron/tests/unit/plugins/ml2/test_db.py index fbdd649d147..9346bebc1a2 100644 --- a/neutron/tests/unit/plugins/ml2/test_db.py +++ b/neutron/tests/unit/plugins/ml2/test_db.py @@ -113,7 +113,7 @@ class Ml2DBTestCase(testlib_api.SqlTestCase): api.PHYSICAL_NETWORK: 'physnet1', api.SEGMENTATION_ID: 1}, {api.NETWORK_TYPE: 'vlan', - api.PHYSICAL_NETWORK: 'physnet1', + api.PHYSICAL_NETWORK: 'physnet2', api.SEGMENTATION_ID: 2}] self._create_segments(segments) @@ -124,13 +124,13 @@ class Ml2DBTestCase(testlib_api.SqlTestCase): api.PHYSICAL_NETWORK: 'physnet1', api.SEGMENTATION_ID: 1}, {api.NETWORK_TYPE: 'vlan', - api.PHYSICAL_NETWORK: 'physnet1', + api.PHYSICAL_NETWORK: 'physnet2', api.SEGMENTATION_ID: 2}] segments2 = [{api.NETWORK_TYPE: 'vlan', api.PHYSICAL_NETWORK: 'physnet1', api.SEGMENTATION_ID: 3}, {api.NETWORK_TYPE: 'vlan', - api.PHYSICAL_NETWORK: 'physnet1', + api.PHYSICAL_NETWORK: 'physnet2', api.SEGMENTATION_ID: 4}] net1segs = self._create_segments(segments1, network_id=net_id1) net2segs = self._create_segments(segments2, network_id=net_id2) diff --git a/neutron/tests/unit/plugins/ml2/test_plugin.py b/neutron/tests/unit/plugins/ml2/test_plugin.py index b0a081dac5d..111b3b8251c 100644 --- a/neutron/tests/unit/plugins/ml2/test_plugin.py +++ b/neutron/tests/unit/plugins/ml2/test_plugin.py @@ -150,13 +150,17 @@ class Ml2PluginV2TestCase(test_plugin.NeutronDbPluginV2TestCase): self._mechanism_drivers, group='ml2') self.physnet = 'physnet1' + self.physnet2 = 'physnet2' + self.physnet3 = 'physnet3' self.vlan_range = '1:100' self.vlan_range2 = '200:300' - self.physnet2 = 'physnet2' + self.vlan_range3 = '400:500' self.phys_vrange = ':'.join([self.physnet, self.vlan_range]) self.phys2_vrange = ':'.join([self.physnet2, self.vlan_range2]) + self.phys3_vrange = ':'.join([self.physnet3, self.vlan_range3]) cfg.CONF.set_override('network_vlan_ranges', - [self.phys_vrange, self.phys2_vrange], + [self.phys_vrange, self.phys2_vrange, + self.phys3_vrange], group='ml2_type_vlan') self.setup_parent() self.driver = directory.get_plugin() @@ -223,7 +227,7 @@ class TestMl2NetworksV2(test_plugin.TestNetworksV2, self.mp_nets = [{'name': 'net4', mpnet_apidef.SEGMENTS: [{pnet.NETWORK_TYPE: 'vlan', - pnet.PHYSICAL_NETWORK: 'physnet2', + pnet.PHYSICAL_NETWORK: 'physnet3', pnet.SEGMENTATION_ID: 1}, {pnet.NETWORK_TYPE: 'vlan', pnet.PHYSICAL_NETWORK: 'physnet2', @@ -481,7 +485,7 @@ class TestMl2NetworksV2(test_plugin.TestNetworksV2, pnet.PHYSICAL_NETWORK: 'physnet1', pnet.SEGMENTATION_ID: 1}, {pnet.NETWORK_TYPE: 'vlan', - pnet.PHYSICAL_NETWORK: 'physnet1', + pnet.PHYSICAL_NETWORK: 'physnet2', pnet.SEGMENTATION_ID: 2}] with self.network(**{'arg_list': (mpnet_apidef.SEGMENTS, ), mpnet_apidef.SEGMENTS: segments}) as net: @@ -2677,6 +2681,61 @@ class TestMultiSegmentNetworks(Ml2PluginV2TestCase): dynamic_segmentation2_id = dynamic_segment2[driver_api.SEGMENTATION_ID] self.assertNotEqual(dynamic_segmentation_id, dynamic_segmentation2_id) + def test_allocate_dynamic_segments_race_condition(self): + # create same segment two times, make sure the same segment comes back + physnet_name = 'physnet1' + segment = {driver_api.NETWORK_TYPE: 'vlan', + driver_api.PHYSICAL_NETWORK: physnet_name} + + data = {'network': {'name': 'net1', + 'tenant_id': 'tenant_one'}} + network_req = self.new_create_request('networks', data) + network = self.deserialize(self.fmt, + network_req.get_response(self.api)) + segment = {driver_api.NETWORK_TYPE: 'vlan', + driver_api.PHYSICAL_NETWORK: physnet_name} + network_id = network['network']['id'] + + # create first segment + seg1 = self.driver.type_manager.allocate_dynamic_segment( + self.context, network_id, segment) + + # create same segment but make sure it is not found on first try + # thus simulating a race condition between two creates + orig_get_dynamic_segment = segments_db.get_dynamic_segment + + def return_none_on_first_call(*args, **kwargs): + if not return_none_on_first_call.called: + return_none_on_first_call.called = True + return + return orig_get_dynamic_segment(*args, **kwargs) + return_none_on_first_call.called = False + + with mock.patch('neutron.db.segments_db.get_dynamic_segment', + wraps=return_none_on_first_call): + seg2 = self.driver.type_manager.allocate_dynamic_segment( + self.context, network_id, segment) + + self.assertEqual('vlan', seg1[driver_api.NETWORK_TYPE]) + self.assertEqual(physnet_name, + seg1[driver_api.PHYSICAL_NETWORK]) + + # make sure we got the same segment + self.assertEqual(seg1[driver_api.PHYSICAL_NETWORK], + seg2[driver_api.PHYSICAL_NETWORK]) + self.assertEqual(seg1[driver_api.ID], + seg2[driver_api.ID]) + self.assertEqual(seg1[driver_api.SEGMENTATION_ID], + seg2[driver_api.SEGMENTATION_ID]) + + # make sure that there is only one vlan allocated to our segment + vlan_typedriver = self.driver.type_manager.drivers['vlan'] + seg_obj = vlan_typedriver.obj.segmentation_obj + all_allocs = seg_obj.get_objects(self.context) + allocs = [obj for obj in all_allocs + if obj.allocated and obj.physical_network == physnet_name] + self.assertEqual(1, len(allocs)) + def test_allocate_release_dynamic_segment(self): data = {'network': {'name': 'net1', 'tenant_id': 'tenant_one'}} @@ -2787,7 +2846,7 @@ class TestMultiSegmentNetworks(Ml2PluginV2TestCase): pnet.PHYSICAL_NETWORK: 'physnet1', pnet.SEGMENTATION_ID: 1}, {pnet.NETWORK_TYPE: 'vlan', - pnet.PHYSICAL_NETWORK: 'physnet1', + pnet.PHYSICAL_NETWORK: 'physnet2', pnet.SEGMENTATION_ID: 2}], 'tenant_id': 'tenant_one'}} network_req = self.new_create_request('networks', data) @@ -2841,7 +2900,7 @@ class TestMultiSegmentNetworks(Ml2PluginV2TestCase): res = network_req.get_response(self.api) self.assertEqual(400, res.status_int) - def test_create_network_duplicate_partial_segments(self): + def test_create_network_duplicate_partial_segments_fail(self): data = {'network': {'name': 'net1', mpnet_apidef.SEGMENTS: [{pnet.NETWORK_TYPE: 'vlan', @@ -2849,9 +2908,11 @@ class TestMultiSegmentNetworks(Ml2PluginV2TestCase): {pnet.NETWORK_TYPE: 'vlan', pnet.PHYSICAL_NETWORK: 'physnet1'}], 'tenant_id': 'tenant_one'}} + retry_fixture = fixture.DBRetryErrorsFixture(max_retries=2) + retry_fixture.setUp() network_req = self.new_create_request('networks', data) res = network_req.get_response(self.api) - self.assertEqual(201, res.status_int) + self.assertEqual(409, res.status_int) def test_release_network_segments(self): data = {'network': {'name': 'net1', @@ -3488,10 +3549,10 @@ class TestML2Segments(Ml2PluginV2TestCase): driver_api.PHYSICAL_NETWORK: self.physnet, driver_api.SEGMENTATION_ID: 1000} seg2 = {driver_api.NETWORK_TYPE: 'vlan', - driver_api.PHYSICAL_NETWORK: self.physnet, + driver_api.PHYSICAL_NETWORK: self.physnet2, driver_api.SEGMENTATION_ID: 1001} seg3 = {driver_api.NETWORK_TYPE: 'vlan', - driver_api.PHYSICAL_NETWORK: self.physnet, + driver_api.PHYSICAL_NETWORK: self.physnet3, driver_api.SEGMENTATION_ID: 1002} with self.network() as network: network = network['network'] diff --git a/neutron/tests/unit/scheduler/test_dhcp_agent_scheduler.py b/neutron/tests/unit/scheduler/test_dhcp_agent_scheduler.py index 3cc6c8b65dc..0ebae7dfdc5 100644 --- a/neutron/tests/unit/scheduler/test_dhcp_agent_scheduler.py +++ b/neutron/tests/unit/scheduler/test_dhcp_agent_scheduler.py @@ -307,12 +307,12 @@ class TestAutoScheduleSegments(test_plugin.Ml2PluginV2TestCase, 'shared': True}}) return net['id'] - def _create_segment(self, network_id): + def _create_segment(self, network_id, physical_network='physnet1'): seg = self.segments_plugin.create_segment( self.ctx, {'segment': {'network_id': network_id, 'name': None, 'description': None, - 'physical_network': 'physnet1', + 'physical_network': physical_network, 'network_type': 'vlan', 'segmentation_id': constants.ATTR_NOT_SPECIFIED}}) return seg['id'] @@ -381,7 +381,7 @@ class TestAutoScheduleSegments(test_plugin.Ml2PluginV2TestCase, self.ctx, [net_id]) self.assertEqual(1, len(agents)) - seg2_id = self._create_segment(net_id) + seg2_id = self._create_segment(net_id, 'physnet2') self._create_subnet(seg2_id, net_id, '192.168.11.0/24') helpers.register_dhcp_agent(HOST_C) segments_service_db.update_segment_host_mapping( diff --git a/releasenotes/notes/add-networksegments-uniq-constraint-89e52b42ca2f7ec2.yaml b/releasenotes/notes/add-networksegments-uniq-constraint-89e52b42ca2f7ec2.yaml new file mode 100644 index 00000000000..bd0fc791979 --- /dev/null +++ b/releasenotes/notes/add-networksegments-uniq-constraint-89e52b42ca2f7ec2.yaml @@ -0,0 +1,8 @@ +--- +upgrade: + - | + A unique constraint for (network_id, network_type, physical_network) is + added to the networksegments table. This was done to prevent race + conditions on dynamic segment allocation. Operators having networks with + multiple segments (e.g. when using hierarchical portbinding) should check + that this constraint is not violated with the included upgrade-check.