Fix dynamic segment allocation race condition

When two segments are concurrently created this could have resulted in
both threads creating a segment, thus resulting in two segments with
different segmentation ids. To prevent this we now introduce a new
unique constraint onto the networksegments table, which requires
(network_id, network_type, physical_network) to be unique, which allows
only a single segment with a single segmentation id to exist per
combination of these three values.

With the constraint in place a DB error will be thrown, which will cause
allocate_dynamic_segment() to be executed again and this time it will
find the already existing segment. To make sure that no additional DB
objects are created when segment creation failed we need to put all of
the allocation code into a DB transaction.

Change-Id: I407ae88d69ed971bf8d9a9b79120366f33bb56fd
Closes-Bug: #1791233
This commit is contained in:
Sebastian Lohff 2021-04-28 14:53:46 +02:00
parent 9b9bd56cef
commit b993ebb407
11 changed files with 212 additions and 35 deletions

View File

@ -20,12 +20,14 @@ from oslo_config import cfg
from oslo_serialization import jsonutils from oslo_serialization import jsonutils
from oslo_upgradecheck import common_checks from oslo_upgradecheck import common_checks
from oslo_upgradecheck import upgradecheck from oslo_upgradecheck import upgradecheck
from sqlalchemy import func
from sqlalchemy import or_ from sqlalchemy import or_
from neutron._i18n import _ from neutron._i18n import _
from neutron.cmd.upgrade_checks import base from neutron.cmd.upgrade_checks import base
from neutron.db.models import agent as agent_model from neutron.db.models import agent as agent_model
from neutron.db.models.plugins.ml2 import vlanallocation from neutron.db.models.plugins.ml2 import vlanallocation
from neutron.db.models import segment
from neutron.db import models_v2 from neutron.db import models_v2
@ -90,6 +92,20 @@ def port_mac_addresses():
ctx.session.query(models_v2.Port.mac_address).all()] ctx.session.query(models_v2.Port.mac_address).all()]
def get_duplicate_network_segment_count():
ctx = context.get_admin_context()
query = ctx.session.query(segment.NetworkSegment.network_id)
# for a unique constraint it's always NULL != NULL --> we filter them out
query = query.filter(segment.NetworkSegment.physical_network.isnot(None))
query = query.group_by(
segment.NetworkSegment.network_id,
segment.NetworkSegment.network_type,
segment.NetworkSegment.physical_network
)
query = query.having(func.count() > 1)
return query.count()
class CoreChecks(base.BaseChecks): class CoreChecks(base.BaseChecks):
def get_checks(self): def get_checks(self):
@ -109,6 +125,8 @@ class CoreChecks(base.BaseChecks):
(common_checks.check_policy_json, {'conf': cfg.CONF})), (common_checks.check_policy_json, {'conf': cfg.CONF})),
(_('Port MAC address sanity check'), (_('Port MAC address sanity check'),
self.port_mac_address_sanity), self.port_mac_address_sanity),
(_('NetworkSegments unique constraint check'),
self.networksegments_unique_constraint_check),
] ]
@staticmethod @staticmethod
@ -319,3 +337,31 @@ class CoreChecks(base.BaseChecks):
upgradecheck.Code.SUCCESS, upgradecheck.Code.SUCCESS,
_("All port MAC addresses are correctly formated in the " _("All port MAC addresses are correctly formated in the "
"database.")) "database."))
@staticmethod
def networksegments_unique_constraint_check(checker):
"""Checks that there are no duplicate networksegments
No two networksegments should never share the same network_id,
network_type and physical_network. Two NULL values are not regarded
as equal for a unique constraint, so networksegments with NULL as
physical_network are ignored by this check.
"""
if not cfg.CONF.database.connection:
return upgradecheck.Result(
upgradecheck.Code.WARNING,
_("Database connection string is not set. Check for port MAC "
"sanity can't be done."))
count = get_duplicate_network_segment_count()
if count:
return upgradecheck.Result(
upgradecheck.Code.WARNING,
_("There are %d instances of networksegments sharing the same "
"combination of network_id, network_type and "
"physical_network.") % count)
return upgradecheck.Result(
upgradecheck.Code.SUCCESS,
_("No networksegments sharing the same network_id, network_type "
"and physical_network found."))

View File

@ -1 +1 @@
ba859d649675 e981acd076d3

View File

@ -0,0 +1,37 @@
# Copyright 2021 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
from alembic import op
"""Add NetworkSegments database constraint
Revision ID: e981acd076d3
Revises: ba859d649675
Create Date: 2021-04-23 16:01:01.320910
"""
# revision identifiers, used by Alembic.
revision = 'e981acd076d3'
down_revision = 'ba859d649675'
def upgrade():
op.create_unique_constraint(
'uniq_networksegment0network_id0network_type0physical_network',
'networksegments',
['network_id', 'network_type', 'physical_network']
)

View File

@ -53,6 +53,16 @@ class NetworkSegment(standard_attr.HasStandardAttributes,
cascade='delete')) cascade='delete'))
api_collections = [segment.SEGMENTS] api_collections = [segment.SEGMENTS]
__table_args__ = (
sa.UniqueConstraint(
network_id,
network_type,
physical_network,
name='uniq_networksegment0network_id0'
'network_type0physical_network'),
model_base.BASEV2.__table_args__
)
class SegmentHostMapping(model_base.BASEV2): class SegmentHostMapping(model_base.BASEV2):

View File

@ -340,6 +340,7 @@ class TypeManager(stevedore.named.NamedExtensionManager):
LOG.error("Failed to release segment '%s' because " LOG.error("Failed to release segment '%s' because "
"network type is not supported.", segment) "network type is not supported.", segment)
@db_api.retry_if_session_inactive()
def allocate_dynamic_segment(self, context, network_id, segment): def allocate_dynamic_segment(self, context, network_id, segment):
"""Allocate a dynamic segment using a partial or full segment dict.""" """Allocate a dynamic segment using a partial or full segment dict."""
dynamic_segment = segments_db.get_dynamic_segment( dynamic_segment = segments_db.get_dynamic_segment(
@ -349,31 +350,35 @@ class TypeManager(stevedore.named.NamedExtensionManager):
if dynamic_segment: if dynamic_segment:
return dynamic_segment return dynamic_segment
driver = self.drivers.get(segment.get(api.NETWORK_TYPE)) with db_api.CONTEXT_WRITER.using(context):
if isinstance(driver.obj, api.TypeDriver): driver = self.drivers.get(segment.get(api.NETWORK_TYPE))
dynamic_segment = driver.obj.reserve_provider_segment( if isinstance(driver.obj, api.TypeDriver):
context.session, segment) dynamic_segment = driver.obj.reserve_provider_segment(
else: context.session, segment)
dynamic_segment = driver.obj.reserve_provider_segment( else:
context, segment) dynamic_segment = driver.obj.reserve_provider_segment(
segments_db.add_network_segment(context, network_id, dynamic_segment, context, segment)
is_dynamic=True) segments_db.add_network_segment(context, network_id,
dynamic_segment,
is_dynamic=True)
return dynamic_segment return dynamic_segment
@db_api.retry_if_session_inactive()
def release_dynamic_segment(self, context, segment_id): def release_dynamic_segment(self, context, segment_id):
"""Delete a dynamic segment.""" """Delete a dynamic segment."""
segment = segments_db.get_segment_by_id(context, segment_id) segment = segments_db.get_segment_by_id(context, segment_id)
if segment: if segment:
driver = self.drivers.get(segment.get(api.NETWORK_TYPE)) with db_api.CONTEXT_WRITER.using(context):
if driver: driver = self.drivers.get(segment.get(api.NETWORK_TYPE))
if isinstance(driver.obj, api.TypeDriver): if driver:
driver.obj.release_segment(context.session, segment) if isinstance(driver.obj, api.TypeDriver):
driver.obj.release_segment(context.session, segment)
else:
driver.obj.release_segment(context, segment)
segments_db.delete_network_segment(context, segment_id)
else: else:
driver.obj.release_segment(context, segment) LOG.error("Failed to release segment '%s' because "
segments_db.delete_network_segment(context, segment_id) "network type is not supported.", segment)
else:
LOG.error("Failed to release segment '%s' because "
"network type is not supported.", segment)
else: else:
LOG.debug("No segment found with id %(segment_id)s", segment_id) LOG.debug("No segment found with id %(segment_id)s", segment_id)

View File

@ -201,3 +201,13 @@ class TestChecks(base.BaseTestCase):
mock_port_mac_addresses.return_value = mac_addresses mock_port_mac_addresses.return_value = mac_addresses
result = checks.CoreChecks.port_mac_address_sanity(mock.ANY) result = checks.CoreChecks.port_mac_address_sanity(mock.ANY)
self.assertEqual(returned_code, result.code) self.assertEqual(returned_code, result.code)
def test_networksegments_unique_constraint_check(self):
cases = ([0, Code.SUCCESS], [1, Code.WARNING])
with mock.patch.object(
checks, 'get_duplicate_network_segment_count') as mock_count:
for count, returned_code in cases:
mock_count.return_value = count
result = checks.CoreChecks.\
networksegments_unique_constraint_check(mock.ANY)
self.assertEqual(returned_code, result.code)

View File

@ -636,7 +636,7 @@ class TestSegmentSubnetAssociation(SegmentTestCase):
subnet = subnet['subnet'] subnet = subnet['subnet']
segment2 = self._test_create_segment(network_id=net['id'], segment2 = self._test_create_segment(network_id=net['id'],
physical_network='phys_net2', physical_network='phys_net3',
segmentation_id=202)['segment'] segmentation_id=202)['segment']
data = {'subnet': {'segment_id': segment2['id']}} data = {'subnet': {'segment_id': segment2['id']}}

View File

@ -113,7 +113,7 @@ class Ml2DBTestCase(testlib_api.SqlTestCase):
api.PHYSICAL_NETWORK: 'physnet1', api.PHYSICAL_NETWORK: 'physnet1',
api.SEGMENTATION_ID: 1}, api.SEGMENTATION_ID: 1},
{api.NETWORK_TYPE: 'vlan', {api.NETWORK_TYPE: 'vlan',
api.PHYSICAL_NETWORK: 'physnet1', api.PHYSICAL_NETWORK: 'physnet2',
api.SEGMENTATION_ID: 2}] api.SEGMENTATION_ID: 2}]
self._create_segments(segments) self._create_segments(segments)
@ -124,13 +124,13 @@ class Ml2DBTestCase(testlib_api.SqlTestCase):
api.PHYSICAL_NETWORK: 'physnet1', api.PHYSICAL_NETWORK: 'physnet1',
api.SEGMENTATION_ID: 1}, api.SEGMENTATION_ID: 1},
{api.NETWORK_TYPE: 'vlan', {api.NETWORK_TYPE: 'vlan',
api.PHYSICAL_NETWORK: 'physnet1', api.PHYSICAL_NETWORK: 'physnet2',
api.SEGMENTATION_ID: 2}] api.SEGMENTATION_ID: 2}]
segments2 = [{api.NETWORK_TYPE: 'vlan', segments2 = [{api.NETWORK_TYPE: 'vlan',
api.PHYSICAL_NETWORK: 'physnet1', api.PHYSICAL_NETWORK: 'physnet1',
api.SEGMENTATION_ID: 3}, api.SEGMENTATION_ID: 3},
{api.NETWORK_TYPE: 'vlan', {api.NETWORK_TYPE: 'vlan',
api.PHYSICAL_NETWORK: 'physnet1', api.PHYSICAL_NETWORK: 'physnet2',
api.SEGMENTATION_ID: 4}] api.SEGMENTATION_ID: 4}]
net1segs = self._create_segments(segments1, network_id=net_id1) net1segs = self._create_segments(segments1, network_id=net_id1)
net2segs = self._create_segments(segments2, network_id=net_id2) net2segs = self._create_segments(segments2, network_id=net_id2)

View File

@ -150,13 +150,17 @@ class Ml2PluginV2TestCase(test_plugin.NeutronDbPluginV2TestCase):
self._mechanism_drivers, self._mechanism_drivers,
group='ml2') group='ml2')
self.physnet = 'physnet1' self.physnet = 'physnet1'
self.physnet2 = 'physnet2'
self.physnet3 = 'physnet3'
self.vlan_range = '1:100' self.vlan_range = '1:100'
self.vlan_range2 = '200:300' self.vlan_range2 = '200:300'
self.physnet2 = 'physnet2' self.vlan_range3 = '400:500'
self.phys_vrange = ':'.join([self.physnet, self.vlan_range]) self.phys_vrange = ':'.join([self.physnet, self.vlan_range])
self.phys2_vrange = ':'.join([self.physnet2, self.vlan_range2]) self.phys2_vrange = ':'.join([self.physnet2, self.vlan_range2])
self.phys3_vrange = ':'.join([self.physnet3, self.vlan_range3])
cfg.CONF.set_override('network_vlan_ranges', cfg.CONF.set_override('network_vlan_ranges',
[self.phys_vrange, self.phys2_vrange], [self.phys_vrange, self.phys2_vrange,
self.phys3_vrange],
group='ml2_type_vlan') group='ml2_type_vlan')
self.setup_parent() self.setup_parent()
self.driver = directory.get_plugin() self.driver = directory.get_plugin()
@ -223,7 +227,7 @@ class TestMl2NetworksV2(test_plugin.TestNetworksV2,
self.mp_nets = [{'name': 'net4', self.mp_nets = [{'name': 'net4',
mpnet_apidef.SEGMENTS: mpnet_apidef.SEGMENTS:
[{pnet.NETWORK_TYPE: 'vlan', [{pnet.NETWORK_TYPE: 'vlan',
pnet.PHYSICAL_NETWORK: 'physnet2', pnet.PHYSICAL_NETWORK: 'physnet3',
pnet.SEGMENTATION_ID: 1}, pnet.SEGMENTATION_ID: 1},
{pnet.NETWORK_TYPE: 'vlan', {pnet.NETWORK_TYPE: 'vlan',
pnet.PHYSICAL_NETWORK: 'physnet2', pnet.PHYSICAL_NETWORK: 'physnet2',
@ -481,7 +485,7 @@ class TestMl2NetworksV2(test_plugin.TestNetworksV2,
pnet.PHYSICAL_NETWORK: 'physnet1', pnet.PHYSICAL_NETWORK: 'physnet1',
pnet.SEGMENTATION_ID: 1}, pnet.SEGMENTATION_ID: 1},
{pnet.NETWORK_TYPE: 'vlan', {pnet.NETWORK_TYPE: 'vlan',
pnet.PHYSICAL_NETWORK: 'physnet1', pnet.PHYSICAL_NETWORK: 'physnet2',
pnet.SEGMENTATION_ID: 2}] pnet.SEGMENTATION_ID: 2}]
with self.network(**{'arg_list': (mpnet_apidef.SEGMENTS, ), with self.network(**{'arg_list': (mpnet_apidef.SEGMENTS, ),
mpnet_apidef.SEGMENTS: segments}) as net: mpnet_apidef.SEGMENTS: segments}) as net:
@ -2677,6 +2681,61 @@ class TestMultiSegmentNetworks(Ml2PluginV2TestCase):
dynamic_segmentation2_id = dynamic_segment2[driver_api.SEGMENTATION_ID] dynamic_segmentation2_id = dynamic_segment2[driver_api.SEGMENTATION_ID]
self.assertNotEqual(dynamic_segmentation_id, dynamic_segmentation2_id) self.assertNotEqual(dynamic_segmentation_id, dynamic_segmentation2_id)
def test_allocate_dynamic_segments_race_condition(self):
# create same segment two times, make sure the same segment comes back
physnet_name = 'physnet1'
segment = {driver_api.NETWORK_TYPE: 'vlan',
driver_api.PHYSICAL_NETWORK: physnet_name}
data = {'network': {'name': 'net1',
'tenant_id': 'tenant_one'}}
network_req = self.new_create_request('networks', data)
network = self.deserialize(self.fmt,
network_req.get_response(self.api))
segment = {driver_api.NETWORK_TYPE: 'vlan',
driver_api.PHYSICAL_NETWORK: physnet_name}
network_id = network['network']['id']
# create first segment
seg1 = self.driver.type_manager.allocate_dynamic_segment(
self.context, network_id, segment)
# create same segment but make sure it is not found on first try
# thus simulating a race condition between two creates
orig_get_dynamic_segment = segments_db.get_dynamic_segment
def return_none_on_first_call(*args, **kwargs):
if not return_none_on_first_call.called:
return_none_on_first_call.called = True
return
return orig_get_dynamic_segment(*args, **kwargs)
return_none_on_first_call.called = False
with mock.patch('neutron.db.segments_db.get_dynamic_segment',
wraps=return_none_on_first_call):
seg2 = self.driver.type_manager.allocate_dynamic_segment(
self.context, network_id, segment)
self.assertEqual('vlan', seg1[driver_api.NETWORK_TYPE])
self.assertEqual(physnet_name,
seg1[driver_api.PHYSICAL_NETWORK])
# make sure we got the same segment
self.assertEqual(seg1[driver_api.PHYSICAL_NETWORK],
seg2[driver_api.PHYSICAL_NETWORK])
self.assertEqual(seg1[driver_api.ID],
seg2[driver_api.ID])
self.assertEqual(seg1[driver_api.SEGMENTATION_ID],
seg2[driver_api.SEGMENTATION_ID])
# make sure that there is only one vlan allocated to our segment
vlan_typedriver = self.driver.type_manager.drivers['vlan']
seg_obj = vlan_typedriver.obj.segmentation_obj
all_allocs = seg_obj.get_objects(self.context)
allocs = [obj for obj in all_allocs
if obj.allocated and obj.physical_network == physnet_name]
self.assertEqual(1, len(allocs))
def test_allocate_release_dynamic_segment(self): def test_allocate_release_dynamic_segment(self):
data = {'network': {'name': 'net1', data = {'network': {'name': 'net1',
'tenant_id': 'tenant_one'}} 'tenant_id': 'tenant_one'}}
@ -2787,7 +2846,7 @@ class TestMultiSegmentNetworks(Ml2PluginV2TestCase):
pnet.PHYSICAL_NETWORK: 'physnet1', pnet.PHYSICAL_NETWORK: 'physnet1',
pnet.SEGMENTATION_ID: 1}, pnet.SEGMENTATION_ID: 1},
{pnet.NETWORK_TYPE: 'vlan', {pnet.NETWORK_TYPE: 'vlan',
pnet.PHYSICAL_NETWORK: 'physnet1', pnet.PHYSICAL_NETWORK: 'physnet2',
pnet.SEGMENTATION_ID: 2}], pnet.SEGMENTATION_ID: 2}],
'tenant_id': 'tenant_one'}} 'tenant_id': 'tenant_one'}}
network_req = self.new_create_request('networks', data) network_req = self.new_create_request('networks', data)
@ -2841,7 +2900,7 @@ class TestMultiSegmentNetworks(Ml2PluginV2TestCase):
res = network_req.get_response(self.api) res = network_req.get_response(self.api)
self.assertEqual(400, res.status_int) self.assertEqual(400, res.status_int)
def test_create_network_duplicate_partial_segments(self): def test_create_network_duplicate_partial_segments_fail(self):
data = {'network': {'name': 'net1', data = {'network': {'name': 'net1',
mpnet_apidef.SEGMENTS: mpnet_apidef.SEGMENTS:
[{pnet.NETWORK_TYPE: 'vlan', [{pnet.NETWORK_TYPE: 'vlan',
@ -2849,9 +2908,11 @@ class TestMultiSegmentNetworks(Ml2PluginV2TestCase):
{pnet.NETWORK_TYPE: 'vlan', {pnet.NETWORK_TYPE: 'vlan',
pnet.PHYSICAL_NETWORK: 'physnet1'}], pnet.PHYSICAL_NETWORK: 'physnet1'}],
'tenant_id': 'tenant_one'}} 'tenant_id': 'tenant_one'}}
retry_fixture = fixture.DBRetryErrorsFixture(max_retries=2)
retry_fixture.setUp()
network_req = self.new_create_request('networks', data) network_req = self.new_create_request('networks', data)
res = network_req.get_response(self.api) res = network_req.get_response(self.api)
self.assertEqual(201, res.status_int) self.assertEqual(409, res.status_int)
def test_release_network_segments(self): def test_release_network_segments(self):
data = {'network': {'name': 'net1', data = {'network': {'name': 'net1',
@ -3488,10 +3549,10 @@ class TestML2Segments(Ml2PluginV2TestCase):
driver_api.PHYSICAL_NETWORK: self.physnet, driver_api.PHYSICAL_NETWORK: self.physnet,
driver_api.SEGMENTATION_ID: 1000} driver_api.SEGMENTATION_ID: 1000}
seg2 = {driver_api.NETWORK_TYPE: 'vlan', seg2 = {driver_api.NETWORK_TYPE: 'vlan',
driver_api.PHYSICAL_NETWORK: self.physnet, driver_api.PHYSICAL_NETWORK: self.physnet2,
driver_api.SEGMENTATION_ID: 1001} driver_api.SEGMENTATION_ID: 1001}
seg3 = {driver_api.NETWORK_TYPE: 'vlan', seg3 = {driver_api.NETWORK_TYPE: 'vlan',
driver_api.PHYSICAL_NETWORK: self.physnet, driver_api.PHYSICAL_NETWORK: self.physnet3,
driver_api.SEGMENTATION_ID: 1002} driver_api.SEGMENTATION_ID: 1002}
with self.network() as network: with self.network() as network:
network = network['network'] network = network['network']

View File

@ -307,12 +307,12 @@ class TestAutoScheduleSegments(test_plugin.Ml2PluginV2TestCase,
'shared': True}}) 'shared': True}})
return net['id'] return net['id']
def _create_segment(self, network_id): def _create_segment(self, network_id, physical_network='physnet1'):
seg = self.segments_plugin.create_segment( seg = self.segments_plugin.create_segment(
self.ctx, self.ctx,
{'segment': {'network_id': network_id, {'segment': {'network_id': network_id,
'name': None, 'description': None, 'name': None, 'description': None,
'physical_network': 'physnet1', 'physical_network': physical_network,
'network_type': 'vlan', 'network_type': 'vlan',
'segmentation_id': constants.ATTR_NOT_SPECIFIED}}) 'segmentation_id': constants.ATTR_NOT_SPECIFIED}})
return seg['id'] return seg['id']
@ -381,7 +381,7 @@ class TestAutoScheduleSegments(test_plugin.Ml2PluginV2TestCase,
self.ctx, [net_id]) self.ctx, [net_id])
self.assertEqual(1, len(agents)) self.assertEqual(1, len(agents))
seg2_id = self._create_segment(net_id) seg2_id = self._create_segment(net_id, 'physnet2')
self._create_subnet(seg2_id, net_id, '192.168.11.0/24') self._create_subnet(seg2_id, net_id, '192.168.11.0/24')
helpers.register_dhcp_agent(HOST_C) helpers.register_dhcp_agent(HOST_C)
segments_service_db.update_segment_host_mapping( segments_service_db.update_segment_host_mapping(

View File

@ -0,0 +1,8 @@
---
upgrade:
- |
A unique constraint for (network_id, network_type, physical_network) is
added to the networksegments table. This was done to prevent race
conditions on dynamic segment allocation. Operators having networks with
multiple segments (e.g. when using hierarchical portbinding) should check
that this constraint is not violated with the included upgrade-check.