Lock subnets during port creation and subnet deletion

The field "in_use" is added to "subnet" DB definition. This DB
register column is a flag used to mark a register as in use
by other transaction. When a write DB transaction writes any
value on this field, the register is locked for any other
concurrent transaction. If two DB transactions try to set this
column at the same time, one of them will fail.

This DB lock is implemented in "subnet" and is used during the
subnet deletion and the port IP assignation, where all the port
network subnets are retrieved to provide an IP address on the subnet
CIDR.

As reported in the related bug, it was possible to assign an IP
to a port and, before the port creation command finished, delete the
subnet where the IP belonged. This patch introduces this subnet lock
during the IP assignation and at the beginning of the subnet deletion
process. At the end of both transactions, the DB engine checks if the
lock operation (write "in_use" column) is possible or the subnet
register was already requested by other DB transaction.

Change-Id: I45a724917389814e83400f5854ada175dfce2b7b
Closes-Bug: #1865891
This commit is contained in:
Rodolfo Alonso Hernandez 2020-03-13 18:31:01 +00:00
parent af1af5d3e7
commit a8a2bd7e07
7 changed files with 143 additions and 78 deletions
neutron

@ -17,7 +17,6 @@ import functools
import netaddr
from neutron_lib.api.definitions import ip_allocation as ipalloc_apidef
from neutron_lib.api.definitions import port as port_def
from neutron_lib.api.definitions import portbindings as portbindings_def
from neutron_lib.api.definitions import subnetpool as subnetpool_def
from neutron_lib.api import validators
@ -79,7 +78,9 @@ LOG = logging.getLogger(__name__)
AUTO_DELETE_PORT_OWNERS = [constants.DEVICE_OWNER_DHCP]
def _check_subnet_not_used(context, subnet_id):
def _ensure_subnet_not_used(context, subnet_id):
models_v2.Subnet.lock_register(
context, exc.SubnetInUse(subnet_id=subnet_id), id=subnet_id)
try:
registry.publish(
resources.SUBNET, events.BEFORE_DELETE, None,
@ -499,6 +500,8 @@ class NeutronDbPluginV2(db_base_plugin_common.DbBasePluginCommon,
# cleanup if a network-owned port snuck in without failing
for subnet in subnets:
self._delete_subnet(context, subnet)
registry.notify(resources.SUBNET, events.AFTER_DELETE,
self, context=context, subnet=subnet.to_dict())
with db_api.CONTEXT_WRITER.using(context):
network_db = self._get_network(context, id)
network = self._make_network_dict(network_db, context=context)
@ -994,13 +997,11 @@ class NeutronDbPluginV2(db_base_plugin_common.DbBasePluginCommon,
**kwargs)
return result
@db_api.CONTEXT_READER
def _subnet_get_user_allocation(self, context, subnet_id):
"""Check if there are any user ports on subnet and return first."""
return port_obj.IPAllocation.get_alloc_by_subnet_id(
context, subnet_id, AUTO_DELETE_PORT_OWNERS)
@db_api.CONTEXT_READER
def _subnet_check_ip_allocations_internal_router_ports(self, context,
subnet_id):
# Do not delete the subnet if IP allocations for internal
@ -1012,23 +1013,6 @@ class NeutronDbPluginV2(db_base_plugin_common.DbBasePluginCommon,
"cannot delete", subnet_id)
raise exc.SubnetInUse(subnet_id=subnet_id)
@db_api.retry_if_session_inactive()
def _remove_subnet_from_port(self, context, sub_id, port_id, auto_subnet):
try:
fixed = [f for f in self.get_port(context, port_id)['fixed_ips']
if f['subnet_id'] != sub_id]
if auto_subnet:
# special flag to avoid re-allocation on auto subnets
fixed.append({'subnet_id': sub_id, 'delete_subnet': True})
data = {port_def.RESOURCE_NAME: {'fixed_ips': fixed}}
self.update_port(context, port_id, data)
except exc.PortNotFound:
# port is gone
return
except exc.SubnetNotFound as e:
# another subnet in the fixed ips was concurrently removed. retry
raise os_db_exc.RetryRequest(e)
def _ensure_no_user_ports_on_subnet(self, context, id):
alloc = self._subnet_get_user_allocation(context, id)
if alloc:
@ -1040,36 +1024,32 @@ class NeutronDbPluginV2(db_base_plugin_common.DbBasePluginCommon,
'subnet': id})
raise exc.SubnetInUse(subnet_id=id)
@db_api.retry_if_session_inactive()
def _remove_subnet_ip_allocations_from_ports(self, context, subnet):
# Do not allow a subnet to be deleted if a router is attached to it
sid = subnet['id']
self._subnet_check_ip_allocations_internal_router_ports(
context, subnet.id)
context, sid)
is_auto_addr_subnet = ipv6_utils.is_auto_address_subnet(subnet)
if not is_auto_addr_subnet:
# we only automatically remove IP addresses from user ports if
# the IPs come from auto allocation subnets.
self._ensure_no_user_ports_on_subnet(context, subnet.id)
net_allocs = (context.session.query(models_v2.IPAllocation.port_id).
filter_by(subnet_id=subnet.id))
port_ids_on_net = [ipal.port_id for ipal in net_allocs]
for port_id in port_ids_on_net:
self._remove_subnet_from_port(context, subnet.id, port_id,
auto_subnet=is_auto_addr_subnet)
self._ensure_no_user_ports_on_subnet(context, sid)
port_obj.IPAllocation.delete_alloc_by_subnet_id(context, sid)
@db_api.retry_if_session_inactive()
def delete_subnet(self, context, id):
LOG.debug("Deleting subnet %s", id)
# Make sure the subnet isn't used by other resources
_check_subnet_not_used(context, id)
subnet = self._get_subnet_object(context, id)
registry.publish(resources.SUBNET,
events.PRECOMMIT_DELETE_ASSOCIATIONS,
self,
payload=events.DBEventPayload(context,
resource_id=subnet.id))
self._remove_subnet_ip_allocations_from_ports(context, subnet)
self._delete_subnet(context, subnet)
with db_api.CONTEXT_WRITER.using(context):
_ensure_subnet_not_used(context, id)
subnet = self._get_subnet_object(context, id)
registry.publish(
resources.SUBNET, events.PRECOMMIT_DELETE_ASSOCIATIONS, self,
payload=events.DBEventPayload(context, resource_id=subnet.id))
self._remove_subnet_ip_allocations_from_ports(context, subnet)
self._delete_subnet(context, subnet)
registry.notify(resources.SUBNET, events.AFTER_DELETE,
self, context=context, subnet=subnet.to_dict())
def _delete_subnet(self, context, subnet):
with db_api.exc_to_retry(sql_exc.IntegrityError), \
@ -1080,8 +1060,6 @@ class NeutronDbPluginV2(db_base_plugin_common.DbBasePluginCommon,
# Delete related ipam subnet manually,
# since there is no FK relationship
self.ipam.delete_subnet(context, subnet.id)
registry.notify(resources.SUBNET, events.AFTER_DELETE,
self, context=context, subnet=subnet.to_dict())
@db_api.retry_if_session_inactive()
def get_subnet(self, context, id, fields=None):

@ -656,6 +656,12 @@ class IpamBackendMixin(db_base_plugin_common.DbBasePluginCommon):
context, network_id, host, service_type, fixed_configured,
fixed_ips)
if subnets:
msg = ('This subnet is being modified by another concurrent '
'operation')
for subnet in subnets:
subnet.lock_register(
context, exc.SubnetInUse(subnet_id=subnet.id, reason=msg),
id=subnet.id)
subnet_dicts = [self._make_subnet_dict(subnet, context=context)
for subnet in subnets]
# Give priority to subnets with service_types

@ -1 +1 @@
e88badaa9591
d8bdf05313f4

@ -0,0 +1,53 @@
# Copyright 2020 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
from alembic import op
import sqlalchemy as sa
from sqlalchemy.engine import reflection
"""add in_use to subnet
Revision ID: d8bdf05313f4
Revises: e88badaa9591
Create Date: 2020-03-13 17:15:38.462751
"""
# revision identifiers, used by Alembic.
revision = 'd8bdf05313f4'
down_revision = 'e88badaa9591'
TABLE = 'subnets'
COLUMN_IN_USE = 'in_use'
def upgrade():
inspector = reflection.Inspector.from_engine(op.get_bind())
# NOTE(ralonsoh): bug #1865891 is present in stable releases. Although is
# not possible to backport a patch implementing a DB change [1], we are
# planning to migrate this patch to a stable branch in a private
# repository. This check will not affect the current revision (this table
# column does not exist) and help us in the maintenance of our stable
# branches.
# [1] https://docs.openstack.org/project-team-guide/
# stable-branches.html#review-guidelines
if COLUMN_IN_USE not in [column['name'] for column
in inspector.get_columns(TABLE)]:
op.add_column(TABLE,
sa.Column(COLUMN_IN_USE,
sa.Boolean(),
server_default=sa.sql.false(),
nullable=False))

@ -29,6 +29,32 @@ from neutron.db import rbac_db_models
from neutron.db import standard_attr
# NOTE(ralonsoh): move to neutron_lib.db.model_base
class HasInUse(object):
"""NeutronBaseV2 mixin, to add the flag "in_use" to a DB model.
The content of this flag (boolean) parameter is not relevant. The goal of
this field is to be used in a write transaction to mark a DB register as
"in_use". Writing any value on this DB parameter will lock the container
register. At the end of the DB transaction, the DB engine will check if
this register was modified or deleted. In such case, the transaction will
fail and won't be commited.
"lock_register" is the method to write the register "in_use" column.
Because the lifespan of this DB lock is the DB transaction, there isn't an
unlock method. The lock will finish once the transaction ends.
"""
in_use = sa.Column(sa.Boolean(), nullable=False,
server_default=sql.false(), default=False)
@classmethod
def lock_register(cls, context, exception, **filters):
num_reg = context.session.query(
cls).filter_by(**filters).update({'in_use': True})
if num_reg != 1:
raise exception
class IPAllocationPool(model_base.BASEV2, model_base.HasId):
"""Representation of an allocation pool in a Neutron subnet."""
@ -147,7 +173,7 @@ class DNSNameServer(model_base.BASEV2):
class Subnet(standard_attr.HasStandardAttributes, model_base.BASEV2,
model_base.HasId, model_base.HasProject):
model_base.HasId, model_base.HasProject, HasInUse):
"""Represents a neutron subnet.
When a subnet is created the first and last entries will be created. These

@ -214,6 +214,14 @@ class IPAllocation(base.NeutronDbObject):
if alloc_db:
return True
@classmethod
def delete_alloc_by_subnet_id(cls, context, subnet_id):
allocs = context.session.query(models_v2.IPAllocation).filter_by(
subnet_id=subnet_id).all()
for alloc in allocs:
alloc_obj = super(IPAllocation, cls)._load_object(context, alloc)
alloc_obj.delete()
@base.NeutronObjectRegistry.register
class PortDNS(base.NeutronDbObject):

@ -698,7 +698,7 @@ class TestMl2SubnetsV2(test_plugin.TestSubnetsV2,
kwargs = after_create.mock_calls[0][2]
self.assertEqual(s['subnet']['id'], kwargs['subnet']['id'])
def test_port_update_subnetnotfound(self):
def test_port_create_subnetnotfound(self):
with self.network() as n:
with self.subnet(network=n, cidr='1.1.1.0/24') as s1,\
self.subnet(network=n, cidr='1.1.2.0/24') as s2,\
@ -706,27 +706,23 @@ class TestMl2SubnetsV2(test_plugin.TestSubnetsV2,
fixed_ips = [{'subnet_id': s1['subnet']['id']},
{'subnet_id': s2['subnet']['id']},
{'subnet_id': s3['subnet']['id']}]
with self.port(subnet=s1, fixed_ips=fixed_ips,
device_owner=constants.DEVICE_OWNER_DHCP) as p:
plugin = directory.get_plugin()
orig_update = plugin.update_port
plugin = directory.get_plugin()
origin_create_port_db = plugin.create_port_db
def delete_before_update(ctx, *args, **kwargs):
# swap back out with original so only called once
plugin.update_port = orig_update
# delete s2 in the middle of s1 port_update
plugin.delete_subnet(ctx, s2['subnet']['id'])
return plugin.update_port(ctx, *args, **kwargs)
plugin.update_port = delete_before_update
req = self.new_delete_request('subnets',
s1['subnet']['id'])
res = req.get_response(self.api)
self.assertEqual(204, res.status_int)
# ensure port only has 1 IP on s3
port = self._show('ports', p['port']['id'])['port']
self.assertEqual(1, len(port['fixed_ips']))
self.assertEqual(s3['subnet']['id'],
port['fixed_ips'][0]['subnet_id'])
def create_port(ctx, port):
plugin.create_port_db = origin_create_port_db
second_ctx = context.get_admin_context()
with db_api.CONTEXT_WRITER.using(second_ctx):
setattr(second_ctx, 'GUARD_TRANSACTION', False)
plugin.delete_subnet(second_ctx, s2['subnet']['id'])
return plugin.create_port_db(ctx, port)
plugin.create_port_db = create_port
res = self._create_port(
self.fmt, s2['subnet']['network_id'], fixed_ips=fixed_ips,
device_owner=constants.DEVICE_OWNER_DHCP, subnet=s1)
self.assertIn('Subnet %s could not be found.'
% s2['subnet']['id'], res.text)
def test_update_subnet_with_empty_body(self):
with self.subnet() as subnet:
@ -3286,23 +3282,21 @@ class TestML2PluggableIPAM(test_ipam.UseIpamMixin, TestMl2SubnetsV2):
driver_mock().remove_subnet.assert_called_with(request.subnet_id)
def test_delete_subnet_deallocates_slaac_correctly(self):
driver = 'neutron.ipam.drivers.neutrondb_ipam.driver.NeutronDbPool'
cidr = '2001:db8:100::0/64'
with self.network() as network:
with self.subnet(network=network,
cidr='2001:100::0/64',
with self.subnet(network=network, cidr=cidr,
ip_version=constants.IP_VERSION_6,
ipv6_ra_mode=constants.IPV6_SLAAC) as subnet:
with self.port(subnet=subnet) as port:
with mock.patch(driver) as driver_mock:
# Validate that deletion of SLAAC allocation happens
# via IPAM interface, i.e. ipam_subnet.deallocate is
# called prior to subnet deletiong from db.
self._delete('subnets', subnet['subnet']['id'])
dealloc = driver_mock().get_subnet().deallocate
dealloc.assert_called_with(
port['port']['fixed_ips'][0]['ip_address'])
driver_mock().remove_subnet.assert_called_with(
subnet['subnet']['id'])
ipallocs = port_obj.IPAllocation.get_objects(self.context)
self.assertEqual(1, len(ipallocs))
self.assertEqual(
port['port']['fixed_ips'][0]['ip_address'],
str(ipallocs[0].ip_address))
self._delete('subnets', subnet['subnet']['id'])
ipallocs = port_obj.IPAllocation.get_objects(self.context)
self.assertEqual(0, len(ipallocs))
class TestTransactionGuard(Ml2PluginV2TestCase):