2bf7211670
There is a race we need to check for: a port is created and then, shortly afterward, updated with binding information via another API call. This sequence causes the bug referenced in this message:

* The port is created and a DHCP provisioning block is added.
* The DHCP agent finishes setting up the reservation and emits a message to clear the block.
* The _port_provisioned callback in ML2 is triggered.
* A port update comes in that binds the port and adds new blocks.
* The _port_provisioned callback now does a get_port and sees that the port is bound, so it assumes everything is done and the port can be marked ACTIVE.
* The port is now ACTIVE before the L2 agent has had a chance to do its wiring, and the VM boots.
* The L2 agent requests the details and the port flaps back to BUILD.
* The L2 agent finishes and clears the provisioning block a second time, so the port goes back to ACTIVE.

This will randomly cause failures because the VM is booting before the L2 agent is done, and tempest may see the port in the BUILD state after the agent grabs port details.

The fix is relatively simple: check for any new provisioning blocks added *after* doing a get_port in the _port_provisioned callback, to make sure we don't update to ACTIVE if there are newly added blocks.

Closes-Bug: #1600396
Change-Id: I14f41a5fda0707e8bba064c5cd952553686c30cd
185 lines
8.3 KiB
Python
185 lines
8.3 KiB
Python
# Copyright 2016 Mirantis, Inc. All rights reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
|
# not use this file except in compliance with the License. You may obtain
|
|
# a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
# License for the specific language governing permissions and limitations
|
|
# under the License.
|
|
#
|
|
|
|
from oslo_db import exception as db_exc
|
|
from oslo_log import log as logging
|
|
import sqlalchemy as sa
|
|
|
|
from neutron._i18n import _LE
|
|
from neutron.callbacks import registry
|
|
from neutron.callbacks import resources
|
|
from neutron.db import api as db_api
|
|
from neutron.db import model_base
|
|
from neutron.db import models_v2
|
|
|
|
LOG = logging.getLogger(__name__)

# name of the callback event emitted by this module once an object has no
# provisioning blocks left
PROVISIONING_COMPLETE = 'provisioning_complete'

# identifiers for the various entities that participate in provisioning
DHCP_ENTITY = 'DHCP'
L2_AGENT_ENTITY = 'L2'

# callback resource type -> DB model used to look up the standard attr id;
# extended at runtime through add_model_for_resource()
_RESOURCE_TO_MODEL_MAP = {
    resources.PORT: models_v2.Port,
}
|
|
|
|
|
|
class ProvisioningBlock(model_base.BASEV2):
    """DB record of a single entity blocking a single object.

    The primary key is the (standard_attr_id, entity) pair, so a given
    entity can hold at most one block per object. Rows are removed by the
    database (ON DELETE CASCADE) when the referenced standard attribute row
    is deleted.
    """

    # the standard attr id of the thing we want to block; BigInteger
    # everywhere except on sqlite, where a plain Integer variant is used
    standard_attr_id = sa.Column(
        sa.BigInteger().with_variant(sa.Integer(), 'sqlite'),
        sa.ForeignKey(model_base.StandardAttribute.id, ondelete="CASCADE"),
        primary_key=True)

    # the entity that wants to block the status change (e.g. L2 Agent)
    entity = sa.Column(sa.String(255), nullable=False, primary_key=True)
|
|
|
|
|
|
def add_model_for_resource(resource, model):
    """Register the DB model that backs a callback resource type.

    Any existing mapping for the same resource is overwritten.

    :param resource: callback resource type used as the lookup key
    :param model: DB model queried to resolve objects of that type
    """
    _RESOURCE_TO_MODEL_MAP.update({resource: model})
|
|
|
|
|
|
def add_provisioning_component(context, object_id, object_type, entity):
    """Register a provisioning block held by an entity on an object.

    A block row is stored in the DB for the object identified by object_id,
    tagged with the entity that still has to provision it. While an object
    has such blocks, this module does not emit the PROVISIONING_COMPLETE
    callback event for it. Logic that depends on several disjoint components
    can add one block per component and subscribe to PROVISIONING_COMPLETE
    to learn when all of them have finished.

    Nothing is added when the object cannot be found, and a block that
    already exists for the same object and entity is silently kept.

    :param context: neutron api request context
    :param object_id: ID of the object to block
    :param object_type: callback resource type of the object
    :param entity: The entity that has to provision the object
    """
    # callers hand us the object's own ID; blocks are keyed on the
    # standard attr id, so translate first
    attr_id = _get_standard_attr_id(context, object_id, object_type)
    if not attr_id:
        return

    log_args = {'entity': entity, 'oid': object_id, 'otype': object_type}
    try:
        with db_api.autonested_transaction(context.session):
            context.session.add(
                ProvisioningBlock(standard_attr_id=attr_id, entity=entity))
    except db_exc.DBDuplicateEntry:
        # a block may be left over from an earlier transition that has not
        # been provisioned yet (e.g. multiple updates in a short period)
        LOG.debug("Ignored duplicate provisioning block setup for %(otype)s "
                  "%(oid)s by entity %(entity)s.", log_args)
    else:
        LOG.debug("Transition to ACTIVE for %(otype)s object %(oid)s "
                  "will not be triggered until provisioned by entity "
                  "%(entity)s.", log_args)
|
|
|
|
|
|
def remove_provisioning_component(context, object_id, object_type, entity,
                                  standard_attr_id=None):
    """Remove a provisioning block for an object without triggering a callback.

    Use this when a block is no longer correct. When the block has actually
    been satisfied, call 'provisioning_complete' instead so that the
    completion event can be emitted.

    :param context: neutron api request context
    :param object_id: ID of the object the block was placed on
    :param object_type: callback resource type of the object
    :param entity: The entity whose block should be removed
    :param standard_attr_id: Optional ID to pass to the function to avoid the
                             extra DB lookup to translate the object_id into
                             the standard_attr_id.
    :return: boolean indicating whether or not a record was deleted
    """
    with context.session.begin(subtransactions=True):
        attr_id = standard_attr_id
        if not attr_id:
            # no usable ID supplied by the caller; resolve it from the object
            attr_id = _get_standard_attr_id(context, object_id, object_type)
        if not attr_id:
            return False

        block = (context.session.query(ProvisioningBlock)
                 .filter_by(standard_attr_id=attr_id, entity=entity)
                 .first())
        if not block:
            return False
        context.session.delete(block)
        return True
|
|
|
|
|
|
def provisioning_complete(context, object_id, object_type, entity):
    """Record that entity has finished provisioning the object.

    The entity's provisioning block (if any) is removed. If the object then
    has no blocks left, the PROVISIONING_COMPLETE callback event is emitted
    for it. The remaining-block check runs even when this call removed
    nothing, so subscribers must be idempotent: the event may be delivered
    multiple times in high availability deployments.

    :param context: neutron api request context
    :param object_id: ID of object that has been provisioned
    :param object_type: callback resource type of the object
    :param entity: The entity that has provisioned the object
    :raises RuntimeError: if the context's session is already in a
                          transaction
    """
    log_args = {'oid': object_id, 'entity': entity, 'otype': object_type}

    # running inside a transaction is refused: REPEATABLE READ could trick
    # us into thinking there are remaining provisioning components
    if context.session.is_active:
        raise RuntimeError(_LE("Must not be called in a transaction"))

    attr_id = _get_standard_attr_id(context, object_id, object_type)
    if not attr_id:
        return

    removed = remove_provisioning_component(
        context, object_id, object_type, entity, attr_id)
    if removed:
        LOG.debug("Provisioning for %(otype)s %(oid)s completed by entity "
                  "%(entity)s.", log_args)

    # with the removal committed, see whether any blocks are left; only
    # when there are none is the completion event emitted
    remaining = (context.session.query(ProvisioningBlock)
                 .filter_by(standard_attr_id=attr_id)
                 .count())
    if remaining:
        return

    LOG.debug("Provisioning complete for %(otype)s %(oid)s", log_args)
    registry.notify(object_type, PROVISIONING_COMPLETE,
                    'neutron.db.provisioning_blocks',
                    context=context, object_id=object_id)
|
|
|
|
|
|
def is_object_blocked(context, object_id, object_type):
    """Tell whether at least one provisioning block exists for an object.

    :param context: neutron api request context
    :param object_id: ID of the object to check
    :param object_type: callback resource type of the object
    :return: True if the object has any provisioning block, False if it has
             none or the object could not be found
    """
    attr_id = _get_standard_attr_id(context, object_id, object_type)
    if not attr_id:
        # object doesn't exist so it has no blocks
        return False

    block_count = (context.session.query(ProvisioningBlock)
                   .filter_by(standard_attr_id=attr_id)
                   .count())
    return bool(block_count)
|
|
|
|
|
|
def _get_standard_attr_id(context, object_id, object_type):
    """Translate an object's ID into its standard attr id.

    :param context: neutron api request context
    :param object_id: ID of the object to look up
    :param object_type: callback resource type of the object
    :return: the object's standard_attr_id, or None if no row with that ID
             was found
    :raises RuntimeError: if no model is registered for object_type
    """
    model = _RESOURCE_TO_MODEL_MAP.get(object_type)
    if not model:
        raise RuntimeError(_LE("Could not find model for %s. If you are "
                               "adding provisioning blocks for a new resource "
                               "you must call add_model_for_resource during "
                               "initialization for your type.") % object_type)

    # only the standard_attr_id is needed, so eager loads are disabled
    base_query = context.session.query(model).enable_eagerloads(False)
    row = base_query.filter_by(id=object_id).first()
    if row:
        return row.standard_attr_id

    # concurrent delete
    LOG.debug("Could not find standard attr ID for object %s.", object_id)
    return None
|