Add an active wait during the port provisioning event

In ML2/OVN, during a live-migration process, it could
happend that the port provisioning event is received before
the port binding has been updated. That means the port has
been created in the destination host and the event received
(this event will remove any pending provisioning block). But
the Nova port binding request has not arrived yet, updating
the port binding registers. Because the port is considered
"not bound" (yet), the port provisioning doesn't set the port
status to ACTIVE.

This patch creates an active wait during the port provisioning
event method. If the port binding is still "unbound", the method
retries the port retrieval several times, giving some time to the
port binding request from Nova to arrive.

Closes-Bug: #1988199
Change-Id: I50091c84e67c172c94ce9140f23235421599185c
This commit is contained in:
Rodolfo Alonso Hernandez 2022-08-30 18:09:34 +02:00
parent 653949808d
commit 91f0864dc0
3 changed files with 79 additions and 25 deletions

View File

@ -147,6 +147,7 @@ from neutron.services.segments import plugin as segments_plugin
LOG = log.getLogger(__name__)
MAX_BIND_TRIES = 10
MAX_PROVISIONING_TRIES = MAX_BIND_TRIES
SERVICE_PLUGINS_REQUIRED_DRIVERS = {
@ -334,37 +335,55 @@ class Ml2Plugin(db_base_plugin_v2.NeutronDbPluginV2,
[provisioning_blocks.PROVISIONING_COMPLETE])
def _port_provisioned(self, rtype, event, trigger, payload=None):
port_id = payload.resource_id
port = db.get_port(payload.context, port_id)
port_binding = p_utils.get_port_binding_by_status_and_host(
getattr(port, 'port_bindings', []), const.ACTIVE)
if not port or not port_binding:
LOG.debug("Port %s was deleted so its status cannot be updated.",
port_id)
return
if port_binding.vif_type in (portbindings.VIF_TYPE_BINDING_FAILED,
portbindings.VIF_TYPE_UNBOUND):
# NOTE(kevinbenton): we hit here when a port is created without
# a host ID and the dhcp agent notifies that its wiring is done
LOG.debug("Port %s cannot update to ACTIVE because it "
"is not bound.", port_id)
return
else:
# port is bound, but we have to check for new provisioning blocks
# one last time to detect the case where we were triggered by an
# unbound port and the port became bound with new provisioning
# blocks before 'get_port' was called above
if provisioning_blocks.is_object_blocked(payload.context, port_id,
resources.PORT):
LOG.debug("Port %s had new provisioning blocks added so it "
"will not transition to active.", port_id)
for count in range(1, MAX_PROVISIONING_TRIES + 1):
LOG.info('Attempt %(count)s to provision port %(port)s',
{'count': count, 'port': port_id})
port = db.get_port(payload.context, port_id)
port_bindings = getattr(port, 'port_bindings', [])
port_binding = p_utils.get_port_binding_by_status_and_host(
port_bindings, const.ACTIVE)
if not port or not port_binding:
LOG.debug("Port %s was deleted so its status cannot be "
"updated.", port_id)
return
if port_binding.vif_type == portbindings.VIF_TYPE_BINDING_FAILED:
LOG.debug('Port %s cannot update to ACTIVE because it failed.',
port_id)
return
if port_binding.vif_type == portbindings.VIF_TYPE_UNBOUND:
# NOTE(kevinbenton): we hit here when a port is created without
# a host ID and the dhcp agent notifies that its wiring is done
LOG.debug('Port %s cannot update to ACTIVE because it '
'is not bound.', port_id)
if count == MAX_PROVISIONING_TRIES:
return
# Wait 0.5 seconds before checking again if the port is bound.
# We could hit this during a live-migration.
greenthread.sleep(0.5)
continue
break
# port is bound, but we have to check for new provisioning blocks
# one last time to detect the case where we were triggered by an
# unbound port and the port became bound with new provisioning
# blocks before 'get_port' was called above
if provisioning_blocks.is_object_blocked(payload.context, port_id,
resources.PORT):
LOG.debug("Port %s had new provisioning blocks added so it "
"will not transition to active.", port_id)
return
if not port.admin_state_up:
LOG.debug("Port %s is administratively disabled so it will "
"not transition to active.", port_id)
return
host_migrating = agent_rpc.migrating_to_host(
getattr(port, 'port_bindings', []))
host_migrating = agent_rpc.migrating_to_host(port_bindings)
if host_migrating and self.nova_notifier:
send_nova_event = bool(trigger ==
provisioning_blocks.L2_AGENT_ENTITY)

View File

@ -47,6 +47,7 @@ import testtools
import webob
from neutron._i18n import _
from neutron.agent import rpc as agent_rpc
from neutron.common import utils
from neutron.db import agents_db
from neutron.db import provisioning_blocks
@ -1113,6 +1114,32 @@ class TestMl2PortsV2(test_plugin.TestPortsV2, Ml2PluginV2TestCase):
self.context, resource_id=port_id))
self.assertFalse(ups.called)
@staticmethod
def _set_max_provisioning_tries():
ml2_plugin.MAX_PROVISIONING_TRIES = ml2_plugin.MAX_BIND_TRIES
@mock.patch.object(agent_rpc, 'migrating_to_host', return_value=None)
@mock.patch('neutron.plugins.ml2.plugin.db.get_port')
@mock.patch.object(p_utils, 'get_port_binding_by_status_and_host')
def test__port_provisioned_port_retry_port_binding_unbound(
self, mock_get_pb, mock_get_port, *args):
self.addCleanup(self._set_max_provisioning_tries)
ml2_plugin.MAX_PROVISIONING_TRIES = 2
plugin = directory.get_plugin()
port_id = 'fake_port_id'
port = mock.Mock(id=port_id, admin_state_up=True)
mock_get_port.return_value = port
with mock.patch.object(plugin, 'update_port_status') as mock_pstatus:
pb1 = mock.MagicMock(vif_type=portbindings.VIF_TYPE_UNBOUND)
pb2 = mock.MagicMock(vif_type=portbindings.VIF_TYPE_OVS)
pb2.__iter__.return_value = []
mock_get_pb.side_effect = [pb1, pb2]
plugin._port_provisioned('port', 'evt', 'trigger',
payload=events.DBEventPayload(
self.context, resource_id=port_id))
mock_pstatus.assert_called_once_with(self.context, port_id,
constants.PORT_STATUS_ACTIVE)
def test_port_after_create_outside_transaction(self):
self.tx_open = True

View File

@ -0,0 +1,8 @@
---
features:
- |
After the port is considered as provisioned, the Nova port binding update
could have not been received, leaving the port as not bound. Now the
port provisioning method has an active wait that will retry several times,
waiting for the port binding update. If received, the port status will be
set as active if the admin state flag is set.