Change live_migration_wait_for_vif_plug default to True

This resolves the TODO to make the option default to True
so that the source compute service will wait for the
"network-vif-plugged" event, initiated by vif plugging during
pre_live_migration on the destination compute service, before
initiating the guest transfer in the hypervisor. There are
certain networking backends that will not send the neutron
event for vif plugging alone (which is arguably a bug) but
OVS and linuxbridge, probably the two most widely used in
OpenStack deployments, are known to work with this config.

While in here, the Timeout message is fleshed out to give
more help with what the cause of the timeout could be and
possible recourse.

Change-Id: I8da38aec0fe4808273b8587ace3df9dbbc3ab576
This commit is contained in:
Matt Riedemann 2019-02-06 18:49:41 -05:00
parent b2299908d3
commit 1a42eb9ec1
6 changed files with 27 additions and 16 deletions

View File

@ -6296,12 +6296,22 @@ class ComputeManager(manager.Manager):
self._cleanup_pre_live_migration(
context, dest, instance, migration, migrate_data)
except eventlet.timeout.Timeout:
msg = 'Timed out waiting for events: %s'
LOG.warning(msg, events, instance=instance)
# We only get here if wait_for_vif_plugged is True which means
# live_migration_wait_for_vif_plug=True on the destination host.
msg = (
'Timed out waiting for events: %(events)s. If these timeouts '
'are a persistent issue it could mean the networking backend '
'on host %(dest)s does not support sending these events '
'unless there are port binding host changes which does not '
'happen at this point in the live migration process. You may '
'need to disable the live_migration_wait_for_vif_plug option '
'on host %(dest)s.')
subs = {'events': events, 'dest': dest}
LOG.warning(msg, subs, instance=instance)
if CONF.vif_plugging_is_fatal:
self._cleanup_pre_live_migration(
context, dest, instance, migration, migrate_data)
raise exception.MigrationError(reason=msg % events)
raise exception.MigrationError(reason=msg % subs)
except Exception:
with excutils.save_and_reraise_exception():
LOG.exception('Pre live migration failed at %s',

View File

@ -745,8 +745,7 @@ For example::
cpu_shared_set = "4-12,^8,15"
"""),
cfg.BoolOpt('live_migration_wait_for_vif_plug',
# TODO(mriedem): Change to default=True starting in Stein.
default=False,
default=True,
help="""
Determine if the source compute host should wait for a ``network-vif-plugged``
event from the (neutron) networking service before starting the actual transfer
@ -764,12 +763,9 @@ event may be triggered and then received on the source compute host and the
source compute can wait for that event to ensure networking is set up on the
destination host before starting the guest transfer in the hypervisor.
By default, this is False for two reasons:
.. note::
1. Backward compatibility: deployments should test this out and ensure it works
for them before enabling it.
2. The compute service cannot reliably determine which types of virtual
The compute service cannot reliably determine which types of virtual
interfaces (``port.binding:vif_type``) will send ``network-vif-plugged``
events without an accompanying port ``binding:host_id`` change.
Open vSwitch and linuxbridge should be OK, but OpenDaylight is at least
@ -780,8 +776,7 @@ Possible values:
* True: wait for ``network-vif-plugged`` events before starting guest transfer
* False: do not wait for ``network-vif-plugged`` events before starting guest
transfer (this is how things have always worked before this option
was introduced)
transfer (this is the legacy behavior)
Related options:

View File

@ -43,6 +43,9 @@ class TestInstanceNotificationSampleWithMultipleCompute(
self.useFixture(fixtures.AllServicesCurrent())
def test_multiple_compute_actions(self):
# There are not going to be real network-vif-plugged events coming
# so don't wait for them.
self.flags(live_migration_wait_for_vif_plug=False, group='compute')
server = self._boot_a_server(
extra_params={'networks': [{'port': self.neutron.port_1['id']}]})
self._wait_for_notification('instance.create.end')

View File

@ -80,9 +80,6 @@ function _ceph_configure_nova {
$ANSIBLE all --sudo -f 5 -i "$WORKSPACE/inventory" -m ini_file -a "dest=${NOVA_CONF} section=libvirt option=images_rbd_pool value=${NOVA_CEPH_POOL}"
$ANSIBLE all --sudo -f 5 -i "$WORKSPACE/inventory" -m ini_file -a "dest=${NOVA_CONF} section=libvirt option=images_rbd_ceph_conf value=${CEPH_CONF_FILE}"
# Configure nova-compute to wait for network-vif-plugged events.
$ANSIBLE all --sudo -f 5 -i "$WORKSPACE/inventory" -m ini_file -a "dest=${NOVA_CONF} section=compute option=live_migration_wait_for_vif_plug value=True"
sudo ceph -c ${CEPH_CONF_FILE} auth get-or-create client.${CINDER_CEPH_USER} \
mon "allow r" \
osd "allow class-read object_prefix rbd_children, allow rwx pool=${CINDER_CEPH_POOL}, allow rwx pool=${NOVA_CEPH_POOL},allow rwx pool=${GLANCE_CEPH_POOL}" | \

View File

@ -6157,7 +6157,6 @@ class ComputeTestCase(BaseTestCase,
with mock.patch.object(self.compute.network_api,
'setup_networks_on_host') as mock_setup:
self.flags(live_migration_wait_for_vif_plug=True, group='compute')
ret = self.compute.pre_live_migration(c, instance=instance,
block_migration=False,
disk=None,

View File

@ -0,0 +1,7 @@
---
upgrade:
- |
The default value for the ``[compute]/live_migration_wait_for_vif_plug``
configuration option has been changed to True. As noted in the help text
for the option, some networking backends will not work with this set to
True, although OVS and linuxbridge will.