Change live_migration_wait_for_vif_plug default to True
This resolves the TODO to make the option default to True so that the source compute service will wait for the "network-vif-plugged" event, initiated by vif plugging during pre_live_migration on the destination compute service, before initiating the guest transfer in the hypervisor. There are certain networking backends that will not send the neutron event for vif plugging alone (which is arguably a bug), but OVS and linuxbridge, probably the two most widely used in OpenStack deployments, are known to work with this config. While in here, the Timeout message is fleshed out to give more help with what the cause of the timeout could be and possible recourse. Change-Id: I8da38aec0fe4808273b8587ace3df9dbbc3ab576
This commit is contained in:
parent
b2299908d3
commit
1a42eb9ec1
@ -6296,12 +6296,22 @@ class ComputeManager(manager.Manager):
|
||||
self._cleanup_pre_live_migration(
|
||||
context, dest, instance, migration, migrate_data)
|
||||
except eventlet.timeout.Timeout:
|
||||
msg = 'Timed out waiting for events: %s'
|
||||
LOG.warning(msg, events, instance=instance)
|
||||
# We only get here if wait_for_vif_plugged is True which means
|
||||
# live_migration_wait_for_vif_plug=True on the destination host.
|
||||
msg = (
|
||||
'Timed out waiting for events: %(events)s. If these timeouts '
|
||||
'are a persistent issue it could mean the networking backend '
|
||||
'on host %(dest)s does not support sending these events '
|
||||
'unless there are port binding host changes which does not '
|
||||
'happen at this point in the live migration process. You may '
|
||||
'need to disable the live_migration_wait_for_vif_plug option '
|
||||
'on host %(dest)s.')
|
||||
subs = {'events': events, 'dest': dest}
|
||||
LOG.warning(msg, subs, instance=instance)
|
||||
if CONF.vif_plugging_is_fatal:
|
||||
self._cleanup_pre_live_migration(
|
||||
context, dest, instance, migration, migrate_data)
|
||||
raise exception.MigrationError(reason=msg % events)
|
||||
raise exception.MigrationError(reason=msg % subs)
|
||||
except Exception:
|
||||
with excutils.save_and_reraise_exception():
|
||||
LOG.exception('Pre live migration failed at %s',
|
||||
|
@ -745,8 +745,7 @@ For example::
|
||||
cpu_shared_set = "4-12,^8,15"
|
||||
"""),
|
||||
cfg.BoolOpt('live_migration_wait_for_vif_plug',
|
||||
# TODO(mriedem): Change to default=True starting in Stein.
|
||||
default=False,
|
||||
default=True,
|
||||
help="""
|
||||
Determine if the source compute host should wait for a ``network-vif-plugged``
|
||||
event from the (neutron) networking service before starting the actual transfer
|
||||
@ -764,12 +763,9 @@ event may be triggered and then received on the source compute host and the
|
||||
source compute can wait for that event to ensure networking is set up on the
|
||||
destination host before starting the guest transfer in the hypervisor.
|
||||
|
||||
By default, this is False for two reasons:
|
||||
.. note::
|
||||
|
||||
1. Backward compatibility: deployments should test this out and ensure it works
|
||||
for them before enabling it.
|
||||
|
||||
2. The compute service cannot reliably determine which types of virtual
|
||||
The compute service cannot reliably determine which types of virtual
|
||||
interfaces (``port.binding:vif_type``) will send ``network-vif-plugged``
|
||||
events without an accompanying port ``binding:host_id`` change.
|
||||
Open vSwitch and linuxbridge should be OK, but OpenDaylight is at least
|
||||
@ -780,8 +776,7 @@ Possible values:
|
||||
|
||||
* True: wait for ``network-vif-plugged`` events before starting guest transfer
|
||||
* False: do not wait for ``network-vif-plugged`` events before starting guest
|
||||
transfer (this is how things have always worked before this option
|
||||
was introduced)
|
||||
transfer (this is the legacy behavior)
|
||||
|
||||
Related options:
|
||||
|
||||
|
@ -43,6 +43,9 @@ class TestInstanceNotificationSampleWithMultipleCompute(
|
||||
self.useFixture(fixtures.AllServicesCurrent())
|
||||
|
||||
def test_multiple_compute_actions(self):
|
||||
# There are not going to be real network-vif-plugged events coming
|
||||
# so don't wait for them.
|
||||
self.flags(live_migration_wait_for_vif_plug=False, group='compute')
|
||||
server = self._boot_a_server(
|
||||
extra_params={'networks': [{'port': self.neutron.port_1['id']}]})
|
||||
self._wait_for_notification('instance.create.end')
|
||||
|
@ -80,9 +80,6 @@ function _ceph_configure_nova {
|
||||
$ANSIBLE all --sudo -f 5 -i "$WORKSPACE/inventory" -m ini_file -a "dest=${NOVA_CONF} section=libvirt option=images_rbd_pool value=${NOVA_CEPH_POOL}"
|
||||
$ANSIBLE all --sudo -f 5 -i "$WORKSPACE/inventory" -m ini_file -a "dest=${NOVA_CONF} section=libvirt option=images_rbd_ceph_conf value=${CEPH_CONF_FILE}"
|
||||
|
||||
# Configure nova-compute to wait for network-vif-plugged events.
|
||||
$ANSIBLE all --sudo -f 5 -i "$WORKSPACE/inventory" -m ini_file -a "dest=${NOVA_CONF} section=compute option=live_migration_wait_for_vif_plug value=True"
|
||||
|
||||
sudo ceph -c ${CEPH_CONF_FILE} auth get-or-create client.${CINDER_CEPH_USER} \
|
||||
mon "allow r" \
|
||||
osd "allow class-read object_prefix rbd_children, allow rwx pool=${CINDER_CEPH_POOL}, allow rwx pool=${NOVA_CEPH_POOL},allow rwx pool=${GLANCE_CEPH_POOL}" | \
|
||||
|
@ -6157,7 +6157,6 @@ class ComputeTestCase(BaseTestCase,
|
||||
|
||||
with mock.patch.object(self.compute.network_api,
|
||||
'setup_networks_on_host') as mock_setup:
|
||||
self.flags(live_migration_wait_for_vif_plug=True, group='compute')
|
||||
ret = self.compute.pre_live_migration(c, instance=instance,
|
||||
block_migration=False,
|
||||
disk=None,
|
||||
|
@ -0,0 +1,7 @@
|
||||
---
|
||||
upgrade:
|
||||
- |
|
||||
The default value for the ``[compute]/live_migration_wait_for_vif_plug``
|
||||
configuration option has been changed to True. As noted in the help text
|
||||
for the option, some networking backends will not work with this set to
|
||||
True, although OVS and linuxbridge will.
|
Loading…
Reference in New Issue
Block a user