Add further workaround features for qemu_monitor_announce_self
In some cases on Arista VXLAN fabrics, VMs are inaccessible via network after live migration, despite GARPs being observed on the fabric itself. This patch builds on the ``[workarounds]/enable_qemu_monitor_announce_self`` feature, as reported in `bug 1815989 <https://bugs.launchpad.net/nova/+bug/1815989>`. This patch adds the ability to configure the number of times the QEMU announce_self monitor command is called, and adds a new configuration option to specify a delay between calling the announce_self command multiple times, as in some cases, multiple announce_self monitor commands are required for the fabric to honor the GARP packets and the VM to become accessible via the network after live migration. Closes-Bug: #1996995 Change-Id: I2f5bf7c9de621bb1dc7fae5b3374629a4fcc1f46
This commit is contained in:
@@ -373,6 +373,28 @@ Please note that this causes the domain to be considered tainted by libvirt.
|
|||||||
Related options:
|
Related options:
|
||||||
|
|
||||||
* :oslo.config:option:`DEFAULT.compute_driver` (libvirt)
|
* :oslo.config:option:`DEFAULT.compute_driver` (libvirt)
|
||||||
|
"""),
|
||||||
|
cfg.IntOpt('qemu_monitor_announce_self_count',
|
||||||
|
default=3,
|
||||||
|
min=1,
|
||||||
|
help="""
|
||||||
|
The total number of times to send the announce_self command to the QEMU
|
||||||
|
monitor when enable_qemu_monitor_announce_self is enabled.
|
||||||
|
|
||||||
|
Related options:
|
||||||
|
|
||||||
|
* :oslo.config:option:`WORKAROUNDS.enable_qemu_monitor_announce_self` (libvirt)
|
||||||
|
"""),
|
||||||
|
cfg.IntOpt('qemu_monitor_announce_self_interval',
|
||||||
|
default=1,
|
||||||
|
min=1,
|
||||||
|
help="""
|
||||||
|
The number of seconds to wait before re-sending the announce_self
|
||||||
|
command to the QEMU monitor.
|
||||||
|
|
||||||
|
Related options:
|
||||||
|
|
||||||
|
* :oslo.config:option:`WORKAROUNDS.enable_qemu_monitor_announce_self` (libvirt)
|
||||||
"""),
|
"""),
|
||||||
cfg.BoolOpt('disable_compute_service_check_for_ffu',
|
cfg.BoolOpt('disable_compute_service_check_for_ffu',
|
||||||
default=False,
|
default=False,
|
||||||
|
|||||||
@@ -1822,6 +1822,22 @@ class LibvirtConnTestCase(test.NoDBTestCase,
|
|||||||
|
|
||||||
mock_guest.set_user_password.assert_called_once_with("root", "123")
|
mock_guest.set_user_password.assert_called_once_with("root", "123")
|
||||||
|
|
||||||
|
@mock.patch('nova.virt.libvirt.host.Host.get_guest')
|
||||||
|
def test_qemu_announce_self(self, mock_get_guest):
|
||||||
|
# Enable the workaround; announce_self is called 3 times by default
|
||||||
|
self.flags(enable_qemu_monitor_announce_self=True, group='workarounds')
|
||||||
|
|
||||||
|
mock_guest = mock.Mock(spec=libvirt_guest.Guest)
|
||||||
|
mock_get_guest.return_value = mock_guest
|
||||||
|
|
||||||
|
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
|
||||||
|
drvr._qemu_monitor_announce_self(mock_guest)
|
||||||
|
|
||||||
|
# Ensure that 3 calls are made as defined by option
|
||||||
|
# qemu_monitor_announce_self_count default of 3
|
||||||
|
mock_guest.announce_self.assert_any_call()
|
||||||
|
self.assertEqual(3, mock_guest.announce_self.call_count)
|
||||||
|
|
||||||
@mock.patch('nova.utils.get_image_from_system_metadata')
|
@mock.patch('nova.utils.get_image_from_system_metadata')
|
||||||
@mock.patch.object(host.Host,
|
@mock.patch.object(host.Host,
|
||||||
'has_min_version', return_value=True)
|
'has_min_version', return_value=True)
|
||||||
|
|||||||
@@ -11034,16 +11034,37 @@ class LibvirtDriver(driver.ComputeDriver):
|
|||||||
if not CONF.workarounds.enable_qemu_monitor_announce_self:
|
if not CONF.workarounds.enable_qemu_monitor_announce_self:
|
||||||
return
|
return
|
||||||
|
|
||||||
LOG.info('Sending announce-self command to QEMU monitor',
|
current_attempt = 0
|
||||||
instance=instance)
|
|
||||||
|
|
||||||
try:
|
max_attempts = (
|
||||||
guest = self._host.get_guest(instance)
|
CONF.workarounds.qemu_monitor_announce_self_count)
|
||||||
guest.announce_self()
|
# qemu_monitor_announce_self_interval specified in seconds
|
||||||
except Exception:
|
announce_pause = (
|
||||||
LOG.warning('Failed to send announce-self command to QEMU monitor',
|
CONF.workarounds.qemu_monitor_announce_self_interval)
|
||||||
instance=instance)
|
|
||||||
LOG.exception()
|
while(current_attempt < max_attempts):
|
||||||
|
# Increment attempt
|
||||||
|
current_attempt += 1
|
||||||
|
|
||||||
|
# Only use announce_pause after the first attempt to avoid
|
||||||
|
# pausing before calling announce_self for the first attempt
|
||||||
|
if current_attempt != 1:
|
||||||
|
greenthread.sleep(announce_pause)
|
||||||
|
|
||||||
|
LOG.info('Sending announce-self command to QEMU monitor. '
|
||||||
|
'Attempt %(current_attempt)s of %(max_attempts)s',
|
||||||
|
{'current_attempt': current_attempt,
|
||||||
|
'max_attempts': max_attempts}, instance=instance)
|
||||||
|
try:
|
||||||
|
guest = self._host.get_guest(instance)
|
||||||
|
guest.announce_self()
|
||||||
|
except Exception:
|
||||||
|
LOG.warning('Failed to send announce-self command to '
|
||||||
|
'QEMU monitor. Attempt %(current_attempt)s of '
|
||||||
|
'%(max_attempts)s',
|
||||||
|
{'current_attempt': current_attempt,
|
||||||
|
'max_attempts': max_attempts}, instance=instance)
|
||||||
|
LOG.exception()
|
||||||
|
|
||||||
def post_live_migration_at_destination(self, context,
|
def post_live_migration_at_destination(self, context,
|
||||||
instance,
|
instance,
|
||||||
|
|||||||
@@ -0,0 +1,28 @@
|
|||||||
|
---
|
||||||
|
fixes:
|
||||||
|
- |
|
||||||
|
Fixes `bug 1996995`_ in which VMs live migrated on certain VXLAN Arista
|
||||||
|
network fabrics were inaccessible until the switch arp cache expired.
|
||||||
|
|
||||||
|
A Nova workaround option of ``enable_qemu_monitor_announce_self`` was added
|
||||||
|
to fix `bug 1815989`_ which when enabled would interact with the QEMU
|
||||||
|
monitor and force a VM to announce itself.
|
||||||
|
|
||||||
|
On certain network fabrics, VMs that are live migrated remain inaccessible
|
||||||
|
via the network despite the QEMU monitor announce_self command successfully
|
||||||
|
being called.
|
||||||
|
|
||||||
|
It was noted that on Arista VXLAN fabrics, testing showed that it required
|
||||||
|
several attempts of running the QEMU announce_self monitor command before
|
||||||
|
the switch would acknowledge a VM's new location on the fabric.
|
||||||
|
|
||||||
|
This fix introduces two operator configurable options.
|
||||||
|
The first option sets the number of times the QEMU monitor announce_self
|
||||||
|
command is called - ``qemu_monitor_announce_self_count``
|
||||||
|
|
||||||
|
The second option allows operators to set the delay between the QEMU
|
||||||
|
announce_self commands in seconds for subsequent announce_self commands
|
||||||
|
with ``qemu_monitor_announce_self_interval``
|
||||||
|
|
||||||
|
.. _`bug 1996995`: https://bugs.launchpad.net/nova/+bug/1996995
|
||||||
|
.. _`bug 1815989`: https://bugs.launchpad.net/nova/+bug/1815989
|
||||||
Reference in New Issue
Block a user