Merge "Allow to perform parallel live migrations"

This commit is contained in:
Zuul
2025-11-07 22:36:34 +00:00
committed by Gerrit Code Review
6 changed files with 154 additions and 7 deletions

View File

@@ -370,6 +370,30 @@ Related options:
* live_migration_downtime
* live_migration_downtime_steps
* live_migration_downtime_delay
"""),
cfg.IntOpt('live_migration_parallel_connections',
default=1,
help="""
Number of parallel connections to QEMU during live migrations.
Values above 1 will instruct hypervisor explicitly on amount of connections
to use.
Please note, that each connection can utilize up to 1 CPU core, especially when
``live_migration_with_native_tls`` is used. Therefore it is recommended to
reserve CPUs using ``cpu_shared_set``/``cpu_dedicated_set`` or
``reserved_host_cpus`` multiplied by ``cpu_allocation_ratio``.
Usage of ``live_migration_parallel_connections`` in
combination with ``live_migration_permit_post_copy`` is supported only with
`QEMU>=10.1.0 <https://www.qemu.org/2025/08/26/qemu-10-1-0/>`_.
Related options:
* ``[compute] cpu_shared_set``
* ``[compute] cpu_dedicated_set``
* ``[DEFAULT] reserved_host_cpus``
* ``[libvirt] live_migration_permit_post_copy``
"""),
cfg.StrOpt('live_migration_timeout_action',
default='abort',
@@ -411,10 +435,15 @@ When using post-copy mode, if the source and destination hosts lose network
connectivity, the VM being live-migrated will need to be rebooted. For more
details, please see the Administration guide.
Usage of the option together with ``live_migration_parallel_connections``
is supported only with QEMU>=10.1.0. Otherwise VM will end up in SHUTOFF
state on the destination host.
Related options:
* live_migration_permit_auto_converge
* live_migration_timeout_action
* live_migration_parallel_connections
"""),
cfg.BoolOpt('live_migration_permit_auto_converge',
default=False,

View File

@@ -127,6 +127,8 @@ VIR_MIGRATE_PERSIST_DEST = 8
VIR_MIGRATE_UNDEFINE_SOURCE = 16
VIR_MIGRATE_NON_SHARED_INC = 128
VIR_MIGRATE_AUTO_CONVERGE = 8192
# Migration flag bit (1 << 17): send memory pages to the destination over
# several network connections.
VIR_MIGRATE_PARALLEL = 131072
# Key of the migration parameter that carries the requested number of
# connections when parallel migration is used.
VIR_MIGRATE_PARAM_PARALLEL_CONNECTIONS = 'parallel.connections'
VIR_MIGRATE_POSTCOPY = 32768
VIR_MIGRATE_TLS = 65536

View File

@@ -1223,6 +1223,19 @@ class LibvirtConnTestCase(test.NoDBTestCase,
break
self.assertTrue(version_arg_found)
# Parallel connections (2) combined with post-copy must be refused when the
# fake connection's getVersion reports 9.1.0: init_host is expected to raise
# InternalError.
# NOTE(review): despite the "_version_ok" suffix this test exercises the
# rejection path; a name such as "..._version_too_old" would describe it
# better -- confirm before renaming.
@mock.patch.object(libvirt_driver.LibvirtDriver,
'_register_all_undefined_instance_details',
new=mock.Mock())
@mock.patch.object(fakelibvirt.Connection, 'getVersion',
return_value=versionutils.convert_version_to_int(
(9, 1, 0)))
def test_qemu_multifd_with_postcopy_version_ok(self, mock_gv):
# Enable both features that are only allowed together on a new enough
# hypervisor.
self.flags(live_migration_parallel_connections=2,
live_migration_permit_post_copy=True,
group='libvirt')
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True)
self.assertRaises(exception.InternalError, drvr.init_host, "dummyhost")
@mock.patch.object(libvirt_driver.LibvirtDriver,
'_register_all_undefined_instance_details',
new=mock.Mock())
@@ -2002,6 +2015,21 @@ class LibvirtConnTestCase(test.NoDBTestCase,
libvirt_driver.libvirt.VIR_MIGRATE_NON_SHARED_INC |
libvirt_driver.libvirt.VIR_MIGRATE_AUTO_CONVERGE))
def test_live_migration_parallel_connections_enabled(self):
    """Two configured connections add VIR_MIGRATE_PARALLEL to both flag sets.

    The live-migration and block-migration expectations differ only by
    VIR_MIGRATE_NON_SHARED_INC, which is present for block migration.
    """
    self.flags(live_migration_parallel_connections=2, group='libvirt')
    virt = libvirt_driver.libvirt
    # Flags shared by the live- and block-migration expectations.
    shared_flags = (virt.VIR_MIGRATE_UNDEFINE_SOURCE |
                    virt.VIR_MIGRATE_PERSIST_DEST |
                    virt.VIR_MIGRATE_PEER2PEER |
                    virt.VIR_MIGRATE_LIVE |
                    virt.VIR_MIGRATE_PARALLEL)
    self._do_test_parse_migration_flags(
        lm_expected=shared_flags,
        bm_expected=shared_flags | virt.VIR_MIGRATE_NON_SHARED_INC)
def test_live_migration_permit_auto_converge_and_post_copy_true(self):
self.flags(live_migration_permit_auto_converge=True, group='libvirt')
self.flags(live_migration_permit_post_copy=True, group='libvirt')
@@ -14133,6 +14161,54 @@ class LibvirtConnTestCase(test.NoDBTestCase,
drvr._live_migration_uri(target_connection),
params=params, flags=expected_flags)
# Block live migration with five parallel connections configured: the call to
# migrateToURI3 must carry the 'parallel.connections' parameter and the
# VIR_MIGRATE_PARALLEL flag.
@mock.patch.object(host.Host, 'has_min_version', return_value=True)
@mock.patch.object(fakelibvirt.virDomain, "migrateToURI3")
@mock.patch('nova.virt.libvirt.migration.get_updated_guest_xml',
return_value='')
@mock.patch('nova.virt.libvirt.guest.Guest.get_xml_desc', return_value='')
# Mock arguments are passed bottom-up: get_xml_desc, get_updated_guest_xml,
# migrateToURI3, has_min_version.
def test_block_live_parallel_connections(
self, mock_old_xml, mock_new_xml,
mock_migrateToURI3, mock_min_version):
self.flags(live_migration_parallel_connections=5, group='libvirt')
target_connection = None
disk_paths = ['vda', 'vdb']
# Parameters expected to be handed to migrateToURI3.
params = {
'bandwidth': CONF.libvirt.live_migration_bandwidth,
'migrate_disks': disk_paths,
'parallel.connections':
CONF.libvirt.live_migration_parallel_connections
}
# Start test
migrate_data = objects.LibvirtLiveMigrateData(
graphics_listen_addr_vnc='0.0.0.0',
graphics_listen_addr_spice='0.0.0.0',
serial_listen_addr='127.0.0.1',
serial_listen_ports=[1234],
target_connect_addr=target_connection,
bdms=[],
block_migration=True)
dom = fakelibvirt.virDomain
guest = libvirt_guest.Guest(dom)
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
# Compute the driver's migration flags from the option set above.
drvr._parse_migration_flags()
instance = objects.Instance(**self.test_instance)
drvr._live_migration_operation(self.context, instance,
target_connection, True, migrate_data,
guest, disk_paths)
# Block-migration flags plus the parallel bit.
expected_flags = (fakelibvirt.VIR_MIGRATE_UNDEFINE_SOURCE |
fakelibvirt.VIR_MIGRATE_PERSIST_DEST |
fakelibvirt.VIR_MIGRATE_PEER2PEER |
fakelibvirt.VIR_MIGRATE_NON_SHARED_INC |
fakelibvirt.VIR_MIGRATE_LIVE |
fakelibvirt.VIR_MIGRATE_PARALLEL)
mock_migrateToURI3.assert_called_once_with(
drvr._live_migration_uri(target_connection),
params=params, flags=expected_flags)
@mock.patch.object(host.Host, 'has_min_version', return_value=True)
@mock.patch.object(fakelibvirt.virDomain, "migrateToURI3")
@mock.patch('nova.virt.libvirt.migration.get_updated_guest_xml',

View File

@@ -262,6 +262,9 @@ MIN_VFIO_PCI_VARIANT_QEMU_VERSION = (8, 2, 2)
MIN_VIRTIO_SOUND_LIBVIRT_VERSION = (10, 4, 0)
MIN_VIRTIO_SOUND_QEMU_VERSION = (8, 2, 0)
# Minimum QEMU version that supports multifd (parallel connections) live
# migration combined with post-copy. On qemu/kvm hosts with an older version
# the driver raises InternalError when both
# ``live_migration_parallel_connections`` > 1 and
# ``live_migration_permit_post_copy`` are configured.
MIN_MULTIFD_WITH_POSTCOPY_QEMU_VERSION = (10, 1, 0)
REGISTER_IMAGE_PROPERTY_DEFAULTS = [
'hw_machine_type',
'hw_cdrom_bus',
@@ -814,6 +817,16 @@ class LibvirtDriver(driver.ComputeDriver):
raise exception.InternalError(
_('Nova requires QEMU version %s or greater.') %
libvirt_utils.version_to_string(MIN_QEMU_VERSION))
if (CONF.libvirt.virt_type in ("qemu", "kvm") and
CONF.libvirt.live_migration_parallel_connections > 1 and
CONF.libvirt.live_migration_permit_post_copy is True):
if not self._host.has_min_version(
hv_ver=MIN_MULTIFD_WITH_POSTCOPY_QEMU_VERSION):
raise exception.InternalError(
_('Nova requires QEMU version %s or greater to use '
'live migration parallel connections with post-copy.') %
libvirt_utils.version_to_string(
MIN_MULTIFD_WITH_POSTCOPY_QEMU_VERSION))
if CONF.libvirt.virt_type == 'parallels':
if not self._host.has_min_version(hv_ver=MIN_VIRTUOZZO_VERSION):
@@ -1354,6 +1367,11 @@ class LibvirtDriver(driver.ComputeDriver):
migration_flags |= libvirt.VIR_MIGRATE_AUTO_CONVERGE
return migration_flags
def _handle_live_migration_parallel(self, migration_flags):
    """Return the flags with VIR_MIGRATE_PARALLEL set when configured.

    :param migration_flags: bitmask of libvirt migration flags
    :returns: the bitmask, with VIR_MIGRATE_PARALLEL OR-ed in only when
              ``[libvirt] live_migration_parallel_connections`` is
              greater than 1; otherwise the bitmask unchanged
    """
    connections = CONF.libvirt.live_migration_parallel_connections
    if connections <= 1:
        # Parallel migration was not requested; leave the flags alone.
        return migration_flags
    return migration_flags | libvirt.VIR_MIGRATE_PARALLEL
def _parse_migration_flags(self):
(live_migration_flags,
block_migration_flags) = self._prepare_migration_flags()
@@ -1378,6 +1396,11 @@ class LibvirtDriver(driver.ComputeDriver):
block_migration_flags = self._handle_live_migration_auto_converge(
block_migration_flags)
live_migration_flags = self._handle_live_migration_parallel(
live_migration_flags)
block_migration_flags = self._handle_live_migration_parallel(
block_migration_flags)
self._live_migration_flags = live_migration_flags
self._block_migration_flags = block_migration_flags
@@ -11173,12 +11196,14 @@ class LibvirtDriver(driver.ComputeDriver):
serial_ports = list(self._get_serial_ports_from_guest(guest))
LOG.debug("About to invoke the migrate API", instance=instance)
guest.migrate(self._live_migration_uri(dest),
migrate_uri=migrate_uri,
flags=migration_flags,
migrate_disks=device_names,
destination_xml=new_xml_str,
bandwidth=CONF.libvirt.live_migration_bandwidth)
guest.migrate(
self._live_migration_uri(dest),
migrate_uri=migrate_uri,
flags=migration_flags,
migrate_disks=device_names,
destination_xml=new_xml_str,
bandwidth=CONF.libvirt.live_migration_bandwidth,
parallel=CONF.libvirt.live_migration_parallel_connections)
LOG.debug("Migrate API has completed", instance=instance)
for hostname, port in serial_ports:

View File

@@ -583,7 +583,8 @@ class Guest(object):
self._domain.suspend()
def migrate(self, destination, migrate_uri=None, migrate_disks=None,
destination_xml=None, flags=0, bandwidth=0):
destination_xml=None, flags=0, bandwidth=0,
parallel=0):
"""Migrate guest object from its current host to the destination
:param destination: URI of host destination where guest will be migrated
@@ -609,9 +610,12 @@ class Guest(object):
not change its memory faster than a
hypervisor can transfer the changed
memory to the destination host
VIR_MIGRATE_PARALLEL Send memory pages to the destination host
through several network connections.
VIR_MIGRATE_POSTCOPY Tell libvirt to enable post-copy migration
VIR_MIGRATE_TLS Use QEMU-native TLS
:param bandwidth: The maximum bandwidth in MiB/s
:param parallel: Number of connections used during live migration
"""
params = {}
# In migrateToURI3 these parameters are extracted from the
@@ -625,6 +629,8 @@ class Guest(object):
params['migrate_disks'] = migrate_disks
if migrate_uri:
params['migrate_uri'] = migrate_uri
if parallel > 1:
params[libvirt.VIR_MIGRATE_PARAM_PARALLEL_CONNECTIONS] = parallel
# Due to a quirk in the libvirt python bindings,
# VIR_MIGRATE_NON_SHARED_INC with an empty migrate_disks is

View File

@@ -0,0 +1,9 @@
---
features:
- |
Implemented parallel live migrations for the libvirt driver, which can be
enabled by setting ``[libvirt] live_migration_parallel_connections``
to a value higher than 1. By default, parallel migrations are not used, to
preserve existing behavior. Also note that QEMU versions prior to 10.1.0
do not support multifd together with post-copy migration, so enabling
these two features together on older QEMU versions is disallowed.