diff --git a/nova/conf/libvirt.py b/nova/conf/libvirt.py index 297aaae8d296..0746d4040a0f 100644 --- a/nova/conf/libvirt.py +++ b/nova/conf/libvirt.py @@ -370,6 +370,30 @@ Related options: * live_migration_downtime * live_migration_downtime_steps * live_migration_downtime_delay +"""), + cfg.IntOpt('live_migration_parallel_connections', + default=1, + help=""" +Number of parallel connections to QEMU during live migrations. + +Values above 1 explicitly instruct the hypervisor on the number of +connections to use. +Please note that each connection can utilize up to 1 CPU core, especially when +``live_migration_with_native_tls`` is used. Therefore it is recommended to +reserve CPUs using ``cpu_shared_set``/``cpu_dedicated_set`` or +``reserved_host_cpus`` multiplied by ``cpu_allocation_ratio``. + +Usage of ``live_migration_parallel_connections`` in +combination with ``live_migration_permit_post_copy`` is supported only with +QEMU>=10.1.0. + +Related options: + +* ``[compute] cpu_shared_set`` +* ``[compute] cpu_dedicated_set`` +* ``[DEFAULT] reserved_host_cpus`` +* ``[libvirt] live_migration_permit_post_copy`` + """), cfg.StrOpt('live_migration_timeout_action', default='abort', @@ -411,10 +435,15 @@ When using post-copy mode, if the source and destination hosts lose network connectivity, the VM being live-migrated will need to be rebooted. For more details, please see the Administration guide. +Usage of this option together with ``live_migration_parallel_connections`` +is supported only with QEMU>=10.1.0. Otherwise the VM will end up in SHUTOFF +state on the destination host. 
+ Related options: * live_migration_permit_auto_converge * live_migration_timeout_action +* live_migration_parallel_connections """), cfg.BoolOpt('live_migration_permit_auto_converge', default=False, diff --git a/nova/tests/fixtures/libvirt.py b/nova/tests/fixtures/libvirt.py index 638553c1ed29..08e9496f5064 100644 --- a/nova/tests/fixtures/libvirt.py +++ b/nova/tests/fixtures/libvirt.py @@ -127,6 +127,8 @@ VIR_MIGRATE_PERSIST_DEST = 8 VIR_MIGRATE_UNDEFINE_SOURCE = 16 VIR_MIGRATE_NON_SHARED_INC = 128 VIR_MIGRATE_AUTO_CONVERGE = 8192 +VIR_MIGRATE_PARALLEL = 131072 +VIR_MIGRATE_PARAM_PARALLEL_CONNECTIONS = 'parallel.connections' VIR_MIGRATE_POSTCOPY = 32768 VIR_MIGRATE_TLS = 65536 diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py index ccdf99215345..ff15bafb0e69 100644 --- a/nova/tests/unit/virt/libvirt/test_driver.py +++ b/nova/tests/unit/virt/libvirt/test_driver.py @@ -1223,6 +1223,19 @@ class LibvirtConnTestCase(test.NoDBTestCase, break self.assertTrue(version_arg_found) + @mock.patch.object(libvirt_driver.LibvirtDriver, + '_register_all_undefined_instance_details', + new=mock.Mock()) + @mock.patch.object(fakelibvirt.Connection, 'getVersion', + return_value=versionutils.convert_version_to_int( + (9, 1, 0))) + def test_qemu_multifd_with_postcopy_version_ok(self, mock_gv): + self.flags(live_migration_parallel_connections=2, + live_migration_permit_post_copy=True, + group='libvirt') + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True) + self.assertRaises(exception.InternalError, drvr.init_host, "dummyhost") + @mock.patch.object(libvirt_driver.LibvirtDriver, '_register_all_undefined_instance_details', new=mock.Mock()) @@ -2002,6 +2015,21 @@ class LibvirtConnTestCase(test.NoDBTestCase, libvirt_driver.libvirt.VIR_MIGRATE_NON_SHARED_INC | libvirt_driver.libvirt.VIR_MIGRATE_AUTO_CONVERGE)) + def test_live_migration_parallel_connections_enabled(self): + self.flags(live_migration_parallel_connections=2, 
group='libvirt') + self._do_test_parse_migration_flags( + lm_expected=(libvirt_driver.libvirt.VIR_MIGRATE_UNDEFINE_SOURCE | + libvirt_driver.libvirt.VIR_MIGRATE_PERSIST_DEST | + libvirt_driver.libvirt.VIR_MIGRATE_PEER2PEER | + libvirt_driver.libvirt.VIR_MIGRATE_LIVE | + libvirt_driver.libvirt.VIR_MIGRATE_PARALLEL), + bm_expected=(libvirt_driver.libvirt.VIR_MIGRATE_UNDEFINE_SOURCE | + libvirt_driver.libvirt.VIR_MIGRATE_PERSIST_DEST | + libvirt_driver.libvirt.VIR_MIGRATE_PEER2PEER | + libvirt_driver.libvirt.VIR_MIGRATE_LIVE | + libvirt_driver.libvirt.VIR_MIGRATE_NON_SHARED_INC | + libvirt_driver.libvirt.VIR_MIGRATE_PARALLEL)) + def test_live_migration_permit_auto_converge_and_post_copy_true(self): self.flags(live_migration_permit_auto_converge=True, group='libvirt') self.flags(live_migration_permit_post_copy=True, group='libvirt') @@ -14133,6 +14161,54 @@ class LibvirtConnTestCase(test.NoDBTestCase, drvr._live_migration_uri(target_connection), params=params, flags=expected_flags) + @mock.patch.object(host.Host, 'has_min_version', return_value=True) + @mock.patch.object(fakelibvirt.virDomain, "migrateToURI3") + @mock.patch('nova.virt.libvirt.migration.get_updated_guest_xml', + return_value='') + @mock.patch('nova.virt.libvirt.guest.Guest.get_xml_desc', return_value='') + def test_block_live_parallel_connections( + self, mock_old_xml, mock_new_xml, + mock_migrateToURI3, mock_min_version): + self.flags(live_migration_parallel_connections=5, group='libvirt') + target_connection = None + disk_paths = ['vda', 'vdb'] + + params = { + 'bandwidth': CONF.libvirt.live_migration_bandwidth, + 'migrate_disks': disk_paths, + 'parallel.connections': + CONF.libvirt.live_migration_parallel_connections + } + + # Start test + migrate_data = objects.LibvirtLiveMigrateData( + graphics_listen_addr_vnc='0.0.0.0', + graphics_listen_addr_spice='0.0.0.0', + serial_listen_addr='127.0.0.1', + serial_listen_ports=[1234], + target_connect_addr=target_connection, + bdms=[], + block_migration=True) 
+ + dom = fakelibvirt.virDomain + guest = libvirt_guest.Guest(dom) + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) + drvr._parse_migration_flags() + instance = objects.Instance(**self.test_instance) + drvr._live_migration_operation(self.context, instance, + target_connection, True, migrate_data, + guest, disk_paths) + + expected_flags = (fakelibvirt.VIR_MIGRATE_UNDEFINE_SOURCE | + fakelibvirt.VIR_MIGRATE_PERSIST_DEST | + fakelibvirt.VIR_MIGRATE_PEER2PEER | + fakelibvirt.VIR_MIGRATE_NON_SHARED_INC | + fakelibvirt.VIR_MIGRATE_LIVE | + fakelibvirt.VIR_MIGRATE_PARALLEL) + mock_migrateToURI3.assert_called_once_with( + drvr._live_migration_uri(target_connection), + params=params, flags=expected_flags) + @mock.patch.object(host.Host, 'has_min_version', return_value=True) @mock.patch.object(fakelibvirt.virDomain, "migrateToURI3") @mock.patch('nova.virt.libvirt.migration.get_updated_guest_xml', diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index 3d843eb1f13a..7ebbb7a72092 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -262,6 +262,9 @@ MIN_VFIO_PCI_VARIANT_QEMU_VERSION = (8, 2, 2) MIN_VIRTIO_SOUND_LIBVIRT_VERSION = (10, 4, 0) MIN_VIRTIO_SOUND_QEMU_VERSION = (8, 2, 0) +# Minimum version of Qemu that supports multifd migration with post-copy +MIN_MULTIFD_WITH_POSTCOPY_QEMU_VERSION = (10, 1, 0) + REGISTER_IMAGE_PROPERTY_DEFAULTS = [ 'hw_machine_type', 'hw_cdrom_bus', @@ -814,6 +817,16 @@ class LibvirtDriver(driver.ComputeDriver): raise exception.InternalError( _('Nova requires QEMU version %s or greater.') % libvirt_utils.version_to_string(MIN_QEMU_VERSION)) + if (CONF.libvirt.virt_type in ("qemu", "kvm") and + CONF.libvirt.live_migration_parallel_connections > 1 and + CONF.libvirt.live_migration_permit_post_copy is True): + if not self._host.has_min_version( + hv_ver=MIN_MULTIFD_WITH_POSTCOPY_QEMU_VERSION): + raise exception.InternalError( + _('Nova requires QEMU version %s or greater to use ' + 'live 
migration parallel connections with post-copy.') % + libvirt_utils.version_to_string( + MIN_MULTIFD_WITH_POSTCOPY_QEMU_VERSION)) if CONF.libvirt.virt_type == 'parallels': if not self._host.has_min_version(hv_ver=MIN_VIRTUOZZO_VERSION): @@ -1354,6 +1367,11 @@ class LibvirtDriver(driver.ComputeDriver): migration_flags |= libvirt.VIR_MIGRATE_AUTO_CONVERGE return migration_flags + def _handle_live_migration_parallel(self, migration_flags): + if CONF.libvirt.live_migration_parallel_connections > 1: + migration_flags |= libvirt.VIR_MIGRATE_PARALLEL + return migration_flags + def _parse_migration_flags(self): (live_migration_flags, block_migration_flags) = self._prepare_migration_flags() @@ -1378,6 +1396,11 @@ class LibvirtDriver(driver.ComputeDriver): block_migration_flags = self._handle_live_migration_auto_converge( block_migration_flags) + live_migration_flags = self._handle_live_migration_parallel( + live_migration_flags) + block_migration_flags = self._handle_live_migration_parallel( + block_migration_flags) + self._live_migration_flags = live_migration_flags self._block_migration_flags = block_migration_flags @@ -11173,12 +11196,14 @@ class LibvirtDriver(driver.ComputeDriver): serial_ports = list(self._get_serial_ports_from_guest(guest)) LOG.debug("About to invoke the migrate API", instance=instance) - guest.migrate(self._live_migration_uri(dest), - migrate_uri=migrate_uri, - flags=migration_flags, - migrate_disks=device_names, - destination_xml=new_xml_str, - bandwidth=CONF.libvirt.live_migration_bandwidth) + guest.migrate( + self._live_migration_uri(dest), + migrate_uri=migrate_uri, + flags=migration_flags, + migrate_disks=device_names, + destination_xml=new_xml_str, + bandwidth=CONF.libvirt.live_migration_bandwidth, + parallel=CONF.libvirt.live_migration_parallel_connections) LOG.debug("Migrate API has completed", instance=instance) for hostname, port in serial_ports: diff --git a/nova/virt/libvirt/guest.py b/nova/virt/libvirt/guest.py index 
78ea60b39ef7..e6c60d29d1fb 100644 --- a/nova/virt/libvirt/guest.py +++ b/nova/virt/libvirt/guest.py @@ -583,7 +583,8 @@ class Guest(object): self._domain.suspend() def migrate(self, destination, migrate_uri=None, migrate_disks=None, - destination_xml=None, flags=0, bandwidth=0): + destination_xml=None, flags=0, bandwidth=0, + parallel=0): """Migrate guest object from its current host to the destination :param destination: URI of host destination where guest will be migrate @@ -609,9 +610,12 @@ class Guest(object): not change its memory faster than a hypervisor can transfer the changed memory to the destination host + VIR_MIGRATE_PARALLEL Send memory pages to the destination host + through several network connections. VIR_MIGRATE_POSTCOPY Tell libvirt to enable post-copy migration VIR_MIGRATE_TLS Use QEMU-native TLS :param bandwidth: The maximum bandwidth in MiB/s + :param parallel: Number of connections used during live migration """ params = {} # In migrateToURI3 these parameters are extracted from the @@ -625,6 +629,8 @@ class Guest(object): params['migrate_disks'] = migrate_disks if migrate_uri: params['migrate_uri'] = migrate_uri + if parallel > 1: + params[libvirt.VIR_MIGRATE_PARAM_PARALLEL_CONNECTIONS] = parallel # Due to a quirk in the libvirt python bindings, # VIR_MIGRATE_NON_SHARED_INC with an empty migrate_disks is diff --git a/releasenotes/notes/libvirt_parallel_migration_libvirt-d36d621eb34f0ada.yaml b/releasenotes/notes/libvirt_parallel_migration_libvirt-d36d621eb34f0ada.yaml new file mode 100644 index 000000000000..53e7a034c898 --- /dev/null +++ b/releasenotes/notes/libvirt_parallel_migration_libvirt-d36d621eb34f0ada.yaml @@ -0,0 +1,9 @@ +--- +features: + - | + Implemented parallel live migrations for libvirt driver, which can be + enabled by defining ``[libvirt] live_migration_parallel_connections`` + to a value higher than 1. By default parallel migrations are not used to + preserve existing behavior. 
Also note that QEMU prior to 10.1.0 does not + support multifd together with post-copy migration, so enabling these two + features together on older QEMU versions is disallowed.