From c438fd9a0eb1903306a53ab44e3ae80660d8a429 Mon Sep 17 00:00:00 2001 From: Lee Yarwood Date: Wed, 5 Aug 2020 23:00:06 +0100 Subject: [PATCH] libvirt: Provide VIR_MIGRATE_PARAM_PERSIST_XML during live migration The VIR_MIGRATE_PARAM_PERSIST_XML parameter was introduced in libvirt v1.3.4 and is used to provide the new persistent configuration for the destination during a live migration: https://libvirt.org/html/libvirt-libvirt-domain.html#VIR_MIGRATE_PARAM_PERSIST_XML Without this parameter the persistent configuration on the destination will be the same as the original persistent configuration on the source when the VIR_MIGRATE_PERSIST_DEST flag is provided. As Nova does not currently provide the VIR_MIGRATE_PARAM_PERSIST_XML param but does provide the VIR_MIGRATE_PERSIST_DEST flag this means that a soft reboot by Nova of the instance after a live migration can revert the domain back to the original persistent configuration from the source. Note that this is only possible in Nova as a soft reboot actually results in the virDomainShutdown and virDomainLaunch libvirt APIs being called that recreate the domain using the persistent configuration. virDomainReboot does not result in this but is not called at this time. The impact of this on the instance after the soft reboot is pretty severe, host devices referenced in the original persistent configuration on the source may not exist or could even be used by other users on the destination. CPU and NUMA affinity could also differ drastically between the two hosts resulting in the instance being unable to start etc. As MIN_LIBVIRT_VERSION is now > v1.3.4 this change simply includes the VIR_MIGRATE_PARAM_PERSIST_XML param using the same updated XML for the destination as is already provided to VIR_MIGRATE_PARAM_DEST_XML. NOTE(lyarwood): As this is no longer the case from stable/rocky the change is slightly more involved introducing a persistent_xml_param kwarg that is used from _live_migration_operation within the driver based on the availability of libvirt v1.3.4 on the source host. Co-authored-by: Tadayoshi Hosoya Closes-Bug: #1890501 Change-Id: Ia3f1d8e83cbc574ce5cb440032e12bbcb1e10e98 (cherry picked from commit 1bb8ee95d4c3ddc3f607ac57526b75af1b7fbcff) (cherry picked from commit bbf9d1de06e9991acd968fceee899a8df3776d60) (cherry picked from commit 6a07edb4b29d8bfb5c86ed14263f7cd7525958c1) (cherry picked from commit b9ea91d17703f5b324a50727b6503ace0f4e95eb) --- nova/tests/unit/virt/libvirt/test_driver.py | 91 +++++++++++++++++++-- nova/tests/unit/virt/test_virt_drivers.py | 2 +- nova/virt/libvirt/driver.py | 12 ++- nova/virt/libvirt/guest.py | 9 +- 4 files changed, 105 insertions(+), 9 deletions(-) diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py index acb2775d449a..3d245978baaa 100644 --- a/nova/tests/unit/virt/libvirt/test_driver.py +++ b/nova/tests/unit/virt/libvirt/test_driver.py @@ -9736,6 +9736,76 @@ class LibvirtConnTestCase(test.NoDBTestCase, 'instance', data, block_device_info=bdi)) self.assertEqual(0, mock_get_instance_disk_info.call_count) + @mock.patch.object(fakelibvirt.virDomain, "migrateToURI3") + @mock.patch('nova.virt.libvirt.migration.get_updated_guest_xml') + @mock.patch.object(fakelibvirt.Connection, 'getLibVersion') + def test_live_migration_persistent_xml( + self, mock_get_version, mock_get_updated_xml, mock_migrateToURI3): + """Assert that persistent_xml only provided when libvirt is >= v1.3.4 + """ + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) + instance = self.test_instance + dest = '127.0.0.1' + block_migration = False + migrate_data = objects.LibvirtLiveMigrateData( + graphics_listen_addr_vnc='10.0.0.1', + graphics_listen_addr_spice='10.0.0.2', + serial_listen_addr='127.0.0.1', + target_connect_addr='127.0.0.1', + bdms=[], + block_migration=block_migration) + guest = libvirt_guest.Guest(fakelibvirt.virDomain) + device_names = ['vda'] + + mock_get_updated_xml.return_value = mock.sentinel.dest_xml + + # persistent_xml was introduced in v1.3.4 so provide v1.3.3 + v1_3_3 = versionutils.convert_version_to_int((1, 3, 3)) + mock_get_version.return_value = v1_3_3 + + drvr._live_migration_operation( + self.context, instance, dest, block_migration, migrate_data, + guest, device_names) + + expected_uri = drvr._live_migration_uri(dest) + expected_flags = 0 + expected_params = { + 'bandwidth': 0, + 'destination_xml': mock.sentinel.dest_xml, + 'migrate_disks': device_names, + 'migrate_uri': 'tcp://127.0.0.1' + } + + # Assert that migrateToURI3 is called without the persistent_xml param + mock_get_version.assert_called() + mock_migrateToURI3.assert_called_once_with( + expected_uri, params=expected_params, flags=expected_flags) + + # reset mocks and try again with v1.3.4 + mock_get_version.reset_mock() + mock_migrateToURI3.reset_mock() + + # persistent_xml was introduced in v1.3.4 so provide it this time + v1_3_4 = versionutils.convert_version_to_int((1, 3, 4)) + mock_get_version.return_value = v1_3_4 + + drvr._live_migration_operation( + self.context, instance, dest, + block_migration, migrate_data, guest, device_names) + + expected_params = { + 'bandwidth': 0, + 'destination_xml': mock.sentinel.dest_xml, + 'persistent_xml': mock.sentinel.dest_xml, + 'migrate_disks': device_names, + 'migrate_uri': 'tcp://127.0.0.1' + } + + # Assert that migrateToURI3 is called with the persistent_xml param + mock_get_version.assert_called() + mock_migrateToURI3.assert_called_once_with( + expected_uri, params=expected_params, flags=expected_flags) + @mock.patch.object(host.Host, 'has_min_version', return_value=True) @mock.patch.object(fakelibvirt.virDomain, "migrateToURI3") @mock.patch.object(fakelibvirt.virDomain, "XMLDesc") @@ -9778,6 +9848,7 @@ class LibvirtConnTestCase(test.NoDBTestCase, 'migrate_disks': disk_paths, 'bandwidth': _bandwidth, 'destination_xml': target_xml, + 'persistent_xml': target_xml, } # start test @@ -9885,7 +9956,7 @@ class LibvirtConnTestCase(test.NoDBTestCase, 'migrate_disks': disk_paths, 'migrate_uri': 'tcp://127.0.0.2', 'bandwidth': CONF.libvirt.live_migration_bandwidth, - 'destination_xml': target_xml + 'destination_xml': target_xml, } # Start test @@ -9985,6 +10056,7 @@ class LibvirtConnTestCase(test.NoDBTestCase, 'migrate_uri': 'tcp://127.0.0.2', 'bandwidth': CONF.libvirt.live_migration_bandwidth, 'destination_xml': target_xml, + 'persistent_xml': target_xml, } # start test @@ -10335,6 +10407,7 @@ class LibvirtConnTestCase(test.NoDBTestCase, 'migrate_disks': ['vda', 'vdb'], 'bandwidth': CONF.libvirt.live_migration_bandwidth, 'destination_xml': target_xml, + 'persistent_xml': target_xml, } # start test @@ -10388,14 +10461,14 @@ class LibvirtConnTestCase(test.NoDBTestCase, @mock.patch.object(host.Host, 'has_min_version', return_value=True) @mock.patch.object(fakelibvirt.virDomain, "migrateToURI3") - @mock.patch('nova.virt.libvirt.migration.get_updated_guest_xml', - return_value='') + @mock.patch('nova.virt.libvirt.migration.get_updated_guest_xml') @mock.patch('nova.virt.libvirt.guest.Guest.get_xml_desc', return_value='') def test_live_migration_uses_migrateToURI3( self, mock_old_xml, mock_new_xml, mock_migrateToURI3, mock_min_version): + mock_new_xml.return_value = mock.sentinel.new_xml target_connection = '127.0.0.2' # Preparing mocks disk_paths = ['vda', 'vdb'] @@ -10403,6 +10476,8 @@ class LibvirtConnTestCase(test.NoDBTestCase, 'migrate_uri': 'tcp://127.0.0.2', 'migrate_disks': ['vda', 'vdb'], 'bandwidth': CONF.libvirt.live_migration_bandwidth, + 'destination_xml': mock.sentinel.new_xml, + 'persistent_xml': mock.sentinel.new_xml, } mock_migrateToURI3.side_effect = fakelibvirt.libvirtError("ERR") @@ -10461,6 +10536,7 @@ class LibvirtConnTestCase(test.NoDBTestCase, 'migrate_disks': device_names, 'bandwidth': CONF.libvirt.live_migration_bandwidth, 'destination_xml': '', + 'persistent_xml': '', } if not params['migrate_disks']: del params['migrate_disks'] @@ -10492,14 +10568,14 @@ class LibvirtConnTestCase(test.NoDBTestCase, @mock.patch.object(host.Host, 'has_min_version', return_value=True) @mock.patch.object(fakelibvirt.virDomain, "migrateToURI3") - @mock.patch('nova.virt.libvirt.migration.get_updated_guest_xml', - return_value='') + @mock.patch('nova.virt.libvirt.migration.get_updated_guest_xml') @mock.patch('nova.virt.libvirt.guest.Guest.get_xml_desc', return_value='') def test_block_live_migration_tunnelled_migrateToURI3( self, mock_old_xml, mock_new_xml, mock_migrateToURI3, mock_min_version): self.flags(live_migration_tunnelled=True, group='libvirt') + mock_new_xml.return_value = mock.sentinel.new_xml target_connection = None device_names = ['disk1', 'disk2'] @@ -10507,7 +10583,9 @@ class LibvirtConnTestCase(test.NoDBTestCase, # Since we are passing the VIR_MIGRATE_TUNNELLED flag, the # 'parms' dict will not (as expected) contain 'migrate_disks' params = { - 'bandwidth': CONF.libvirt.live_migration_bandwidth + 'bandwidth': CONF.libvirt.live_migration_bandwidth, + 'destination_xml': mock.sentinel.new_xml, + 'persistent_xml': mock.sentinel.new_xml, } # Start test migrate_data = objects.LibvirtLiveMigrateData( @@ -10554,6 +10632,7 @@ class LibvirtConnTestCase(test.NoDBTestCase, 'migrate_disks': disk_paths, 'bandwidth': CONF.libvirt.live_migration_bandwidth, 'destination_xml': '', + 'persistent_xml': '', } # Prepare mocks diff --git a/nova/tests/unit/virt/test_virt_drivers.py b/nova/tests/unit/virt/test_virt_drivers.py index 0f1ac970d157..b7ed71f6657f 100644 --- a/nova/tests/unit/virt/test_virt_drivers.py +++ b/nova/tests/unit/virt/test_virt_drivers.py @@ -135,7 +135,7 @@ class _FakeDriverBackendTestCase(object): self.stub_out('nova.virt.libvirt.guest.Guest.migrate', lambda self, destination, migrate_uri=None, migrate_disks=None, destination_xml=None, flags=0, - bandwidth=0: None) + bandwidth=0, persistent_xml_param=False: None) # We can't actually make a config drive v2 because ensure_tree has # been faked out self.stub_out('nova.virt.configdrive.ConfigDriveBuilder.make_drive', diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index 6a7ae8b1b3d6..abb8753e478d 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -301,6 +301,11 @@ MIN_LIBVIRT_BETTER_SIGKILL_HANDLING = (4, 7, 0) VGPU_RESOURCE_SEMAPHORE = "vgpu_resources" +# libvirt >= v1.3.4 introduced VIR_MIGRATE_PARAM_PERSIST_XML that needs to be +# provided when the VIR_MIGRATE_PERSIST_DEST flag is used to ensure the updated +# domain XML is persisted on the destination. +MIN_LIBVIRT_MIGRATE_PARAM_PERSIST_XML = (1, 3, 4) + class LibvirtDriver(driver.ComputeDriver): capabilities = { @@ -7279,13 +7284,18 @@ class LibvirtDriver(driver.ComputeDriver): if CONF.serial_console.enabled: serial_ports = list(self._get_serial_ports_from_guest(guest)) + # NOTE(lyarwood): Only available from v1.3.4 + persistent_xml_param = self._host.has_min_version( + MIN_LIBVIRT_MIGRATE_PARAM_PERSIST_XML) + LOG.debug("About to invoke the migrate API", instance=instance) guest.migrate(self._live_migration_uri(dest), migrate_uri=migrate_uri, flags=migration_flags, migrate_disks=device_names, destination_xml=new_xml_str, - bandwidth=CONF.libvirt.live_migration_bandwidth) + bandwidth=CONF.libvirt.live_migration_bandwidth, + persistent_xml_param=persistent_xml_param) LOG.debug("Migrate API has completed", instance=instance) for hostname, port in serial_ports: diff --git a/nova/virt/libvirt/guest.py b/nova/virt/libvirt/guest.py index ef015ee8b9b8..847262c03383 100644 --- a/nova/virt/libvirt/guest.py +++ b/nova/virt/libvirt/guest.py @@ -610,7 +610,8 @@ class Guest(object): self._domain.suspend() def migrate(self, destination, migrate_uri=None, migrate_disks=None, - destination_xml=None, flags=0, bandwidth=0): + destination_xml=None, flags=0, bandwidth=0, + persistent_xml_param=False): """Migrate guest object from its current host to the destination :param destination: URI of host destination where guest will be migrate @@ -645,6 +646,9 @@ class Guest(object): unsafe. VIR_MIGRATE_OFFLINE Migrate offline :param bandwidth: The maximum bandwidth in MiB/s + :param persistent_xml_param: Boolean indicating if the + VIR_MIGRATE_PARAM_PERSIST_XML param should + be provided to migrateToURI3. """ params = {} # In migrateToURI3 these parameters are extracted from the @@ -653,6 +657,9 @@ class Guest(object): if destination_xml: params['destination_xml'] = destination_xml + # NOTE(lyarwood): Only available from v1.3.4 + if persistent_xml_param: + params['persistent_xml'] = destination_xml if migrate_disks: params['migrate_disks'] = migrate_disks if migrate_uri: