diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index 1b13d13bd68d..957e23e9f83e 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -7664,7 +7664,7 @@ class ComputeManager(manager.Manager):
         LOG.debug('destination check data is %s', dest_check_data)
         try:
             allocs = self.reportclient.get_allocations_for_consumer(
-                                        ctxt, instance.uuid)
+                ctxt, instance.uuid)
             migrate_data = self.compute_rpcapi.check_can_live_migrate_source(
                 ctxt, instance, dest_check_data)
             if ('src_supports_numa_live_migration' in migrate_data and
@@ -8249,9 +8249,10 @@ class ComputeManager(manager.Manager):
             self.driver.live_migration_abort(instance)
         self._notify_live_migrate_abort_end(context, instance)

-    def _live_migration_cleanup_flags(self, migrate_data):
-        """Determine whether disks or instance path need to be cleaned up after
-        live migration (at source on success, at destination on rollback)
+    def _live_migration_cleanup_flags(self, migrate_data, migr_ctxt=None):
+        """Determine whether disks, instance path or other resources
+        need to be cleaned up after live migration (at source on success,
+        at destination on rollback)

         Block migration needs empty image at destination host before migration
         starts, so if any failure occurs, any empty images has to be deleted.
@@ -8260,7 +8261,11 @@ class ComputeManager(manager.Manager):
         newly created instance-xxx dir on the destination as a part of its
         rollback process

+        There may be other resources which need cleanup; currently this is
+        limited to vPMEM devices with the libvirt driver.
+
         :param migrate_data: implementation specific data
+        :param migr_ctxt: specific resources stored in migration_context
         :returns: (bool, bool) -- do_cleanup, destroy_disks
         """
         # NOTE(pkoniszewski): block migration specific params are set inside
@@ -8270,11 +8275,20 @@ class ComputeManager(manager.Manager):
         do_cleanup = False
         destroy_disks = False
         if isinstance(migrate_data, migrate_data_obj.LibvirtLiveMigrateData):
+            has_vpmem = False
+            if migr_ctxt and migr_ctxt.old_resources:
+                for resource in migr_ctxt.old_resources:
+                    if ('metadata' in resource and
+                            isinstance(resource.metadata,
+                                       objects.LibvirtVPMEMDevice)):
+                        has_vpmem = True
+                        break
             # No instance booting at source host, but instance dir
             # must be deleted for preparing next block migration
             # must be deleted for preparing next live migration w/o shared
             # storage
-            do_cleanup = not migrate_data.is_shared_instance_path
+            # vpmem must be cleaned up
+            do_cleanup = not migrate_data.is_shared_instance_path or has_vpmem
             destroy_disks = not migrate_data.is_shared_block_storage
         elif isinstance(migrate_data, migrate_data_obj.XenapiLiveMigrateData):
             do_cleanup = migrate_data.block_migration
@@ -8427,7 +8441,7 @@ class ComputeManager(manager.Manager):
         source_node = instance.node

         do_cleanup, destroy_disks = self._live_migration_cleanup_flags(
-            migrate_data)
+            migrate_data, migr_ctxt=instance.migration_context)

         if do_cleanup:
             LOG.debug('Calling driver.cleanup from _post_live_migration',
@@ -8727,7 +8741,7 @@ class ComputeManager(manager.Manager):
                 bdms=bdms)

         do_cleanup, destroy_disks = self._live_migration_cleanup_flags(
-            migrate_data)
+            migrate_data, migr_ctxt=instance.migration_context)

         if do_cleanup:
             self.compute_rpcapi.rollback_live_migration_at_destination(
@@ -8867,6 +8881,9 @@ class ComputeManager(manager.Manager):
         # check_can_live_migrate_destination()
         self.rt.free_pci_device_claims_for_instance(context, instance)

+        # NOTE(luyao): Apply the migration_context temporarily since we are
+        # on the destination host; we rely on the instance object to clean
+        # up specific resources like vpmem
         with instance.mutated_migration_context():
             self.driver.rollback_live_migration_at_destination(
                 context, instance, network_info, block_device_info,
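For reference, a minimal standalone sketch of the cleanup decision introduced above (plain dataclasses stand in for the Nova objects; all names here are illustrative, not the Nova API): even when the instance path is shared, vPMEM resources carried in the migration context now force the cleanup path (`driver.cleanup` at the source on success, or at the destination on rollback) so the namespaces can be released.

```python
from dataclasses import dataclass, field
from typing import List, Optional

@dataclass
class FakeVPMEMDevice:              # stands in for objects.LibvirtVPMEMDevice
    devpath: str

@dataclass
class FakeResource:                 # stands in for objects.Resource
    metadata: object = None

@dataclass
class FakeMigrationContext:         # stands in for objects.MigrationContext
    old_resources: List[FakeResource] = field(default_factory=list)

def cleanup_flags(is_shared_instance_path, is_shared_block_storage,
                  migr_ctxt: Optional[FakeMigrationContext] = None):
    # Mirrors the libvirt branch of _live_migration_cleanup_flags().
    old = migr_ctxt.old_resources if migr_ctxt else []
    has_vpmem = any(isinstance(r.metadata, FakeVPMEMDevice) for r in old)
    do_cleanup = not is_shared_instance_path or has_vpmem
    destroy_disks = not is_shared_block_storage
    return do_cleanup, destroy_disks

ctxt = FakeMigrationContext([FakeResource(FakeVPMEMDevice('/dev/dax0.0'))])
# A shared instance path alone would skip cleanup; vPMEM forces it anyway.
assert cleanup_flags(True, False, ctxt) == (True, True)
assert cleanup_flags(True, False, None) == (False, True)
```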
diff --git a/nova/conductor/tasks/live_migrate.py b/nova/conductor/tasks/live_migrate.py
index 5aa701e688af..36a856d779db 100644
--- a/nova/conductor/tasks/live_migrate.py
+++ b/nova/conductor/tasks/live_migrate.py
@@ -46,6 +46,17 @@ def supports_vif_related_pci_allocations(context, host):
     return svc.version >= 36


+def supports_vpmem_live_migration(context):
+    """Checks if the compute host services are new enough to support
+    instance live migration with virtual persistent memory.
+
+    :param context: The user request context.
+    :returns: True if the compute hosts are new enough to support live
+              migration with vpmem
+    """
+    return objects.Service.get_minimum_version(context, 'nova-compute') >= 51
+
+
 class LiveMigrationTask(base.TaskBase):
     def __init__(self, context, instance, destination,
                  block_migration, disk_over_commit, migration, compute_rpcapi,
@@ -261,11 +272,16 @@ class LiveMigrationTask(base.TaskBase):
         if not self.instance.resources:
             return

+        has_vpmem = False
         for resource in self.instance.resources:
             if resource.resource_class.startswith("CUSTOM_PMEM_NAMESPACE_"):
-                raise exception.MigrationPreCheckError(
-                    reason="Cannot live migration with virtual persistent "
-                           "memory, the operation is not supported.")
+                has_vpmem = True
+                break
+
+        if has_vpmem and not supports_vpmem_live_migration(self.context):
+            raise exception.MigrationPreCheckError(
+                reason="Cannot live migrate with virtual persistent memory, "
+                       "the operation is not supported.")

     def _check_host_is_up(self, host):
         service = objects.Service.get_by_compute_host(self.context, host)
diff --git a/nova/objects/service.py b/nova/objects/service.py
index 98cb6efdd59c..887566266836 100644
--- a/nova/objects/service.py
+++ b/nova/objects/service.py
@@ -31,7 +31,7 @@ LOG = logging.getLogger(__name__)


 # NOTE(danms): This is the global service version counter
-SERVICE_VERSION = 50
+SERVICE_VERSION = 51


 # NOTE(danms): This is our SERVICE_VERSION history. The idea is that any
@@ -183,6 +183,8 @@ SERVICE_VERSION_HISTORY = (
     # Version 50: Compute RPC v5.11:
     #     Add accel_uuids (accelerator requests) param to build_and_run_instance
     {'compute_rpc': '5.11'},
+    # Version 51: Add support for live migration with vpmem
+    {'compute_rpc': '5.11'},
 )

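The conductor-side check above follows Nova's minimum-service-version pattern: the new service version 51 acts as the gate. A small illustrative sketch of the decision it encodes (plain values instead of the Service object; the helper name below is made up):

```python
# Illustrative only: live migration of an instance that consumes PMEM
# namespaces is refused until every nova-compute service reports at least
# version 51 (the version bumped in nova/objects/service.py above).
MIN_VPMEM_LM_VERSION = 51

def can_live_migrate(min_compute_version, resource_classes):
    has_vpmem = any(rc.startswith("CUSTOM_PMEM_NAMESPACE_")
                    for rc in resource_classes)
    return not has_vpmem or min_compute_version >= MIN_VPMEM_LM_VERSION

assert can_live_migrate(50, ["CUSTOM_PMEM_NAMESPACE_4GB"]) is False
assert can_live_migrate(51, ["CUSTOM_PMEM_NAMESPACE_4GB"]) is True
assert can_live_migrate(50, ["VCPU", "MEMORY_MB"]) is True   # no vPMEM, no gate
```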
diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py
index aef8618bc195..4f5a90004146 100644
--- a/nova/tests/unit/compute/test_compute_mgr.py
+++ b/nova/tests/unit/compute/test_compute_mgr.py
@@ -10137,6 +10137,25 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
             instance, migration.id)

+    def test_live_migration_cleanup_flags_shared_path_and_vpmem_libvirt(self):
+        migrate_data = objects.LibvirtLiveMigrateData(
+            is_shared_block_storage=False,
+            is_shared_instance_path=True)
+        migr_ctxt = objects.MigrationContext()
+        vpmem_resource = objects.Resource(
+            provider_uuid=uuids.rp_uuid,
+            resource_class="CUSTOM_PMEM_NAMESPACE_4GB",
+            identifier='ns_0', metadata=objects.LibvirtVPMEMDevice(
+                label='4GB',
+                name='ns_0', devpath='/dev/dax0.0',
+                size=4292870144, align=2097152))
+        migr_ctxt.old_resources = objects.ResourceList(
+            objects=[vpmem_resource])
+        do_cleanup, destroy_disks = self.compute._live_migration_cleanup_flags(
+            migrate_data, migr_ctxt)
+        self.assertTrue(do_cleanup)
+        self.assertTrue(destroy_disks)
+
     def test_live_migration_cleanup_flags_block_migrate_libvirt(self):
         migrate_data = objects.LibvirtLiveMigrateData(
             is_shared_block_storage=False,
diff --git a/nova/tests/unit/conductor/tasks/test_live_migrate.py b/nova/tests/unit/conductor/tasks/test_live_migrate.py
index 127bc8a76613..223cac298845 100644
--- a/nova/tests/unit/conductor/tasks/test_live_migrate.py
+++ b/nova/tests/unit/conductor/tasks/test_live_migrate.py
@@ -833,6 +833,15 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase):
             _test, pci_requests, True, True)

     def test_check_can_migrate_specific_resources(self):
+        """Test _check_can_migrate_specific_resources allows live migration
+        with vpmem.
+ """ + @mock.patch.object(live_migrate, 'supports_vpmem_live_migration') + def _test(resources, supp_lm_vpmem_retval, mock_support_lm_vpmem): + self.instance.resources = resources + mock_support_lm_vpmem.return_value = supp_lm_vpmem_retval + self.task._check_can_migrate_specific_resources() + vpmem_0 = objects.LibvirtVPMEMDevice( label='4GB', name='ns_0', devpath='/dev/dax0.0', size=4292870144, align=2097152) @@ -840,7 +849,11 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase): provider_uuid=uuids.rp, resource_class="CUSTOM_PMEM_NAMESPACE_4GB", identifier='ns_0', metadata=vpmem_0) - self.instance.resources = objects.ResourceList( + resources = objects.ResourceList( objects=[resource_0]) + + _test(None, False) + _test(None, True) + _test(resources, True) self.assertRaises(exception.MigrationPreCheckError, - self.task._check_can_migrate_specific_resources) + _test, resources, False) diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py index c643515d2a3e..4d1c49cdc720 100644 --- a/nova/tests/unit/virt/libvirt/test_driver.py +++ b/nova/tests/unit/virt/libvirt/test_driver.py @@ -11218,7 +11218,8 @@ class LibvirtConnTestCase(test.NoDBTestCase, drvr._live_migration_uri(target_connection), params=params, flags=0) mock_updated_guest_xml.assert_called_once_with( - guest, migrate_data, mock.ANY, get_vif_config=None) + guest, migrate_data, mock.ANY, get_vif_config=None, + new_resources=None) def test_live_migration_update_vifs_xml(self): """Tests that when migrate_data.vifs is populated, the destination @@ -11245,7 +11246,8 @@ class LibvirtConnTestCase(test.NoDBTestCase, fake_xml = '' def fake_get_updated_guest_xml(guest, migrate_data, get_volume_config, - get_vif_config=None): + get_vif_config=None, + new_resources=None): self.assertIsNotNone(get_vif_config) return fake_xml @@ -25894,6 +25896,29 @@ class LibvirtPMEMNamespaceTests(test.NoDBTestCase): self.assertEqual('SMALL', vpmems[1].label) self.assertEqual('SMALL', vpmems[2].label) + @mock.patch('nova.virt.hardware.get_vpmems') + def test_sorted_migrating_vpmem_resources(self, mock_labels): + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True) + instance = fake_instance.fake_instance_obj(self.context) + instance.flavor = objects.Flavor( + name='m1.small', memory_mb=2048, vcpus=2, root_gb=10, + ephemeral_gb=20, swap=0, extra_specs={ + 'hw:pmem': 'SMALL,4GB,SMALL'}) + mock_labels.return_value = ['SMALL', '4GB', 'SMALL'] + migr_context = objects.MigrationContext() + # original order is '4GB' 'SMALL' 'SMALL' + migr_context.new_resources = objects.ResourceList(objects=[ + self.resource_0, self.resource_1, self.resource_2]) + instance.migration_context = migr_context + + new_resources = drvr._sorted_migrating_resources( + instance, instance.flavor) + # ordered vpmems are 'SMAL' '4GB' 'SMALL' + expected_new_resources = objects.ResourceList(objects=[ + self.resource_1, self.resource_0, self.resource_2]) + for i in range(3): + self.assertEqual(expected_new_resources[i], new_resources[i]) + @mock.patch('nova.privsep.libvirt.cleanup_vpmem') def test_cleanup_vpmems(self, mock_cleanup_vpmem): vpmems = [self.vpmem_0, self.vpmem_1, self.vpmem_2] diff --git a/nova/tests/unit/virt/libvirt/test_migration.py b/nova/tests/unit/virt/libvirt/test_migration.py index 89a7b13119b5..39321b912502 100644 --- a/nova/tests/unit/virt/libvirt/test_migration.py +++ b/nova/tests/unit/virt/libvirt/test_migration.py @@ -116,6 +116,70 @@ class UtilityMigrationTestCase(test.NoDBTestCase): 
diff --git a/nova/tests/unit/virt/libvirt/test_migration.py b/nova/tests/unit/virt/libvirt/test_migration.py
index 89a7b13119b5..39321b912502 100644
--- a/nova/tests/unit/virt/libvirt/test_migration.py
+++ b/nova/tests/unit/virt/libvirt/test_migration.py
@@ -116,6 +116,70 @@ class UtilityMigrationTestCase(test.NoDBTestCase):
         mock_memory_backing.assert_called_once_with(mock.ANY, data)
         self.assertEqual(1, mock_tostring.called)

+    def test_update_device_resources_xml_vpmem(self):
+        # original xml for vpmems, /dev/dax0.1 and /dev/dax0.2 here
+        # are vpmem device paths on the source host
+        old_xml = textwrap.dedent("""
+            <domain>
+              <devices>
+                <memory model='nvdimm'>
+                  <source>
+                    <path>/dev/dax0.1</path>
+                    <alignsize>2048</alignsize>
+                    <pmem>on</pmem>
+                  </source>
+                  <target>
+                    <size>4192256</size>
+                    <node>0</node>
+                  </target>
+                </memory>
+                <memory model='nvdimm'>
+                  <source>
+                    <path>/dev/dax0.2</path>
+                    <alignsize>2048</alignsize>
+                    <pmem>on</pmem>
+                  </source>
+                  <target>
+                    <size>4192256</size>
+                    <node>0</node>
+                  </target>
+                </memory>
+              </devices>
+            </domain>""")
+        doc = etree.fromstring(old_xml)
+        vpmem_resource_0 = objects.Resource(
+            provider_uuid=uuids.rp_uuid,
+            resource_class="CUSTOM_PMEM_NAMESPACE_4GB",
+            identifier='ns_0',
+            metadata=objects.LibvirtVPMEMDevice(
+                label='4GB', name='ns_0', devpath='/dev/dax1.0',
+                size=4292870144, align=2097152))
+        vpmem_resource_1 = objects.Resource(
+            provider_uuid=uuids.rp_uuid,
+            resource_class="CUSTOM_PMEM_NAMESPACE_4GB",
+            identifier='ns_1',
+            metadata=objects.LibvirtVPMEMDevice(
+                label='4GB', name='ns_1', devpath='/dev/dax2.0',
+                size=4292870144, align=2097152))
+        # new_resources contains vpmems claimed on the destination,
+        # /dev/dax1.0 and /dev/dax2.0 are where vpmem data is migrated to
+        new_resources = objects.ResourceList(
+            objects=[vpmem_resource_0, vpmem_resource_1])
+        res = etree.tostring(migration._update_device_resources_xml(
+            copy.deepcopy(doc), new_resources),
+            encoding='unicode')
+        # we expect vpmem info to be updated in the xml after invoking
+        # _update_device_resources_xml
+        new_xml = old_xml.replace("/dev/dax0.1", "/dev/dax1.0")
+        new_xml = new_xml.replace("/dev/dax0.2", "/dev/dax2.0")
+        self.assertXmlEqual(res, new_xml)
+
     def test_update_numa_xml(self):
         xml = textwrap.dedent("""
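The test above pins down the XML change the new vPMEM code performs. Stripped of the Nova object scaffolding, the rewrite amounts to the following sketch (standard-library ElementTree is used here instead of lxml; the behaviour is the same for this snippet):

```python
import xml.etree.ElementTree as ET

xml_doc = ET.fromstring("""
<domain>
  <devices>
    <memory model='nvdimm'><source><path>/dev/dax0.1</path></source></memory>
    <memory model='nvdimm'><source><path>/dev/dax0.2</path></source></memory>
  </devices>
</domain>""")

# Device paths claimed on the destination, already sorted so that position i
# corresponds to the i-th <memory model='nvdimm'> element in the guest XML.
dest_paths = ['/dev/dax1.0', '/dev/dax2.0']

for pos, mem in enumerate(xml_doc.findall('./devices/memory')):
    if mem.get('model') == 'nvdimm':
        mem.find('./source/path').text = dest_paths[pos]

assert [p.text for p in xml_doc.iter('path')] == dest_paths
```

The positional lookup only works because the destination-claimed resources are sorted to match the device order derived from the flavor's hw:pmem labels, which is what the driver changes below take care of.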
diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py
index 443d331de5ab..a0eaeec3904c 100644
--- a/nova/virt/libvirt/driver.py
+++ b/nova/virt/libvirt/driver.py
@@ -5858,30 +5858,18 @@ class LibvirtDriver(driver.ComputeDriver):
         return guest

     def _get_ordered_vpmems(self, instance, flavor):
-        ordered_vpmems = []
-        vpmems = self._get_vpmems(instance)
-        labels = hardware.get_vpmems(flavor)
-        for label in labels:
-            for vpmem in vpmems:
-                if vpmem.label == label:
-                    ordered_vpmems.append(vpmem)
-                    vpmems.remove(vpmem)
-                    break
+        resources = self._get_resources(instance)
+        ordered_vpmem_resources = self._get_ordered_vpmem_resources(
+            resources, flavor)
+        ordered_vpmems = [self._vpmems_by_name[resource.identifier]
+                          for resource in ordered_vpmem_resources]
         return ordered_vpmems

     def _get_vpmems(self, instance, prefix=None):
-        vpmems = []
-        resources = instance.resources
-        if prefix == 'old' and instance.migration_context:
-            if 'old_resources' in instance.migration_context:
-                resources = instance.migration_context.old_resources
-        if not resources:
-            return vpmems
-        for resource in resources:
-            rc = resource.resource_class
-            if rc.startswith("CUSTOM_PMEM_NAMESPACE_"):
-                vpmem = self._vpmems_by_name[resource.identifier]
-                vpmems.append(vpmem)
+        resources = self._get_resources(instance, prefix=prefix)
+        vpmem_resources = self._get_vpmem_resources(resources)
+        vpmems = [self._vpmems_by_name[resource.identifier]
+                  for resource in vpmem_resources]
         return vpmems

     def _guest_add_vpmems(self, guest, vpmems):
@@ -8143,6 +8131,53 @@ class LibvirtDriver(driver.ComputeDriver):
                 claim.image_meta)
         return migrate_data

+    def _get_resources(self, instance, prefix=None):
+        resources = []
+        if prefix:
+            migr_context = instance.migration_context
+            attr_name = prefix + 'resources'
+            if migr_context and attr_name in migr_context:
+                resources = getattr(migr_context, attr_name) or []
+        else:
+            resources = instance.resources or []
+        return resources
+
+    def _get_vpmem_resources(self, resources):
+        vpmem_resources = []
+        for resource in resources:
+            if 'metadata' in resource and \
+                    isinstance(resource.metadata, objects.LibvirtVPMEMDevice):
+                vpmem_resources.append(resource)
+        return vpmem_resources
+
+    def _get_ordered_vpmem_resources(self, resources, flavor):
+        vpmem_resources = self._get_vpmem_resources(resources)
+        ordered_vpmem_resources = []
+        labels = hardware.get_vpmems(flavor)
+        for label in labels:
+            for vpmem_resource in vpmem_resources:
+                if vpmem_resource.metadata.label == label:
+                    ordered_vpmem_resources.append(vpmem_resource)
+                    vpmem_resources.remove(vpmem_resource)
+                    break
+        return ordered_vpmem_resources
+
+    def _sorted_migrating_resources(self, instance, flavor):
+        """Sort the resources in instance.migration_context.new_resources,
+        which were claimed on the destination host. The ordered new resources
+        are then used to update resource info (e.g. vpmems) in the new XML
+        used for live migration.
+        """
+        resources = self._get_resources(instance, prefix='new_')
+        if not resources:
+            return
+        ordered_resources = []
+        ordered_vpmem_resources = self._get_ordered_vpmem_resources(
+            resources, flavor)
+        ordered_resources.extend(ordered_vpmem_resources)
+        ordered_resources_obj = objects.ResourceList(objects=ordered_resources)
+        return ordered_resources_obj
+
     def _get_live_migrate_numa_info(self, instance_numa_topology, flavor,
                                     image_meta):
         """Builds a LibvirtLiveMigrateNUMAInfo object to send to the source of
@@ -8614,12 +8649,16 @@ class LibvirtDriver(driver.ComputeDriver):
                                                host=self._host)
         self._detach_direct_passthrough_vifs(context,
                                              migrate_data, instance)
+        new_resources = None
+        if isinstance(instance, objects.Instance):
+            new_resources = self._sorted_migrating_resources(
+                instance, instance.flavor)
         new_xml_str = libvirt_migrate.get_updated_guest_xml(
             # TODO(sahid): It's not a really good idea to pass
             # the method _get_volume_config and we should to find
             # a way to avoid this in future.
             guest, migrate_data, self._get_volume_config,
-            get_vif_config=get_vif_config)
+            get_vif_config=get_vif_config, new_resources=new_resources)

         # NOTE(pkoniszewski): Because of precheck which blocks
         # tunnelled block live migration with mapped volumes we
@@ -8803,6 +8842,8 @@ class LibvirtDriver(driver.ComputeDriver):
         n = 0
         start = time.time()
         is_post_copy_enabled = self._is_post_copy_enabled(migration_flags)
+        # vpmem does not support post copy
+        is_post_copy_enabled &= not bool(self._get_vpmems(instance))

         while True:
             info = guest.get_job_info()
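The ordering helpers above are what make the positional path rewrite in nova/virt/libvirt/migration.py (next hunk) safe: the guest's NVDIMM devices are added in the flavor's hw:pmem label order, so the resources claimed on the destination are re-sorted into that same order before the XML is updated. A simplified sketch of that sort, with (label, devpath) tuples standing in for objects.Resource:

```python
def order_vpmem_resources(claimed, flavor_labels):
    """Re-order destination-claimed vPMEM resources to follow the flavor's
    hw:pmem label order (same algorithm as _get_ordered_vpmem_resources)."""
    remaining = list(claimed)
    ordered = []
    for label in flavor_labels:
        for res in remaining:
            if res[0] == label:
                ordered.append(res)
                remaining.remove(res)
                break
    return ordered

claimed = [('4GB', '/dev/dax1.0'),
           ('SMALL', '/dev/dax2.0'),
           ('SMALL', '/dev/dax3.0')]
assert order_vpmem_resources(claimed, ['SMALL', '4GB', 'SMALL']) == [
    ('SMALL', '/dev/dax2.0'), ('4GB', '/dev/dax1.0'), ('SMALL', '/dev/dax3.0')]
```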
diff --git a/nova/virt/libvirt/migration.py b/nova/virt/libvirt/migration.py
index 01b6ba6cb8e1..4143261ad038 100644
--- a/nova/virt/libvirt/migration.py
+++ b/nova/virt/libvirt/migration.py
@@ -25,6 +25,7 @@ from oslo_log import log as logging
 from nova.compute import power_state
 import nova.conf
 from nova import exception
+from nova import objects
 from nova.virt import hardware
 from nova.virt.libvirt import config as vconfig

@@ -80,7 +81,7 @@ def serial_listen_ports(migrate_data):


 def get_updated_guest_xml(guest, migrate_data, get_volume_config,
-                          get_vif_config=None):
+                          get_vif_config=None, new_resources=None):
     xml_doc = etree.fromstring(guest.get_xml_desc(dump_migratable=True))
     xml_doc = _update_graphics_xml(xml_doc, migrate_data)
     xml_doc = _update_serial_xml(xml_doc, migrate_data)
@@ -91,9 +92,33 @@ def get_updated_guest_xml(guest, migrate_data, get_volume_config,
         xml_doc = _update_vif_xml(xml_doc, migrate_data, get_vif_config)
     if 'dst_numa_info' in migrate_data:
         xml_doc = _update_numa_xml(xml_doc, migrate_data)
+    if new_resources:
+        xml_doc = _update_device_resources_xml(xml_doc, new_resources)
     return etree.tostring(xml_doc, encoding='unicode')


+def _update_device_resources_xml(xml_doc, new_resources):
+    vpmems = []
+    for resource in new_resources:
+        if 'metadata' in resource:
+            res_meta = resource.metadata
+            if isinstance(res_meta, objects.LibvirtVPMEMDevice):
+                vpmems.append(res_meta)
+    # If other resources need to be handled in the future, the xml
+    # should be updated here in the same way as for vpmems
+    xml_doc = _update_vpmems_xml(xml_doc, vpmems)
+    return xml_doc
+
+
+def _update_vpmems_xml(xml_doc, vpmems):
+    memory_devices = xml_doc.findall("./devices/memory")
+    for pos, memory_dev in enumerate(memory_devices):
+        if memory_dev.get('model') == 'nvdimm':
+            devpath = memory_dev.find('./source/path')
+            devpath.text = vpmems[pos].devpath
+    return xml_doc
+
+
 def _update_numa_xml(xml_doc, migrate_data):
     LOG.debug('_update_numa_xml input xml=%s',
               etree.tostring(xml_doc, encoding='unicode', pretty_print=True))
diff --git a/releasenotes/notes/add-support-for-live-migration-with-vpmem-9af5057dbe551f3b.yaml b/releasenotes/notes/add-support-for-live-migration-with-vpmem-9af5057dbe551f3b.yaml
new file mode 100644
index 000000000000..dcef12c8ac46
--- /dev/null
+++ b/releasenotes/notes/add-support-for-live-migration-with-vpmem-9af5057dbe551f3b.yaml
@@ -0,0 +1,8 @@
+---
+features:
+  - |
+    The libvirt driver now supports live migration with virtual persistent
+    memory (vPMEM), which requires QEMU as the hypervisor. At the
+    virtualization layer, QEMU copies the vPMEM contents over the network in
+    the same way as volatile memory; because vPMEM capacity is typically
+    large, live migration may take considerably longer.
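Finally, a small sketch of the interaction the release note warns about: the vPMEM contents are pre-copied like ordinary guest RAM, and post-copy is disabled whenever vPMEM devices are attached (the `is_post_copy_enabled` change in driver.py above), so large namespaces translate directly into longer migration times. The helper name below is illustrative only.

```python
def effective_copy_mode(post_copy_allowed_by_flags, vpmem_devices):
    # Mirrors: is_post_copy_enabled &= not bool(self._get_vpmems(instance))
    is_post_copy_enabled = post_copy_allowed_by_flags
    is_post_copy_enabled &= not bool(vpmem_devices)
    return 'post-copy' if is_post_copy_enabled else 'pre-copy'

assert effective_copy_mode(True, []) == 'post-copy'
assert effective_copy_mode(True, ['/dev/dax0.0']) == 'pre-copy'   # vPMEM attached
```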