From c39ad2383c1921bda486d58c8670846518caed72 Mon Sep 17 00:00:00 2001
From: LuyaoZhong
Date: Wed, 11 Sep 2019 07:02:20 +0000
Subject: [PATCH] libvirt: Support VM creation with vpmems and vpmems cleanup

Add the vpmems config into the guest config XML according to the
resources info in the instance object, so that users can build a VM
with vpmems. This patch also adds support for cleaning up the data
on the backend devices of the vpmems.

note:
We modify the root <domain> element generated for libvirt to include
<maxMemory> elements when a memory device is used.[1]
Requiring vpmems implies a NUMA topology because libvirt won't let us
use vpmem without NUMA.[2]

[1]https://github.com/libvirt/libvirt/blob/master/src/qemu/qemu_domain.c#L11593-L11599
[2]https://github.com/libvirt/libvirt/blob/master/src/qemu/qemu_domain.c#L11604-L11615

Change-Id: I725deb0312c930087c9e60115abe68b4e06e6804
Partially-Implements: blueprint virtual-persistent-memory
Co-Authored-By: He Jie Xu
---
 nova/exception.py                           |   5 +
 nova/privsep/libvirt.py                     |   6 ++
 nova/tests/unit/virt/libvirt/fakelibvirt.py |  41 ++++++-
 nova/tests/unit/virt/libvirt/test_config.py |  23 ++++
 nova/tests/unit/virt/libvirt/test_driver.py | 114 ++++++++++++++++++--
 nova/virt/hardware.py                       |  11 ++
 nova/virt/libvirt/config.py                 |  68 ++++++++++++
 nova/virt/libvirt/driver.py                 |  65 +++++++++++
 8 files changed, 326 insertions(+), 7 deletions(-)

diff --git a/nova/exception.py b/nova/exception.py
index 422e5f983b12..df8c68190808 100644
--- a/nova/exception.py
+++ b/nova/exception.py
@@ -2532,3 +2532,8 @@ class PMEMNamespaceConfigInvalid(NovaException):
 
 class GetPMEMNamespaceFailed(NovaException):
     msg_fmt = _("Get PMEM namespaces on host failed: %(reason)s.")
+
+
+class VPMEMCleanupFailed(NovaException):
+    msg_fmt = _("Failed to clean up the vpmem backend device %(dev)s: "
+                "%(error)s")
diff --git a/nova/privsep/libvirt.py b/nova/privsep/libvirt.py
index f4e59acd6cd9..05a6c8ba4cea 100644
--- a/nova/privsep/libvirt.py
+++ b/nova/privsep/libvirt.py
@@ -253,3 +253,9 @@ def get_pmem_namespaces():
     ndctl_cmd = ['ndctl', 'list', '-X']
     nss_info = processutils.execute(*ndctl_cmd)[0]
     return nss_info
+
+
+@nova.privsep.sys_admin_pctxt.entrypoint
+def cleanup_vpmem(devpath):
+    daxio_cmd = ['daxio', '-z', '-o', '%s' % devpath]
+    processutils.execute(*daxio_cmd)
diff --git a/nova/tests/unit/virt/libvirt/fakelibvirt.py b/nova/tests/unit/virt/libvirt/fakelibvirt.py
index e578aa0cefb0..daa92050ab8e 100644
--- a/nova/tests/unit/virt/libvirt/fakelibvirt.py
+++ b/nova/tests/unit/virt/libvirt/fakelibvirt.py
@@ -864,6 +864,24 @@ class Domain(object):
                 })
             devices['hostdevs'] = hostdev_info
 
+        vpmem_info = []
+        vpmems = device_nodes.findall('./memory')
+        for vpmem in vpmems:
+            model = vpmem.get('model')
+            if model == 'nvdimm':
+                source = vpmem.find('./source')
+                target = vpmem.find('./target')
+                path = source.find('./path').text
+                alignsize = source.find('./alignsize').text
+                size = target.find('./size').text
+                node = target.find('./node').text
+                vpmem_info.append({
+                    'path': path,
+                    'size': size,
+                    'alignsize': alignsize,
+                    'node': node})
+        devices['vpmems'] = vpmem_info
+
         definition['devices'] = devices
 
         return definition
@@ -1023,6 +1041,25 @@ class Domain(object):
     ''' % hostdev  # noqa
 
+        vpmems = ''
+        for vpmem in self._def['devices']['vpmems']:
+            vpmems += '''
+    <memory model='nvdimm' access='shared'>
+      <source>
+        <path>%(path)s</path>
+        <alignsize>%(alignsize)s</alignsize>
+        <pmem/>
+      </source>
+      <target>
+        <size>%(size)s</size>
+        <node>%(node)s</node>
+        <label>
+          <size>2048</size>
+        </label>
+      </target>
+    </memory>''' % vpmem
+
         return '''<domain type='kvm'>
   <name>%(name)s</name>
   <uuid>%(uuid)s</uuid>
@@ -1079,6 +1116,7 @@ class Domain(object):
                      function='0x0'/>
             </memballoon>
     %(hostdevs)s
+    %(vpmems)s
  </devices>
</domain>''' % {'name': self._def['name'],
                'uuid': self._def['uuid'],
@@ -1087,7 +1125,8 @@ class Domain(object):
                'arch': self._def['os']['arch'],
                'disks': disks,
                'nics': nics,
-               'hostdevs': hostdevs}
+               'hostdevs': hostdevs,
+               'vpmems': vpmems}
 
     def managedSave(self, flags):
         self._connection._mark_not_running(self)
diff --git a/nova/tests/unit/virt/libvirt/test_config.py b/nova/tests/unit/virt/libvirt/test_config.py
index 34ddd40762e7..b8e3620b775d 100644
--- a/nova/tests/unit/virt/libvirt/test_config.py
+++ b/nova/tests/unit/virt/libvirt/test_config.py
@@ -3758,3 +3758,26 @@ class LibvirtConfigSecretTest(LibvirtConfigBaseTest):
         """
 
         self.assertXmlEqual(expected_xml, xml)
+
+
+class LibvirtConfigGuestVPMEMTest(LibvirtConfigBaseTest):
+    def test_config_vpmem(self):
+        obj = config.LibvirtConfigGuestVPMEM(
+            devpath='/dev/dax0.0', size_kb=4096 * units.Ki, align_kb=2048)
+
+        xml = obj.to_xml()
+        self.assertXmlEqual(xml, """
+            <memory model='nvdimm' access='shared'>
+                <source>
+                    <path>/dev/dax0.0</path>
+                    <alignsize>2048</alignsize>
+                    <pmem/>
+                </source>
+                <target>
+                    <size>4194304</size>
+                    <node>0</node>
+                    <label>
+                        <size>2048</size>
+                    </label>
+                </target>
+            </memory>""")
diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py
index d2e9916cc6b4..6879b83f8f0a 100644
--- a/nova/tests/unit/virt/libvirt/test_driver.py
+++ b/nova/tests/unit/virt/libvirt/test_driver.py
@@ -902,7 +902,8 @@ def _create_test_instance():
         'host': 'fake-host',
         'task_state': None,
         'vm_state': None,
-        'trusted_certs': None
+        'trusted_certs': None,
+        'resources': None,
     }
 
 
@@ -14709,7 +14710,8 @@ class LibvirtConnTestCase(test.NoDBTestCase,
                                          mock_unplug_vifs):
         instance = fake_instance.fake_instance_obj(
             None, name='instancename', id=1,
-            uuid='875a8070-d0b9-4949-8b31-104d125c9a64')
+            uuid='875a8070-d0b9-4949-8b31-104d125c9a64',
+            expected_attrs=['resources'])
         drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
         drvr.destroy(self.context, instance, [], None, False)
 
@@ -18194,7 +18196,7 @@ class LibvirtConnTestCase(test.NoDBTestCase,
         drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI())
         drvr.firewall_driver = mock.Mock()
         drvr._disconnect_volume = mock.Mock()
-        fake_inst = {'name': 'foo'}
+        fake_inst = objects.Instance(**self.test_instance)
         fake_bdms = [{'connection_info': 'foo',
                       'mount_device': None}]
         with mock.patch('nova.virt.driver'
@@ -18207,7 +18209,7 @@ class LibvirtConnTestCase(test.NoDBTestCase,
     @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._undefine_domain')
     def test_cleanup_wants_vif_errors_ignored(self, undefine, unplug):
         drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI())
-        fake_inst = {'name': 'foo'}
+        fake_inst = objects.Instance(**self.test_instance)
         with mock.patch.object(drvr._conn, 'lookupByUUIDString') as lookup:
             lookup.return_value = fake_inst
             # NOTE(danms): Make unplug cause us to bail early, since
@@ -18951,6 +18953,41 @@ class LibvirtConnTestCase(test.NoDBTestCase,
         else:
             assert False, "Unable to find any mediated device for the guest."
+
+ @mock.patch('nova.virt.hardware.get_vpmems') + def test_get_guest_config_with_vpmems(self, mock_get_vpmems_label): + vpmem_0 = objects.LibvirtVPMEMDevice( + label='4GB', name='ns_0', devpath='/dev/dax0.0', + size=4292870144, align=2097152) + vpmem_1 = objects.LibvirtVPMEMDevice( + label='16GB', name='ns_1', devpath='/dev/dax0.1', + size=17177772032, align=2097152) + resource_0 = objects.Resource( + provider_uuid=uuids.rp, + resource_class="CUSTOM_PMEM_NAMESPACE_4GB", + identifier='ns_0', metadata=vpmem_0) + resource_1 = objects.Resource( + provider_uuid=uuids.rp, + resource_class="CUSTOM_PMEM_NAMESPACE_16GB", + identifier='ns_1', metadata=vpmem_1) + resources = objects.ResourceList(objects=[resource_0, resource_1]) + + instance_ref = objects.Instance(**self.test_instance) + instance_ref.resources = resources + image_meta = objects.ImageMeta.from_dict(self.test_image_meta) + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True) + drvr._vpmems_by_name = {"ns_0": vpmem_0, "ns_1": vpmem_1} + + mock_get_vpmems_label.return_value = ['4GB', '16GB'] + cfg = drvr._get_guest_config(instance_ref, + _fake_network_info(self, 1), + image_meta, {'mapping': {}}) + vpmem_amount = 0 + for device in cfg.devices: + if isinstance(device, vconfig.LibvirtConfigGuestVPMEM): + self.assertEqual("nvdimm", device.model) + vpmem_amount += 1 + self.assertEqual(2, vpmem_amount) + class TestGuestConfigSysinfoSerialOS(test.NoDBTestCase): def setUp(self): @@ -19702,7 +19739,8 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): # Attributes which we need to be set so they don't touch the db, # but it's not worth the effort to fake properly - for field in ['numa_topology', 'vcpu_model', 'trusted_certs']: + for field in ['numa_topology', 'vcpu_model', 'trusted_certs', + 'resources', 'migration_context']: setattr(instance, field, None) return instance @@ -21770,7 +21808,8 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): drv = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True) instance = objects.Instance( uuid=uuids.instance, id=1, - ephemeral_key_uuid=uuids.ephemeral_key_uuid) + ephemeral_key_uuid=uuids.ephemeral_key_uuid, + resources=None) instance.system_metadata = {} block_device_info = {'root_device_name': '/dev/vda', 'ephemerals': [], @@ -24319,6 +24358,7 @@ class LibvirtPMEMNamespaceTests(test.NoDBTestCase): def setUp(self): super(LibvirtPMEMNamespaceTests, self).setUp() self.useFixture(fakelibvirt.FakeLibvirtFixture()) + self.context = context.get_admin_context() self.vpmem_0 = objects.LibvirtVPMEMDevice( label='4GB', name='ns_0', devpath='/dev/dax0.0', @@ -24331,6 +24371,22 @@ class LibvirtPMEMNamespaceTests(test.NoDBTestCase): label='SMALL', name='ns_2', devpath='/dev/dax0.2', size=17177772032, align=2097152) + self.resource_0 = objects.Resource( + provider_uuid=uuids.rp_uuid, + resource_class="CUSTOM_PMEM_NAMESPACE_4GB", + identifier='ns_0', metadata=self.vpmem_0) + self.resource_1 = objects.Resource( + provider_uuid=uuids.rp_uuid, + resource_class="CUSTOM_PMEM_NAMESPACE_SMALL", + identifier='ns_1', metadata=self.vpmem_1) + self.resource_2 = objects.Resource( + provider_uuid=uuids.rp_uuid, + resource_class="CUSTOM_PMEM_NAMESPACE_SMALL", + identifier='ns_2', metadata=self.vpmem_2) + self.resource_3 = objects.Resource( + provider_uuid=uuids.rp_uuid, + resource_class="CUSTOM_RESOURCE_0", + identifier='resource_0') self.pmem_namespaces = ''' [{"dev":"namespace0.0", @@ -24417,3 +24473,49 @@ class LibvirtPMEMNamespaceTests(test.NoDBTestCase): vpmem_conf = 
["4GB:ns_0", "SMALL:ns_0"]
         self.assertRaises(exception.PMEMNamespaceConfigInvalid,
                           drvr._discover_vpmems, vpmem_conf)
+
+    @mock.patch('nova.virt.hardware.get_vpmems')
+    def test_get_ordered_vpmems(self, mock_labels):
+        # get ordered vpmems based on flavor extra_specs
+        drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True)
+        drvr._vpmems_by_name = {'ns_0': self.vpmem_0,
+                                'ns_1': self.vpmem_1,
+                                'ns_2': self.vpmem_2}
+        instance = fake_instance.fake_instance_obj(self.context)
+        instance.flavor = objects.Flavor(
+            name='m1.small', memory_mb=2048, vcpus=2, root_gb=10,
+            ephemeral_gb=20, swap=0, extra_specs={
+                'hw:pmem': 'SMALL,4GB,SMALL'})
+        mock_labels.return_value = ['SMALL', '4GB', 'SMALL']
+        # self.resource_3 is not a vpmem resource
+        instance.resources = objects.ResourceList(objects=[
+            self.resource_0, self.resource_1,
+            self.resource_2, self.resource_3])
+        ordered_vpmems = drvr._get_ordered_vpmems(instance, instance.flavor)
+        # keep consistent with the order in flavor extra_specs
+        self.assertEqual('SMALL', ordered_vpmems[0].label)
+        self.assertEqual('4GB', ordered_vpmems[1].label)
+        self.assertEqual('SMALL', ordered_vpmems[2].label)
+        vpmems = drvr._get_vpmems(instance)
+        # this is not sorted, it keeps the same order as instance.resources
+        self.assertEqual('4GB', vpmems[0].label)
+        self.assertEqual('SMALL', vpmems[1].label)
+        self.assertEqual('SMALL', vpmems[2].label)
+
+    @mock.patch('nova.privsep.libvirt.cleanup_vpmem')
+    def test_cleanup_vpmems(self, mock_cleanup_vpmem):
+        vpmems = [self.vpmem_0, self.vpmem_1, self.vpmem_2]
+        drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True)
+        drvr._cleanup_vpmems(vpmems)
+        mock_cleanup_vpmem.assert_has_calls([
+            mock.call(self.vpmem_0.devpath),
+            mock.call(self.vpmem_1.devpath),
+            mock.call(self.vpmem_2.devpath)])
+
+    @mock.patch('nova.privsep.libvirt.cleanup_vpmem')
+    def test_cleanup_vpmems_fail(self, mock_cleanup_vpmem):
+        mock_cleanup_vpmem.side_effect = Exception('Not known')
+        vpmems = [self.vpmem_0, self.vpmem_1, self.vpmem_2]
+        drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True)
+        self.assertRaises(exception.VPMEMCleanupFailed,
+                          drvr._cleanup_vpmems, vpmems)
diff --git a/nova/virt/hardware.py b/nova/virt/hardware.py
index fd8dcec2a514..a5b2c10aa969 100644
--- a/nova/virt/hardware.py
+++ b/nova/virt/hardware.py
@@ -2066,3 +2066,14 @@ def numa_usage_from_instance_numa(host_topology, instance_topology,
         cells.append(new_cell)
 
     return objects.NUMATopology(cells=cells)
+
+
+def get_vpmems(flavor):
+    """Return vpmems related to the input request.
+
+    :param flavor: a flavor object to read extra specs from
+    :returns: a vpmem label list
+    """
+    # TODO(Luyao) Return vpmem label list when the whole
+    # vpmem feature is supported.
+ return [] diff --git a/nova/virt/libvirt/config.py b/nova/virt/libvirt/config.py index cb699e472130..7fd78ee25842 100644 --- a/nova/virt/libvirt/config.py +++ b/nova/virt/libvirt/config.py @@ -2546,6 +2546,8 @@ class LibvirtConfigGuest(LibvirtConfigObject): self.uuid = None self.name = None self.memory = 500 * units.Mi + self.max_memory_size = None + self.max_memory_slots = 0 self.membacking = None self.memtune = None self.numatune = None @@ -2578,6 +2580,10 @@ class LibvirtConfigGuest(LibvirtConfigObject): root.append(self._text_node("uuid", self.uuid)) root.append(self._text_node("name", self.name)) root.append(self._text_node("memory", self.memory)) + if self.max_memory_size is not None: + max_memory = self._text_node("maxMemory", self.max_memory_size) + max_memory.set("slots", str(self.max_memory_slots)) + root.append(max_memory) if self.membacking is not None: root.append(self.membacking.format_dom()) if self.memtune is not None: @@ -2752,6 +2758,7 @@ class LibvirtConfigGuest(LibvirtConfigObject): # LibvirtConfigGuestUidMap # LibvirtConfigGuestGidMap # LibvirtConfigGuestCPU + # LibvirtConfigGuestVPMEM for c in xmldoc: if c.tag == 'devices': for d in c: @@ -2775,6 +2782,10 @@ class LibvirtConfigGuest(LibvirtConfigObject): obj = LibvirtConfigGuestInterface() obj.parse_dom(d) self.devices.append(obj) + elif d.tag == 'memory' and d.get('model') == 'nvdimm': + obj = LibvirtConfigGuestVPMEM() + obj.parse_dom(d) + self.devices.append(obj) if c.tag == 'idmap': for idmap in c: obj = None @@ -3154,3 +3165,60 @@ class LibvirtConfigSecret(LibvirtConfigObject): usage.append(self._text_node('volume', str(self.usage_id))) root.append(usage) return root + + +class LibvirtConfigGuestVPMEM(LibvirtConfigGuestDevice): + def __init__(self, **kwargs): + super(LibvirtConfigGuestVPMEM, self).__init__( + root_name="memory", **kwargs) + + self.model = "nvdimm" + self.access = "shared" + self.source_path = kwargs.get("devpath", "") + self.align_size = kwargs.get("align_kb", 0) + self.pmem = True + + self.target_size = kwargs.get("size_kb", 0) + self.target_node = 0 + self.label_size = 2 * units.Ki + + def format_dom(self): + memory = super(LibvirtConfigGuestVPMEM, self).format_dom() + + memory.set("model", self.model) + memory.set("access", self.access) + + source = etree.Element("source") + source.append(self._text_node("path", self.source_path)) + source.append(self._text_node("alignsize", self.align_size)) + if self.pmem is True: + source.append(etree.Element("pmem")) + + target = etree.Element("target") + target.append(self._text_node("size", self.target_size)) + target.append(self._text_node("node", self.target_node)) + label = etree.Element("label") + label.append(self._text_node("size", self.label_size)) + target.append(label) + + memory.append(source) + memory.append(target) + + return memory + + def parse_dom(self, xmldoc): + super(LibvirtConfigGuestVPMEM, self).parse_dom(xmldoc) + self.model = xmldoc.get("model") + self.access = xmldoc.get("access") + + for c in xmldoc.getchildren(): + if c.tag == "source": + for sub in c.getchildren(): + if sub.tag == "path": + self.source_path = sub.text + if sub.tag == "alignsize": + self.align_size = sub.text + elif c.tag == "target": + for sub in c.getchildren(): + if sub.tag == "size": + self.target_size = sub.text diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index 303d2eb0bd1d..6f86432f3baf 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -1266,6 +1266,11 @@ class LibvirtDriver(driver.ComputeDriver): def 
cleanup(self, context, instance, network_info, block_device_info=None,
                 destroy_disks=True, migrate_data=None, destroy_vifs=True):
+        # zero out the data on the backend pmem devices
+        vpmems = self._get_vpmems(instance)
+        if vpmems:
+            self._cleanup_vpmems(vpmems)
+
         if destroy_vifs:
             self._unplug_vifs(instance, network_info, True)
 
@@ -1359,6 +1364,14 @@ class LibvirtDriver(driver.ComputeDriver):
 
         self._undefine_domain(instance)
 
+    def _cleanup_vpmems(self, vpmems):
+        for vpmem in vpmems:
+            try:
+                nova.privsep.libvirt.cleanup_vpmem(vpmem.devpath)
+            except Exception as e:
+                raise exception.VPMEMCleanupFailed(dev=vpmem.devpath,
+                                                   error=e)
+
     def _detach_encrypted_volumes(self, instance, block_device_info):
         """Detaches encrypted volumes attached to instance."""
         disks = self._get_instance_disk_info(instance, block_device_info)
@@ -1452,6 +1465,11 @@ class LibvirtDriver(driver.ComputeDriver):
         inst_base = libvirt_utils.get_instance_path(instance)
         target = inst_base + '_resize'
 
+        # zero out the data on the old backend pmem devices
+        vpmems = self._get_vpmems(instance, prefix='old')
+        if vpmems:
+            self._cleanup_vpmems(vpmems)
+
         # Deletion can fail over NFS, so retry the deletion as required.
         # Set maximum attempt as 5, most test can remove the directory
         # for the second time.
@@ -5534,6 +5552,7 @@ class LibvirtDriver(driver.ComputeDriver):
         flavor = instance.flavor
         inst_path = libvirt_utils.get_instance_path(instance)
         disk_mapping = disk_info['mapping']
+        vpmems = self._get_ordered_vpmems(instance, flavor)
 
         virt_type = CONF.libvirt.virt_type
         guest = vconfig.LibvirtConfigGuest()
@@ -5650,8 +5669,54 @@ class LibvirtDriver(driver.ComputeDriver):
             self._guest_configure_sev(guest, caps.host.cpu.arch,
                                       guest.os_mach_type)
 
+        if vpmems:
+            self._guest_add_vpmems(guest, vpmems)
+
         return guest
 
+    def _get_ordered_vpmems(self, instance, flavor):
+        ordered_vpmems = []
+        vpmems = self._get_vpmems(instance)
+        labels = hardware.get_vpmems(flavor)
+        for label in labels:
+            for vpmem in vpmems:
+                if vpmem.label == label:
+                    ordered_vpmems.append(vpmem)
+                    vpmems.remove(vpmem)
+                    break
+        return ordered_vpmems
+
+    def _get_vpmems(self, instance, prefix=None):
+        vpmems = []
+        resources = instance.resources
+        if prefix == 'old' and instance.migration_context:
+            if 'old_resources' in instance.migration_context:
+                resources = instance.migration_context.old_resources
+        if not resources:
+            return vpmems
+        for resource in resources:
+            rc = resource.resource_class
+            if rc.startswith("CUSTOM_PMEM_NAMESPACE_"):
+                vpmem = self._vpmems_by_name[resource.identifier]
+                vpmems.append(vpmem)
+        return vpmems
+
+    def _guest_add_vpmems(self, guest, vpmems):
+        guest.max_memory_size = guest.memory
+        guest.max_memory_slots = 0
+        for vpmem in vpmems:
+            # use integer division so the KiB values rendered into XML stay ints
+            size_kb = vpmem.size // units.Ki
+            align_kb = vpmem.align // units.Ki
+
+            vpmem_config = vconfig.LibvirtConfigGuestVPMEM(
+                devpath=vpmem.devpath, size_kb=size_kb, align_kb=align_kb)
+
+            # the max memory size needs to contain the vpmem size
+            guest.max_memory_size += size_kb
+            # one vpmem will occupy one memory slot
+            guest.max_memory_slots += 1
+            guest.add_device(vpmem_config)
+
     def _sev_enabled(self, flavor, image_meta):
         """To enable AMD SEV, the following should be true:
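
Note for readers (illustrative, not part of the patch itself): the net effect of `_guest_add_vpmems()` is one `<memory model='nvdimm'>` device per vpmem, plus a `<maxMemory slots='N'>` element sized to the guest RAM plus all vpmem sizes. A minimal sketch of what the new config class emits, assuming a Nova development checkout where `nova.virt.libvirt.config` is importable, and reusing the values from `LibvirtConfigGuestVPMEMTest` above:

```python
from oslo_utils import units

from nova.virt.libvirt import config as vconfig

# A 4 GiB vpmem backed by /dev/dax0.0 with a 2048 KiB alignment,
# mirroring the values used in LibvirtConfigGuestVPMEMTest.
vpmem = vconfig.LibvirtConfigGuestVPMEM(
    devpath='/dev/dax0.0', size_kb=4096 * units.Ki, align_kb=2048)

# Renders the nvdimm device XML that _guest_add_vpmems() attaches to
# the guest, e.g.:
#   <memory model='nvdimm' access='shared'>
#     <source>
#       <path>/dev/dax0.0</path>
#       <alignsize>2048</alignsize>
#       <pmem/>
#     </source>
#     <target>
#       <size>4194304</size>
#       <node>0</node>
#       <label>
#         <size>2048</size>
#       </label>
#     </target>
#   </memory>
print(vpmem.to_xml())
```

Because `guest.max_memory_size` is set whenever vpmems are present, the root `<domain>` element also gains `<maxMemory slots='...'>`, which libvirt requires before it will accept any memory device (see reference [1] in the commit message).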