Add infrastructure for invoking libvirt's getDomainCapabilities API
Two use cases have emerged semi-recently which both require the libvirt
driver to be able to invoke libvirt's virConnectGetDomainCapabilities()
API:

    https://libvirt.org/html/libvirt-libvirt-domain.html#virConnectGetDomainCapabilities

and parse the results:

- Automatic detection of AMD compute hosts which are capable of
  providing SEV (Secure Encrypted Virtualization)
- Gracefully handling different QEMU machine types for x86 hosts

So lay the foundation for these use cases by adding a new
get_domain_capabilities() method to nova.virt.libvirt.host.Host, along
with new subclasses of LibvirtConfigObject for parsing the XML returned
from libvirt, and corresponding tests.

Change-Id: I4aeac9b2397bb2f5e130d1e58829a5e549fcb191
blueprint: gracefully-handle-qemu-machine-types
blueprint: amd-sev-libvirt-support
This commit is contained in:
		| @@ -1357,6 +1357,154 @@ class Connection(object): | ||||
|                    else False for cpu_num in range(total_cpus)] | ||||
|         return (total_cpus, cpu_map, active_cpus) | ||||
|  | ||||
|     def getDomainCapabilities(self, emulatorbin, arch, machine_type, | ||||
|                               virt_type, flags): | ||||
|         """Return spoofed domain capabilities. | ||||
|  | ||||
|         None of the arguments are used by this fake: the same canned | ||||
|         <domainCapabilities> XML string is returned for every call. | ||||
|         Only the features section varies, as it is interpolated from | ||||
|         self._domain_capability_features. | ||||
|  | ||||
|         :returns: the domain capabilities XML document as a string | ||||
|         """ | ||||
|  | ||||
|         return ''' | ||||
| <domainCapabilities> | ||||
|   <path>/usr/bin/qemu-kvm</path> | ||||
|   <domain>kvm</domain> | ||||
|   <machine>pc-i440fx-2.11</machine> | ||||
|   <arch>x86_64</arch> | ||||
|   <vcpu max='255'/> | ||||
|   <os supported='yes'> | ||||
|     <loader supported='yes'> | ||||
|       <value>/usr/share/qemu/ovmf-x86_64-ms-4m-code.bin</value> | ||||
|       <value>/usr/share/qemu/ovmf-x86_64-ms-code.bin</value> | ||||
|       <enum name='type'> | ||||
|         <value>rom</value> | ||||
|         <value>pflash</value> | ||||
|       </enum> | ||||
|       <enum name='readonly'> | ||||
|         <value>yes</value> | ||||
|         <value>no</value> | ||||
|       </enum> | ||||
|     </loader> | ||||
|   </os> | ||||
|   <cpu> | ||||
|     <mode name='host-passthrough' supported='yes'/> | ||||
|     <mode name='host-model' supported='yes'> | ||||
|       <model fallback='forbid'>EPYC-IBPB</model> | ||||
|       <vendor>AMD</vendor> | ||||
|       <feature policy='require' name='x2apic'/> | ||||
|       <feature policy='require' name='tsc-deadline'/> | ||||
|       <feature policy='require' name='hypervisor'/> | ||||
|       <feature policy='require' name='tsc_adjust'/> | ||||
|       <feature policy='require' name='cmp_legacy'/> | ||||
|       <feature policy='require' name='invtsc'/> | ||||
|       <feature policy='require' name='virt-ssbd'/> | ||||
|       <feature policy='disable' name='monitor'/> | ||||
|     </mode> | ||||
|     <mode name='custom' supported='yes'> | ||||
|       <model usable='yes'>qemu64</model> | ||||
|       <model usable='yes'>qemu32</model> | ||||
|       <model usable='no'>phenom</model> | ||||
|       <model usable='yes'>pentium3</model> | ||||
|       <model usable='yes'>pentium2</model> | ||||
|       <model usable='yes'>pentium</model> | ||||
|       <model usable='no'>n270</model> | ||||
|       <model usable='yes'>kvm64</model> | ||||
|       <model usable='yes'>kvm32</model> | ||||
|       <model usable='no'>coreduo</model> | ||||
|       <model usable='no'>core2duo</model> | ||||
|       <model usable='no'>athlon</model> | ||||
|       <model usable='yes'>Westmere</model> | ||||
|       <model usable='no'>Westmere-IBRS</model> | ||||
|       <model usable='no'>Skylake-Server</model> | ||||
|       <model usable='no'>Skylake-Server-IBRS</model> | ||||
|       <model usable='no'>Skylake-Client</model> | ||||
|       <model usable='no'>Skylake-Client-IBRS</model> | ||||
|       <model usable='yes'>SandyBridge</model> | ||||
|       <model usable='no'>SandyBridge-IBRS</model> | ||||
|       <model usable='yes'>Penryn</model> | ||||
|       <model usable='no'>Opteron_G5</model> | ||||
|       <model usable='no'>Opteron_G4</model> | ||||
|       <model usable='yes'>Opteron_G3</model> | ||||
|       <model usable='yes'>Opteron_G2</model> | ||||
|       <model usable='yes'>Opteron_G1</model> | ||||
|       <model usable='yes'>Nehalem</model> | ||||
|       <model usable='no'>Nehalem-IBRS</model> | ||||
|       <model usable='no'>IvyBridge</model> | ||||
|       <model usable='no'>IvyBridge-IBRS</model> | ||||
|       <model usable='no'>Haswell</model> | ||||
|       <model usable='no'>Haswell-noTSX</model> | ||||
|       <model usable='no'>Haswell-noTSX-IBRS</model> | ||||
|       <model usable='no'>Haswell-IBRS</model> | ||||
|       <model usable='yes'>EPYC</model> | ||||
|       <model usable='yes'>EPYC-IBPB</model> | ||||
|       <model usable='yes'>Conroe</model> | ||||
|       <model usable='no'>Broadwell</model> | ||||
|       <model usable='no'>Broadwell-noTSX</model> | ||||
|       <model usable='no'>Broadwell-noTSX-IBRS</model> | ||||
|       <model usable='no'>Broadwell-IBRS</model> | ||||
|       <model usable='yes'>486</model> | ||||
|     </mode> | ||||
|   </cpu> | ||||
|   <devices> | ||||
|     <disk supported='yes'> | ||||
|       <enum name='diskDevice'> | ||||
|         <value>disk</value> | ||||
|         <value>cdrom</value> | ||||
|         <value>floppy</value> | ||||
|         <value>lun</value> | ||||
|       </enum> | ||||
|       <enum name='bus'> | ||||
|         <value>ide</value> | ||||
|         <value>fdc</value> | ||||
|         <value>scsi</value> | ||||
|         <value>virtio</value> | ||||
|         <value>usb</value> | ||||
|         <value>sata</value> | ||||
|       </enum> | ||||
|     </disk> | ||||
|     <graphics supported='yes'> | ||||
|       <enum name='type'> | ||||
|         <value>sdl</value> | ||||
|         <value>vnc</value> | ||||
|         <value>spice</value> | ||||
|       </enum> | ||||
|     </graphics> | ||||
|     <video supported='yes'> | ||||
|       <enum name='modelType'> | ||||
|         <value>vga</value> | ||||
|         <value>cirrus</value> | ||||
|         <value>vmvga</value> | ||||
|         <value>qxl</value> | ||||
|         <value>virtio</value> | ||||
|       </enum> | ||||
|     </video> | ||||
|     <hostdev supported='yes'> | ||||
|       <enum name='mode'> | ||||
|         <value>subsystem</value> | ||||
|       </enum> | ||||
|       <enum name='startupPolicy'> | ||||
|         <value>default</value> | ||||
|         <value>mandatory</value> | ||||
|         <value>requisite</value> | ||||
|         <value>optional</value> | ||||
|       </enum> | ||||
|       <enum name='subsysType'> | ||||
|         <value>usb</value> | ||||
|         <value>pci</value> | ||||
|         <value>scsi</value> | ||||
|       </enum> | ||||
|       <enum name='capsType'/> | ||||
|       <enum name='pciBackend'> | ||||
|         <value>default</value> | ||||
|         <value>vfio</value> | ||||
|       </enum> | ||||
|     </hostdev> | ||||
|   </devices> | ||||
| %(features)s | ||||
| </domainCapabilities>''' % {'features': self._domain_capability_features} | ||||
|  | ||||
|     # Default <features> section that getDomainCapabilities() | ||||
|     # interpolates into the XML it returns.  Features are kept | ||||
|     # separately so that the tests can patch this class variable with | ||||
|     # alternate values. | ||||
|     _domain_capability_features = '''  <features> | ||||
|     <gic supported='no'/> | ||||
|   </features>''' | ||||
|  | ||||
|     def getCapabilities(self): | ||||
|         """Return spoofed capabilities.""" | ||||
|         numa_topology = self.host_info.numa_topology | ||||
|   | ||||
| @@ -277,6 +277,11 @@ class FakeLibvirtTests(test.NoDBTestCase): | ||||
|         conn = self.get_openAuth_curry_func()('qemu:///system') | ||||
|         etree.fromstring(conn.getCapabilities()) | ||||
|  | ||||
|     def test_getDomainCapabilities(self): | ||||
|         conn = self.get_openAuth_curry_func()('qemu:///system') | ||||
|         etree.fromstring(conn.getDomainCapabilities( | ||||
|             '/usr/bin/qemu-kvm', 'x86_64', 'q35', 'kvm', 0)) | ||||
|  | ||||
|     def test_nwfilter_define_undefine(self): | ||||
|         conn = self.get_openAuth_curry_func()('qemu:///system') | ||||
|         # Will raise an exception if it's not valid XML | ||||
|   | ||||
| @@ -637,6 +637,27 @@ class HostTestCase(test.NoDBTestCase): | ||||
|             self.assertIsNone(caps.host.cpu.model) | ||||
|             self.assertEqual(0, len(caps.host.cpu.features)) | ||||
|  | ||||
|     def _test_get_domain_capabilities(self): | ||||
|         caps = self.host.get_domain_capabilities() | ||||
|         self.assertIn('x86_64', caps.keys()) | ||||
|         self.assertEqual(['q35'], list(caps['x86_64'])) | ||||
|         return caps['x86_64']['q35'] | ||||
|  | ||||
|     def test_get_domain_capabilities(self): | ||||
|         caps = self._test_get_domain_capabilities() | ||||
|         self.assertEqual(vconfig.LibvirtConfigDomainCaps, type(caps)) | ||||
|         # There is a <gic supported='no'/> feature in the fixture but | ||||
|         # we don't parse that because nothing currently cares about it. | ||||
|         self.assertEqual(0, len(caps.features)) | ||||
|  | ||||
|     @mock.patch.object(fakelibvirt.virConnect, '_domain_capability_features', | ||||
|                        new='') | ||||
|     def test_get_domain_capabilities_no_features(self): | ||||
|         caps = self._test_get_domain_capabilities() | ||||
|         self.assertEqual(vconfig.LibvirtConfigDomainCaps, type(caps)) | ||||
|         features = caps.features | ||||
|         self.assertEqual([], features) | ||||
|  | ||||
|     @mock.patch.object(fakelibvirt.virConnect, "getHostname") | ||||
|     def test_get_hostname_caching(self, mock_hostname): | ||||
|         mock_hostname.return_value = "foo" | ||||
|   | ||||
| @@ -112,6 +112,59 @@ class LibvirtConfigCaps(LibvirtConfigObject): | ||||
|         return caps | ||||
|  | ||||
|  | ||||
| class LibvirtConfigDomainCaps(LibvirtConfigObject): | ||||
|  | ||||
|     def __init__(self, **kwargs): | ||||
|         super(LibvirtConfigDomainCaps, self).__init__( | ||||
|             root_name="domainCapabilities", **kwargs) | ||||
|         self._features = None | ||||
|  | ||||
|     def parse_dom(self, xmldoc): | ||||
|         super(LibvirtConfigDomainCaps, self).parse_dom(xmldoc) | ||||
|  | ||||
|         for c in xmldoc.getchildren(): | ||||
|             if c.tag == "features": | ||||
|                 features = LibvirtConfigDomainCapsFeatures() | ||||
|                 features.parse_dom(c) | ||||
|                 self._features = features | ||||
|  | ||||
|     @property | ||||
|     def features(self): | ||||
|         if self._features is None: | ||||
|             return [] | ||||
|         return self._features.features | ||||
|  | ||||
|  | ||||
| class LibvirtConfigDomainCapsFeatures(LibvirtConfigObject): | ||||
|  | ||||
|     def __init__(self, **kwargs): | ||||
|         super(LibvirtConfigDomainCapsFeatures, self).__init__( | ||||
|             root_name="features", **kwargs) | ||||
|         self.features = [] | ||||
|  | ||||
|     def parse_dom(self, xmldoc): | ||||
|         super(LibvirtConfigDomainCapsFeatures, self).parse_dom(xmldoc) | ||||
|  | ||||
|         for c in xmldoc.getchildren(): | ||||
|             feature = None | ||||
|             # TODO(aspiers): add supported features here | ||||
|             if feature: | ||||
|                 feature.parse_dom(c) | ||||
|                 self.features.append(feature) | ||||
|  | ||||
|             # There are many other features and domain capabilities, | ||||
|             # but we don't need to regenerate the XML (it's read-only | ||||
|             # data provided by libvirtd), so there's no point parsing | ||||
|             # them until we actually need their values. | ||||
|  | ||||
|     # For the same reason, we do not need a format_dom() method, but | ||||
|     # it's a bug if this ever gets called and we inherited one from | ||||
|     # the base class, so override that to watch out for accidental | ||||
|     # calls. | ||||
|     def format_dom(self): | ||||
|         raise RuntimeError(_('BUG: tried to generate domainCapabilities XML')) | ||||
|  | ||||
|  | ||||
| class LibvirtConfigCapsNUMATopology(LibvirtConfigObject): | ||||
|  | ||||
|     def __init__(self, **kwargs): | ||||
|   | ||||
| @@ -27,6 +27,7 @@ the raw libvirt API. These APIs are then used by all | ||||
| the other libvirt related classes | ||||
| """ | ||||
|  | ||||
| from collections import defaultdict | ||||
| import operator | ||||
| import os | ||||
| import socket | ||||
| @@ -56,6 +57,7 @@ from nova import utils | ||||
| from nova.virt import event as virtevent | ||||
| from nova.virt.libvirt import config as vconfig | ||||
| from nova.virt.libvirt import guest as libvirt_guest | ||||
| from nova.virt.libvirt import utils as libvirt_utils | ||||
|  | ||||
| libvirt = None | ||||
|  | ||||
| @@ -91,6 +93,7 @@ class Host(object): | ||||
|         self._conn_event_handler_queue = six.moves.queue.Queue() | ||||
|         self._lifecycle_event_handler = lifecycle_event_handler | ||||
|         self._caps = None | ||||
|         self._domain_caps = None | ||||
|         self._hostname = None | ||||
|  | ||||
|         self._wrapped_conn = None | ||||
| @@ -667,6 +670,117 @@ class Host(object): | ||||
|                         raise | ||||
|         return self._caps | ||||
|  | ||||
|     def get_domain_capabilities(self): | ||||
|         """Returns the capabilities you can request when creating a | ||||
|         domain (VM) with that hypervisor, for various combinations of | ||||
|         architecture and machine type. | ||||
|  | ||||
|         In this context the fuzzy word "hypervisor" implies QEMU | ||||
|         binary, libvirt itself and the host config.  libvirt provides | ||||
|         this in order that callers can determine what the underlying | ||||
|         emulator and/or libvirt is capable of, prior to creating a domain | ||||
|         (for instance via virDomainCreateXML or virDomainDefineXML). | ||||
|         However nova needs to know the capabilities much earlier, when | ||||
|         the host's compute service is first initialised, in order that | ||||
|         placement decisions can be made across many compute hosts. | ||||
|         Therefore this is expected to be called during the init_host() | ||||
|         phase of the driver lifecycle rather than just before booting | ||||
|         an instance. | ||||
|  | ||||
|         This causes an additional complication since the Python | ||||
|         binding for this libvirt API call requires the architecture | ||||
|         and machine type to be provided.  So in order to gain a full | ||||
|         picture of the hypervisor's capabilities, technically we need | ||||
|         to call it with the right parameters, once for each | ||||
|         (architecture, machine_type) combination which we care about. | ||||
|         However the libvirt experts have advised us that in practice | ||||
|         the domain capabilities do not (yet, at least) vary enough | ||||
|         across machine types to justify the cost of calling | ||||
|         getDomainCapabilities() once for every single (architecture, | ||||
|         machine_type) combination.  In particular, SEV support isn't | ||||
|         reported per-machine type, and since there are usually many | ||||
|         machine types, we follow the advice of the experts that for | ||||
|         now it's sufficient to call it once per host architecture: | ||||
|  | ||||
|             https://bugzilla.redhat.com/show_bug.cgi?id=1683471#c7 | ||||
|  | ||||
|         However, future domain capabilities might report SEV in a more | ||||
|         fine-grained manner, and we also expect to use this method to | ||||
|         detect other features, such as for gracefully handling machine | ||||
|         types and potentially for detecting OVMF binaries.  Therefore | ||||
|         we memoize the results of the API calls in a nested dict where | ||||
|         the top-level keys are architectures, and second-level keys | ||||
|         are machine types, in order to allow easy expansion later. | ||||
|  | ||||
|         Whenever libvirt/QEMU are updated, cached domCapabilities | ||||
|         would get outdated (because QEMU will contain new features and | ||||
|         the capabilities will vary).  However, this should not be a | ||||
|         problem here, because when libvirt/QEMU gets updated, the | ||||
|         nova-compute agent also needs restarting, at which point the | ||||
|         memoization will vanish because it's not persisted to disk. | ||||
|  | ||||
|         Note: The result is cached in the member attribute | ||||
|         _domain_caps. | ||||
|  | ||||
|         :returns: a nested dict of dicts which maps architectures to | ||||
|         machine types to instances of config.LibvirtConfigDomainCaps | ||||
|         representing the domain capabilities of the host for that arch | ||||
|         and machine type: | ||||
|  | ||||
|         { arch: | ||||
|           { machine_type: LibvirtConfigDomainCaps } | ||||
|         } | ||||
|         """ | ||||
|         if self._domain_caps: | ||||
|             return self._domain_caps | ||||
|  | ||||
|         domain_caps = defaultdict(dict) | ||||
|         caps = self.get_capabilities() | ||||
|         virt_type = CONF.libvirt.virt_type | ||||
|  | ||||
|         for guest in caps.guests: | ||||
|             arch = guest.arch | ||||
|             machine_type = \ | ||||
|                 libvirt_utils.get_default_machine_type(arch) or 'q35' | ||||
|  | ||||
|             emulator_bin = guest.emulator | ||||
|             if virt_type in guest.domemulator: | ||||
|                 emulator_bin = guest.domemulator[virt_type] | ||||
|  | ||||
|             # It is expected that each <guest> will have a different | ||||
|             # architecture, but it doesn't hurt to add a safety net to | ||||
|             # avoid needlessly calling libvirt's API more times than | ||||
|             # we need. | ||||
|             if machine_type in domain_caps[arch]: | ||||
|                 continue | ||||
|  | ||||
|             domain_caps[arch][machine_type] = \ | ||||
|                 self._get_domain_capabilities(emulator_bin, arch, | ||||
|                                               machine_type, virt_type) | ||||
|  | ||||
|         # NOTE(aspiers): Use a temporary variable to update the | ||||
|         # instance variable atomically, otherwise if some API | ||||
|         # calls succeeded and then one failed, we might | ||||
|         # accidentally memoize a partial result. | ||||
|         self._domain_caps = domain_caps | ||||
|  | ||||
|         return self._domain_caps | ||||
|  | ||||
|     def _get_domain_capabilities(self, emulator_bin, arch, machine_type, | ||||
|                                  virt_type, flags=0): | ||||
|         xmlstr = self.get_connection().getDomainCapabilities( | ||||
|             emulator_bin, | ||||
|             arch, | ||||
|             machine_type, | ||||
|             virt_type, | ||||
|             flags | ||||
|         ) | ||||
|         LOG.info("Libvirt host hypervisor capabilities for arch=%s and " | ||||
|                  "machine_type=%s:\n%s", arch, machine_type, xmlstr) | ||||
|         caps = vconfig.LibvirtConfigDomainCaps() | ||||
|         caps.parse_str(xmlstr) | ||||
|         return caps | ||||
|  | ||||
|     def get_driver_type(self): | ||||
|         """Get hypervisor type. | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Adam Spiers
					Adam Spiers