Add infrastructure for invoking libvirt's getDomainCapabilities API
Two use cases have emerged semi-recently which both require the libvirt driver to be able to invoke libvirt's virConnectGetDomainCapabilities() API:

    https://libvirt.org/html/libvirt-libvirt-domain.html#virConnectGetDomainCapabilities

and parse the results:

- Automatic detection of AMD compute hosts which are capable of providing SEV (Secure Encrypted Virtualization)
- Gracefully handling different QEMU machine types for x86 hosts

So lay the foundation for these use cases by adding a new get_domain_capabilities() method to nova.virt.libvirt.host.Host, along with new subclasses of LibvirtConfigObject for parsing the XML returned from libvirt, and corresponding tests.

Change-Id: I4aeac9b2397bb2f5e130d1e58829a5e549fcb191
blueprint: gracefully-handle-qemu-machine-types
blueprint: amd-sev-libvirt-support
This commit is contained in:
		| @@ -1357,6 +1357,154 @@ class Connection(object): | |||||||
|                    else False for cpu_num in range(total_cpus)] |                    else False for cpu_num in range(total_cpus)] | ||||||
|         return (total_cpus, cpu_map, active_cpus) |         return (total_cpus, cpu_map, active_cpus) | ||||||
|  |  | ||||||
|  |     def getDomainCapabilities(self, emulatorbin, arch, machine_type, | ||||||
|  |                               virt_type, flags): | ||||||
|  |         """Return spoofed domain capabilities.""" | ||||||
|  |  | ||||||
|  |         return ''' | ||||||
|  | <domainCapabilities> | ||||||
|  |   <path>/usr/bin/qemu-kvm</path> | ||||||
|  |   <domain>kvm</domain> | ||||||
|  |   <machine>pc-i440fx-2.11</machine> | ||||||
|  |   <arch>x86_64</arch> | ||||||
|  |   <vcpu max='255'/> | ||||||
|  |   <os supported='yes'> | ||||||
|  |     <loader supported='yes'> | ||||||
|  |       <value>/usr/share/qemu/ovmf-x86_64-ms-4m-code.bin</value> | ||||||
|  |       <value>/usr/share/qemu/ovmf-x86_64-ms-code.bin</value> | ||||||
|  |       <enum name='type'> | ||||||
|  |         <value>rom</value> | ||||||
|  |         <value>pflash</value> | ||||||
|  |       </enum> | ||||||
|  |       <enum name='readonly'> | ||||||
|  |         <value>yes</value> | ||||||
|  |         <value>no</value> | ||||||
|  |       </enum> | ||||||
|  |     </loader> | ||||||
|  |   </os> | ||||||
|  |   <cpu> | ||||||
|  |     <mode name='host-passthrough' supported='yes'/> | ||||||
|  |     <mode name='host-model' supported='yes'> | ||||||
|  |       <model fallback='forbid'>EPYC-IBPB</model> | ||||||
|  |       <vendor>AMD</vendor> | ||||||
|  |       <feature policy='require' name='x2apic'/> | ||||||
|  |       <feature policy='require' name='tsc-deadline'/> | ||||||
|  |       <feature policy='require' name='hypervisor'/> | ||||||
|  |       <feature policy='require' name='tsc_adjust'/> | ||||||
|  |       <feature policy='require' name='cmp_legacy'/> | ||||||
|  |       <feature policy='require' name='invtsc'/> | ||||||
|  |       <feature policy='require' name='virt-ssbd'/> | ||||||
|  |       <feature policy='disable' name='monitor'/> | ||||||
|  |     </mode> | ||||||
|  |     <mode name='custom' supported='yes'> | ||||||
|  |       <model usable='yes'>qemu64</model> | ||||||
|  |       <model usable='yes'>qemu32</model> | ||||||
|  |       <model usable='no'>phenom</model> | ||||||
|  |       <model usable='yes'>pentium3</model> | ||||||
|  |       <model usable='yes'>pentium2</model> | ||||||
|  |       <model usable='yes'>pentium</model> | ||||||
|  |       <model usable='no'>n270</model> | ||||||
|  |       <model usable='yes'>kvm64</model> | ||||||
|  |       <model usable='yes'>kvm32</model> | ||||||
|  |       <model usable='no'>coreduo</model> | ||||||
|  |       <model usable='no'>core2duo</model> | ||||||
|  |       <model usable='no'>athlon</model> | ||||||
|  |       <model usable='yes'>Westmere</model> | ||||||
|  |       <model usable='no'>Westmere-IBRS</model> | ||||||
|  |       <model usable='no'>Skylake-Server</model> | ||||||
|  |       <model usable='no'>Skylake-Server-IBRS</model> | ||||||
|  |       <model usable='no'>Skylake-Client</model> | ||||||
|  |       <model usable='no'>Skylake-Client-IBRS</model> | ||||||
|  |       <model usable='yes'>SandyBridge</model> | ||||||
|  |       <model usable='no'>SandyBridge-IBRS</model> | ||||||
|  |       <model usable='yes'>Penryn</model> | ||||||
|  |       <model usable='no'>Opteron_G5</model> | ||||||
|  |       <model usable='no'>Opteron_G4</model> | ||||||
|  |       <model usable='yes'>Opteron_G3</model> | ||||||
|  |       <model usable='yes'>Opteron_G2</model> | ||||||
|  |       <model usable='yes'>Opteron_G1</model> | ||||||
|  |       <model usable='yes'>Nehalem</model> | ||||||
|  |       <model usable='no'>Nehalem-IBRS</model> | ||||||
|  |       <model usable='no'>IvyBridge</model> | ||||||
|  |       <model usable='no'>IvyBridge-IBRS</model> | ||||||
|  |       <model usable='no'>Haswell</model> | ||||||
|  |       <model usable='no'>Haswell-noTSX</model> | ||||||
|  |       <model usable='no'>Haswell-noTSX-IBRS</model> | ||||||
|  |       <model usable='no'>Haswell-IBRS</model> | ||||||
|  |       <model usable='yes'>EPYC</model> | ||||||
|  |       <model usable='yes'>EPYC-IBPB</model> | ||||||
|  |       <model usable='yes'>Conroe</model> | ||||||
|  |       <model usable='no'>Broadwell</model> | ||||||
|  |       <model usable='no'>Broadwell-noTSX</model> | ||||||
|  |       <model usable='no'>Broadwell-noTSX-IBRS</model> | ||||||
|  |       <model usable='no'>Broadwell-IBRS</model> | ||||||
|  |       <model usable='yes'>486</model> | ||||||
|  |     </mode> | ||||||
|  |   </cpu> | ||||||
|  |   <devices> | ||||||
|  |     <disk supported='yes'> | ||||||
|  |       <enum name='diskDevice'> | ||||||
|  |         <value>disk</value> | ||||||
|  |         <value>cdrom</value> | ||||||
|  |         <value>floppy</value> | ||||||
|  |         <value>lun</value> | ||||||
|  |       </enum> | ||||||
|  |       <enum name='bus'> | ||||||
|  |         <value>ide</value> | ||||||
|  |         <value>fdc</value> | ||||||
|  |         <value>scsi</value> | ||||||
|  |         <value>virtio</value> | ||||||
|  |         <value>usb</value> | ||||||
|  |         <value>sata</value> | ||||||
|  |       </enum> | ||||||
|  |     </disk> | ||||||
|  |     <graphics supported='yes'> | ||||||
|  |       <enum name='type'> | ||||||
|  |         <value>sdl</value> | ||||||
|  |         <value>vnc</value> | ||||||
|  |         <value>spice</value> | ||||||
|  |       </enum> | ||||||
|  |     </graphics> | ||||||
|  |     <video supported='yes'> | ||||||
|  |       <enum name='modelType'> | ||||||
|  |         <value>vga</value> | ||||||
|  |         <value>cirrus</value> | ||||||
|  |         <value>vmvga</value> | ||||||
|  |         <value>qxl</value> | ||||||
|  |         <value>virtio</value> | ||||||
|  |       </enum> | ||||||
|  |     </video> | ||||||
|  |     <hostdev supported='yes'> | ||||||
|  |       <enum name='mode'> | ||||||
|  |         <value>subsystem</value> | ||||||
|  |       </enum> | ||||||
|  |       <enum name='startupPolicy'> | ||||||
|  |         <value>default</value> | ||||||
|  |         <value>mandatory</value> | ||||||
|  |         <value>requisite</value> | ||||||
|  |         <value>optional</value> | ||||||
|  |       </enum> | ||||||
|  |       <enum name='subsysType'> | ||||||
|  |         <value>usb</value> | ||||||
|  |         <value>pci</value> | ||||||
|  |         <value>scsi</value> | ||||||
|  |       </enum> | ||||||
|  |       <enum name='capsType'/> | ||||||
|  |       <enum name='pciBackend'> | ||||||
|  |         <value>default</value> | ||||||
|  |         <value>vfio</value> | ||||||
|  |       </enum> | ||||||
|  |     </hostdev> | ||||||
|  |   </devices> | ||||||
|  | %(features)s | ||||||
|  | </domainCapabilities>''' % {'features': self._domain_capability_features} | ||||||
|  |  | ||||||
|  |     # Features are kept separately so that the tests can patch this | ||||||
|  |     # class variable with alternate values. | ||||||
|  |     _domain_capability_features = '''  <features> | ||||||
|  |     <gic supported='no'/> | ||||||
|  |   </features>''' | ||||||
|  |  | ||||||
|     def getCapabilities(self): |     def getCapabilities(self): | ||||||
|         """Return spoofed capabilities.""" |         """Return spoofed capabilities.""" | ||||||
|         numa_topology = self.host_info.numa_topology |         numa_topology = self.host_info.numa_topology | ||||||
|   | |||||||
| @@ -277,6 +277,11 @@ class FakeLibvirtTests(test.NoDBTestCase): | |||||||
|         conn = self.get_openAuth_curry_func()('qemu:///system') |         conn = self.get_openAuth_curry_func()('qemu:///system') | ||||||
|         etree.fromstring(conn.getCapabilities()) |         etree.fromstring(conn.getCapabilities()) | ||||||
|  |  | ||||||
|  |     def test_getDomainCapabilities(self): | ||||||
|  |         conn = self.get_openAuth_curry_func()('qemu:///system') | ||||||
|  |         etree.fromstring(conn.getDomainCapabilities( | ||||||
|  |             '/usr/bin/qemu-kvm', 'x86_64', 'q35', 'kvm', 0)) | ||||||
|  |  | ||||||
|     def test_nwfilter_define_undefine(self): |     def test_nwfilter_define_undefine(self): | ||||||
|         conn = self.get_openAuth_curry_func()('qemu:///system') |         conn = self.get_openAuth_curry_func()('qemu:///system') | ||||||
|         # Will raise an exception if it's not valid XML |         # Will raise an exception if it's not valid XML | ||||||
|   | |||||||
| @@ -637,6 +637,27 @@ class HostTestCase(test.NoDBTestCase): | |||||||
|             self.assertIsNone(caps.host.cpu.model) |             self.assertIsNone(caps.host.cpu.model) | ||||||
|             self.assertEqual(0, len(caps.host.cpu.features)) |             self.assertEqual(0, len(caps.host.cpu.features)) | ||||||
|  |  | ||||||
|  |     def _test_get_domain_capabilities(self): | ||||||
|  |         caps = self.host.get_domain_capabilities() | ||||||
|  |         self.assertIn('x86_64', caps.keys()) | ||||||
|  |         self.assertEqual(['q35'], list(caps['x86_64'])) | ||||||
|  |         return caps['x86_64']['q35'] | ||||||
|  |  | ||||||
|  |     def test_get_domain_capabilities(self): | ||||||
|  |         caps = self._test_get_domain_capabilities() | ||||||
|  |         self.assertEqual(vconfig.LibvirtConfigDomainCaps, type(caps)) | ||||||
|  |         # There is a <gic supported='no'/> feature in the fixture but | ||||||
|  |         # we don't parse that because nothing currently cares about it. | ||||||
|  |         self.assertEqual(0, len(caps.features)) | ||||||
|  |  | ||||||
|  |     @mock.patch.object(fakelibvirt.virConnect, '_domain_capability_features', | ||||||
|  |                        new='') | ||||||
|  |     def test_get_domain_capabilities_no_features(self): | ||||||
|  |         caps = self._test_get_domain_capabilities() | ||||||
|  |         self.assertEqual(vconfig.LibvirtConfigDomainCaps, type(caps)) | ||||||
|  |         features = caps.features | ||||||
|  |         self.assertEqual([], features) | ||||||
|  |  | ||||||
|     @mock.patch.object(fakelibvirt.virConnect, "getHostname") |     @mock.patch.object(fakelibvirt.virConnect, "getHostname") | ||||||
|     def test_get_hostname_caching(self, mock_hostname): |     def test_get_hostname_caching(self, mock_hostname): | ||||||
|         mock_hostname.return_value = "foo" |         mock_hostname.return_value = "foo" | ||||||
|   | |||||||
| @@ -112,6 +112,59 @@ class LibvirtConfigCaps(LibvirtConfigObject): | |||||||
|         return caps |         return caps | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class LibvirtConfigDomainCaps(LibvirtConfigObject): | ||||||
|  |  | ||||||
|  |     def __init__(self, **kwargs): | ||||||
|  |         super(LibvirtConfigDomainCaps, self).__init__( | ||||||
|  |             root_name="domainCapabilities", **kwargs) | ||||||
|  |         self._features = None | ||||||
|  |  | ||||||
|  |     def parse_dom(self, xmldoc): | ||||||
|  |         super(LibvirtConfigDomainCaps, self).parse_dom(xmldoc) | ||||||
|  |  | ||||||
|  |         for c in xmldoc.getchildren(): | ||||||
|  |             if c.tag == "features": | ||||||
|  |                 features = LibvirtConfigDomainCapsFeatures() | ||||||
|  |                 features.parse_dom(c) | ||||||
|  |                 self._features = features | ||||||
|  |  | ||||||
|  |     @property | ||||||
|  |     def features(self): | ||||||
|  |         if self._features is None: | ||||||
|  |             return [] | ||||||
|  |         return self._features.features | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class LibvirtConfigDomainCapsFeatures(LibvirtConfigObject): | ||||||
|  |  | ||||||
|  |     def __init__(self, **kwargs): | ||||||
|  |         super(LibvirtConfigDomainCapsFeatures, self).__init__( | ||||||
|  |             root_name="features", **kwargs) | ||||||
|  |         self.features = [] | ||||||
|  |  | ||||||
|  |     def parse_dom(self, xmldoc): | ||||||
|  |         super(LibvirtConfigDomainCapsFeatures, self).parse_dom(xmldoc) | ||||||
|  |  | ||||||
|  |         for c in xmldoc.getchildren(): | ||||||
|  |             feature = None | ||||||
|  |             # TODO(aspiers): add supported features here | ||||||
|  |             if feature: | ||||||
|  |                 feature.parse_dom(c) | ||||||
|  |                 self.features.append(feature) | ||||||
|  |  | ||||||
|  |             # There are many other features and domain capabilities, | ||||||
|  |             # but we don't need to regenerate the XML (it's read-only | ||||||
|  |             # data provided by libvirtd), so there's no point parsing | ||||||
|  |             # them until we actually need their values. | ||||||
|  |  | ||||||
|  |     # For the same reason, we do not need a format_dom() method, but | ||||||
|  |     # it's a bug if this ever gets called and we inherited one from | ||||||
|  |     # the base class, so override that to watch out for accidental | ||||||
|  |     # calls. | ||||||
|  |     def format_dom(self): | ||||||
|  |         raise RuntimeError(_('BUG: tried to generate domainCapabilities XML')) | ||||||
|  |  | ||||||
|  |  | ||||||
| class LibvirtConfigCapsNUMATopology(LibvirtConfigObject): | class LibvirtConfigCapsNUMATopology(LibvirtConfigObject): | ||||||
|  |  | ||||||
|     def __init__(self, **kwargs): |     def __init__(self, **kwargs): | ||||||
|   | |||||||
| @@ -27,6 +27,7 @@ the raw libvirt API. These APIs are then used by all | |||||||
| the other libvirt related classes | the other libvirt related classes | ||||||
| """ | """ | ||||||
|  |  | ||||||
|  | from collections import defaultdict | ||||||
| import operator | import operator | ||||||
| import os | import os | ||||||
| import socket | import socket | ||||||
| @@ -56,6 +57,7 @@ from nova import utils | |||||||
| from nova.virt import event as virtevent | from nova.virt import event as virtevent | ||||||
| from nova.virt.libvirt import config as vconfig | from nova.virt.libvirt import config as vconfig | ||||||
| from nova.virt.libvirt import guest as libvirt_guest | from nova.virt.libvirt import guest as libvirt_guest | ||||||
|  | from nova.virt.libvirt import utils as libvirt_utils | ||||||
|  |  | ||||||
| libvirt = None | libvirt = None | ||||||
|  |  | ||||||
| @@ -91,6 +93,7 @@ class Host(object): | |||||||
|         self._conn_event_handler_queue = six.moves.queue.Queue() |         self._conn_event_handler_queue = six.moves.queue.Queue() | ||||||
|         self._lifecycle_event_handler = lifecycle_event_handler |         self._lifecycle_event_handler = lifecycle_event_handler | ||||||
|         self._caps = None |         self._caps = None | ||||||
|  |         self._domain_caps = None | ||||||
|         self._hostname = None |         self._hostname = None | ||||||
|  |  | ||||||
|         self._wrapped_conn = None |         self._wrapped_conn = None | ||||||
| @@ -667,6 +670,117 @@ class Host(object): | |||||||
|                         raise |                         raise | ||||||
|         return self._caps |         return self._caps | ||||||
|  |  | ||||||
|  |     def get_domain_capabilities(self): | ||||||
|  |         """Returns the capabilities you can request when creating a | ||||||
|  |         domain (VM) with that hypervisor, for various combinations of | ||||||
|  |         architecture and machine type. | ||||||
|  |  | ||||||
|  |         In this context the fuzzy word "hypervisor" implies QEMU | ||||||
|  |         binary, libvirt itself and the host config.  libvirt provides | ||||||
|  |         this in order that callers can determine what the underlying | ||||||
|  |         emulator and/or libvirt is capable of, prior to creating a domain | ||||||
|  |         (for instance via virDomainCreateXML or virDomainDefineXML). | ||||||
|  |         However nova needs to know the capabilities much earlier, when | ||||||
|  |         the host's compute service is first initialised, in order that | ||||||
|  |         placement decisions can be made across many compute hosts. | ||||||
|  |         Therefore this is expected to be called during the init_host() | ||||||
|  |         phase of the driver lifecycle rather than just before booting | ||||||
|  |         an instance. | ||||||
|  |  | ||||||
|  |         This causes an additional complication since the Python | ||||||
|  |         binding for this libvirt API call requires the architecture | ||||||
|  |         and machine type to be provided.  So in order to gain a full | ||||||
|  |         picture of the hypervisor's capabilities, technically we need | ||||||
|  |         to call it with the right parameters, once for each | ||||||
|  |         (architecture, machine_type) combination which we care about. | ||||||
|  |         However the libvirt experts have advised us that in practice | ||||||
|  |         the domain capabilities do not (yet, at least) vary enough | ||||||
|  |         across machine types to justify the cost of calling | ||||||
|  |         getDomainCapabilities() once for every single (architecture, | ||||||
|  |         machine_type) combination.  In particular, SEV support isn't | ||||||
|  |         reported per-machine type, and since there are usually many | ||||||
|  |         machine types, we follow the advice of the experts that for | ||||||
|  |         now it's sufficient to call it once per host architecture: | ||||||
|  |  | ||||||
|  |             https://bugzilla.redhat.com/show_bug.cgi?id=1683471#c7 | ||||||
|  |  | ||||||
|  |         However, future domain capabilities might report SEV in a more | ||||||
|  |         fine-grained manner, and we also expect to use this method to | ||||||
|  |         detect other features, such as for gracefully handling machine | ||||||
|  |         types and potentially for detecting OVMF binaries.  Therefore | ||||||
|  |         we memoize the results of the API calls in a nested dict where | ||||||
|  |         the top-level keys are architectures, and second-level keys | ||||||
|  |         are machine types, in order to allow easy expansion later. | ||||||
|  |  | ||||||
|  |         Whenever libvirt/QEMU are updated, cached domCapabilities | ||||||
|  |         would get outdated (because QEMU will contain new features and | ||||||
|  |         the capabilities will vary).  However, this should not be a | ||||||
|  |         problem here, because when libvirt/QEMU gets updated, the | ||||||
|  |         nova-compute agent also needs restarting, at which point the | ||||||
|  |         memoization will vanish because it's not persisted to disk. | ||||||
|  |  | ||||||
|  |         Note: The result is cached in the member attribute | ||||||
|  |         _domain_caps. | ||||||
|  |  | ||||||
|  |         :returns: a nested dict of dicts which maps architectures to | ||||||
|  |         machine types to instances of config.LibvirtConfigDomainCaps | ||||||
|  |         representing the domain capabilities of the host for that arch | ||||||
|  |         and machine type: | ||||||
|  |  | ||||||
|  |         { arch: | ||||||
|  |           { machine_type: LibvirtConfigDomainCaps } | ||||||
|  |         } | ||||||
|  |         """ | ||||||
|  |         if self._domain_caps: | ||||||
|  |             return self._domain_caps | ||||||
|  |  | ||||||
|  |         domain_caps = defaultdict(dict) | ||||||
|  |         caps = self.get_capabilities() | ||||||
|  |         virt_type = CONF.libvirt.virt_type | ||||||
|  |  | ||||||
|  |         for guest in caps.guests: | ||||||
|  |             arch = guest.arch | ||||||
|  |             machine_type = \ | ||||||
|  |                 libvirt_utils.get_default_machine_type(arch) or 'q35' | ||||||
|  |  | ||||||
|  |             emulator_bin = guest.emulator | ||||||
|  |             if virt_type in guest.domemulator: | ||||||
|  |                 emulator_bin = guest.domemulator[virt_type] | ||||||
|  |  | ||||||
|  |             # It is expected that each <guest> will have a different | ||||||
|  |             # architecture, but it doesn't hurt to add a safety net to | ||||||
|  |             # avoid needlessly calling libvirt's API more times than | ||||||
|  |             # we need. | ||||||
|  |             if machine_type in domain_caps[arch]: | ||||||
|  |                 continue | ||||||
|  |  | ||||||
|  |             domain_caps[arch][machine_type] = \ | ||||||
|  |                 self._get_domain_capabilities(emulator_bin, arch, | ||||||
|  |                                               machine_type, virt_type) | ||||||
|  |  | ||||||
|  |         # NOTE(aspiers): Use a temporary variable to update the | ||||||
|  |         # instance variable atomically, otherwise if some API | ||||||
|  |         # calls succeeded and then one failed, we might | ||||||
|  |         # accidentally memoize a partial result. | ||||||
|  |         self._domain_caps = domain_caps | ||||||
|  |  | ||||||
|  |         return self._domain_caps | ||||||
|  |  | ||||||
|  |     def _get_domain_capabilities(self, emulator_bin, arch, machine_type, | ||||||
|  |                                  virt_type, flags=0): | ||||||
|  |         xmlstr = self.get_connection().getDomainCapabilities( | ||||||
|  |             emulator_bin, | ||||||
|  |             arch, | ||||||
|  |             machine_type, | ||||||
|  |             virt_type, | ||||||
|  |             flags | ||||||
|  |         ) | ||||||
|  |         LOG.info("Libvirt host hypervisor capabilities for arch=%s and " | ||||||
|  |                  "machine_type=%s:\n%s", arch, machine_type, xmlstr) | ||||||
|  |         caps = vconfig.LibvirtConfigDomainCaps() | ||||||
|  |         caps.parse_str(xmlstr) | ||||||
|  |         return caps | ||||||
|  |  | ||||||
|     def get_driver_type(self): |     def get_driver_type(self): | ||||||
|         """Get hypervisor type. |         """Get hypervisor type. | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Adam Spiers
					Adam Spiers