Add infrastructure for invoking libvirt's getDomainCapabilities API
Two use cases have emerged semi-recently which both require the libvirt driver to be able to invoke libvirt's virConnectGetDomainCapabilities() API:

    https://libvirt.org/html/libvirt-libvirt-domain.html#virConnectGetDomainCapabilities

and parse the results:

- Automatic detection of AMD compute hosts which are capable of providing SEV (Secure Encrypted Virtualization)
- Gracefully handling different QEMU machine types for x86 hosts

So lay the foundation for these use cases by adding a new get_domain_capabilities() method to nova.virt.libvirt.host.Host, along with new subclasses of LibvirtConfigObject for parsing the XML returned from libvirt, and corresponding tests.

Change-Id: I4aeac9b2397bb2f5e130d1e58829a5e549fcb191
blueprint: gracefully-handle-qemu-machine-types
blueprint: amd-sev-libvirt-support
This commit is contained in:
parent
ed5b7c7fc3
commit
297f3ba687
@ -1357,6 +1357,154 @@ class Connection(object):
|
||||
else False for cpu_num in range(total_cpus)]
|
||||
return (total_cpus, cpu_map, active_cpus)
|
||||
|
||||
def getDomainCapabilities(self, emulatorbin, arch, machine_type,
                          virt_type, flags):
    """Return a canned <domainCapabilities> XML document.

    The parameters mirror the signature of the real libvirt call but
    are not consulted: the same x86_64/kvm document is produced for
    every request. Only the trailing section varies; it is taken from
    ``self._domain_capability_features`` and substituted into the
    template.
    """
    template = '''
<domainCapabilities>
  <path>/usr/bin/qemu-kvm</path>
  <domain>kvm</domain>
  <machine>pc-i440fx-2.11</machine>
  <arch>x86_64</arch>
  <vcpu max='255'/>
  <os supported='yes'>
    <loader supported='yes'>
      <value>/usr/share/qemu/ovmf-x86_64-ms-4m-code.bin</value>
      <value>/usr/share/qemu/ovmf-x86_64-ms-code.bin</value>
      <enum name='type'>
        <value>rom</value>
        <value>pflash</value>
      </enum>
      <enum name='readonly'>
        <value>yes</value>
        <value>no</value>
      </enum>
    </loader>
  </os>
  <cpu>
    <mode name='host-passthrough' supported='yes'/>
    <mode name='host-model' supported='yes'>
      <model fallback='forbid'>EPYC-IBPB</model>
      <vendor>AMD</vendor>
      <feature policy='require' name='x2apic'/>
      <feature policy='require' name='tsc-deadline'/>
      <feature policy='require' name='hypervisor'/>
      <feature policy='require' name='tsc_adjust'/>
      <feature policy='require' name='cmp_legacy'/>
      <feature policy='require' name='invtsc'/>
      <feature policy='require' name='virt-ssbd'/>
      <feature policy='disable' name='monitor'/>
    </mode>
    <mode name='custom' supported='yes'>
      <model usable='yes'>qemu64</model>
      <model usable='yes'>qemu32</model>
      <model usable='no'>phenom</model>
      <model usable='yes'>pentium3</model>
      <model usable='yes'>pentium2</model>
      <model usable='yes'>pentium</model>
      <model usable='no'>n270</model>
      <model usable='yes'>kvm64</model>
      <model usable='yes'>kvm32</model>
      <model usable='no'>coreduo</model>
      <model usable='no'>core2duo</model>
      <model usable='no'>athlon</model>
      <model usable='yes'>Westmere</model>
      <model usable='no'>Westmere-IBRS</model>
      <model usable='no'>Skylake-Server</model>
      <model usable='no'>Skylake-Server-IBRS</model>
      <model usable='no'>Skylake-Client</model>
      <model usable='no'>Skylake-Client-IBRS</model>
      <model usable='yes'>SandyBridge</model>
      <model usable='no'>SandyBridge-IBRS</model>
      <model usable='yes'>Penryn</model>
      <model usable='no'>Opteron_G5</model>
      <model usable='no'>Opteron_G4</model>
      <model usable='yes'>Opteron_G3</model>
      <model usable='yes'>Opteron_G2</model>
      <model usable='yes'>Opteron_G1</model>
      <model usable='yes'>Nehalem</model>
      <model usable='no'>Nehalem-IBRS</model>
      <model usable='no'>IvyBridge</model>
      <model usable='no'>IvyBridge-IBRS</model>
      <model usable='no'>Haswell</model>
      <model usable='no'>Haswell-noTSX</model>
      <model usable='no'>Haswell-noTSX-IBRS</model>
      <model usable='no'>Haswell-IBRS</model>
      <model usable='yes'>EPYC</model>
      <model usable='yes'>EPYC-IBPB</model>
      <model usable='yes'>Conroe</model>
      <model usable='no'>Broadwell</model>
      <model usable='no'>Broadwell-noTSX</model>
      <model usable='no'>Broadwell-noTSX-IBRS</model>
      <model usable='no'>Broadwell-IBRS</model>
      <model usable='yes'>486</model>
    </mode>
  </cpu>
  <devices>
    <disk supported='yes'>
      <enum name='diskDevice'>
        <value>disk</value>
        <value>cdrom</value>
        <value>floppy</value>
        <value>lun</value>
      </enum>
      <enum name='bus'>
        <value>ide</value>
        <value>fdc</value>
        <value>scsi</value>
        <value>virtio</value>
        <value>usb</value>
        <value>sata</value>
      </enum>
    </disk>
    <graphics supported='yes'>
      <enum name='type'>
        <value>sdl</value>
        <value>vnc</value>
        <value>spice</value>
      </enum>
    </graphics>
    <video supported='yes'>
      <enum name='modelType'>
        <value>vga</value>
        <value>cirrus</value>
        <value>vmvga</value>
        <value>qxl</value>
        <value>virtio</value>
      </enum>
    </video>
    <hostdev supported='yes'>
      <enum name='mode'>
        <value>subsystem</value>
      </enum>
      <enum name='startupPolicy'>
        <value>default</value>
        <value>mandatory</value>
        <value>requisite</value>
        <value>optional</value>
      </enum>
      <enum name='subsysType'>
        <value>usb</value>
        <value>pci</value>
        <value>scsi</value>
      </enum>
      <enum name='capsType'/>
      <enum name='pciBackend'>
        <value>default</value>
        <value>vfio</value>
      </enum>
    </hostdev>
  </devices>
%(features)s
</domainCapabilities>'''
    # The features section lives in a separate attribute so that it can
    # be swapped out independently of the rest of the document.
    substitutions = {'features': self._domain_capability_features}
    return template % substitutions
|
||||
|
||||
# Features are kept separately so that the tests can patch this
|
||||
# class variable with alternate values.
|
||||
_domain_capability_features = ''' <features>
|
||||
<gic supported='no'/>
|
||||
</features>'''
|
||||
|
||||
def getCapabilities(self):
|
||||
"""Return spoofed capabilities."""
|
||||
numa_topology = self.host_info.numa_topology
|
||||
|
@ -277,6 +277,11 @@ class FakeLibvirtTests(test.NoDBTestCase):
|
||||
conn = self.get_openAuth_curry_func()('qemu:///system')
|
||||
etree.fromstring(conn.getCapabilities())
|
||||
|
||||
def test_getDomainCapabilities(self):
    """The fake connection must hand back well-formed XML."""
    connection = self.get_openAuth_curry_func()('qemu:///system')
    domcaps_xml = connection.getDomainCapabilities(
        '/usr/bin/qemu-kvm', 'x86_64', 'q35', 'kvm', 0)
    # Parsing raises if the document is not valid XML; that is the
    # only check performed here.
    etree.fromstring(domcaps_xml)
|
||||
|
||||
def test_nwfilter_define_undefine(self):
|
||||
conn = self.get_openAuth_curry_func()('qemu:///system')
|
||||
# Will raise an exception if it's not valid XML
|
||||
|
@ -637,6 +637,27 @@ class HostTestCase(test.NoDBTestCase):
|
||||
self.assertIsNone(caps.host.cpu.model)
|
||||
self.assertEqual(0, len(caps.host.cpu.features))
|
||||
|
||||
def _test_get_domain_capabilities(self):
    """Fetch the host's domain capabilities and return the x86_64/q35 entry.

    Along the way, assert that x86_64 is among the reported
    architectures and that q35 is the only machine type recorded
    for it.
    """
    all_caps = self.host.get_domain_capabilities()
    self.assertIn('x86_64', all_caps.keys())

    x86_caps = all_caps['x86_64']
    self.assertEqual(['q35'], list(x86_caps))
    return x86_caps['q35']
|
||||
|
||||
def test_get_domain_capabilities(self):
    """The default fixture parses to an object with no features."""
    domcaps = self._test_get_domain_capabilities()
    self.assertEqual(vconfig.LibvirtConfigDomainCaps, type(domcaps))

    # The fixture does carry a <gic supported='no'/> feature, but it
    # is not parsed because nothing currently cares about it.
    parsed_features = domcaps.features
    self.assertEqual(0, len(parsed_features))
|
||||
|
||||
@mock.patch.object(fakelibvirt.virConnect, '_domain_capability_features',
                   new='')
def test_get_domain_capabilities_no_features(self):
    """An absent features section parses to an empty list."""
    domcaps = self._test_get_domain_capabilities()
    self.assertEqual(vconfig.LibvirtConfigDomainCaps, type(domcaps))
    # The fake's features section is patched to '' for this test, so
    # the document has no <features> element at all.
    self.assertEqual([], domcaps.features)
|
||||
|
||||
@mock.patch.object(fakelibvirt.virConnect, "getHostname")
|
||||
def test_get_hostname_caching(self, mock_hostname):
|
||||
mock_hostname.return_value = "foo"
|
||||
|
@ -112,6 +112,59 @@ class LibvirtConfigCaps(LibvirtConfigObject):
|
||||
return caps
|
||||
|
||||
|
||||
class LibvirtConfigDomainCaps(LibvirtConfigObject):
    """Parsed form of a libvirt <domainCapabilities> document.

    Only the <features> child element is inspected; every other child
    of the document is skipped.
    """

    def __init__(self, **kwargs):
        super(LibvirtConfigDomainCaps, self).__init__(
            root_name="domainCapabilities", **kwargs)
        # Set by parse_dom() when the document has a <features> child;
        # stays None otherwise.
        self._features = None

    def parse_dom(self, xmldoc):
        """Populate this object from an XML element.

        :param xmldoc: element whose direct children are scanned for
            a <features> element
        """
        super(LibvirtConfigDomainCaps, self).parse_dom(xmldoc)

        # Iterate over the element itself rather than calling
        # getchildren(): that method is deprecated in lxml and was
        # removed from xml.etree.ElementTree in Python 3.9.
        for c in xmldoc:
            if c.tag == "features":
                features = LibvirtConfigDomainCapsFeatures()
                features.parse_dom(c)
                self._features = features

    @property
    def features(self):
        """The list of parsed features, or [] if none were parsed."""
        if self._features is None:
            return []
        return self._features.features
|
||||
|
||||
|
||||
class LibvirtConfigDomainCapsFeatures(LibvirtConfigObject):
    """Parsed form of the <features> element of domainCapabilities.

    No individual feature is recognised yet, so ``features`` is always
    left empty by parse_dom(); the loop there is scaffolding for
    feature parsers to be added later.
    """

    def __init__(self, **kwargs):
        super(LibvirtConfigDomainCapsFeatures, self).__init__(
            root_name="features", **kwargs)
        # Parsed feature objects, appended by parse_dom().
        self.features = []

    def parse_dom(self, xmldoc):
        """Populate ``self.features`` from the children of *xmldoc*.

        :param xmldoc: the <features> element to parse
        """
        super(LibvirtConfigDomainCapsFeatures, self).parse_dom(xmldoc)

        # Iterate over the element itself rather than calling
        # getchildren(): that method is deprecated in lxml and was
        # removed from xml.etree.ElementTree in Python 3.9.
        for c in xmldoc:
            feature = None
            # TODO(aspiers): add supported features here
            if feature:
                feature.parse_dom(c)
                self.features.append(feature)

    # There are many other features and domain capabilities,
    # but we don't need to regenerate the XML (it's read-only
    # data provided by libvirtd), so there's no point parsing
    # them until we actually need their values.

    # For the same reason, we do not need a format_dom() method, but
    # it's a bug if this ever gets called and we inherited one from
    # the base class, so override that to watch out for accidental
    # calls.
    def format_dom(self):
        """Always raise: this XML is read-only and is never generated.

        :raises RuntimeError: unconditionally
        """
        raise RuntimeError(_('BUG: tried to generate domainCapabilities XML'))
|
||||
|
||||
|
||||
class LibvirtConfigCapsNUMATopology(LibvirtConfigObject):
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
|
@ -27,6 +27,7 @@ the raw libvirt API. These APIs are then used by all
|
||||
the other libvirt related classes
|
||||
"""
|
||||
|
||||
from collections import defaultdict
|
||||
import operator
|
||||
import os
|
||||
import socket
|
||||
@ -56,6 +57,7 @@ from nova import utils
|
||||
from nova.virt import event as virtevent
|
||||
from nova.virt.libvirt import config as vconfig
|
||||
from nova.virt.libvirt import guest as libvirt_guest
|
||||
from nova.virt.libvirt import utils as libvirt_utils
|
||||
|
||||
libvirt = None
|
||||
|
||||
@ -91,6 +93,7 @@ class Host(object):
|
||||
self._conn_event_handler_queue = six.moves.queue.Queue()
|
||||
self._lifecycle_event_handler = lifecycle_event_handler
|
||||
self._caps = None
|
||||
self._domain_caps = None
|
||||
self._hostname = None
|
||||
|
||||
self._wrapped_conn = None
|
||||
@ -667,6 +670,117 @@ class Host(object):
|
||||
raise
|
||||
return self._caps
|
||||
|
||||
def get_domain_capabilities(self):
    """Report what can be requested when creating a domain on this host.

    The answer reflects the combined abilities of the QEMU binary,
    libvirt itself and the host configuration (loosely, "the
    hypervisor"), for various combinations of architecture and
    machine type. libvirt exposes this so that callers can find out
    what the emulator and/or libvirt can do before creating a domain
    (for instance via virDomainCreateXML or virDomainDefineXML).
    nova, however, needs the information much earlier - when the
    host's compute service is first initialised - so that placement
    decisions can be made across many compute hosts. This method is
    therefore expected to be called during the init_host() phase of
    the driver lifecycle rather than just before booting an instance.

    That brings a complication: the Python binding for the libvirt
    call requires an architecture and a machine type, so a complete
    picture would technically need one call for every
    (architecture, machine_type) combination we care about. The
    libvirt experts have advised that, in practice, domain
    capabilities do not (yet, at least) vary enough across machine
    types to justify that cost. In particular SEV support isn't
    reported per machine type, and there are usually many machine
    types, so for now one call per host architecture is considered
    sufficient:

      https://bugzilla.redhat.com/show_bug.cgi?id=1683471#c7

    Future domain capabilities might report SEV in a more
    fine-grained manner, and this method is also expected to be used
    for detecting other features, such as for gracefully handling
    machine types and potentially for detecting OVMF binaries. The
    results are therefore memoized in a nested dict keyed first by
    architecture and then by machine type, to allow easy expansion
    later.

    Cached capabilities become stale whenever libvirt/QEMU are
    updated, since the new QEMU may offer different features. This
    should not be a problem, because such an update also requires
    the nova-compute agent to be restarted, and the memoization is
    not persisted to disk.

    Note: The result is cached in the member attribute _domain_caps.

    :returns: a nested dict of dicts which maps architectures to
        machine types to instances of config.LibvirtConfigDomainCaps
        representing the domain capabilities of the host for that arch
        and machine type:

        { arch:
          { machine_type: LibvirtConfigDomainCaps }
        }
    """
    if self._domain_caps:
        return self._domain_caps

    collected = defaultdict(dict)
    host_caps = self.get_capabilities()
    virt_type = CONF.libvirt.virt_type

    for guest in host_caps.guests:
        arch = guest.arch
        default_machine_type = libvirt_utils.get_default_machine_type(arch)
        machine_type = (
            default_machine_type if default_machine_type else 'q35')

        # Prefer the emulator registered for the configured virt type,
        # falling back to the guest's generic emulator.
        emulator_bin = (
            guest.domemulator[virt_type]
            if virt_type in guest.domemulator
            else guest.emulator)

        # Each <guest> is expected to have a different architecture,
        # but as a safety net only query libvirt for combinations
        # which have not already been recorded.
        per_arch = collected[arch]
        if machine_type not in per_arch:
            per_arch[machine_type] = self._get_domain_capabilities(
                emulator_bin, arch, machine_type, virt_type)

    # NOTE(aspiers): The instance variable is only assigned once the
    # whole loop has completed, so that if some API calls succeeded
    # and then one failed, a partial result is never memoized.
    self._domain_caps = collected

    return self._domain_caps
|
||||
|
||||
def _get_domain_capabilities(self, emulator_bin, arch, machine_type,
                             virt_type, flags=0):
    """Query libvirt for one set of domain capabilities and parse it.

    All arguments are handed unchanged, and in this order, to the
    connection's getDomainCapabilities() call.

    :param emulator_bin: emulator binary to query
    :param arch: guest architecture to query
    :param machine_type: machine type to query
    :param virt_type: virtualization type to query
    :param flags: flags value for the libvirt call (defaults to 0)
    :returns: a vconfig.LibvirtConfigDomainCaps parsed from the XML
        string returned by libvirt
    """
    xmlstr = self.get_connection().getDomainCapabilities(
        emulator_bin,
        arch,
        machine_type,
        virt_type,
        flags
    )
    # The full capabilities document is long, so it is logged at
    # debug level to keep it out of the default (info) service logs.
    LOG.debug("Libvirt host hypervisor capabilities for arch=%s and "
              "machine_type=%s:\n%s", arch, machine_type, xmlstr)
    caps = vconfig.LibvirtConfigDomainCaps()
    caps.parse_str(xmlstr)
    return caps
|
||||
|
||||
def get_driver_type(self):
|
||||
"""Get hypervisor type.
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user