Add infrastructure for invoking libvirt's getDomainCapabilities API
Two use cases have emerged semi-recently, both of which require the libvirt driver to be able to invoke libvirt's virConnectGetDomainCapabilities() API (https://libvirt.org/html/libvirt-libvirt-domain.html#virConnectGetDomainCapabilities) and parse the results:
- Automatic detection of AMD compute hosts which are capable of providing SEV (Secure Encrypted Virtualization)
- Gracefully handling different QEMU machine types for x86 hosts
So lay the foundation for these use cases by adding a new get_domain_capabilities() method to nova.virt.libvirt.host.Host, along with new subclasses of LibvirtConfigObject for parsing the XML returned from libvirt, and corresponding tests.
Change-Id: I4aeac9b2397bb2f5e130d1e58829a5e549fcb191
blueprint: gracefully-handle-qemu-machine-types
blueprint: amd-sev-libvirt-support
This commit is contained in:
parent
ed5b7c7fc3
commit
297f3ba687
nova
@ -1357,6 +1357,154 @@ class Connection(object):
|
|||||||
else False for cpu_num in range(total_cpus)]
|
else False for cpu_num in range(total_cpus)]
|
||||||
return (total_cpus, cpu_map, active_cpus)
|
return (total_cpus, cpu_map, active_cpus)
|
||||||
|
|
||||||
|
def getDomainCapabilities(self, emulatorbin, arch, machine_type,
                          virt_type, flags):
    """Return a canned domainCapabilities XML document.

    None of the arguments are consulted: the same document is produced
    for every call. The only variable part is the features section,
    which is read from ``self._domain_capability_features``.
    """
    substitutions = {'features': self._domain_capability_features}
    template = '''
<domainCapabilities>
<path>/usr/bin/qemu-kvm</path>
<domain>kvm</domain>
<machine>pc-i440fx-2.11</machine>
<arch>x86_64</arch>
<vcpu max='255'/>
<os supported='yes'>
<loader supported='yes'>
<value>/usr/share/qemu/ovmf-x86_64-ms-4m-code.bin</value>
<value>/usr/share/qemu/ovmf-x86_64-ms-code.bin</value>
<enum name='type'>
<value>rom</value>
<value>pflash</value>
</enum>
<enum name='readonly'>
<value>yes</value>
<value>no</value>
</enum>
</loader>
</os>
<cpu>
<mode name='host-passthrough' supported='yes'/>
<mode name='host-model' supported='yes'>
<model fallback='forbid'>EPYC-IBPB</model>
<vendor>AMD</vendor>
<feature policy='require' name='x2apic'/>
<feature policy='require' name='tsc-deadline'/>
<feature policy='require' name='hypervisor'/>
<feature policy='require' name='tsc_adjust'/>
<feature policy='require' name='cmp_legacy'/>
<feature policy='require' name='invtsc'/>
<feature policy='require' name='virt-ssbd'/>
<feature policy='disable' name='monitor'/>
</mode>
<mode name='custom' supported='yes'>
<model usable='yes'>qemu64</model>
<model usable='yes'>qemu32</model>
<model usable='no'>phenom</model>
<model usable='yes'>pentium3</model>
<model usable='yes'>pentium2</model>
<model usable='yes'>pentium</model>
<model usable='no'>n270</model>
<model usable='yes'>kvm64</model>
<model usable='yes'>kvm32</model>
<model usable='no'>coreduo</model>
<model usable='no'>core2duo</model>
<model usable='no'>athlon</model>
<model usable='yes'>Westmere</model>
<model usable='no'>Westmere-IBRS</model>
<model usable='no'>Skylake-Server</model>
<model usable='no'>Skylake-Server-IBRS</model>
<model usable='no'>Skylake-Client</model>
<model usable='no'>Skylake-Client-IBRS</model>
<model usable='yes'>SandyBridge</model>
<model usable='no'>SandyBridge-IBRS</model>
<model usable='yes'>Penryn</model>
<model usable='no'>Opteron_G5</model>
<model usable='no'>Opteron_G4</model>
<model usable='yes'>Opteron_G3</model>
<model usable='yes'>Opteron_G2</model>
<model usable='yes'>Opteron_G1</model>
<model usable='yes'>Nehalem</model>
<model usable='no'>Nehalem-IBRS</model>
<model usable='no'>IvyBridge</model>
<model usable='no'>IvyBridge-IBRS</model>
<model usable='no'>Haswell</model>
<model usable='no'>Haswell-noTSX</model>
<model usable='no'>Haswell-noTSX-IBRS</model>
<model usable='no'>Haswell-IBRS</model>
<model usable='yes'>EPYC</model>
<model usable='yes'>EPYC-IBPB</model>
<model usable='yes'>Conroe</model>
<model usable='no'>Broadwell</model>
<model usable='no'>Broadwell-noTSX</model>
<model usable='no'>Broadwell-noTSX-IBRS</model>
<model usable='no'>Broadwell-IBRS</model>
<model usable='yes'>486</model>
</mode>
</cpu>
<devices>
<disk supported='yes'>
<enum name='diskDevice'>
<value>disk</value>
<value>cdrom</value>
<value>floppy</value>
<value>lun</value>
</enum>
<enum name='bus'>
<value>ide</value>
<value>fdc</value>
<value>scsi</value>
<value>virtio</value>
<value>usb</value>
<value>sata</value>
</enum>
</disk>
<graphics supported='yes'>
<enum name='type'>
<value>sdl</value>
<value>vnc</value>
<value>spice</value>
</enum>
</graphics>
<video supported='yes'>
<enum name='modelType'>
<value>vga</value>
<value>cirrus</value>
<value>vmvga</value>
<value>qxl</value>
<value>virtio</value>
</enum>
</video>
<hostdev supported='yes'>
<enum name='mode'>
<value>subsystem</value>
</enum>
<enum name='startupPolicy'>
<value>default</value>
<value>mandatory</value>
<value>requisite</value>
<value>optional</value>
</enum>
<enum name='subsysType'>
<value>usb</value>
<value>pci</value>
<value>scsi</value>
</enum>
<enum name='capsType'/>
<enum name='pciBackend'>
<value>default</value>
<value>vfio</value>
</enum>
</hostdev>
</devices>
%(features)s
</domainCapabilities>'''
    return template % substitutions

# Features are kept separately so that the tests can patch this
# class variable with alternate values.
_domain_capability_features = ''' <features>
<gic supported='no'/>
</features>'''
|
||||||
|
|
||||||
def getCapabilities(self):
|
def getCapabilities(self):
|
||||||
"""Return spoofed capabilities."""
|
"""Return spoofed capabilities."""
|
||||||
numa_topology = self.host_info.numa_topology
|
numa_topology = self.host_info.numa_topology
|
||||||
|
@ -277,6 +277,11 @@ class FakeLibvirtTests(test.NoDBTestCase):
|
|||||||
conn = self.get_openAuth_curry_func()('qemu:///system')
|
conn = self.get_openAuth_curry_func()('qemu:///system')
|
||||||
etree.fromstring(conn.getCapabilities())
|
etree.fromstring(conn.getCapabilities())
|
||||||
|
|
||||||
|
def test_getDomainCapabilities(self):
    """The fake connection's domain capabilities must parse as XML."""
    open_auth = self.get_openAuth_curry_func()
    conn = open_auth('qemu:///system')
    xml = conn.getDomainCapabilities(
        '/usr/bin/qemu-kvm', 'x86_64', 'q35', 'kvm', 0)
    # fromstring() is the assertion here: it is called only for the
    # parse, and its result is discarded.
    etree.fromstring(xml)
|
||||||
|
|
||||||
def test_nwfilter_define_undefine(self):
|
def test_nwfilter_define_undefine(self):
|
||||||
conn = self.get_openAuth_curry_func()('qemu:///system')
|
conn = self.get_openAuth_curry_func()('qemu:///system')
|
||||||
# Will raise an exception if it's not valid XML
|
# Will raise an exception if it's not valid XML
|
||||||
|
@ -637,6 +637,27 @@ class HostTestCase(test.NoDBTestCase):
|
|||||||
self.assertIsNone(caps.host.cpu.model)
|
self.assertIsNone(caps.host.cpu.model)
|
||||||
self.assertEqual(0, len(caps.host.cpu.features))
|
self.assertEqual(0, len(caps.host.cpu.features))
|
||||||
|
|
||||||
|
def _test_get_domain_capabilities(self):
    """Shared helper: fetch the host's domain capabilities.

    Asserts that an 'x86_64' entry exists and that 'q35' is its only
    machine type, then returns the capabilities object stored for that
    pair.
    """
    all_caps = self.host.get_domain_capabilities()
    self.assertIn('x86_64', all_caps.keys())
    x86_caps = all_caps['x86_64']
    self.assertEqual(['q35'], list(x86_caps))
    return x86_caps['q35']
|
||||||
|
|
||||||
|
def test_get_domain_capabilities(self):
    """The default fixture yields a caps object with no parsed features."""
    domain_caps = self._test_get_domain_capabilities()
    self.assertEqual(vconfig.LibvirtConfigDomainCaps, type(domain_caps))
    # There is a <gic supported='no'/> feature in the fixture but
    # we don't parse that because nothing currently cares about it.
    parsed_features = domain_caps.features
    self.assertEqual(0, len(parsed_features))
|
||||||
|
|
||||||
|
@mock.patch.object(fakelibvirt.virConnect, '_domain_capability_features',
                   new='')
def test_get_domain_capabilities_no_features(self):
    """With the features section patched to '', features is []."""
    domain_caps = self._test_get_domain_capabilities()
    self.assertEqual(vconfig.LibvirtConfigDomainCaps, type(domain_caps))
    self.assertEqual([], domain_caps.features)
|
||||||
|
|
||||||
@mock.patch.object(fakelibvirt.virConnect, "getHostname")
|
@mock.patch.object(fakelibvirt.virConnect, "getHostname")
|
||||||
def test_get_hostname_caching(self, mock_hostname):
|
def test_get_hostname_caching(self, mock_hostname):
|
||||||
mock_hostname.return_value = "foo"
|
mock_hostname.return_value = "foo"
|
||||||
|
@ -112,6 +112,59 @@ class LibvirtConfigCaps(LibvirtConfigObject):
|
|||||||
return caps
|
return caps
|
||||||
|
|
||||||
|
|
||||||
|
class LibvirtConfigDomainCaps(LibvirtConfigObject):
    """Parsed form of a <domainCapabilities> XML document.

    Only the <features> child element is examined; every other child
    is ignored.
    """

    def __init__(self, **kwargs):
        super(LibvirtConfigDomainCaps, self).__init__(
            root_name="domainCapabilities", **kwargs)
        # Set to a LibvirtConfigDomainCapsFeatures once parse_dom() has
        # seen a <features> element; None until then.
        self._features = None

    def parse_dom(self, xmldoc):
        """Populate this object from a <domainCapabilities> element.

        :param xmldoc: element whose direct children are scanned for a
            <features> element
        """
        super(LibvirtConfigDomainCaps, self).parse_dom(xmldoc)

        # Iterate the element directly instead of calling
        # getchildren(), which is deprecated in lxml and has been
        # removed from the standard library's ElementTree.
        for c in xmldoc:
            if c.tag == "features":
                features = LibvirtConfigDomainCapsFeatures()
                features.parse_dom(c)
                self._features = features

    @property
    def features(self):
        """The parsed feature list, or [] if no <features> was parsed."""
        if self._features is None:
            return []
        return self._features.features
|
||||||
|
|
||||||
|
|
||||||
|
class LibvirtConfigDomainCapsFeatures(LibvirtConfigObject):
    """Parsed form of the <features> element of domainCapabilities.

    No individual feature is recognised yet, so ``features`` stays
    empty after parsing. This object is parse-only: format_dom()
    raises.
    """

    def __init__(self, **kwargs):
        super(LibvirtConfigDomainCapsFeatures, self).__init__(
            root_name="features", **kwargs)
        # Parsed feature objects, appended by parse_dom().
        self.features = []

    def parse_dom(self, xmldoc):
        """Collect the recognised children of a <features> element.

        :param xmldoc: the <features> element to scan
        """
        super(LibvirtConfigDomainCapsFeatures, self).parse_dom(xmldoc)

        # Iterate the element directly instead of calling
        # getchildren(), which is deprecated in lxml and has been
        # removed from the standard library's ElementTree.
        for c in xmldoc:
            feature = None
            # TODO(aspiers): add supported features here
            if feature:
                feature.parse_dom(c)
                self.features.append(feature)

        # There are many other features and domain capabilities,
        # but we don't need to regenerate the XML (it's read-only
        # data provided by libvirtd), so there's no point parsing
        # them until we actually need their values.

    # For the same reason, we do not need a format_dom() method, but
    # it's a bug if this ever gets called and we inherited one from
    # the base class, so override that to watch out for accidental
    # calls.
    def format_dom(self):
        """Always raise: this XML is never meant to be generated.

        :raises RuntimeError: unconditionally
        """
        raise RuntimeError(_('BUG: tried to generate domainCapabilities XML'))
|
||||||
|
|
||||||
|
|
||||||
class LibvirtConfigCapsNUMATopology(LibvirtConfigObject):
|
class LibvirtConfigCapsNUMATopology(LibvirtConfigObject):
|
||||||
|
|
||||||
def __init__(self, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
|
@ -27,6 +27,7 @@ the raw libvirt API. These APIs are then used by all
|
|||||||
the other libvirt related classes
|
the other libvirt related classes
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
from collections import defaultdict
|
||||||
import operator
|
import operator
|
||||||
import os
|
import os
|
||||||
import socket
|
import socket
|
||||||
@ -56,6 +57,7 @@ from nova import utils
|
|||||||
from nova.virt import event as virtevent
|
from nova.virt import event as virtevent
|
||||||
from nova.virt.libvirt import config as vconfig
|
from nova.virt.libvirt import config as vconfig
|
||||||
from nova.virt.libvirt import guest as libvirt_guest
|
from nova.virt.libvirt import guest as libvirt_guest
|
||||||
|
from nova.virt.libvirt import utils as libvirt_utils
|
||||||
|
|
||||||
libvirt = None
|
libvirt = None
|
||||||
|
|
||||||
@ -91,6 +93,7 @@ class Host(object):
|
|||||||
self._conn_event_handler_queue = six.moves.queue.Queue()
|
self._conn_event_handler_queue = six.moves.queue.Queue()
|
||||||
self._lifecycle_event_handler = lifecycle_event_handler
|
self._lifecycle_event_handler = lifecycle_event_handler
|
||||||
self._caps = None
|
self._caps = None
|
||||||
|
self._domain_caps = None
|
||||||
self._hostname = None
|
self._hostname = None
|
||||||
|
|
||||||
self._wrapped_conn = None
|
self._wrapped_conn = None
|
||||||
@ -667,6 +670,117 @@ class Host(object):
|
|||||||
raise
|
raise
|
||||||
return self._caps
|
return self._caps
|
||||||
|
|
||||||
|
def get_domain_capabilities(self):
    """Returns the capabilities you can request when creating a
    domain (VM) with the hypervisor, for various combinations of
    architecture and machine type.

    In this context the fuzzy word "hypervisor" implies QEMU
    binary, libvirt itself and the host config.  libvirt provides
    this in order that callers can determine what the underlying
    emulator and/or libvirt is capable of, prior to creating a domain
    (for instance via virDomainCreateXML or virDomainDefineXML).
    However nova needs to know the capabilities much earlier, when
    the host's compute service is first initialised, in order that
    placement decisions can be made across many compute hosts.
    Therefore this is expected to be called during the init_host()
    phase of the driver lifecycle rather than just before booting
    an instance.

    This causes an additional complication since the Python
    binding for this libvirt API call requires the architecture
    and machine type to be provided.  So in order to gain a full
    picture of the hypervisor's capabilities, technically we need
    to call it with the right parameters, once for each
    (architecture, machine_type) combination which we care about.
    However the libvirt experts have advised us that in practice
    the domain capabilities do not (yet, at least) vary enough
    across machine types to justify the cost of calling
    getDomainCapabilities() once for every single (architecture,
    machine_type) combination.  In particular, SEV support isn't
    reported per-machine type, and since there are usually many
    machine types, we follow the advice of the experts that for
    now it's sufficient to call it once per host architecture:

        https://bugzilla.redhat.com/show_bug.cgi?id=1683471#c7

    However, future domain capabilities might report SEV in a more
    fine-grained manner, and we also expect to use this method to
    detect other features, such as for gracefully handling machine
    types and potentially for detecting OVMF binaries.  Therefore
    we memoize the results of the API calls in a nested dict where
    the top-level keys are architectures, and second-level keys
    are machine types, in order to allow easy expansion later.

    Whenever libvirt/QEMU are updated, cached domCapabilities
    would get outdated (because QEMU will contain new features and
    the capabilities will vary).  However, this should not be a
    problem here, because when libvirt/QEMU gets updated, the
    nova-compute agent also needs restarting, at which point the
    memoization will vanish because it's not persisted to disk.

    Note: The result is cached in the member attribute
    _domain_caps.

    :returns: a nested dict of dicts which maps architectures to
        machine types to instances of config.LibvirtConfigDomainCaps
        representing the domain capabilities of the host for that arch
        and machine type:

        { arch:
          { machine_type: LibvirtConfigDomainCaps }
        }
    """
    # Compare against None rather than relying on truthiness, so that
    # an empty result is also treated as cached instead of being
    # recomputed on every call.
    if self._domain_caps is not None:
        return self._domain_caps

    domain_caps = defaultdict(dict)
    caps = self.get_capabilities()
    virt_type = CONF.libvirt.virt_type

    for guest in caps.guests:
        arch = guest.arch
        # Fall back to 'q35' when no default machine type is returned
        # for this architecture.
        machine_type = (
            libvirt_utils.get_default_machine_type(arch) or 'q35')

        # It is expected that each <guest> will have a different
        # architecture, but it doesn't hurt to add a safety net to
        # avoid needlessly calling libvirt's API more times than
        # we need.
        if machine_type in domain_caps[arch]:
            continue

        # Prefer the emulator listed for the configured virt_type,
        # otherwise use the guest's own emulator.
        emulator_bin = guest.emulator
        if virt_type in guest.domemulator:
            emulator_bin = guest.domemulator[virt_type]

        domain_caps[arch][machine_type] = self._get_domain_capabilities(
            emulator_bin, arch, machine_type, virt_type)

    # NOTE(aspiers): Use a temporary variable to update the
    # instance variable atomically, otherwise if some API
    # calls succeeded and then one failed, we might
    # accidentally memoize a partial result.
    self._domain_caps = domain_caps

    return self._domain_caps
|
||||||
|
|
||||||
|
def _get_domain_capabilities(self, emulator_bin, arch, machine_type,
                             virt_type, flags=0):
    """Fetch and parse the domain capabilities for one combination.

    Passes the arguments straight through to the connection's
    getDomainCapabilities() call, logs the XML it returns, and parses
    that XML.

    :returns: a vconfig.LibvirtConfigDomainCaps populated from the XML
    """
    conn = self.get_connection()
    raw_xml = conn.getDomainCapabilities(
        emulator_bin, arch, machine_type, virt_type, flags)
    LOG.info("Libvirt host hypervisor capabilities for arch=%s and "
             "machine_type=%s:\n%s", arch, machine_type, raw_xml)
    parsed = vconfig.LibvirtConfigDomainCaps()
    parsed.parse_str(raw_xml)
    return parsed
|
||||||
|
|
||||||
def get_driver_type(self):
|
def get_driver_type(self):
|
||||||
"""Get hypervisor type.
|
"""Get hypervisor type.
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user