libvirt: fix disablement of NUMA & hugepages on unsupported platforms

Two previous commits updated the libvirt XML config generator so
that it would omit the config elements for huge pages and NUMA
placement when running on old libvirt:

  commit cf3a1262ec
  Author: Vladik Romanovsky <vladik.romanovsky@enovance.com>
  Date:   Wed Jan 28 00:54:26 2015 -0500

    libvirt: avoid setting the memnodes when it's not a supported option

  commit 3466e72754
  Author: Vladik Romanovsky <vladik.romanovsky@enovance.com>
  Date:   Tue Jan 6 14:23:38 2015 -0500

    libvirt: not setting membacking when mempages are empty host topology

The problem arising from this is that hosts still report to the
scheduler that they can support NUMA and huge pages if
libvirt >= 1.0.4, but we silently discard the guest config
if libvirt < 1.2.7 (NUMA) or < 1.2.8 (huge pages).

The result is that the scheduler thinks the host can support the
requested feature and so places the guest there, but the guest
that is actually launched is missing the feature. So the user is
not getting what they requested.

The correct approach is to update the "_get_host_numa_topology"
method so that it does not report any NUMA topology in the first
place, if the host is incapable of having guests configured in
the way Nova requires. Likewise if the host cannot support the
huge page configuration method required, it should not report
availability of huge pages.
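
In sketch form, the reporting side change looks like this (a
simplified rendering of the "_get_host_numa_topology" hunks in
the diff below):

    def _get_host_numa_topology(self):
        # Report no NUMA topology at all if guests cannot be
        # given one on this host; the scheduler will then never
        # target this host for NUMA-aware placement.
        if not self._has_numa_support():
            return None
        ...
        # Within each reported cell, only advertise page sizes
        # if the guest hugepage config is usable on this libvirt.
        mempages = []
        if self._has_hugepage_support():
            mempages = [objects.NUMAPagesTopology(...)
                        for pages in cell.mempages]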

In summary, we move the version checks added by the two
commits above to the point at which host capability reporting
is done. We add a fatal error check in the guest XML config
generator methods, so that if a future scheduler bug causes
it to not honour host capabilities, we see a clear error
instead of silently starting guests with the wrong config.
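
The generator side check is purely defensive; in outline it is
the guard added at the top of "_get_guest_numa_config" (with an
equivalent one in "_get_guest_memory_backing_config") in the
diff below:

    def _get_guest_numa_config(self, instance_numa_topology, ...):
        if (not self._has_numa_support() and
                instance_numa_topology is not None):
            # The scheduler should never have placed a NUMA
            # guest here; fail loudly instead of silently
            # dropping the requested config.
            raise exception.NUMATopologyUnsupported()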

There is a further mistake in that a version number check alone
is insufficient. We must also check that the hypervisor is
either QEMU or KVM, to prevent these code paths from running on
Xen or LXC.
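
Both conditions fold into two small helpers (shown in full in
the driver diff below); the NUMA one has this shape:

    def _has_numa_support(self):
        # A new enough libvirt is necessary but not sufficient:
        # the XML elements we generate are QEMU/KVM specific.
        if not self._host.has_min_version(MIN_LIBVIRT_NUMA_VERSION):
            return False
        if CONF.libvirt.virt_type not in ['qemu', 'kvm']:
            return False
        return True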

Closes-bug: #1425115
Related-bug: #1408070
Related-bug: #1415333

Change-Id: I8326596696cbb030ae95d17f3dd430d05279081a
Author:    Daniel P. Berrange
Date:      2015-02-25 10:11:14 +00:00
Committer: Nikola Dipanov
Parent:    ef42f74a07
Commit:    945ab28df0

3 changed files with 218 additions and 112 deletions

nova/exception.py

@@ -1861,3 +1861,11 @@ class UnsupportedPolicyException(Invalid):
class CellMappingNotFound(NotFound):
msg_fmt = _("Cell %(uuid)s has no mapping.")
class NUMATopologyUnsupported(Invalid):
msg_fmt = _("Host does not support guests with NUMA topology set")
class MemoryPagesUnsupported(Invalid):
msg_fmt = _("Host does not support guests with custom memory page sizes")

nova/tests/unit/virt/libvirt/test_driver.py

@@ -1175,32 +1175,32 @@ class LibvirtConnTestCase(test.NoDBTestCase):
self.assertEqual(2048, result.hugepages[0].size_kb)
self.assertEqual([0], result.hugepages[0].nodeset)
def test_get_guest_memory_backing_config_mempages_none(self):
with mock.patch.object(host.Host, 'has_min_version',
return_value=False):
self.assertIsNone(self._test_get_guest_memory_backing_config(
'not_empty', 'not_empty', 'not_empty'))
@mock.patch.object(host.Host,
'has_min_version', return_value=True)
def test_get_guest_memory_backing_config_smallest(self, mock_version):
host_topology = objects.NUMATopology(
cells=[
objects.NUMACell(
id=3, cpuset=set([1]), memory=1024, mempages=[
objects.NUMAPagesTopology(size_kb=4, total=2000,
used=0),
objects.NUMAPagesTopology(size_kb=2048, total=512,
used=0),
objects.NUMAPagesTopology(size_kb=1048576, total=0,
used=0),
])])
inst_topology = objects.InstanceNUMATopology(cells=[
objects.InstanceNUMACell(
id=3, cpuset=set([0, 1]), memory=1024, pagesize=4)])
def test_get_guest_numa_tune_memnodes(self):
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True)
guest_cpu_numa_config = objects.InstanceNUMATopology(cells=[
objects.InstanceNUMACell(
id=1, cpuset=set([0, 1]),
memory=1024, pagesize=2048)])
memnodes = [vconfig.LibvirtConfigGuestNUMATuneMemNode()]
with mock.patch.object(host.Host, 'has_min_version',
return_value=True):
self.assertTrue(drvr._get_guest_numa_tune_memnodes(
guest_cpu_numa_config, memnodes))
self.assertEqual(guest_cpu_numa_config.cells[0].id,
memnodes[0].cellid)
numa_tune = vconfig.LibvirtConfigGuestNUMATune()
numa_tune.memnodes = [vconfig.LibvirtConfigGuestNUMATuneMemNode()]
numa_tune.memnodes[0].cellid = 0
numa_tune.memnodes[0].nodeset = [3]
def test_get_guest_numa_tune_memnodes_none(self):
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True)
with mock.patch.object(host.Host, 'has_min_version',
return_value=False):
self.assertFalse(drvr._get_guest_numa_tune_memnodes(
'something', 'something'))
result = self._test_get_guest_memory_backing_config(
host_topology, inst_topology, numa_tune)
self.assertIsNone(result)
def test_get_guest_config_numa_host_instance_1pci_fits(self):
instance_ref = objects.Instance(**self.test_instance)
@@ -1339,10 +1339,17 @@ class LibvirtConnTestCase(test.NoDBTestCase):
self.assertEqual(0, len(cfg.cputune.vcpupin))
self.assertIsNone(cfg.cpu.numa)
def test_get_guest_config_numa_empty_memnode(self):
@mock.patch.object(fakelibvirt.Connection, 'getLibVersion')
@mock.patch.object(host.Host, 'get_capabilities')
@mock.patch.object(libvirt_driver.LibvirtDriver, '_set_host_enabled')
def _test_get_guest_config_numa_unsupported(self, fake_version,
exception_class, pagesize,
mock_host,
mock_caps, mock_version):
instance_topology = objects.InstanceNUMATopology(
cells=[objects.InstanceNUMACell(
id=0, cpuset=set([0]), memory=1024)])
id=0, cpuset=set([0]),
memory=1024, pagesize=pagesize)])
instance_ref = objects.Instance(**self.test_instance)
instance_ref.numa_topology = instance_topology
image_meta = {}
@@ -1357,25 +1364,44 @@ class LibvirtConnTestCase(test.NoDBTestCase):
caps.host.cpu.arch = "x86_64"
caps.host.topology = self._fake_caps_numa_topology()
def fake_has_min_version(ver):
if ver == libvirt_driver.MIN_LIBVIRT_MEMNODE_VERSION:
return False
return True
mock_version.return_value = fake_version
mock_caps.return_value = caps
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True)
disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type,
instance_ref,
image_meta)
with contextlib.nested(
mock.patch.object(host.Host, 'has_min_version',
side_effect=fake_has_min_version),
mock.patch.object(host.Host, "get_capabilities",
return_value=caps),
mock.patch.object(
random, 'choice', side_effect=lambda cells: cells[0])):
cfg = drvr._get_guest_config(instance_ref, [], {}, disk_info)
self.assertFalse(cfg.numatune.memnodes)
self.assertRaises(exception_class,
drvr._get_guest_config,
instance_ref, [], {}, disk_info)
def test_get_guest_config_numa_old_version(self):
self.flags(virt_type='kvm', group='libvirt')
self._test_get_guest_config_numa_unsupported(
utils.convert_version_to_int(
libvirt_driver.MIN_LIBVIRT_NUMA_VERSION) - 1,
exception.NUMATopologyUnsupported,
None)
def test_get_guest_config_numa_xen(self):
self.flags(virt_type='xen', group='libvirt')
self._test_get_guest_config_numa_unsupported(
utils.convert_version_to_int(
libvirt_driver.MIN_LIBVIRT_NUMA_VERSION),
exception.NUMATopologyUnsupported,
None)
def test_get_guest_config_numa_old_pages(self):
self.flags(virt_type='kvm', group='libvirt')
self._test_get_guest_config_numa_unsupported(
utils.convert_version_to_int(
libvirt_driver.MIN_LIBVIRT_HUGEPAGE_VERSION) - 1,
exception.MemoryPagesUnsupported,
2048)
def test_get_guest_config_numa_host_instance_fit_w_cpu_pinset(self):
instance_ref = objects.Instance(**self.test_instance)
@@ -8724,7 +8750,7 @@ class LibvirtConnTestCase(test.NoDBTestCase):
return topology
def test_get_host_numa_topology(self):
def _test_get_host_numa_topology(self, mempages):
caps = vconfig.LibvirtConfigCaps()
caps.host = vconfig.LibvirtConfigCapsHost()
caps.host.topology = self._fake_caps_numa_topology()
@@ -8744,8 +8770,6 @@ class LibvirtConnTestCase(test.NoDBTestCase):
'mem': {'total': 1024, 'used': 0},
'id': 3}]}
with contextlib.nested(
mock.patch.object(host.Host, 'has_min_version',
return_value=True),
mock.patch.object(host.Host, "get_capabilities",
return_value=caps),
mock.patch.object(
@@ -8758,16 +8782,21 @@ class LibvirtConnTestCase(test.NoDBTestCase):
got_topo_dict = got_topo._to_dict()
self.assertThat(
expected_topo_dict, matchers.DictMatches(got_topo_dict))
# cells 0
self.assertEqual(4, got_topo.cells[0].mempages[0].size_kb)
self.assertEqual(0, got_topo.cells[0].mempages[0].total)
self.assertEqual(2048, got_topo.cells[0].mempages[1].size_kb)
self.assertEqual(0, got_topo.cells[0].mempages[1].total)
# cells 1
self.assertEqual(4, got_topo.cells[1].mempages[0].size_kb)
self.assertEqual(1024, got_topo.cells[1].mempages[0].total)
self.assertEqual(2048, got_topo.cells[1].mempages[1].size_kb)
self.assertEqual(1, got_topo.cells[1].mempages[1].total)
if mempages:
# cells 0
self.assertEqual(4, got_topo.cells[0].mempages[0].size_kb)
self.assertEqual(0, got_topo.cells[0].mempages[0].total)
self.assertEqual(2048, got_topo.cells[0].mempages[1].size_kb)
self.assertEqual(0, got_topo.cells[0].mempages[1].total)
# cells 1
self.assertEqual(4, got_topo.cells[1].mempages[0].size_kb)
self.assertEqual(1024, got_topo.cells[1].mempages[0].total)
self.assertEqual(2048, got_topo.cells[1].mempages[1].size_kb)
self.assertEqual(1, got_topo.cells[1].mempages[1].total)
else:
self.assertEqual([], got_topo.cells[0].mempages)
self.assertEqual([], got_topo.cells[1].mempages)
self.assertEqual(expected_topo_dict, got_topo_dict)
self.assertEqual(set([]), got_topo.cells[0].pinned_cpus)
@@ -8777,6 +8806,22 @@ class LibvirtConnTestCase(test.NoDBTestCase):
self.assertEqual([set([0, 1])], got_topo.cells[0].siblings)
self.assertEqual([], got_topo.cells[1].siblings)
@mock.patch.object(host.Host, 'has_min_version')
def test_get_host_numa_topology(self, mock_version):
def fake_version(ver):
return True
mock_version.side_effect = fake_version
self._test_get_host_numa_topology(mempages=True)
@mock.patch.object(fakelibvirt.Connection, 'getLibVersion')
def test_get_host_numa_topology_no_mempages(self, mock_version):
self.flags(virt_type='kvm', group='libvirt')
mock_version.return_value = utils.convert_version_to_int(
libvirt_driver.MIN_LIBVIRT_HUGEPAGE_VERSION) - 1
self._test_get_host_numa_topology(mempages=False)
def test_get_host_numa_topology_empty(self):
caps = vconfig.LibvirtConfigCaps()
caps.host = vconfig.LibvirtConfigCapsHost()
@@ -8791,12 +8836,25 @@ class LibvirtConnTestCase(test.NoDBTestCase):
self.assertIsNone(drvr._get_host_numa_topology())
get_caps.assert_called_once_with()
def test_get_host_numa_topology_not_supported(self):
# Tests that libvirt isn't new enough to support numa topology.
@mock.patch.object(fakelibvirt.Connection, 'getLibVersion')
def test_get_host_numa_topology_old_version(self, mock_version):
self.flags(virt_type='kvm', group='libvirt')
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
with mock.patch.object(host.Host, 'has_min_version',
return_value=False):
self.assertIsNone(drvr._get_host_numa_topology())
mock_version.return_value = utils.convert_version_to_int(
libvirt_driver.MIN_LIBVIRT_NUMA_VERSION) - 1
self.assertIsNone(drvr._get_host_numa_topology())
@mock.patch.object(host.Host, 'has_min_version')
def test_get_host_numa_topology_xen(self, mock_version):
self.flags(virt_type='xen', group='libvirt')
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
def fake_version(ver):
return True
mock_version.side_effect = fake_version
self.assertIsNone(drvr._get_host_numa_topology())
def test_diagnostic_vcpus_exception(self):
xml = """

nova/virt/libvirt/driver.py

@@ -359,14 +359,20 @@ MIN_LIBVIRT_BLOCKCOMMIT_RELATIVE_VERSION = (1, 2, 7)
MIN_LIBVIRT_DISCARD_VERSION = (1, 0, 6)
MIN_QEMU_DISCARD_VERSION = (1, 6, 0)
REQ_HYPERVISOR_DISCARD = "QEMU"
# libvirt numa topology support
MIN_LIBVIRT_NUMA_TOPOLOGY_VERSION = (1, 0, 4)
# While earlier versions could support NUMA reporting and
# NUMA placement, not until 1.2.7 was there the ability
# to pin guest nodes to host nodes, so mandate that. Without
# this the scheduler cannot make guaranteed decisions, as the
# guest placement may not match what was requested
MIN_LIBVIRT_NUMA_VERSION = (1, 2, 7)
# While earlier versions could support hugepage backed
# guests, not until 1.2.8 was there the ability to request
# a particular huge page size. Without this the scheduler
# cannot make guaranteed decisions, as the huge page size
# used by the guest may not match what was requested
MIN_LIBVIRT_HUGEPAGE_VERSION = (1, 2, 8)
# fsFreeze/fsThaw requirement
MIN_LIBVIRT_FSFREEZE_VERSION = (1, 2, 5)
# libvirt mempage size report
MIN_LIBVIRT_MEMPAGES_VERSION = (1, 2, 8)
# libvirt memory allocation policy for each guest NUMA node
MIN_LIBVIRT_MEMNODE_VERSION = (1, 2, 7)
# Hyper-V paravirtualized time source
MIN_LIBVIRT_HYPERV_TIMER_VERSION = (1, 2, 2)
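
These tuples are compared against the version number reported by
libvirt, which encodes (major, minor, micro) as a single integer
using major * 1000000 + minor * 1000 + micro. The snippet below is
an illustration of the encoding the tests above rely on when they
mock getLibVersion:

    from nova import utils

    # MIN_LIBVIRT_NUMA_VERSION = (1, 2, 7) encodes as 1002007, so
    # the tests simulate a "too old" libvirt by returning 1002006,
    # i.e. libvirt 1.2.6, from getLibVersion.
    assert utils.convert_version_to_int((1, 2, 7)) == 1002007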
@@ -3426,6 +3432,15 @@ class LibvirtDriver(driver.ComputeDriver):
not support NUMA, then guest_cpu_tune and guest_numa_tune
will be None.
"""
if (not self._has_numa_support() and
instance_numa_topology is not None):
# We should not get here, since we should have avoided
# reporting NUMA topology from _get_host_numa_topology
# in the first place. Just in case of a scheduler
# mess up though, raise an exception
raise exception.NUMATopologyUnsupported()
topology = self._get_host_numa_topology()
# We have instance NUMA so translate it to the config class
guest_cpu_numa_config = self._get_cpu_numa_config_from_instance(
@@ -3521,14 +3536,14 @@ class LibvirtDriver(driver.ComputeDriver):
guest_cpu_tune.vcpupin.sort(key=operator.attrgetter("id"))
guest_numa_tune.memory = numa_mem
guest_numa_tune.memnodes = numa_memnodes
# normalize cell.id
for i, cell in enumerate(guest_cpu_numa_config.cells):
for i, (cell, memnode) in enumerate(
zip(guest_cpu_numa_config.cells,
guest_numa_tune.memnodes)):
cell.id = i
guest_numa_tune.memnodes = self._get_guest_numa_tune_memnodes(
guest_cpu_numa_config,
numa_memnodes)
memnode.cellid = i
return GuestNumaConfig(None, guest_cpu_tune,
guest_cpu_numa_config,
@@ -3537,18 +3552,6 @@ class LibvirtDriver(driver.ComputeDriver):
return GuestNumaConfig(allowed_cpus, None,
guest_cpu_numa_config, None)
def _get_guest_numa_tune_memnodes(self, guest_cpu_numa_config,
numa_memnodes):
if not self._host.has_min_version(MIN_LIBVIRT_MEMNODE_VERSION):
return []
# normalize cell.id
for (cell, memnode) in zip(guest_cpu_numa_config.cells,
numa_memnodes):
memnode.cellid = cell.id
return numa_memnodes
def _get_guest_os_type(self, virt_type):
"""Returns the guest OS type based on virt type."""
if virt_type == "lxc":
@@ -3779,37 +3782,52 @@ class LibvirtDriver(driver.ComputeDriver):
self._add_rng_device(guest, flavor)
def _get_guest_memory_backing_config(self, inst_topology, numatune):
if not self._host.has_min_version(MIN_LIBVIRT_MEMPAGES_VERSION):
wantsmempages = False
if inst_topology:
for cell in inst_topology.cells:
if cell.pagesize:
wantsmempages = True
if not wantsmempages:
return
if not self._has_hugepage_support():
# We should not get here, since we should have avoided
# reporting NUMA topology from _get_host_numa_topology
# in the first place. Just in case of a scheduler
# mess up though, raise an exception
raise exception.MemoryPagesUnsupported()
host_topology = self._get_host_numa_topology()
membacking = None
if inst_topology and host_topology:
# Currently libvirt does not support the smallest
# pagesize set as a backend memory.
# https://bugzilla.redhat.com/show_bug.cgi?id=1173507
avail_pagesize = [page.size_kb
for page in host_topology.cells[0].mempages]
avail_pagesize.sort()
smallest = avail_pagesize[0]
if host_topology is None:
# As above, we should not get here but just in case...
raise exception.MemoryPagesUnsupported()
pages = []
for guest_cellid, inst_cell in enumerate(inst_topology.cells):
if inst_cell.pagesize and inst_cell.pagesize > smallest:
for memnode in numatune.memnodes:
if guest_cellid == memnode.cellid:
page = (
vconfig.LibvirtConfigGuestMemoryBackingPage())
page.nodeset = [guest_cellid]
page.size_kb = inst_cell.pagesize
pages.append(page)
break # Quit early...
if pages:
membacking = vconfig.LibvirtConfigGuestMemoryBacking()
membacking.hugepages = pages
# Currently libvirt does not support the smallest
# pagesize set as a backend memory.
# https://bugzilla.redhat.com/show_bug.cgi?id=1173507
avail_pagesize = [page.size_kb
for page in host_topology.cells[0].mempages]
avail_pagesize.sort()
smallest = avail_pagesize[0]
return membacking
pages = []
for guest_cellid, inst_cell in enumerate(inst_topology.cells):
if inst_cell.pagesize and inst_cell.pagesize > smallest:
for memnode in numatune.memnodes:
if guest_cellid == memnode.cellid:
page = (
vconfig.LibvirtConfigGuestMemoryBackingPage())
page.nodeset = [guest_cellid]
page.size_kb = inst_cell.pagesize
pages.append(page)
break # Quit early...
if pages:
membacking = vconfig.LibvirtConfigGuestMemoryBacking()
membacking.hugepages = pages
return membacking
def _get_flavor(self, ctxt, instance, flavor):
if flavor is not None:
@@ -4701,8 +4719,26 @@ class LibvirtDriver(driver.ComputeDriver):
return jsonutils.dumps(pci_info)
def _has_numa_support(self):
if not self._host.has_min_version(MIN_LIBVIRT_NUMA_VERSION):
return False
if CONF.libvirt.virt_type not in ['qemu', 'kvm']:
return False
return True
def _has_hugepage_support(self):
if not self._host.has_min_version(MIN_LIBVIRT_HUGEPAGE_VERSION):
return False
if CONF.libvirt.virt_type not in ['qemu', 'kvm']:
return False
return True
def _get_host_numa_topology(self):
if not self._host.has_min_version(MIN_LIBVIRT_NUMA_TOPOLOGY_VERSION):
if not self._has_numa_support():
return
caps = self._host.get_capabilities()
@@ -4731,17 +4767,21 @@ class LibvirtDriver(driver.ComputeDriver):
# Filter out singles and empty sibling sets that may be left
siblings = [sib for sib in siblings if len(sib) > 1]
mempages = []
if self._has_hugepage_support():
mempages = [
objects.NUMAPagesTopology(
size_kb=pages.size,
total=pages.total,
used=0)
for pages in cell.mempages]
cell = objects.NUMACell(id=cell.id, cpuset=cpuset,
memory=cell.memory / units.Ki,
cpu_usage=0, memory_usage=0,
siblings=siblings,
pinned_cpus=set([]),
mempages=[
objects.NUMAPagesTopology(
size_kb=pages.size,
total=pages.total,
used=0)
for pages in cell.mempages])
mempages=mempages)
cells.append(cell)
return objects.NUMATopology(cells=cells)