libvirt: remove default cputune shares value

Previously, the libvirt driver defaulted to 1024 * (# of CPUs) for the
value of domain/cputune/shares in the libvirt XML. This value is then
passed directly by libvirt to the cgroups API. Cgroups v2 imposes a
maximum value of 10000 that can be passed in. Since 1024 * 10 = 10240
already exceeds that limit, Nova is unable to launch instances with
more than 9 CPUs on hosts that run cgroups v2, like Ubuntu Jammy or
RHEL 9.

Fix this by just removing the default entirely. Because there is no
longer a guarantee that domain/cputune will contain at least a shares
element, we can stop always generating the former, and only generate
it if it will actually contain something.

We can also make operators' lives easier by leveraging the fact that
we update the XML during live migration, so this patch also adds a
method to remove the shares value from the live migration XML if one
was not set as the quota:cpu_shares flavor extra spec.

For operators that *have* set this extra spec to something greater
than 10000, their flavors will have to be updated, and their
instances resized.

Partial-bug: 1978489
Change-Id: I49d757f5f261b3562ada27e6cf57284f615ca395
This commit is contained in:
Artom Lifshitz 2022-01-10 13:36:36 -05:00
parent d869163608
commit f77a9fee5b
6 changed files with 80 additions and 39 deletions

View File

@ -38,7 +38,8 @@ CPU limits
Libvirt enforces CPU limits in terms of *shares* and *quotas*, configured
via :nova:extra-spec:`quota:cpu_shares` and :nova:extra-spec:`quota:cpu_period`
/ :nova:extra-spec:`quota:cpu_quota`, respectively. Both are implemented using
the `cgroups v1 cpu controller`__.
the `cgroups cpu controller`__. Note that allowed values for *shares* are
platform dependent.
CPU shares are a proportional weighted share of total CPU resources relative to
other instances. It does not limit CPU usage if CPUs are not busy. There is no

View File

@ -2991,7 +2991,7 @@ class LibvirtConnTestCase(test.NoDBTestCase,
cfg = drvr._get_guest_config(instance_ref, [],
image_meta, disk_info)
self.assertIsNone(cfg.cpuset)
self.assertEqual(0, len(cfg.cputune.vcpupin))
self.assertIsNone(cfg.cputune)
self.assertIsNone(cfg.cpu.numa)
@mock.patch('nova.privsep.utils.supports_direct_io',
@ -3028,7 +3028,7 @@ class LibvirtConnTestCase(test.NoDBTestCase,
image_meta, disk_info)
self.assertFalse(choice_mock.called)
self.assertIsNone(cfg.cpuset)
self.assertEqual(0, len(cfg.cputune.vcpupin))
self.assertIsNone(cfg.cputune)
self.assertIsNone(cfg.cpu.numa)
def _test_get_guest_memory_backing_config(
@ -3436,7 +3436,7 @@ class LibvirtConnTestCase(test.NoDBTestCase,
cfg = conn._get_guest_config(instance_ref, [],
image_meta, disk_info)
self.assertEqual(set([3]), cfg.cpuset)
self.assertEqual(0, len(cfg.cputune.vcpupin))
self.assertIsNone(cfg.cputune)
self.assertIsNone(cfg.cpu.numa)
@mock.patch('nova.privsep.utils.supports_direct_io',
@ -3490,7 +3490,7 @@ class LibvirtConnTestCase(test.NoDBTestCase,
image_meta, disk_info)
self.assertFalse(choice_mock.called)
self.assertEqual(set([3]), cfg.cpuset)
self.assertEqual(0, len(cfg.cputune.vcpupin))
self.assertIsNone(cfg.cputune)
self.assertIsNone(cfg.cpu.numa)
@mock.patch.object(fakelibvirt.Connection, 'getType')
@ -3589,7 +3589,7 @@ class LibvirtConnTestCase(test.NoDBTestCase,
# NOTE(ndipanov): we make sure that pin_set was taken into account
# when choosing viable cells
self.assertEqual(set([2, 3]), cfg.cpuset)
self.assertEqual(0, len(cfg.cputune.vcpupin))
self.assertIsNone(cfg.cputune)
self.assertIsNone(cfg.cpu.numa)
@mock.patch.object(
@ -3631,7 +3631,7 @@ class LibvirtConnTestCase(test.NoDBTestCase,
cfg = drvr._get_guest_config(instance_ref, [],
image_meta, disk_info)
self.assertIsNone(cfg.cpuset)
self.assertEqual(0, len(cfg.cputune.vcpupin))
self.assertIsNone(cfg.cputune)
self.assertIsNone(cfg.numatune)
self.assertIsNotNone(cfg.cpu.numa)
for instance_cell, numa_cfg_cell in zip(
@ -7038,26 +7038,6 @@ class LibvirtConnTestCase(test.NoDBTestCase,
[],
image_meta, disk_info)
@mock.patch.object(
host.Host, "is_cpu_control_policy_capable", return_value=True)
def test_guest_cpu_shares_with_multi_vcpu(self, is_able):
self.flags(virt_type='kvm', group='libvirt')
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True)
instance_ref = objects.Instance(**self.test_instance)
instance_ref.flavor.vcpus = 4
image_meta = objects.ImageMeta.from_dict(self.test_image_meta)
disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type,
instance_ref,
image_meta)
cfg = drvr._get_guest_config(instance_ref, [],
image_meta, disk_info)
self.assertEqual(4096, cfg.cputune.shares)
@mock.patch.object(
host.Host, "is_cpu_control_policy_capable", return_value=True)
def test_get_guest_config_with_cpu_quota(self, is_able):
@ -11689,7 +11669,7 @@ class LibvirtConnTestCase(test.NoDBTestCase,
mock_migrateToURI3,
mock_min_version):
self.compute = manager.ComputeManager()
instance_ref = self.test_instance
instance_ref = objects.Instance(**self.test_instance)
target_connection = '127.0.0.2'
xml_tmpl = ("<domain type='kvm'>"
@ -12369,7 +12349,7 @@ class LibvirtConnTestCase(test.NoDBTestCase,
mock_get,
mock_min_version):
self.compute = manager.ComputeManager()
instance_ref = self.test_instance
instance_ref = objects.Instance(**self.test_instance)
target_connection = '127.0.0.2'
xml_tmpl = ("<domain type='kvm'>"
@ -12659,7 +12639,7 @@ class LibvirtConnTestCase(test.NoDBTestCase,
mock_min_version):
# Prepare data
self.compute = manager.ComputeManager()
instance_ref = self.test_instance
instance_ref = objects.Instance(**self.test_instance)
target_connection = '127.0.0.2'
disk_paths = ['vda', 'vdb']

View File

@ -28,6 +28,7 @@ from nova import objects
from nova import test
from nova.tests import fixtures as nova_fixtures
from nova.tests.fixtures import libvirt as fakelibvirt
from nova.tests.unit.virt.libvirt import test_driver
from nova.virt.libvirt import config as vconfig
from nova.virt.libvirt import guest as libvirt_guest
from nova.virt.libvirt import host
@ -80,16 +81,51 @@ class UtilityMigrationTestCase(test.NoDBTestCase):
get_volume_config = mock.MagicMock()
mock_guest.get_xml_desc.return_value = '<domain></domain>'
migration.get_updated_guest_xml(
mock.sentinel.instance, mock_guest, data, get_volume_config)
instance = objects.Instance(**test_driver._create_test_instance())
migration.get_updated_guest_xml(instance, mock_guest, data,
get_volume_config)
mock_graphics.assert_called_once_with(mock.ANY, data)
mock_serial.assert_called_once_with(mock.ANY, data)
mock_volume.assert_called_once_with(
mock.ANY, data, mock.sentinel.instance, get_volume_config)
mock.ANY, data, instance, get_volume_config)
mock_perf_events_xml.assert_called_once_with(mock.ANY, data)
mock_memory_backing.assert_called_once_with(mock.ANY, data)
self.assertEqual(1, mock_tostring.called)
def test_update_quota_xml(self):
    """<shares> is stripped from <cputune> while sibling elements stay.

    The expected XML only holds if the flavor of the test instance does
    not carry a ``quota:cpu_shares`` extra spec.
    NOTE(review): presumably that is what ``_create_test_instance()``
    yields - confirm against test_driver.
    """
    source_xml = """<domain>
<name>fake-instance</name>
<cputune>
<shares>42</shares>
<period>1337</period>
</cputune>
</domain>"""
    expected_xml = """<domain>
<name>fake-instance</name>
<cputune>
<period>1337</period>
</cputune>
</domain>"""
    instance = objects.Instance(**test_driver._create_test_instance())
    updated_doc = migration._update_quota_xml(
        instance, etree.fromstring(source_xml))
    # Serialise back to text so the comparison is done on XML strings.
    self.assertXmlEqual(
        expected_xml, etree.tostring(updated_doc, encoding='unicode'))
def test_update_quota_xml_empty_cputune(self):
    """<cputune> is dropped entirely once <shares> was its only child.

    The expected XML only holds if the flavor of the test instance does
    not carry a ``quota:cpu_shares`` extra spec.
    NOTE(review): presumably that is what ``_create_test_instance()``
    yields - confirm against test_driver.
    """
    source_xml = """<domain>
<name>fake-instance</name>
<cputune>
<shares>42</shares>
</cputune>
</domain>"""
    instance = objects.Instance(**test_driver._create_test_instance())
    updated_doc = migration._update_quota_xml(
        instance, etree.fromstring(source_xml))
    # No <cputune> element at all is expected in the result.
    self.assertXmlEqual(
        '<domain><name>fake-instance</name></domain>',
        etree.tostring(updated_doc, encoding='unicode'))
def test_update_device_resources_xml_vpmem(self):
# original xml for vpmems, /dev/dax0.1 and /dev/dax0.2 here
# are vpmem device path on source host

View File

@ -5692,15 +5692,11 @@ class LibvirtDriver(driver.ComputeDriver):
if not is_able or CONF.libvirt.virt_type not in ('lxc', 'kvm', 'qemu'):
return
if guest.cputune is None:
guest.cputune = vconfig.LibvirtConfigGuestCPUTune()
# Setting the default cpu.shares value to be a value
# dependent on the number of vcpus
guest.cputune.shares = 1024 * guest.vcpus
for name in cputuning:
key = "quota:cpu_" + name
if key in flavor.extra_specs:
if guest.cputune is None:
guest.cputune = vconfig.LibvirtConfigGuestCPUTune()
setattr(guest.cputune, name,
int(flavor.extra_specs[key]))

View File

@ -62,6 +62,7 @@ def get_updated_guest_xml(instance, guest, migrate_data, get_volume_config,
xml_doc, migrate_data, instance, get_volume_config)
xml_doc = _update_perf_events_xml(xml_doc, migrate_data)
xml_doc = _update_memory_backing_xml(xml_doc, migrate_data)
xml_doc = _update_quota_xml(instance, xml_doc)
if get_vif_config is not None:
xml_doc = _update_vif_xml(xml_doc, migrate_data, get_vif_config)
if 'dst_numa_info' in migrate_data:
@ -71,6 +72,18 @@ def get_updated_guest_xml(instance, guest, migrate_data, get_volume_config,
return etree.tostring(xml_doc, encoding='unicode')
def _update_quota_xml(instance, xml_doc):
    """Drop a CPU shares value the flavor did not ask for.

    If the flavor of ``instance`` has no (or an empty) ``quota:cpu_shares``
    extra spec, any ``<shares>`` element found under ``<cputune>`` is
    removed. A ``<cputune>`` element that ends up with no children, or
    that had none to begin with, is removed as well.

    :param instance: object exposing ``flavor.extra_specs`` as a mapping
    :param xml_doc: parsed domain XML element; it is modified in place
    :returns: the same ``xml_doc`` object
    """
    requested_shares = instance.flavor.extra_specs.get('quota:cpu_shares')
    cputune_elem = xml_doc.find('./cputune')
    shares_elem = xml_doc.find('./cputune/shares')

    if cputune_elem is None:
        # Without <cputune> there cannot be a <shares> to strip either.
        return xml_doc

    if not requested_shares and shares_elem is not None:
        cputune_elem.remove(shares_elem)

    # An element's length is its number of direct children.
    if len(cputune_elem) == 0:
        xml_doc.remove(cputune_elem)
    return xml_doc
def _update_device_resources_xml(xml_doc, new_resources):
vpmems = []
for resource in new_resources:

View File

@ -0,0 +1,15 @@
upgrade:
- |
In the libvirt driver, the default value of the ``<cputune><shares>``
element has been removed, and is now left to libvirt to decide. This is
because allowed values are platform dependent, and the previous code was
not guaranteed to be supported on all platforms. If any of your flavors are
using the ``quota:cpu_shares`` extra spec, you may need to resize the
affected instances to a flavor with a supported value before upgrading.
To facilitate the transition to no Nova default for ``<cputune><shares>``,
its value will be removed during live migration unless a value is set in
the ``quota:cpu_shares`` extra spec. This can cause temporary CPU
starvation for the live migrated instance if other instances on the
destination host still have the old default ``<cputune><shares>`` value. To
fix this, hard reboot, cold migrate, or live migrate the other instances.