libvirt: remove default cputune shares value
Previously, the libvirt driver defaulted to 1024 * (# of CPUs) for the value of domain/cputune/shares in the libvirt XML. This value is then passed directly by libvirt to the cgroups API. Cgroups v2 imposes a maximum value of 10000 that can be passed in. This makes Nova unable to launch instances with more than 9 CPUs on hosts that run cgroups v2, like Ubuntu Jammy or RHEL 9. Fix this by just removing the default entirely. Because there is no longer a guarantee that domain/cputune will contain at least a shares element, we can stop always generating the former, and only generate it if it will actually contain something. We can also make operators' lives easier by leveraging the fact that we update the XML during live migration, so this patch also adds a method to remove the shares value from the live migration XML if one was not set as the quota:cpu_shares flavor extra spec. For operators that *have* set this extra spec to something greater than 10000, their flavors will have to be updated, and their instances resized. Partial-bug: 1978489 Change-Id: I49d757f5f261b3562ada27e6cf57284f615ca395
This commit is contained in:
parent
d869163608
commit
f77a9fee5b
@ -38,7 +38,8 @@ CPU limits
|
||||
Libvirt enforces CPU limits in terms of *shares* and *quotas*, configured
|
||||
via :nova:extra-spec:`quota:cpu_shares` and :nova:extra-spec:`quota:cpu_period`
|
||||
/ :nova:extra-spec:`quota:cpu_quota`, respectively. Both are implemented using
|
||||
the `cgroups v1 cpu controller`__.
|
||||
the `cgroups cpu controller`__. Note that allowed values for *shares* are
|
||||
platform dependent.
|
||||
|
||||
CPU shares are a proportional weighted share of total CPU resources relative to
|
||||
other instances. It does not limit CPU usage if CPUs are not busy. There is no
|
||||
|
@ -2991,7 +2991,7 @@ class LibvirtConnTestCase(test.NoDBTestCase,
|
||||
cfg = drvr._get_guest_config(instance_ref, [],
|
||||
image_meta, disk_info)
|
||||
self.assertIsNone(cfg.cpuset)
|
||||
self.assertEqual(0, len(cfg.cputune.vcpupin))
|
||||
self.assertIsNone(cfg.cputune)
|
||||
self.assertIsNone(cfg.cpu.numa)
|
||||
|
||||
@mock.patch('nova.privsep.utils.supports_direct_io',
|
||||
@ -3028,7 +3028,7 @@ class LibvirtConnTestCase(test.NoDBTestCase,
|
||||
image_meta, disk_info)
|
||||
self.assertFalse(choice_mock.called)
|
||||
self.assertIsNone(cfg.cpuset)
|
||||
self.assertEqual(0, len(cfg.cputune.vcpupin))
|
||||
self.assertIsNone(cfg.cputune)
|
||||
self.assertIsNone(cfg.cpu.numa)
|
||||
|
||||
def _test_get_guest_memory_backing_config(
|
||||
@ -3436,7 +3436,7 @@ class LibvirtConnTestCase(test.NoDBTestCase,
|
||||
cfg = conn._get_guest_config(instance_ref, [],
|
||||
image_meta, disk_info)
|
||||
self.assertEqual(set([3]), cfg.cpuset)
|
||||
self.assertEqual(0, len(cfg.cputune.vcpupin))
|
||||
self.assertIsNone(cfg.cputune)
|
||||
self.assertIsNone(cfg.cpu.numa)
|
||||
|
||||
@mock.patch('nova.privsep.utils.supports_direct_io',
|
||||
@ -3490,7 +3490,7 @@ class LibvirtConnTestCase(test.NoDBTestCase,
|
||||
image_meta, disk_info)
|
||||
self.assertFalse(choice_mock.called)
|
||||
self.assertEqual(set([3]), cfg.cpuset)
|
||||
self.assertEqual(0, len(cfg.cputune.vcpupin))
|
||||
self.assertIsNone(cfg.cputune)
|
||||
self.assertIsNone(cfg.cpu.numa)
|
||||
|
||||
@mock.patch.object(fakelibvirt.Connection, 'getType')
|
||||
@ -3589,7 +3589,7 @@ class LibvirtConnTestCase(test.NoDBTestCase,
|
||||
# NOTE(ndipanov): we make sure that pin_set was taken into account
|
||||
# when choosing viable cells
|
||||
self.assertEqual(set([2, 3]), cfg.cpuset)
|
||||
self.assertEqual(0, len(cfg.cputune.vcpupin))
|
||||
self.assertIsNone(cfg.cputune)
|
||||
self.assertIsNone(cfg.cpu.numa)
|
||||
|
||||
@mock.patch.object(
|
||||
@ -3631,7 +3631,7 @@ class LibvirtConnTestCase(test.NoDBTestCase,
|
||||
cfg = drvr._get_guest_config(instance_ref, [],
|
||||
image_meta, disk_info)
|
||||
self.assertIsNone(cfg.cpuset)
|
||||
self.assertEqual(0, len(cfg.cputune.vcpupin))
|
||||
self.assertIsNone(cfg.cputune)
|
||||
self.assertIsNone(cfg.numatune)
|
||||
self.assertIsNotNone(cfg.cpu.numa)
|
||||
for instance_cell, numa_cfg_cell in zip(
|
||||
@ -7038,26 +7038,6 @@ class LibvirtConnTestCase(test.NoDBTestCase,
|
||||
[],
|
||||
image_meta, disk_info)
|
||||
|
||||
@mock.patch.object(
|
||||
host.Host, "is_cpu_control_policy_capable", return_value=True)
|
||||
def test_guest_cpu_shares_with_multi_vcpu(self, is_able):
|
||||
self.flags(virt_type='kvm', group='libvirt')
|
||||
|
||||
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True)
|
||||
|
||||
instance_ref = objects.Instance(**self.test_instance)
|
||||
instance_ref.flavor.vcpus = 4
|
||||
image_meta = objects.ImageMeta.from_dict(self.test_image_meta)
|
||||
|
||||
disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type,
|
||||
instance_ref,
|
||||
image_meta)
|
||||
|
||||
cfg = drvr._get_guest_config(instance_ref, [],
|
||||
image_meta, disk_info)
|
||||
|
||||
self.assertEqual(4096, cfg.cputune.shares)
|
||||
|
||||
@mock.patch.object(
|
||||
host.Host, "is_cpu_control_policy_capable", return_value=True)
|
||||
def test_get_guest_config_with_cpu_quota(self, is_able):
|
||||
@ -11689,7 +11669,7 @@ class LibvirtConnTestCase(test.NoDBTestCase,
|
||||
mock_migrateToURI3,
|
||||
mock_min_version):
|
||||
self.compute = manager.ComputeManager()
|
||||
instance_ref = self.test_instance
|
||||
instance_ref = objects.Instance(**self.test_instance)
|
||||
target_connection = '127.0.0.2'
|
||||
|
||||
xml_tmpl = ("<domain type='kvm'>"
|
||||
@ -12369,7 +12349,7 @@ class LibvirtConnTestCase(test.NoDBTestCase,
|
||||
mock_get,
|
||||
mock_min_version):
|
||||
self.compute = manager.ComputeManager()
|
||||
instance_ref = self.test_instance
|
||||
instance_ref = objects.Instance(**self.test_instance)
|
||||
target_connection = '127.0.0.2'
|
||||
|
||||
xml_tmpl = ("<domain type='kvm'>"
|
||||
@ -12659,7 +12639,7 @@ class LibvirtConnTestCase(test.NoDBTestCase,
|
||||
mock_min_version):
|
||||
# Prepare data
|
||||
self.compute = manager.ComputeManager()
|
||||
instance_ref = self.test_instance
|
||||
instance_ref = objects.Instance(**self.test_instance)
|
||||
target_connection = '127.0.0.2'
|
||||
|
||||
disk_paths = ['vda', 'vdb']
|
||||
|
@ -28,6 +28,7 @@ from nova import objects
|
||||
from nova import test
|
||||
from nova.tests import fixtures as nova_fixtures
|
||||
from nova.tests.fixtures import libvirt as fakelibvirt
|
||||
from nova.tests.unit.virt.libvirt import test_driver
|
||||
from nova.virt.libvirt import config as vconfig
|
||||
from nova.virt.libvirt import guest as libvirt_guest
|
||||
from nova.virt.libvirt import host
|
||||
@ -80,16 +81,51 @@ class UtilityMigrationTestCase(test.NoDBTestCase):
|
||||
get_volume_config = mock.MagicMock()
|
||||
mock_guest.get_xml_desc.return_value = '<domain></domain>'
|
||||
|
||||
migration.get_updated_guest_xml(
|
||||
mock.sentinel.instance, mock_guest, data, get_volume_config)
|
||||
instance = objects.Instance(**test_driver._create_test_instance())
|
||||
migration.get_updated_guest_xml(instance, mock_guest, data,
|
||||
get_volume_config)
|
||||
mock_graphics.assert_called_once_with(mock.ANY, data)
|
||||
mock_serial.assert_called_once_with(mock.ANY, data)
|
||||
mock_volume.assert_called_once_with(
|
||||
mock.ANY, data, mock.sentinel.instance, get_volume_config)
|
||||
mock.ANY, data, instance, get_volume_config)
|
||||
mock_perf_events_xml.assert_called_once_with(mock.ANY, data)
|
||||
mock_memory_backing.assert_called_once_with(mock.ANY, data)
|
||||
self.assertEqual(1, mock_tostring.called)
|
||||
|
||||
def test_update_quota_xml(self):
|
||||
old_xml = """<domain>
|
||||
<name>fake-instance</name>
|
||||
<cputune>
|
||||
<shares>42</shares>
|
||||
<period>1337</period>
|
||||
</cputune>
|
||||
</domain>"""
|
||||
instance = objects.Instance(**test_driver._create_test_instance())
|
||||
new_xml = migration._update_quota_xml(instance,
|
||||
etree.fromstring(old_xml))
|
||||
new_xml = etree.tostring(new_xml, encoding='unicode')
|
||||
self.assertXmlEqual(
|
||||
"""<domain>
|
||||
<name>fake-instance</name>
|
||||
<cputune>
|
||||
<period>1337</period>
|
||||
</cputune>
|
||||
</domain>""", new_xml)
|
||||
|
||||
def test_update_quota_xml_empty_cputune(self):
|
||||
old_xml = """<domain>
|
||||
<name>fake-instance</name>
|
||||
<cputune>
|
||||
<shares>42</shares>
|
||||
</cputune>
|
||||
</domain>"""
|
||||
instance = objects.Instance(**test_driver._create_test_instance())
|
||||
new_xml = migration._update_quota_xml(instance,
|
||||
etree.fromstring(old_xml))
|
||||
new_xml = etree.tostring(new_xml, encoding='unicode')
|
||||
self.assertXmlEqual('<domain><name>fake-instance</name></domain>',
|
||||
new_xml)
|
||||
|
||||
def test_update_device_resources_xml_vpmem(self):
|
||||
# original xml for vpmems, /dev/dax0.1 and /dev/dax0.2 here
|
||||
# are vpmem device path on source host
|
||||
|
@ -5692,15 +5692,11 @@ class LibvirtDriver(driver.ComputeDriver):
|
||||
if not is_able or CONF.libvirt.virt_type not in ('lxc', 'kvm', 'qemu'):
|
||||
return
|
||||
|
||||
if guest.cputune is None:
|
||||
guest.cputune = vconfig.LibvirtConfigGuestCPUTune()
|
||||
# Setting the default cpu.shares value to be a value
|
||||
# dependent on the number of vcpus
|
||||
guest.cputune.shares = 1024 * guest.vcpus
|
||||
|
||||
for name in cputuning:
|
||||
key = "quota:cpu_" + name
|
||||
if key in flavor.extra_specs:
|
||||
if guest.cputune is None:
|
||||
guest.cputune = vconfig.LibvirtConfigGuestCPUTune()
|
||||
setattr(guest.cputune, name,
|
||||
int(flavor.extra_specs[key]))
|
||||
|
||||
|
@ -62,6 +62,7 @@ def get_updated_guest_xml(instance, guest, migrate_data, get_volume_config,
|
||||
xml_doc, migrate_data, instance, get_volume_config)
|
||||
xml_doc = _update_perf_events_xml(xml_doc, migrate_data)
|
||||
xml_doc = _update_memory_backing_xml(xml_doc, migrate_data)
|
||||
xml_doc = _update_quota_xml(instance, xml_doc)
|
||||
if get_vif_config is not None:
|
||||
xml_doc = _update_vif_xml(xml_doc, migrate_data, get_vif_config)
|
||||
if 'dst_numa_info' in migrate_data:
|
||||
@ -71,6 +72,18 @@ def get_updated_guest_xml(instance, guest, migrate_data, get_volume_config,
|
||||
return etree.tostring(xml_doc, encoding='unicode')
|
||||
|
||||
|
||||
def _update_quota_xml(instance, xml_doc):
|
||||
flavor_shares = instance.flavor.extra_specs.get('quota:cpu_shares')
|
||||
cputune = xml_doc.find('./cputune')
|
||||
shares = xml_doc.find('./cputune/shares')
|
||||
if shares is not None and not flavor_shares:
|
||||
cputune.remove(shares)
|
||||
# Remove the cputune element entirely if it has no children left.
|
||||
if cputune is not None and not list(cputune):
|
||||
xml_doc.remove(cputune)
|
||||
return xml_doc
|
||||
|
||||
|
||||
def _update_device_resources_xml(xml_doc, new_resources):
|
||||
vpmems = []
|
||||
for resource in new_resources:
|
||||
|
@ -0,0 +1,15 @@
|
||||
upgrade:
|
||||
- |
|
||||
In the libvirt driver, the default value of the ``<cputune><shares>``
|
||||
element has been removed, and is now left to libvirt to decide. This is
|
||||
because allowed values are platform dependent, and the previous code was
|
||||
not guaranteed to be supported on all platforms. If any of your flavors are
|
||||
using the quota:cpu_shares extra spec, you may need to resize to a
|
||||
supported value before upgrading.
|
||||
|
||||
To facilitate the transition to no Nova default for ``<cputune><shares>``,
|
||||
its value will be removed during live migration unless a value is set in
|
||||
the ``quota:cpu_shares`` extra spec. This can cause temporary CPU
|
||||
starvation for the live migrated instance if other instances on the
|
||||
destination host still have the old default ``<cputune><shares>`` value. To
|
||||
fix this, hard reboot, cold migrate, or live migrate the other instances.
|
Loading…
Reference in New Issue
Block a user