From f77a9fee5b736899ecc39d33e4f4e4012cee751c Mon Sep 17 00:00:00 2001 From: Artom Lifshitz Date: Mon, 10 Jan 2022 13:36:36 -0500 Subject: [PATCH] libvirt: remove default cputune shares value Previously, the libvirt driver defaulted to 1024 * (# of CPUs) for the value of domain/cputune/shares in the libvirt XML. This value is then passed directly by libvirt to the cgroups API. Cgroups v2 imposes a maximum value of 10000 that can be passed in. This makes Nova unable to launch instances with more than 9 CPUs on hosts that run cgroups v2, like Ubuntu Jammy or RHEL 9. Fix this by just removing the default entirely. Because there is no longer a guarantee that domain/cputune will contain at least a shares element, we can stop always generating the former, and only generate it if it will actually contain something. We can also make operators's lives easier by leveraging the fact that we update the XML during live migration, so this patch also adds a method to remove the shares value from the live migration XML if one was not set as the quota:cpu_shares flavor extra spec. For operators that *have* set this extra spec to something greater than 10000, their flavors will have to get updates, and their instances resized. Partial-bug: 1978489 Change-Id: I49d757f5f261b3562ada27e6cf57284f615ca395 --- doc/source/admin/resource-limits.rst | 3 +- nova/tests/unit/virt/libvirt/test_driver.py | 38 ++++------------- .../tests/unit/virt/libvirt/test_migration.py | 42 +++++++++++++++++-- nova/virt/libvirt/driver.py | 8 +--- nova/virt/libvirt/migration.py | 13 ++++++ ...putune-shares-values-85d5ddf4b8e24eaa.yaml | 15 +++++++ 6 files changed, 80 insertions(+), 39 deletions(-) create mode 100644 releasenotes/notes/remove-default-cputune-shares-values-85d5ddf4b8e24eaa.yaml diff --git a/doc/source/admin/resource-limits.rst b/doc/source/admin/resource-limits.rst index c74ad31c17bc..8ef248a9a1d2 100644 --- a/doc/source/admin/resource-limits.rst +++ b/doc/source/admin/resource-limits.rst @@ -38,7 +38,8 @@ CPU limits Libvirt enforces CPU limits in terms of *shares* and *quotas*, configured via :nova:extra-spec:`quota:cpu_shares` and :nova:extra-spec:`quota:cpu_period` / :nova:extra-spec:`quota:cpu_quota`, respectively. Both are implemented using -the `cgroups v1 cpu controller`__. +the `cgroups cpu controller`__. Note that allowed values for *shares* are +platform dependant. CPU shares are a proportional weighted share of total CPU resources relative to other instances. It does not limit CPU usage if CPUs are not busy. There is no diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py index df9bc71fc40a..6dedf7ef4627 100644 --- a/nova/tests/unit/virt/libvirt/test_driver.py +++ b/nova/tests/unit/virt/libvirt/test_driver.py @@ -2991,7 +2991,7 @@ class LibvirtConnTestCase(test.NoDBTestCase, cfg = drvr._get_guest_config(instance_ref, [], image_meta, disk_info) self.assertIsNone(cfg.cpuset) - self.assertEqual(0, len(cfg.cputune.vcpupin)) + self.assertIsNone(cfg.cputune) self.assertIsNone(cfg.cpu.numa) @mock.patch('nova.privsep.utils.supports_direct_io', @@ -3028,7 +3028,7 @@ class LibvirtConnTestCase(test.NoDBTestCase, image_meta, disk_info) self.assertFalse(choice_mock.called) self.assertIsNone(cfg.cpuset) - self.assertEqual(0, len(cfg.cputune.vcpupin)) + self.assertIsNone(cfg.cputune) self.assertIsNone(cfg.cpu.numa) def _test_get_guest_memory_backing_config( @@ -3436,7 +3436,7 @@ class LibvirtConnTestCase(test.NoDBTestCase, cfg = conn._get_guest_config(instance_ref, [], image_meta, disk_info) self.assertEqual(set([3]), cfg.cpuset) - self.assertEqual(0, len(cfg.cputune.vcpupin)) + self.assertIsNone(cfg.cputune) self.assertIsNone(cfg.cpu.numa) @mock.patch('nova.privsep.utils.supports_direct_io', @@ -3490,7 +3490,7 @@ class LibvirtConnTestCase(test.NoDBTestCase, image_meta, disk_info) self.assertFalse(choice_mock.called) self.assertEqual(set([3]), cfg.cpuset) - self.assertEqual(0, len(cfg.cputune.vcpupin)) + self.assertIsNone(cfg.cputune) self.assertIsNone(cfg.cpu.numa) @mock.patch.object(fakelibvirt.Connection, 'getType') @@ -3589,7 +3589,7 @@ class LibvirtConnTestCase(test.NoDBTestCase, # NOTE(ndipanov): we make sure that pin_set was taken into account # when choosing viable cells self.assertEqual(set([2, 3]), cfg.cpuset) - self.assertEqual(0, len(cfg.cputune.vcpupin)) + self.assertIsNone(cfg.cputune) self.assertIsNone(cfg.cpu.numa) @mock.patch.object( @@ -3631,7 +3631,7 @@ class LibvirtConnTestCase(test.NoDBTestCase, cfg = drvr._get_guest_config(instance_ref, [], image_meta, disk_info) self.assertIsNone(cfg.cpuset) - self.assertEqual(0, len(cfg.cputune.vcpupin)) + self.assertIsNone(cfg.cputune) self.assertIsNone(cfg.numatune) self.assertIsNotNone(cfg.cpu.numa) for instance_cell, numa_cfg_cell in zip( @@ -7038,26 +7038,6 @@ class LibvirtConnTestCase(test.NoDBTestCase, [], image_meta, disk_info) - @mock.patch.object( - host.Host, "is_cpu_control_policy_capable", return_value=True) - def test_guest_cpu_shares_with_multi_vcpu(self, is_able): - self.flags(virt_type='kvm', group='libvirt') - - drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True) - - instance_ref = objects.Instance(**self.test_instance) - instance_ref.flavor.vcpus = 4 - image_meta = objects.ImageMeta.from_dict(self.test_image_meta) - - disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type, - instance_ref, - image_meta) - - cfg = drvr._get_guest_config(instance_ref, [], - image_meta, disk_info) - - self.assertEqual(4096, cfg.cputune.shares) - @mock.patch.object( host.Host, "is_cpu_control_policy_capable", return_value=True) def test_get_guest_config_with_cpu_quota(self, is_able): @@ -11689,7 +11669,7 @@ class LibvirtConnTestCase(test.NoDBTestCase, mock_migrateToURI3, mock_min_version): self.compute = manager.ComputeManager() - instance_ref = self.test_instance + instance_ref = objects.Instance(**self.test_instance) target_connection = '127.0.0.2' xml_tmpl = ("" @@ -12369,7 +12349,7 @@ class LibvirtConnTestCase(test.NoDBTestCase, mock_get, mock_min_version): self.compute = manager.ComputeManager() - instance_ref = self.test_instance + instance_ref = objects.Instance(**self.test_instance) target_connection = '127.0.0.2' xml_tmpl = ("" @@ -12659,7 +12639,7 @@ class LibvirtConnTestCase(test.NoDBTestCase, mock_min_version): # Prepare data self.compute = manager.ComputeManager() - instance_ref = self.test_instance + instance_ref = objects.Instance(**self.test_instance) target_connection = '127.0.0.2' disk_paths = ['vda', 'vdb'] diff --git a/nova/tests/unit/virt/libvirt/test_migration.py b/nova/tests/unit/virt/libvirt/test_migration.py index f4e64fbe53ed..70488f88cf43 100644 --- a/nova/tests/unit/virt/libvirt/test_migration.py +++ b/nova/tests/unit/virt/libvirt/test_migration.py @@ -28,6 +28,7 @@ from nova import objects from nova import test from nova.tests import fixtures as nova_fixtures from nova.tests.fixtures import libvirt as fakelibvirt +from nova.tests.unit.virt.libvirt import test_driver from nova.virt.libvirt import config as vconfig from nova.virt.libvirt import guest as libvirt_guest from nova.virt.libvirt import host @@ -80,16 +81,51 @@ class UtilityMigrationTestCase(test.NoDBTestCase): get_volume_config = mock.MagicMock() mock_guest.get_xml_desc.return_value = '' - migration.get_updated_guest_xml( - mock.sentinel.instance, mock_guest, data, get_volume_config) + instance = objects.Instance(**test_driver._create_test_instance()) + migration.get_updated_guest_xml(instance, mock_guest, data, + get_volume_config) mock_graphics.assert_called_once_with(mock.ANY, data) mock_serial.assert_called_once_with(mock.ANY, data) mock_volume.assert_called_once_with( - mock.ANY, data, mock.sentinel.instance, get_volume_config) + mock.ANY, data, instance, get_volume_config) mock_perf_events_xml.assert_called_once_with(mock.ANY, data) mock_memory_backing.assert_called_once_with(mock.ANY, data) self.assertEqual(1, mock_tostring.called) + def test_update_quota_xml(self): + old_xml = """ + fake-instance + + 42 + 1337 + + """ + instance = objects.Instance(**test_driver._create_test_instance()) + new_xml = migration._update_quota_xml(instance, + etree.fromstring(old_xml)) + new_xml = etree.tostring(new_xml, encoding='unicode') + self.assertXmlEqual( + """ + fake-instance + + 1337 + + """, new_xml) + + def test_update_quota_xml_empty_cputune(self): + old_xml = """ + fake-instance + + 42 + + """ + instance = objects.Instance(**test_driver._create_test_instance()) + new_xml = migration._update_quota_xml(instance, + etree.fromstring(old_xml)) + new_xml = etree.tostring(new_xml, encoding='unicode') + self.assertXmlEqual('fake-instance', + new_xml) + def test_update_device_resources_xml_vpmem(self): # original xml for vpmems, /dev/dax0.1 and /dev/dax0.2 here # are vpmem device path on source host diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index b35f17fe3ae4..132a24b7b7f0 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -5692,15 +5692,11 @@ class LibvirtDriver(driver.ComputeDriver): if not is_able or CONF.libvirt.virt_type not in ('lxc', 'kvm', 'qemu'): return - if guest.cputune is None: - guest.cputune = vconfig.LibvirtConfigGuestCPUTune() - # Setting the default cpu.shares value to be a value - # dependent on the number of vcpus - guest.cputune.shares = 1024 * guest.vcpus - for name in cputuning: key = "quota:cpu_" + name if key in flavor.extra_specs: + if guest.cputune is None: + guest.cputune = vconfig.LibvirtConfigGuestCPUTune() setattr(guest.cputune, name, int(flavor.extra_specs[key])) diff --git a/nova/virt/libvirt/migration.py b/nova/virt/libvirt/migration.py index 8cea9f298313..4726111a7658 100644 --- a/nova/virt/libvirt/migration.py +++ b/nova/virt/libvirt/migration.py @@ -62,6 +62,7 @@ def get_updated_guest_xml(instance, guest, migrate_data, get_volume_config, xml_doc, migrate_data, instance, get_volume_config) xml_doc = _update_perf_events_xml(xml_doc, migrate_data) xml_doc = _update_memory_backing_xml(xml_doc, migrate_data) + xml_doc = _update_quota_xml(instance, xml_doc) if get_vif_config is not None: xml_doc = _update_vif_xml(xml_doc, migrate_data, get_vif_config) if 'dst_numa_info' in migrate_data: @@ -71,6 +72,18 @@ def get_updated_guest_xml(instance, guest, migrate_data, get_volume_config, return etree.tostring(xml_doc, encoding='unicode') +def _update_quota_xml(instance, xml_doc): + flavor_shares = instance.flavor.extra_specs.get('quota:cpu_shares') + cputune = xml_doc.find('./cputune') + shares = xml_doc.find('./cputune/shares') + if shares is not None and not flavor_shares: + cputune.remove(shares) + # Remove the cputune element entirely if it has no children left. + if cputune is not None and not list(cputune): + xml_doc.remove(cputune) + return xml_doc + + def _update_device_resources_xml(xml_doc, new_resources): vpmems = [] for resource in new_resources: diff --git a/releasenotes/notes/remove-default-cputune-shares-values-85d5ddf4b8e24eaa.yaml b/releasenotes/notes/remove-default-cputune-shares-values-85d5ddf4b8e24eaa.yaml new file mode 100644 index 000000000000..9dd0987bb8c1 --- /dev/null +++ b/releasenotes/notes/remove-default-cputune-shares-values-85d5ddf4b8e24eaa.yaml @@ -0,0 +1,15 @@ +upgrade: + - | + In the libvirt driver, the default value of the ```` + element has been removed, and is now left to libvirt to decide. This is + because allowed values are platform dependant, and the previous code was + not guaranteed to be supported on all platforms. If any of your flavors are + using the quota:cpu_shares extra spec, you may need to resize to a + supported value before upgrading. + + To facilitate the transition to no Nova default for ````, + its value will be removed during live migration unless a value is set in + the ``quota:cpu_shares`` extra spec. This can cause temporary CPU + starvation for the live migrated instance if other instances on the + destination host still have the old default ```` value. To + fix this, hard reboot, cold migrate, or live migrate the other instances.