From 7ddab327675d36a4ba59d02d22d042d418236336 Mon Sep 17 00:00:00 2001
From: Stephen Finucane
Date: Thu, 30 Jul 2020 17:36:24 +0100
Subject: [PATCH] hardware: Reject requests for no hyperthreads on hosts with HT

Attempting to boot an instance with 'hw:cpu_policy=dedicated' will
result in a request from nova-scheduler to placement for allocation
candidates with $flavor.vcpu 'PCPU' inventory. Similarly, booting an
instance with 'hw:cpu_thread_policy=isolate' will result in a request
for allocation candidates with 'HW_CPU_HYPERTHREADING=forbidden', i.e.
hosts without hyperthreading. This has been the case since the
cpu-resources feature was implemented in Train.

However, as part of that work and to enable upgrades from hosts that
predated Train, we also make a second request for candidates with
$flavor.vcpu 'VCPU' inventory. The idea behind this is that old compute
nodes would only report 'VCPU' and should be usable, and any new
compute nodes that got caught up in this second request could never
actually be scheduled to, since there wouldn't be enough cores from
'ComputeNode.numa_topology.cells.[*].pcpuset' available to schedule to,
resulting in rejection by the 'NUMATopologyFilter'. However, if a host
was rejected in the first query because it reported the
'HW_CPU_HYPERTHREADING' trait, it could get picked up by the second
query and would happily be scheduled to, resulting in an instance
consuming 'VCPU' inventory from a host that properly supported 'PCPU'
inventory.

The solution is simple, though also a huge hack. If we detect that the
host is using new-style configuration and should be able to report
'PCPU', check whether the instance asked for no hyperthreading and
whether the host has it. If all are true, reject the request.

Change-Id: Id39aaaac09585ca1a754b669351c86e234b89dd9
Signed-off-by: Stephen Finucane
Closes-Bug: #1889633
(cherry picked from commit 9c270332041d6b98951c0b57d7b344fd551a413c)
---
 .../functional/libvirt/test_numa_servers.py   |  7 +-
 nova/tests/unit/virt/test_hardware.py         | 75 ++++++++++++++++---
 nova/virt/hardware.py                         | 24 ++++++
 .../notes/bug-1889633-37e524fb6c20fbdf.yaml   |  9 +++
 4 files changed, 99 insertions(+), 16 deletions(-)
 create mode 100644 releasenotes/notes/bug-1889633-37e524fb6c20fbdf.yaml

diff --git a/nova/tests/functional/libvirt/test_numa_servers.py b/nova/tests/functional/libvirt/test_numa_servers.py
index a5cee9946d54..184fbb822b28 100644
--- a/nova/tests/functional/libvirt/test_numa_servers.py
+++ b/nova/tests/functional/libvirt/test_numa_servers.py
@@ -383,12 +383,7 @@ class NUMAServersTest(NUMAServersTestBase):
         }
         flavor_id = self._create_flavor(vcpu=2, extra_spec=extra_spec)
 
-        # FIXME(stephenfin): This should go to error status since there should
-        # not be a host available
-        expected_usage = {
-            'DISK_GB': 20, 'MEMORY_MB': 2048, 'PCPU': 0, 'VCPU': 2,
-        }
-        self._run_build_test(flavor_id, expected_usage=expected_usage)
+        self._run_build_test(flavor_id, end_status='ERROR')
 
     def test_create_server_with_pcpu(self):
         """Create a server using an explicit 'resources:PCPU' request.
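
For illustration, here is a minimal sketch of the two allocation candidate
queries described in the commit message. It uses plain dicts rather than
nova's request-spec machinery, and build_queries() is a hypothetical helper,
not a nova function; only the resource classes and trait named above are
taken from the described behaviour.

# A 2-vCPU flavor with hw:cpu_policy=dedicated and
# hw:cpu_thread_policy=isolate results in a primary query for PCPU
# inventory on hosts without the HW_CPU_HYPERTHREADING trait, plus a
# fallback query for VCPU inventory to accommodate pre-Train computes.
def build_queries(vcpus):
    primary = {
        'resources': {'PCPU': vcpus},
        'forbidden_traits': {'HW_CPU_HYPERTHREADING'},
    }
    # The fallback drops both the PCPU resource class and the trait
    # requirement; hosts rejected by the primary query for having
    # hyperthreads can therefore reappear here, which is bug #1889633.
    fallback = {'resources': {'VCPU': vcpus}}
    return [primary, fallback]


if __name__ == '__main__':
    for query in build_queries(2):
        print(query)
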
diff --git a/nova/tests/unit/virt/test_hardware.py b/nova/tests/unit/virt/test_hardware.py
index 7bbaa0fee29b..4349f47c1284 100644
--- a/nova/tests/unit/virt/test_hardware.py
+++ b/nova/tests/unit/virt/test_hardware.py
@@ -3022,10 +3022,13 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
         got_topo = objects.VirtCPUTopology(sockets=1, cores=5, threads=1)
         self.assertEqualTopology(got_topo, inst_pin.cpu_topology)
 
+    # TODO(stephenfin): Remove when we drop support for vcpu_pin_set
     def test_get_pinning_isolate_policy_too_few_fully_free_cores(self):
         host_pin = objects.NUMACell(
             id=0,
-            cpuset=set(),
+            # Simulate a legacy host with vcpu_pin_set configuration,
+            # meaning cpuset == pcpuset
+            cpuset=set([0, 1, 2, 3]),
             pcpuset=set([0, 1, 2, 3]),
             memory=4096,
             memory_usage=0,
@@ -3040,10 +3043,13 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
         inst_pin = hw._numa_fit_instance_cell_with_pinning(host_pin, inst_pin)
         self.assertIsNone(inst_pin)
 
+    # TODO(stephenfin): Remove when we drop support for vcpu_pin_set
     def test_get_pinning_isolate_policy_no_fully_free_cores(self):
         host_pin = objects.NUMACell(
             id=0,
-            cpuset=set(),
+            # Simulate a legacy host with vcpu_pin_set configuration,
+            # meaning cpuset == pcpuset
+            cpuset=set([0, 1, 2, 3]),
             pcpuset=set([0, 1, 2, 3]),
             memory=4096,
             memory_usage=0,
@@ -3058,10 +3064,13 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
         inst_pin = hw._numa_fit_instance_cell_with_pinning(host_pin, inst_pin)
         self.assertIsNone(inst_pin)
 
+    # TODO(stephenfin): Remove when we drop support for vcpu_pin_set
     def test_get_pinning_isolate_policy_fits(self):
         host_pin = objects.NUMACell(
             id=0,
-            cpuset=set(),
+            # Simulate a legacy host with vcpu_pin_set configuration,
+            # meaning cpuset == pcpuset
+            cpuset=set([0, 1, 2, 3]),
             pcpuset=set([0, 1, 2, 3]),
             memory=4096,
             memory_usage=0,
@@ -3078,10 +3087,13 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
         got_topo = objects.VirtCPUTopology(sockets=1, cores=2, threads=1)
         self.assertEqualTopology(got_topo, inst_pin.cpu_topology)
 
+    # TODO(stephenfin): Remove when we drop support for vcpu_pin_set
     def test_get_pinning_isolate_policy_fits_ht_host(self):
         host_pin = objects.NUMACell(
             id=0,
-            cpuset=set(),
+            # Simulate a legacy host with vcpu_pin_set configuration,
+            # meaning cpuset == pcpuset
+            cpuset=set([0, 1, 2, 3]),
             pcpuset=set([0, 1, 2, 3]),
             memory=4096,
             memory_usage=0,
@@ -3098,10 +3110,13 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
         got_topo = objects.VirtCPUTopology(sockets=1, cores=2, threads=1)
         self.assertEqualTopology(got_topo, inst_pin.cpu_topology)
 
+    # TODO(stephenfin): Remove when we drop support for vcpu_pin_set
    def test_get_pinning_isolate_policy_fits_w_usage(self):
         host_pin = objects.NUMACell(
             id=0,
-            cpuset=set(),
+            # Simulate a legacy host with vcpu_pin_set configuration,
+            # meaning cpuset == pcpuset
+            cpuset=set([0, 1, 2, 3, 4, 5, 6, 7]),
             pcpuset=set([0, 1, 2, 3, 4, 5, 6, 7]),
             memory=4096,
             memory_usage=0,
@@ -3118,6 +3133,38 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
         got_topo = objects.VirtCPUTopology(sockets=1, cores=2, threads=1)
         self.assertEqualTopology(got_topo, inst_pin.cpu_topology)
 
+    # TODO(stephenfin): Remove when we drop support for vcpu_pin_set
+    @mock.patch.object(hw, 'LOG')
+    def test_get_pinning_isolate_policy_bug_1889633(self, mock_log):
+        host_pin = objects.NUMACell(
+            id=0,
+            cpuset={0, 1, 4, 5},
+            pcpuset={2, 3, 6, 7},
+            memory=4096,
+            memory_usage=0,
+            pinned_cpus=set(),
+            siblings=[{0, 4}, {1, 5}, {2, 6}, {3, 7}],
+            mempages=[],
+        )
+        inst_pin = objects.InstanceNUMACell(
+            cpuset=set(),
+            pcpuset={0, 1},
+            memory=2048,
+            cpu_policy=fields.CPUAllocationPolicy.DEDICATED,
+            cpu_thread_policy=fields.CPUThreadAllocationPolicy.ISOLATE,
+        )
+        limits = objects.NUMATopologyLimits(
+            cpu_allocation_ratio=2, ram_allocation_ratio=2,
+        )
+
+        # host has hyperthreads, which means no NUMA topology should be built
+        inst_topo = hw._numa_fit_instance_cell(host_pin, inst_pin, limits)
+        self.assertIsNone(inst_topo)
+        self.assertIn(
+            'Host supports hyperthreads, but instance requested no',
+            mock_log.warning.call_args[0][0],
+        )
+
 
 class CPUPinningTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
     def test_host_numa_fit_instance_to_host_single_cell(self):
@@ -3825,11 +3872,14 @@ class EmulatorThreadsTestCase(test.NoDBTestCase):
 
         self.assertEqual(set([0, 1]), host_topo.cells[0].pinned_cpus)
 
+    # TODO(stephenfin): Remove when we drop support for vcpu_pin_set
     def test_isolate_w_isolate_thread_alloc(self):
         host_topo = objects.NUMATopology(cells=[
             objects.NUMACell(
                 id=0,
-                cpuset=set(),
+                # Simulate a legacy host with vcpu_pin_set configuration,
+                # meaning cpuset == pcpuset
+                cpuset=set([0, 1, 2, 3, 4, 5]),
                 pcpuset=set([0, 1, 2, 3, 4, 5]),
                 memory=2048,
                 cpu_usage=0,
@@ -3852,11 +3902,14 @@ class EmulatorThreadsTestCase(test.NoDBTestCase):
         self.assertEqual({0: 0, 1: 2}, inst_topo.cells[0].cpu_pinning)
         self.assertEqual(set([4]), inst_topo.cells[0].cpuset_reserved)
 
+    # TODO(stephenfin): Remove when we drop support for vcpu_pin_set
     def test_isolate_w_isolate_thread_alloc_usage(self):
         host_topo = objects.NUMATopology(cells=[
             objects.NUMACell(
                 id=0,
-                cpuset=set(),
+                # Simulate a legacy host with vcpu_pin_set configuration,
+                # meaning cpuset == pcpuset
+                cpuset=set([0, 1, 2, 3, 4, 5]),
                 pcpuset=set([0, 1, 2, 3, 4, 5]),
                 memory=2048,
                 cpu_usage=0,
@@ -3912,11 +3965,14 @@ class EmulatorThreadsTestCase(test.NoDBTestCase):
         self.assertEqual({0: 2, 1: 3}, inst_topo.cells[0].cpu_pinning)
         self.assertEqual(set([1]), inst_topo.cells[0].cpuset_reserved)
 
+    # TODO(stephenfin): Remove when we drop support for vcpu_pin_set
     def test_asymmetric_host_w_isolate_thread_alloc(self):
         host_topo = objects.NUMATopology(cells=[
             objects.NUMACell(
                 id=0,
-                cpuset=set(),
+                # Simulate a legacy host with vcpu_pin_set configuration,
+                # meaning cpuset == pcpuset
+                cpuset=set([1, 2, 3, 4, 5]),
                 pcpuset=set([1, 2, 3, 4, 5]),
                 memory=2048,
                 cpu_usage=0,
@@ -3926,8 +3982,7 @@ class EmulatorThreadsTestCase(test.NoDBTestCase):
             mempages=[objects.NUMAPagesTopology(
                 size_kb=4, total=524288, used=0)])])
         inst_topo = objects.InstanceNUMATopology(
-            emulator_threads_policy=(
-                fields.CPUEmulatorThreadsPolicy.ISOLATE),
+            emulator_threads_policy=fields.CPUEmulatorThreadsPolicy.ISOLATE,
             cells=[objects.InstanceNUMACell(
                 id=0,
                 cpuset=set([0, 1]), memory=2048,
diff --git a/nova/virt/hardware.py b/nova/virt/hardware.py
index 0253dd992786..ccd5eb958bda 100644
--- a/nova/virt/hardware.py
+++ b/nova/virt/hardware.py
@@ -903,6 +903,30 @@ def _pack_instance_onto_cores(host_cell, instance_cell,
                       'for the isolate policy without this.')
             return
 
+        # TODO(stephenfin): Drop this when we drop support for 'vcpu_pin_set'
+        # NOTE(stephenfin): This is a total hack. We're relying on the fact
+        # that the libvirt driver, which is the only one that currently
+        # supports pinned CPUs, will set cpuset and pcpuset to the same value
+        # if using legacy configuration, i.e. 'vcpu_pin_set', as part of
+        # '_get_host_numa_topology'. They can't be equal otherwise since
+        # 'cpu_dedicated_set' and 'cpu_shared_set' must be disjoint.
+        # Therefore, if these are equal, the host that this NUMA cell
+        # corresponds to is using legacy configuration and it's okay to use
+        # the old, "pin a core and reserve its siblings" implementation of
+        # the 'isolate' policy. If they're not, the host is using new-style
+        # configuration and we've just hit bug #1889633.
+        if threads_per_core != 1 and host_cell.pcpuset != host_cell.cpuset:
+            LOG.warning(
+                "Host supports hyperthreads, but instance requested no "
+                "hyperthreads. This should have been rejected by the "
+                "scheduler but we likely got here due to the fallback VCPU "
+                "query. Consider setting '[workarounds] "
+                "disable_fallback_pcpu_query' to 'True' once hosts are no "
+                "longer using 'vcpu_pin_set'. Refer to bug #1889633 for more "
+                "information."
+            )
+            return
+
         pinning = _get_pinning(
             1,  # we only want to "use" one thread per core
             sibling_sets[threads_per_core],
diff --git a/releasenotes/notes/bug-1889633-37e524fb6c20fbdf.yaml b/releasenotes/notes/bug-1889633-37e524fb6c20fbdf.yaml
new file mode 100644
index 000000000000..77c86013e072
--- /dev/null
+++ b/releasenotes/notes/bug-1889633-37e524fb6c20fbdf.yaml
@@ -0,0 +1,9 @@
+---
+fixes:
+  - |
+    An issue that could result in instances with the ``isolate`` thread policy
+    (``hw:cpu_thread_policy=isolate``) being scheduled to hosts with SMT
+    (HyperThreading) and consuming ``VCPU`` instead of ``PCPU`` has been
+    resolved. See `bug #1889633`__ for more information.
+
+    .. __: https://bugs.launchpad.net/nova/+bug/1889633
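
As a standalone illustration of the heuristic added to
_pack_instance_onto_cores above, the following sketch reproduces the check
outside of nova, using the host shapes from the unit tests;
should_reject_isolate() is an illustrative helper rather than nova code.

# Legacy ('vcpu_pin_set') hosts report cpuset == pcpuset, while new-style
# hosts split CPUs between 'cpu_shared_set' and 'cpu_dedicated_set', so the
# two sets are disjoint. The isolate policy is only rejected on a host that
# both has hyperthreads and uses new-style configuration.
def should_reject_isolate(cpuset, pcpuset, threads_per_core):
    host_has_hyperthreads = threads_per_core != 1
    new_style_config = pcpuset != cpuset
    return host_has_hyperthreads and new_style_config


# Legacy host from test_get_pinning_isolate_policy_fits_ht_host: the old
# "pin a core and reserve its siblings" behaviour still applies.
assert not should_reject_isolate(
    cpuset={0, 1, 2, 3}, pcpuset={0, 1, 2, 3}, threads_per_core=2)

# New-style host from test_get_pinning_isolate_policy_bug_1889633: the
# request is rejected instead of silently consuming VCPU inventory.
assert should_reject_isolate(
    cpuset={0, 1, 4, 5}, pcpuset={2, 3, 6, 7}, threads_per_core=2)

Once every compute host reports 'PCPU' inventory (i.e. no longer uses
'vcpu_pin_set'), operators can avoid the fallback query entirely by setting
'[workarounds] disable_fallback_pcpu_query' to 'True', as the warning message
added above suggests.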