hardware: Reject requests for no hyperthreads on hosts with HT

Attempting to boot an instance with 'hw:cpu_policy=dedicated' will
result in a request from nova-scheduler to placement for allocation
candidates with $flavor.vcpu 'PCPU' inventory. Similarly, booting an
instance with 'hw:cpu_thread_policy=isolate' will result in a request
for allocation candidates with 'HW_CPU_HYPERTHREADING=forbidden', i.e.
hosts without hyperthreading. This has been the case since the
cpu-resources feature was implemented in Train. However, as part of that
work and to enable upgrades from hosts that predated Train, we also make
a second request for candidates with $flavor.vcpu 'VCPU' inventory. The
idea behind this is that old compute nodes would only report 'VCPU' and
should be usable, and any new compute nodes that got caught up in this
second request could never actually be scheduled to, since there would
not be enough cores available in
'ComputeNode.numa_topology.cells.[*].pcpuset', resulting in rejection
by the 'NUMATopologyFilter'. However, if a host was rejected in the first
query because it reported the 'HW_CPU_HYPERTHREADING' trait, it could
get picked up by the second query and would happily be scheduled to,
resulting in an instance consuming 'VCPU' inventory from a host that
properly supported 'PCPU' inventory.
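
To make the two queries concrete, they look roughly like the following.
This is an illustrative sketch only: the exact query strings are built
by nova-scheduler and the resource amounts depend on the flavor (the
values here match the functional test below).

    # primary query: dedicated CPUs, hyperthreading forbidden
    GET /allocation_candidates?resources=DISK_GB:20,MEMORY_MB:2048,PCPU:2&required=!HW_CPU_HYPERTHREADING

    # fallback query for pre-Train hosts: shared 'VCPU' inventory only
    GET /allocation_candidates?resources=DISK_GB:20,MEMORY_MB:2048,VCPU:2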

The solution is simple, though also a huge hack. If we detect that the
host is using new-style configuration and should be able to report
'PCPU', check whether the instance asked for no hyperthreading and
whether the host has it. If all of these are true, reject the request.
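
In effect, the check added to '_pack_instance_onto_cores' (see the diff
below) boils down to this simplified sketch, where 'host_cell' is the
host NUMACell and 'threads_per_core' is the sibling-set size being
considered for the 'isolate' policy:

    # a host with SMT enabled has thread sibling sets larger than one
    host_has_hyperthreads = threads_per_core != 1
    # with legacy 'vcpu_pin_set' config the driver sets cpuset == pcpuset;
    # with new-style config 'cpu_dedicated_set' and 'cpu_shared_set' are
    # disjoint, so inequality means the host uses new-style configuration
    host_uses_new_style_config = host_cell.pcpuset != host_cell.cpuset
    if host_has_hyperthreads and host_uses_new_style_config:
        # bug #1889633: we only get here via the fallback VCPU query
        return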

Change-Id: Id39aaaac09585ca1a754b669351c86e234b89dd9
Signed-off-by: Stephen Finucane <stephenfin@redhat.com>
Closes-Bug: #1889633
Stephen Finucane 2020-07-30 17:36:24 +01:00
parent 737e0c0111
commit 9c27033204
4 changed files with 99 additions and 16 deletions


@@ -383,12 +383,7 @@ class NUMAServersTest(NUMAServersTestBase):
         }
         flavor_id = self._create_flavor(vcpu=2, extra_spec=extra_spec)
 
-        # FIXME(stephenfin): This should go to error status since there should
-        # not be a host available
-        expected_usage = {
-            'DISK_GB': 20, 'MEMORY_MB': 2048, 'PCPU': 0, 'VCPU': 2,
-        }
-        self._run_build_test(flavor_id, expected_usage=expected_usage)
+        self._run_build_test(flavor_id, end_status='ERROR')
 
     def test_create_server_with_pcpu(self):
         """Create a server using an explicit 'resources:PCPU' request.


@@ -3749,10 +3749,13 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
         got_topo = objects.VirtCPUTopology(sockets=1, cores=5, threads=1)
         self.assertEqualTopology(got_topo, inst_pin.cpu_topology)
 
+    # TODO(stephenfin): Remove when we drop support for vcpu_pin_set
     def test_get_pinning_isolate_policy_too_few_fully_free_cores(self):
         host_pin = objects.NUMACell(
             id=0,
-            cpuset=set(),
+            # Simulate a legacy host with vcpu_pin_set configuration,
+            # meaning cpuset == pcpuset
+            cpuset=set([0, 1, 2, 3]),
             pcpuset=set([0, 1, 2, 3]),
             memory=4096,
             memory_usage=0,
@@ -3774,10 +3777,13 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
         self.assertIsNone(inst_pin)
 
+    # TODO(stephenfin): Remove when we drop support for vcpu_pin_set
     def test_get_pinning_isolate_policy_no_fully_free_cores(self):
         host_pin = objects.NUMACell(
             id=0,
-            cpuset=set(),
+            # Simulate a legacy host with vcpu_pin_set configuration,
+            # meaning cpuset == pcpuset
+            cpuset=set([0, 1, 2, 3]),
             pcpuset=set([0, 1, 2, 3]),
             memory=4096,
             memory_usage=0,
@@ -3799,10 +3805,13 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
         self.assertIsNone(inst_pin)
 
+    # TODO(stephenfin): Remove when we drop support for vcpu_pin_set
     def test_get_pinning_isolate_policy_fits(self):
         host_pin = objects.NUMACell(
             id=0,
-            cpuset=set(),
+            # Simulate a legacy host with vcpu_pin_set configuration,
+            # meaning cpuset == pcpuset
+            cpuset=set([0, 1, 2, 3]),
             pcpuset=set([0, 1, 2, 3]),
             memory=4096,
             memory_usage=0,
@@ -3825,10 +3834,13 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
         got_topo = objects.VirtCPUTopology(sockets=1, cores=2, threads=1)
         self.assertEqualTopology(got_topo, inst_pin.cpu_topology)
 
+    # TODO(stephenfin): Remove when we drop support for vcpu_pin_set
     def test_get_pinning_isolate_policy_fits_ht_host(self):
         host_pin = objects.NUMACell(
             id=0,
-            cpuset=set(),
+            # Simulate a legacy host with vcpu_pin_set configuration,
+            # meaning cpuset == pcpuset
+            cpuset=set([0, 1, 2, 3]),
             pcpuset=set([0, 1, 2, 3]),
             memory=4096,
             memory_usage=0,
@@ -3852,10 +3864,13 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
         got_topo = objects.VirtCPUTopology(sockets=1, cores=2, threads=1)
         self.assertEqualTopology(got_topo, inst_pin.cpu_topology)
 
+    # TODO(stephenfin): Remove when we drop support for vcpu_pin_set
     def test_get_pinning_isolate_policy_fits_w_usage(self):
         host_pin = objects.NUMACell(
             id=0,
-            cpuset=set(),
+            # Simulate a legacy host with vcpu_pin_set configuration,
+            # meaning cpuset == pcpuset
+            cpuset=set([0, 1, 2, 3, 4, 5, 6, 7]),
             pcpuset=set([0, 1, 2, 3, 4, 5, 6, 7]),
             memory=4096,
             memory_usage=0,
@@ -3879,6 +3894,38 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
         got_topo = objects.VirtCPUTopology(sockets=1, cores=2, threads=1)
         self.assertEqualTopology(got_topo, inst_pin.cpu_topology)
 
+    # TODO(stephenfin): Remove when we drop support for vcpu_pin_set
+    @mock.patch.object(hw, 'LOG')
+    def test_get_pinning_isolate_policy_bug_1889633(self, mock_log):
+        host_pin = objects.NUMACell(
+            id=0,
+            cpuset={0, 1, 4, 5},
+            pcpuset={2, 3, 6, 7},
+            memory=4096,
+            memory_usage=0,
+            pinned_cpus=set(),
+            siblings=[{0, 4}, {1, 5}, {2, 6}, {3, 7}],
+            mempages=[],
+        )
+        inst_pin = objects.InstanceNUMACell(
+            cpuset=set(),
+            pcpuset={0, 1},
+            memory=2048,
+            cpu_policy=fields.CPUAllocationPolicy.DEDICATED,
+            cpu_thread_policy=fields.CPUThreadAllocationPolicy.ISOLATE,
+        )
+        limits = objects.NUMATopologyLimits(
+            cpu_allocation_ratio=2, ram_allocation_ratio=2,
+        )
+
+        # host has hyperthreads, which means no NUMA topology should be built
+        inst_topo = hw._numa_fit_instance_cell(host_pin, inst_pin, limits)
+        self.assertIsNone(inst_topo)
+        self.assertIn(
+            'Host supports hyperthreads, but instance requested no',
+            mock_log.warning.call_args[0][0],
+        )
+
 
 class CPUPinningTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
 
     def test_host_numa_fit_instance_to_host_single_cell(self):
@@ -4898,11 +4945,14 @@ class EmulatorThreadsTestCase(test.NoDBTestCase):
         self.assertEqual(set([0, 1]), host_topo.cells[0].pinned_cpus)
 
+    # TODO(stephenfin): Remove when we drop support for vcpu_pin_set
     def test_isolate_w_isolate_thread_alloc(self):
         host_topo = objects.NUMATopology(cells=[
             objects.NUMACell(
                 id=0,
-                cpuset=set(),
+                # Simulate a legacy host with vcpu_pin_set configuration,
+                # meaning cpuset == pcpuset
+                cpuset=set([0, 1, 2, 3, 4, 5]),
                 pcpuset=set([0, 1, 2, 3, 4, 5]),
                 memory=2048,
                 cpu_usage=0,
@@ -4925,11 +4975,14 @@ class EmulatorThreadsTestCase(test.NoDBTestCase):
         self.assertEqual({0: 0, 1: 2}, inst_topo.cells[0].cpu_pinning)
         self.assertEqual(set([4]), inst_topo.cells[0].cpuset_reserved)
 
+    # TODO(stephenfin): Remove when we drop support for vcpu_pin_set
     def test_isolate_w_isolate_thread_alloc_usage(self):
         host_topo = objects.NUMATopology(cells=[
             objects.NUMACell(
                 id=0,
-                cpuset=set(),
+                # Simulate a legacy host with vcpu_pin_set configuration,
+                # meaning cpuset == pcpuset
+                cpuset=set([0, 1, 2, 3, 4, 5]),
                 pcpuset=set([0, 1, 2, 3, 4, 5]),
                 memory=2048,
                 cpu_usage=0,
@@ -5038,11 +5091,14 @@ class EmulatorThreadsTestCase(test.NoDBTestCase):
         self.assertEqual({0: 2, 1: 3}, inst_topo.cells[0].cpu_pinning)
         self.assertEqual(set([1]), inst_topo.cells[0].cpuset_reserved)
 
+    # TODO(stephenfin): Remove when we drop support for vcpu_pin_set
     def test_asymmetric_host_w_isolate_thread_alloc(self):
         host_topo = objects.NUMATopology(cells=[
             objects.NUMACell(
                 id=0,
-                cpuset=set(),
+                # Simulate a legacy host with vcpu_pin_set configuration,
+                # meaning cpuset == pcpuset
+                cpuset=set([1, 2, 3, 4, 5]),
                 pcpuset=set([1, 2, 3, 4, 5]),
                 memory=2048,
                 cpu_usage=0,
@@ -5052,8 +5108,7 @@ class EmulatorThreadsTestCase(test.NoDBTestCase):
                 mempages=[objects.NUMAPagesTopology(
                     size_kb=4, total=524288, used=0)])])
         inst_topo = objects.InstanceNUMATopology(
-            emulator_threads_policy=(
-                fields.CPUEmulatorThreadsPolicy.ISOLATE),
+            emulator_threads_policy=fields.CPUEmulatorThreadsPolicy.ISOLATE,
             cells=[objects.InstanceNUMACell(
                 id=0,
                 cpuset=set(), pcpuset=set([0, 1]), memory=2048,


@@ -918,6 +918,30 @@ def _pack_instance_onto_cores(host_cell, instance_cell,
                       'for the isolate policy without this.')
             return
 
+        # TODO(stephenfin): Drop this when we drop support for 'vcpu_pin_set'
+        # NOTE(stephenfin): This is a total hack. We're relying on the fact
+        # that the libvirt driver, which is the only one that currently
+        # supports pinned CPUs, will set cpuset and pcpuset to the same value
+        # if using legacy configuration, i.e. 'vcpu_pin_set', as part of
+        # '_get_host_numa_topology'. They can't be equal otherwise since
+        # 'cpu_dedicated_set' and 'cpu_shared_set' must be disjoint.
+        # Therefore, if these are equal, the host that this NUMA cell
+        # corresponds to is using legacy configuration and it's okay to use
+        # the old, "pin a core and reserve its siblings" implementation of
+        # the 'isolate' policy. If they're not, the host is using new-style
+        # configuration and we've just hit bug #1889633.
+        if threads_per_core != 1 and host_cell.pcpuset != host_cell.cpuset:
+            LOG.warning(
+                "Host supports hyperthreads, but instance requested no "
+                "hyperthreads. This should have been rejected by the "
+                "scheduler but we likely got here due to the fallback VCPU "
+                "query. Consider setting '[workarounds] "
+                "disable_fallback_pcpu_query' to 'True' once hosts are no "
+                "longer using 'vcpu_pin_set'. Refer to bug #1889633 for more "
+                "information."
+            )
+            return
+
         pinning = _get_pinning(
             1,  # we only want to "use" one thread per core
             sibling_sets[threads_per_core],
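
For operators, the warning above also points at the longer-term fix:
once no compute node relies on 'vcpu_pin_set' any more, the fallback
'VCPU' query can be disabled outright so such requests are rejected by
placement rather than hitting this code path. A minimal nova.conf
sketch, using the option exactly as named in the warning above:

    [workarounds]
    # Only set this once every compute host has migrated to
    # 'cpu_dedicated_set' / 'cpu_shared_set'
    disable_fallback_pcpu_query = True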


@@ -0,0 +1,9 @@
+---
+fixes:
+  - |
+    An issue that could result in instances with the ``isolate`` thread policy
+    (``hw:cpu_thread_policy=isolate``) being scheduled to hosts with SMT
+    (HyperThreading) and consuming ``VCPU`` instead of ``PCPU`` has been
+    resolved. See `bug #1889633`__ for more information.
+
+    .. __: https://bugs.launchpad.net/nova/+bug/1889633