hardware: Reject requests for no hyperthreads on hosts with HT

Attempting to boot an instance with 'hw:cpu_policy=dedicated' will
result in a request from nova-scheduler to placement for allocation
candidates with $flavor.vcpu 'PCPU' inventory. Similarly, booting an
instance with 'hw:cpu_thread_policy=isolate' will result in a request
for allocation candidates with 'HW_CPU_HYPERTHREADING=forbidden', i.e.
hosts without hyperthreading. This has been the case since the
cpu-resources feature was implemented in Train. However, as part of that
work and to enable upgrades from hosts that predated Train, we also make
a second request for candidates with $flavor.vcpu 'VCPU' inventory. The
idea behind this is that old compute nodes would only report 'VCPU' and
should be usable, and any new compute nodes that got caught up in this
second request could never actually be scheduled to, since there would
not be enough cores available in
'ComputeNode.numa_topology.cells.[*].pcpuset', resulting in rejection
by the 'NUMATopologyFilter'. However, if a host was rejected in the first
query because it reported the 'HW_CPU_HYPERTHREADING' trait, it could
get picked up by the second query and would happily be scheduled to,
resulting in an instance consuming 'VCPU' inventory from a host that
properly supported 'PCPU' inventory.
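
To make the two queries concrete, they look roughly like the following.
This is an illustrative sketch only: the exact query strings are built
by nova-scheduler and the resource amounts depend on the flavor (the
values here match the functional test below).

    # primary query: dedicated CPUs, hyperthreading forbidden
    GET /allocation_candidates?resources=DISK_GB:20,MEMORY_MB:2048,PCPU:2&required=!HW_CPU_HYPERTHREADING

    # fallback query for pre-Train hosts: shared 'VCPU' inventory only
    GET /allocation_candidates?resources=DISK_GB:20,MEMORY_MB:2048,VCPU:2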

The solution is simple, though also a huge hack. If we detect that the
host is using new-style configuration and should be able to report
'PCPU', check whether the instance asked for no hyperthreading and
whether the host has it. If all of these are true, reject the request.
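
In effect, the check added to '_pack_instance_onto_cores' (see the diff
below) boils down to this simplified sketch, where 'host_cell' is the
host NUMACell and 'threads_per_core' is the sibling-set size being
considered for the 'isolate' policy:

    # a host with SMT enabled has thread sibling sets larger than one
    host_has_hyperthreads = threads_per_core != 1
    # with legacy 'vcpu_pin_set' config the driver sets cpuset == pcpuset;
    # with new-style config 'cpu_dedicated_set' and 'cpu_shared_set' are
    # disjoint, so inequality means the host uses new-style configuration
    host_uses_new_style_config = host_cell.pcpuset != host_cell.cpuset
    if host_has_hyperthreads and host_uses_new_style_config:
        # bug #1889633: we only get here via the fallback VCPU query
        return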

Change-Id: Id39aaaac09585ca1a754b669351c86e234b89dd9
Signed-off-by: Stephen Finucane <stephenfin@redhat.com>
Closes-Bug: #1889633
Stephen Finucane 2020-07-30 17:36:24 +01:00
parent 737e0c0111
commit 9c27033204
4 changed files with 99 additions and 16 deletions


@@ -383,12 +383,7 @@ class NUMAServersTest(NUMAServersTestBase):
         }
         flavor_id = self._create_flavor(vcpu=2, extra_spec=extra_spec)
 
-        # FIXME(stephenfin): This should go to error status since there should
-        # not be a host available
-        expected_usage = {
-            'DISK_GB': 20, 'MEMORY_MB': 2048, 'PCPU': 0, 'VCPU': 2,
-        }
-        self._run_build_test(flavor_id, expected_usage=expected_usage)
+        self._run_build_test(flavor_id, end_status='ERROR')
 
     def test_create_server_with_pcpu(self):
         """Create a server using an explicit 'resources:PCPU' request.


@@ -3749,10 +3749,13 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
         got_topo = objects.VirtCPUTopology(sockets=1, cores=5, threads=1)
         self.assertEqualTopology(got_topo, inst_pin.cpu_topology)
 
+    # TODO(stephenfin): Remove when we drop support for vcpu_pin_set
     def test_get_pinning_isolate_policy_too_few_fully_free_cores(self):
         host_pin = objects.NUMACell(
             id=0,
-            cpuset=set(),
+            # Simulate a legacy host with vcpu_pin_set configuration,
+            # meaning cpuset == pcpuset
+            cpuset=set([0, 1, 2, 3]),
             pcpuset=set([0, 1, 2, 3]),
             memory=4096,
             memory_usage=0,
@@ -3774,10 +3777,13 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
         self.assertIsNone(inst_pin)
 
+    # TODO(stephenfin): Remove when we drop support for vcpu_pin_set
     def test_get_pinning_isolate_policy_no_fully_free_cores(self):
         host_pin = objects.NUMACell(
             id=0,
-            cpuset=set(),
+            # Simulate a legacy host with vcpu_pin_set configuration,
+            # meaning cpuset == pcpuset
+            cpuset=set([0, 1, 2, 3]),
             pcpuset=set([0, 1, 2, 3]),
             memory=4096,
             memory_usage=0,
@@ -3799,10 +3805,13 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
         self.assertIsNone(inst_pin)
 
+    # TODO(stephenfin): Remove when we drop support for vcpu_pin_set
     def test_get_pinning_isolate_policy_fits(self):
         host_pin = objects.NUMACell(
             id=0,
-            cpuset=set(),
+            # Simulate a legacy host with vcpu_pin_set configuration,
+            # meaning cpuset == pcpuset
+            cpuset=set([0, 1, 2, 3]),
             pcpuset=set([0, 1, 2, 3]),
             memory=4096,
             memory_usage=0,
@@ -3825,10 +3834,13 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
         got_topo = objects.VirtCPUTopology(sockets=1, cores=2, threads=1)
         self.assertEqualTopology(got_topo, inst_pin.cpu_topology)
 
+    # TODO(stephenfin): Remove when we drop support for vcpu_pin_set
     def test_get_pinning_isolate_policy_fits_ht_host(self):
         host_pin = objects.NUMACell(
             id=0,
-            cpuset=set(),
+            # Simulate a legacy host with vcpu_pin_set configuration,
+            # meaning cpuset == pcpuset
+            cpuset=set([0, 1, 2, 3]),
             pcpuset=set([0, 1, 2, 3]),
             memory=4096,
             memory_usage=0,
@@ -3852,10 +3864,13 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
         got_topo = objects.VirtCPUTopology(sockets=1, cores=2, threads=1)
         self.assertEqualTopology(got_topo, inst_pin.cpu_topology)
 
+    # TODO(stephenfin): Remove when we drop support for vcpu_pin_set
     def test_get_pinning_isolate_policy_fits_w_usage(self):
         host_pin = objects.NUMACell(
             id=0,
-            cpuset=set(),
+            # Simulate a legacy host with vcpu_pin_set configuration,
+            # meaning cpuset == pcpuset
+            cpuset=set([0, 1, 2, 3, 4, 5, 6, 7]),
             pcpuset=set([0, 1, 2, 3, 4, 5, 6, 7]),
             memory=4096,
             memory_usage=0,
@@ -3879,6 +3894,38 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
         got_topo = objects.VirtCPUTopology(sockets=1, cores=2, threads=1)
         self.assertEqualTopology(got_topo, inst_pin.cpu_topology)
 
+    # TODO(stephenfin): Remove when we drop support for vcpu_pin_set
+    @mock.patch.object(hw, 'LOG')
+    def test_get_pinning_isolate_policy_bug_1889633(self, mock_log):
+        host_pin = objects.NUMACell(
+            id=0,
+            cpuset={0, 1, 4, 5},
+            pcpuset={2, 3, 6, 7},
+            memory=4096,
+            memory_usage=0,
+            pinned_cpus=set(),
+            siblings=[{0, 4}, {1, 5}, {2, 6}, {3, 7}],
+            mempages=[],
+        )
+        inst_pin = objects.InstanceNUMACell(
+            cpuset=set(),
+            pcpuset={0, 1},
+            memory=2048,
+            cpu_policy=fields.CPUAllocationPolicy.DEDICATED,
+            cpu_thread_policy=fields.CPUThreadAllocationPolicy.ISOLATE,
+        )
+        limits = objects.NUMATopologyLimits(
+            cpu_allocation_ratio=2, ram_allocation_ratio=2,
+        )
+
+        # host has hyperthreads, which means no NUMA topology should be built
+        inst_topo = hw._numa_fit_instance_cell(host_pin, inst_pin, limits)
+        self.assertIsNone(inst_topo)
+        self.assertIn(
+            'Host supports hyperthreads, but instance requested no',
+            mock_log.warning.call_args[0][0],
+        )
+
 
 class CPUPinningTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
 
     def test_host_numa_fit_instance_to_host_single_cell(self):
@@ -4898,11 +4945,14 @@ class EmulatorThreadsTestCase(test.NoDBTestCase):
         self.assertEqual(set([0, 1]), host_topo.cells[0].pinned_cpus)
 
+    # TODO(stephenfin): Remove when we drop support for vcpu_pin_set
     def test_isolate_w_isolate_thread_alloc(self):
         host_topo = objects.NUMATopology(cells=[
             objects.NUMACell(
                 id=0,
-                cpuset=set(),
+                # Simulate a legacy host with vcpu_pin_set configuration,
+                # meaning cpuset == pcpuset
+                cpuset=set([0, 1, 2, 3, 4, 5]),
                 pcpuset=set([0, 1, 2, 3, 4, 5]),
                 memory=2048,
                 cpu_usage=0,
@@ -4925,11 +4975,14 @@ class EmulatorThreadsTestCase(test.NoDBTestCase):
         self.assertEqual({0: 0, 1: 2}, inst_topo.cells[0].cpu_pinning)
         self.assertEqual(set([4]), inst_topo.cells[0].cpuset_reserved)
 
+    # TODO(stephenfin): Remove when we drop support for vcpu_pin_set
     def test_isolate_w_isolate_thread_alloc_usage(self):
         host_topo = objects.NUMATopology(cells=[
             objects.NUMACell(
                 id=0,
-                cpuset=set(),
+                # Simulate a legacy host with vcpu_pin_set configuration,
+                # meaning cpuset == pcpuset
+                cpuset=set([0, 1, 2, 3, 4, 5]),
                 pcpuset=set([0, 1, 2, 3, 4, 5]),
                 memory=2048,
                 cpu_usage=0,
@@ -5038,11 +5091,14 @@ class EmulatorThreadsTestCase(test.NoDBTestCase):
         self.assertEqual({0: 2, 1: 3}, inst_topo.cells[0].cpu_pinning)
         self.assertEqual(set([1]), inst_topo.cells[0].cpuset_reserved)
 
+    # TODO(stephenfin): Remove when we drop support for vcpu_pin_set
     def test_asymmetric_host_w_isolate_thread_alloc(self):
         host_topo = objects.NUMATopology(cells=[
             objects.NUMACell(
                 id=0,
-                cpuset=set(),
+                # Simulate a legacy host with vcpu_pin_set configuration,
+                # meaning cpuset == pcpuset
+                cpuset=set([1, 2, 3, 4, 5]),
                 pcpuset=set([1, 2, 3, 4, 5]),
                 memory=2048,
                 cpu_usage=0,
@@ -5052,8 +5108,7 @@ class EmulatorThreadsTestCase(test.NoDBTestCase):
                 mempages=[objects.NUMAPagesTopology(
                     size_kb=4, total=524288, used=0)])])
         inst_topo = objects.InstanceNUMATopology(
-            emulator_threads_policy=(
-                fields.CPUEmulatorThreadsPolicy.ISOLATE),
+            emulator_threads_policy=fields.CPUEmulatorThreadsPolicy.ISOLATE,
             cells=[objects.InstanceNUMACell(
                 id=0,
                 cpuset=set(), pcpuset=set([0, 1]), memory=2048,


@@ -918,6 +918,30 @@ def _pack_instance_onto_cores(host_cell, instance_cell,
                       'for the isolate policy without this.')
             return
 
+        # TODO(stephenfin): Drop this when we drop support for 'vcpu_pin_set'
+        # NOTE(stephenfin): This is a total hack. We're relying on the fact
+        # that the libvirt driver, which is the only one that currently
+        # supports pinned CPUs, will set cpuset and pcpuset to the same value
+        # if using legacy configuration, i.e. 'vcpu_pin_set', as part of
+        # '_get_host_numa_topology'. They can't be equal otherwise since
+        # 'cpu_dedicated_set' and 'cpu_shared_set' must be disjoint.
+        # Therefore, if these are equal, the host that this NUMA cell
+        # corresponds to is using legacy configuration and it's okay to use
+        # the old, "pin a core and reserve its siblings" implementation of
+        # the 'isolate' policy. If they're not, the host is using new-style
+        # configuration and we've just hit bug #1889633.
+        if threads_per_core != 1 and host_cell.pcpuset != host_cell.cpuset:
+            LOG.warning(
+                "Host supports hyperthreads, but instance requested no "
+                "hyperthreads. This should have been rejected by the "
+                "scheduler but we likely got here due to the fallback VCPU "
+                "query. Consider setting '[workarounds] "
+                "disable_fallback_pcpu_query' to 'True' once hosts are no "
+                "longer using 'vcpu_pin_set'. Refer to bug #1889633 for more "
+                "information."
+            )
+            return
+
         pinning = _get_pinning(
             1,  # we only want to "use" one thread per core
             sibling_sets[threads_per_core],
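
For operators, the warning above also points at the longer-term fix:
once no compute node relies on 'vcpu_pin_set' any more, the fallback
'VCPU' query can be disabled outright so such requests are rejected by
placement rather than hitting this code path. A minimal nova.conf
sketch, using the option exactly as named in the warning above:

    [workarounds]
    # Only set this once every compute host has migrated to
    # 'cpu_dedicated_set' / 'cpu_shared_set'
    disable_fallback_pcpu_query = True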


@@ -0,0 +1,9 @@
+---
+fixes:
+  - |
+    An issue that could result in instances with the ``isolate`` thread policy
+    (``hw:cpu_thread_policy=isolate``) being scheduled to hosts with SMT
+    (HyperThreading) and consuming ``VCPU`` instead of ``PCPU`` has been
+    resolved. See `bug #1889633`__ for more information.
+
+    .. __: https://bugs.launchpad.net/nova/+bug/1889633