Optimize numa_fit_instance_to_host

The numa_fit_instance_to_host algorithm tries all the possible
host cell permutations to fit the instance cells. So in the worst case
scenario it makes n! / (n-k)! _numa_fit_instance_cell calls
(n=len(host_cells), k=len(instance_cells)) to find out whether the
instance can be fit to the host. With a 16 NUMA node host and an 8 NUMA
node guest this means roughly 500 million calls to
_numa_fit_instance_cell. This takes excessive time.

However going through these permutations there are many repetitive
host_cell, instance_cell pairs to try to fit.
E.g.
  host_cells=[H1, H2, H3]
  instance_cells=[G1, G2]

Produces pairings:

* H1 <- G1 and H2 <- G2
* H1 <- G1 and H3 <- G2
...

Here G1 is checked to fit H1 twice. But if it does not fit the first
time then we know that it will not fit the second time either. So we
can cache the result of the first check and use that cache for the later
permutations.

This patch adds two caches to the algorithm: a fit_cache to hold
host_cell.id, instance_cell.id pairs that we know fit, and a
not_fit_cache for those pairs that we already know do not fit.

This change significantly boosts the performance of the algorithm. The
reproduction provided in bug 1978372 took 6 minutes on my local
machine to run without the optimization. With the optimization it runs
in 3 seconds.

This change increases the memory usage of the algorithm with the two
caches. Those caches are sets of integer two-tuples. The total size
of the caches is bounded by the total number of possible host_cell,
instance_cell pairs, which is len(host_cells) * len(instance_cells). So
from the above example (16 host, 8 instance NUMA nodes) it is at most
128 pairs of integers in the caches. That will not cause a significant
memory increase.

Closes-Bug: #1978372
Change-Id: Ibcf27d741429a239d13f0404348c61e2668b4ce4
This commit is contained in:
Balazs Gibizer 2022-06-15 09:28:27 +02:00
parent d869163608
commit 099a6f63af
3 changed files with 125 additions and 27 deletions

View File

@ -3836,9 +3836,16 @@ class CPUPinningTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
siblings=[set([2]), set([3])])
])
inst_topo = objects.InstanceNUMATopology(
cells=[objects.InstanceNUMACell(
cpuset=set(), pcpuset=set([0, 1]), memory=2048,
cpu_policy=fields.CPUAllocationPolicy.DEDICATED)])
cells=[
objects.InstanceNUMACell(
id=0,
cpuset=set(),
pcpuset=set([0, 1]),
memory=2048,
cpu_policy=fields.CPUAllocationPolicy.DEDICATED,
)
]
)
inst_topo = hw.numa_fit_instance_to_host(host_topo, inst_topo)
@ -3867,9 +3874,16 @@ class CPUPinningTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
siblings=[set([2]), set([3])])
])
inst_topo = objects.InstanceNUMATopology(
cells=[objects.InstanceNUMACell(
cpuset=set(), pcpuset=set([0, 1]), memory=2048,
cpu_policy=fields.CPUAllocationPolicy.DEDICATED)])
cells=[
objects.InstanceNUMACell(
id=0,
cpuset=set(),
pcpuset=set([0, 1]),
memory=2048,
cpu_policy=fields.CPUAllocationPolicy.DEDICATED,
)
]
)
inst_topo = hw.numa_fit_instance_to_host(host_topo, inst_topo)
@ -3898,9 +3912,16 @@ class CPUPinningTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
siblings=[set([2]), set([3])])
])
inst_topo = objects.InstanceNUMATopology(
cells=[objects.InstanceNUMACell(
cpuset=set(), pcpuset=set([0, 1]), memory=2048,
cpu_policy=fields.CPUAllocationPolicy.DEDICATED)])
cells=[
objects.InstanceNUMACell(
id=0,
cpuset=set(),
pcpuset=set([0, 1]),
memory=2048,
cpu_policy=fields.CPUAllocationPolicy.DEDICATED,
)
]
)
inst_topo = hw.numa_fit_instance_to_host(host_topo, inst_topo)
self.assertIsNone(inst_topo)
@ -3927,12 +3948,24 @@ class CPUPinningTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
siblings=[set([4]), set([5]), set([6]), set([7])])
])
inst_topo = objects.InstanceNUMATopology(
cells=[objects.InstanceNUMACell(
cpuset=set(), pcpuset=set([0, 1]), memory=2048,
cpu_policy=fields.CPUAllocationPolicy.DEDICATED),
objects.InstanceNUMACell(
cpuset=set(), pcpuset=set([2, 3]), memory=2048,
cpu_policy=fields.CPUAllocationPolicy.DEDICATED)])
cells=[
objects.InstanceNUMACell(
id=0,
cpuset=set(),
pcpuset=set([0, 1]),
memory=2048,
cpu_policy=fields.CPUAllocationPolicy.DEDICATED,
),
objects.InstanceNUMACell(
id=1,
cpuset=set(),
pcpuset=set([2, 3]),
memory=2048,
cpu_policy=fields.CPUAllocationPolicy.DEDICATED,
),
]
)
inst_topo = hw.numa_fit_instance_to_host(host_topo, inst_topo)
for cell in inst_topo.cells:
@ -3970,12 +4003,24 @@ class CPUPinningTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
])
inst_topo = objects.InstanceNUMATopology(
cells=[objects.InstanceNUMACell(
cpuset=set(), pcpuset=set([0, 1]), memory=2048,
cpu_policy=fields.CPUAllocationPolicy.DEDICATED),
objects.InstanceNUMACell(
cpuset=set(), pcpuset=set([2, 3]), memory=2048,
cpu_policy=fields.CPUAllocationPolicy.DEDICATED)])
cells=[
objects.InstanceNUMACell(
id=0,
cpuset=set(),
pcpuset=set([0, 1]),
memory=2048,
cpu_policy=fields.CPUAllocationPolicy.DEDICATED,
),
objects.InstanceNUMACell(
id=1,
cpuset=set(),
pcpuset=set([2, 3]),
memory=2048,
cpu_policy=fields.CPUAllocationPolicy.DEDICATED,
),
]
)
inst_topo = hw.numa_fit_instance_to_host(host_topo, inst_topo)
for cell in inst_topo.cells:
@ -4003,12 +4048,24 @@ class CPUPinningTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
siblings=[set([4]), set([5]), set([6]), set([7])])
])
inst_topo = objects.InstanceNUMATopology(
cells=[objects.InstanceNUMACell(
cpuset=set(), pcpuset=set([0, 1]), memory=2048,
cpu_policy=fields.CPUAllocationPolicy.DEDICATED),
objects.InstanceNUMACell(
cpuset=set(), pcpuset=set([2, 3]), memory=2048,
cpu_policy=fields.CPUAllocationPolicy.DEDICATED)])
cells=[
objects.InstanceNUMACell(
id=0,
cpuset=set(),
pcpuset=set([0, 1]),
memory=2048,
cpu_policy=fields.CPUAllocationPolicy.DEDICATED,
),
objects.InstanceNUMACell(
id=1,
cpuset=set(),
pcpuset=set([2, 3]),
memory=2048,
cpu_policy=fields.CPUAllocationPolicy.DEDICATED,
),
]
)
inst_topo = hw.numa_fit_instance_to_host(host_topo, inst_topo)
self.assertIsNone(inst_topo)

View File

@ -2357,12 +2357,37 @@ def numa_fit_instance_to_host(
host_cells,
key=lambda cell: total_pci_in_cell.get(cell.id, 0))
# a set of host_cell.id, instance_cell.id pairs where we already checked
# that the instance cell does not fit
not_fit_cache = set()
# a set of host_cell.id, instance_cell.id pairs where we already checked
# that the instance cell does fit
fit_cache = set()
for host_cell_perm in itertools.permutations(
host_cells, len(instance_topology)):
chosen_instance_cells: ty.List['objects.InstanceNUMACell'] = []
chosen_host_cells: ty.List['objects.NUMACell'] = []
for host_cell, instance_cell in zip(
host_cell_perm, instance_topology.cells):
cell_pair = (host_cell.id, instance_cell.id)
# if we already checked this pair, and they did not fit then no
# need to check again just move to the next permutation
if cell_pair in not_fit_cache:
break
# if we already checked this pair, and they fit before, then they
# will fit now too. So no need to check again. Just continue with
# the next cell pair in the permutation
if cell_pair in fit_cache:
chosen_host_cells.append(host_cell)
# Normally this would have been done by _numa_fit_instance_cell
# but we optimized that out here based on the cache
instance_cell.id = host_cell.id
chosen_instance_cells.append(instance_cell)
continue
try:
cpuset_reserved = 0
if (instance_topology.emulator_threads_isolated and
@ -2379,11 +2404,18 @@ def numa_fit_instance_to_host(
# This exception will be raised if instance cell's
# custom pagesize is not supported with host cell in
# _numa_cell_supports_pagesize_request function.
# cache the result
not_fit_cache.add(cell_pair)
break
if got_cell is None:
# cache the result
not_fit_cache.add(cell_pair)
break
chosen_host_cells.append(host_cell)
chosen_instance_cells.append(got_cell)
# cache the result
fit_cache.add(cell_pair)
if len(chosen_instance_cells) != len(host_cell_perm):
continue

View File

@ -0,0 +1,9 @@
---
fixes:
- |
The algorithm that is used to see if a multi NUMA guest fits to
a multi NUMA host has been optimized to speed up the decision
on hosts with a high number of NUMA nodes ( > 8). For details see
`bug 1978372`_
.. _bug 1978372: https://bugs.launchpad.net/nova/+bug/1978372