diff --git a/nova/tests/unit/virt/test_hardware.py b/nova/tests/unit/virt/test_hardware.py index 1e98dfba6a5e..be6f9d20c726 100644 --- a/nova/tests/unit/virt/test_hardware.py +++ b/nova/tests/unit/virt/test_hardware.py @@ -3836,9 +3836,16 @@ class CPUPinningTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase): siblings=[set([2]), set([3])]) ]) inst_topo = objects.InstanceNUMATopology( - cells=[objects.InstanceNUMACell( - cpuset=set(), pcpuset=set([0, 1]), memory=2048, - cpu_policy=fields.CPUAllocationPolicy.DEDICATED)]) + cells=[ + objects.InstanceNUMACell( + id=0, + cpuset=set(), + pcpuset=set([0, 1]), + memory=2048, + cpu_policy=fields.CPUAllocationPolicy.DEDICATED, + ) + ] + ) inst_topo = hw.numa_fit_instance_to_host(host_topo, inst_topo) @@ -3867,9 +3874,16 @@ class CPUPinningTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase): siblings=[set([2]), set([3])]) ]) inst_topo = objects.InstanceNUMATopology( - cells=[objects.InstanceNUMACell( - cpuset=set(), pcpuset=set([0, 1]), memory=2048, - cpu_policy=fields.CPUAllocationPolicy.DEDICATED)]) + cells=[ + objects.InstanceNUMACell( + id=0, + cpuset=set(), + pcpuset=set([0, 1]), + memory=2048, + cpu_policy=fields.CPUAllocationPolicy.DEDICATED, + ) + ] + ) inst_topo = hw.numa_fit_instance_to_host(host_topo, inst_topo) @@ -3898,9 +3912,16 @@ class CPUPinningTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase): siblings=[set([2]), set([3])]) ]) inst_topo = objects.InstanceNUMATopology( - cells=[objects.InstanceNUMACell( - cpuset=set(), pcpuset=set([0, 1]), memory=2048, - cpu_policy=fields.CPUAllocationPolicy.DEDICATED)]) + cells=[ + objects.InstanceNUMACell( + id=0, + cpuset=set(), + pcpuset=set([0, 1]), + memory=2048, + cpu_policy=fields.CPUAllocationPolicy.DEDICATED, + ) + ] + ) inst_topo = hw.numa_fit_instance_to_host(host_topo, inst_topo) self.assertIsNone(inst_topo) @@ -3927,12 +3948,24 @@ class CPUPinningTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase): siblings=[set([4]), set([5]), set([6]), set([7])]) ]) 
inst_topo = objects.InstanceNUMATopology( - cells=[objects.InstanceNUMACell( - cpuset=set(), pcpuset=set([0, 1]), memory=2048, - cpu_policy=fields.CPUAllocationPolicy.DEDICATED), - objects.InstanceNUMACell( - cpuset=set(), pcpuset=set([2, 3]), memory=2048, - cpu_policy=fields.CPUAllocationPolicy.DEDICATED)]) + cells=[ + objects.InstanceNUMACell( + id=0, + cpuset=set(), + pcpuset=set([0, 1]), + memory=2048, + cpu_policy=fields.CPUAllocationPolicy.DEDICATED, + ), + objects.InstanceNUMACell( + id=1, + cpuset=set(), + pcpuset=set([2, 3]), + memory=2048, + cpu_policy=fields.CPUAllocationPolicy.DEDICATED, + ), + ] + ) + inst_topo = hw.numa_fit_instance_to_host(host_topo, inst_topo) for cell in inst_topo.cells: @@ -3970,12 +4003,24 @@ class CPUPinningTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase): ]) inst_topo = objects.InstanceNUMATopology( - cells=[objects.InstanceNUMACell( - cpuset=set(), pcpuset=set([0, 1]), memory=2048, - cpu_policy=fields.CPUAllocationPolicy.DEDICATED), - objects.InstanceNUMACell( - cpuset=set(), pcpuset=set([2, 3]), memory=2048, - cpu_policy=fields.CPUAllocationPolicy.DEDICATED)]) + cells=[ + objects.InstanceNUMACell( + id=0, + cpuset=set(), + pcpuset=set([0, 1]), + memory=2048, + cpu_policy=fields.CPUAllocationPolicy.DEDICATED, + ), + objects.InstanceNUMACell( + id=1, + cpuset=set(), + pcpuset=set([2, 3]), + memory=2048, + cpu_policy=fields.CPUAllocationPolicy.DEDICATED, + ), + ] + ) + inst_topo = hw.numa_fit_instance_to_host(host_topo, inst_topo) for cell in inst_topo.cells: @@ -4003,12 +4048,24 @@ class CPUPinningTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase): siblings=[set([4]), set([5]), set([6]), set([7])]) ]) inst_topo = objects.InstanceNUMATopology( - cells=[objects.InstanceNUMACell( - cpuset=set(), pcpuset=set([0, 1]), memory=2048, - cpu_policy=fields.CPUAllocationPolicy.DEDICATED), - objects.InstanceNUMACell( - cpuset=set(), pcpuset=set([2, 3]), memory=2048, - cpu_policy=fields.CPUAllocationPolicy.DEDICATED)]) + cells=[ + 
objects.InstanceNUMACell( + id=0, + cpuset=set(), + pcpuset=set([0, 1]), + memory=2048, + cpu_policy=fields.CPUAllocationPolicy.DEDICATED, + ), + objects.InstanceNUMACell( + id=1, + cpuset=set(), + pcpuset=set([2, 3]), + memory=2048, + cpu_policy=fields.CPUAllocationPolicy.DEDICATED, + ), + ] + ) + inst_topo = hw.numa_fit_instance_to_host(host_topo, inst_topo) self.assertIsNone(inst_topo) diff --git a/nova/virt/hardware.py b/nova/virt/hardware.py index c4ebae11ca0f..f6f96a1af202 100644 --- a/nova/virt/hardware.py +++ b/nova/virt/hardware.py @@ -2357,12 +2357,37 @@ def numa_fit_instance_to_host( host_cells, key=lambda cell: total_pci_in_cell.get(cell.id, 0)) + # a set of host_cell.id, instance_cell.id pairs where we already checked + # that the instance cell does not fit + not_fit_cache = set() + # a set of host_cell.id, instance_cell.id pairs where we already checked + # that the instance cell does fit + fit_cache = set() for host_cell_perm in itertools.permutations( host_cells, len(instance_topology)): chosen_instance_cells: ty.List['objects.InstanceNUMACell'] = [] chosen_host_cells: ty.List['objects.NUMACell'] = [] for host_cell, instance_cell in zip( host_cell_perm, instance_topology.cells): + + cell_pair = (host_cell.id, instance_cell.id) + + # if we already checked this pair, and they did not fit then no + # need to check again just move to the next permutation + if cell_pair in not_fit_cache: + break + + # if we already checked this pair, and they fit before that they + # will fit now too. So no need to check again. 
Just continue with + # the next cell pair in the permutation + if cell_pair in fit_cache: + chosen_host_cells.append(host_cell) + # Normally this would have been done by _numa_fit_instance_cell + # but we optimized that out here based on the cache + instance_cell.id = host_cell.id + chosen_instance_cells.append(instance_cell) + continue + try: cpuset_reserved = 0 if (instance_topology.emulator_threads_isolated and @@ -2379,11 +2404,18 @@ def numa_fit_instance_to_host( # This exception will been raised if instance cell's # custom pagesize is not supported with host cell in # _numa_cell_supports_pagesize_request function. + + # cache the result + not_fit_cache.add(cell_pair) break if got_cell is None: + # cache the result + not_fit_cache.add(cell_pair) break chosen_host_cells.append(host_cell) chosen_instance_cells.append(got_cell) + # cache the result + fit_cache.add(cell_pair) if len(chosen_instance_cells) != len(host_cell_perm): continue diff --git a/releasenotes/notes/bug-1978372-optimized-numa-fitting-algorithm-5d5b922b0bdbf818.yaml b/releasenotes/notes/bug-1978372-optimized-numa-fitting-algorithm-5d5b922b0bdbf818.yaml new file mode 100644 index 000000000000..3f42f7090828 --- /dev/null +++ b/releasenotes/notes/bug-1978372-optimized-numa-fitting-algorithm-5d5b922b0bdbf818.yaml @@ -0,0 +1,9 @@ +--- +fixes: + - | + The algorithm that is used to see if a multi NUMA guest fits on + a multi NUMA host has been optimized to speed up the decision + on hosts with a high number of NUMA nodes (> 8). For details see + `bug 1978372`_ + + .. _bug 1978372: https://bugs.launchpad.net/nova/+bug/1978372