Merge "pci: implement the 'socket' NUMA affinity policy"

This commit is contained in:
Zuul 2021-03-12 18:21:12 +00:00 committed by Gerrit Code Review
commit 63bba50f43
7 changed files with 177 additions and 4 deletions


@@ -495,7 +495,7 @@ PCI NUMA Affinity Policy
PCI passthrough devices and neutron SR-IOV interfaces via the
``hw:pci_numa_affinity_policy`` flavor extra spec or
``hw_pci_numa_affinity_policy`` image property. The allowed values are
-``required``,``preferred`` or ``legacy`` (default).
+``required``, ``socket``, ``preferred`` or ``legacy`` (default).
**required**
This value will mean that nova will boot instances with PCI devices
@@ -504,6 +504,25 @@ PCI NUMA Affinity Policy
devices could not be determined, those PCI devices wouldn't be consumable
by the instance. This provides maximum performance.
**socket**
This means that the PCI device must be affined to the same host socket as
at least one of the guest NUMA nodes. For example, consider a system with
two sockets, each with two NUMA nodes, numbered node 0 and node 1 on
socket 0, and node 2 and node 3 on socket 1. There is a PCI device
affined to node 0. An instance with two guest NUMA nodes and the
``socket`` policy can be affined to either:
* node 0 and node 1
* node 0 and node 2
* node 0 and node 3
* node 1 and node 2
* node 1 and node 3
The instance cannot be affined to node 2 and node 3, as neither of those
is on the same socket as the PCI device. If the other nodes are consumed
by other instances and only nodes 2 and 3 are available, the instance
will not boot; a short Python sketch of this placement rule follows the
policy list.
**preferred**
This value will mean that ``nova-scheduler`` will choose a compute host
with minimal consideration for the NUMA affinity of PCI devices.
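The placement rule in the ``socket`` example above can be sketched in a few
lines of standalone Python (an illustration with assumed names, not Nova
code):

from itertools import combinations

# Host topology from the example: NUMA node -> socket.
node_to_socket = {0: 0, 1: 0, 2: 1, 3: 1}
device_socket = node_to_socket[0]  # the PCI device is affined to node 0

# 'socket' policy: at least one guest node must share the device's socket.
valid = [
    pair for pair in combinations(node_to_socket, 2)
    if any(node_to_socket[node] == device_socket for node in pair)
]
print(valid)  # [(0, 1), (0, 2), (0, 3), (1, 2), (1, 3)] -- (2, 3) excluded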


@@ -298,6 +298,15 @@ class PciDeviceStats(object):
pool['count'] for pool in filtered_pools) >= requested_count:
return filtered_pools
# the SOCKET policy is a bit of a special case. It's less strict than
# REQUIRED (so REQUIRED will automatically fulfil SOCKET, at least
# with our assumption of never having multiple sockets per NUMA node),
# but not always more strict than LEGACY: a PCI device with no NUMA
# affinity will fulfil LEGACY but not SOCKET. If we have SOCKET,
# process it here and don't continue.
if requested_policy == fields.PCINUMAAffinityPolicy.SOCKET:
return self._filter_pools_for_socket_affinity(pools, numa_cells)
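# To make that ordering concrete (an illustrative summary, not code from
# this change): a device on one of the guest's own NUMA nodes satisfies
# REQUIRED, SOCKET and LEGACY; a device on another node of the same
# socket satisfies only SOCKET; a device reporting no NUMA affinity
# (numa_node=None) satisfies LEGACY but not SOCKET.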
# some systems don't report NUMA node info for PCI devices, in which
# case None is reported in 'pci_device.numa_node'. The LEGACY policy
# allows us to use these devices so we include None in the list of
@@ -323,6 +332,39 @@ class PciDeviceStats(object):
return sorted(
pools, key=lambda pool: pool.get('numa_node') not in numa_cell_ids)
def _filter_pools_for_socket_affinity(self, pools, numa_cells):
host_cells = self.numa_topology.cells
# bail early if we don't have socket information for all host_cells.
# This could happen if we're running on a weird older system with
# multiple sockets per NUMA node, which is a configuration that we
# explicitly chose not to support.
if any(cell.socket is None for cell in host_cells):
LOG.debug('No socket information in host NUMA cell(s).')
return []
# get a set of host sockets that the guest cells are in. Since guest
# cell IDs map to host cell IDs, we can just look up the latter's
# socket.
socket_ids = set()
for guest_cell in numa_cells:
for host_cell in host_cells:
if guest_cell.id == host_cell.id:
socket_ids.add(host_cell.socket)
# now get a set of host NUMA nodes that are in the above sockets
allowed_numa_nodes = set()
for host_cell in host_cells:
if host_cell.socket in socket_ids:
allowed_numa_nodes.add(host_cell.id)
# filter out pools that are not in one of the correct host NUMA nodes.
return [
pool for pool in pools if any(
utils.pci_device_prop_match(pool, [{'numa_node': numa_node}])
for numa_node in allowed_numa_nodes
)
]
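# A sketch of the filter's effect (assumed pool shapes, not real Nova
# fixtures): with host cells (id=0, socket=0) and (id=1, socket=1) and a
# guest cell mapped to host cell 0, only pools on socket 0 survive:
#
#     pools = [{'numa_node': 0, 'count': 1}, {'numa_node': 1, 'count': 1}]
#     stats._filter_pools_for_socket_affinity(pools, [guest_cell_0])
#     # -> [{'numa_node': 0, 'count': 1}]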
def _filter_pools_for_unrequested_pfs(self, pools, request):
"""Filter out pools with PFs, unless these are required.
@@ -383,8 +425,8 @@ class PciDeviceStats(object):
return None
# Next, let's exclude all devices that aren't on the correct NUMA node
-# *assuming* we have devices and care about that, as determined by
-# policy
+# or socket, *assuming* we have devices and care about that, as
+# determined by policy
before_count = after_count
pools = self._filter_pools_for_numa_cells(pools, request, numa_cells)
after_count = sum([pool['count'] for pool in pools])


@@ -23,6 +23,7 @@ import mock
from oslo_config import cfg
from oslo_log import log as logging
from oslo_serialization import jsonutils
from oslo_utils import units
import nova
from nova import context
@@ -1027,3 +1028,72 @@ class PCIServersWithSRIOVAffinityPoliciesTest(_PCIServersTestBase):
group='pci')
self._test_policy(pci_numa_node, status, 'required')
def test_socket_policy_pass(self):
# With 1 socket containing 2 NUMA nodes, make the first node's CPUs
# available for pinning, but affine the PCI device to the second node.
# This should pass.
host_info = fakelibvirt.HostInfo(
cpu_nodes=2, cpu_sockets=1, cpu_cores=2, cpu_threads=2,
kB_mem=(16 * units.Gi) // units.Ki)
self.flags(cpu_dedicated_set='0-3', group='compute')
pci_info = fakelibvirt.HostPCIDevicesInfo(num_pci=1, numa_node=1)
self.start_compute(host_info=host_info, pci_info=pci_info)
extra_spec = {
'hw:cpu_policy': 'dedicated',
'pci_passthrough:alias': '%s:1' % self.ALIAS_NAME,
'hw:pci_numa_affinity_policy': 'socket'
}
flavor_id = self._create_flavor(extra_spec=extra_spec)
self._create_server(flavor_id=flavor_id)
self.assertTrue(self.mock_filter.called)
def test_socket_policy_fail(self):
# With 2 sockets containing 1 NUMA node each, make the first socket's
# CPUs available for pinning, but affine the PCI device to the second
# NUMA node in the second socket. This should fail.
host_info = fakelibvirt.HostInfo(
cpu_nodes=1, cpu_sockets=2, cpu_cores=2, cpu_threads=2,
kB_mem=(16 * units.Gi) // units.Ki)
self.flags(cpu_dedicated_set='0-3', group='compute')
pci_info = fakelibvirt.HostPCIDevicesInfo(num_pci=1, numa_node=1)
self.start_compute(host_info=host_info, pci_info=pci_info)
extra_spec = {
'hw:cpu_policy': 'dedicated',
'pci_passthrough:alias': '%s:1' % self.ALIAS_NAME,
'hw:pci_numa_affinity_policy': 'socket'
}
flavor_id = self._create_flavor(extra_spec=extra_spec)
server = self._create_server(
flavor_id=flavor_id, expected_state='ERROR')
self.assertIn('fault', server)
self.assertIn('No valid host', server['fault']['message'])
def test_socket_policy_multi_numa_pass(self):
# 2 sockets, 2 NUMA nodes each, with the PCI device on NUMA 0 and
# socket 0. If we restrict cpu_dedicated_set to NUMA 1, 2 and 3, we
# should still be able to boot an instance with hw:numa_nodes=3 and the
# `socket` policy, because one of the instance's NUMA nodes will be on
# the same socket as the PCI device (even if there is no direct NUMA
# node affinity).
host_info = fakelibvirt.HostInfo(
cpu_nodes=2, cpu_sockets=2, cpu_cores=2, cpu_threads=1,
kB_mem=(16 * units.Gi) // units.Ki)
self.flags(cpu_dedicated_set='2-7', group='compute')
pci_info = fakelibvirt.HostPCIDevicesInfo(num_pci=1, numa_node=0)
self.start_compute(host_info=host_info, pci_info=pci_info)
extra_spec = {
'hw:numa_nodes': '3',
'hw:cpu_policy': 'dedicated',
'pci_passthrough:alias': '%s:1' % self.ALIAS_NAME,
'hw:pci_numa_affinity_policy': 'socket'
}
flavor_id = self._create_flavor(vcpu=6, memory_mb=3144,
extra_spec=extra_spec)
self._create_server(flavor_id=flavor_id)
self.assertTrue(self.mock_filter.called)


@@ -97,7 +97,16 @@ class PciDeviceStatsTestCase(test.NoDBTestCase):
def setUp(self):
super(PciDeviceStatsTestCase, self).setUp()
-self.pci_stats = stats.PciDeviceStats(objects.NUMATopology())
+self._setup_pci_stats()
def _setup_pci_stats(self, numa_topology=None):
"""Exists for tests that need to setup pci_stats with a specific NUMA
topology, while still allowing tests that don't care to get the default
"empty" one.
"""
if not numa_topology:
numa_topology = objects.NUMATopology()
self.pci_stats = stats.PciDeviceStats(numa_topology)
# The following two calls need to be made before adding the devices.
patcher = fakes.fake_pci_whitelist()
self.addCleanup(patcher.stop)
@@ -229,6 +238,25 @@ class PciDeviceStatsTestCase(test.NoDBTestCase):
self.assertFalse(self.pci_stats.support_requests(pci_requests, cells))
def test_filter_pools_for_socket_affinity_no_socket(self):
self._setup_pci_stats(
objects.NUMATopology(
cells=[objects.NUMACell(socket=None)]))
self.assertEqual(
[],
self.pci_stats._filter_pools_for_socket_affinity(
self.pci_stats.pools, [objects.InstanceNUMACell()]))
def test_filter_pools_for_socket_affinity(self):
self._setup_pci_stats(
objects.NUMATopology(
cells=[objects.NUMACell(id=1, socket=1)]))
pools = self.pci_stats._filter_pools_for_socket_affinity(
self.pci_stats.pools, [objects.InstanceNUMACell(id=1)])
self.assertEqual(1, len(pools))
self.assertEqual('p2', pools[0]['product_id'])
self.assertEqual('v2', pools[0]['vendor_id'])
def test_consume_requests(self):
devs = self.pci_stats.consume_requests(pci_requests)
self.assertEqual(2, len(devs))


@@ -1210,6 +1210,7 @@ class LibvirtConnTestCase(test.NoDBTestCase,
'COMPUTE_NET_VIF_MODEL_VIRTIO': True,
'COMPUTE_SECURITY_TPM_1_2': False,
'COMPUTE_SECURITY_TPM_2_0': False,
'COMPUTE_SOCKET_PCI_NUMA_AFFINITY': True,
}
static_traits = drvr.static_traits
@@ -1255,6 +1256,7 @@ class LibvirtConnTestCase(test.NoDBTestCase,
'COMPUTE_NET_VIF_MODEL_VIRTIO': True,
'COMPUTE_SECURITY_TPM_1_2': False,
'COMPUTE_SECURITY_TPM_2_0': False,
'COMPUTE_SOCKET_PCI_NUMA_AFFINITY': True,
}
static_traits = drvr.static_traits


@@ -8132,6 +8132,7 @@ class LibvirtDriver(driver.ComputeDriver):
traits.update(self._get_video_model_traits())
traits.update(self._get_vif_model_traits())
traits.update(self._get_tpm_traits())
traits.update({ot.COMPUTE_SOCKET_PCI_NUMA_AFFINITY: True})
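# Because the trait is reported to placement, operators can additionally
# steer instances to hosts that support the new policy via Nova's
# standard trait-based scheduling, e.g. the flavor extra spec
# trait:COMPUTE_SOCKET_PCI_NUMA_AFFINITY=required (an existing
# mechanism, not introduced by this change).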
_, invalid_traits = ot.check_traits(traits)
for invalid_trait in invalid_traits:


@@ -0,0 +1,11 @@
---
features:
- |
A new PCI NUMA affinity policy is available. The
``hw:pci_numa_affinity_policy`` flavor extra spec and
``hw_pci_numa_affinity_policy`` image metadata property now accept a
``socket`` policy value. This value indicates that the PCI device must be
affined to the same host socket as at least one of the guest NUMA nodes.
For more information, see the `PCI Passthrough`__ guide.
.. __: https://docs.openstack.org/nova/latest/admin/pci-passthrough.html
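A minimal sketch of requesting the new policy through a flavor's extra
specs, mirroring the functional tests in this change (the ``a1`` alias
name is an assumption):

extra_spec = {
    'pci_passthrough:alias': 'a1:1',          # assumed PCI alias
    'hw:pci_numa_affinity_policy': 'socket',  # the new policy value
}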