Support per port NUMA policies with SR-IOV

This change adds support for booting VMs and attaching ports with
per-port SR-IOV NUMA affinity policies. Affinity during scheduling is
enforced by the PCI and NUMA affinity filters; affinity for attach is
enforced by the hardware module.

Change-Id: Id94fdc6789b2bae735b0d1e9a02dce60a8f3fb40
Partial-Implements: blueprint port-scoped-sriov-numa-affinity
commit 5cd1acc3bf (parent b2471dd578)
@@ -30,3 +30,4 @@ L3_NETWORK_TYPES = ['vxlan', 'gre', 'geneve']
 ALLOCATION = 'allocation'
 RESOURCE_REQUEST = 'resource_request'
 SEGMENT = 'Segment'
+NUMA_POLICY = 'numa_affinity_policy'
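For reference, the new NUMA_POLICY constant names the attribute exposed by neutron's port-numa-affinity-policy extension. A minimal sketch of a port body carrying that attribute, as nova reads it; the UUIDs are illustrative placeholders, not values from this change:

# Illustrative sketch only: a neutron port requesting a per-port NUMA
# affinity policy via the 'numa_affinity_policy' attribute defined above.
# The tests in this change exercise 'required', 'legacy', 'preferred'
# and 'socket' as values.
port = {
    'id': 'PORT_UUID',                    # placeholder
    'network_id': 'NET_UUID',             # placeholder
    'binding:vnic_type': 'direct',        # SR-IOV VF port
    'numa_affinity_policy': 'required',
}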
@@ -2026,6 +2026,8 @@ class API(base.Base):
         # the port binding profile and we can handle it as a boolean.
         return strutils.bool_from_string(value)

+    # NOTE(sean-k-mooney): we might want to have this return a
+    # nova.network.model.VIF object instead in the future.
     def _get_port_vnic_info(self, context, neutron, port_id):
         """Retrieve port vNIC info

@@ -2033,14 +2035,17 @@ class API(base.Base):
         :param neutron: The Neutron client
         :param port_id: The id of port to be queried

-        :return: A tuple of vNIC type, trusted status, network ID and resource
-            request of the port if any. Trusted status only affects SR-IOV
-            ports and will always be None for other port types.
+        :return: A tuple of vNIC type, trusted status, network ID, resource
+            request of the port if any and port numa affinity policy.
+            Trusted status only affects SR-IOV ports and will always be
+            None for other port types. If no port numa policy is
+            requested by a port, None will be returned.
         """
+        fields = ['binding:vnic_type', constants.BINDING_PROFILE,
+                  'network_id', constants.RESOURCE_REQUEST,
+                  constants.NUMA_POLICY]
         port = self._show_port(
-            context, port_id, neutron_client=neutron,
-            fields=['binding:vnic_type', constants.BINDING_PROFILE,
-                    'network_id', constants.RESOURCE_REQUEST])
+            context, port_id, neutron_client=neutron, fields=fields)
         network_id = port.get('network_id')
         trusted = None
         vnic_type = port.get('binding:vnic_type',
@@ -2053,7 +2058,8 @@ class API(base.Base):
         # applied to the port/network and the port-resource-request API
         # extension is enabled.
         resource_request = port.get(constants.RESOURCE_REQUEST, None)
-        return vnic_type, trusted, network_id, resource_request
+        numa_policy = port.get(constants.NUMA_POLICY, None)
+        return vnic_type, trusted, network_id, resource_request, numa_policy

     def create_resource_requests(
             self, context, requested_networks, pci_requests=None,
@@ -2091,11 +2097,12 @@ class API(base.Base):
             vnic_type = network_model.VNIC_TYPE_NORMAL
             pci_request_id = None
             requester_id = None
+            port_numa_policy = None

             if request_net.port_id:
-                result = self._get_port_vnic_info(
-                    context, neutron, request_net.port_id)
-                vnic_type, trusted, network_id, resource_request = result
+                (vnic_type, trusted, network_id, resource_request,
+                 port_numa_policy) = self._get_port_vnic_info(
+                    context, neutron, request_net.port_id)
                 physnet, tunneled_ = self._get_physnet_tunneled_info(
                     context, neutron, network_id)

@@ -2149,8 +2156,11 @@ class API(base.Base):
                     spec=[spec],
                     request_id=uuidutils.generate_uuid(),
                     requester_id=requester_id)
-                if affinity_policy:
-                    request.numa_policy = affinity_policy
+                # NOTE(sean-k-mooney): port NUMA policies take precedence
+                # over image and flavor policies.
+                numa_policy = port_numa_policy or affinity_policy
+                if numa_policy:
+                    request.numa_policy = numa_policy
                 pci_requests.requests.append(request)
                 pci_request_id = request.request_id

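The precedence rule above is a plain `or` fallback: a policy requested on the port wins, and the flavor/image-derived policy only applies when the port did not request one. A self-contained sketch of the same logic:

# Minimal sketch of the precedence rule applied in the hunk above: the
# port-level policy, when set, overrides the flavor/image-level policy.
def effective_numa_policy(port_numa_policy, affinity_policy):
    # port_numa_policy is None unless the neutron port requested one
    return port_numa_policy or affinity_policy

assert effective_numa_policy('preferred', 'required') == 'preferred'
assert effective_numa_policy(None, 'required') == 'required'
assert effective_numa_policy(None, None) is None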
@@ -3049,7 +3059,8 @@ class API(base.Base):
             ovs_interfaceid=ovs_interfaceid,
             devname=devname,
             active=vif_active,
-            preserve_on_delete=preserve_on_delete)
+            preserve_on_delete=preserve_on_delete
+        )

     def _build_network_info_model(self, context, instance, networks=None,
                                   port_ids=None, admin_client=None,
@@ -27,6 +27,7 @@ from oslo_utils import units

 import nova
 from nova import context
+from nova.network import constants
 from nova import objects
 from nova.objects import fields
 from nova.tests import fixtures as nova_fixtures
@@ -1097,3 +1098,238 @@ class PCIServersWithSRIOVAffinityPoliciesTest(_PCIServersTestBase):
             extra_spec=extra_spec)
         self._create_server(flavor_id=flavor_id)
         self.assertTrue(self.mock_filter.called)
+
+
+@ddt.ddt
+class PCIServersWithPortNUMAPoliciesTest(_PCIServersTestBase):
+
+    ALIAS_NAME = 'a1'
+    PCI_PASSTHROUGH_WHITELIST = [jsonutils.dumps(x) for x in (
+        {
+            'vendor_id': fakelibvirt.PCI_VEND_ID,
+            'product_id': fakelibvirt.PF_PROD_ID,
+            'physical_network': 'physnet4',
+        },
+        {
+            'vendor_id': fakelibvirt.PCI_VEND_ID,
+            'product_id': fakelibvirt.VF_PROD_ID,
+            'physical_network': 'physnet4',
+        },
+    )]
+    # we set the numa_affinity policy to required to ensure strict affinity
+    # between pci devices and the guest cpu and memory will be enforced.
+    PCI_ALIAS = [jsonutils.dumps(
+        {
+            'vendor_id': fakelibvirt.PCI_VEND_ID,
+            'product_id': fakelibvirt.PCI_PROD_ID,
+            'name': ALIAS_NAME,
+            'device_type': fields.PciDeviceType.STANDARD,
+            'numa_policy': fields.PCINUMAAffinityPolicy.REQUIRED,
+        }
+    )]
+
+    def setUp(self):
+        super().setUp()
+
+        # The ultimate base class _IntegratedTestBase uses NeutronFixture but
+        # we need a bit more intelligent neutron for these tests. Applying the
+        # new fixture here means that we re-stub what the previous neutron
+        # fixture already stubbed.
+        self.neutron = self.useFixture(base.LibvirtNeutronFixture(self))
+        self.flags(disable_fallback_pcpu_query=True, group='workarounds')
+
+    def _create_port_with_policy(self, policy):
+        port_data = copy.deepcopy(
+            base.LibvirtNeutronFixture.network_4_port_1)
+        port_data[constants.NUMA_POLICY] = policy
+        # create the port
+        new_port = self.neutron.create_port({'port': port_data})
+        port_id = new_port['port']['id']
+        port = self.neutron.show_port(port_id)['port']
+        self.assertEqual(port[constants.NUMA_POLICY], policy)
+        return port_id
+
+    # NOTE(sean-k-mooney): i could just apply the ddt decorators
+    # to this function for the most part but i have chosen to
+    # keep one top level function per policy to make documenting
+    # the test cases simpler.
+    def _test_policy(self, pci_numa_node, status, policy):
+        # only allow cpus on numa node 1 to be used for pinning
+        self.flags(cpu_dedicated_set='4-7', group='compute')
+        pci_info = fakelibvirt.HostPCIDevicesInfo(
+            num_pfs=1, num_vfs=2, numa_node=pci_numa_node)
+        self.start_compute(pci_info=pci_info)
+
+        # request cpu pinning to create a numa topology and allow the test
+        # to force which numa node the vm would have to be pinned to.
+        extra_spec = {
+            'hw:cpu_policy': 'dedicated',
+        }
+        flavor_id = self._create_flavor(extra_spec=extra_spec)
+
+        port_id = self._create_port_with_policy(policy)
+        # create a server using the VF via neutron
+        self._create_server(
+            flavor_id=flavor_id,
+            networks=[
+                {'port': port_id},
+            ],
+            expected_state=status
+        )
+
+        if status == 'ACTIVE':
+            self.assertTrue(self.mock_filter.called)
+        else:
+            # the PciPassthroughFilter should not have been called, since
+            # the NUMATopologyFilter should have eliminated the host first
+            self.assertFalse(self.mock_filter.called)
+
+    @ddt.unpack  # unpacks each sub-tuple e.g. *(pci_numa_node, status)
+    # the preferred policy should always pass regardless of numa affinity
+    @ddt.data((-1, 'ACTIVE'), (0, 'ACTIVE'), (1, 'ACTIVE'))
+    def test_create_server_with_sriov_numa_affinity_policy_preferred(
+            self, pci_numa_node, status):
+        """Validate behavior of 'preferred' PCI NUMA affinity policy.
+
+        This test ensures that it *is* possible to allocate CPU and memory
+        resources from one NUMA node and a PCI device from another *if*
+        the port NUMA affinity policy is set to preferred.
+        """
+        self._test_policy(pci_numa_node, status, 'preferred')
+
+    @ddt.unpack  # unpacks each sub-tuple e.g. *(pci_numa_node, status)
+    # the legacy policy allows a PCI device to be used if it has NUMA
+    # affinity or if no NUMA info is available, so we set the NUMA
+    # node for this device to -1, the sentinel value used by the
+    # Linux kernel for a device with no NUMA affinity.
+    @ddt.data((-1, 'ACTIVE'), (0, 'ERROR'), (1, 'ACTIVE'))
+    def test_create_server_with_sriov_numa_affinity_policy_legacy(
+            self, pci_numa_node, status):
+        """Validate behavior of 'legacy' PCI NUMA affinity policy.
+
+        This test ensures that it *is* possible to allocate CPU and memory
+        resources from one NUMA node and a PCI device from another *if*
+        the port NUMA affinity policy is set to legacy and the device
+        does not report NUMA information.
+        """
+        self._test_policy(pci_numa_node, status, 'legacy')
+
+    @ddt.unpack  # unpacks each sub-tuple e.g. *(pci_numa_node, status)
+    # The required policy requires a PCI device to both report a NUMA node
+    # and for the guest cpus and ram to be affinitized to the same
+    # NUMA node, so we create 1 pci device in the first NUMA node.
+    @ddt.data((-1, 'ERROR'), (0, 'ERROR'), (1, 'ACTIVE'))
+    def test_create_server_with_sriov_numa_affinity_policy_required(
+            self, pci_numa_node, status):
+        """Validate behavior of 'required' PCI NUMA affinity policy.
+
+        This test ensures that it *is not* possible to allocate CPU and
+        memory resources from one NUMA node and a PCI device from another
+        *if* the port NUMA affinity policy is set to required and the
+        device reports NUMA information.
+        """
+
+        # we set the numa_affinity policy to preferred to allow the PCI
+        # device to be selected from any numa node so we can prove the
+        # port policy overrides the alias.
+        alias = [jsonutils.dumps(
+            {
+                'vendor_id': fakelibvirt.PCI_VEND_ID,
+                'product_id': fakelibvirt.PCI_PROD_ID,
+                'name': self.ALIAS_NAME,
+                'device_type': fields.PciDeviceType.STANDARD,
+                'numa_policy': fields.PCINUMAAffinityPolicy.PREFERRED,
+            }
+        )]
+
+        self.flags(passthrough_whitelist=self.PCI_PASSTHROUGH_WHITELIST,
+                   alias=alias,
+                   group='pci')
+
+        self._test_policy(pci_numa_node, status, 'required')
+
+    def test_socket_policy_pass(self):
+        # With 1 socket containing 2 NUMA nodes, make the first node's CPUs
+        # available for pinning, but affine the PCI device to the second
+        # node. This should pass.
+        host_info = fakelibvirt.HostInfo(
+            cpu_nodes=2, cpu_sockets=1, cpu_cores=2, cpu_threads=2,
+            kB_mem=(16 * units.Gi) // units.Ki)
+        pci_info = fakelibvirt.HostPCIDevicesInfo(
+            num_pfs=1, num_vfs=1, numa_node=1)
+        self.flags(cpu_dedicated_set='0-3', group='compute')
+        self.start_compute(host_info=host_info, pci_info=pci_info)
+
+        extra_spec = {
+            'hw:cpu_policy': 'dedicated',
+        }
+        flavor_id = self._create_flavor(extra_spec=extra_spec)
+        port_id = self._create_port_with_policy('socket')
+        # create a server using the VF via neutron
+        self._create_server(
+            flavor_id=flavor_id,
+            networks=[
+                {'port': port_id},
+            ],
+        )
+        self.assertTrue(self.mock_filter.called)
+
+    def test_socket_policy_fail(self):
+        # With 2 sockets containing 1 NUMA node each, make the first
+        # socket's CPUs available for pinning, but affine the PCI device to
+        # the second NUMA node in the second socket. This should fail.
+        host_info = fakelibvirt.HostInfo(
+            cpu_nodes=1, cpu_sockets=2, cpu_cores=2, cpu_threads=2,
+            kB_mem=(16 * units.Gi) // units.Ki)
+        pci_info = fakelibvirt.HostPCIDevicesInfo(
+            num_pfs=1, num_vfs=1, numa_node=1)
+        self.flags(cpu_dedicated_set='0-3', group='compute')
+        self.start_compute(host_info=host_info, pci_info=pci_info)
+
+        extra_spec = {
+            'hw:cpu_policy': 'dedicated',
+        }
+        flavor_id = self._create_flavor(extra_spec=extra_spec)
+        port_id = self._create_port_with_policy('socket')
+        # create a server using the VF via neutron
+        server = self._create_server(
+            flavor_id=flavor_id,
+            networks=[
+                {'port': port_id},
+            ],
+            expected_state='ERROR'
+        )
+        self.assertIn('fault', server)
+        self.assertIn('No valid host', server['fault']['message'])
+        self.assertFalse(self.mock_filter.called)
+
+    def test_socket_policy_multi_numa_pass(self):
+        # 2 sockets, 2 NUMA nodes each, with the PCI device on NUMA 0 and
+        # socket 0. If we restrict cpu_dedicated_set to NUMA 1, 2 and 3, we
+        # should still be able to boot an instance with hw:numa_nodes=3 and
+        # the `socket` policy, because one of the instance's NUMA nodes
+        # will be on the same socket as the PCI device (even if there is no
+        # direct NUMA node affinity).
+        host_info = fakelibvirt.HostInfo(
+            cpu_nodes=2, cpu_sockets=2, cpu_cores=2, cpu_threads=1,
+            kB_mem=(16 * units.Gi) // units.Ki)
+        pci_info = fakelibvirt.HostPCIDevicesInfo(
+            num_pfs=1, num_vfs=1, numa_node=0)
+        self.flags(cpu_dedicated_set='2-7', group='compute')
+        self.start_compute(host_info=host_info, pci_info=pci_info)
+
+        extra_spec = {
+            'hw:numa_nodes': '3',
+            'hw:cpu_policy': 'dedicated',
+        }
+        flavor_id = self._create_flavor(vcpu=6, memory_mb=3144,
+                                        extra_spec=extra_spec)
+        port_id = self._create_port_with_policy('socket')
+        # create a server using the VF via neutron
+        self._create_server(
+            flavor_id=flavor_id,
+            networks=[
+                {'port': port_id},
+            ],
+        )
+        self.assertTrue(self.mock_filter.called)
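The @ddt.data/@ddt.unpack decorators used above generate one test method per data tuple. A standalone sketch of that pattern, independent of the nova fixtures:

# Standalone sketch of the ddt pattern used in the tests above:
# @ddt.data supplies (pci_numa_node, status) tuples and @ddt.unpack
# expands each tuple into separate arguments, yielding one generated
# test method per tuple.
import unittest

import ddt


@ddt.ddt
class ExamplePolicyTest(unittest.TestCase):

    @ddt.unpack
    @ddt.data((-1, 'ACTIVE'), (0, 'ERROR'), (1, 'ACTIVE'))
    def test_policy(self, pci_numa_node, status):
        # stand-in assertion; the real tests boot a server and check state
        self.assertIn(status, ('ACTIVE', 'ERROR'))


if __name__ == '__main__':
    unittest.main()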
@@ -3533,10 +3533,10 @@ class TestAPI(TestAPIBase):
         self.assertFalse(tunneled)
         self.assertIsNone(physnet_name)

-    def _test_get_port_vnic_info(self, mock_get_client,
-                                 binding_vnic_type,
-                                 expected_vnic_type,
-                                 port_resource_request=None):
+    def _test_get_port_vnic_info(
+        self, mock_get_client, binding_vnic_type, expected_vnic_type,
+        port_resource_request=None, numa_policy=None
+    ):
         test_port = {
             'port': {'id': 'my_port_id2',
                      'network_id': 'net-id',
@@ -3548,22 +3548,25 @@ class TestAPI(TestAPIBase):
         if port_resource_request:
             test_port['port'][
                 constants.RESOURCE_REQUEST] = port_resource_request
+        if numa_policy:
+            test_port['port'][constants.NUMA_POLICY] = numa_policy

         mock_get_client.reset_mock()
         mock_client = mock_get_client.return_value
         mock_client.show_port.return_value = test_port

-        vnic_type, trusted, network_id, resource_request = (
+        vnic_type, trusted, network_id, resource_request, numa = (
             self.api._get_port_vnic_info(
                 self.context, mock_client, test_port['port']['id']))

         mock_client.show_port.assert_called_once_with(test_port['port']['id'],
             fields=['binding:vnic_type', 'binding:profile', 'network_id',
-                    constants.RESOURCE_REQUEST])
+                    constants.RESOURCE_REQUEST, constants.NUMA_POLICY])
         self.assertEqual(expected_vnic_type, vnic_type)
         self.assertEqual('net-id', network_id)
         self.assertIsNone(trusted)
         self.assertEqual(port_resource_request, resource_request)
+        self.assertEqual(numa_policy, numa)

     @mock.patch.object(neutronapi, 'get_client', return_value=mock.MagicMock())
     def test_get_port_vnic_info_1(self, mock_get_client):
@@ -3580,6 +3583,14 @@ class TestAPI(TestAPIBase):
         self._test_get_port_vnic_info(mock_get_client, None,
                                       model.VNIC_TYPE_NORMAL)

+    @mock.patch.object(neutronapi, 'get_client', return_value=mock.Mock())
+    def test_get_port_vnic_info_4(self, mock_get_client):
+        policies = ['required', 'legacy', 'preferred']
+        for policy_name in policies:
+            self._test_get_port_vnic_info(
+                mock_get_client, None, model.VNIC_TYPE_NORMAL,
+                numa_policy=policy_name)
+
     @mock.patch.object(neutronapi, 'get_client')
     def test_get_port_vnic_info_requested_resources(self, mock_get_client):
         self._test_get_port_vnic_info(
@@ -3612,11 +3623,11 @@ class TestAPI(TestAPIBase):
         mock_client.list_extensions.return_value = test_ext_list
         result = self.api._get_port_vnic_info(
             self.context, mock_client, test_port['port']['id'])
-        vnic_type, trusted, network_id, resource_requests = result
+        vnic_type, trusted, network_id, resource_requests, _ = result

         mock_client.show_port.assert_called_once_with(test_port['port']['id'],
             fields=['binding:vnic_type', 'binding:profile', 'network_id',
-                    constants.RESOURCE_REQUEST])
+                    constants.RESOURCE_REQUEST, constants.NUMA_POLICY])
         self.assertEqual(model.VNIC_TYPE_DIRECT, vnic_type)
         self.assertEqual('net-id', network_id)
         self.assertTrue(trusted)
@@ -5766,14 +5777,14 @@ class TestAPI(TestAPIBase):
         # _get_port_vnic_info should be called for every NetworkRequest with a
         # port_id attribute (so six times)
         mock_get_port_vnic_info.side_effect = [
-            (model.VNIC_TYPE_DIRECT, None, 'netN', None),
+            (model.VNIC_TYPE_DIRECT, None, 'netN', None, None),
             (model.VNIC_TYPE_NORMAL, None, 'netN',
-             mock.sentinel.resource_request1),
-            (model.VNIC_TYPE_MACVTAP, None, 'netN', None),
-            (model.VNIC_TYPE_MACVTAP, None, 'netN', None),
-            (model.VNIC_TYPE_DIRECT_PHYSICAL, None, 'netN', None),
+             mock.sentinel.resource_request1, None),
+            (model.VNIC_TYPE_MACVTAP, None, 'netN', None, None),
+            (model.VNIC_TYPE_MACVTAP, None, 'netN', None, None),
+            (model.VNIC_TYPE_DIRECT_PHYSICAL, None, 'netN', None, None),
             (model.VNIC_TYPE_DIRECT, True, 'netN',
-             mock.sentinel.resource_request2),
+             mock.sentinel.resource_request2, None),
         ]
         # _get_physnet_tunneled_info should be called for every NetworkRequest
         # (so seven times)
@@ -0,0 +1,15 @@
+---
+features:
+  - |
+    Support was added to specify a port NUMA affinity policy
+    for SR-IOV ports. This feature allows users to set a NUMA affinity
+    policy between a neutron port and a NUMA guest's CPUs and memory.
+    This feature supports the same policies as the existing VM-scoped
+    PCI NUMA affinity policy and takes precedence over the
+    flavor and image policy. This allows operators to set a default
+    affinity policy in the flavor or image while end users can
+    express a more granular affinity policy.
+    To use this feature operators must enable the
+    ``port-numa-affinity-policy`` neutron extension and configure the
+    service plugin in neutron. By default the extension
+    is listed as available but is not enabled.
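As a usage sketch (assumed end-user workflow, not part of this change): once the neutron extension is enabled, a user can request the policy at port-create time. python-neutronclient forwards the request body verbatim, so the numa_affinity_policy attribute reaches the extension directly; the endpoint, credentials and UUIDs below are illustrative placeholders:

# Assumed end-user workflow, not part of this diff: create an SR-IOV
# port that requests strict per-port NUMA affinity. All credentials,
# endpoints and UUIDs are illustrative placeholders.
from keystoneauth1 import session
from keystoneauth1.identity import v3
from neutronclient.v2_0 import client

auth = v3.Password(auth_url='http://controller:5000/v3',
                   username='demo', password='secret',
                   project_name='demo', user_domain_id='default',
                   project_domain_id='default')
neutron = client.Client(session=session.Session(auth=auth))
port = neutron.create_port({'port': {
    'network_id': 'NET_UUID',             # placeholder network UUID
    'binding:vnic_type': 'direct',        # SR-IOV VF
    'numa_affinity_policy': 'required',   # per-port policy from this change
}})['port']
# The port can then be passed to nova at boot or attach time, where the
# PCI and NUMA affinity filters enforce the requested policy.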