diff --git a/nova/network/constants.py b/nova/network/constants.py
index f2c59efeefc4..d392141e6892 100644
--- a/nova/network/constants.py
+++ b/nova/network/constants.py
@@ -30,3 +30,4 @@ L3_NETWORK_TYPES = ['vxlan', 'gre', 'geneve']
 ALLOCATION = 'allocation'
 RESOURCE_REQUEST = 'resource_request'
 SEGMENT = 'Segment'
+NUMA_POLICY = 'numa_affinity_policy'
diff --git a/nova/network/neutron.py b/nova/network/neutron.py
index 6139a7d53434..a45e48bf2485 100644
--- a/nova/network/neutron.py
+++ b/nova/network/neutron.py
@@ -2026,6 +2026,8 @@ class API(base.Base):
         # the port binding profile and we can handle it as a boolean.
         return strutils.bool_from_string(value)
 
+    # NOTE(sean-k-mooney): we might want to have this return a
+    # nova.network.model.VIF object instead in the future.
     def _get_port_vnic_info(self, context, neutron, port_id):
         """Retrieve port vNIC info
 
@@ -2033,14 +2035,17 @@ class API(base.Base):
         :param neutron: The Neutron client
         :param port_id: The id of port to be queried
 
-        :return: A tuple of vNIC type, trusted status, network ID and resource
-            request of the port if any. Trusted status only affects SR-IOV
-            ports and will always be None for other port types.
+        :return: A tuple of vNIC type, trusted status, network ID, resource
+            request of the port if any, and the port NUMA affinity policy.
+            Trusted status only affects SR-IOV ports and will always be
+            None for other port types. If the port does not request a NUMA
+            policy, None will be returned.
         """
+        fields = ['binding:vnic_type', constants.BINDING_PROFILE,
+                  'network_id', constants.RESOURCE_REQUEST,
+                  constants.NUMA_POLICY]
         port = self._show_port(
-            context, port_id, neutron_client=neutron,
-            fields=['binding:vnic_type', constants.BINDING_PROFILE,
-                    'network_id', constants.RESOURCE_REQUEST])
+            context, port_id, neutron_client=neutron, fields=fields)
         network_id = port.get('network_id')
         trusted = None
         vnic_type = port.get('binding:vnic_type',
@@ -2053,7 +2058,8 @@ class API(base.Base):
         # applied to the port/network and the port-resource-request API
         # extension is enabled.
         resource_request = port.get(constants.RESOURCE_REQUEST, None)
-        return vnic_type, trusted, network_id, resource_request
+        numa_policy = port.get(constants.NUMA_POLICY, None)
+        return vnic_type, trusted, network_id, resource_request, numa_policy
 
     def create_resource_requests(
             self, context, requested_networks, pci_requests=None,
@@ -2091,11 +2097,12 @@ class API(base.Base):
             vnic_type = network_model.VNIC_TYPE_NORMAL
             pci_request_id = None
             requester_id = None
+            port_numa_policy = None
+
             if request_net.port_id:
-                result = self._get_port_vnic_info(
-                    context, neutron, request_net.port_id)
-                vnic_type, trusted, network_id, resource_request = result
+                (vnic_type, trusted, network_id, resource_request,
+                 port_numa_policy) = self._get_port_vnic_info(
+                    context, neutron, request_net.port_id)
                 physnet, tunneled_ = self._get_physnet_tunneled_info(
                     context, neutron, network_id)
@@ -2149,8 +2156,11 @@ class API(base.Base):
                     spec=[spec],
                     request_id=uuidutils.generate_uuid(),
                     requester_id=requester_id)
-                if affinity_policy:
-                    request.numa_policy = affinity_policy
+                # NOTE(sean-k-mooney): port NUMA policies take precedence
+                # over image and flavor policies.
+                numa_policy = port_numa_policy or affinity_policy
+                if numa_policy:
+                    request.numa_policy = numa_policy
                 pci_requests.requests.append(request)
                 pci_request_id = request.request_id
@@ -3049,7 +3059,8 @@ class API(base.Base):
             ovs_interfaceid=ovs_interfaceid,
             devname=devname,
             active=vif_active,
-            preserve_on_delete=preserve_on_delete)
+            preserve_on_delete=preserve_on_delete
+        )
 
     def _build_network_info_model(self, context, instance, networks=None,
                                   port_ids=None, admin_client=None,
diff --git a/nova/tests/functional/libvirt/test_pci_sriov_servers.py b/nova/tests/functional/libvirt/test_pci_sriov_servers.py
index 883532249e23..c42c2eac8d68 100644
--- a/nova/tests/functional/libvirt/test_pci_sriov_servers.py
+++ b/nova/tests/functional/libvirt/test_pci_sriov_servers.py
@@ -27,6 +27,7 @@ from oslo_utils import units
 
 import nova
 from nova import context
+from nova.network import constants
 from nova import objects
 from nova.objects import fields
 from nova.tests import fixtures as nova_fixtures
@@ -1097,3 +1098,238 @@ class PCIServersWithSRIOVAffinityPoliciesTest(_PCIServersTestBase):
             extra_spec=extra_spec)
         self._create_server(flavor_id=flavor_id)
         self.assertTrue(self.mock_filter.called)
+
+
+@ddt.ddt
+class PCIServersWithPortNUMAPoliciesTest(_PCIServersTestBase):
+
+    ALIAS_NAME = 'a1'
+    PCI_PASSTHROUGH_WHITELIST = [jsonutils.dumps(x) for x in (
+        {
+            'vendor_id': fakelibvirt.PCI_VEND_ID,
+            'product_id': fakelibvirt.PF_PROD_ID,
+            'physical_network': 'physnet4',
+        },
+        {
+            'vendor_id': fakelibvirt.PCI_VEND_ID,
+            'product_id': fakelibvirt.VF_PROD_ID,
+            'physical_network': 'physnet4',
+        },
+    )]
+    # we set the numa_affinity policy to required to ensure strict affinity
+    # between PCI devices and the guest CPUs and memory is enforced.
+    PCI_ALIAS = [jsonutils.dumps(
+        {
+            'vendor_id': fakelibvirt.PCI_VEND_ID,
+            'product_id': fakelibvirt.PCI_PROD_ID,
+            'name': ALIAS_NAME,
+            'device_type': fields.PciDeviceType.STANDARD,
+            'numa_policy': fields.PCINUMAAffinityPolicy.REQUIRED,
+        }
+    )]
+
+    def setUp(self):
+        super().setUp()
+
+        # The ultimate base class _IntegratedTestBase uses NeutronFixture but
+        # we need a bit more intelligent neutron for these tests. Applying the
+        # new fixture here means that we re-stub what the previous neutron
+        # fixture already stubbed.
+        self.neutron = self.useFixture(base.LibvirtNeutronFixture(self))
+        self.flags(disable_fallback_pcpu_query=True, group='workarounds')
+
+    def _create_port_with_policy(self, policy):
+        port_data = copy.deepcopy(
+            base.LibvirtNeutronFixture.network_4_port_1)
+        port_data[constants.NUMA_POLICY] = policy
+        # create the port
+        new_port = self.neutron.create_port({'port': port_data})
+        port_id = new_port['port']['id']
+        port = self.neutron.show_port(port_id)['port']
+        self.assertEqual(port[constants.NUMA_POLICY], policy)
+        return port_id
+
+    # NOTE(sean-k-mooney): I could just apply the ddt decorators
+    # to this function for the most part, but I have chosen to
+    # keep one top-level function per policy to make documenting
+    # the test cases simpler.
+    def _test_policy(self, pci_numa_node, status, policy):
+        # only allow CPUs on NUMA node 1 to be used for pinning
+        self.flags(cpu_dedicated_set='4-7', group='compute')
+        pci_info = fakelibvirt.HostPCIDevicesInfo(
+            num_pfs=1, num_vfs=2, numa_node=pci_numa_node)
+        self.start_compute(pci_info=pci_info)
+
+        # request CPU pinning to create a NUMA topology and allow the test to
+        # force which NUMA node the VM would have to be pinned to.
+        extra_spec = {
+            'hw:cpu_policy': 'dedicated',
+        }
+        flavor_id = self._create_flavor(extra_spec=extra_spec)
+
+        port_id = self._create_port_with_policy(policy)
+        # create a server using the VF via neutron
+        self._create_server(
+            flavor_id=flavor_id,
+            networks=[
+                {'port': port_id},
+            ],
+            expected_state=status
+        )
+
+        if status == 'ACTIVE':
+            self.assertTrue(self.mock_filter.called)
+        else:
+            # the PciPassthroughFilter should not have been called, since the
+            # NUMATopologyFilter should have eliminated the host first
+            self.assertFalse(self.mock_filter.called)
+
+    @ddt.unpack  # unpacks each sub-tuple e.g. *(pci_numa_node, status)
+    # the preferred policy should always pass regardless of NUMA affinity
+    @ddt.data((-1, 'ACTIVE'), (0, 'ACTIVE'), (1, 'ACTIVE'))
+    def test_create_server_with_sriov_numa_affinity_policy_preferred(
+            self, pci_numa_node, status):
+        """Validate behavior of 'preferred' PCI NUMA affinity policy.
+
+        This test ensures that it *is* possible to allocate CPU and memory
+        resources from one NUMA node and a PCI device from another *if*
+        the port NUMA affinity policy is set to preferred.
+        """
+        self._test_policy(pci_numa_node, status, 'preferred')
+
+    @ddt.unpack  # unpacks each sub-tuple e.g. *(pci_numa_node, status)
+    # the legacy policy allows a PCI device to be used if it has NUMA
+    # affinity or if no NUMA info is available, so we set the NUMA
+    # node for this device to -1, which is the sentinel value used by the
+    # Linux kernel for a device with no NUMA affinity.
+    @ddt.data((-1, 'ACTIVE'), (0, 'ERROR'), (1, 'ACTIVE'))
+    def test_create_server_with_sriov_numa_affinity_policy_legacy(
+            self, pci_numa_node, status):
+        """Validate behavior of 'legacy' PCI NUMA affinity policy.
+
+        This test ensures that it *is* possible to allocate CPU and memory
+        resources from one NUMA node and a PCI device from another *if*
+        the port NUMA affinity policy is set to legacy and the device
+        does not report NUMA information.
+        """
+        self._test_policy(pci_numa_node, status, 'legacy')
+
+    @ddt.unpack  # unpacks each sub-tuple e.g. *(pci_numa_node, status)
+    # The required policy requires a PCI device both to report a NUMA
+    # node and for the guest CPUs and RAM to be affinitized to the same
+    # NUMA node, so we create 1 PCI device in the first NUMA node.
+    @ddt.data((-1, 'ERROR'), (0, 'ERROR'), (1, 'ACTIVE'))
+    def test_create_server_with_sriov_numa_affinity_policy_required(
+            self, pci_numa_node, status):
+        """Validate behavior of 'required' PCI NUMA affinity policy.
+
+        This test ensures that it *is not* possible to allocate CPU and
+        memory resources from one NUMA node and a PCI device from another
+        *if* the port NUMA affinity policy is set to required and the
+        device does report NUMA information.
+        """
+
+        # we set the numa_affinity policy to preferred to allow the PCI
+        # device to be selected from any NUMA node so we can prove the
+        # port policy overrides the alias.
+        alias = [jsonutils.dumps(
+            {
+                'vendor_id': fakelibvirt.PCI_VEND_ID,
+                'product_id': fakelibvirt.PCI_PROD_ID,
+                'name': self.ALIAS_NAME,
+                'device_type': fields.PciDeviceType.STANDARD,
+                'numa_policy': fields.PCINUMAAffinityPolicy.PREFERRED,
+            }
+        )]
+
+        self.flags(passthrough_whitelist=self.PCI_PASSTHROUGH_WHITELIST,
+                   alias=alias,
+                   group='pci')
+
+        self._test_policy(pci_numa_node, status, 'required')
+
+    def test_socket_policy_pass(self):
+        # With 1 socket containing 2 NUMA nodes, make the first node's CPUs
+        # available for pinning, but affine the PCI device to the second node.
+        # This should pass.
+        host_info = fakelibvirt.HostInfo(
+            cpu_nodes=2, cpu_sockets=1, cpu_cores=2, cpu_threads=2,
+            kB_mem=(16 * units.Gi) // units.Ki)
+        pci_info = fakelibvirt.HostPCIDevicesInfo(
+            num_pfs=1, num_vfs=1, numa_node=1)
+        self.flags(cpu_dedicated_set='0-3', group='compute')
+        self.start_compute(host_info=host_info, pci_info=pci_info)
+
+        extra_spec = {
+            'hw:cpu_policy': 'dedicated',
+        }
+        flavor_id = self._create_flavor(extra_spec=extra_spec)
+        port_id = self._create_port_with_policy('socket')
+        # create a server using the VF via neutron
+        self._create_server(
+            flavor_id=flavor_id,
+            networks=[
+                {'port': port_id},
+            ],
+        )
+        self.assertTrue(self.mock_filter.called)
+
+    def test_socket_policy_fail(self):
+        # With 2 sockets containing 1 NUMA node each, make the first socket's
+        # CPUs available for pinning, but affine the PCI device to the second
+        # NUMA node in the second socket. This should fail.
+        host_info = fakelibvirt.HostInfo(
+            cpu_nodes=1, cpu_sockets=2, cpu_cores=2, cpu_threads=2,
+            kB_mem=(16 * units.Gi) // units.Ki)
+        pci_info = fakelibvirt.HostPCIDevicesInfo(
+            num_pfs=1, num_vfs=1, numa_node=1)
+        self.flags(cpu_dedicated_set='0-3', group='compute')
+        self.start_compute(host_info=host_info, pci_info=pci_info)
+
+        extra_spec = {
+            'hw:cpu_policy': 'dedicated',
+        }
+        flavor_id = self._create_flavor(extra_spec=extra_spec)
+        port_id = self._create_port_with_policy('socket')
+        # create a server using the VF via neutron
+        server = self._create_server(
+            flavor_id=flavor_id,
+            networks=[
+                {'port': port_id},
+            ],
+            expected_state='ERROR'
+        )
+        self.assertIn('fault', server)
+        self.assertIn('No valid host', server['fault']['message'])
+        self.assertFalse(self.mock_filter.called)
+
+    def test_socket_policy_multi_numa_pass(self):
+        # 2 sockets, 2 NUMA nodes each, with the PCI device on NUMA node 0
+        # in socket 0. If we restrict cpu_dedicated_set to NUMA nodes 1, 2
+        # and 3, we should still be able to boot an instance with
+        # hw:numa_nodes=3 and the `socket` policy, because one of the
+        # instance's NUMA nodes will be on the same socket as the PCI device
+        # (even if there is no direct NUMA node affinity).
+        host_info = fakelibvirt.HostInfo(
+            cpu_nodes=2, cpu_sockets=2, cpu_cores=2, cpu_threads=1,
+            kB_mem=(16 * units.Gi) // units.Ki)
+        pci_info = fakelibvirt.HostPCIDevicesInfo(
+            num_pfs=1, num_vfs=1, numa_node=0)
+        self.flags(cpu_dedicated_set='2-7', group='compute')
+        self.start_compute(host_info=host_info, pci_info=pci_info)
+
+        extra_spec = {
+            'hw:numa_nodes': '3',
+            'hw:cpu_policy': 'dedicated',
+        }
+        flavor_id = self._create_flavor(vcpu=6, memory_mb=3144,
+                                        extra_spec=extra_spec)
+        port_id = self._create_port_with_policy('socket')
+        # create a server using the VF via neutron
+        self._create_server(
+            flavor_id=flavor_id,
+            networks=[
+                {'port': port_id},
+            ],
+        )
+        self.assertTrue(self.mock_filter.called)
diff --git a/nova/tests/unit/network/test_neutron.py b/nova/tests/unit/network/test_neutron.py
index 1db825ea1a75..b039c75b26a6 100644
--- a/nova/tests/unit/network/test_neutron.py
+++ b/nova/tests/unit/network/test_neutron.py
@@ -3533,10 +3533,10 @@ class TestAPI(TestAPIBase):
         self.assertFalse(tunneled)
         self.assertIsNone(physnet_name)
 
-    def _test_get_port_vnic_info(self, mock_get_client,
-                                 binding_vnic_type,
-                                 expected_vnic_type,
-                                 port_resource_request=None):
+    def _test_get_port_vnic_info(
+            self, mock_get_client, binding_vnic_type, expected_vnic_type,
+            port_resource_request=None, numa_policy=None
+    ):
         test_port = {
             'port': {'id': 'my_port_id2',
                      'network_id': 'net-id',
@@ -3548,22 +3548,25 @@ class TestAPI(TestAPIBase):
         if port_resource_request:
             test_port['port'][
                 constants.RESOURCE_REQUEST] = port_resource_request
+        if numa_policy:
+            test_port['port'][constants.NUMA_POLICY] = numa_policy
         mock_get_client.reset_mock()
         mock_client = mock_get_client.return_value
         mock_client.show_port.return_value = test_port
 
-        vnic_type, trusted, network_id, resource_request = (
+        vnic_type, trusted, network_id, resource_request, numa = (
             self.api._get_port_vnic_info(
                 self.context, mock_client, test_port['port']['id']))
 
         mock_client.show_port.assert_called_once_with(test_port['port']['id'],
             fields=['binding:vnic_type', 'binding:profile', 'network_id',
-                    constants.RESOURCE_REQUEST])
+                    constants.RESOURCE_REQUEST, constants.NUMA_POLICY])
         self.assertEqual(expected_vnic_type, vnic_type)
         self.assertEqual('net-id', network_id)
         self.assertIsNone(trusted)
         self.assertEqual(port_resource_request, resource_request)
+        self.assertEqual(numa_policy, numa)
 
     @mock.patch.object(neutronapi, 'get_client', return_value=mock.MagicMock())
     def test_get_port_vnic_info_1(self, mock_get_client):
@@ -3580,6 +3583,14 @@ class TestAPI(TestAPIBase):
         self._test_get_port_vnic_info(mock_get_client, None,
                                       model.VNIC_TYPE_NORMAL)
 
+    @mock.patch.object(neutronapi, 'get_client', return_value=mock.Mock())
+    def test_get_port_vnic_info_4(self, mock_get_client):
+        policies = ['required', 'legacy', 'preferred']
+        for policy_name in policies:
+            self._test_get_port_vnic_info(
+                mock_get_client, None, model.VNIC_TYPE_NORMAL,
+                numa_policy=policy_name)
+
     @mock.patch.object(neutronapi, 'get_client')
     def test_get_port_vnic_info_requested_resources(self, mock_get_client):
         self._test_get_port_vnic_info(
@@ -3612,11 +3623,11 @@ class TestAPI(TestAPIBase):
         mock_client.list_extensions.return_value = test_ext_list
         result = self.api._get_port_vnic_info(
             self.context, mock_client, test_port['port']['id'])
-        vnic_type, trusted, network_id, resource_requests = result
+        vnic_type, trusted, network_id, resource_requests, _ = result
 
         mock_client.show_port.assert_called_once_with(test_port['port']['id'],
             fields=['binding:vnic_type', 'binding:profile',
                    'network_id',
-                    constants.RESOURCE_REQUEST])
+                    constants.RESOURCE_REQUEST, constants.NUMA_POLICY])
         self.assertEqual(model.VNIC_TYPE_DIRECT, vnic_type)
         self.assertEqual('net-id', network_id)
         self.assertTrue(trusted)
@@ -5766,14 +5777,14 @@ class TestAPI(TestAPIBase):
         # _get_port_vnic_info should be called for every NetworkRequest with a
         # port_id attribute (so six times)
         mock_get_port_vnic_info.side_effect = [
-            (model.VNIC_TYPE_DIRECT, None, 'netN', None),
+            (model.VNIC_TYPE_DIRECT, None, 'netN', None, None),
             (model.VNIC_TYPE_NORMAL, None, 'netN',
-             mock.sentinel.resource_request1),
-            (model.VNIC_TYPE_MACVTAP, None, 'netN', None),
-            (model.VNIC_TYPE_MACVTAP, None, 'netN', None),
-            (model.VNIC_TYPE_DIRECT_PHYSICAL, None, 'netN', None),
+             mock.sentinel.resource_request1, None),
+            (model.VNIC_TYPE_MACVTAP, None, 'netN', None, None),
+            (model.VNIC_TYPE_MACVTAP, None, 'netN', None, None),
+            (model.VNIC_TYPE_DIRECT_PHYSICAL, None, 'netN', None, None),
             (model.VNIC_TYPE_DIRECT, True, 'netN',
-             mock.sentinel.resource_request2),
+             mock.sentinel.resource_request2, None),
         ]
         # _get_physnet_tunneled_info should be called for every NetworkRequest
         # (so seven times)
diff --git a/releasenotes/notes/port-numa-affinity-policy-f51a74065a1e4369.yaml b/releasenotes/notes/port-numa-affinity-policy-f51a74065a1e4369.yaml
new file mode 100644
index 000000000000..5f861de14b76
--- /dev/null
+++ b/releasenotes/notes/port-numa-affinity-policy-f51a74065a1e4369.yaml
@@ -0,0 +1,15 @@
+---
+features:
+  - |
+    Support was added to specify a NUMA affinity policy
+    for SR-IOV ports. This feature allows users to set a NUMA affinity
+    policy between a neutron port and a NUMA guest's CPUs and memory.
+    This feature supports the same policies as the existing VM-scoped
+    PCI NUMA affinity policy and takes precedence over the
+    flavor and image policies. This allows operators to set a default
+    affinity policy in the flavor or image while end users can
+    express a more granular affinity policy.
+    To use this feature, operators must enable the
+    ``port-numa-affinity-policy`` neutron extension and configure the
+    service plugin in neutron. By default, the extension
+    is listed as available but is not enabled.
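
Reviewer note: a minimal usage sketch of what this change plus the
``port-numa-affinity-policy`` neutron extension enable for end users. It is
an illustration only, assuming a configured python-neutronclient v2.0
``Client``; the helper name, network UUID, and vnic type are placeholders,
not values from this change.

    # Hypothetical sketch: create an SR-IOV port that requests a NUMA
    # affinity policy, mirroring what _create_port_with_policy() does in
    # the functional tests above through the neutron fixture.
    def create_numa_affine_port(neutron, network_id, policy='required'):
        """Create a direct (SR-IOV) port requesting a NUMA affinity policy.

        ``neutron`` is assumed to be a neutronclient.v2_0.client.Client;
        'numa_affinity_policy' is only accepted once the
        port-numa-affinity-policy extension is enabled in neutron.
        """
        body = {
            'port': {
                'network_id': network_id,       # placeholder UUID
                'binding:vnic_type': 'direct',  # SR-IOV VF port
                # same values as the flavor/image policy; the tests above
                # exercise required, legacy, preferred and socket
                'numa_affinity_policy': policy,
            }
        }
        return neutron.create_port(body)['port']

A server booted with ``networks=[{'port': port['id']}]`` then carries the
policy into its InstancePCIRequest, as exercised by ``_test_policy`` above.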
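
A second toy illustration (not nova code) of the precedence rule documented
by the NOTE in ``create_resource_requests``: a policy requested on the port
wins, and a port that requests no policy falls back to the flavor/image
derived ``affinity_policy``.

    # mirrors the added line:
    #     numa_policy = port_numa_policy or affinity_policy
    def effective_numa_policy(port_numa_policy, affinity_policy):
        # _get_port_vnic_info() returns None when the port requests no policy
        return port_numa_policy or affinity_policy

    assert effective_numa_policy('required', 'preferred') == 'required'
    assert effective_numa_policy(None, 'preferred') == 'preferred'
    assert effective_numa_policy(None, None) is None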