From 0620678344d0f032a33e952d4d0fa653741f09e7 Mon Sep 17 00:00:00 2001 From: Dmitrii Shcherbakov Date: Sun, 16 Jan 2022 23:27:38 +0300 Subject: [PATCH] [yoga] Add support for VNIC_REMOTE_MANAGED Allow instances to be created with VNIC_TYPE_REMOTE_MANAGED ports. Those ports are assumed to require remote-managed PCI devices which means that operators need to tag those as "remote_managed" in the PCI whitelist if this is the case (there is no meta information or standard means of querying this information). The following changes are introduced: * Handling for VNIC_TYPE_REMOTE_MANAGED ports during allocation of resources for instance creation (remote_managed == true in InstancePciRequests); * Usage of the noop os-vif plugin for VNIC_TYPE_REMOTE_MANAGED ports in order to avoid the invocation of the local representor plugging logic since a networking backend is responsible for that in this case; * Expectation of bind time events for ports of VNIC_TYPE_REMOTE_MANAGED. Events for those arrive early from Neutron after a port update (before Nova begins to wait in the virt driver code); therefore, Nova is set to avoid waiting for plug events for VNIC_TYPE_REMOTE_MANAGED ports; * Making sure the service version is high enough on all compute services before creating instances with ports that have VNIC type VNIC_TYPE_REMOTE_MANAGED. Network requests are examined for the presence of port ids to determine the VNIC type via Neutron API. If remote-managed ports are requested, a compute service version check is performed across all cells. 
Change-Id: Ica09376951d49bc60ce6e33147477e4fa38b9482 Implements: blueprint integration-with-off-path-network-backends --- .../openstack/compute/attach_interfaces.py | 1 + nova/api/openstack/compute/servers.py | 1 + nova/compute/api.py | 23 ++++++++++++ nova/conf/workarounds.py | 2 ++ nova/exception.py | 5 +++ nova/network/neutron.py | 8 ++++- nova/network/os_vif_util.py | 9 +++++ nova/objects/service.py | 5 ++- nova/tests/unit/compute/test_api.py | 35 +++++++++++++++++++ nova/tests/unit/compute/test_compute.py | 8 +++++ nova/tests/unit/network/test_neutron.py | 27 +++++++++++--- nova/tests/unit/network/test_os_vif_util.py | 33 +++++++++++++++++ nova/tests/unit/virt/libvirt/test_driver.py | 23 ++++++++++-- nova/virt/libvirt/driver.py | 3 +- ...-type-remote-managed-b90cacf1c91df22b.yaml | 27 ++++++++++++++ 15 files changed, 200 insertions(+), 10 deletions(-) create mode 100644 releasenotes/notes/vnic-type-remote-managed-b90cacf1c91df22b.yaml diff --git a/nova/api/openstack/compute/attach_interfaces.py b/nova/api/openstack/compute/attach_interfaces.py index 6a24a6095993..34edf30cb680 100644 --- a/nova/api/openstack/compute/attach_interfaces.py +++ b/nova/api/openstack/compute/attach_interfaces.py @@ -178,6 +178,7 @@ class InterfaceAttachmentController(wsgi.Controller): exception.InterfaceAttachPciClaimFailed, exception.InterfaceAttachResourceAllocationFailed, exception.ForbiddenPortsWithAccelerator, + exception.ForbiddenWithRemoteManagedPorts, exception.ExtendedResourceRequestOldCompute, ) as e: raise exc.HTTPBadRequest(explanation=e.format_message()) diff --git a/nova/api/openstack/compute/servers.py b/nova/api/openstack/compute/servers.py index 1d6d29b45f60..1bfb3db698e5 100644 --- a/nova/api/openstack/compute/servers.py +++ b/nova/api/openstack/compute/servers.py @@ -862,6 +862,7 @@ class ServersController(wsgi.Controller): exception.DeviceProfileError, exception.ComputeHostNotFound, exception.ForbiddenPortsWithAccelerator, + 
exception.ForbiddenWithRemoteManagedPorts, exception.ExtendedResourceRequestOldCompute, ) as error: raise exc.HTTPBadRequest(explanation=error.format_message()) diff --git a/nova/compute/api.py b/nova/compute/api.py index 28368d910fde..20c7ee795917 100644 --- a/nova/compute/api.py +++ b/nova/compute/api.py @@ -115,6 +115,8 @@ MIN_COMPUTE_BOOT_WITH_EXTENDED_RESOURCE_REQUEST = 58 MIN_COMPUTE_MOVE_WITH_EXTENDED_RESOURCE_REQUEST = 59 MIN_COMPUTE_INT_ATTACH_WITH_EXTENDED_RES_REQ = 60 +SUPPORT_VNIC_TYPE_REMOTE_MANAGED = 61 + # FIXME(danms): Keep a global cache of the cells we find the # first time we look. This needs to be refreshed on a timer or # trigger. @@ -1017,6 +1019,22 @@ class API: " until upgrade finished.") raise exception.ForbiddenPortsWithAccelerator(msg) + def _check_vnic_remote_managed_min_version(self, context): + min_version = (objects.service.get_minimum_version_all_cells( + context, ['nova-compute'])) + if min_version < SUPPORT_VNIC_TYPE_REMOTE_MANAGED: + msg = ("Remote-managed ports are not supported" + " until an upgrade is fully finished.") + raise exception.ForbiddenWithRemoteManagedPorts(msg) + + def _check_support_vnic_remote_managed(self, context, requested_networks): + if requested_networks: + for request_net in requested_networks: + if (request_net.port_id and + self.network_api.is_remote_managed_port( + context, request_net.port_id)): + self._check_vnic_remote_managed_min_version(context) + def _validate_and_build_base_options( self, context, flavor, boot_meta, image_href, image_id, kernel_id, ramdisk_id, display_name, display_description, hostname, key_name, @@ -1087,6 +1105,7 @@ class API: network_metadata, port_resource_requests, req_lvl_params = result self._check_support_vnic_accelerator(context, requested_networks) + self._check_support_vnic_remote_managed(context, requested_networks) # Creating servers with ports that have resource requests, like QoS # minimum bandwidth rules, is only supported in a requested minimum @@ -5161,6 
+5180,10 @@ class API: network_model.VNIC_TYPE_ACCELERATOR_DIRECT_PHYSICAL): raise exception.ForbiddenPortsWithAccelerator() + if port.get('binding:vnic_type', + 'normal') == network_model.VNIC_TYPE_REMOTE_MANAGED: + self._check_vnic_remote_managed_min_version(context) + self.ensure_compute_version_for_resource_request( context, instance, port) diff --git a/nova/conf/workarounds.py b/nova/conf/workarounds.py index 6d6e1d0adf24..bbfbb73215df 100644 --- a/nova/conf/workarounds.py +++ b/nova/conf/workarounds.py @@ -313,6 +313,7 @@ use outside of a CI or developer cloud. "vdpa", "accelerator-direct", "accelerator-direct-physical", + "remote-managed", ]), default=[], help=""" @@ -336,6 +337,7 @@ during hard reboot. The possible values are neutron port vnic types: * vdpa * accelerator-direct * accelerator-direct-physical +* remote-managed Adding a ``vnic_type`` to this configuration makes Nova wait for a network-vif-plugged event for each of the instance's vifs having the specific diff --git a/nova/exception.py b/nova/exception.py index bda5a009f1a4..e898ab3786e5 100644 --- a/nova/exception.py +++ b/nova/exception.py @@ -175,6 +175,11 @@ class ForbiddenPortsWithAccelerator(NotSupported): msg_fmt = _("Feature not supported with Ports that have accelerators.") +class ForbiddenWithRemoteManagedPorts(NotSupported): + msg_fmt = _("This feature is not supported when remote-managed ports" + " are in use.") + + class AdminRequired(Forbidden): msg_fmt = _("User does not have admin privileges") diff --git a/nova/network/neutron.py b/nova/network/neutron.py index 1bdafcfc1590..a33b212c2a47 100644 --- a/nova/network/neutron.py +++ b/nova/network/neutron.py @@ -2315,7 +2315,13 @@ class API: # libvirt to expose the nic feature. At the moment # there is a limitation that deployers cannot use both # SR-IOV modes (legacy and ovs) in the same deployment. 
- spec = {pci_request.PCI_NET_TAG: physnet} + spec = { + pci_request.PCI_NET_TAG: physnet, + # Convert the value to string since tags are compared as + # string values case-insensitively. + pci_request.PCI_REMOTE_MANAGED_TAG: + str(self._is_remote_managed(vnic_type)), + } dev_type = pci_request.DEVICE_TYPE_FOR_VNIC_TYPE.get(vnic_type) if dev_type: spec[pci_request.PCI_DEVICE_TYPE_TAG] = dev_type diff --git a/nova/network/os_vif_util.py b/nova/network/os_vif_util.py index bf643ff1052b..21d6f66b7921 100644 --- a/nova/network/os_vif_util.py +++ b/nova/network/os_vif_util.py @@ -338,6 +338,15 @@ def _nova_to_osvif_vif_ovs(vif): port_profile=_get_ovs_representor_port_profile(vif), plugin="ovs") _set_representor_datapath_offload_settings(vif, obj) + elif vnic_type == model.VNIC_TYPE_REMOTE_MANAGED: + # A networking backend is responsible for setting up a + # representor in this case so the driver is noop. + obj = _get_vif_instance( + vif, objects.vif.VIFHostDevice, + plugin="noop", + vif_name=vif_name, + dev_address=vif["profile"]["pci_slot"], + dev_type=objects.fields.VIFHostDeviceDevType.ETHERNET) elif vif.is_hybrid_plug_enabled(): obj = _get_vif_instance( vif, diff --git a/nova/objects/service.py b/nova/objects/service.py index c027412d9d17..7d34204b0e54 100644 --- a/nova/objects/service.py +++ b/nova/objects/service.py @@ -31,7 +31,7 @@ LOG = logging.getLogger(__name__) # NOTE(danms): This is the global service version counter -SERVICE_VERSION = 60 +SERVICE_VERSION = 61 # NOTE(danms): This is our SERVICE_VERSION history. 
The idea is that any @@ -213,6 +213,9 @@ SERVICE_VERSION_HISTORY = ( # Add support for interface attach operation with neutron extended resource # request {'compute_rpc': '6.0'}, + # Version 61: Compute RPC v6.0: + # Add support for remotely-managed ports (vnic-type 'remote-managed') + {'compute_rpc': '6.0'}, ) # This is used to raise an error at service startup if older than N-1 computes diff --git a/nova/tests/unit/compute/test_api.py b/nova/tests/unit/compute/test_api.py index 64064cf63674..0932b07ce404 100644 --- a/nova/tests/unit/compute/test_api.py +++ b/nova/tests/unit/compute/test_api.py @@ -206,6 +206,10 @@ class _ComputeAPIUnitTestMixIn(object): list_obj.obj_reset_changes() return list_obj + @mock.patch( + 'nova.network.neutron.API.is_remote_managed_port', + new=mock.Mock(return_value=False), + ) @mock.patch('nova.objects.Quotas.check_deltas') @mock.patch('nova.conductor.conductor_api.ComputeTaskAPI.build_instances') @mock.patch('nova.compute.api.API._record_action_start') @@ -7243,6 +7247,37 @@ class ComputeAPIUnitTestCase(_ComputeAPIUnitTestMixIn, test.NoDBTestCase): requested_networks) mock_get.assert_called_once_with(self.context, ['nova-compute']) + @mock.patch( + 'nova.network.neutron.API.is_remote_managed_port', + new=mock.Mock(return_value=True), + ) + @mock.patch('nova.objects.service.get_minimum_version_all_cells', + return_value=60) + def test_check_support_vnic_remote_managed_version_before_61( + self, mock_get): + requested_networks = objects.NetworkRequestList( + objects=[objects.NetworkRequest(port_id=uuids.port)]) + self.assertRaisesRegex(exception.ForbiddenWithRemoteManagedPorts, + 'Remote-managed ports are not supported until an upgrade is fully' + ' finished.', + self.compute_api._check_support_vnic_remote_managed, + self.context, + requested_networks) + mock_get.assert_called_once_with(self.context, ['nova-compute']) + + @mock.patch( + 'nova.network.neutron.API.is_remote_managed_port', + new=mock.Mock(return_value=True), + ) + 
@mock.patch('nova.objects.service.get_minimum_version_all_cells', + return_value=61) + def test_check_support_vnic_remote_managed_version_61(self, mock_get): + requested_networks = objects.NetworkRequestList( + objects=[objects.NetworkRequest(port_id=uuids.port)]) + self.compute_api._check_support_vnic_remote_managed(self.context, + requested_networks) + mock_get.assert_called_once_with(self.context, ['nova-compute']) + def test_validate_and_build_base_options_translate_neutron_secgroup(self): """Tests that _check_requested_secgroups will return a uuid for a requested Neutron security group and that will be returned from diff --git a/nova/tests/unit/compute/test_compute.py b/nova/tests/unit/compute/test_compute.py index f65f1abdb740..cd0556e9a5e4 100644 --- a/nova/tests/unit/compute/test_compute.py +++ b/nova/tests/unit/compute/test_compute.py @@ -8692,6 +8692,10 @@ class ComputeAPITestCase(BaseTestCase): len(db.instance_get_all(self.context))) mock_secgroups.assert_called_once_with(mock.ANY, 'invalid_sec_group') + @mock.patch( + 'nova.network.neutron.API.is_remote_managed_port', + new=mock.Mock(return_value=False), + ) def test_create_instance_associates_requested_networks(self): # Make sure create adds the requested networks to the RequestSpec @@ -9827,6 +9831,10 @@ class ComputeAPITestCase(BaseTestCase): self.assertEqual(refs[i]['display_name'], name) self.assertEqual(refs[i]['hostname'], name) + @mock.patch( + 'nova.network.neutron.API.is_remote_managed_port', + new=mock.Mock(return_value=False), + ) @mock.patch("nova.objects.service.get_minimum_version_all_cells") @mock.patch( "nova.network.neutron.API.has_extended_resource_request_extension") diff --git a/nova/tests/unit/network/test_neutron.py b/nova/tests/unit/network/test_neutron.py index 38d5e490f178..7c44729105fe 100644 --- a/nova/tests/unit/network/test_neutron.py +++ b/nova/tests/unit/network/test_neutron.py @@ -6162,7 +6162,8 @@ class TestAPI(TestAPIBase): 
objects.NetworkRequest(port_id=uuids.portid_4), objects.NetworkRequest(port_id=uuids.portid_5), objects.NetworkRequest(port_id=uuids.trusted_port), - objects.NetworkRequest(port_id=uuids.portid_vdpa)]) + objects.NetworkRequest(port_id=uuids.portid_vdpa), + objects.NetworkRequest(port_id=uuids.portid_remote_managed)]) pci_requests = objects.InstancePCIRequests(requests=[]) # _get_port_vnic_info should be called for every NetworkRequest with a # port_id attribute (so six times) @@ -6176,13 +6177,14 @@ class TestAPI(TestAPIBase): (model.VNIC_TYPE_DIRECT, True, 'netN', mock.sentinel.resource_request2, None, None), (model.VNIC_TYPE_VDPA, None, 'netN', None, None, None), + (model.VNIC_TYPE_REMOTE_MANAGED, None, 'netN', None, None, None), ] # _get_physnet_tunneled_info should be called for every NetworkRequest # (so seven times) mock_get_physnet_tunneled_info.side_effect = [ ('physnet1', False), ('physnet1', False), ('', True), ('physnet1', False), ('physnet2', False), ('physnet3', False), - ('physnet4', False), ('physnet1', False) + ('physnet4', False), ('physnet1', False), ('physnet1', False), ] api = neutronapi.API() @@ -6199,13 +6201,16 @@ class TestAPI(TestAPIBase): mock.sentinel.request_group1, mock.sentinel.request_group2], port_resource_requests) - self.assertEqual(6, len(pci_requests.requests)) + self.assertEqual(7, len(pci_requests.requests)) has_pci_request_id = [net.pci_request_id is not None for net in requested_networks.objects] self.assertEqual(pci_requests.requests[3].spec[0]["dev_type"], "type-PF") self.assertEqual(pci_requests.requests[5].spec[0]["dev_type"], "vdpa") - expected_results = [True, False, False, True, True, True, True, True] + self.assertEqual(pci_requests.requests[6].spec[0]["remote_managed"], + 'True') + expected_results = [True, False, False, True, True, True, True, True, + True] self.assertEqual(expected_results, has_pci_request_id) # Make sure only the trusted VF has the 'trusted' tag set in the spec. 
for pci_req in pci_requests.requests: @@ -6217,11 +6222,23 @@ class TestAPI(TestAPIBase): else: self.assertNotIn(pci_request.PCI_TRUSTED_TAG, spec) + # Only remote-managed ports must have the remote_managed tag set + # to True. + for pci_req in pci_requests.requests: + spec = pci_req.spec[0] + if pci_req.requester_id == uuids.portid_remote_managed: + self.assertEqual('True', + spec[pci_request.PCI_REMOTE_MANAGED_TAG]) + else: + self.assertEqual('False', + spec[pci_request.PCI_REMOTE_MANAGED_TAG]) + # Only SRIOV ports and those with a resource_request will have # pci_req.requester_id. self.assertEqual( [uuids.portid_1, uuids.portid_3, uuids.portid_4, uuids.portid_5, - uuids.trusted_port, uuids.portid_vdpa], + uuids.trusted_port, uuids.portid_vdpa, + uuids.portid_remote_managed], [pci_req.requester_id for pci_req in pci_requests.requests]) self.assertCountEqual( diff --git a/nova/tests/unit/network/test_os_vif_util.py b/nova/tests/unit/network/test_os_vif_util.py index e15e4eb92a79..338492aef0d2 100644 --- a/nova/tests/unit/network/test_os_vif_util.py +++ b/nova/tests/unit/network/test_os_vif_util.py @@ -696,6 +696,39 @@ class OSVIFUtilTestCase(test.NoDBTestCase): self.assertObjEqual(expect, actual) + def test_nova_to_osvif_ovs_with_vnic_remote_managed(self): + vif = model.VIF( + id="dc065497-3c8d-4f44-8fb4-e1d33c16a536", + type=model.VIF_TYPE_OVS, + address="22:52:25:62:e2:aa", + vnic_type=model.VNIC_TYPE_REMOTE_MANAGED, + network=model.Network( + id="b82c1929-051e-481d-8110-4669916c7915", + label="Demo Net", + subnets=[]), + profile={'pci_slot': '0000:0a:00.1'} + ) + + actual = os_vif_util.nova_to_osvif_vif(vif) + + expect = osv_objects.vif.VIFHostDevice( + id="dc065497-3c8d-4f44-8fb4-e1d33c16a536", + active=False, + address="22:52:25:62:e2:aa", + dev_address='0000:0a:00.1', + dev_type=os_vif_fields.VIFHostDeviceDevType.ETHERNET, + plugin="noop", + has_traffic_filtering=False, + preserve_on_delete=False, + network=osv_objects.network.Network( + 
id="b82c1929-051e-481d-8110-4669916c7915", + bridge_interface=None, + label="Demo Net", + subnets=osv_objects.subnet.SubnetList( + objects=[]))) + + self.assertObjEqual(expect, actual) + def test_nova_to_osvif_ovs_with_vnic_vdpa(self): vif = model.VIF( id="dc065497-3c8d-4f44-8fb4-e1d33c16a536", diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py index 755b02d5b84c..a31abc70ae12 100644 --- a/nova/tests/unit/virt/libvirt/test_driver.py +++ b/nova/tests/unit/virt/libvirt/test_driver.py @@ -19214,8 +19214,10 @@ class LibvirtConnTestCase(test.NoDBTestCase, instance = objects.Instance(**self.test_instance) instance.vm_state = vm_states.BUILDING - vifs = [{'id': uuids.vif_1, 'active': False}, - {'id': uuids.vif_2, 'active': False}] + vifs = [ + network_model.VIF(id=uuids.vif_1, active=False), + network_model.VIF(id=uuids.vif_2, active=False) + ] @mock.patch.object(drvr, 'plug_vifs') @mock.patch.object(drvr, '_create_guest') @@ -19412,6 +19414,23 @@ class LibvirtConnTestCase(test.NoDBTestCase, events = drvr._get_neutron_events(network_info) self.assertEqual([('network-vif-plugged', '1')], events) + def test_get_neutron_events_remote_managed(self): + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) + network_info = [ + network_model.VIF( + id=uuids.vif_1, + vnic_type=network_model.VNIC_TYPE_REMOTE_MANAGED), + network_model.VIF( + id=uuids.vif_2, + vnic_type=network_model.VNIC_TYPE_REMOTE_MANAGED, + active=True), + ] + events = drvr._get_neutron_events(network_info) + # For VNIC_TYPE_REMOTE_MANAGED events are only bind-time currently. + # Until this changes, they need to be filtered out to avoid waiting + # for them unnecessarily. 
+ self.assertEqual([], events) + def test_unplug_vifs_ignores_errors(self): drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI()) with mock.patch.object(drvr, 'vif_driver') as vif_driver: diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index 4e920afe13fb..246308a79f50 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -7232,7 +7232,8 @@ class LibvirtDriver(driver.ComputeDriver): # anything that might be stale (cache-wise) assume it's # already up so we don't block on it. return [('network-vif-plugged', vif['id']) - for vif in network_info if vif.get('active', True) is False] + for vif in network_info if vif.get('active', True) is False and + vif['vnic_type'] != network_model.VNIC_TYPE_REMOTE_MANAGED] def _create_guest_with_network( self, diff --git a/releasenotes/notes/vnic-type-remote-managed-b90cacf1c91df22b.yaml b/releasenotes/notes/vnic-type-remote-managed-b90cacf1c91df22b.yaml new file mode 100644 index 000000000000..826729f378d9 --- /dev/null +++ b/releasenotes/notes/vnic-type-remote-managed-b90cacf1c91df22b.yaml @@ -0,0 +1,27 @@ +--- +features: + - | + Added support for off-path networking backends where devices exposed to the + hypervisor host are managed remotely (which is the case, for example, with + various SmartNIC DPU devices). ``VNIC_TYPE_REMOTE_MANAGED`` ports can now + be added to Nova instances as soon as all compute nodes are upgraded to + the new compute service version. In order to use this feature, VF PCI/PCIe + devices need to be tagged as ``remote_managed: "true"`` in the Nova config + in the ``passthrough_whitelist`` option. + + This feature relies on Neutron being upgraded to the corresponding release + of OpenStack and having an appropriate backend capable of binding + ``VNIC_TYPE_REMOTE_MANAGED`` ports (at the time of writing, ML2 with the OVN + ML2 mechanism driver is the only supported backend, see the Neutron + documentation for more details). 
+ + Note that the PCI devices (VFs or, alternatively, their PF) must have a + valid PCI Vital Product Data (VPD) with a serial number present in it for + this feature to work properly. Also note that only VFs can be tagged as + ``remote_managed: "true"`` and they cannot be used for legacy SR-IOV + use-cases. + + Nova operations on instances with ``VNIC_TYPE_REMOTE_MANAGED`` ports + follow the same logic as the operations on direct SR-IOV ports. + + This feature is only supported with the Libvirt driver.