diff --git a/nova/pci/request.py b/nova/pci/request.py index 7ec0687f6dd1..c5d1a854a6f8 100644 --- a/nova/pci/request.py +++ b/nova/pci/request.py @@ -19,7 +19,7 @@ | "name": "QuicAssist", | "product_id": "0443", | "vendor_id": "8086", - | "device_type": "ACCEL", + | "device_type": "type-PCI", | }' Aliases with the same name and the same device_type are OR operation:: @@ -28,7 +28,7 @@ | "name": "QuicAssist", | "product_id": "0442", | "vendor_id": "8086", - | "device_type": "ACCEL", + | "device_type": "type-PCI", | }' These 2 aliases define a device request meaning: vendor_id is "8086" and @@ -45,6 +45,7 @@ import six from nova import exception from nova import objects +from nova.objects import fields as obj_fields from nova.pci import utils pci_alias_opts = [ @@ -58,7 +59,7 @@ pci_alias_opts = [ '{ "name": "QuickAssist", ' ' "product_id": "0443", ' ' "vendor_id": "8086", ' - ' "device_type": "ACCEL" ' + ' "device_type": "type-PCI" ' '} ' 'defines an alias for the Intel QuickAssist card. ' '(multi valued)' @@ -71,7 +72,9 @@ CONF = cfg.CONF CONF.register_opts(pci_alias_opts) -_ALIAS_DEV_TYPE = ['NIC', 'ACCEL', 'GPU'] +_ALIAS_DEV_TYPE = [obj_fields.PciDeviceType.STANDARD, + obj_fields.PciDeviceType.SRIOV_PF, + obj_fields.PciDeviceType.SRIOV_VF] _ALIAS_CAP_TYPE = ['pci'] _ALIAS_SCHEMA = { "type": "object", @@ -112,10 +115,13 @@ def _get_alias_from_config(): spec = jsonutils.loads(jsonspecs) jsonschema.validate(spec, _ALIAS_SCHEMA) name = spec.pop("name") + dev_type = spec.pop('device_type', None) + if dev_type: + spec['dev_type'] = dev_type if name not in aliases: aliases[name] = [spec] else: - if aliases[name][0]["device_type"] == spec["device_type"]: + if aliases[name][0]["dev_type"] == spec["dev_type"]: aliases[name].append(spec) else: reason = "Device type mismatch for alias '%s'" % name diff --git a/nova/pci/stats.py b/nova/pci/stats.py index cddb15a27c45..3d1351181f82 100644 --- a/nova/pci/stats.py +++ b/nova/pci/stats.py @@ -21,6 +21,8 @@ import six from nova import exception from nova.i18n import _LE +from nova import objects +from nova.objects import fields from nova.objects import pci_device_pool from nova.pci import utils from nova.pci import whitelist @@ -53,7 +55,7 @@ class PciDeviceStats(object): This summary information will be helpful for cloud management also. """ - pool_keys = ['product_id', 'vendor_id', 'numa_node'] + pool_keys = ['product_id', 'vendor_id', 'numa_node', 'dev_type'] def __init__(self, stats=None): super(PciDeviceStats, self).__init__() @@ -148,6 +150,7 @@ class PciDeviceStats(object): pools = self._filter_pools_for_spec(self.pools, spec) if numa_cells: pools = self._filter_pools_for_numa_cells(pools, numa_cells) + pools = self._filter_non_requested_pfs(request, pools) # Failed to allocate the required number of devices # Return the devices already allocated back to their pools if sum([pool['count'] for pool in pools]) < count: @@ -169,12 +172,41 @@ class PciDeviceStats(object): pool['count'] -= num_alloc for d in range(num_alloc): pci_dev = pool['devices'].pop() + self._handle_device_dependents(pci_dev) pci_dev.request_id = request.request_id alloc_devices.append(pci_dev) if count == 0: break return alloc_devices + def _handle_device_dependents(self, pci_dev): + """Remove device dependents or a parent from pools. + + In case the device is a PF, all of it's dependent VFs should + be removed from pools count, if these are present. + When the device is a VF, it's parent PF pool count should be + decreased, unless it is no longer in a pool. + """ + if pci_dev.dev_type == fields.PciDeviceType.SRIOV_PF: + vfs_list = objects.PciDeviceList.get_by_parent_address( + pci_dev._context, + pci_dev.compute_node_id, + pci_dev.address) + if vfs_list: + for vf in vfs_list: + self.remove_device(vf) + elif pci_dev.dev_type == fields.PciDeviceType.SRIOV_VF: + try: + parent = pci_dev.get_by_dev_addr(pci_dev._context, + pci_dev.compute_node_id, + pci_dev.parent_addr) + # Make sure not to decrease PF pool count if this parent has + # been already removed from pools + if parent in self.get_free_devs(): + self.remove_device(parent) + except exception.PciDeviceNotFound: + return + @staticmethod def _filter_pools_for_spec(pools, request_specs): return [pool for pool in pools @@ -192,12 +224,31 @@ class PciDeviceStats(object): pool, [{'numa_node': cell}]) for cell in numa_cells)] + def _filter_non_requested_pfs(self, request, matching_pools): + # Remove SRIOV_PFs from pools, unless it has been explicitly requested + # This is especially needed in cases where PFs and VFs has the same + # product_id. + if all(spec.get('dev_type') != fields.PciDeviceType.SRIOV_PF for + spec in request.spec): + matching_pools = self._filter_pools_for_pfs(matching_pools) + return matching_pools + + @staticmethod + def _filter_pools_for_pfs(pools): + return [pool for pool in pools + if not pool.get('dev_type') == fields.PciDeviceType.SRIOV_PF] + def _apply_request(self, pools, request, numa_cells=None): + # NOTE(vladikr): This code maybe open to race conditions. + # Two concurrent requests may succeed when called support_requests + # because this method does not remove related devices from the pools count = request.count matching_pools = self._filter_pools_for_spec(pools, request.spec) if numa_cells: matching_pools = self._filter_pools_for_numa_cells(matching_pools, numa_cells) + matching_pools = self._filter_non_requested_pfs(request, + matching_pools) if sum([pool['count'] for pool in matching_pools]) < count: return False else: diff --git a/nova/tests/unit/compute/test_claims.py b/nova/tests/unit/compute/test_claims.py index 3bc9082edb47..2a22e95bdaeb 100644 --- a/nova/tests/unit/compute/test_claims.py +++ b/nova/tests/unit/compute/test_claims.py @@ -201,6 +201,8 @@ class ClaimTestCase(test.NoDBTestCase): 'product_id': 'p', 'vendor_id': 'v', 'numa_node': 0, + 'dev_type': 'type-PCI', + 'parent_addr': 'a1', 'status': 'available'} self.tracker.new_pci_tracker() self.tracker.pci_tracker._set_hvdevs([dev_dict]) @@ -221,6 +223,8 @@ class ClaimTestCase(test.NoDBTestCase): 'product_id': 'p', 'vendor_id': 'v1', 'numa_node': 1, + 'dev_type': 'type-PCI', + 'parent_addr': 'a1', 'status': 'available'} self.tracker.new_pci_tracker() self.tracker.pci_tracker._set_hvdevs([dev_dict]) @@ -240,6 +244,8 @@ class ClaimTestCase(test.NoDBTestCase): 'product_id': 'p', 'vendor_id': 'v', 'numa_node': 0, + 'dev_type': 'type-PCI', + 'parent_addr': 'a1', 'status': 'available'} self.tracker.new_pci_tracker() self.tracker.pci_tracker._set_hvdevs([dev_dict]) @@ -286,6 +292,8 @@ class ClaimTestCase(test.NoDBTestCase): 'product_id': 'p', 'vendor_id': 'v', 'numa_node': 1, + 'dev_type': 'type-PCI', + 'parent_addr': 'a1', 'status': 'available'} self.tracker.new_pci_tracker() self.tracker.pci_tracker._set_hvdevs([dev_dict]) @@ -310,6 +318,8 @@ class ClaimTestCase(test.NoDBTestCase): 'product_id': 'p', 'vendor_id': 'v', 'numa_node': 1, + 'dev_type': 'type-PCI', + 'parent_addr': 'a1', 'status': 'available'} dev_dict2 = { 'compute_node_id': 1, @@ -317,6 +327,8 @@ class ClaimTestCase(test.NoDBTestCase): 'product_id': 'p', 'vendor_id': 'v', 'numa_node': 2, + 'dev_type': 'type-PCI', + 'parent_addr': 'a1', 'status': 'available'} self.tracker.new_pci_tracker() self.tracker.pci_tracker._set_hvdevs([dev_dict, dev_dict2]) @@ -346,6 +358,8 @@ class ClaimTestCase(test.NoDBTestCase): 'product_id': 'p', 'vendor_id': 'v', 'numa_node': None, + 'dev_type': 'type-PCI', + 'parent_addr': 'a1', 'status': 'available'} self.tracker.new_pci_tracker() self.tracker.pci_tracker._set_hvdevs([dev_dict]) diff --git a/nova/tests/unit/compute/test_resource_tracker.py b/nova/tests/unit/compute/test_resource_tracker.py index 4ca1c2c770e8..3137a56ad2e1 100644 --- a/nova/tests/unit/compute/test_resource_tracker.py +++ b/nova/tests/unit/compute/test_resource_tracker.py @@ -159,16 +159,25 @@ class FakeVirtDriver(driver.ComputeDriver): ] if self.pci_support else [] self.pci_stats = [ { - 'count': 3, + 'count': 2, 'vendor_id': '8086', 'product_id': '0443', - 'numa_node': 1 + 'numa_node': 1, + 'dev_type': fields.PciDeviceType.SRIOV_VF + }, + { + 'count': 1, + 'vendor_id': '8086', + 'product_id': '0443', + 'numa_node': 1, + 'dev_type': fields.PciDeviceType.SRIOV_PF }, { 'count': 1, 'vendor_id': '8086', 'product_id': '7891', - 'numa_node': None + 'numa_node': None, + 'dev_type': fields.PciDeviceType.SRIOV_VF }, ] if self.pci_support else [] if stats is not None: @@ -217,9 +226,7 @@ class BaseTestCase(test.TestCase): self.context = context.get_admin_context() - self.flags(pci_passthrough_whitelist=[ - '{"vendor_id": "8086", "product_id": "0443"}', - '{"vendor_id": "8086", "product_id": "7891"}']) + self._set_pci_passthrough_whitelist() self.flags(use_local=True, group='conductor') self.conductor = self.start_service('conductor', manager=CONF.conductor.manager) @@ -238,6 +245,11 @@ class BaseTestCase(test.TestCase): self.deleted = False self.update_call_count = 0 + def _set_pci_passthrough_whitelist(self): + self.flags(pci_passthrough_whitelist=[ + '{"vendor_id": "8086", "product_id": "0443"}', + '{"vendor_id": "8086", "product_id": "7891"}']) + def _create_compute_node(self, values=None): # This creates a db representation of a compute_node. compute = { diff --git a/nova/tests/unit/pci/test_manager.py b/nova/tests/unit/pci/test_manager.py index b253b1c82290..0dc976a101cb 100644 --- a/nova/tests/unit/pci/test_manager.py +++ b/nova/tests/unit/pci/test_manager.py @@ -36,6 +36,8 @@ fake_pci = { 'vendor_id': 'v', 'request_id': None, 'status': fields.PciDeviceStatus.AVAILABLE, + 'dev_type': fields.PciDeviceType.STANDARD, + 'parent_addr': None, 'numa_node': 0} fake_pci_1 = dict(fake_pci, address='0000:00:00.2', product_id='p1', vendor_id='v1') diff --git a/nova/tests/unit/pci/test_request.py b/nova/tests/unit/pci/test_request.py index 32c768b0c0c0..8424c4445cd6 100644 --- a/nova/tests/unit/pci/test_request.py +++ b/nova/tests/unit/pci/test_request.py @@ -25,7 +25,7 @@ _fake_alias1 = """{ "capability_type": "pci", "product_id": "4443", "vendor_id": "8086", - "device_type": "ACCEL" + "device_type": "type-PCI" }""" _fake_alias11 = """{ @@ -33,7 +33,7 @@ _fake_alias11 = """{ "capability_type": "pci", "product_id": "4444", "vendor_id": "8086", - "device_type": "ACCEL" + "device_type": "type-PCI" }""" _fake_alias2 = """{ @@ -49,7 +49,7 @@ _fake_alias3 = """{ "capability_type": "pci", "product_id": "1111", "vendor_id": "8086", - "device_type": "NIC" + "device_type": "type-PF" }""" @@ -62,7 +62,7 @@ class AliasTestCase(test.NoDBTestCase): "capability_type": "pci", "product_id": "4443", "vendor_id": "8086", - "device_type": "ACCEL" + "dev_type": "type-PCI" } self.assertEqual(expect_dict, als['QuicAssist'][0]) @@ -74,13 +74,13 @@ class AliasTestCase(test.NoDBTestCase): "capability_type": "pci", "product_id": "4443", "vendor_id": "8086", - "device_type": "ACCEL" + "dev_type": "type-PCI" } expect_dict2 = { "capability_type": "pci", "product_id": "4444", "vendor_id": "8086", - "device_type": "ACCEL" + "dev_type": "type-PCI" } self.assertEqual(expect_dict1, als['QuicAssist'][0]) @@ -141,7 +141,7 @@ class AliasTestCase(test.NoDBTestCase): "capability_type": "pci", "product_id": "1111", "vendor_id": "8086", - "device_type": "ACCEL" + "device_type": "type-PCI" }"""]) self.assertRaises( exception.PciInvalidAlias, @@ -159,13 +159,13 @@ class AliasTestCase(test.NoDBTestCase): expect_request = [ {'count': 3, 'spec': [{'vendor_id': '8086', 'product_id': '4443', - 'device_type': 'ACCEL', + 'dev_type': 'type-PCI', 'capability_type': 'pci'}], 'alias_name': 'QuicAssist'}, {'count': 1, 'spec': [{'vendor_id': '8086', 'product_id': '1111', - 'device_type': "NIC", + 'dev_type': "type-PF", 'capability_type': 'pci'}], 'alias_name': 'IntelNIC'}, ] @@ -185,13 +185,13 @@ class AliasTestCase(test.NoDBTestCase): expect_request = [ {'count': 3, 'spec': [{'vendor_id': '8086', 'product_id': '4443', - 'device_type': "ACCEL", + 'dev_type': "type-PCI", 'capability_type': 'pci'}], 'alias_name': 'QuicAssist'}, {'count': 1, 'spec': [{'vendor_id': '8086', 'product_id': '1111', - 'device_type': "NIC", + 'dev_type': "type-PF", 'capability_type': 'pci'}], 'alias_name': 'IntelNIC'}, ] diff --git a/nova/tests/unit/pci/test_stats.py b/nova/tests/unit/pci/test_stats.py index dd7c4c91f36d..f0ee494c5fe1 100644 --- a/nova/tests/unit/pci/test_stats.py +++ b/nova/tests/unit/pci/test_stats.py @@ -13,10 +13,12 @@ # License for the specific language governing permissions and limitations # under the License. +import mock import six from nova import exception from nova import objects +from nova.objects import fields from nova.pci import stats from nova import test from nova.tests.unit.pci import fakes @@ -29,6 +31,8 @@ fake_pci_1 = { 'extra_k1': 'v1', 'request_id': None, 'numa_node': 0, + 'dev_type': fields.PciDeviceType.STANDARD, + 'parent_addr': None, } @@ -322,3 +326,130 @@ class PciDeviceStatsWithTagsTestCase(test.NoDBTestCase): dev2 = self.pci_tagged_devices.pop() self.pci_stats.remove_device(dev2) self._assertPools() + + +class PciDeviceVFPFStatsTestCase(test.NoDBTestCase): + + def setUp(self): + super(PciDeviceVFPFStatsTestCase, self).setUp() + white_list = ['{"vendor_id":"8086","product_id":"1528"}', + '{"vendor_id":"8086","product_id":"1515"}'] + self.flags(pci_passthrough_whitelist=white_list) + self.pci_stats = stats.PciDeviceStats() + + def _create_pci_devices(self, vf_product_id=1515, pf_product_id=1528): + self.sriov_pf_devices = [] + for dev in range(2): + pci_dev = {'compute_node_id': 1, + 'address': '0000:81:00.%d' % dev, + 'vendor_id': '8086', + 'product_id': '%d' % pf_product_id, + 'status': 'available', + 'request_id': None, + 'dev_type': fields.PciDeviceType.SRIOV_PF, + 'parent_addr': None, + 'numa_node': 0} + self.sriov_pf_devices.append(objects.PciDevice.create(pci_dev)) + + self.sriov_vf_devices = [] + for dev in range(8): + pci_dev = {'compute_node_id': 1, + 'address': '0000:81:10.%d' % dev, + 'vendor_id': '8086', + 'product_id': '%d' % vf_product_id, + 'status': 'available', + 'request_id': None, + 'dev_type': fields.PciDeviceType.SRIOV_VF, + 'parent_addr': '0000:81:00.%d' % int(dev / 4), + 'numa_node': 0} + self.sriov_vf_devices.append(objects.PciDevice.create(pci_dev)) + + list(map(self.pci_stats.add_device, self.sriov_pf_devices)) + list(map(self.pci_stats.add_device, self.sriov_vf_devices)) + + def _fake_get_by_parent_address(self, ctxt, node_id, addr): + vf_devs = [] + for dev in self.sriov_vf_devices: + if dev.parent_addr == addr: + vf_devs.append(dev) + return vf_devs + + def _fake_pci_device_get_by_addr(self, ctxt, id, addr): + for dev in self.sriov_pf_devices: + if dev.address == addr: + return dev + + def test_consume_VF_requests(self): + with mock.patch.object(objects.PciDevice, 'get_by_dev_addr', + side_effect=self._fake_pci_device_get_by_addr): + self._create_pci_devices() + pci_requests = [objects.InstancePCIRequest(count=2, + spec=[{'product_id': '1515'}])] + devs = self.pci_stats.consume_requests(pci_requests) + self.assertEqual(2, len(devs)) + self.assertEqual(set(['1515']), + set([dev.product_id for dev in devs])) + free_devs = self.pci_stats.get_free_devs() + # Validate that the parents of these VFs has been removed + # from pools. + for dev in devs: + self.assertTrue(all(dev.parent_addr != free_dev.address + for free_dev in free_devs)) + + def test_consume_PF_requests(self): + with mock.patch.object(objects.PciDeviceList, 'get_by_parent_address', + side_effect=self._fake_get_by_parent_address): + self._create_pci_devices() + pci_requests = [objects.InstancePCIRequest(count=2, + spec=[{'product_id': '1528', + 'dev_type': 'type-PF'}])] + devs = self.pci_stats.consume_requests(pci_requests) + self.assertEqual(2, len(devs)) + self.assertEqual(set(['1528']), + set([dev.product_id for dev in devs])) + free_devs = self.pci_stats.get_free_devs() + # Validate that there are no free devices left, as when allocating + # both available PFs, its VFs should not be available. + self.assertEqual(0, len(free_devs)) + + def test_consume_VF_and_PF_requests(self): + with test.nested( + mock.patch.object(objects.PciDevice, 'get_by_dev_addr', + side_effect=self._fake_pci_device_get_by_addr), + mock.patch.object(objects.PciDeviceList, 'get_by_parent_address', + side_effect=self._fake_get_by_parent_address)): + self._create_pci_devices() + pci_requests = [objects.InstancePCIRequest(count=2, + spec=[{'product_id': '1515'}]), + objects.InstancePCIRequest(count=1, + spec=[{'product_id': '1528', + 'dev_type': 'type-PF'}])] + devs = self.pci_stats.consume_requests(pci_requests) + self.assertEqual(3, len(devs)) + self.assertEqual(set(['1528', '1515']), + set([dev.product_id for dev in devs])) + + def test_consume_VF_and_PF_requests_failed(self): + with test.nested( + mock.patch.object(objects.PciDevice, 'get_by_dev_addr', + side_effect=self._fake_pci_device_get_by_addr), + mock.patch.object(objects.PciDeviceList, 'get_by_parent_address', + side_effect=self._fake_get_by_parent_address)): + self._create_pci_devices() + pci_requests = [objects.InstancePCIRequest(count=5, + spec=[{'product_id': '1515'}]), + objects.InstancePCIRequest(count=1, + spec=[{'product_id': '1528', + 'dev_type': 'type-PF'}])] + self.assertIsNone(self.pci_stats.consume_requests(pci_requests)) + + def test_consume_VF_and_PF_same_prodict_id_failed(self): + with test.nested( + mock.patch.object(objects.PciDevice, 'get_by_dev_addr', + side_effect=self._fake_pci_device_get_by_addr), + mock.patch.object(objects.PciDeviceList, 'get_by_parent_address', + side_effect=self._fake_get_by_parent_address)): + self._create_pci_devices(pf_product_id=1515) + pci_requests = [objects.InstancePCIRequest(count=9, + spec=[{'product_id': '1515'}])] + self.assertIsNone(self.pci_stats.consume_requests(pci_requests))