PCI NUMA filtering
Add PCI device NUMA awareness to the scheduling logic. The NUMA topology filter is modified to consider PCI device NUMA node locality during guest placement. When a VM has a defined NUMA topology, it is placed on host NUMA nodes that hold the requested PCI devices. When a VM has no NUMA topology, it is placed, where possible, on a single host NUMA node that holds the requested PCI devices.

Implements: blueprint input-output-based-numa-scheduling
Change-Id: Id076a76d05f3d64facbeb60a7be3d4b60f817b94
Co-Authored-By: James Chapman <james.p.chapman@intel.com>
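For context, here is a minimal standalone sketch (not part of this change) of the placement rule the commit message describes: a request is considered supportable by a set of host NUMA cells only if enough matching devices sit in pools local to those cells, with devices that report no NUMA node (numa_node None) treated as usable from any cell. The names below (HostCell, can_fit_pci_request, pci_pools) are hypothetical stand-ins, not Nova APIs; in the actual change the equivalent check is PciDeviceStats.support_requests(requests, numa_cells) shown in the diff below.

# Illustrative sketch only; HostCell, can_fit_pci_request and pci_pools
# are made-up names, not Nova objects.
from collections import namedtuple

HostCell = namedtuple('HostCell', ['id'])


def can_fit_pci_request(pools, request_count, spec, cells):
    """Return True if request_count devices matching spec can be taken
    from pools local to cells (numa_node None matches any cell)."""
    allowed_nodes = {None} | {cell.id for cell in cells}
    usable = [
        pool for pool in pools
        if pool['numa_node'] in allowed_nodes
        and all(pool.get(k) == v for k, v in spec.items())
    ]
    return sum(pool['count'] for pool in usable) >= request_count


pci_pools = [
    {'vendor_id': '8086', 'product_id': '10fb', 'numa_node': 0, 'count': 2},
    {'vendor_id': '8086', 'product_id': '10fb', 'numa_node': 1, 'count': 2},
    {'vendor_id': '8086', 'product_id': '10fb', 'numa_node': None, 'count': 1},
]

# An instance confined to host cell 0 can get at most 3 matching devices:
# 2 from the node-0 pool plus 1 from the pool with no reported NUMA node.
spec = {'vendor_id': '8086', 'product_id': '10fb'}
print(can_fit_pci_request(pci_pools, 3, spec, [HostCell(id=0)]))  # True
print(can_fit_pci_request(pci_pools, 4, spec, [HostCell(id=0)]))  # False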
@@ -25,6 +25,7 @@ from nova import objects
 from nova.openstack.common import log as logging
 from nova.pci import device
 from nova.pci import stats
+from nova.virt import hardware

 LOG = logging.getLogger(__name__)

@@ -155,11 +156,23 @@ class PciDevTracker(object):
             context, instance)
         if not pci_requests.requests:
             return None
-        devs = self.stats.consume_requests(pci_requests.requests)
+        instance_numa_topology = hardware.instance_topology_from_instance(
+            instance)
+        instance_cells = None
+        if instance_numa_topology:
+            instance_cells = instance_numa_topology.cells
+
+        devs = self.stats.consume_requests(pci_requests.requests,
+                                           instance_cells)
         if not devs:
             raise exception.PciDeviceRequestFailed(pci_requests)
         for dev in devs:
             device.claim(dev, instance)
+        if instance_numa_topology and any(
+                dev.numa_node is None for dev in devs):
+            LOG.warning(_LW("Assigning a pci device without numa affinity to "
+                            "instance %(instance)s which has numa topology"),
+                        {'instance': instance['uuid']})
         return devs

     def _allocate_instance(self, instance, devs):
@@ -52,7 +52,7 @@ class PciDeviceStats(object):
     This summary information will be helpful for cloud management also.
     """

-    pool_keys = ['product_id', 'vendor_id']
+    pool_keys = ['product_id', 'vendor_id', 'numa_node']

     def __init__(self, stats=None):
         super(PciDeviceStats, self).__init__()
@@ -135,7 +135,7 @@ class PciDeviceStats(object):
             free_devs.extend(pool['devices'])
         return free_devs

-    def consume_requests(self, pci_requests):
+    def consume_requests(self, pci_requests, numa_cells=None):
        alloc_devices = []
        for request in pci_requests:
            count = request.count
@@ -143,6 +143,8 @@ class PciDeviceStats(object):
             # For now, keep the same algorithm as during scheduling:
             # a spec may be able to match multiple pools.
             pools = self._filter_pools_for_spec(self.pools, spec)
+            if numa_cells:
+                pools = self._filter_pools_for_numa_cells(pools, numa_cells)
             # Failed to allocate the required number of devices
             # Return the devices already allocated back to their pools
             if sum([pool['count'] for pool in pools]) < count:
@@ -176,9 +178,24 @@ class PciDeviceStats(object):
         return [pool for pool in pools
                 if utils.pci_device_prop_match(pool, request_specs)]

-    def _apply_request(self, pools, request):
+    @staticmethod
+    def _filter_pools_for_numa_cells(pools, numa_cells):
+        # Some systems don't report numa node info for pci devices; in
+        # that case None is reported in pci_device.numa_node. By adding None
+        # to numa_cells we allow assigning those devices to instances with
+        # a numa topology.
+        numa_cells = [None] + [cell.id for cell in numa_cells]
+        # Filter out pools whose numa_node is not included in numa_cells.
+        return [pool for pool in pools if any(utils.pci_device_prop_match(
+            pool, [{'numa_node': cell}])
+            for cell in numa_cells)]
+
+    def _apply_request(self, pools, request, numa_cells=None):
         count = request.count
         matching_pools = self._filter_pools_for_spec(pools, request.spec)
+        if numa_cells:
+            matching_pools = self._filter_pools_for_numa_cells(matching_pools,
+                                                               numa_cells)
         if sum([pool['count'] for pool in matching_pools]) < count:
             return False
         else:
@@ -188,25 +205,31 @@ class PciDeviceStats(object):
                 break
         return True

-    def support_requests(self, requests):
+    def support_requests(self, requests, numa_cells=None):
         """Check if the pci requests can be met.

         Scheduler checks compute node's PCI stats to decide if an
         instance can be scheduled into the node. Support does not
         mean real allocation.
+        If numa_cells is provided then only devices contained in
+        those nodes are considered.
         """
         # note (yjiang5): this function has high possibility to fail,
         # so no exception should be triggered for performance reason.
         pools = copy.deepcopy(self.pools)
-        return all([self._apply_request(pools, r) for r in requests])
+        return all([self._apply_request(pools, r, numa_cells)
+                    for r in requests])

-    def apply_requests(self, requests):
+    def apply_requests(self, requests, numa_cells=None):
         """Apply PCI requests to the PCI stats.

         This is used in multiple instance creation, when the scheduler has to
         maintain how the resources are consumed by the instances.
+        If numa_cells is provided then only devices contained in
+        those nodes are considered.
         """
-        if not all([self._apply_request(self.pools, r) for r in requests]):
+        if not all([self._apply_request(self.pools, r, numa_cells)
+                    for r in requests]):
             raise exception.PciDeviceRequestFailed(requests=requests)

     @staticmethod
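To make the new pool filtering rule concrete, the following standalone sketch mirrors the behaviour of _filter_pools_for_numa_cells for an instance whose NUMA topology spans cell 1 only. The pool dicts, Cell type and filter_pools_for_numa_cells function below are hypothetical illustrations, not Nova code; they only reproduce the rule added in this change, including the treatment of devices with no reported NUMA node.

# Standalone illustration of the filtering rule added above; Cell and the
# pool dicts are made-up stand-ins, not Nova objects.
from collections import namedtuple

Cell = namedtuple('Cell', ['id'])


def filter_pools_for_numa_cells(pools, numa_cells):
    # Mirror the new behaviour: devices with no reported NUMA node
    # (numa_node is None) stay eligible for any instance cell.
    allowed = [None] + [cell.id for cell in numa_cells]
    return [pool for pool in pools if pool['numa_node'] in allowed]


pools = [
    {'product_id': '10fb', 'vendor_id': '8086', 'numa_node': 0, 'count': 2},
    {'product_id': '10fb', 'vendor_id': '8086', 'numa_node': 1, 'count': 1},
    {'product_id': '154d', 'vendor_id': '8086', 'numa_node': None, 'count': 1},
]

instance_cells = [Cell(id=1)]
for pool in filter_pools_for_numa_cells(pools, instance_cells):
    print(pool['product_id'], pool['numa_node'])
# 10fb 1      -- local to the instance's cell
# 154d None   -- no NUMA info reported; still allowed, which is why the
#                tracker now logs the "without numa affinity" warning when
#                such a device is claimed for an instance with a NUMA topology.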