PCI NUMA filtering

Add PCI device NUMA awareness to the scheduling logic.
The NUMA topology filter is modified to consider PCI device NUMA node
locality during guest placement.
When a VM has a defined NUMA topology, it will be placed on host NUMA
nodes that contain the requested PCI devices.
When a VM has no NUMA topology, it will be placed, if possible, on a
single host NUMA node that contains the requested PCI devices.

Implements: blueprint input-output-based-numa-scheduling
Change-Id: Id076a76d05f3d64facbeb60a7be3d4b60f817b94
Co-Authored-By: James Chapman <james.p.chapman@intel.com>
Przemyslaw Czesnowicz 2014-11-28 17:37:26 +00:00
parent a219393c8c
commit 08713a8e3e
15 changed files with 485 additions and 66 deletions
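
In rough outline, the policy in the diffs below is: a requested PCI device
confines placement to the host NUMA cells that hold it, while a device that
reports no NUMA node leaves placement unconstrained. A self-contained sketch
(illustrative names only, not Nova's API):

    def viable_host_cells(host_cells, pci_device_numa_nodes):
        # Devices whose locality is unknown (None) do not constrain placement.
        wanted = set(n for n in pci_device_numa_nodes if n is not None)
        if not wanted:
            return list(host_cells)
        return [cell for cell in host_cells if cell in wanted]

    assert viable_host_cells([0, 1], [1]) == [1]
    assert viable_host_cells([0, 1], [None]) == [0, 1]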


@@ -206,12 +206,27 @@ class Claim(NopClaim):
if host_topology:
host_topology = objects.NUMATopology.obj_from_db_obj(
host_topology)
pci_requests = objects.InstancePCIRequests.get_by_instance_uuid(
self.context, self.instance['uuid'])
pci_stats = None
if pci_requests.requests:
pci_stats = self.tracker.pci_tracker.stats
instance_topology = (
hardware.numa_fit_instance_to_host(
host_topology, requested_topology,
limits_topology=limit))
limits_topology=limit,
pci_requests=pci_requests.requests,
pci_stats=pci_stats))
if requested_topology and not instance_topology:
return (_("Requested instance NUMA topology cannot fit "
"the given host NUMA topology"))
if pci_requests.requests:
return (_("Requested instance NUMA topology together with"
" requested PCI devices cannot fit the given"
" host NUMA topology"))
else:
return (_("Requested instance NUMA topology cannot fit "
"the given host NUMA topology"))
elif instance_topology:
self.claimed_numa_topology = instance_topology
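
The error-reporting branch above reduces to the following stand-in
(simplified; the real code wraps both messages with _() for translation):

    def numa_claim_error(requested_topology, instance_topology, pci_requests):
        # An error is reported only when a topology was requested but no
        # fit was found; the message depends on whether PCI devices were
        # part of the request.
        if requested_topology and not instance_topology:
            if pci_requests:
                return ("Requested instance NUMA topology together with"
                        " requested PCI devices cannot fit the given"
                        " host NUMA topology")
            return ("Requested instance NUMA topology cannot fit "
                    "the given host NUMA topology")
        return None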


@@ -25,6 +25,7 @@ from nova import objects
from nova.openstack.common import log as logging
from nova.pci import device
from nova.pci import stats
from nova.virt import hardware
LOG = logging.getLogger(__name__)
@@ -155,11 +156,23 @@ class PciDevTracker(object):
context, instance)
if not pci_requests.requests:
return None
devs = self.stats.consume_requests(pci_requests.requests)
instance_numa_topology = hardware.instance_topology_from_instance(
instance)
instance_cells = None
if instance_numa_topology:
instance_cells = instance_numa_topology.cells
devs = self.stats.consume_requests(pci_requests.requests,
instance_cells)
if not devs:
raise exception.PciDeviceRequestFailed(pci_requests)
for dev in devs:
device.claim(dev, instance)
if instance_numa_topology and any(
dev.numa_node is None for dev in devs):
LOG.warning(_LW("Assigning a pci device without numa affinity to"
"instance %(instance)s which has numa topology"),
{'instance': instance['uuid']})
return devs
def _allocate_instance(self, instance, devs):
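
Condensed, the claim path above does the following (a duck-typed sketch with
assumed, simplified arguments rather than Nova's real objects and exception):

    def claim_devices(pci_stats, requests, instance_numa_topology):
        # Cells are passed only when the instance has a NUMA topology, so
        # non-NUMA instances keep the previous allocation behaviour.
        cells = instance_numa_topology.cells if instance_numa_topology else None
        devs = pci_stats.consume_requests(requests, cells)
        if not devs:
            raise RuntimeError("PCI device request failed")  # simplified
        return devs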


@@ -52,7 +52,7 @@ class PciDeviceStats(object):
This summary information will be helpful for cloud management also.
"""
pool_keys = ['product_id', 'vendor_id']
pool_keys = ['product_id', 'vendor_id', 'numa_node']
def __init__(self, stats=None):
super(PciDeviceStats, self).__init__()
@@ -135,7 +135,7 @@ class PciDeviceStats(object):
free_devs.extend(pool['devices'])
return free_devs
def consume_requests(self, pci_requests):
def consume_requests(self, pci_requests, numa_cells=None):
alloc_devices = []
for request in pci_requests:
count = request.count
@@ -143,6 +143,8 @@ class PciDeviceStats(object):
# For now, keep the same algorithm as during scheduling:
# a spec may be able to match multiple pools.
pools = self._filter_pools_for_spec(self.pools, spec)
if numa_cells:
pools = self._filter_pools_for_numa_cells(pools, numa_cells)
# Failed to allocate the required number of devices
# Return the devices already allocated back to their pools
if sum([pool['count'] for pool in pools]) < count:
@@ -176,9 +178,24 @@ class PciDeviceStats(object):
return [pool for pool in pools
if utils.pci_device_prop_match(pool, request_specs)]
def _apply_request(self, pools, request):
@staticmethod
def _filter_pools_for_numa_cells(pools, numa_cells):
# Some systems don't report NUMA node info for PCI devices; in that
# case pci_device.numa_node is reported as None. By adding None to
# numa_cells we still allow those devices to be assigned to instances
# with a NUMA topology.
numa_cells = [None] + [cell.id for cell in numa_cells]
# Filter out the pools whose numa_node is not included in numa_cells.
return [pool for pool in pools if any(utils.pci_device_prop_match(
pool, [{'numa_node': cell}])
for cell in numa_cells)]
def _apply_request(self, pools, request, numa_cells=None):
count = request.count
matching_pools = self._filter_pools_for_spec(pools, request.spec)
if numa_cells:
matching_pools = self._filter_pools_for_numa_cells(matching_pools,
numa_cells)
if sum([pool['count'] for pool in matching_pools]) < count:
return False
else:
@@ -188,25 +205,31 @@ class PciDeviceStats(object):
break
return True
def support_requests(self, requests):
def support_requests(self, requests, numa_cells=None):
"""Check if the pci requests can be met.
Scheduler checks compute node's PCI stats to decide if an
instance can be scheduled into the node. Support does not
mean real allocation.
If numa_cells is provided then only devices contained in
those nodes are considered.
"""
# NOTE(yjiang5): this function is very likely to fail, so for
# performance reasons no exception should be raised.
pools = copy.deepcopy(self.pools)
return all([self._apply_request(pools, r) for r in requests])
return all([self._apply_request(pools, r, numa_cells)
for r in requests])
def apply_requests(self, requests):
def apply_requests(self, requests, numa_cells=None):
"""Apply PCI requests to the PCI stats.
This is used in multiple instance creation, when the scheduler has to
maintain how the resources are consumed by the instances.
If numa_cells is provided then only devices contained in
those nodes are considered.
"""
if not all([self._apply_request(self.pools, r) for r in requests]):
if not all([self._apply_request(self.pools, r, numa_cells)
for r in requests]):
raise exception.PciDeviceRequestFailed(requests=requests)
@staticmethod
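
The pool filtering these entry points share can be illustrated standalone
(a simplification of _filter_pools_for_numa_cells: the real code matches via
utils.pci_device_prop_match and receives cell objects with an id attribute):

    def filter_pools_for_numa_cells(pools, numa_cell_ids):
        # None is allowed so devices with unknown locality stay usable.
        allowed = [None] + list(numa_cell_ids)
        return [pool for pool in pools if pool.get('numa_node') in allowed]

    pools = [{'vendor_id': 'v1', 'count': 2, 'numa_node': 0},
             {'vendor_id': 'v2', 'count': 1, 'numa_node': 1},
             {'vendor_id': 'v3', 'count': 1, 'numa_node': None}]
    # An instance confined to host cell 0 loses only the cell-1 pool:
    kept = filter_pools_for_numa_cells(pools, [0])
    assert [p['vendor_id'] for p in kept] == ['v1', 'v3']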


@@ -31,6 +31,9 @@ class NUMATopologyFilter(filters.BaseHostFilter):
requested_topology = hardware.instance_topology_from_instance(instance)
host_topology, _fmt = hardware.host_topology_and_format_from_host(
host_state)
pci_requests = filter_properties.get('pci_requests')
if pci_requests:
pci_requests = pci_requests.requests
if requested_topology and host_topology:
limit_cells = []
for cell in host_topology.cells:
@@ -42,7 +45,9 @@ class NUMATopologyFilter(filters.BaseHostFilter):
limits = hardware.VirtNUMALimitTopology(cells=limit_cells)
instance_topology = (hardware.numa_fit_instance_to_host(
host_topology, requested_topology,
limits_topology=limits))
limits_topology=limits,
pci_requests=pci_requests,
pci_stats=host_state.pci_stats))
if not instance_topology:
return False
host_state.limits['numa_topology'] = limits.to_json()
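
In effect, the filter's contract is extended as follows (a toy summary; the
PCI check actually happens inside numa_fit_instance_to_host via
pci_stats.support_requests on the candidate cells):

    def host_passes(numa_fit_found, pci_requests, pci_fit_on_cells):
        if not pci_requests:
            return numa_fit_found  # behaviour unchanged without PCI requests
        return numa_fit_found and pci_fit_on_cells

    assert host_passes(True, [], False)
    assert not host_passes(True, ['some-request'], False)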


@@ -243,13 +243,20 @@ class HostState(object):
# Track number of instances on host
self.num_instances += 1
instance_numa_topology = hardware.instance_topology_from_instance(
instance)
instance_cells = None
if instance_numa_topology:
instance_cells = instance_numa_topology.cells
pci_requests = instance.get('pci_requests')
# NOTE(danms): Instance here is still a dict, which is converted from
# an object. Its pci_requests field is converted to a primitive early
# on, so it is a dict here as well. Convert this once an object makes
# it all the way down this path.
if pci_requests and pci_requests['requests'] and self.pci_stats:
self.pci_stats.apply_requests(pci_requests.requests)
self.pci_stats.apply_requests(pci_requests.requests,
instance_cells)
# Calculate the numa usage
updated_numa_topology = hardware.get_host_numa_usage_from_instance(
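
For multiple-instance scheduling the host state must keep deducting from the
right pools; sketched semantics of apply_requests with cells (an assumed
simplification of _apply_request over plain dict pools):

    def apply_requests(pools, requests, numa_cell_ids=None):
        for request in requests:
            visible = [p for p in pools
                       if numa_cell_ids is None
                       or p['numa_node'] is None
                       or p['numa_node'] in numa_cell_ids]
            if sum(p['count'] for p in visible) < request['count']:
                raise RuntimeError("PCI device request failed")  # simplified
            remaining = request['count']
            for pool in visible:
                taken = min(remaining, pool['count'])
                pool['count'] -= taken
                remaining -= taken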


@@ -192,6 +192,7 @@ class ClaimTestCase(test.NoDBTestCase):
'address': 'a',
'product_id': 'p',
'vendor_id': 'v',
'numa_node': 0,
'status': 'available'}
self.tracker.new_pci_tracker()
self.tracker.pci_tracker.set_hvdevs([dev_dict])
@@ -209,6 +210,7 @@
'address': 'a',
'product_id': 'p',
'vendor_id': 'v1',
'numa_node': 1,
'status': 'available'}
self.tracker.new_pci_tracker()
self.tracker.pci_tracker.set_hvdevs([dev_dict])
@@ -226,6 +228,7 @@
'address': 'a',
'product_id': 'p',
'vendor_id': 'v',
'numa_node': 0,
'status': 'available'}
self.tracker.new_pci_tracker()
self.tracker.pci_tracker.set_hvdevs([dev_dict])
@@ -269,6 +272,83 @@
self._claim(limits={'numa_topology': limit_topo.to_json()},
numa_topology=huge_instance)
@pci_fakes.patch_pci_whitelist
def test_numa_topology_with_pci(self, mock_get):
dev_dict = {
'compute_node_id': 1,
'address': 'a',
'product_id': 'p',
'vendor_id': 'v',
'numa_node': 1,
'status': 'available'}
self.tracker.new_pci_tracker()
self.tracker.pci_tracker.set_hvdevs([dev_dict])
request = objects.InstancePCIRequest(count=1,
spec=[{'vendor_id': 'v', 'product_id': 'p'}])
mock_get.return_value = objects.InstancePCIRequests(
requests=[request])
huge_instance = objects.InstanceNUMATopology(
cells=[objects.InstanceNUMACell(
id=1, cpuset=set([1, 2]), memory=512)])
self._claim(numa_topology=huge_instance)
@pci_fakes.patch_pci_whitelist
def test_numa_topology_with_pci_fail(self, mock_get):
dev_dict = {
'compute_node_id': 1,
'address': 'a',
'product_id': 'p',
'vendor_id': 'v',
'numa_node': 1,
'status': 'available'}
dev_dict2 = {
'compute_node_id': 1,
'address': 'a',
'product_id': 'p',
'vendor_id': 'v',
'numa_node': 2,
'status': 'available'}
self.tracker.new_pci_tracker()
self.tracker.pci_tracker.set_hvdevs([dev_dict, dev_dict2])
request = objects.InstancePCIRequest(count=2,
spec=[{'vendor_id': 'v', 'product_id': 'p'}])
mock_get.return_value = objects.InstancePCIRequests(
requests=[request])
huge_instance = objects.InstanceNUMATopology(
cells=[objects.InstanceNUMACell(
id=1, cpuset=set([1, 2]), memory=512)])
self.assertRaises(exception.ComputeResourcesUnavailable,
self._claim,
numa_topology=huge_instance)
@pci_fakes.patch_pci_whitelist
def test_numa_topology_with_pci_no_numa_info(self, mock_get):
dev_dict = {
'compute_node_id': 1,
'address': 'a',
'product_id': 'p',
'vendor_id': 'v',
'numa_node': None,
'status': 'available'}
self.tracker.new_pci_tracker()
self.tracker.pci_tracker.set_hvdevs([dev_dict])
request = objects.InstancePCIRequest(count=1,
spec=[{'vendor_id': 'v', 'product_id': 'p'}])
mock_get.return_value = objects.InstancePCIRequests(
requests=[request])
huge_instance = objects.InstanceNUMATopology(
cells=[objects.InstanceNUMACell(
id=1, cpuset=set([1, 2]), memory=512)])
self._claim(numa_topology=huge_instance)
def test_abort(self, mock_get):
claim = self._abort()
self.assertTrue(claim.tracker.icalled)


@@ -101,7 +101,8 @@ class FakeVirtDriver(driver.ComputeDriver):
'product_id': '0443',
'vendor_id': '8086',
'status': 'available',
'extra_k1': 'v1'
'extra_k1': 'v1',
'numa_node': 1
},
{
'label': 'label_8086_0443',
@@ -111,7 +112,8 @@
'product_id': '0443',
'vendor_id': '8086',
'status': 'available',
'extra_k1': 'v1'
'extra_k1': 'v1',
'numa_node': 1
},
{
'label': 'label_8086_0443',
@@ -121,7 +123,8 @@
'product_id': '0443',
'vendor_id': '8086',
'status': 'available',
'extra_k1': 'v1'
'extra_k1': 'v1',
'numa_node': 1
},
{
'label': 'label_8086_0123',
@@ -131,7 +134,8 @@
'product_id': '0123',
'vendor_id': '8086',
'status': 'available',
'extra_k1': 'v1'
'extra_k1': 'v1',
'numa_node': 1
},
{
'label': 'label_8086_7891',
@@ -141,19 +145,22 @@
'product_id': '7891',
'vendor_id': '8086',
'status': 'available',
'extra_k1': 'v1'
'extra_k1': 'v1',
'numa_node': None
},
] if self.pci_support else []
self.pci_stats = [
{
'count': 2,
'vendor_id': '8086',
'product_id': '0443'
'product_id': '0443',
'numa_node': 1
},
{
'count': 1,
'vendor_id': '8086',
'product_id': '7891'
'product_id': '7891',
'numa_node': None
},
] if self.pci_support else []
if stats is not None:


@@ -36,7 +36,8 @@ fake_pci = {
'product_id': 'p',
'vendor_id': 'v',
'request_id': None,
'status': 'available'}
'status': 'available',
'numa_node': 0}
fake_pci_1 = dict(fake_pci, address='0000:00:00.2',
product_id='p1', vendor_id='v1')
fake_pci_2 = dict(fake_pci, address='0000:00:00.3')
@@ -63,8 +64,10 @@ fake_db_dev = {
}
fake_db_dev_1 = dict(fake_db_dev, vendor_id='v1',
product_id='p1', id=2,
address='0000:00:00.2')
fake_db_dev_2 = dict(fake_db_dev, id=3, address='0000:00:00.3')
address='0000:00:00.2',
numa_node=0)
fake_db_dev_2 = dict(fake_db_dev, id=3, address='0000:00:00.3',
numa_node=None)
fake_db_devs = [fake_db_dev, fake_db_dev_1, fake_db_dev_2]
@@ -82,6 +85,7 @@ class PciDevTrackerTestCase(test.TestCase):
self.inst.pci_devices = objects.PciDeviceList()
self.inst.vm_state = vm_states.ACTIVE
self.inst.task_state = None
self.inst.numa_topology = None
def _fake_get_pci_devices(self, ctxt, node_id):
return fake_db_devs[:]
@@ -119,7 +123,7 @@
free_devs = self.tracker.pci_stats.get_free_devs()
self.assertEqual(len(free_devs), 3)
self.assertEqual(self.tracker.stale.keys(), [])
self.assertEqual(len(self.tracker.stats.pools), 2)
self.assertEqual(len(self.tracker.stats.pools), 3)
self.assertEqual(self.tracker.node_id, 1)
def test_pcidev_tracker_create_no_nodeid(self):
@@ -185,6 +189,36 @@
None,
self.inst)
@mock.patch('nova.objects.InstancePCIRequests.get_by_instance')
def test_update_pci_for_instance_with_numa(self, mock_get):
fake_db_dev_3 = dict(fake_db_dev_1, id=4, address='0000:00:00.4')
fake_devs_numa = copy.deepcopy(fake_db_devs)
fake_devs_numa.append(fake_db_dev_3)
self.tracker = manager.PciDevTracker(1)
self.tracker.set_hvdevs(fake_devs_numa)
pci_requests = copy.deepcopy(fake_pci_requests)[:1]
pci_requests[0]['count'] = 2
self._create_pci_requests_object(mock_get, pci_requests)
self.inst.numa_topology = objects.InstanceNUMATopology(
cells=[objects.InstanceNUMACell(
id=1, cpuset=set([1, 2]), memory=512)])
self.tracker.update_pci_for_instance(None, self.inst)
free_devs = self.tracker.pci_stats.get_free_devs()
self.assertEqual(2, len(free_devs))
self.assertEqual('v1', free_devs[0]['vendor_id'])
self.assertEqual('v1', free_devs[1]['vendor_id'])
@mock.patch('nova.objects.InstancePCIRequests.get_by_instance')
def test_update_pci_for_instance_with_numa_fail(self, mock_get):
self._create_pci_requests_object(mock_get, fake_pci_requests)
self.inst.numa_topology = objects.InstanceNUMATopology(
cells=[objects.InstanceNUMACell(
id=1, cpuset=set([1, 2]), memory=512)])
self.assertRaises(exception.PciDeviceRequestFailed,
self.tracker.update_pci_for_instance,
None,
self.inst)
@mock.patch('nova.objects.InstancePCIRequests.get_by_instance')
def test_update_pci_for_instance_deleted(self, mock_get):
self._create_pci_requests_object(mock_get, fake_pci_requests)


@@ -22,7 +22,7 @@ from nova.pci import stats
from nova.pci import whitelist
from nova import test
from nova.tests.unit.pci import fakes
from nova.virt import hardware
fake_pci_1 = {
'compute_node_id': 1,
'address': '0000:00:00.1',
@@ -31,16 +31,22 @@ fake_pci_1 = {
'status': 'available',
'extra_k1': 'v1',
'request_id': None,
'numa_node': 0,
}
fake_pci_2 = dict(fake_pci_1, vendor_id='v2',
product_id='p2',
address='0000:00:00.2')
address='0000:00:00.2',
numa_node=1)
fake_pci_3 = dict(fake_pci_1, address='0000:00:00.3')
fake_pci_4 = dict(fake_pci_1, vendor_id='v3',
product_id='p3',
address='0000:00:00.3',
numa_node=None)
pci_requests = [objects.InstancePCIRequest(count=1,
spec=[{'vendor_id': 'v1'}]),
@@ -59,9 +65,11 @@ class PciDeviceStatsTestCase(test.NoDBTestCase):
self.fake_dev_1 = objects.PciDevice.create(fake_pci_1)
self.fake_dev_2 = objects.PciDevice.create(fake_pci_2)
self.fake_dev_3 = objects.PciDevice.create(fake_pci_3)
self.fake_dev_4 = objects.PciDevice.create(fake_pci_4)
map(self.pci_stats.add_device,
[self.fake_dev_1, self.fake_dev_2, self.fake_dev_3])
[self.fake_dev_1, self.fake_dev_2,
self.fake_dev_3, self.fake_dev_4])
def setUp(self):
super(PciDeviceStatsTestCase, self).setUp()
@@ -72,15 +80,15 @@ class PciDeviceStatsTestCase(test.NoDBTestCase):
self._create_fake_devs()
def test_add_device(self):
self.assertEqual(len(self.pci_stats.pools), 2)
self.assertEqual(len(self.pci_stats.pools), 3)
self.assertEqual(set([d['vendor_id'] for d in self.pci_stats]),
set(['v1', 'v2']))
set(['v1', 'v2', 'v3']))
self.assertEqual(set([d['count'] for d in self.pci_stats]),
set([1, 2]))
def test_remove_device(self):
self.pci_stats.remove_device(self.fake_dev_2)
self.assertEqual(len(self.pci_stats.pools), 1)
self.assertEqual(len(self.pci_stats.pools), 2)
self.assertEqual(self.pci_stats.pools[0]['count'], 2)
self.assertEqual(self.pci_stats.pools[0]['vendor_id'], 'v1')
@@ -94,29 +102,29 @@ class PciDeviceStatsTestCase(test.NoDBTestCase):
m = jsonutils.dumps(self.pci_stats)
new_stats = stats.PciDeviceStats(m)
self.assertEqual(len(new_stats.pools), 2)
self.assertEqual(len(new_stats.pools), 3)
self.assertEqual(set([d['count'] for d in new_stats]),
set([1, 2]))
self.assertEqual(set([d['vendor_id'] for d in new_stats]),
set(['v1', 'v2']))
set(['v1', 'v2', 'v3']))
def test_support_requests(self):
self.assertEqual(self.pci_stats.support_requests(pci_requests),
True)
self.assertEqual(len(self.pci_stats.pools), 2)
self.assertEqual(len(self.pci_stats.pools), 3)
self.assertEqual(set([d['count'] for d in self.pci_stats]),
set((1, 2)))
def test_support_requests_failed(self):
self.assertEqual(
self.pci_stats.support_requests(pci_requests_multiple), False)
self.assertEqual(len(self.pci_stats.pools), 2)
self.assertEqual(len(self.pci_stats.pools), 3)
self.assertEqual(set([d['count'] for d in self.pci_stats]),
set([1, 2]))
def test_apply_requests(self):
self.pci_stats.apply_requests(pci_requests)
self.assertEqual(len(self.pci_stats.pools), 1)
self.assertEqual(len(self.pci_stats.pools), 2)
self.assertEqual(self.pci_stats.pools[0]['vendor_id'], 'v1')
self.assertEqual(self.pci_stats.pools[0]['count'], 1)
@@ -140,6 +148,47 @@ class PciDeviceStatsTestCase(test.NoDBTestCase):
self.pci_stats.consume_requests,
pci_requests_multiple)
def test_support_requests_numa(self):
cells = [hardware.VirtNUMATopologyCell(0, None, None),
hardware.VirtNUMATopologyCell(1, None, None)]
self.assertEqual(True, self.pci_stats.support_requests(
pci_requests, cells))
def test_support_requests_numa_failed(self):
cells = [hardware.VirtNUMATopologyCell(0, None, None)]
self.assertEqual(False, self.pci_stats.support_requests(
pci_requests, cells))
def test_support_requests_no_numa_info(self):
cells = [hardware.VirtNUMATopologyCell(0, None, None)]
pci_request = [objects.InstancePCIRequest(count=1,
spec=[{'vendor_id': 'v3'}])]
self.assertEqual(True, self.pci_stats.support_requests(
pci_request, cells))
def test_consume_requests_numa(self):
cells = [hardware.VirtNUMATopologyCell(0, None, None),
hardware.VirtNUMATopologyCell(1, None, None)]
devs = self.pci_stats.consume_requests(pci_requests, cells)
self.assertEqual(2, len(devs))
self.assertEqual(set(['v1', 'v2']),
set([dev['vendor_id'] for dev in devs]))
def test_consume_requests_numa_failed(self):
cells = [hardware.VirtNUMATopologyCell(0, None, None)]
self.assertRaises(exception.PciDeviceRequestFailed,
self.pci_stats.consume_requests,
pci_requests, cells)
def test_consume_requests_no_numa_info(self):
cells = [hardware.VirtNUMATopologyCell(0, None, None)]
pci_request = [objects.InstancePCIRequest(count=1,
spec=[{'vendor_id': 'v3'}])]
devs = self.pci_stats.consume_requests(pci_request, cells)
self.assertEqual(1, len(devs))
self.assertEqual(set(['v3']),
set([dev['vendor_id'] for dev in devs]))
@mock.patch.object(whitelist, 'get_pci_devices_filter')
class PciDeviceStatsWithTagsTestCase(test.NoDBTestCase):
@@ -163,7 +212,8 @@ class PciDeviceStatsWithTagsTestCase(test.NoDBTestCase):
'vendor_id': '1137',
'product_id': '0071',
'status': 'available',
'request_id': None}
'request_id': None,
'numa_node': 0}
self.pci_tagged_devices.append(objects.PciDevice.create(pci_dev))
self.pci_untagged_devices = []
@@ -173,7 +223,8 @@ class PciDeviceStatsWithTagsTestCase(test.NoDBTestCase):
'vendor_id': '1137',
'product_id': '0072',
'status': 'available',
'request_id': None}
'request_id': None,
'numa_node': 0}
self.pci_untagged_devices.append(objects.PciDevice.create(pci_dev))
map(self.pci_stats.add_device, self.pci_tagged_devices)


@@ -40,7 +40,8 @@ class TestNUMATopologyFilter(test.NoDBTestCase):
'instance_properties': jsonutils.to_primitive(
obj_base.obj_to_primitive(instance))}}
host = fakes.FakeHostState('host1', 'node1',
{'numa_topology': fakes.NUMA_TOPOLOGY})
{'numa_topology': fakes.NUMA_TOPOLOGY,
'pci_stats': None})
self.assertTrue(self.filt_cls.host_passes(host, filter_properties))
def test_numa_topology_filter_numa_instance_no_numa_host_fail(self):
@@ -55,7 +56,7 @@
'request_spec': {
'instance_properties': jsonutils.to_primitive(
obj_base.obj_to_primitive(instance))}}
host = fakes.FakeHostState('host1', 'node1', {})
host = fakes.FakeHostState('host1', 'node1', {'pci_stats': None})
self.assertFalse(self.filt_cls.host_passes(host, filter_properties))
def test_numa_topology_filter_numa_host_no_numa_instance_pass(self):
@@ -82,7 +83,8 @@
'instance_properties': jsonutils.to_primitive(
obj_base.obj_to_primitive(instance))}}
host = fakes.FakeHostState('host1', 'node1',
{'numa_topology': fakes.NUMA_TOPOLOGY})
{'numa_topology': fakes.NUMA_TOPOLOGY,
'pci_stats': None})
self.assertFalse(self.filt_cls.host_passes(host, filter_properties))
def test_numa_topology_filter_fail_memory(self):
@@ -100,7 +102,8 @@
'instance_properties': jsonutils.to_primitive(
obj_base.obj_to_primitive(instance))}}
host = fakes.FakeHostState('host1', 'node1',
{'numa_topology': fakes.NUMA_TOPOLOGY})
{'numa_topology': fakes.NUMA_TOPOLOGY,
'pci_stats': None})
self.assertFalse(self.filt_cls.host_passes(host, filter_properties))
def test_numa_topology_filter_fail_cpu(self):
@@ -117,7 +120,8 @@
'instance_properties': jsonutils.to_primitive(
obj_base.obj_to_primitive(instance))}}
host = fakes.FakeHostState('host1', 'node1',
{'numa_topology': fakes.NUMA_TOPOLOGY})
{'numa_topology': fakes.NUMA_TOPOLOGY,
'pci_stats': None})
self.assertFalse(self.filt_cls.host_passes(host, filter_properties))
def test_numa_topology_filter_pass_set_limit(self):
@@ -135,7 +139,8 @@
'instance_properties': jsonutils.to_primitive(
obj_base.obj_to_primitive(instance))}}
host = fakes.FakeHostState('host1', 'node1',
{'numa_topology': fakes.NUMA_TOPOLOGY})
{'numa_topology': fakes.NUMA_TOPOLOGY,
'pci_stats': None})
self.assertTrue(self.filt_cls.host_passes(host, filter_properties))
limits_topology = hardware.VirtNUMALimitTopology.from_json(
host.limits['numa_topology'])


@@ -492,7 +492,7 @@ class HostStateTestCase(test.NoDBTestCase):
instance = dict(root_gb=0, ephemeral_gb=0, memory_mb=0, vcpus=0,
project_id='12345', vm_state=vm_states.BUILDING,
task_state=task_states.SCHEDULING, os_type='Linux',
uuid='fake-uuid')
uuid='fake-uuid', numa_topology=None)
host.consume_from_instance(instance)
numa_usage_mock.assert_called_once_with(host, instance)
self.assertEqual('fake-consumed-once', host.numa_topology)
@@ -501,7 +501,7 @@
instance = dict(root_gb=0, ephemeral_gb=0, memory_mb=0, vcpus=0,
project_id='12345', vm_state=vm_states.PAUSED,
task_state=None, os_type='Linux',
uuid='fake-uuid')
uuid='fake-uuid', numa_topology=None)
host.consume_from_instance(instance)
self.assertEqual(2, host.num_instances)


@@ -1182,6 +1182,144 @@ class LibvirtConnTestCase(test.NoDBTestCase):
self.assertIsNone(drvr._get_guest_numa_tune_memnodes(
'something', 'something'))
@mock.patch.object(objects.Flavor, 'get_by_id')
def test_get_guest_config_numa_host_instance_1pci_fits(self, mock_flavor):
instance_ref = objects.Instance(**self.test_instance)
image_meta = {}
flavor = objects.Flavor(memory_mb=1, vcpus=2, root_gb=496,
ephemeral_gb=8128, swap=33550336, name='fake',
extra_specs={})
mock_flavor.return_value = flavor
caps = vconfig.LibvirtConfigCaps()
caps.host = vconfig.LibvirtConfigCapsHost()
caps.host.cpu = vconfig.LibvirtConfigCPU()
caps.host.cpu.arch = "x86_64"
caps.host.topology = self._fake_caps_numa_topology()
conn = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True)
disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type,
instance_ref,
image_meta)
pci_device_info = dict(test_pci_device.fake_db_dev)
pci_device_info.update(compute_node_id=1,
label='fake',
status='available',
address='0000:00:00.1',
instance_uuid=None,
request_id=None,
extra_info={},
numa_node=1)
pci_device = objects.PciDevice(**pci_device_info)
with contextlib.nested(
mock.patch.object(host.Host, 'has_min_version',
return_value=True),
mock.patch.object(
host.Host, "get_capabilities", return_value=caps),
mock.patch.object(
random, 'choice', side_effect=lambda cells: cells[0]),
mock.patch.object(pci_manager, "get_instance_pci_devs",
return_value=[pci_device])):
cfg = conn._get_guest_config(instance_ref, [], {}, disk_info)
self.assertIsNone(instance_ref.numa_topology)
self.assertEqual(set([2, 3]), cfg.cpuset)
self.assertEqual(0, len(cfg.cputune.vcpupin))
self.assertIsNone(cfg.cpu.numa)
@mock.patch.object(objects.Flavor, 'get_by_id')
def test_get_guest_config_numa_host_instance_pci_no_numa_info(self,
mock_flavor):
instance_ref = objects.Instance(**self.test_instance)
image_meta = {}
flavor = objects.Flavor(memory_mb=1, vcpus=2, root_gb=496,
ephemeral_gb=8128, swap=33550336, name='fake',
extra_specs={})
mock_flavor.return_value = flavor
caps = vconfig.LibvirtConfigCaps()
caps.host = vconfig.LibvirtConfigCapsHost()
caps.host.cpu = vconfig.LibvirtConfigCPU()
caps.host.cpu.arch = "x86_64"
caps.host.topology = self._fake_caps_numa_topology()
conn = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True)
disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type,
instance_ref,
image_meta)
pci_device_info = dict(test_pci_device.fake_db_dev)
pci_device_info.update(compute_node_id=1,
label='fake',
status='available',
address='0000:00:00.1',
instance_uuid=None,
request_id=None,
extra_info={},
numa_node=None)
pci_device = objects.PciDevice(**pci_device_info)
with contextlib.nested(
mock.patch.object(host.Host, 'has_min_version',
return_value=True),
mock.patch.object(
host.Host, "get_capabilities", return_value=caps),
mock.patch.object(
hardware, 'get_vcpu_pin_set', return_value=set([3])),
mock.patch.object(pci_manager, "get_instance_pci_devs",
return_value=[pci_device])):
cfg = conn._get_guest_config(instance_ref, [], {}, disk_info)
self.assertEqual(set([3]), cfg.cpuset)
self.assertEqual(0, len(cfg.cputune.vcpupin))
self.assertIsNone(cfg.cpu.numa)
@mock.patch.object(objects.Flavor, 'get_by_id')
def test_get_guest_config_numa_host_instance_2pci_no_fit(self,
mock_flavor):
instance_ref = objects.Instance(**self.test_instance)
image_meta = {}
flavor = objects.Flavor(memory_mb=4096, vcpus=4, root_gb=496,
ephemeral_gb=8128, swap=33550336, name='fake',
extra_specs={})
mock_flavor.return_value = flavor
caps = vconfig.LibvirtConfigCaps()
caps.host = vconfig.LibvirtConfigCapsHost()
caps.host.cpu = vconfig.LibvirtConfigCPU()
caps.host.cpu.arch = "x86_64"
caps.host.topology = self._fake_caps_numa_topology()
conn = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True)
disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type,
instance_ref,
image_meta)
pci_device_info = dict(test_pci_device.fake_db_dev)
pci_device_info.update(compute_node_id=1,
label='fake',
status='available',
address='0000:00:00.1',
instance_uuid=None,
request_id=None,
extra_info={},
numa_node=1)
pci_device = objects.PciDevice(**pci_device_info)
pci_device_info.update(numa_node=0, address='0000:00:00.2')
pci_device2 = objects.PciDevice(**pci_device_info)
with contextlib.nested(
mock.patch.object(
host.Host, "get_capabilities", return_value=caps),
mock.patch.object(
hardware, 'get_vcpu_pin_set', return_value=set([3])),
mock.patch.object(random, 'choice'),
mock.patch.object(pci_manager, "get_instance_pci_devs",
return_value=[pci_device, pci_device2])
) as (get_host_cap_mock,
get_vcpu_pin_set_mock, choice_mock, pci_mock):
cfg = conn._get_guest_config(instance_ref, [], {}, disk_info)
self.assertFalse(choice_mock.called)
self.assertEqual(set([3]), cfg.cpuset)
self.assertEqual(0, len(cfg.cputune.vcpupin))
self.assertIsNone(cfg.cpu.numa)
@mock.patch.object(objects.Flavor, 'get_by_id')
def test_get_guest_config_numa_host_instance_fit_w_cpu_pinset(self,
mock_flavor):


@@ -22,6 +22,7 @@ from nova import context
from nova import exception
from nova import objects
from nova.objects import base as base_obj
from nova.pci import stats
from nova import test
from nova.virt import hardware as hw
@@ -1421,6 +1422,34 @@ class VirtNUMAHostTopologyTestCase(test.NoDBTestCase):
self.assertIsInstance(fitted_instance2, objects.InstanceNUMATopology)
self.assertEqual(2, fitted_instance2.cells[0].id)
def test_get_fitting_pci_success(self):
pci_request = objects.InstancePCIRequest(count=1,
spec=[{'vendor_id': '8086'}])
pci_reqs = [pci_request]
pci_stats = stats.PciDeviceStats()
with mock.patch.object(stats.PciDeviceStats,
'support_requests', return_value=True):
fitted_instance1 = hw.numa_fit_instance_to_host(self.host,
self.instance1,
pci_requests=pci_reqs,
pci_stats=pci_stats)
self.assertIsInstance(fitted_instance1,
objects.InstanceNUMATopology)
def test_get_fitting_pci_fail(self):
pci_request = objects.InstancePCIRequest(count=1,
spec=[{'vendor_id': '8086'}])
pci_reqs = [pci_request]
pci_stats = stats.PciDeviceStats()
with mock.patch.object(stats.PciDeviceStats,
'support_requests', return_value=False):
fitted_instance1 = hw.numa_fit_instance_to_host(
self.host,
self.instance1,
pci_requests=pci_reqs,
pci_stats=pci_stats)
self.assertIsNone(fitted_instance1)
class NumberOfSerialPortsTest(test.NoDBTestCase):
def test_flavor(self):


@@ -1126,12 +1126,15 @@ class VirtNUMALimitTopology(VirtNUMATopology):
def numa_fit_instance_to_host(
host_topology, instance_topology, limits_topology=None):
host_topology, instance_topology, limits_topology=None,
pci_requests=None, pci_stats=None):
"""Fit the instance topology onto the host topology given the limits
:param host_topology: objects.NUMATopology object to fit an instance on
:param instance_topology: objects.InstanceNUMATopology to be fitted
:param limits_topology: VirtNUMALimitTopology that defines limits
:param pci_requests: instance pci_requests
:param pci_stats: pci_stats for the host
Given a host and instance topology and optionally limits - this method
will attempt to fit instance cells onto all permutations of host cells
@@ -1163,7 +1166,12 @@ def numa_fit_instance_to_host(
break
cells.append(got_cell)
if len(cells) == len(host_cell_perm):
return objects.InstanceNUMATopology(cells=cells)
if not pci_requests:
return objects.InstanceNUMATopology(cells=cells)
elif ((pci_stats is not None) and
pci_stats.support_requests(pci_requests,
cells)):
return objects.InstanceNUMATopology(cells=cells)
def _numa_pagesize_usage_from_cell(hostcell, instancecell, sign):
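
The new tail of the fitting loop, restated in isolation (support_requests is
the real gate; this merely summarises the branch above):

    def accept_candidate(cells, pci_requests, pci_stats):
        # A fitting permutation of host cells is accepted only if the PCI
        # requests (when present) can be served from exactly those cells.
        if not pci_requests:
            return True
        return pci_stats is not None and pci_stats.support_requests(
            pci_requests, cells)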


@@ -3376,7 +3376,7 @@ class LibvirtDriver(driver.ComputeDriver):
guest_cpu_numa.cells.append(guest_cell)
return guest_cpu_numa
def _get_guest_numa_config(self, instance_numa_topology, flavor,
def _get_guest_numa_config(self, instance_numa_topology, flavor, pci_devs,
allowed_cpus=None):
"""Returns the config objects for the guest NUMA specs.
@@ -3417,24 +3417,27 @@
memory = flavor.memory_mb
if topology:
# Host is NUMA capable so try to keep the instance in a cell
viable_cells_cpus = []
for cell in topology.cells:
if vcpus <= len(cell.cpuset) and memory <= cell.memory:
viable_cells_cpus.append(cell.cpuset)
pci_cells = {pci.numa_node for pci in pci_devs}
if len(pci_cells) == 0:
viable_cells_cpus = []
for cell in topology.cells:
if vcpus <= len(cell.cpuset) and memory <= cell.memory:
viable_cells_cpus.append(cell.cpuset)
if not viable_cells_cpus:
# We can't contain the instance in a cell - do nothing for
# now.
# TODO(ndipanov): Attempt to spread the instance across
# NUMA nodes and expose the topology to the instance as an
# optimisation
return GuestNumaConfig(allowed_cpus, None, None, None)
else:
pin_cpuset = random.choice(viable_cells_cpus)
return GuestNumaConfig(pin_cpuset, None, None, None)
else:
# We have no NUMA topology in the host either
return GuestNumaConfig(allowed_cpus, None, None, None)
if viable_cells_cpus:
pin_cpuset = random.choice(viable_cells_cpus)
return GuestNumaConfig(pin_cpuset, None, None, None)
elif len(pci_cells) == 1 and None not in pci_cells:
cell = topology.cells[pci_cells.pop()]
if vcpus <= len(cell.cpuset) and memory <= cell.memory:
return GuestNumaConfig(cell.cpuset, None, None, None)
# We have no NUMA topology in the host either,
# or we can't find a single cell to accommodate the instance
# TODO(ndipanov): Attempt to spread the instance
# across NUMA nodes and expose the topology to the
# instance as an optimisation
return GuestNumaConfig(allowed_cpus, None, None, None)
else:
if topology:
# Now get the CpuTune configuration from the numa_topology
@@ -3856,9 +3859,10 @@
guest.memory = flavor.memory_mb * units.Ki
guest.vcpus = flavor.vcpus
allowed_cpus = hardware.get_vcpu_pin_set()
pci_devs = pci_manager.get_instance_pci_devs(instance, 'all')
guest_numa_config = self._get_guest_numa_config(
instance.numa_topology, flavor, allowed_cpus)
instance.numa_topology, flavor, pci_devs, allowed_cpus)
guest.cpuset = guest_numa_config.cpuset
guest.cputune = guest_numa_config.cputune
@@ -3980,7 +3984,7 @@
for pci_dev in pci_manager.get_instance_pci_devs(instance):
guest.add_device(self._get_guest_pci_device(pci_dev))
else:
if len(pci_manager.get_instance_pci_devs(instance)) > 0:
if len(pci_devs) > 0:
raise exception.PciDeviceUnsupportedHypervisor(
type=virt_type)
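
For instances without a NUMA topology, the libvirt pinning decision above can
be read as this sketch, where cells is an assumed list of (cpuset, memory_mb)
tuples indexed by host cell id:

    import random

    def pick_cpuset(cells, vcpus, memory, pci_nodes, allowed_cpus):
        def fits(cell):
            cpuset, cell_memory = cell
            return vcpus <= len(cpuset) and memory <= cell_memory

        if not pci_nodes:
            viable = [cell[0] for cell in cells if fits(cell)]
            if viable:
                return random.choice(viable)  # previous behaviour, kept
        elif len(pci_nodes) == 1 and None not in pci_nodes:
            cell = cells[next(iter(pci_nodes))]
            if fits(cell):
                return cell[0]  # pin the guest next to its PCI device
        # Multiple or unknown device nodes: fall back to the host-wide set.
        return allowed_cpus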