PCI NUMA filtering

Add PCI device NUMA awareness to the scheduling logic.
The NUMA topology filter is modified to consider PCI device NUMA node
locality during guest placement.
When a VM has a defined NUMA topology, it will be placed on host NUMA
nodes that have the requested PCI devices.
If a VM doesn't have a NUMA topology, it will, if possible, be placed in
a single host NUMA node that has the requested PCI devices.

Implements: blueprint input-output-based-numa-scheduling
Change-Id: Id076a76d05f3d64facbeb60a7be3d4b60f817b94
Co-Authored-By: James Chapman <james.p.chapman@intel.com>
This commit is contained in:
Przemyslaw Czesnowicz
2014-11-28 17:37:26 +00:00
parent a219393c8c
commit 08713a8e3e
15 changed files with 485 additions and 66 deletions

View File

@@ -25,6 +25,7 @@ from nova import objects
from nova.openstack.common import log as logging
from nova.pci import device
from nova.pci import stats
from nova.virt import hardware
LOG = logging.getLogger(__name__)
@@ -155,11 +156,23 @@ class PciDevTracker(object):
context, instance)
if not pci_requests.requests:
return None
devs = self.stats.consume_requests(pci_requests.requests)
instance_numa_topology = hardware.instance_topology_from_instance(
instance)
instance_cells = None
if instance_numa_topology:
instance_cells = instance_numa_topology.cells
devs = self.stats.consume_requests(pci_requests.requests,
instance_cells)
if not devs:
raise exception.PciDeviceRequestFailed(pci_requests)
for dev in devs:
device.claim(dev, instance)
if instance_numa_topology and any(
dev.numa_node is None for dev in devs):
LOG.warning(_LW("Assigning a pci device without numa affinity to"
"instance %(instance)s which has numa topology"),
{'instance': instance['uuid']})
return devs
def _allocate_instance(self, instance, devs):

View File

@@ -52,7 +52,7 @@ class PciDeviceStats(object):
This summary information will be helpful for cloud management also.
"""
pool_keys = ['product_id', 'vendor_id']
pool_keys = ['product_id', 'vendor_id', 'numa_node']
def __init__(self, stats=None):
super(PciDeviceStats, self).__init__()
@@ -135,7 +135,7 @@ class PciDeviceStats(object):
free_devs.extend(pool['devices'])
return free_devs
def consume_requests(self, pci_requests):
def consume_requests(self, pci_requests, numa_cells=None):
alloc_devices = []
for request in pci_requests:
count = request.count
@@ -143,6 +143,8 @@ class PciDeviceStats(object):
# For now, keep the same algorithm as during scheduling:
# a spec may be able to match multiple pools.
pools = self._filter_pools_for_spec(self.pools, spec)
if numa_cells:
pools = self._filter_pools_for_numa_cells(pools, numa_cells)
# Failed to allocate the required number of devices
# Return the devices already allocated back to their pools
if sum([pool['count'] for pool in pools]) < count:
@@ -176,9 +178,24 @@ class PciDeviceStats(object):
return [pool for pool in pools
if utils.pci_device_prop_match(pool, request_specs)]
def _apply_request(self, pools, request):
@staticmethod
def _filter_pools_for_numa_cells(pools, numa_cells):
    """Return the pools located on an acceptable NUMA node.

    A pool is kept when ``utils.pci_device_prop_match`` accepts it
    against a ``{'numa_node': <node>}`` spec, where ``<node>`` is either
    ``None`` or the ``id`` of one of ``numa_cells``.

    :param pools: iterable of PCI device pools to filter
    :param numa_cells: iterable of objects exposing an ``id`` attribute
    :returns: list of the pools that matched, in their original order
    """
    # Some systems do not report NUMA node information for PCI devices;
    # in that case pci_device.numa_node holds None. None is always part
    # of the accepted nodes so those devices can still be assigned to
    # instances that have a NUMA topology.
    accepted_nodes = [None]
    accepted_nodes.extend(cell.id for cell in numa_cells)

    def _on_accepted_node(pool):
        # Stop at the first node the pool matches, as any() would.
        for node in accepted_nodes:
            if utils.pci_device_prop_match(pool, [{'numa_node': node}]):
                return True
        return False

    # Drop every pool whose numa_node is not among the accepted nodes.
    return [pool for pool in pools if _on_accepted_node(pool)]
def _apply_request(self, pools, request, numa_cells=None):
count = request.count
matching_pools = self._filter_pools_for_spec(pools, request.spec)
if numa_cells:
matching_pools = self._filter_pools_for_numa_cells(matching_pools,
numa_cells)
if sum([pool['count'] for pool in matching_pools]) < count:
return False
else:
@@ -188,25 +205,31 @@ class PciDeviceStats(object):
break
return True
def support_requests(self, requests):
def support_requests(self, requests, numa_cells=None):
"""Check if the pci requests can be met.
Scheduler checks compute node's PCI stats to decide if an
instance can be scheduled into the node. Support does not
mean real allocation.
If numa_cells is provided then only devices contained in
those nodes are considered.
"""
# note (yjiang5): this function has high possibility to fail,
# so no exception should be triggered for performance reason.
pools = copy.deepcopy(self.pools)
return all([self._apply_request(pools, r) for r in requests])
return all([self._apply_request(pools, r, numa_cells)
for r in requests])
def apply_requests(self, requests):
def apply_requests(self, requests, numa_cells=None):
"""Apply PCI requests to the PCI stats.
This is used in multiple instance creation, when the scheduler has to
maintain how the resources are consumed by the instances.
If numa_cells is provided then only devices contained in
those nodes are considered.
"""
if not all([self._apply_request(self.pools, r) for r in requests]):
if not all([self._apply_request(self.pools, r, numa_cells)
for r in requests]):
raise exception.PciDeviceRequestFailed(requests=requests)
@staticmethod