Add PCI device tracker to compute resource tracker

The PCI device tracker manages the life cycle of all PCI devices.

The resource tracker on a compute node uses the PCI device tracker's
interface to query for resources, claim PCI devices, and allocate and
free PCI devices for instances.

bp:pci-passthrough-base

Change-Id: I766d7f92e26fb75c1d6095dc0237c44f938d4170
Signed-off-by: Yongli He <yongli.he@intel.com>
Signed-off-by: Yunhong Jiang <yunhong.jiang@intel.com>
This commit is contained in:
He Yongli 2013-08-01 17:32:15 +08:00
parent 4855239497
commit fdce41ac4f
6 changed files with 167 additions and 7 deletions

View File

@ -21,6 +21,8 @@ from nova.objects import instance as instance_obj
from nova.openstack.common.gettextutils import _
from nova.openstack.common import jsonutils
from nova.openstack.common import log as logging
from nova.pci import pci_request
LOG = logging.getLogger(__name__)
@ -133,7 +135,8 @@ class Claim(NopClaim):
# Test for resources:
can_claim = (self._test_memory(resources, memory_mb_limit) and
self._test_disk(resources, disk_gb_limit) and
self._test_cpu(resources, vcpu_limit))
self._test_cpu(resources, vcpu_limit) and
self._test_pci())
if can_claim:
LOG.audit(_("Claim successful"), instance=self.instance)
@ -160,6 +163,12 @@ class Claim(NopClaim):
return self._test(type_, unit, total, used, requested, limit)
def _test_pci(self):
    """Test whether the instance's PCI requests can be satisfied.

    Returns True when the instance carries no PCI requests. Otherwise
    returns whatever the tracker's PCI stats report for
    ``support_requests`` on those requests.
    """
    requests = pci_request.get_instance_pci_requests(self.instance)
    if requests:
        pci_stats = self.tracker.pci_tracker.stats
        return pci_stats.support_requests(requests)
    # Nothing was requested, so there is nothing that could fail.
    return True
def _test_cpu(self, resources, limit):
type_ = _("CPU")
unit = "VCPUs"
@ -226,6 +235,14 @@ class ResizeClaim(Claim):
def vcpus(self):
return self.instance_type['vcpus']
def _test_pci(self):
    """Test whether the resize's PCI requests can be satisfied.

    The requests are looked up with the 'new_' prefix (presumably the
    requests stored for the flavor being resized to -- confirm against
    nova.pci.pci_request). Returns True when there are none, otherwise
    the result of ``support_requests`` on the tracker's PCI stats.
    """
    requests = pci_request.get_instance_pci_requests(self.instance, 'new_')
    if requests:
        pci_stats = self.tracker.pci_tracker.stats
        return pci_stats.support_requests(requests)
    # No PCI devices requested for the resize: trivially claimable.
    return True
def abort(self):
"""Compute operation requiring claimed resources has failed or
been aborted.

View File

@ -32,6 +32,7 @@ from nova.openstack.common.db import exception as db_exc
from nova.openstack.common.gettextutils import _
from nova.openstack.common import log as logging
from nova.openstack.common import strutils
from nova.pci import pci_request
from nova import utils
flavor_opts = [
@ -296,6 +297,7 @@ def save_flavor_info(metadata, instance_type, prefix=''):
for key in system_metadata_flavor_props.keys():
to_key = '%sinstance_type_%s' % (prefix, key)
metadata[to_key] = instance_type[key]
pci_request.save_flavor_pci_info(metadata, instance_type, prefix)
return metadata
@ -308,4 +310,5 @@ def delete_flavor_info(metadata, *prefixes):
for prefix in prefixes:
to_key = '%sinstance_type_%s' % (prefix, key)
del metadata[to_key]
pci_request.delete_flavor_pci_info(metadata, *prefixes)
return metadata

View File

@ -29,11 +29,13 @@ from nova import conductor
from nova import context
from nova import exception
from nova.objects import base as obj_base
from nova.objects import instance as instance_obj
from nova.objects import migration as migration_obj
from nova.openstack.common.gettextutils import _
from nova.openstack.common import importutils
from nova.openstack.common import jsonutils
from nova.openstack.common import log as logging
from nova.pci import pci_manager
from nova import utils
resource_tracker_opts = [
@ -61,6 +63,7 @@ class ResourceTracker(object):
def __init__(self, host, driver, nodename):
self.host = host
self.driver = driver
self.pci_tracker = pci_manager.PciDevTracker()
self.nodename = nodename
self.compute_node = None
self.stats = importutils.import_object(CONF.compute_stats_class)
@ -227,6 +230,7 @@ class ResourceTracker(object):
if instance_type['id'] == itype['id']:
self.stats.update_stats_for_migration(itype, sign=-1)
self.pci_tracker.update_pci_for_migration(instance, sign=-1)
self._update_usage(self.compute_node, itype, sign=-1)
self.compute_node['stats'] = self.stats
@ -276,8 +280,12 @@ class ResourceTracker(object):
self._report_hypervisor_resource_view(resources)
if 'pci_passthrough_devices' in resources:
self.pci_tracker.set_hvdevs(jsonutils.loads(resources.pop(
'pci_passthrough_devices')))
# Grab all instances assigned to this node:
instances = self.conductor_api.instance_get_all_by_host_and_node(
instances = instance_obj.InstanceList.get_by_host_and_node(
context, self.host, self.nodename)
# Now calculate usage based on instance utilization:
@ -295,6 +303,13 @@ class ResourceTracker(object):
orphans = self._find_orphaned_instances()
self._update_usage_from_orphans(resources, orphans)
# NOTE(yjiang5): Because the PCI device tracker status is not cleared in
# this periodic task, and also because the resource tracker is not
# notified when instances are deleted, we need to remove all usages
# from deleted instances.
self.pci_tracker.clean_usage(instances, migrations, orphans)
resources['pci_stats'] = jsonutils.dumps(self.pci_tracker.stats)
self._report_final_resource_view(resources)
self._sync_compute_node(context, resources)
@ -313,12 +328,14 @@ class ResourceTracker(object):
for cn in compute_node_refs:
if cn.get('hypervisor_hostname') == self.nodename:
self.compute_node = cn
self.pci_tracker.set_compute_node_id(cn['id'])
break
if not self.compute_node:
# Need to create the ComputeNode record:
resources['service_id'] = service['id']
self._create(context, resources)
self.pci_tracker.set_compute_node_id(self.compute_node['id'])
LOG.info(_('Compute_service record created for %(host)s:%(node)s')
% {'host': self.host, 'node': self.nodename})
@ -342,9 +359,16 @@ class ResourceTracker(object):
LOG.warn(_("No service record for host %s"), self.host)
def _report_hypervisor_resource_view(self, resources):
"""Log the hypervisor's view of free memory in and free disk.
"""Log the hypervisor's view of free resources.
This is just a snapshot of resource usage recorded by the
virt driver.
The following resources are logged:
- free memory
- free disk
- free CPUs
- assignable PCI devices
"""
free_ram_mb = resources['memory_mb'] - resources['memory_mb_used']
free_disk_gb = resources['local_gb'] - resources['local_gb_used']
@ -359,9 +383,16 @@ class ResourceTracker(object):
else:
LOG.debug(_("Hypervisor: VCPU information unavailable"))
if 'pci_passthrough_devices' in resources and \
resources['pci_passthrough_devices']:
LOG.debug(_("Hypervisor: assignable PCI devices: %s") %
resources['pci_passthrough_devices'])
else:
LOG.debug(_("Hypervisor: no assignable PCI devices"))
def _report_final_resource_view(self, resources):
"""Report final calculate of free memory and free disk including
instance calculations and in-progress resource claims. These
"""Report final calculate of free memory, disk, CPUs, and PCI devices,
including instance calculations and in-progress resource claims. These
values will be exposed via the compute node table to the scheduler.
"""
LOG.audit(_("Free ram (MB): %s") % resources['free_ram_mb'])
@ -374,12 +405,16 @@ class ResourceTracker(object):
else:
LOG.audit(_("Free VCPU information unavailable"))
if 'pci_devices' in resources:
LOG.audit(_("Free PCI devices: %s") % resources['pci_devices'])
def _update(self, context, values, prune_stats=False):
"""Persist the compute node updates to the DB."""
if "service" in self.compute_node:
del self.compute_node['service']
self.compute_node = self.conductor_api.compute_node_update(
context, self.compute_node, values, prune_stats)
self.pci_tracker.save(context)
def _update_usage(self, resources, usage, sign=1):
mem_usage = usage['memory_mb']
@ -442,8 +477,10 @@ class ResourceTracker(object):
if itype:
self.stats.update_stats_for_migration(itype)
self.pci_tracker.update_pci_for_migration(instance)
self._update_usage(resources, itype)
resources['stats'] = self.stats
resources['pci_stats'] = jsonutils.dumps(self.pci_tracker.stats)
self.tracked_migrations[uuid] = (migration, itype)
def _update_usage_from_migrations(self, context, resources, migrations):
@ -493,7 +530,7 @@ class ResourceTracker(object):
is_deleted_instance = instance['vm_state'] == vm_states.DELETED
if is_new_instance:
self.tracked_instances[uuid] = jsonutils.to_primitive(instance)
self.tracked_instances[uuid] = obj_base.obj_to_primitive(instance)
sign = 1
if is_deleted_instance:
@ -502,6 +539,8 @@ class ResourceTracker(object):
self.stats.update_stats_for_instance(instance)
self.pci_tracker.update_pci_for_instance(instance)
# if it's a new or deleted instance:
if is_new_instance or is_deleted_instance:
# new instance, update compute node resource usage:
@ -509,6 +548,7 @@ class ResourceTracker(object):
resources['current_workload'] = self.stats.calculate_workload()
resources['stats'] = self.stats
resources['pci_stats'] = jsonutils.dumps(self.pci_tracker.stats)
def _update_usage_from_instances(self, resources, instances):
"""Calculate resource usage based on instance utilization. This is

View File

@ -23,6 +23,7 @@ class FakeResourceTracker(resource_tracker.ResourceTracker):
def _create(self, context, values):
self.compute_node = values
self.compute_node['id'] = 1
def _update(self, context, values, prune_stats=False):
self.compute_node.update(values)

View File

@ -20,12 +20,15 @@
import uuid
from nova.compute import claims
from nova.openstack.common import jsonutils
from nova.pci import pci_manager
from nova import test
class DummyTracker(object):
icalled = False
rcalled = False
pci_tracker = pci_manager.PciDevTracker()
def abort_instance_claim(self, *args, **kwargs):
self.icalled = True
@ -33,6 +36,9 @@ class DummyTracker(object):
def drop_resize_claim(self, *args, **kwargs):
self.rcalled = True
def new_pci_tracker(self):
    """Give this tracker a freshly constructed PCI device tracker.

    The new object is bound on the instance, so device state set up by
    one test is not carried over to the next.
    """
    fresh_tracker = pci_manager.PciDevTracker()
    self.pci_tracker = fresh_tracker
class ClaimTestCase(test.TestCase):
@ -53,7 +59,8 @@ class ClaimTestCase(test.TestCase):
'memory_mb': 1024,
'root_gb': 10,
'ephemeral_gb': 5,
'vcpus': 1
'vcpus': 1,
'system_metadata': {}
}
instance.update(**kwargs)
return instance
@ -143,6 +150,53 @@ class ClaimTestCase(test.TestCase):
limits = {'disk_gb': 45}
self.assertFalse(claim.test(self.resources, limits))
def test_pci_pass(self):
    """The PCI test passes when the host device matches the request."""
    # Vendor 'v' / product 'p' is what _set_pci_request asks for.
    matching_device = dict(
        compute_node_id=1,
        address='a',
        product_id='p',
        vendor_id='v',
        status='available',
    )
    pci_owner = self.tracker
    pci_owner.new_pci_tracker()
    pci_owner.pci_tracker.set_hvdevs([matching_device])
    claim = self._claim()
    self._set_pci_request(claim)
    result = claim._test_pci()
    self.assertTrue(result)
def _set_pci_request(self, claim):
    """Store a one-device PCI request on the claim's instance.

    The request asks for a single device with vendor 'v' and product 'p'
    and is saved, JSON-encoded, under the 'pci_requests' key of the
    instance's system_metadata.
    """
    device_spec = {'vendor_id': 'v', 'product_id': 'p'}
    request = [{'count': 1, 'spec': [device_spec]}]
    sys_meta = {'pci_requests': jsonutils.dumps(request)}
    claim.instance.update(system_metadata=sys_meta)
def test_pci_fail(self):
    """The PCI test fails when no host device matches the request."""
    # Vendor 'v1' differs from the requested vendor 'v', so the only
    # device on the host is not a match.
    mismatched_device = dict(
        compute_node_id=1,
        address='a',
        product_id='p',
        vendor_id='v1',
        status='available',
    )
    pci_owner = self.tracker
    pci_owner.new_pci_tracker()
    pci_owner.pci_tracker.set_hvdevs([mismatched_device])
    claim = self._claim()
    self._set_pci_request(claim)
    result = claim._test_pci()
    self.assertFalse(result)
def test_pci_pass_no_requests(self):
    """The PCI test passes when the instance has no PCI requests.

    A device is still registered with the tracker so that the outcome
    depends only on the absence of requests, not on an empty host.
    """
    dev_dict = {
        'compute_node_id': 1,
        'address': 'a',
        'product_id': 'p',
        'vendor_id': 'v',
        'status': 'available'}
    self.tracker.new_pci_tracker()
    self.tracker.pci_tracker.set_hvdevs([dev_dict])
    claim = self._claim()
    # Deliberately do NOT call self._set_pci_request(claim): doing so made
    # this test a duplicate of test_pci_pass and left the "no requests"
    # early return of _test_pci untested. This relies on the fake
    # instance's empty system_metadata yielding no PCI requests.
    self.assertTrue(claim._test_pci())
def test_abort(self):
claim = self._abort()
self.assertTrue(claim.tracker.icalled)
@ -171,6 +225,13 @@ class ResizeClaimTestCase(ClaimTestCase):
return claims.ResizeClaim(self.instance, instance_type, self.tracker,
overhead=overhead)
def _set_pci_request(self, claim):
    """Store a one-device PCI request for a resize on the claim's instance.

    Same request as a regular claim (one device, vendor 'v', product 'p'),
    but saved under the 'new_pci_requests' key of system_metadata.
    """
    device_spec = {'vendor_id': 'v', 'product_id': 'p'}
    request = [{'count': 1, 'spec': [device_spec]}]
    sys_meta = {'new_pci_requests': jsonutils.dumps(request)}
    claim.instance.update(system_metadata=sys_meta)
def test_abort(self):
claim = self._abort()
self.assertTrue(claim.tracker.rcalled)

View File

@ -192,6 +192,44 @@ class BaseTestCase(test.TestCase):
'instance_type_id': 1,
'launched_on': None,
'system_metadata': sys_meta,
'availability_zone': None,
'vm_mode': None,
'reservation_id': None,
'display_name': None,
'default_swap_device': None,
'power_state': None,
'scheduled_at': None,
'access_ip_v6': None,
'access_ip_v4': None,
'key_name': None,
'updated_at': None,
'cell_name': None,
'locked': None,
'locked_by': None,
'launch_index': None,
'architecture': None,
'auto_disk_config': None,
'terminated_at': None,
'ramdisk_id': None,
'user_data': None,
'cleaned': None,
'deleted_at': None,
'id': 333,
'disable_terminate': None,
'hostname': None,
'display_description': None,
'key_data': None,
'deleted': None,
'default_ephemeral_device': None,
'progress': None,
'launched_at': None,
'config_drive': None,
'kernel_id': None,
'user_id': None,
'shutdown_terminate': None,
'created_at': None,
'image_ref': None,
'root_device_name': None,
}
instance.update(kwargs)