Add PCI device tracker to compute resource tracker
The PCI device tracker manages the life cycle of all PCI devices. The resource tracker on a compute node uses the PCI device tracker's interface to query for resources, claim PCI devices, and allocate and free PCI devices for instances. bp:pci-passthrough-base Change-Id: I766d7f92e26fb75c1d6095dc0237c44f938d4170 Signed-off-by: Yongli He <yongli.he@intel.com> Signed-off-by: Yunhong Jiang <yunhong.jiang@intel.com>
This commit is contained in:
parent
4855239497
commit
fdce41ac4f
|
@ -21,6 +21,8 @@ from nova.objects import instance as instance_obj
|
|||
from nova.openstack.common.gettextutils import _
|
||||
from nova.openstack.common import jsonutils
|
||||
from nova.openstack.common import log as logging
|
||||
from nova.pci import pci_request
|
||||
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
@ -133,7 +135,8 @@ class Claim(NopClaim):
|
|||
# Test for resources:
|
||||
can_claim = (self._test_memory(resources, memory_mb_limit) and
|
||||
self._test_disk(resources, disk_gb_limit) and
|
||||
self._test_cpu(resources, vcpu_limit))
|
||||
self._test_cpu(resources, vcpu_limit) and
|
||||
self._test_pci())
|
||||
|
||||
if can_claim:
|
||||
LOG.audit(_("Claim successful"), instance=self.instance)
|
||||
|
@ -160,6 +163,12 @@ class Claim(NopClaim):
|
|||
|
||||
return self._test(type_, unit, total, used, requested, limit)
|
||||
|
||||
def _test_pci(self):
    """Report whether the instance's PCI requests can be supported.

    The requests are read from ``self.instance`` through
    ``pci_request.get_instance_pci_requests``.  When that lookup yields
    nothing the check passes trivially; otherwise the answer is whatever
    the tracker's PCI stats return from ``support_requests``.
    """
    requested = pci_request.get_instance_pci_requests(self.instance)
    if requested:
        pci_stats = self.tracker.pci_tracker.stats
        return pci_stats.support_requests(requested)
    # No PCI devices were asked for, so there is nothing that can fail.
    return True
|
||||
|
||||
def _test_cpu(self, resources, limit):
|
||||
type_ = _("CPU")
|
||||
unit = "VCPUs"
|
||||
|
@ -226,6 +235,14 @@ class ResizeClaim(Claim):
|
|||
def vcpus(self):
|
||||
return self.instance_type['vcpus']
|
||||
|
||||
def _test_pci(self):
    """Report whether the 'new_'-prefixed PCI requests can be supported.

    Same check as the base claim, except that the requests are looked up
    with the ``'new_'`` prefix (presumably the requests of the flavor
    being resized to -- confirm against ``nova.pci.pci_request``).
    Passes trivially when the lookup yields nothing.
    """
    requested = pci_request.get_instance_pci_requests(self.instance,
                                                      'new_')
    if requested:
        pci_stats = self.tracker.pci_tracker.stats
        return pci_stats.support_requests(requested)
    # No PCI devices were asked for, so there is nothing that can fail.
    return True
|
||||
|
||||
def abort(self):
|
||||
"""Compute operation requiring claimed resources has failed or
|
||||
been aborted.
|
||||
|
|
|
@ -32,6 +32,7 @@ from nova.openstack.common.db import exception as db_exc
|
|||
from nova.openstack.common.gettextutils import _
|
||||
from nova.openstack.common import log as logging
|
||||
from nova.openstack.common import strutils
|
||||
from nova.pci import pci_request
|
||||
from nova import utils
|
||||
|
||||
flavor_opts = [
|
||||
|
@ -296,6 +297,7 @@ def save_flavor_info(metadata, instance_type, prefix=''):
|
|||
for key in system_metadata_flavor_props.keys():
|
||||
to_key = '%sinstance_type_%s' % (prefix, key)
|
||||
metadata[to_key] = instance_type[key]
|
||||
pci_request.save_flavor_pci_info(metadata, instance_type, prefix)
|
||||
return metadata
|
||||
|
||||
|
||||
|
@ -308,4 +310,5 @@ def delete_flavor_info(metadata, *prefixes):
|
|||
for prefix in prefixes:
|
||||
to_key = '%sinstance_type_%s' % (prefix, key)
|
||||
del metadata[to_key]
|
||||
pci_request.delete_flavor_pci_info(metadata, *prefixes)
|
||||
return metadata
|
||||
|
|
|
@ -29,11 +29,13 @@ from nova import conductor
|
|||
from nova import context
|
||||
from nova import exception
|
||||
from nova.objects import base as obj_base
|
||||
from nova.objects import instance as instance_obj
|
||||
from nova.objects import migration as migration_obj
|
||||
from nova.openstack.common.gettextutils import _
|
||||
from nova.openstack.common import importutils
|
||||
from nova.openstack.common import jsonutils
|
||||
from nova.openstack.common import log as logging
|
||||
from nova.pci import pci_manager
|
||||
from nova import utils
|
||||
|
||||
resource_tracker_opts = [
|
||||
|
@ -61,6 +63,7 @@ class ResourceTracker(object):
|
|||
def __init__(self, host, driver, nodename):
|
||||
self.host = host
|
||||
self.driver = driver
|
||||
self.pci_tracker = pci_manager.PciDevTracker()
|
||||
self.nodename = nodename
|
||||
self.compute_node = None
|
||||
self.stats = importutils.import_object(CONF.compute_stats_class)
|
||||
|
@ -227,6 +230,7 @@ class ResourceTracker(object):
|
|||
|
||||
if instance_type['id'] == itype['id']:
|
||||
self.stats.update_stats_for_migration(itype, sign=-1)
|
||||
self.pci_tracker.update_pci_for_migration(instance, sign=-1)
|
||||
self._update_usage(self.compute_node, itype, sign=-1)
|
||||
self.compute_node['stats'] = self.stats
|
||||
|
||||
|
@ -276,8 +280,12 @@ class ResourceTracker(object):
|
|||
|
||||
self._report_hypervisor_resource_view(resources)
|
||||
|
||||
if 'pci_passthrough_devices' in resources:
|
||||
self.pci_tracker.set_hvdevs(jsonutils.loads(resources.pop(
|
||||
'pci_passthrough_devices')))
|
||||
|
||||
# Grab all instances assigned to this node:
|
||||
instances = self.conductor_api.instance_get_all_by_host_and_node(
|
||||
instances = instance_obj.InstanceList.get_by_host_and_node(
|
||||
context, self.host, self.nodename)
|
||||
|
||||
# Now calculate usage based on instance utilization:
|
||||
|
@ -295,6 +303,13 @@ class ResourceTracker(object):
|
|||
orphans = self._find_orphaned_instances()
|
||||
self._update_usage_from_orphans(resources, orphans)
|
||||
|
||||
# NOTE(yjiang5): Because pci device tracker status is not cleared in
|
||||
# this periodic task, and also because the resource tracker is not
|
||||
# notified when instances are deleted, we need remove all usages
|
||||
# from deleted instances.
|
||||
self.pci_tracker.clean_usage(instances, migrations, orphans)
|
||||
resources['pci_stats'] = jsonutils.dumps(self.pci_tracker.stats)
|
||||
|
||||
self._report_final_resource_view(resources)
|
||||
|
||||
self._sync_compute_node(context, resources)
|
||||
|
@ -313,12 +328,14 @@ class ResourceTracker(object):
|
|||
for cn in compute_node_refs:
|
||||
if cn.get('hypervisor_hostname') == self.nodename:
|
||||
self.compute_node = cn
|
||||
self.pci_tracker.set_compute_node_id(cn['id'])
|
||||
break
|
||||
|
||||
if not self.compute_node:
|
||||
# Need to create the ComputeNode record:
|
||||
resources['service_id'] = service['id']
|
||||
self._create(context, resources)
|
||||
self.pci_tracker.set_compute_node_id(self.compute_node['id'])
|
||||
LOG.info(_('Compute_service record created for %(host)s:%(node)s')
|
||||
% {'host': self.host, 'node': self.nodename})
|
||||
|
||||
|
@ -342,9 +359,16 @@ class ResourceTracker(object):
|
|||
LOG.warn(_("No service record for host %s"), self.host)
|
||||
|
||||
def _report_hypervisor_resource_view(self, resources):
|
||||
"""Log the hypervisor's view of free memory in and free disk.
|
||||
"""Log the hypervisor's view of free resources.
|
||||
|
||||
This is just a snapshot of resource usage recorded by the
|
||||
virt driver.
|
||||
|
||||
The following resources are logged:
|
||||
- free memory
|
||||
- free disk
|
||||
- free CPUs
|
||||
- assignable PCI devices
|
||||
"""
|
||||
free_ram_mb = resources['memory_mb'] - resources['memory_mb_used']
|
||||
free_disk_gb = resources['local_gb'] - resources['local_gb_used']
|
||||
|
@ -359,9 +383,16 @@ class ResourceTracker(object):
|
|||
else:
|
||||
LOG.debug(_("Hypervisor: VCPU information unavailable"))
|
||||
|
||||
if 'pci_passthrough_devices' in resources and \
|
||||
resources['pci_passthrough_devices']:
|
||||
LOG.debug(_("Hypervisor: assignable PCI devices: %s") %
|
||||
resources['pci_passthrough_devices'])
|
||||
else:
|
||||
LOG.debug(_("Hypervisor: no assignable PCI devices"))
|
||||
|
||||
def _report_final_resource_view(self, resources):
|
||||
"""Report final calculate of free memory and free disk including
|
||||
instance calculations and in-progress resource claims. These
|
||||
"""Report final calculate of free memory, disk, CPUs, and PCI devices,
|
||||
including instance calculations and in-progress resource claims. These
|
||||
values will be exposed via the compute node table to the scheduler.
|
||||
"""
|
||||
LOG.audit(_("Free ram (MB): %s") % resources['free_ram_mb'])
|
||||
|
@ -374,12 +405,16 @@ class ResourceTracker(object):
|
|||
else:
|
||||
LOG.audit(_("Free VCPU information unavailable"))
|
||||
|
||||
if 'pci_devices' in resources:
|
||||
LOG.audit(_("Free PCI devices: %s") % resources['pci_devices'])
|
||||
|
||||
def _update(self, context, values, prune_stats=False):
    """Persist the compute node updates to the DB.

    Drops any ``'service'`` entry from the current compute node record,
    sends the record and ``values`` through the conductor API's
    ``compute_node_update`` and keeps the returned record, then asks
    the PCI tracker to save its own state with the same context.
    """
    node = self.compute_node
    if "service" in node:
        del node['service']
    refreshed = self.conductor_api.compute_node_update(
        context, node, values, prune_stats)
    self.compute_node = refreshed
    # PCI device state is saved only after the compute node update.
    self.pci_tracker.save(context)
|
||||
|
||||
def _update_usage(self, resources, usage, sign=1):
|
||||
mem_usage = usage['memory_mb']
|
||||
|
@ -442,8 +477,10 @@ class ResourceTracker(object):
|
|||
|
||||
if itype:
|
||||
self.stats.update_stats_for_migration(itype)
|
||||
self.pci_tracker.update_pci_for_migration(instance)
|
||||
self._update_usage(resources, itype)
|
||||
resources['stats'] = self.stats
|
||||
resources['pci_stats'] = jsonutils.dumps(self.pci_tracker.stats)
|
||||
self.tracked_migrations[uuid] = (migration, itype)
|
||||
|
||||
def _update_usage_from_migrations(self, context, resources, migrations):
|
||||
|
@ -493,7 +530,7 @@ class ResourceTracker(object):
|
|||
is_deleted_instance = instance['vm_state'] == vm_states.DELETED
|
||||
|
||||
if is_new_instance:
|
||||
self.tracked_instances[uuid] = jsonutils.to_primitive(instance)
|
||||
self.tracked_instances[uuid] = obj_base.obj_to_primitive(instance)
|
||||
sign = 1
|
||||
|
||||
if is_deleted_instance:
|
||||
|
@ -502,6 +539,8 @@ class ResourceTracker(object):
|
|||
|
||||
self.stats.update_stats_for_instance(instance)
|
||||
|
||||
self.pci_tracker.update_pci_for_instance(instance)
|
||||
|
||||
# if it's a new or deleted instance:
|
||||
if is_new_instance or is_deleted_instance:
|
||||
# new instance, update compute node resource usage:
|
||||
|
@ -509,6 +548,7 @@ class ResourceTracker(object):
|
|||
|
||||
resources['current_workload'] = self.stats.calculate_workload()
|
||||
resources['stats'] = self.stats
|
||||
resources['pci_stats'] = jsonutils.dumps(self.pci_tracker.stats)
|
||||
|
||||
def _update_usage_from_instances(self, resources, instances):
|
||||
"""Calculate resource usage based on instance utilization. This is
|
||||
|
|
|
@ -23,6 +23,7 @@ class FakeResourceTracker(resource_tracker.ResourceTracker):
|
|||
|
||||
def _create(self, context, values):
|
||||
self.compute_node = values
|
||||
self.compute_node['id'] = 1
|
||||
|
||||
def _update(self, context, values, prune_stats=False):
|
||||
self.compute_node.update(values)
|
||||
|
|
|
@ -20,12 +20,15 @@
|
|||
import uuid
|
||||
|
||||
from nova.compute import claims
|
||||
from nova.openstack.common import jsonutils
|
||||
from nova.pci import pci_manager
|
||||
from nova import test
|
||||
|
||||
|
||||
class DummyTracker(object):
|
||||
icalled = False
|
||||
rcalled = False
|
||||
pci_tracker = pci_manager.PciDevTracker()
|
||||
|
||||
def abort_instance_claim(self, *args, **kwargs):
|
||||
self.icalled = True
|
||||
|
@ -33,6 +36,9 @@ class DummyTracker(object):
|
|||
def drop_resize_claim(self, *args, **kwargs):
|
||||
self.rcalled = True
|
||||
|
||||
def new_pci_tracker(self):
    """Give this tracker a freshly constructed ``PciDevTracker``.

    Assigning on the instance replaces whatever ``pci_tracker`` was
    visible before, so devices set during one test are not seen by the
    next one that calls this first.
    """
    fresh_tracker = pci_manager.PciDevTracker()
    self.pci_tracker = fresh_tracker
|
||||
|
||||
|
||||
class ClaimTestCase(test.TestCase):
|
||||
|
||||
|
@ -53,7 +59,8 @@ class ClaimTestCase(test.TestCase):
|
|||
'memory_mb': 1024,
|
||||
'root_gb': 10,
|
||||
'ephemeral_gb': 5,
|
||||
'vcpus': 1
|
||||
'vcpus': 1,
|
||||
'system_metadata': {}
|
||||
}
|
||||
instance.update(**kwargs)
|
||||
return instance
|
||||
|
@ -143,6 +150,53 @@ class ClaimTestCase(test.TestCase):
|
|||
limits = {'disk_gb': 45}
|
||||
self.assertFalse(claim.test(self.resources, limits))
|
||||
|
||||
def test_pci_pass(self):
    """The PCI test passes when an available device is registered.

    One available device with vendor 'v' / product 'p' is handed to a
    fresh PCI tracker, a PCI request is attached to the claim via
    ``_set_pci_request``, and ``_test_pci`` is expected to be truthy.
    """
    available_dev = dict(compute_node_id=1,
                         address='a',
                         product_id='p',
                         vendor_id='v',
                         status='available')
    tracker = self.tracker
    tracker.new_pci_tracker()
    tracker.pci_tracker.set_hvdevs([available_dev])

    claim = self._claim()
    self._set_pci_request(claim)
    self.assertTrue(claim._test_pci())
|
||||
|
||||
def _set_pci_request(self, claim):
    """Attach a one-device PCI request to the claim's instance.

    The request (count 1, vendor 'v', product 'p') is JSON-encoded and
    written to the instance's ``system_metadata`` under the
    ``'pci_requests'`` key, replacing the previous ``system_metadata``.
    """
    device_spec = {'vendor_id': 'v', 'product_id': 'p'}
    request = [{'count': 1, 'spec': [device_spec]}]
    encoded = jsonutils.dumps(request)
    claim.instance.update(system_metadata={'pci_requests': encoded})
|
||||
|
||||
def test_pci_fail(self):
    """The PCI test fails when the only device has another vendor id.

    The registered device uses vendor 'v1'; a PCI request is attached
    to the claim via ``_set_pci_request`` and ``_test_pci`` is expected
    to be falsy.
    """
    other_vendor_dev = dict(compute_node_id=1,
                            address='a',
                            product_id='p',
                            vendor_id='v1',
                            status='available')
    tracker = self.tracker
    tracker.new_pci_tracker()
    tracker.pci_tracker.set_hvdevs([other_vendor_dev])

    claim = self._claim()
    self._set_pci_request(claim)
    self.assertFalse(claim._test_pci())
|
||||
|
||||
def test_pci_pass_no_requests(self):
    """The PCI test passes when the instance carries no PCI requests.

    A device is still registered with the tracker, but the claim's
    instance is given empty ``system_metadata`` so that no PCI request
    exists under any prefix.  ``_test_pci`` must then report success
    through its "nothing requested" early return.
    """
    dev_dict = {
        'compute_node_id': 1,
        'address': 'a',
        'product_id': 'p',
        'vendor_id': 'v',
        'status': 'available'}
    self.tracker.new_pci_tracker()
    self.tracker.pci_tracker.set_hvdevs([dev_dict])
    claim = self._claim()
    # Deliberately do NOT call _set_pci_request() here: doing so made
    # this test a duplicate of test_pci_pass and left the no-request
    # path unexercised.  Clear the metadata explicitly so the test does
    # not depend on fixture defaults.
    claim.instance.update(system_metadata={})
    self.assertTrue(claim._test_pci())
|
||||
|
||||
def test_abort(self):
|
||||
claim = self._abort()
|
||||
self.assertTrue(claim.tracker.icalled)
|
||||
|
@ -171,6 +225,13 @@ class ResizeClaimTestCase(ClaimTestCase):
|
|||
return claims.ResizeClaim(self.instance, instance_type, self.tracker,
|
||||
overhead=overhead)
|
||||
|
||||
def _set_pci_request(self, claim):
    """Attach a one-device PCI request under the 'new_' prefix.

    The request (count 1, vendor 'v', product 'p') is JSON-encoded and
    written to the instance's ``system_metadata`` under the
    ``'new_pci_requests'`` key, replacing the previous
    ``system_metadata``.
    """
    device_spec = {'vendor_id': 'v', 'product_id': 'p'}
    request = [{'count': 1, 'spec': [device_spec]}]
    encoded = jsonutils.dumps(request)
    claim.instance.update(system_metadata={'new_pci_requests': encoded})
|
||||
|
||||
def test_abort(self):
|
||||
claim = self._abort()
|
||||
self.assertTrue(claim.tracker.rcalled)
|
||||
|
|
|
@ -192,6 +192,44 @@ class BaseTestCase(test.TestCase):
|
|||
'instance_type_id': 1,
|
||||
'launched_on': None,
|
||||
'system_metadata': sys_meta,
|
||||
'availability_zone': None,
|
||||
'vm_mode': None,
|
||||
'reservation_id': None,
|
||||
'display_name': None,
|
||||
'default_swap_device': None,
|
||||
'power_state': None,
|
||||
'scheduled_at': None,
|
||||
'access_ip_v6': None,
|
||||
'access_ip_v4': None,
|
||||
'key_name': None,
|
||||
'updated_at': None,
|
||||
'cell_name': None,
|
||||
'locked': None,
|
||||
'locked_by': None,
|
||||
'launch_index': None,
|
||||
'architecture': None,
|
||||
'auto_disk_config': None,
|
||||
'terminated_at': None,
|
||||
'ramdisk_id': None,
|
||||
'user_data': None,
|
||||
'cleaned': None,
|
||||
'deleted_at': None,
|
||||
'id': 333,
|
||||
'disable_terminate': None,
|
||||
'hostname': None,
|
||||
'display_description': None,
|
||||
'key_data': None,
|
||||
'deleted': None,
|
||||
'default_ephemeral_device': None,
|
||||
'progress': None,
|
||||
'launched_at': None,
|
||||
'config_drive': None,
|
||||
'kernel_id': None,
|
||||
'user_id': None,
|
||||
'shutdown_terminate': None,
|
||||
'created_at': None,
|
||||
'image_ref': None,
|
||||
'root_device_name': None,
|
||||
}
|
||||
instance.update(kwargs)
|
||||
|
||||
|
|
Loading…
Reference in New Issue