Merge "Add accounting for orphans to resource tracker."

commit aaae75dc35
@@ -268,6 +268,11 @@ class ResourceTracker(object):
         self._update_usage_from_migrations(resources, migrations)
 
+        # Detect and account for orphaned instances that may exist on the
+        # hypervisor, but are not in the DB:
+        orphans = self._find_orphaned_instances()
+        self._update_usage_from_orphans(resources, orphans)
+
         self._report_final_resource_view(resources)
 
         self._sync_compute_node(context, resources)
@@ -364,8 +369,8 @@ class ResourceTracker(object):
 
     def _update_usage(self, resources, usage, sign=1):
         resources['memory_mb_used'] += sign * usage['memory_mb']
-        resources['local_gb_used'] += sign * usage['root_gb']
-        resources['local_gb_used'] += sign * usage['ephemeral_gb']
+        resources['local_gb_used'] += sign * usage.get('root_gb', 0)
+        resources['local_gb_used'] += sign * usage.get('ephemeral_gb', 0)
 
         # free ram and disk may be negative, depending on policy:
         resources['free_ram_mb'] = (resources['memory_mb'] -
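For context: `_update_usage` is called with `sign=1` to add usage and `sign=-1` to release it, so the switch to `usage.get(..., 0)` matters in both directions. The orphan usage dicts added later in this commit carry only `memory_mb`, and indexing `usage['root_gb']` directly would raise a `KeyError`. A minimal standalone sketch of the arithmetic, with made-up numbers:

    # Standalone sketch of _update_usage's arithmetic (made-up numbers);
    # orphan usage dicts carry only 'memory_mb', so .get(..., 0) avoids a
    # KeyError while still charging their memory.
    resources = {'memory_mb_used': 0, 'local_gb_used': 0}

    def update_usage(resources, usage, sign=1):
        resources['memory_mb_used'] += sign * usage['memory_mb']
        resources['local_gb_used'] += sign * usage.get('root_gb', 0)
        resources['local_gb_used'] += sign * usage.get('ephemeral_gb', 0)

    update_usage(resources, {'memory_mb': 512, 'root_gb': 10})       # claim
    update_usage(resources, {'memory_mb': 4})                        # orphan
    update_usage(resources, {'memory_mb': 512, 'root_gb': 10}, -1)   # release
    assert resources == {'memory_mb_used': 4, 'local_gb_used': 0}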
@@ -501,6 +506,40 @@ class ResourceTracker(object):
         for instance in instances:
             self._update_usage_from_instance(resources, instance)
 
+    def _find_orphaned_instances(self):
+        """Given the set of instances and migrations already accounted for
+        by the resource tracker, sanity check the hypervisor to determine
+        if there are any "orphaned" instances left hanging around.
+
+        Orphans could be consuming memory and should be accounted for in
+        usage calculations to guard against potential out of memory
+        errors.
+        """
+        uuids1 = frozenset(self.tracked_instances.keys())
+        uuids2 = frozenset(self.tracked_migrations.keys())
+        uuids = uuids1 | uuids2
+
+        usage = self.driver.get_per_instance_usage()
+        vuuids = frozenset(usage.keys())
+
+        orphan_uuids = vuuids - uuids
+        orphans = [usage[uuid] for uuid in orphan_uuids]
+
+        return orphans
+
+    def _update_usage_from_orphans(self, resources, orphans):
+        """Include orphaned instances in usage."""
+        for orphan in orphans:
+            uuid = orphan['uuid']
+            memory_mb = orphan['memory_mb']
+
+            LOG.warn(_("Detected running orphan instance: %(uuid)s (consuming "
+                       "%(memory_mb)s MB memory)") % locals())
+
+            # just record memory usage for the orphan
+            usage = {'memory_mb': orphan['memory_mb']}
+            self._update_usage(resources, usage)
+
     def _verify_resources(self, resources):
         resource_keys = ["vcpus", "memory_mb", "local_gb", "cpu_info",
                          "vcpus_used", "memory_mb_used", "local_gb_used"]
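The detection logic is a plain set difference: UUIDs the hypervisor reports minus UUIDs the tracker already accounts for via instances or migrations. A standalone sketch of the same logic, with hypothetical UUIDs and usage dicts:

    # Hypothetical stand-ins for tracker state and driver output:
    tracked_instances = {'uuid-a': object(), 'uuid-b': object()}
    tracked_migrations = {'uuid-c': object()}
    per_instance_usage = {
        'uuid-a': {'memory_mb': 512, 'uuid': 'uuid-a'},
        'uuid-x': {'memory_mb': 64, 'uuid': 'uuid-x'},  # running, not in DB
    }

    tracked = frozenset(tracked_instances) | frozenset(tracked_migrations)
    orphan_uuids = frozenset(per_instance_usage) - tracked
    orphans = [per_instance_usage[uuid] for uuid in orphan_uuids]
    assert orphans == [{'memory_mb': 64, 'uuid': 'uuid-x'}]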
@@ -193,17 +193,17 @@ class BaseTestCase(test.TestCase):
         # only used in the subsequent notification:
         return (instance, instance)
 
-    def _tracker(self, host=None, unsupported=False):
+    def _driver(self):
+        return FakeVirtDriver()
+
+    def _tracker(self, host=None):
 
         if host is None:
             host = self.host
 
         node = "fakenode"
 
-        if unsupported:
-            driver = UnsupportedVirtDriver()
-        else:
-            driver = FakeVirtDriver()
+        driver = self._driver()
 
         tracker = resource_tracker.ResourceTracker(host, driver, node)
         return tracker
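This hunk replaces the `unsupported=True` flag with an overridable `_driver()` hook (a small template-method refactor), which is what lets `UnsupportedDriverTestCase` below and the new `OrphanTestCase` each inject their own virt driver without threading flags through `_tracker()`. The shape of the pattern, as a sketch with hypothetical minimal classes:

    # Hypothetical minimal classes illustrating the _driver() hook:
    class FakeDriver(object):
        pass

    class UnsupportedDriver(object):
        pass

    class BaseCase(object):
        def _driver(self):
            return FakeDriver()         # default for most test cases

    class UnsupportedCase(BaseCase):
        def _driver(self):
            return UnsupportedDriver()  # subclass swaps in its own driver

    assert isinstance(UnsupportedCase()._driver(), UnsupportedDriver)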
@@ -215,10 +215,13 @@ class UnsupportedDriverTestCase(BaseTestCase):
     """
     def setUp(self):
         super(UnsupportedDriverTestCase, self).setUp()
-        self.tracker = self._tracker(unsupported=True)
+        self.tracker = self._tracker()
         # seed tracker with data:
         self.tracker.update_available_resource(self.context)
 
+    def _driver(self):
+        return UnsupportedVirtDriver()
+
     def test_disabled(self):
         # disabled = no compute node stats
         self.assertTrue(self.tracker.disabled)
@@ -248,7 +251,7 @@ class UnsupportedDriverTestCase(BaseTestCase):
                                        root_gb=10)
         self.tracker.update_usage(self.context, instance)
 
-    def testDisabledResizeClaim(self):
+    def test_disabled_resize_claim(self):
         instance = self._fake_instance()
         instance_type = self._fake_instance_type_create()
         claim = self.tracker.resize_claim(self.context, instance,
@@ -258,7 +261,7 @@ class UnsupportedDriverTestCase(BaseTestCase):
         self.assertEqual(instance_type['id'],
                          claim.migration['new_instance_type_id'])
 
-    def testDisabledResizeContextClaim(self):
+    def test_disabled_resize_context_claim(self):
         instance = self._fake_instance()
         instance_type = self._fake_instance_type_create()
         with self.tracker.resize_claim(self.context, instance, instance_type) \
@@ -327,18 +330,6 @@ class BaseTrackerTestCase(BaseTestCase):
         self.tracker.update_available_resource(self.context)
         self.limits = self._limits()
 
-        self._assert(FAKE_VIRT_MEMORY_MB, 'memory_mb')
-        self._assert(FAKE_VIRT_LOCAL_GB, 'local_gb')
-        self._assert(FAKE_VIRT_VCPUS, 'vcpus')
-        self._assert(0, 'memory_mb_used')
-        self._assert(0, 'local_gb_used')
-        self._assert(0, 'vcpus_used')
-        self._assert(0, 'running_vms')
-        self._assert(FAKE_VIRT_MEMORY_MB, 'free_ram_mb')
-        self._assert(FAKE_VIRT_LOCAL_GB, 'free_disk_gb')
-        self.assertFalse(self.tracker.disabled)
-        self.assertEqual(0, self.tracker.compute_node['current_workload'])
-
     def _fake_service_get_all_compute_by_host(self, ctx, host):
         self.compute = self._create_compute_node()
         self.service = self._create_service(host, compute=self.compute)
@@ -412,6 +403,19 @@ class TrackerTestCase(BaseTrackerTestCase):
         self.assertFalse(self.tracker.disabled)
         self.assertTrue(self.updated)
 
+    def test_init(self):
+        self._assert(FAKE_VIRT_MEMORY_MB, 'memory_mb')
+        self._assert(FAKE_VIRT_LOCAL_GB, 'local_gb')
+        self._assert(FAKE_VIRT_VCPUS, 'vcpus')
+        self._assert(0, 'memory_mb_used')
+        self._assert(0, 'local_gb_used')
+        self._assert(0, 'vcpus_used')
+        self._assert(0, 'running_vms')
+        self._assert(FAKE_VIRT_MEMORY_MB, 'free_ram_mb')
+        self._assert(FAKE_VIRT_LOCAL_GB, 'free_disk_gb')
+        self.assertFalse(self.tracker.disabled)
+        self.assertEqual(0, self.tracker.compute_node['current_workload'])
+
 
 class InstanceClaimTestCase(BaseTrackerTestCase):
 
@@ -817,3 +821,31 @@ class ResizeClaimTestCase(BaseTrackerTestCase):
         self.assertEqual('fakehost', instance['host'])
         self.assertEqual('fakehost', instance['launched_on'])
         self.assertEqual('fakenode', instance['node'])
+
+
+class OrphanTestCase(BaseTrackerTestCase):
+
+    def setUp(self):
+        super(OrphanTestCase, self).setUp()
+
+    def _driver(self):
+        class OrphanVirtDriver(FakeVirtDriver):
+            def get_per_instance_usage(self):
+                return {
+                    '1-2-3-4-5': {'memory_mb': 4, 'uuid': '1-2-3-4-5'},
+                    '2-3-4-5-6': {'memory_mb': 4, 'uuid': '2-3-4-5-6'},
+                }
+
+        return OrphanVirtDriver()
+
+    def test_usage(self):
+        # 2 instances, 4 MB each
+        self.assertEqual(8, self.tracker.compute_node['memory_mb_used'])
+
+    def test_find(self):
+        # create one legit instance and verify the 2 orphans remain
+        self._fake_instance()
+        orphans = self.tracker._find_orphaned_instances()
+
+        self.assertEqual(2, len(orphans))
@@ -1016,6 +1016,33 @@ class XenAPIVMTestCase(stubs.XenAPITestBase):
             pass
         self.assertTrue(was['called'])
 
+    def test_per_instance_usage_running(self):
+        instance = self._create_instance(spawn=True)
+        instance_type = instance_types.get_instance_type(3)
+
+        expected = {instance['uuid']: {'memory_mb': instance_type['memory_mb'],
+                                       'uuid': instance['uuid']}}
+        actual = self.conn.get_per_instance_usage()
+        self.assertEqual(expected, actual)
+
+        # Paused instances still consume resources:
+        self.conn.pause(instance)
+        actual = self.conn.get_per_instance_usage()
+        self.assertEqual(expected, actual)
+
+    def test_per_instance_usage_suspended(self):
+        # Suspended instances do not consume memory:
+        instance = self._create_instance(spawn=True)
+        self.conn.suspend(instance)
+        actual = self.conn.get_per_instance_usage()
+        self.assertEqual({}, actual)
+
+    def test_per_instance_usage_halted(self):
+        instance = self._create_instance(spawn=True)
+        self.conn.power_off(instance)
+        actual = self.conn.get_per_instance_usage()
+        self.assertEqual({}, actual)
+
     def _create_instance(self, instance_id=1, spawn=True):
         """Creates and spawns a test instance."""
         instance_values = {
@@ -767,6 +767,13 @@ class ComputeDriver(object):
             stats = [stats]
         return [s['hypervisor_hostname'] for s in stats]
 
+    def get_per_instance_usage(self):
+        """Get information about instance resource usage.
+
+        :returns: dict of nova uuid => dict of usage info
+        """
+        return {}
+
 
 def load_compute_driver(virtapi, compute_driver=None):
     """Load a compute driver module.
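Because the base implementation returns `{}`, drivers that do not (or cannot) report per-instance usage simply opt out of orphan detection rather than breaking the resource tracker. A standalone sketch of an override, with hypothetical guest and driver names (not part of this commit), showing the return shape the tracker expects:

    # Hypothetical driver; Guest and _list_active_guests() are illustrative.
    class Guest(object):
        def __init__(self, nova_uuid, memory_mb):
            self.nova_uuid = nova_uuid
            self.memory_mb = memory_mb

    class MyDriver(object):
        def _list_active_guests(self):
            return [Guest('uuid-a', 512), Guest('uuid-b', 256)]

        def get_per_instance_usage(self):
            # dict of nova uuid => dict of usage info, per the base contract
            usage = {}
            for guest in self._list_active_guests():
                usage[guest.nova_uuid] = {'uuid': guest.nova_uuid,
                                          'memory_mb': guest.memory_mb}
            return usage

    assert MyDriver().get_per_instance_usage()['uuid-a']['memory_mb'] == 512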
@@ -607,6 +607,14 @@ class XenAPIDriver(driver.ComputeDriver):
         """resume guest state when a host is booted"""
         self._vmops.power_on(instance)
 
+    def get_per_instance_usage(self):
+        """Get information about instance resource usage.
+
+        :returns: dict of nova uuid => dict of usage
+        info
+        """
+        return self._vmops.get_per_instance_usage()
+
 
 class XenAPISession(object):
     """The session to invoke XenAPI SDK calls"""
@@ -635,6 +635,14 @@ class SessionBase(object):
         db_ref['power_state'] = 'Halted'
     VM_clean_shutdown = VM_hard_shutdown
 
+    def VM_suspend(self, session, vm_ref):
+        db_ref = _db_content['VM'][vm_ref]
+        db_ref['power_state'] = 'Suspended'
+
+    def VM_pause(self, session, vm_ref):
+        db_ref = _db_content['VM'][vm_ref]
+        db_ref['power_state'] = 'Paused'
+
     def pool_eject(self, session, host_ref):
         pass
 
@@ -1639,3 +1639,24 @@ class VMOps(object):
             with excutils.save_and_reraise_exception():
                 recover_method(context, instance, destination_hostname,
                                block_migration)
+
+    def get_per_instance_usage(self):
+        """Get usage info about each active instance."""
+        usage = {}
+
+        def _is_active(vm_rec):
+            power_state = vm_rec['power_state'].lower()
+            return power_state in ['running', 'paused']
+
+        def _get_uuid(vm_rec):
+            other_config = vm_rec['other_config']
+            return other_config.get('nova_uuid', None)
+
+        for vm_ref, vm_rec in vm_utils.list_vms(self._session):
+            uuid = _get_uuid(vm_rec)
+
+            if _is_active(vm_rec) and uuid is not None:
+                memory_mb = int(vm_rec['memory_static_max']) / 1024 / 1024
+                usage[uuid] = {'memory_mb': memory_mb, 'uuid': uuid}
+
+        return usage