Extend the scheduler HostState for metrics from compute_node

Changes HostState to be more generic and extensible so that it can store
other metrics from the compute node for scheduling purposes.

This is part of the blueprint utilization-aware-scheduling.

Change-Id: Ia472e9dcdaf12371ffad94e074cecd51886a1c04
This commit is contained in:
Lianhao Lu
2013-06-26 18:53:06 +08:00
parent f567a11460
commit 32da9b2b99
2 changed files with 61 additions and 0 deletions

View File

@@ -17,6 +17,7 @@
Manage hosts in the current zone. Manage hosts in the current zone.
""" """
import collections
import UserDict import UserDict
from oslo.config import cfg from oslo.config import cfg
@@ -95,6 +96,11 @@ class ReadOnlyDict(UserDict.IterableUserDict):
raise TypeError() raise TypeError()
# A single metric sample reported by a compute node: the measured value,
# the timestamp attached to it, and the source that reported it.
MetricItem = collections.namedtuple('MetricItem', 'value timestamp source')
class HostState(object): class HostState(object):
"""Mutable and immutable information tracked for a host. """Mutable and immutable information tracked for a host.
This is an attempt to remove the ad-hoc data structures This is an attempt to remove the ad-hoc data structures
@@ -134,6 +140,9 @@ class HostState(object):
# Resource oversubscription values for the compute host: # Resource oversubscription values for the compute host:
self.limits = {} self.limits = {}
# Generic metrics from compute nodes
self.metrics = {}
self.updated = None self.updated = None
def update_capabilities(self, capabilities=None, service=None): def update_capabilities(self, capabilities=None, service=None):
@@ -146,6 +155,26 @@ class HostState(object):
service = {} service = {}
self.service = ReadOnlyDict(service) self.service = ReadOnlyDict(service)
def _update_metrics_from_compute_node(self, compute):
    """Load the metrics carried by a compute node record into self.metrics.

    The 'metrics' entry of ``compute`` is read with ``compute.get`` and,
    when non-empty, decoded with ``jsonutils.loads``.  Each decoded entry
    is stored in ``self.metrics`` under its 'name' as a ``MetricItem``.
    Entries whose name is falsy are logged with a warning and not stored.

    :param compute: mapping-like compute node record (must provide ``get``)
    :raises KeyError: if a metric entry lacks 'name', 'value',
                      'timestamp' or 'source'
    """
    # NOTE(llu): The DB schema allows NULL in the metrics column, so
    #            compute.get may return None. The 'or []' keeps None
    #            away from the JSON decoder, which would fail on it.
    raw_metrics = compute.get('metrics', []) or []
    entries = jsonutils.loads(raw_metrics) if raw_metrics else raw_metrics
    for entry in entries:
        # 'name', 'value', 'timestamp' and 'source' are all mandatory
        # keys; a missing one is deliberately left to raise KeyError.
        # On top of that, 'name' must also be truthy.
        metric_name = entry['name']
        metric_item = MetricItem(value=entry['value'],
                                 timestamp=entry['timestamp'],
                                 source=entry['source'])
        if not metric_name:
            LOG.warn(_("Metric name unknown of %r") % metric_item)
            continue
        self.metrics[metric_name] = metric_item
def update_from_compute_node(self, compute): def update_from_compute_node(self, compute):
"""Update information about a host from its compute_node info.""" """Update information about a host from its compute_node info."""
if (self.updated and compute['updated_at'] if (self.updated and compute['updated_at']
@@ -222,6 +251,9 @@ class HostState(object):
self.num_io_ops = int(self.stats.get('io_workload', 0)) self.num_io_ops = int(self.stats.get('io_workload', 0))
# update metrics
self._update_metrics_from_compute_node(compute)
def consume_from_instance(self, instance): def consume_from_instance(self, instance):
"""Incrementally update host state from an instance.""" """Incrementally update host state from an instance."""
disk_mb = (instance['root_gb'] + instance['ephemeral_gb']) * 1024 disk_mb = (instance['root_gb'] + instance['ephemeral_gb']) * 1024

View File

@@ -19,6 +19,7 @@ from nova.compute import task_states
from nova.compute import vm_states from nova.compute import vm_states
from nova import db from nova import db
from nova import exception from nova import exception
from nova.openstack.common import jsonutils
from nova.openstack.common import timeutils from nova.openstack.common import timeutils
from nova.scheduler import filters from nova.scheduler import filters
from nova.scheduler import host_manager from nova.scheduler import host_manager
@@ -512,3 +513,31 @@ class HostStateTestCase(test.NoDBTestCase):
self.assertEqual(1, host.task_states[None]) self.assertEqual(1, host.task_states[None])
self.assertEqual(2, host.num_instances_by_os_type['Linux']) self.assertEqual(2, host.num_instances_by_os_type['Linux'])
self.assertEqual(1, host.num_io_ops) self.assertEqual(1, host.num_io_ops)
def test_resources_consumption_from_compute_node(self):
    """Metrics serialized on a compute node show up in HostState.metrics."""
    # Two metric records: one with a float value, one with a string value.
    res1 = {'name': 'res1', 'value': 1.0,
            'source': 'source1', 'timestamp': None}
    res2 = {'name': 'res2', 'value': "string2",
            'source': 'source2', 'timestamp': None}

    hyper_ver_int = utils.convert_version_to_int('6.0.0')
    compute = {
        # The metrics are handed over as a JSON string.
        'metrics': jsonutils.dumps([res1, res2]),
        'memory_mb': 0,
        'free_disk_gb': 0,
        'local_gb': 0,
        'local_gb_used': 0,
        'free_ram_mb': 0,
        'vcpus': 0,
        'vcpus_used': 0,
        'updated_at': None,
        'host_ip': '127.0.0.1',
        'hypervisor_version': hyper_ver_int,
    }

    host = host_manager.HostState("fakehost", "fakenode")
    host.update_from_compute_node(compute)

    # Both records must be present, keyed by their metric name.
    self.assertEqual(len(host.metrics), 2)
    self.assertEqual(set(['res1', 'res2']), set(host.metrics.keys()))

    first_metric = host.metrics['res1']
    self.assertEqual(1.0, first_metric.value)
    self.assertEqual('source1', first_metric.source)

    second_metric = host.metrics['res2']
    self.assertEqual('string2', second_metric.value)
    self.assertEqual('source2', second_metric.source)