Rework monitor plugin interface and API

In order to prepare for the stevedore-ization of the compute monitor
plugins, and to clean up the overly complex monitor API, this patch
breaks out the base monitor plugin into a much simpler class that has
the following methods:

- get_metric_names(), which remains unchanged in its
  purpose from the original ResourceMonitorBase class
- get_metric(name), which returns a (value, timestamp) tuple for
  a supplied metric name
- add_metrics_to_list(), which is a non-overridable method on the
  base plugin class that adds a set of metrics to a
  nova.objects.MonitorMetricList object supplied as the parameter

There is a base class called nova.compute.monitors.base.CPUMonitorBase
that simply returns the appropriate CPU monitor metric name constants
for the get_metric_names() method.

The directory structure of the monitor plugins and the module naming for
the lone in-tree plugin are changed to better represent what the directories and
files contain. The lone in-tree plugin is a monitor that inherits from
nova.compute.monitors.base.CPUMonitorBase and uses the call to the
hypervisor's get_host_cpu_stats() to grab CPU-centric metrics on a
periodic basis.

Change-Id: I3f2e8eca6ce43b07b3c8b430b8576be4f0d3f909
Partial-bug: 1468012
This commit is contained in:
Jay Pipes 2015-06-24 17:26:28 -04:00
parent 28258a18e9
commit c41db608fe
10 changed files with 165 additions and 381 deletions

View File

@ -15,19 +15,12 @@
"""
Resource monitor API specification.
ResourceMonitorBase provides the definition of minimum set of methods
that needs to be implemented by Resource Monitor.
"""
import functools
import types
from oslo_config import cfg
from oslo_log import log as logging
from oslo_utils import timeutils
import six
import nova.compute.monitors.base
from nova.i18n import _LW
from nova import loadables
@ -47,104 +40,13 @@ CONF.register_opts(compute_monitors_opts)
LOG = logging.getLogger(__name__)
class ResourceMonitorMeta(type):
    """Metaclass that records a class's ``_get_*`` functions as metrics."""

    def __init__(cls, names, bases, dict_):
        """Build ``cls.metric_map`` from the functions defined on the class.

        Every plain function in the class ``__dict__`` whose name starts
        with ``_get_`` (and has at least one more character) is stored in
        ``cls.metric_map``. The key is the remainder of the function name
        with underscores replaced by dots, so ``_get_cpu_frequency`` is
        stored under ``cpu.frequency``.
        """
        super(ResourceMonitorMeta, cls).__init__(names, bases, dict_)

        marker = '_get_'
        marker_len = len(marker)
        cls.metric_map = {}
        for attr_name, attr in six.iteritems(cls.__dict__):
            # A bare '_get_' with nothing after it is not a metric getter.
            if len(attr_name) <= marker_len:
                continue
            if not attr_name.startswith(marker):
                continue
            # Only plain functions qualify.
            if not isinstance(attr, types.FunctionType):
                continue
            dotted_name = attr_name[marker_len:].replace('_', '.')
            cls.metric_map[dotted_name] = attr
@six.add_metaclass(ResourceMonitorMeta)
class ResourceMonitorBase(object):
    """Base class for resource monitors."""

    def __init__(self, parent):
        # ``parent`` is kept as the compute manager reference; ``source``
        # starts unset and ``_data`` starts as an empty metrics cache.
        self.compute_manager = parent
        self.source = None
        self._data = {}

    @classmethod
    def add_timestamp(cls, func):
        """Decorate a metric getter so it returns (value, timestamp).

        The wrapped function's return value is paired with whatever is
        stored under the ``"timestamp"`` key of ``self._data`` (``None``
        when the key is absent). Classes that want to compute the
        timestamp themselves should not use this decorator.
        """
        @functools.wraps(func)
        def wrapper(self, **kwargs):
            value = func(self, **kwargs)
            return value, self._data.get("timestamp")
        return wrapper

    def _update_data(self):
        """Refresh the metrics held in ``_data``.

        This base implementation does nothing. Subclasses may override it;
        it is invoked at the start of get_metrics().
        """

    def get_metric_names(self):
        """Return the names of the available metrics.

        :returns: the keys of ``metric_map``, usable for detecting
                  conflicting or duplicated metric names
        """
        return self.metric_map.keys()

    def get_metrics(self, **kwargs):
        """Collect the current value of every metric in ``metric_map``.

        Each entry of the result is a dictionary of the form
            {'name': metric name,
             'value': metric value,
             'timestamp': the time when the value is retrieved,
             'source': what the value is got by}

        :param kwargs: extra arguments passed through to each getter
        :returns: a list with one dictionary per metric
        """
        self._update_data()
        collected = []
        for metric_name, getter in six.iteritems(self.metric_map):
            # Getters return a sequence whose first two items are the
            # value and its timestamp.
            outcome = getter(self, **kwargs)
            entry = self._populate(metric_name, outcome[0], outcome[1])
            collected.append(entry)
        return collected

    def _populate(self, metric_name, metric_value, timestamp=None):
        """Build the dictionary describing a single metric.

        A falsy ``timestamp`` is replaced by ``timeutils.utcnow()``.
        """
        return {
            'name': metric_name,
            'value': metric_value,
            'timestamp': timestamp or timeutils.utcnow(),
            'source': self.source,
        }
# TODO(jaypipes): Replace the use of loadables with stevedore.
class ResourceMonitorHandler(loadables.BaseLoader):
"""Base class to handle loading monitor classes.
"""
def __init__(self):
super(ResourceMonitorHandler, self).__init__(ResourceMonitorBase)
super(ResourceMonitorHandler, self).__init__(
nova.compute.monitors.base.MonitorBase)
def choose_monitors(self, manager):
"""This function checks the monitor names and metrics names against a
@ -166,7 +68,7 @@ class ResourceMonitorHandler(loadables.BaseLoader):
# make sure different monitors do not have the same
# metric name
monitor = monitor_class_map[monitor_name](manager)
metric_names_tmp = set(monitor.get_metric_names())
metric_names_tmp = monitor.get_metric_names()
overlap = metric_names & metric_names_tmp
if not overlap:
metric_names = metric_names | metric_names_tmp

View File

@ -0,0 +1,82 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import abc
import six
from nova import objects
from nova.objects import fields
@six.add_metaclass(abc.ABCMeta)
class MonitorBase(object):
    """Base class for all resource monitor plugins."""

    def __init__(self, compute_manager):
        """Store the compute manager and initialise the metric source.

        :param compute_manager: object kept on the instance as
                                ``self.compute_manager``.
        """
        self.compute_manager = compute_manager
        # Left unset here; it is passed as the ``source`` of every
        # MonitorMetric built in add_metrics_to_list().
        self.source = None

    @abc.abstractmethod
    def get_metric(self, name):
        """Return a (value, timestamp) tuple for the supplied metric name.

        :param name: The name/key for the metric to grab the value for.
        :returns: a two-item (value, timestamp) tuple; it is unpacked as
                  such by add_metrics_to_list().
        """
        raise NotImplementedError('get_metric')

    @abc.abstractmethod
    def get_metric_names(self):
        """Get available metric names.

        Get available metric names, which are represented by a set of keys
        that can be used to check conflicts and duplications

        :returns: set containing one or more values from
            nova.objects.fields.MonitorMetricType.ALL constants
        """
        raise NotImplementedError('get_metric_names')

    def add_metrics_to_list(self, metrics_list):
        """Adds metric objects to a supplied list object.

        :param metrics_list: nova.objects.MonitorMetricList that the monitor
                             plugin should append nova.objects.MonitorMetric
                             objects to.
        """
        metric_names = self.get_metric_names()
        # Collect into a local list first so that metrics_list is only
        # touched once every metric has been fetched successfully.
        metrics = []
        for name in metric_names:
            value, timestamp = self.get_metric(name)
            metric = objects.MonitorMetric(name=name,
                                           value=value,
                                           timestamp=timestamp,
                                           source=self.source)
            metrics.append(metric)
        metrics_list.objects.extend(metrics)
class CPUMonitorBase(MonitorBase):
    """Base class for all monitors that return CPU-related metrics."""

    def get_metric_names(self):
        """Return the set of CPU metric name constants.

        :returns: set of ten nova.objects.fields.MonitorMetricType CPU
                  constants (frequency, the four time counters, the four
                  matching percentages and the overall CPU percentage)
        """
        metric_type = fields.MonitorMetricType
        return {
            metric_type.CPU_FREQUENCY,
            metric_type.CPU_USER_TIME,
            metric_type.CPU_KERNEL_TIME,
            metric_type.CPU_IDLE_TIME,
            metric_type.CPU_IOWAIT_TIME,
            metric_type.CPU_USER_PERCENT,
            metric_type.CPU_KERNEL_PERCENT,
            metric_type.CPU_IDLE_PERCENT,
            metric_type.CPU_IOWAIT_PERCENT,
            metric_type.CPU_PERCENT,
        }

View File

View File

@ -14,15 +14,14 @@
# under the License.
"""
CPU monitor based on compute driver to retrieve CPU information
CPU monitor based on virt driver to retrieve CPU information
"""
from oslo_config import cfg
from oslo_log import log as logging
from oslo_utils import timeutils
from nova.compute import monitors
from nova.compute.monitors import cpu_monitor as monitor
from nova.compute.monitors import base
from nova import exception
from nova.i18n import _LE
@ -31,65 +30,21 @@ CONF.import_opt('compute_driver', 'nova.virt.driver')
LOG = logging.getLogger(__name__)
class ComputeDriverCPUMonitor(monitor._CPUMonitorBase):
"""CPU monitor based on compute driver
class Monitor(base.CPUMonitorBase):
"""CPU monitor that uses the virt driver's get_host_cpu_stats() call."""
The class inherits from the base class for resource monitors,
and implements the essential methods to get metric names and their real
values for CPU utilization.
The compute manager could load the monitors to retrieve the metrics
of the devices on compute nodes and know their resource information
periodically.
"""
def __init__(self, parent):
super(ComputeDriverCPUMonitor, self).__init__(parent)
def __init__(self, compute_manager):
super(Monitor, self).__init__(compute_manager)
self.source = CONF.compute_driver
self.driver = self.compute_manager.driver
self._data = {}
self._cpu_stats = {}
@monitors.ResourceMonitorBase.add_timestamp
def _get_cpu_frequency(self, **kwargs):
return self._data.get("cpu.frequency")
def get_metric(self, name):
self._update_data()
return self._data[name], self._data["timestamp"]
@monitors.ResourceMonitorBase.add_timestamp
def _get_cpu_user_time(self, **kwargs):
return self._data.get("cpu.user.time")
@monitors.ResourceMonitorBase.add_timestamp
def _get_cpu_kernel_time(self, **kwargs):
return self._data.get("cpu.kernel.time")
@monitors.ResourceMonitorBase.add_timestamp
def _get_cpu_idle_time(self, **kwargs):
return self._data.get("cpu.idle.time")
@monitors.ResourceMonitorBase.add_timestamp
def _get_cpu_iowait_time(self, **kwargs):
return self._data.get("cpu.iowait.time")
@monitors.ResourceMonitorBase.add_timestamp
def _get_cpu_user_percent(self, **kwargs):
return self._data.get("cpu.user.percent")
@monitors.ResourceMonitorBase.add_timestamp
def _get_cpu_kernel_percent(self, **kwargs):
return self._data.get("cpu.kernel.percent")
@monitors.ResourceMonitorBase.add_timestamp
def _get_cpu_idle_percent(self, **kwargs):
return self._data.get("cpu.idle.percent")
@monitors.ResourceMonitorBase.add_timestamp
def _get_cpu_iowait_percent(self, **kwargs):
return self._data.get("cpu.iowait.percent")
@monitors.ResourceMonitorBase.add_timestamp
def _get_cpu_percent(self, **kwargs):
return self._data.get("cpu.percent")
def _update_data(self, **kwargs):
def _update_data(self):
# Don't allow to call this function so frequently (<= 1 sec)
now = timeutils.utcnow()
if self._data.get("timestamp") is not None:

View File

@ -1,64 +0,0 @@
# Copyright 2013 Intel Corporation.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""
CPU monitor to retrieve CPU information
"""
from nova.compute import monitors
class _CPUMonitorBase(monitors.ResourceMonitorBase):
    """Base for CPU monitors; every getter is a (None, None) placeholder."""

    # NOTE: each getter is written as a plain function in the class body
    # and named with the ``_get_`` prefix, matching the other metric
    # getters in this file.

    def _get_cpu_frequency(self, **kwargs):
        """Current CPU frequency and timestamp (placeholder)."""
        return (None, None)

    def _get_cpu_user_time(self, **kwargs):
        """CPU user mode time and timestamp (placeholder)."""
        return (None, None)

    def _get_cpu_kernel_time(self, **kwargs):
        """CPU kernel time and timestamp (placeholder)."""
        return (None, None)

    def _get_cpu_idle_time(self, **kwargs):
        """CPU idle time and timestamp (placeholder)."""
        return (None, None)

    def _get_cpu_iowait_time(self, **kwargs):
        """CPU I/O wait time and timestamp (placeholder)."""
        return (None, None)

    def _get_cpu_user_percent(self, **kwargs):
        """CPU user mode percentage and timestamp (placeholder)."""
        return (None, None)

    def _get_cpu_kernel_percent(self, **kwargs):
        """CPU kernel percentage and timestamp (placeholder)."""
        return (None, None)

    def _get_cpu_idle_percent(self, **kwargs):
        """CPU idle percentage and timestamp (placeholder)."""
        return (None, None)

    def _get_cpu_iowait_percent(self, **kwargs):
        """CPU I/O wait percentage and timestamp (placeholder)."""
        return (None, None)

    def _get_cpu_percent(self, **kwargs):
        """Generic CPU utilization and timestamp (placeholder)."""
        return (None, None)

View File

@ -1,18 +0,0 @@
# Copyright 2013 Intel Corporation.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from nova.compute.monitors.virt import cpu_monitor
ComputeDriverCPUMonitor = cpu_monitor.ComputeDriverCPUMonitor

View File

@ -348,14 +348,17 @@ class ResourceTracker(object):
"""Get the metrics from monitors and
notify information to message bus.
"""
metrics = []
metrics = objects.MonitorMetricList()
metrics_info = {}
for monitor in self.monitors:
try:
metrics += monitor.get_metrics(nodename=nodename)
monitor.add_metrics_to_list(metrics)
except Exception:
LOG.warning(_LW("Cannot get the metrics from %s."), monitor)
if metrics:
# TODO(jaypipes): Remove this when compute_node.metrics doesn't need
# to be populated as a JSON-ified string.
metrics = metrics.to_list()
if len(metrics):
metrics_info['nodename'] = nodename
metrics_info['metrics'] = metrics
metrics_info['host'] = self.host

View File

@ -15,7 +15,8 @@
"""Tests for Compute Driver CPU resource monitor."""
from nova.compute.monitors import virt
from nova.compute.monitors.cpu import virt_driver
from nova import objects
from nova import test
@ -34,7 +35,7 @@ class ComputeDriverCPUMonitorTestCase(test.NoDBTestCase):
class FakeComputeManager(object):
driver = FakeDriver()
self.monitor = virt.ComputeDriverCPUMonitor(FakeComputeManager())
self.monitor = virt_driver.Monitor(FakeComputeManager())
def test_get_metric_names(self):
names = self.monitor.get_metric_names()
@ -51,13 +52,14 @@ class ComputeDriverCPUMonitorTestCase(test.NoDBTestCase):
self.assertIn("cpu.percent", names)
def test_get_metrics(self):
metrics_raw = self.monitor.get_metrics()
metrics = objects.MonitorMetricList()
self.monitor.add_metrics_to_list(metrics)
names = self.monitor.get_metric_names()
metrics = {}
for metric in metrics_raw:
self.assertIn(metric['name'], names)
metrics[metric['name']] = metric['value']
for metric in metrics.objects:
self.assertIn(metric.name, names)
# Some conversion to a dict to ease testing...
metrics = {m.name: m.value for m in metrics.objects}
self.assertEqual(metrics["cpu.frequency"], 800)
self.assertEqual(metrics["cpu.user.time"], 26728850000000)
self.assertEqual(metrics["cpu.kernel.time"], 5664160000000)

View File

@ -15,91 +15,42 @@
"""Tests for resource monitors."""
from oslo_utils import timeutils
from nova.compute import monitors
from nova.compute.monitors import base
from nova.objects import fields
from nova import test
class FakeResourceMonitor(monitors.ResourceMonitorBase):
def _update_data(self):
self._data['foo.metric1'] = '1000'
self._data['foo.metric2'] = '99.999'
self._data['timestamp'] = '123'
class CPUMonitor1(base.MonitorBase):
@monitors.ResourceMonitorBase.add_timestamp
def _get_foo_metric1(self, **kwargs):
return self._data.get("foo.metric1")
NOW_TS = timeutils.utcnow()
@monitors.ResourceMonitorBase.add_timestamp
def _get_foo_metric2(self, **kwargs):
return self._data.get("foo.metric2")
class FakeMonitorClass1(monitors.ResourceMonitorBase):
def get_metrics(self, **kwargs):
data = [{'timestamp': 1232,
'name': 'key1',
'value': 2600,
'source': 'libvirt'}]
return data
def __init__(self, *args):
super(CPUMonitor1, self).__init__(*args)
self.source = 'CPUMonitor1'
def get_metric_names(self):
return ['key1']
return set([
fields.MonitorMetricType.CPU_FREQUENCY
])
def get_metric(self, name):
return 100, CPUMonitor1.NOW_TS
class FakeMonitorClass2(monitors.ResourceMonitorBase):
def get_metrics(self, **kwargs):
data = [{'timestamp': 123,
'name': 'key2',
'value': 1600,
'source': 'libvirt'}]
return data
class CPUMonitor2(base.MonitorBase):
def get_metric_names(self):
return ['key2']
return set([
fields.MonitorMetricType.CPU_FREQUENCY
])
class FakeMonitorClass3(monitors.ResourceMonitorBase):
def get_metrics(self, **kwargs):
data = [{'timestamp': 1234,
'name': 'key1',
'value': 1200,
'source': 'libvirt'}]
return data
def get_metric_names(self):
return ['key1']
class FakeMonitorClass4(monitors.ResourceMonitorBase):
def get_metrics(self, **kwargs):
raise test.TestingException()
def get_metric_names(self):
raise test.TestingException()
class ResourceMonitorBaseTestCase(test.NoDBTestCase):
def setUp(self):
super(ResourceMonitorBaseTestCase, self).setUp()
self.monitor = FakeResourceMonitor(None)
def test_get_metric_names(self):
names = self.monitor.get_metric_names()
self.assertEqual(2, len(names))
self.assertIn("foo.metric1", names)
self.assertIn("foo.metric2", names)
def test_get_metrics(self):
metrics_raw = self.monitor.get_metrics()
names = self.monitor.get_metric_names()
metrics = {}
for metric in metrics_raw:
self.assertIn(metric['name'], names)
self.assertEqual(metric["timestamp"], '123')
metrics[metric['name']] = metric['value']
self.assertEqual(metrics["foo.metric1"], '1000')
self.assertEqual(metrics["foo.metric2"], '99.999')
def get_metric(self, name):
# This should never be called since the CPU metrics overlap
# with the ones in the CPUMonitor1.
pass
class ResourceMonitorsTestCase(test.NoDBTestCase):
@ -109,36 +60,21 @@ class ResourceMonitorsTestCase(test.NoDBTestCase):
super(ResourceMonitorsTestCase, self).setUp()
self.monitor_handler = monitors.ResourceMonitorHandler()
fake_monitors = [
'nova.tests.unit.compute.monitors.test_monitors.FakeMonitorClass1',
'nova.tests.unit.compute.monitors.test_monitors.FakeMonitorClass2']
'nova.tests.unit.compute.monitors.test_monitors.CPUMonitor1',
'nova.tests.unit.compute.monitors.test_monitors.CPUMonitor2']
self.flags(compute_available_monitors=fake_monitors)
classes = self.monitor_handler.get_matching_classes(
['nova.compute.monitors.all_monitors'])
self.class_map = {}
for cls in classes:
self.class_map[cls.__name__] = cls
def test_choose_monitors_not_found(self):
self.flags(compute_monitors=['FakeMonitorClass5', 'FakeMonitorClass4'])
monitor_classes = self.monitor_handler.choose_monitors(self)
self.assertEqual(len(monitor_classes), 0)
def test_choose_monitors_bad(self):
self.flags(compute_monitors=['FakeMonitorClass1', 'FakePluginClass3'])
self.flags(compute_monitors=['CPUMonitor1', 'CPUMonitorb'])
monitor_classes = self.monitor_handler.choose_monitors(self)
self.assertEqual(len(monitor_classes), 1)
def test_choose_monitors(self):
self.flags(compute_monitors=['FakeMonitorClass1', 'FakeMonitorClass2'])
def test_choose_monitors_bad(self):
self.flags(compute_monitors=['CPUMonitor1', 'CPUMonitor2'])
monitor_classes = self.monitor_handler.choose_monitors(self)
self.assertEqual(len(monitor_classes), 2)
self.assertEqual(len(monitor_classes), 1)
def test_choose_monitors_none(self):
self.flags(compute_monitors=[])
monitor_classes = self.monitor_handler.choose_monitors(self)
self.assertEqual(len(monitor_classes), 0)
def test_all_monitors(self):
# Double check at least a couple of known monitors exist
self.assertIn('ComputeDriverCPUMonitor', self.class_map)

View File

@ -22,6 +22,7 @@ import uuid
import mock
from oslo_config import cfg
from oslo_serialization import jsonutils
from oslo_utils import timeutils
from nova.compute import resource_tracker
from nova.compute import resources
@ -1276,8 +1277,8 @@ class ComputeMonitorTestCase(BaseTestCase):
def setUp(self):
super(ComputeMonitorTestCase, self).setUp()
fake_monitors = [
'nova.tests.unit.compute.monitors.test_monitors.FakeMonitorClass1',
'nova.tests.unit.compute.monitors.test_monitors.FakeMonitorClass2']
'nova.tests.unit.compute.monitors.test_monitors.CPUMonitor1',
'nova.tests.unit.compute.monitors.test_monitors.CPUMonitor2']
self.flags(compute_available_monitors=fake_monitors)
self.tracker = self._tracker()
self.node_name = 'nodename'
@ -1288,39 +1289,26 @@ class ComputeMonitorTestCase(BaseTestCase):
self.project_id)
def test_get_host_metrics_none(self):
self.flags(compute_monitors=['FakeMontorClass1', 'FakeMonitorClass4'])
self.flags(compute_monitors=[])
self.tracker.monitors = []
metrics = self.tracker._get_host_metrics(self.context,
self.node_name)
self.assertEqual(len(metrics), 0)
def test_get_host_metrics_one_failed(self):
self.flags(compute_monitors=['FakeMonitorClass1', 'FakeMonitorClass4'])
class1 = test_monitors.FakeMonitorClass1(self.tracker)
class4 = test_monitors.FakeMonitorClass4(self.tracker)
self.tracker.monitors = [class1, class4]
metrics = self.tracker._get_host_metrics(self.context,
self.node_name)
self.assertTrue(len(metrics) > 0)
@mock.patch.object(resource_tracker.LOG, 'warning')
def test_get_host_metrics_exception(self, mock_LOG_warning):
self.flags(compute_monitors=['FakeMontorClass1'])
class1 = test_monitors.FakeMonitorClass1(self.tracker)
self.tracker.monitors = [class1]
with mock.patch.object(class1, 'get_metrics',
side_effect=test.TestingException()):
metrics = self.tracker._get_host_metrics(self.context,
self.node_name)
mock_LOG_warning.assert_called_once_with(
u'Cannot get the metrics from %s.', class1)
self.assertEqual(0, len(metrics))
monitor = mock.MagicMock()
monitor.add_metrics_to_list.side_effect = Exception
self.tracker.monitors = [monitor]
metrics = self.tracker._get_host_metrics(self.context,
self.node_name)
mock_LOG_warning.assert_called_once_with(
u'Cannot get the metrics from %s.', mock.ANY)
self.assertEqual(0, len(metrics))
def test_get_host_metrics(self):
self.flags(compute_monitors=['FakeMonitorClass1', 'FakeMonitorClass2'])
class1 = test_monitors.FakeMonitorClass1(self.tracker)
class2 = test_monitors.FakeMonitorClass2(self.tracker)
self.tracker.monitors = [class1, class2]
class1 = test_monitors.CPUMonitor1(self.tracker)
self.tracker.monitors = [class1]
mock_notifier = mock.Mock()
@ -1331,17 +1319,15 @@ class ComputeMonitorTestCase(BaseTestCase):
mock_get.assert_called_once_with(service='compute',
host=self.node_name)
expected_metrics = [{
'timestamp': 1232,
'name': 'key1',
'value': 2600,
'source': 'libvirt'
}, {
'name': 'key2',
'source': 'libvirt',
'timestamp': 123,
'value': 1600
}]
expected_metrics = [
{
'timestamp': timeutils.strtime(
test_monitors.CPUMonitor1.NOW_TS),
'name': 'cpu.frequency',
'value': 100,
'source': 'CPUMonitor1'
},
]
payload = {
'metrics': expected_metrics,