Add disk latency metrics implementation in Hyper-V Inspector

High latency between I/O requests can be a sign of issues. Collecting
disk metrics can help detect those issues. Windows / Hyper-V Server
2012 R2 can collect those metrics. This patch adds a disk latency
metrics implementation for the Hyper-V Inspector as well as disk latency
and per device latency pollsters.

DocImpact

Change-Id: I1621fc7d4226ae1f46cdfca878e8b52487ca4379
Implements: blueprint hyper-v-disk-latency-metrics
This commit is contained in:
Adelina Tuvenie 2015-01-27 04:27:29 -08:00
parent b6f0165b37
commit ef6f21ada7
10 changed files with 224 additions and 2 deletions

View File

@ -42,6 +42,10 @@ DiskRateData = collections.namedtuple('DiskRateData',
'write_requests_rate',
'per_disk_rate'])
# Cached per-instance latency record: the aggregate value and the
# per-device mapping built by the latency pollsters.
DiskLatencyData = collections.namedtuple(
    'DiskLatencyData',
    ['disk_latency', 'per_disk_latency'],
)
@six.add_metaclass(abc.ABCMeta)
class _Base(pollsters.BaseComputePollster):
@ -463,3 +467,86 @@ class PerDeviceWriteRequestsRatePollster(_DiskRatesPollsterBase):
resource_id="%s-%s" % (instance.id, disk),
))
return samples
@six.add_metaclass(abc.ABCMeta)
class _DiskLatencyPollsterBase(pollsters.BaseComputePollster):
    """Shared machinery for the disk latency pollsters.

    Subclasses implement ``_get_samples`` to turn the cached
    ``DiskLatencyData`` of an instance into samples. The inspector is
    queried at most once per instance for a given ``cache`` dict.
    """

    CACHE_KEY_DISK_LATENCY = 'disk-latency'

    def _populate_cache(self, inspector, cache, instance):
        """Return the ``DiskLatencyData`` for ``instance``.

        The result is stored under
        ``cache[CACHE_KEY_DISK_LATENCY][instance.id]`` and reused on later
        calls that pass the same ``cache``.

        :param inspector: object providing ``inspect_disk_latency``
        :param cache: dict shared between pollsters
        :param instance: the instance to inspect; only ``id`` is read here
        :return: a ``DiskLatencyData`` tuple
        """
        i_cache = cache.setdefault(self.CACHE_KEY_DISK_LATENCY, {})
        if instance.id not in i_cache:
            # NOTE(review): the aggregate is the plain sum of the per-device
            # values, not a mean across devices -- confirm this is intended.
            latency = 0
            per_device_latency = {}
            for disk, stats in inspector.inspect_disk_latency(instance):
                latency += stats.disk_latency
                per_device_latency[disk.device] = stats.disk_latency
            i_cache[instance.id] = DiskLatencyData(
                latency,
                {'disk_latency': per_device_latency},
            )
        return i_cache[instance.id]

    @abc.abstractmethod
    def _get_samples(self, instance, disk_latency_info):
        """Return one or more Sample.

        :param instance: the instance the samples belong to
        :param disk_latency_info: the ``DiskLatencyData`` of the instance
        """

    def get_samples(self, manager, cache, resources):
        """Yield the latency samples of every instance in ``resources``.

        Failures are logged per instance and never propagated, so one bad
        instance does not stop the remaining ones from being polled.

        :param manager: unused by this method
        :param cache: dict shared between pollsters
        :param resources: iterable of instances to poll
        """
        for instance in resources:
            try:
                disk_latency_info = self._populate_cache(
                    self.inspector,
                    cache,
                    instance,
                )
                for disk_latency in self._get_samples(instance,
                                                      disk_latency_info):
                    yield disk_latency
            except virt_inspector.InstanceNotFoundException as err:
                # Instance was deleted while getting samples. Ignore it.
                LOG.debug(_('Exception while getting samples %s'), err)
            except ceilometer.NotImplementedError:
                # Selected inspector does not implement this pollster.
                LOG.debug(_('%(inspector)s does not provide data for '
                            ' %(pollster)s'),
                          {'inspector': self.inspector.__class__.__name__,
                           'pollster': self.__class__.__name__})
            except Exception as err:
                instance_name = util.instance_name(instance)
                LOG.exception(_('Ignoring instance %(name)s: %(error)s'),
                              {'name': instance_name, 'error': err})
class DiskLatencyPollster(_DiskLatencyPollsterBase):
    """Publish the instance-wide ``disk.latency`` gauge."""

    def _get_samples(self, instance, disk_latency_info):
        """Return a single-element list with the ``disk.latency`` sample.

        :param instance: the instance the sample belongs to
        :param disk_latency_info: object exposing ``disk_latency``
        :return: list containing one sample with unit ``ms``
        """
        return [util.make_sample_from_instance(
            instance,
            name='disk.latency',
            type=sample.TYPE_GAUGE,
            unit='ms',
            # Float divisor: with an integer latency, Python 2 would
            # floor-divide and silently drop the sub-millisecond part.
            volume=disk_latency_info.disk_latency / 1000.0,
        )]
class PerDeviceDiskLatencyPollster(_DiskLatencyPollsterBase):
    """Publish one ``disk.device.latency`` gauge per disk device."""

    def _get_samples(self, instance, disk_latency_info):
        """Return one ``disk.device.latency`` sample per device.

        :param instance: the instance the samples belong to
        :param disk_latency_info: object whose
            ``per_disk_latency['disk_latency']`` maps device -> latency
        :return: list of samples with unit ``ms``; each resource id is
            ``"<instance.id>-<device>"``
        """
        per_device = disk_latency_info.per_disk_latency['disk_latency']
        return [
            util.make_sample_from_instance(
                instance,
                name='disk.device.latency',
                type=sample.TYPE_GAUGE,
                unit='ms',
                # Float divisor: with an integer latency, Python 2 would
                # floor-divide and silently drop the sub-millisecond part.
                volume=value / 1000.0,
                resource_id="%s-%s" % (instance.id, disk),
            )
            for disk, value in six.iteritems(per_device)
        ]

View File

@ -80,3 +80,13 @@ class HyperVInspector(virt_inspector.Inspector):
errors=0)
yield (disk, stats)
def inspect_disk_latency(self, instance):
    """Yield a ``(Disk, DiskLatencyStats)`` pair for each disk metric.

    :param instance: the target instance; its name is resolved through
        ``util.instance_name``
    :return: generator of ``(disk, stats)`` tuples, where ``disk.device``
        is the metric's ``instance_id`` and ``stats.disk_latency`` is the
        metric's ``disk_latency``
    """
    vm_name = util.instance_name(instance)
    latency_metrics = self._utils.get_disk_latency_metrics(vm_name)
    for entry in latency_metrics:
        device = virt_inspector.Disk(device=entry['instance_id'])
        latency_stats = virt_inspector.DiskLatencyStats(
            disk_latency=entry['disk_latency'])
        yield (device, latency_stats)

View File

@ -57,6 +57,7 @@ class UtilsV2(object):
# Disk metrics are supported from Hyper-V 2012 R2
_DISK_RD_METRIC_NAME = 'Disk Data Read'
_DISK_WR_METRIC_NAME = 'Disk Data Written'
_DISK_LATENCY_METRIC_NAME = 'Average Disk Latency'
def __init__(self, host='.'):
if sys.platform == 'win32':
@ -151,6 +152,21 @@ class UtilsV2(object):
'host_resource': host_resource
}
def get_disk_latency_metrics(self, vm_name):
    """Yield one latency record per storage resource of a VM.

    :param vm_name: name used to look the VM up
    :return: generator of dicts with the keys ``disk_latency`` (the first
        value returned for the latency metric definition) and
        ``instance_id`` (the resource's ``InstanceID``)
    """
    vm = self._lookup_vm(vm_name)
    latency_def = self._get_metric_def(self._DISK_LATENCY_METRIC_NAME)
    for storage in self._get_vm_resources(vm, self._STORAGE_ALLOC):
        values = self._get_metric_values(storage, [latency_def])
        yield {
            'disk_latency': values[0],
            'instance_id': storage.InstanceID,
        }
@staticmethod
def _sum_metric_values(metrics):
tot_metric_val = 0

View File

@ -128,6 +128,13 @@ DiskRateStats = collections.namedtuple('DiskRateStats',
'write_bytes_rate',
'write_requests_rate'])
# Named tuple carrying the latency statistic reported for a single disk.
#
# disk_latency: average latency of the disk
#
DiskLatencyStats = collections.namedtuple(
    'DiskLatencyStats',
    ['disk_latency'],
)
# Exception types
#
@ -220,6 +227,14 @@ class Inspector(object):
"""
raise ceilometer.NotImplementedError
def inspect_disk_latency(self, instance):
    """Inspect the disk latency statistics for an instance.

    This base implementation always raises; inspectors that can report
    latency override it.

    :param instance: the target instance
    :return: for each disk, the average disk latency
    :raises ceilometer.NotImplementedError: always, in this implementation
    """
    raise ceilometer.NotImplementedError
def get_hypervisor_inspector():
try:

View File

@ -35,8 +35,8 @@ class TestManager(base.BaseTestCase):
def test_load_plugins_pollster_list(self):
# Checks how many extensions an AgentManager restricted to the 'disk.*'
# pollster pattern ends up loading.
mgr = manager.AgentManager(pollster_list=['disk.*'])
# NOTE(review): two contradictory expectations (16 and 18) follow. They
# look like the before/after sides of one change hunk shown together --
# confirm that only the 18 assertion exists in the real file.
# currently we do have 16 disk-related pollsters
self.assertEqual(16, len(list(mgr.extensions)))
# currently we do have 18 disk-related pollsters
self.assertEqual(18, len(list(mgr.extensions)))
def test_load_plugins_no_intersection(self):
# Let's test nothing will be polled if namespace and pollsters

View File

@ -269,3 +269,52 @@ class TestDiskRatePollsters(TestBaseDiskIO):
self._check_per_device_samples(disk.PerDeviceWriteRequestsRatePollster,
'disk.device.write.requests.rate', 800L,
'disk2')
class TestDiskLatencyPollsters(TestBaseDiskIO):
    """Tests for the aggregate and per-device disk latency pollsters."""

    # Fake inspector output: (Disk, DiskLatencyStats) pairs for two devices.
    DISKS = [
        (virt_inspector.Disk(device='disk1'),
         virt_inspector.DiskLatencyStats(disk_latency=1000)),
        (virt_inspector.Disk(device='disk2'),
         virt_inspector.DiskLatencyStats(disk_latency=2000)),
    ]
    TYPE = 'gauge'

    def setUp(self):
        """Make the inspector return the fake latency data."""
        super(TestDiskLatencyPollsters, self).setUp()
        fake_inspect = mock.Mock(return_value=self.DISKS)
        self.inspector.inspect_disk_latency = fake_inspect

    @mock.patch('ceilometer.pipeline.setup_pipeline', mock.MagicMock())
    def _check_get_samples(self, factory, sample_name,
                           expected_count=2):
        """Poll with ``factory()`` and return the samples named sample_name.

        Also checks that the latency cache was filled for every polled
        instance and that no sample carries another name.
        """
        pollster = factory()
        agent = manager.AgentManager()
        poll_cache = {}
        samples = list(pollster.get_samples(agent, poll_cache, self.instance))

        self.assertIsNotNone(samples)
        self.assertIsNotEmpty(samples)

        cache_key = pollster.CACHE_KEY_DISK_LATENCY
        self.assertIn(cache_key, poll_cache)
        for polled in self.instance:
            self.assertIn(polled.id, poll_cache[cache_key])

        self.assertEqual({sample_name}, {s.name for s in samples})

        matching = [s for s in samples if s.name == sample_name]
        self.assertEqual(expected_count, len(matching),
                         'missing counter %s' % sample_name)
        return matching

    def test_disk_latency(self):
        """The aggregate sample reports 3 for the two fake disks."""
        self._check_aggregate_samples(disk.DiskLatencyPollster,
                                      'disk.latency', 3)

    def test_per_device_latency(self):
        """Each fake disk gets its own sample (1 for disk1, 2 for disk2)."""
        expected_by_device = (('disk1', 1), ('disk2', 2))
        for device, expected in expected_by_device:
            self._check_per_device_samples(disk.PerDeviceDiskLatencyPollster,
                                           'disk.device.latency', expected,
                                           device)

View File

@ -114,3 +114,23 @@ class TestHyperVInspection(base.BaseTestCase):
self.assertEqual(fake_read_mb * units.Mi, inspected_stats.read_bytes)
self.assertEqual(fake_write_mb * units.Mi, inspected_stats.write_bytes)
def test_inspect_disk_latency(self):
    """One metric dict becomes one (Disk, DiskLatencyStats) pair."""
    instance = mock.sentinel.INSTANCE_NAME
    latency = mock.sentinel.DISK_LATENCY
    device_id = mock.sentinel.INSTANCE_ID

    metrics_stub = self._inspector._utils.get_disk_latency_metrics
    metrics_stub.return_value = [
        {'disk_latency': latency, 'instance_id': device_id},
    ]

    results = list(self._inspector.inspect_disk_latency(instance))

    self.assertEqual(1, len(results))
    first = results[0]
    self.assertEqual(2, len(first))

    disk, stats = first
    self.assertEqual(device_id, disk.device)
    self.assertEqual(latency, stats.disk_latency)

View File

@ -172,6 +172,27 @@ class TestUtilsV2(base.BaseTestCase):
self.assertEqual(fake_instance_id, disk_metrics[0]['instance_id'])
self.assertEqual(fake_host_resource, disk_metrics[0]['host_resource'])
def test_get_disk_latency(self):
    """get_disk_latency_metrics reports latency and id for each disk."""
    vm_name = mock.sentinel.VM_NAME
    instance_id = mock.sentinel.FAKE_INSTANCE_ID
    latency = mock.sentinel.FAKE_LATENCY

    # Stub every helper the method goes through to reach the metrics.
    self._utils._lookup_vm = mock.MagicMock()
    disk_stub = mock.MagicMock()
    disk_stub.InstanceID = instance_id
    self._utils._get_vm_resources = mock.MagicMock(
        return_value=[disk_stub])
    self._utils._get_metric_values = mock.MagicMock(
        return_value=[latency])

    metrics = list(self._utils.get_disk_latency_metrics(vm_name))

    self.assertEqual(1, len(metrics))
    self.assertEqual(latency, metrics[0]['disk_latency'])
    self.assertEqual(instance_id, metrics[0]['instance_id'])
def test_get_metric_value_instances(self):
mock_el1 = mock.MagicMock()
mock_associator = mock.MagicMock()

View File

@ -83,6 +83,7 @@ disk.read.bytes c B inst ID p 1, 2
disk.read.bytes.rate g B/s inst ID p 1, 2, 3, 4 Average rate of reads
disk.write.bytes c B inst ID p 1, 2 Volume of writes
disk.write.bytes.rate g B/s inst ID p 1, 2, 3, 4 Average volume of writes
disk.latency g ms inst ID p 2 Average disk latency
disk.device.read.requests c request disk ID p 1, 2 Number of read requests
disk.device.read.requests.rate g request/s disk ID p 1, 2, 3 Average rate of read requests
disk.device.write.requests c request disk ID p 1, 2 Number of write requests
@ -91,6 +92,7 @@ disk.device.read.bytes c B disk ID p 1, 2
disk.device.read.bytes.rate g B/s disk ID p 1, 2, 3 Average rate of reads
disk.device.write.bytes c B disk ID p 1, 2 Volume of writes
disk.device.write.bytes.rate g B/s disk ID p 1, 2, 3 Average volume of writes
disk.device.latency g ms disk ID p 2 Average disk latency per device
disk.root.size g GB inst ID n 1, 2 Size of root disk
disk.ephemeral.size g GB inst ID n 1, 2 Size of ephemeral disk
network.incoming.bytes c B iface ID p 1, 2 Number of incoming bytes

2
setup.cfg Executable file → Normal file
View File

@ -124,6 +124,8 @@ ceilometer.poll.compute =
disk.device.write.requests.rate = ceilometer.compute.pollsters.disk:PerDeviceWriteRequestsRatePollster
disk.device.read.bytes.rate = ceilometer.compute.pollsters.disk:PerDeviceReadBytesRatePollster
disk.device.write.bytes.rate = ceilometer.compute.pollsters.disk:PerDeviceWriteBytesRatePollster
disk.latency = ceilometer.compute.pollsters.disk:DiskLatencyPollster
disk.device.latency = ceilometer.compute.pollsters.disk:PerDeviceDiskLatencyPollster
cpu = ceilometer.compute.pollsters.cpu:CPUPollster
cpu_util = ceilometer.compute.pollsters.cpu:CPUUtilPollster
network.incoming.bytes = ceilometer.compute.pollsters.net:IncomingBytesPollster