Add disk latency metrics implementation in Hyper-V Inspector
High latency between I/O requests can be a sign of issues. Collecting disk metrics can help detect those issues. Windows / Hyper-V Server 2012 R2 can collect those metrics. This patch adds a disk latency metrics implementation for the Hyper-V Inspector, as well as disk latency and per-device latency pollsters. DocImpact Change-Id: I1621fc7d4226ae1f46cdfca878e8b52487ca4379 Implements: blueprint hyper-v-disk-latency-metrics
This commit is contained in:
parent
b6f0165b37
commit
ef6f21ada7
@ -42,6 +42,10 @@ DiskRateData = collections.namedtuple('DiskRateData',
|
||||
'write_requests_rate',
|
||||
'per_disk_rate'])
|
||||
|
||||
# Named tuple holding the aggregate disk latency for an instance plus
# the per-device latency breakdown.
DiskLatencyData = collections.namedtuple(
    'DiskLatencyData',
    ['disk_latency', 'per_disk_latency'],
)
|
||||
|
||||
|
||||
@six.add_metaclass(abc.ABCMeta)
|
||||
class _Base(pollsters.BaseComputePollster):
|
||||
@ -463,3 +467,86 @@ class PerDeviceWriteRequestsRatePollster(_DiskRatesPollsterBase):
|
||||
resource_id="%s-%s" % (instance.id, disk),
|
||||
))
|
||||
return samples
|
||||
|
||||
|
||||
@six.add_metaclass(abc.ABCMeta)
class _DiskLatencyPollsterBase(pollsters.BaseComputePollster):
    """Base class for pollsters reporting disk latency.

    Subclasses implement _get_samples() to turn the cached
    DiskLatencyData for an instance into one or more Samples.
    """

    # Cache key under which per-instance DiskLatencyData is stored so
    # that several latency pollsters share one inspector call per cycle.
    CACHE_KEY_DISK_LATENCY = 'disk-latency'

    def _populate_cache(self, inspector, cache, instance):
        """Return the instance's DiskLatencyData, inspecting it only once.

        The aggregate latency is the sum over all disks; the per-device
        breakdown is stored under the 'disk_latency' key.
        """
        i_cache = cache.setdefault(self.CACHE_KEY_DISK_LATENCY, {})
        if instance.id not in i_cache:
            latency = 0
            per_device_latency = {}
            # The inspector yields (disk, stats) pairs for the instance.
            latency_data = inspector.inspect_disk_latency(instance)
            for disk, stats in latency_data:
                latency += stats.disk_latency
                per_device_latency[disk.device] = stats.disk_latency
            per_disk_latency = {
                'disk_latency': per_device_latency
            }
            i_cache[instance.id] = DiskLatencyData(
                latency,
                per_disk_latency
            )
        return i_cache[instance.id]

    @abc.abstractmethod
    def _get_samples(self, instance, disk_rates_info):
        """Return one or more Sample."""

    def get_samples(self, manager, cache, resources):
        """Yield latency samples for every instance in resources."""
        for instance in resources:
            try:
                disk_latency_info = self._populate_cache(
                    self.inspector,
                    cache,
                    instance,
                )
                for disk_latency in self._get_samples(instance,
                                                      disk_latency_info):
                    yield disk_latency
            except virt_inspector.InstanceNotFoundException as err:
                # Instance was deleted while getting samples. Ignore it.
                LOG.debug(_('Exception while getting samples %s'), err)
            except ceilometer.NotImplementedError:
                # Selected inspector does not implement this pollster.
                # Fix: the second string fragment used to carry a stray
                # leading space, producing a double space in the message.
                LOG.debug(_('%(inspector)s does not provide data for '
                            '%(pollster)s'),
                          {'inspector': self.inspector.__class__.__name__,
                           'pollster': self.__class__.__name__})
            except Exception as err:
                instance_name = util.instance_name(instance)
                LOG.exception(_('Ignoring instance %(name)s: %(error)s'),
                              {'name': instance_name, 'error': err})
|
||||
|
||||
|
||||
class DiskLatencyPollster(_DiskLatencyPollsterBase):
    """Pollster emitting one aggregate disk.latency sample per instance."""

    def _get_samples(self, instance, disk_latency_info):
        # Raw aggregate latency is scaled down by 1000 to match the
        # sample's 'ms' unit.
        latency_ms = disk_latency_info.disk_latency / 1000
        latency_sample = util.make_sample_from_instance(
            instance,
            name='disk.latency',
            type=sample.TYPE_GAUGE,
            unit='ms',
            volume=latency_ms
        )
        return [latency_sample]
|
||||
|
||||
|
||||
class PerDeviceDiskLatencyPollster(_DiskLatencyPollsterBase):
    """Pollster emitting one disk.device.latency sample per disk device."""

    def _get_samples(self, instance, disk_latency_info):
        # Per-device values live under the 'disk_latency' key of the
        # cached per_disk_latency mapping.
        per_device = disk_latency_info.per_disk_latency['disk_latency']
        return [
            util.make_sample_from_instance(
                instance,
                name='disk.device.latency',
                type=sample.TYPE_GAUGE,
                unit='ms',
                volume=value / 1000,
                resource_id="%s-%s" % (instance.id, device)
            )
            for device, value in six.iteritems(per_device)
        ]
|
||||
|
@ -80,3 +80,13 @@ class HyperVInspector(virt_inspector.Inspector):
|
||||
errors=0)
|
||||
|
||||
yield (disk, stats)
|
||||
|
||||
def inspect_disk_latency(self, instance):
    """Yield a (Disk, DiskLatencyStats) pair for each disk of *instance*.

    The disk device is identified by the Hyper-V resource InstanceID
    reported by the utils layer.
    """
    instance_name = util.instance_name(instance)
    metrics = self._utils.get_disk_latency_metrics(instance_name)
    for disk_metrics in metrics:
        yield (
            virt_inspector.Disk(device=disk_metrics['instance_id']),
            virt_inspector.DiskLatencyStats(
                disk_latency=disk_metrics['disk_latency']),
        )
|
||||
|
@ -57,6 +57,7 @@ class UtilsV2(object):
|
||||
# Disk metrics are supported from Hyper-V 2012 R2
|
||||
_DISK_RD_METRIC_NAME = 'Disk Data Read'
|
||||
_DISK_WR_METRIC_NAME = 'Disk Data Written'
|
||||
_DISK_LATENCY_METRIC_NAME = 'Average Disk Latency'
|
||||
|
||||
def __init__(self, host='.'):
|
||||
if sys.platform == 'win32':
|
||||
@ -151,6 +152,21 @@ class UtilsV2(object):
|
||||
'host_resource': host_resource
|
||||
}
|
||||
|
||||
def get_disk_latency_metrics(self, vm_name):
    """Yield one dict per disk of *vm_name* with its latency metric.

    Each yielded dict has the keys 'disk_latency' (the first metric
    value for the latency metric definition) and 'instance_id' (the
    disk resource's InstanceID).
    """
    vm = self._lookup_vm(vm_name)
    latency_def = self._get_metric_def(self._DISK_LATENCY_METRIC_NAME)

    for disk in self._get_vm_resources(vm, self._STORAGE_ALLOC):
        values = self._get_metric_values(disk, [latency_def])
        yield {
            'disk_latency': values[0],
            'instance_id': disk.InstanceID,
        }
|
||||
|
||||
@staticmethod
|
||||
def _sum_metric_values(metrics):
|
||||
tot_metric_val = 0
|
||||
|
@ -128,6 +128,13 @@ DiskRateStats = collections.namedtuple('DiskRateStats',
|
||||
'write_bytes_rate',
|
||||
'write_requests_rate'])
|
||||
|
||||
# Named tuple representing disk latency statistics.
#
# disk_latency: average disk latency
#
DiskLatencyStats = collections.namedtuple(
    'DiskLatencyStats', ['disk_latency'])
|
||||
|
||||
|
||||
# Exception types
|
||||
#
|
||||
@ -220,6 +227,14 @@ class Inspector(object):
|
||||
"""
|
||||
raise ceilometer.NotImplementedError
|
||||
|
||||
def inspect_disk_latency(self, instance):
    """Inspect the average disk latency for an instance.

    (Docstring fixed: it was copy-pasted from the disk-rates method and
    wrongly said "disk statistics as rates".)

    :param instance: the target instance
    :return: for each disk, the average disk latency
    """
    raise ceilometer.NotImplementedError
|
||||
|
||||
|
||||
def get_hypervisor_inspector():
|
||||
try:
|
||||
|
@ -35,8 +35,8 @@ class TestManager(base.BaseTestCase):
|
||||
|
||||
def test_load_plugins_pollster_list(self):
    mgr = manager.AgentManager(pollster_list=['disk.*'])
    # currently we do have 18 disk-related pollsters (the two latency
    # pollsters added by this patch raise the count from 16 to 18);
    # the stale duplicate assertion for 16 is removed.
    self.assertEqual(18, len(list(mgr.extensions)))
|
||||
|
||||
def test_load_plugins_no_intersection(self):
|
||||
# Let's test nothing will be polled if namespace and pollsters
|
||||
|
@ -269,3 +269,52 @@ class TestDiskRatePollsters(TestBaseDiskIO):
|
||||
self._check_per_device_samples(disk.PerDeviceWriteRequestsRatePollster,
|
||||
'disk.device.write.requests.rate', 800L,
|
||||
'disk2')
|
||||
|
||||
|
||||
class TestDiskLatencyPollsters(TestBaseDiskIO):
    """Tests for the aggregate and per-device disk latency pollsters."""

    # Two fake disks with known latencies, fed to the mocked inspector.
    DISKS = [
        (virt_inspector.Disk(device='disk1'),
         virt_inspector.DiskLatencyStats(1000)),

        (virt_inspector.Disk(device='disk2'),
         virt_inspector.DiskLatencyStats(2000))
    ]
    TYPE = 'gauge'

    def setUp(self):
        super(TestDiskLatencyPollsters, self).setUp()
        self.inspector.inspect_disk_latency = mock.Mock(
            return_value=self.DISKS)

    @mock.patch('ceilometer.pipeline.setup_pipeline', mock.MagicMock())
    def _check_get_samples(self, factory, sample_name,
                           expected_count=2):
        pollster = factory()
        agent = manager.AgentManager()
        cache = {}

        samples = list(pollster.get_samples(agent, cache, self.instance))
        self.assertIsNotNone(samples)
        self.assertIsNotEmpty(samples)

        # Every polled instance must have been cached under the
        # pollster's cache key.
        self.assertIn(pollster.CACHE_KEY_DISK_LATENCY, cache)
        cached = cache[pollster.CACHE_KEY_DISK_LATENCY]
        for instance in self.instance:
            self.assertIn(instance.id, cached)

        names = set(s.name for s in samples)
        self.assertEqual(set([sample_name]), names)

        matching = [s for s in samples if s.name == sample_name]
        self.assertEqual(expected_count, len(matching),
                         'missing counter %s' % sample_name)
        return matching

    def test_disk_latency(self):
        # 1000 + 2000 raw units -> 3 after the pollster's /1000 scaling.
        self._check_aggregate_samples(disk.DiskLatencyPollster,
                                      'disk.latency', 3)

    def test_per_device_latency(self):
        self._check_per_device_samples(disk.PerDeviceDiskLatencyPollster,
                                       'disk.device.latency', 1, 'disk1')

        self._check_per_device_samples(disk.PerDeviceDiskLatencyPollster,
                                       'disk.device.latency', 2, 'disk2')
|
||||
|
@ -114,3 +114,23 @@ class TestHyperVInspection(base.BaseTestCase):
|
||||
|
||||
self.assertEqual(fake_read_mb * units.Mi, inspected_stats.read_bytes)
|
||||
self.assertEqual(fake_write_mb * units.Mi, inspected_stats.write_bytes)
|
||||
|
||||
def test_inspect_disk_latency(self):
    """inspect_disk_latency() forwards utils metrics as (Disk, Stats)."""
    name = mock.sentinel.INSTANCE_NAME
    latency = mock.sentinel.DISK_LATENCY
    instance_id = mock.sentinel.INSTANCE_ID

    self._inspector._utils.get_disk_latency_metrics.return_value = [
        {'disk_latency': latency, 'instance_id': instance_id}]

    results = list(self._inspector.inspect_disk_latency(name))

    # Exactly one (disk, stats) pair is expected.
    self.assertEqual(1, len(results))
    self.assertEqual(2, len(results[0]))

    disk_obj, stats = results[0]

    self.assertEqual(instance_id, disk_obj.device)
    self.assertEqual(latency, stats.disk_latency)
|
||||
|
@ -172,6 +172,27 @@ class TestUtilsV2(base.BaseTestCase):
|
||||
self.assertEqual(fake_instance_id, disk_metrics[0]['instance_id'])
|
||||
self.assertEqual(fake_host_resource, disk_metrics[0]['host_resource'])
|
||||
|
||||
def test_get_disk_latency(self):
    """get_disk_latency_metrics() yields one latency/id dict per disk."""
    vm_name = mock.sentinel.VM_NAME
    instance_id = mock.sentinel.FAKE_INSTANCE_ID
    latency = mock.sentinel.FAKE_LATENCY

    self._utils._lookup_vm = mock.MagicMock()

    fake_disk = mock.MagicMock(InstanceID=instance_id)
    self._utils._get_vm_resources = mock.MagicMock(
        return_value=[fake_disk])
    self._utils._get_metric_values = mock.MagicMock(
        return_value=[latency])

    metrics = list(self._utils.get_disk_latency_metrics(vm_name))

    self.assertEqual(1, len(metrics))
    self.assertEqual(latency, metrics[0]['disk_latency'])
    self.assertEqual(instance_id, metrics[0]['instance_id'])
|
||||
|
||||
def test_get_metric_value_instances(self):
|
||||
mock_el1 = mock.MagicMock()
|
||||
mock_associator = mock.MagicMock()
|
||||
|
@ -83,6 +83,7 @@ disk.read.bytes c B inst ID p 1, 2
|
||||
disk.read.bytes.rate g B/s inst ID p 1, 2, 3, 4 Average rate of reads
|
||||
disk.write.bytes c B inst ID p 1, 2 Volume of writes
|
||||
disk.write.bytes.rate g B/s inst ID p 1, 2, 3, 4 Average volume of writes
|
||||
disk.latency g ms inst ID p 2 Average disk latency
|
||||
disk.device.read.requests c request disk ID p 1, 2 Number of read requests
|
||||
disk.device.read.requests.rate g request/s disk ID p 1, 2, 3 Average rate of read requests
|
||||
disk.device.write.requests c request disk ID p 1, 2 Number of write requests
|
||||
@ -91,6 +92,7 @@ disk.device.read.bytes c B disk ID p 1, 2
|
||||
disk.device.read.bytes.rate g B/s disk ID p 1, 2, 3 Average rate of reads
|
||||
disk.device.write.bytes c B disk ID p 1, 2 Volume of writes
|
||||
disk.device.write.bytes.rate g B/s disk ID p 1, 2, 3 Average volume of writes
|
||||
disk.device.latency g ms disk ID p 2 Average disk latency per device
|
||||
disk.root.size g GB inst ID n 1, 2 Size of root disk
|
||||
disk.ephemeral.size g GB inst ID n 1, 2 Size of ephemeral disk
|
||||
network.incoming.bytes c B iface ID p 1, 2 Number of incoming bytes
|
||||
|
2
setup.cfg
Executable file → Normal file
2
setup.cfg
Executable file → Normal file
@ -124,6 +124,8 @@ ceilometer.poll.compute =
|
||||
disk.device.write.requests.rate = ceilometer.compute.pollsters.disk:PerDeviceWriteRequestsRatePollster
|
||||
disk.device.read.bytes.rate = ceilometer.compute.pollsters.disk:PerDeviceReadBytesRatePollster
|
||||
disk.device.write.bytes.rate = ceilometer.compute.pollsters.disk:PerDeviceWriteBytesRatePollster
|
||||
disk.latency = ceilometer.compute.pollsters.disk:DiskLatencyPollster
|
||||
disk.device.latency = ceilometer.compute.pollsters.disk:PerDeviceDiskLatencyPollster
|
||||
cpu = ceilometer.compute.pollsters.cpu:CPUPollster
|
||||
cpu_util = ceilometer.compute.pollsters.cpu:CPUUtilPollster
|
||||
network.incoming.bytes = ceilometer.compute.pollsters.net:IncomingBytesPollster
|
||||
|
Loading…
Reference in New Issue
Block a user