add memory bandwidth meter

Some Intel processor families (e.g. the Intel Xeon processor E5 v3
family) introduced MBM (Memory Bandwidth Monitoring) to measure
bandwidth from one level of the cache hierarchy to the next by
applications running on the platform. It supports both 'local
bandwidth' and 'total bandwidth' monitoring for the socket. Local
bandwidth measures the amount of data sent through the memory
controller on the socket, and total bandwidth measures the total
system bandwidth. This patch introduces two new meters to get memory
bandwidth statistics based on the Intel CMT feature.

Change-Id: Iab9b326695b4ac5b5841b930ebad633d4e3a01e6
This commit is contained in:
Qiaowei Ren 2016-07-14 09:57:35 +08:00
parent b523822060
commit ed7b6dbc95
7 changed files with 208 additions and 0 deletions

View File

@ -13,6 +13,9 @@
# License for the specific language governing permissions and limitations
# under the License.
import abc
import collections
from oslo_log import log
import ceilometer
@ -26,6 +29,10 @@ from ceilometer import sample
LOG = log.getLogger(__name__)
# Cache entry stored per instance by the memory bandwidth pollsters:
#
# total: the 'total' bandwidth reading returned by the inspector
# local: the 'local' bandwidth reading returned by the inspector
#
MemoryBandwidthData = collections.namedtuple(
    'MemoryBandwidthData',
    ('total', 'local'),
)
class MemoryUsagePollster(pollsters.BaseComputePollster):
def get_samples(self, manager, cache, resources):
@ -117,3 +124,82 @@ class MemoryResidentPollster(pollsters.BaseComputePollster):
LOG.exception(_LE('Could not get Resident Memory Usage for '
'%(id)s: %(e)s'), {'id': instance.id,
'e': err})
class _MemoryBandwidthPollster(pollsters.BaseComputePollster):
    """Base class for pollsters reporting per-instance memory bandwidth.

    The inspector is queried at most once per instance for a given cache
    dict; concrete subclasses implement :meth:`_get_samples` to choose which
    field of the cached :class:`MemoryBandwidthData` becomes a sample.
    """

    CACHE_KEY_MEMORY_BANDWIDTH = 'memory-bandwidth'

    def _populate_cache(self, inspector, cache, instance):
        """Return the bandwidth data for ``instance``, inspecting if needed.

        :param inspector: the inspector used to query memory bandwidth
        :param cache: the cache dict handed to :meth:`get_samples`; results
                      are kept under ``CACHE_KEY_MEMORY_BANDWIDTH`` keyed by
                      instance id
        :param instance: the instance to inspect
        :return: a :class:`MemoryBandwidthData` for the instance
        """
        i_cache = cache.setdefault(self.CACHE_KEY_MEMORY_BANDWIDTH, {})
        if instance.id not in i_cache:
            # Query through the inspector passed in by the caller instead of
            # silently ignoring the argument in favour of self.inspector.
            memory_bandwidth = inspector.inspect_memory_bandwidth(
                instance, self._inspection_duration)
            i_cache[instance.id] = MemoryBandwidthData(
                memory_bandwidth.total,
                memory_bandwidth.local,
            )
        return i_cache[instance.id]

    @abc.abstractmethod
    def _get_samples(self, instance, c_data):
        """Return one or more Samples."""

    def _get_sample_total_and_local(self, instance, _name, _unit,
                                    c_data, _element):
        """Build a single-element list holding one gauge Sample.

        :param instance: the instance the sample belongs to
        :param _name: the meter name of the sample
        :param _unit: the unit of the sample
        :param c_data: the cached :class:`MemoryBandwidthData`
        :param _element: name of the ``c_data`` attribute used as volume
        :return: a list containing one Sample
        """
        return [util.make_sample_from_instance(
            instance,
            name=_name,
            type=sample.TYPE_GAUGE,
            unit=_unit,
            volume=getattr(c_data, _element),
        )]

    def get_samples(self, manager, cache, resources):
        """Yield memory bandwidth samples for every instance in resources.

        Per-instance failures are logged and the instance is skipped, except
        for ``NoDataException`` which is escalated.

        :raises plugin_base.PollsterPermanentError: when the inspector
                reports ``NoDataException`` for an instance
        """
        self._inspection_duration = self._record_poll_time()
        for instance in resources:
            try:
                c_data = self._populate_cache(
                    self.inspector,
                    cache,
                    instance,
                )
                for s in self._get_samples(instance, c_data):
                    yield s
            except virt_inspector.InstanceNotFoundException as err:
                # Instance was deleted while getting samples. Ignore it.
                LOG.debug('Exception while getting samples %s', err)
            except virt_inspector.InstanceShutOffException as e:
                LOG.debug('Instance %(instance_id)s was shut off while '
                          'getting samples of %(pollster)s: %(exc)s',
                          {'instance_id': instance.id,
                           'pollster': self.__class__.__name__, 'exc': e})
            except virt_inspector.NoDataException as e:
                LOG.warning(_LW('Cannot inspect data of %(pollster)s for '
                                '%(instance_id)s, non-fatal reason: %(exc)s'),
                            {'pollster': self.__class__.__name__,
                             'instance_id': instance.id, 'exc': e})
                raise plugin_base.PollsterPermanentError(resources)
            except ceilometer.NotImplementedError:
                # Selected inspector does not implement this pollster.
                LOG.debug('Obtaining memory bandwidth is not implemented'
                          ' for %s', self.inspector.__class__.__name__)
            except Exception as err:
                LOG.exception(_LE('Could not get memory bandwidth for '
                                  '%(id)s: %(e)s'), {'id': instance.id,
                                                     'e': err})
class MemoryBandwidthTotalPollster(_MemoryBandwidthPollster):
    """Pollster emitting the 'memory.bandwidth.total' meter."""

    def _get_samples(self, instance, c_data):
        """Return the sample list built from the ``total`` field of c_data."""
        return self._get_sample_total_and_local(
            instance,
            _name='memory.bandwidth.total',
            _unit='B/s',
            c_data=c_data,
            _element='total',
        )
class MemoryBandwidthLocalPollster(_MemoryBandwidthPollster):
    """Pollster emitting the 'memory.bandwidth.local' meter."""

    def _get_samples(self, instance, c_data):
        """Return the sample list built from the ``local`` field of c_data."""
        return self._get_sample_total_and_local(
            instance,
            _name='memory.bandwidth.local',
            _unit='B/s',
            c_data=c_data,
            _element='local',
        )

View File

@ -80,6 +80,14 @@ MemoryResidentStats = collections.namedtuple('MemoryResidentStats',
['resident'])
# Named tuple carrying the memory bandwidth statistics of an instance.
#
# total: total system bandwidth from one level of cache
# local: bandwidth of memory traffic for a memory controller
#
MemoryBandwidthStats = collections.namedtuple(
    'MemoryBandwidthStats',
    ('total', 'local'),
)
# Named tuple representing vNICs.
#
# name: the name of the vNIC
@ -286,6 +294,16 @@ class Inspector(object):
"""
raise ceilometer.NotImplementedError
def inspect_memory_bandwidth(self, instance, duration=None):
    """Inspect the memory bandwidth statistics for an instance.

    This base implementation always raises; inspectors that can provide
    the data override it.

    :param instance: the target instance
    :param duration: the last 'n' seconds, over which the value should be
           inspected
    :return: the memory bandwidth statistics of the instance (the libvirt
             inspector returns a MemoryBandwidthStats named tuple)
    :raises ceilometer.NotImplementedError: always, in this implementation
    """
    raise ceilometer.NotImplementedError
def inspect_disk_rates(self, instance, duration=None):
"""Inspect the disk statistics as rates for an instance.

View File

@ -255,3 +255,27 @@ class LibvirtInspector(virt_inspector.Inspector):
domain = self._get_domain_not_shut_off_or_raise(instance)
memory = domain.memoryStats()['rss'] / units.Ki
return virt_inspector.MemoryResidentStats(resident=memory)
def inspect_memory_bandwidth(self, instance, duration=None):
    """Inspect the memory bandwidth statistics for an instance.

    :param instance: the target instance
    :param duration: accepted for interface compatibility; not used by
                     this implementation
    :return: a virt_inspector.MemoryBandwidthStats built from the
             ``perf.mbmt`` (total) and ``perf.mbml`` (local) values
             reported by libvirt
    :raises virt_inspector.NoDataException: if libvirt does not support
            perf statistics, fails to return them, or returns them
            without the mbmt/mbml values
    """
    domain = self._get_domain_not_shut_off_or_raise(instance)
    try:
        stats = self.connection.domainListGetStats(
            [domain], libvirt.VIR_DOMAIN_STATS_PERF)
        perf = stats[0][1]
        return virt_inspector.MemoryBandwidthStats(total=perf["perf.mbmt"],
                                                   local=perf["perf.mbml"])
    except AttributeError as e:
        msg = _('Perf is not supported by current version of libvirt, and '
                'failed to inspect memory bandwidth of %(instance_uuid)s, '
                'can not get info from libvirt: %(error)s') % {
            'instance_uuid': instance.id, 'error': e}
        raise virt_inspector.NoDataException(msg)
    # domainListGetStats might launch an exception if the method or
    # mbmt/mbml perf event is not supported by the underlying hypervisor
    # being used by libvirt.
    except libvirt.libvirtError as e:
        msg = _('Failed to inspect memory bandwidth of %(instance_uuid)s, '
                'can not get info from libvirt: %(error)s') % {
            'instance_uuid': instance.id, 'error': e}
        raise virt_inspector.NoDataException(msg)
    # The call can also succeed while returning no record for the domain
    # or a record without the mbmt/mbml entries; report that as missing
    # data too instead of leaking an IndexError/KeyError to the caller.
    except (IndexError, KeyError) as e:
        msg = _('Memory bandwidth perf events are not reported for '
                '%(instance_uuid)s, can not get info from libvirt: '
                '%(error)s') % {
            'instance_uuid': instance.id, 'error': e}
        raise virt_inspector.NoDataException(msg)

View File

@ -132,3 +132,62 @@ class TestResidentMemoryPollster(base.TestPollsterBase):
_verify_resident_memory_metering(1, 2.0, 0)
_verify_resident_memory_metering(0, 0, 1)
_verify_resident_memory_metering(0, 0, 0)
class TestMemoryBandwidthPollster(base.TestPollsterBase):
    # Exercises the total/local memory bandwidth pollsters against a
    # mocked inspector.

    def setUp(self):
        super(TestMemoryBandwidthPollster, self).setUp()

    @mock.patch('ceilometer.pipeline.setup_pipeline', mock.MagicMock())
    def test_get_samples(self):
        # Each inspector call consumes the next reading; every pollster run
        # below starts from an empty cache, so it triggers one call.
        readings = iter((
            virt_inspector.MemoryBandwidthStats(total=1892352, local=1802240),
            virt_inspector.MemoryBandwidthStats(total=1081344, local=90112),
        ))

        def fake_inspect(instance, duration):
            return next(readings)

        self.inspector.inspect_memory_bandwidth = mock.Mock(
            side_effect=fake_inspect)

        mgr = manager.AgentManager()

        def _verify(pollster_cls, meter_name, expected_usage):
            pollster = pollster_cls()
            samples = list(pollster.get_samples(mgr, {}, [self.instance]))
            self.assertEqual(1, len(samples))
            self.assertEqual({meter_name}, {s.name for s in samples})
            self.assertEqual(expected_usage, samples[0].volume)

        # First reading feeds the total pollster, second the local one.
        _verify(memory.MemoryBandwidthTotalPollster,
                'memory.bandwidth.total', 1892352)
        _verify(memory.MemoryBandwidthLocalPollster,
                'memory.bandwidth.local', 90112)

    @mock.patch('ceilometer.pipeline.setup_pipeline', mock.MagicMock())
    def test_get_samples_with_empty_stats(self):
        # An inspector without data must surface as a permanent error.
        def raise_no_data(instance, duration):
            raise virt_inspector.NoDataException()

        self.inspector.inspect_memory_bandwidth = mock.Mock(
            side_effect=raise_no_data)

        mgr = manager.AgentManager()
        pollster = memory.MemoryBandwidthTotalPollster()

        self.assertRaises(
            plugin_base.PollsterPermanentError,
            lambda: list(pollster.get_samples(mgr, {}, [self.instance])))

View File

@ -372,6 +372,20 @@ class TestLibvirtInspection(base.BaseTestCase):
self.inspector.inspect_memory_usage,
self.instance)
def test_inspect_memory_bandwidth(self):
    # The inspector should expose the perf.mbmt / perf.mbml values returned
    # by domainListGetStats as the total / local fields of its result.
    connection = self.inspector.connection
    perf_stats = [({}, {'perf.mbmt': 1892352, 'perf.mbml': 1802240})]
    domain_info = (0, 0, 51200, 2, 999999)

    patch_lookup = mock.patch.object(connection, 'lookupByUUIDString',
                                     return_value=self.domain)
    patch_info = mock.patch.object(self.domain, 'info',
                                   return_value=domain_info)
    patch_stats = mock.patch.object(connection, 'domainListGetStats',
                                    return_value=perf_stats)

    with patch_lookup, patch_info, patch_stats:
        mb = self.inspector.inspect_memory_bandwidth(self.instance)
        self.assertEqual(1892352, mb.total)
        self.assertEqual(1802240, mb.local)
class TestLibvirtInspectionWithError(base.BaseTestCase):

View File

@ -0,0 +1,5 @@
---
features:
- Add two new meters, memory.bandwidth.total and
  memory.bandwidth.local, to get memory bandwidth statistics
  based on the Intel CMT feature.

View File

@ -112,6 +112,8 @@ ceilometer.poll.compute =
instance = ceilometer.compute.pollsters.instance:InstancePollster
memory.usage = ceilometer.compute.pollsters.memory:MemoryUsagePollster
memory.resident = ceilometer.compute.pollsters.memory:MemoryResidentPollster
memory.bandwidth.total = ceilometer.compute.pollsters.memory:MemoryBandwidthTotalPollster
memory.bandwidth.local = ceilometer.compute.pollsters.memory:MemoryBandwidthLocalPollster
disk.capacity = ceilometer.compute.pollsters.disk:CapacityPollster
disk.allocation = ceilometer.compute.pollsters.disk:AllocationPollster
disk.usage = ceilometer.compute.pollsters.disk:PhysicalPollster