Add new metrics for RabbitMQ

The following new metrics are added:
  - The total number of queues that are not mirrored
  - The total memory used, as reported by RabbitMQ
  - The VM memory limit
  - The remaining memory before reaching VM memory limit
  - The disk free limit
  - The disk free space per node
  - The remaining disk space before reaching the disk free limit

Change-Id: I1d50d3cb9035d60fe915afe465e6c35ff01b6bb6
This commit is contained in:
Guillaume Thouvenin
2015-10-05 11:18:19 +02:00
parent fe40230efb
commit 1c43773cd6
3 changed files with 61 additions and 3 deletions

View File

@@ -71,10 +71,40 @@ class RabbitMqPlugin(base.Base):
stats['memory'] = 0
stats['consumers'] = 0
stats['queues'] = 0
stats['unmirrored_queues'] = 0
stats['pmap_mapped'] = 0
stats['pmap_used'] = 0
stats['pmap_shared'] = 0
out, err = self.execute([self.rabbitmqctl_bin, '-q', 'status'],
shell=False)
if not out:
self.logger.error('%s: Failed to get the status' %
self.rabbitmqctl_bin)
return
for v in ('vm_memory_limit', 'disk_free_limit', 'disk_free'):
try:
stats[v] = int(re.findall('{%s,([0-9]+)}' % v, out)[0])
except:
self.logger.error('%s: Failed to get %s' %
(self.rabbitmqctl_bin, v))
mem_str = re.findall('{memory,\s+\[([^\]]+)\]\}', out)
# We are only interested in the total amount of memory used
# TODO: Get all information about memory usage from mem_str
try:
stats['used_memory'] = int(re.findall('total,([0-9]+)',
mem_str[0])[0])
except:
self.logger.error('%s: Failed to get the memory used by rabbitmq' %
self.rabbitmqctl_bin)
if 'vm_memory_limit' in stats and 'used_memory' in stats:
stats['remaining_memory'] = stats['vm_memory_limit'] - stats['used_memory']
if 'disk_free' in stats and 'disk_free_limit' in stats:
stats['remaining_disk'] = stats['disk_free'] - stats['disk_free_limit']
out, err = self.execute([self.rabbitmqctl_bin, '-q', 'cluster_status'],
shell=False)
if not out:
@@ -109,14 +139,15 @@ class RabbitMqPlugin(base.Base):
out, err = self.execute([self.rabbitmqctl_bin, '-q', '-p', self.vhost,
'list_queues', 'name', 'messages', 'memory',
'consumers'], shell=False)
'consumers', 'policy', 'slave_pids',
'synchronised_slave_pids'], shell=False)
if not out:
self.logger.error('%s: Failed to get the list of queues' %
self.rabbitmqctl_bin)
return
for line in out.split('\n'):
ctl_stats = line.split()
ctl_stats = line.split('\t')
try:
ctl_stats[1] = int(ctl_stats[1])
ctl_stats[2] = int(ctl_stats[2])
@@ -131,6 +162,18 @@ class RabbitMqPlugin(base.Base):
stats['%s.messages' % queue_name] = ctl_stats[1]
stats['%s.memory' % queue_name] = ctl_stats[2]
stats['%s.consumers' % queue_name] = ctl_stats[3]
# a queue is unmirrored if its policy is not ha-all
if 'ha-all' not in ctl_stats[4]:
stats['unmirrored_queues'] += 1
else:
# we need to check if the list of synchronised slaves is
# equal to the list of slaves.
slaves = re.findall('<([a-zA-Z@\-.0-9]+)>', ctl_stats[5])
for s in slaves:
if s not in ctl_stats[6]:
stats['unmirrored_queues'] += 1
break
if not stats['memory'] > 0:
self.logger.warning(

View File

@@ -191,6 +191,13 @@ function process_message ()
if sample['type_instance'] ~= 'consumers' and
sample['type_instance'] ~= 'messages' and
sample['type_instance'] ~= 'memory' and
sample['type_instance'] ~= 'used_memory' and
sample['type_instance'] ~= 'unmirrored_queues' and
sample['type_instance'] ~= 'vm_memory_limit' and
sample['type_instance'] ~= 'disk_free_limit' and
sample['type_instance'] ~= 'disk_free' and
sample['type_instance'] ~= 'remaining_memory' and
sample['type_instance'] ~= 'remaining_disk' and
(string.match(sample['type_instance'], '%.consumers$') or
string.match(sample['type_instance'], '%.messages$') or
string.match(sample['type_instance'], '%.memory$')) then