Check for local crash dumps
Add a check that monitors crash dumps. The configured crash directory is scanned for sub-directories named 'YYYYmmddHHMM', each of which contains the dump files of one crash. The 'crash.dump_count' metric is the number of such sub-directories (i.e., the number of crash dumps present), and the 'value_meta' dict contains the date/timestamp of the most recent crash.

Implements: blueprint check-for-crashes
Change-Id: I20b584c68644ff2e76baabab78e965e682759aa0
This commit is contained in:
parent
c189c45be7
commit
3dade6deb2
|
@ -0,0 +1,6 @@
|
|||
init_config:
|
||||
# Directory scanned for crash dump sub-directories (default: /var/crash)
# crash_dir: /var/crash
|
||||
|
||||
instances:
|
||||
# Crash check only supports one configured instance
|
||||
- name: crash_stats
|
|
@ -0,0 +1,46 @@
|
|||
import logging
|
||||
import os
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
import monasca_agent.collector.checks as checks
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Crash(checks.AgentCheck):
    """Agent check that reports how many crash dumps are present.

    The crash directory (``crash_dir`` in ``init_config``, defaulting to
    ``/var/crash``) is scanned for sub-directories whose names parse with
    the ``%Y%m%d%H%M`` format. Each such sub-directory counts as one dump.
    """

    # strptime() format a sub-directory name must match to be counted.
    _DUMP_DIR_FORMAT = '%Y%m%d%H%M'

    def __init__(self, name, init_config, agent_config):
        """Initialise the check and resolve the crash directory.

        :param name: check name, passed through to the base class
        :param init_config: ``init_config`` section of the check's config
        :param agent_config: agent-wide configuration, passed through
        """
        super(Crash, self).__init__(name, init_config, agent_config)
        # An ``init_config:`` key that holds nothing but comments is loaded
        # from YAML as None; fall back to an empty mapping so that the
        # default directory is used instead of failing on ``None.get``.
        self.crash_dir = (self.init_config or {}).get('crash_dir',
                                                      '/var/crash')
        log.debug('crash dir: %s', self.crash_dir)

    def check(self, instance):
        """Capture crash dump statistics.

        Emits the ``crash.dump_count`` gauge with the number of valid crash
        dump directories found. The gauge's ``value_meta`` carries a
        ``latest`` key holding the timestamp of the most recent dump
        (``YYYY-MM-DD HH:MM:SS``), or an empty string when there is none.

        :param instance: the configured instance this run is for
        """
        dimensions = self._set_dimensions(None, instance)
        dump_count = 0
        latest = None

        # Parse the crash directory
        if os.path.isdir(self.crash_dir):
            for entry in os.listdir(self.crash_dir):
                path = os.path.join(self.crash_dir, entry)
                if not os.path.isdir(path):
                    continue
                try:
                    dt = datetime.strptime(entry, self._DUMP_DIR_FORMAT)
                except ValueError:
                    # Not named like a crash dump directory
                    continue

                # Found a valid crash dump directory
                log.debug('found crash dump dir: %s', path)
                dump_count += 1

                # Compare the parsed timestamps rather than the directory
                # names: strptime() also accepts names that are not
                # zero-padded, and those do not sort chronologically as
                # plain strings.
                if latest is None or dt > latest:
                    latest = dt

        # u'{0}'.format() renders the datetime like str() does while still
        # producing a unicode string on Python 2 and a str on Python 3.
        value_meta = {
            'latest': u'{0}'.format(latest) if latest is not None else ''
        }

        log.debug('dump_count: %s', dump_count)
        self.gauge('crash.dump_count', dump_count, dimensions=dimensions,
                   value_meta=value_meta)
|
|
@ -0,0 +1,55 @@
|
|||
import os
|
||||
import shutil
|
||||
import unittest
|
||||
import uuid
|
||||
|
||||
from common import get_check
|
||||
|
||||
|
||||
class TestCrash(unittest.TestCase):
    """Exercise the crash check against a throw-away crash directory."""

    def setUp(self):
        """Create an empty, uniquely named crash directory for the test."""
        # Imported locally so this class does not depend on a module-level
        # import being added elsewhere in the file.
        import tempfile

        # mkdtemp() honours the platform's temporary directory and creates
        # the directory atomically under a unique name, which a hand-built
        # '/tmp/...' path does not.
        self.crash_dir = tempfile.mkdtemp(prefix='crash-test-')

    def tearDown(self):
        """Remove the crash directory and everything created inside it."""
        shutil.rmtree(self.crash_dir)

    @staticmethod
    def _first_metric(check, instance):
        """Run the check once and return the first metric it reports."""
        check.check(instance)
        return check.get_metrics()[0]

    def test_checks(self):
        """The dump count and 'latest' timestamp follow the directory."""
        config = """
            init_config:
                crash_dir: %s

            instances:
                - name: crash_stats
        """ % self.crash_dir

        (check, instances) = get_check('crash', config)

        # Baseline check
        metric = self._first_metric(check, instances[0])
        self.assertEqual(metric.value, 0)
        self.assertEqual(metric.value_meta['latest'], '')

        # Add a crash and re-check
        os.mkdir(os.path.join(self.crash_dir, '201504141011'))

        metric = self._first_metric(check, instances[0])
        self.assertEqual(metric.value, 1)
        self.assertEqual(metric.value_meta['latest'],
                         '2015-04-14 10:11:00')

        # Add a second crash and re-check
        os.mkdir(os.path.join(self.crash_dir, '201505222303'))

        metric = self._first_metric(check, instances[0])
        self.assertEqual(metric.value, 2)
        self.assertEqual(metric.value_meta['latest'],
                         '2015-05-22 23:03:00')
|
||||
|
||||
|
||||
# Allow this test module to be executed directly as a script; unittest.main()
# then runs the tests defined in this module.
if __name__ == "__main__":
    unittest.main()
|
Loading…
Reference in New Issue