charm-ceph-osd/files/nagios/check_ceph_osd_services.py

72 lines
2.0 KiB
Python
Executable File

#!/usr/bin/env python3
# Copyright (C) 2018 Canonical
# All Rights Reserved
# Author: Alex Kavanagh <alex.kavanagh@canonical.com>
import os
import sys
from datetime import datetime, timedelta, timezone
# Exit codes used to report the check result to nagios.
STATE_OK, STATE_WARNING, STATE_CRITICAL, STATE_UNKNOWN = 0, 1, 2, 3

# The status file is NAGIOS_HOME/CRON_CHECK_TMPFILE.
NAGIOS_HOME = '/var/lib/nagios'
CRON_CHECK_TMPFILE = 'ceph-osd-checks'

# A status file last modified longer ago than this is treated as stale.
CACHE_MAX_AGE = timedelta(minutes=10)
def run_main():
    """Process the CRON_CHECK_TMPFILE and see if any line is not OK.

    The status file is looked up in NAGIOS_HOME.  The checks are applied in
    this order and the first one that matches decides the result:

    * the file doesn't exist, or can't be stat'ed: STATE_UNKNOWN
    * the file was last modified more than CACHE_MAX_AGE ago: STATE_CRITICAL
    * the file can't be read, or has no lines: STATE_UNKNOWN
    * any line starts with 'Failed': STATE_CRITICAL
    * otherwise: STATE_OK

    Every line of a readable, non-empty status file is echoed to stdout; the
    other outcomes print a single explanatory message instead.

    :returns: nagios state 0,2 or 3
    """
    _tmp_file = os.path.join(NAGIOS_HOME, CRON_CHECK_TMPFILE)

    if not os.path.isfile(_tmp_file):
        print("File '{}' doesn't exist".format(_tmp_file))
        return STATE_UNKNOWN

    try:
        s = os.stat(_tmp_file)
        # Compare timezone-aware UTC instants.  Subtracting naive local
        # datetimes gives a wall-clock difference that is off by the size of
        # any DST/UTC-offset change between the file's mtime and now, which
        # can wrongly flag a fresh file as stale (or miss a stale one).
        modified = datetime.fromtimestamp(s.st_mtime, timezone.utc)
        if datetime.now(timezone.utc) - modified > CACHE_MAX_AGE:
            print("Status file is older than {}".format(CACHE_MAX_AGE))
            return STATE_CRITICAL
    except Exception as e:
        # Kept broad on purpose: an uncaught exception would make the
        # interpreter exit with status 1, which is STATE_WARNING.
        print("Something went wrong grabbing stats for the file: {}".format(
            str(e)))
        return STATE_UNKNOWN

    try:
        with open(_tmp_file, 'rt') as f:
            lines = f.readlines()
    except Exception as e:
        # Same reasoning as above: report UNKNOWN rather than crash.
        print("Something went wrong reading the file: {}".format(str(e)))
        return STATE_UNKNOWN

    if not lines:
        print("checked status file is empty: {}".format(_tmp_file))
        return STATE_UNKNOWN

    # Finally, check that the file contains only OK lines.  The wording of an
    # OK line is not consistent across releases, but a failed check is: the
    # check command run in the collect phase fails, and the recorded line
    # then starts with 'Failed'.
    state = STATE_OK
    for line in lines:
        # Lines still carry their own newline, so don't add another one.
        print(line, end='')
        if line.startswith('Failed'):
            state = STATE_CRITICAL
    return state
if __name__ == '__main__':
    # The value returned by run_main() becomes the process exit status.
    exit_status = run_main()
    sys.exit(exit_status)