[bradm] Adding nrpe checks, handle rsyncd config fragments
This commit is contained in:
25
files/nrpe-external-master/check_swift_service
Executable file
25
files/nrpe-external-master/check_swift_service
Executable file
@@ -0,0 +1,25 @@
|
||||
#!/bin/sh
|
||||
|
||||
#
|
||||
# check_swift_service --- exactly what it says on the tin
|
||||
#
|
||||
# Copyright 2013 Canonical Ltd.
|
||||
#
|
||||
# Authors:
|
||||
# Paul Collins <paul.collins@canonical.com>
|
||||
#
|
||||
|
||||
STATUS=$(sudo -u swift swift-init status "$1" 2>&1)
|
||||
|
||||
case $? in
|
||||
0)
|
||||
echo "OK: ${STATUS}"
|
||||
exit 0
|
||||
;;
|
||||
*)
|
||||
echo "CRITICAL: ${STATUS}"
|
||||
exit 2
|
||||
;;
|
||||
esac
|
||||
|
||||
exit 1
|
||||
136
files/nrpe-external-master/check_swift_storage.py
Executable file
136
files/nrpe-external-master/check_swift_storage.py
Executable file
@@ -0,0 +1,136 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copyright (C) 2014 Canonical
|
||||
# All Rights Reserved
|
||||
# Author: Jacek Nykis
|
||||
|
||||
import sys
|
||||
import json
|
||||
import urllib2
|
||||
import argparse
|
||||
import hashlib
|
||||
import datetime
|
||||
|
||||
STATUS_OK = 0
|
||||
STATUS_WARN = 1
|
||||
STATUS_CRIT = 2
|
||||
STATUS_UNKNOWN = 3
|
||||
|
||||
|
||||
def generate_md5(filename):
|
||||
with open(filename, 'rb') as f:
|
||||
md5 = hashlib.md5()
|
||||
buffer = f.read(2 ** 20)
|
||||
while buffer:
|
||||
md5.update(buffer)
|
||||
buffer = f.read(2 ** 20)
|
||||
return md5.hexdigest()
|
||||
|
||||
|
||||
def check_md5(base_url):
|
||||
url = base_url + "ringmd5"
|
||||
ringfiles = ["/etc/swift/object.ring.gz",
|
||||
"/etc/swift/account.ring.gz",
|
||||
"/etc/swift/container.ring.gz"]
|
||||
results = []
|
||||
try:
|
||||
data = urllib2.urlopen(url).read()
|
||||
j = json.loads(data)
|
||||
except urllib2.URLError:
|
||||
return [(STATUS_UNKNOWN, "Can't open url: {}".format(url))]
|
||||
except ValueError:
|
||||
return [(STATUS_UNKNOWN, "Can't parse status data")]
|
||||
|
||||
for ringfile in ringfiles:
|
||||
try:
|
||||
if generate_md5(ringfile) != j[ringfile]:
|
||||
results.append((STATUS_CRIT,
|
||||
"Ringfile {} MD5 sum mismatch".format(ringfile)))
|
||||
except IOError:
|
||||
results.append(
|
||||
(STATUS_UNKNOWN, "Can't open ringfile {}".format(ringfile)))
|
||||
if results:
|
||||
return results
|
||||
else:
|
||||
return [(STATUS_OK, "OK")]
|
||||
|
||||
|
||||
def check_replication(base_url, limits):
|
||||
types = ["account", "object", "container"]
|
||||
results = []
|
||||
for repl in types:
|
||||
url = base_url + "replication/" + repl
|
||||
try:
|
||||
data = urllib2.urlopen(url).read()
|
||||
j = json.loads(data)
|
||||
except urllib2.URLError:
|
||||
results.append((STATUS_UNKNOWN, "Can't open url: {}".format(url)))
|
||||
continue
|
||||
except ValueError:
|
||||
results.append((STATUS_UNKNOWN, "Can't parse status data"))
|
||||
continue
|
||||
|
||||
if "object_replication_last" in j:
|
||||
repl_last = datetime.datetime.fromtimestamp(j["object_replication_last"])
|
||||
else:
|
||||
repl_last = datetime.datetime.fromtimestamp(j["replication_last"])
|
||||
delta = datetime.datetime.now() - repl_last
|
||||
if delta.seconds >= limits[1]:
|
||||
results.append((STATUS_CRIT,
|
||||
"'{}' replication lag is {} seconds".format(repl, delta.seconds)))
|
||||
elif delta.seconds >= limits[0]:
|
||||
results.append((STATUS_WARN,
|
||||
"'{}' replication lag is {} seconds".format(repl, delta.seconds)))
|
||||
if "replication_stats" in j:
|
||||
errors = j["replication_stats"]["failure"]
|
||||
if errors >= limits[3]:
|
||||
results.append(
|
||||
(STATUS_CRIT, "{} replication failures".format(errors)))
|
||||
elif errors >= limits[2]:
|
||||
results.append(
|
||||
(STATUS_WARN, "{} replication failures".format(errors)))
|
||||
if results:
|
||||
return results
|
||||
else:
|
||||
return [(STATUS_OK, "OK")]
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser(description='Check swift-storage health')
|
||||
parser.add_argument('-H', '--host', dest='host', default='localhost',
|
||||
help='Hostname to query')
|
||||
parser.add_argument('-p', '--port', dest='port', default='6000',
|
||||
type=int, help='Port number')
|
||||
parser.add_argument('-r', '--replication', dest='check_replication',
|
||||
type=int, nargs=4, help='Check replication status',
|
||||
metavar=('lag_warn', 'lag_crit', 'failures_warn', 'failures_crit'))
|
||||
parser.add_argument('-m', '--md5', dest='check_md5', action='store_true',
|
||||
help='Compare server rings md5sum with local copy')
|
||||
args = parser.parse_args()
|
||||
|
||||
if not args.check_replication and not args.check_md5:
|
||||
print "You must use -r or -m switch"
|
||||
sys.exit(STATUS_UNKNOWN)
|
||||
|
||||
base_url = "http://{}:{}/recon/".format(args.host, args.port)
|
||||
results = []
|
||||
if args.check_replication:
|
||||
results.extend(check_replication(base_url, args.check_replication))
|
||||
if args.check_md5:
|
||||
results.extend(check_md5(base_url))
|
||||
|
||||
crits = ';'.join([i[1] for i in results if i[0] == STATUS_CRIT])
|
||||
warns = ';'.join([i[1] for i in results if i[0] == STATUS_WARN])
|
||||
unknowns = ';'.join([i[1] for i in results if i[0] == STATUS_UNKNOWN])
|
||||
if crits:
|
||||
print "CRITICAL: " + crits
|
||||
sys.exit(STATUS_CRIT)
|
||||
elif warns:
|
||||
print "WARNING: " + warns
|
||||
sys.exit(STATUS_WARN)
|
||||
elif unknowns:
|
||||
print "UNKNOWN: " + unknowns
|
||||
sys.exit(STATUS_UNKNOWN)
|
||||
else:
|
||||
print "OK"
|
||||
sys.exit(0)
|
||||
1
files/sudo/swift-storage
Normal file
1
files/sudo/swift-storage
Normal file
@@ -0,0 +1 @@
|
||||
nagios ALL=(swift) NOPASSWD:/usr/bin/swift-init status *
|
||||
Reference in New Issue
Block a user