Don't return Critical when ceph is in warning state.
The current implementation returns Critical whenever Ceph is in a warning state, apart from some known exceptions that are considered operational tasks. However, this causes many alarms. This patch changes the behavior to report Warning when Ceph is in HEALTH_WARN. Critical is still returned if a known operational task exceeds its threshold. Change-Id: I7a330189da8f0ba9168cedb534823c5e8f4795ba
This commit is contained in:
parent
7a362ff0a5
commit
35c8e40e83
14
config.yaml
14
config.yaml
@ -189,17 +189,21 @@ options:
|
|||||||
type: float
|
type: float
|
||||||
description: "Threshold for degraded ratio (0.1 = 10%)"
|
description: "Threshold for degraded ratio (0.1 = 10%)"
|
||||||
nagios_misplaced_thresh:
|
nagios_misplaced_thresh:
|
||||||
default: 10.0
|
default: 1.0
|
||||||
type: float
|
type: float
|
||||||
description: "Threshold for misplaced ratio (0.1 = 10%)"
|
description: "Threshold for misplaced ratio (0.1 = 10%)"
|
||||||
nagios_recovery_rate:
|
nagios_recovery_rate:
|
||||||
default: '1'
|
default: '1'
|
||||||
type: string
|
type: string
|
||||||
description: Recovery rate below which we consider recovery to be stalled
|
description: |
|
||||||
nagios_ignore_nodeepscub:
|
Recovery rate (in objects/s) below which we consider recovery
|
||||||
default: False
|
to be stalled.
|
||||||
|
nagios_raise_nodeepscrub:
|
||||||
|
default: True
|
||||||
type: boolean
|
type: boolean
|
||||||
description: Whether to ignore the nodeep-scrub flag
|
description: |
|
||||||
|
Whether to report Critical instead of Warning when the nodeep-scrub
|
||||||
|
flag is set.
|
||||||
use-direct-io:
|
use-direct-io:
|
||||||
type: boolean
|
type: boolean
|
||||||
default: True
|
default: True
|
||||||
|
@ -102,10 +102,6 @@ def check_ceph_status(args):
|
|||||||
:returns string, describing the status of the ceph cluster.
|
:returns string, describing the status of the ceph cluster.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
ignorable = (r'\d+ pgs (?:backfill|degraded|recovery_wait|stuck unclean)|'
|
|
||||||
'recovery \d+\/\d+ objects (?:degraded|misplaced)')
|
|
||||||
if args.ignore_nodeepscrub:
|
|
||||||
ignorable = ignorable + '|nodeep-scrub flag\(s\) set'
|
|
||||||
status_critical = False
|
status_critical = False
|
||||||
if args.status_file:
|
if args.status_file:
|
||||||
check_file_freshness(args.status_file)
|
check_file_freshness(args.status_file)
|
||||||
@ -136,41 +132,60 @@ def check_ceph_status(args):
|
|||||||
luminous = False
|
luminous = False
|
||||||
|
|
||||||
if overall_status != 'HEALTH_OK':
|
if overall_status != 'HEALTH_OK':
|
||||||
# Health is not OK, check if any lines are not in our list of OK
|
# Health is not OK, collect status message(s) and
|
||||||
# any lines that don't match, check is critical
|
# decide whether to return warning or critical
|
||||||
|
status_critical = False
|
||||||
status_msg = []
|
status_msg = []
|
||||||
if luminous:
|
if luminous:
|
||||||
status_messages = [x['summary']['message'] for x in status_data['health'].get('checks').values()]
|
status_messages = [x['summary']['message']
|
||||||
|
for x in
|
||||||
|
status_data['health'].get('checks').values()]
|
||||||
else:
|
else:
|
||||||
status_messages = [x['summary'] for x in status_data['health']['summary']]
|
status_messages = [x['summary']
|
||||||
|
for x in
|
||||||
|
status_data['health']['summary']]
|
||||||
for status in status_messages:
|
for status in status_messages:
|
||||||
if not re.match(ignorable, status):
|
status_msg.append(status)
|
||||||
|
# Check if nodeep-scrub is set and whether it should raise an error
|
||||||
|
if args.raise_nodeepscrub:
|
||||||
|
if re.match("nodeep-scrub flag", status):
|
||||||
|
status_critical = True
|
||||||
|
if overall_status == 'HEALTH_CRITICAL' or \
|
||||||
|
overall_status == 'HEALTH_ERR':
|
||||||
|
# HEALTH_ERR, report critical
|
||||||
|
status_critical = True
|
||||||
|
else:
|
||||||
|
# HEALTH_WARN
|
||||||
|
# Check the threshold for a list of operational tasks,
|
||||||
|
# and return CRITICAL if exceeded
|
||||||
|
degraded_ratio = float(status_data['pgmap'].get('degraded_ratio',
|
||||||
|
0.0))
|
||||||
|
if degraded_ratio > args.degraded_thresh:
|
||||||
status_critical = True
|
status_critical = True
|
||||||
status_msg.append(status)
|
if degraded_ratio > 0:
|
||||||
# If we got this far, then the status is not OK but the status lines
|
status_msg.append("Degraded ratio: {}".format(degraded_ratio))
|
||||||
# are all in our list of things we consider to be operational tasks.
|
misplaced_ratio = float(status_data['pgmap'].get('misplaced_ratio',
|
||||||
# Check the thresholds and return CRITICAL if exceeded,
|
0.0))
|
||||||
# otherwise there's something not accounted for and we want to know
|
if misplaced_ratio > args.misplaced_thresh:
|
||||||
# about it with a WARN alert.
|
status_critical = True
|
||||||
degraded_ratio = status_data['pgmap'].get('degraded_ratio', 0.0)
|
if misplaced_ratio > 0:
|
||||||
if degraded_ratio > args.degraded_thresh:
|
status_msg.append("Misplaced ratio: {}".
|
||||||
status_critical = True
|
format(misplaced_ratio))
|
||||||
status_msg.append("Degraded ratio: {}".format(degraded_ratio))
|
recovering = float(status_data['pgmap'].
|
||||||
misplaced_ratio = status_data['pgmap'].get('misplaced_ratio', 0.0)
|
get('recovering_objects_per_sec', 0.0))
|
||||||
if misplaced_ratio > args.misplaced_thresh:
|
if (degraded_ratio > 0 or misplaced_ratio > 0) \
|
||||||
status_critical = True
|
and recovering > 0 \
|
||||||
status_msg.append("Misplaced ratio: {}".format(misplaced_ratio))
|
and recovering < args.recovery_rate:
|
||||||
recovering = status_data['pgmap'].get('recovering_objects_per_sec',
|
status_critical = True
|
||||||
0.0)
|
if recovering > 0:
|
||||||
if recovering < args.recovery_rate:
|
status_msg.append("Recovering objects/s {}".format(recovering))
|
||||||
status_critical = True
|
|
||||||
status_msg.append("Recovering objects/sec {}".format(recovering))
|
|
||||||
if status_critical:
|
if status_critical:
|
||||||
msg = 'CRITICAL: ceph health: "{} {}"'.format(
|
msg = 'CRITICAL: ceph health: "{} {}"'.format(
|
||||||
overall_status,
|
overall_status,
|
||||||
", ".join(status_msg))
|
", ".join(status_msg))
|
||||||
raise CriticalError(msg)
|
raise CriticalError(msg)
|
||||||
if overall_status == 'HEALTH_WARN':
|
else:
|
||||||
|
# overall_status == 'HEALTH_WARN':
|
||||||
msg = "WARNING: {}".format(", ".join(status_msg))
|
msg = "WARNING: {}".format(", ".join(status_msg))
|
||||||
raise WarnError(msg)
|
raise WarnError(msg)
|
||||||
message = "All OK"
|
message = "All OK"
|
||||||
@ -187,21 +202,21 @@ def parse_args(args):
|
|||||||
'user account does not have rights for the Ceph '
|
'user account does not have rights for the Ceph '
|
||||||
'config files.')
|
'config files.')
|
||||||
parser.add_argument('--degraded_thresh', dest='degraded_thresh',
|
parser.add_argument('--degraded_thresh', dest='degraded_thresh',
|
||||||
default=1, type=float,
|
default=1.0, type=float,
|
||||||
help="Threshold for degraded ratio (0.1 = 10%)")
|
help="Threshold for degraded ratio (0.1 = 10%)")
|
||||||
parser.add_argument('--misplaced_thresh', dest='misplaced_thresh',
|
parser.add_argument('--misplaced_thresh', dest='misplaced_thresh',
|
||||||
default=10, type=float,
|
default=1.0, type=float,
|
||||||
help="Threshold for misplaced ratio (0.1 = 10%)")
|
help="Threshold for misplaced ratio (0.1 = 10%)")
|
||||||
parser.add_argument('--recovery_rate', dest='recovery_rate',
|
parser.add_argument('--recovery_rate', dest='recovery_rate',
|
||||||
default=1, type=int,
|
default=1, type=int,
|
||||||
help="Recovery rate below which we consider recovery "
|
help="Recovery rate (in objects/s) below which we"
|
||||||
"to be stalled")
|
"consider recovery to be stalled")
|
||||||
parser.add_argument('--ignore_nodeepscrub', dest='ignore_nodeepscrub',
|
parser.add_argument('--raise_nodeepscrub', dest='raise_nodeepscrub',
|
||||||
default=False, action='store_true',
|
default=False, action='store_true',
|
||||||
help="Whether to ignore the nodeep-scrub flag. If "
|
help="Whether to raise an error for the nodeep-scrub"
|
||||||
"the nodeep-scrub flag is set, the check returns "
|
"flag. If the nodeep-scrub flag is set,"
|
||||||
"warning if this param is passed, otherwise "
|
"the check returns critical if this param is"
|
||||||
"returns critical.")
|
"passed, otherwise it returns warning.")
|
||||||
return parser.parse_args(args)
|
return parser.parse_args(args)
|
||||||
|
|
||||||
|
|
||||||
@ -218,7 +233,7 @@ def main(args):
|
|||||||
exitcode = 'critical'
|
exitcode = 'critical'
|
||||||
except WarnError as msg:
|
except WarnError as msg:
|
||||||
print(msg)
|
print(msg)
|
||||||
exitcode = 'critical'
|
exitcode = 'warning'
|
||||||
except:
|
except:
|
||||||
print("%s raised unknown exception '%s'" % ('check_ceph_status',
|
print("%s raised unknown exception '%s'" % ('check_ceph_status',
|
||||||
sys.exc_info()[0]))
|
sys.exc_info()[0]))
|
||||||
|
@ -748,8 +748,8 @@ def update_nrpe_config():
|
|||||||
config('nagios_degraded_thresh'),
|
config('nagios_degraded_thresh'),
|
||||||
config('nagios_misplaced_thresh'),
|
config('nagios_misplaced_thresh'),
|
||||||
config('nagios_recovery_rate'))
|
config('nagios_recovery_rate'))
|
||||||
if config('nagios_ignore_nodeepscub'):
|
if config('nagios_raise_nodeepscrub'):
|
||||||
check_cmd = check_cmd + ' --ignore_nodeepscrub'
|
check_cmd = check_cmd + ' --raise_nodeepscrub'
|
||||||
nrpe_setup.add_check(
|
nrpe_setup.add_check(
|
||||||
shortname="ceph",
|
shortname="ceph",
|
||||||
description='Check Ceph health {{{}}}'.format(current_unit),
|
description='Check Ceph health {{{}}}'.format(current_unit),
|
||||||
|
147
unit_tests/ceph_degraded_luminous.json
Normal file
147
unit_tests/ceph_degraded_luminous.json
Normal file
@ -0,0 +1,147 @@
|
|||||||
|
{
|
||||||
|
"fsid": "b03a2900-e297-11e8-a7db-00163ed10659",
|
||||||
|
"health": {
|
||||||
|
"checks": {
|
||||||
|
"OSD_DOWN": {
|
||||||
|
"severity": "HEALTH_WARN",
|
||||||
|
"summary": {
|
||||||
|
"message": "3 osds down"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"OSD_HOST_DOWN": {
|
||||||
|
"severity": "HEALTH_WARN",
|
||||||
|
"summary": {
|
||||||
|
"message": "1 host (3 osds) down"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"OBJECT_MISPLACED": {
|
||||||
|
"severity": "HEALTH_WARN",
|
||||||
|
"summary": {
|
||||||
|
"message": "9883/43779 objects misplaced (22.575%)"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"PG_DEGRADED": {
|
||||||
|
"severity": "HEALTH_WARN",
|
||||||
|
"summary": {
|
||||||
|
"message": "Degraded data redundancy: 14001/43779 objects degraded (31.981%), 32 pgs degraded"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"POOL_APP_NOT_ENABLED": {
|
||||||
|
"severity": "HEALTH_WARN",
|
||||||
|
"summary": {
|
||||||
|
"message": "application not enabled on 1 pool(s)"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"TOO_FEW_PGS": {
|
||||||
|
"severity": "HEALTH_WARN",
|
||||||
|
"summary": {
|
||||||
|
"message": "too few PGs per OSD (7 < min 30)"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"status": "HEALTH_WARN"
|
||||||
|
},
|
||||||
|
"election_epoch": 5,
|
||||||
|
"quorum": [
|
||||||
|
0
|
||||||
|
],
|
||||||
|
"quorum_names": [
|
||||||
|
"juju-460e0f-11"
|
||||||
|
],
|
||||||
|
"monmap": {
|
||||||
|
"epoch": 1,
|
||||||
|
"fsid": "b03a2900-e297-11e8-a7db-00163ed10659",
|
||||||
|
"modified": "2018-11-07 14:17:12.324408",
|
||||||
|
"created": "2018-11-07 14:17:12.324408",
|
||||||
|
"features": {
|
||||||
|
"persistent": [
|
||||||
|
"kraken",
|
||||||
|
"luminous"
|
||||||
|
],
|
||||||
|
"optional": []
|
||||||
|
},
|
||||||
|
"mons": [
|
||||||
|
{
|
||||||
|
"rank": 0,
|
||||||
|
"name": "juju-460e0f-11",
|
||||||
|
"addr": "192.168.100.81:6789/0",
|
||||||
|
"public_addr": "192.168.100.81:6789/0"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"osdmap": {
|
||||||
|
"osdmap": {
|
||||||
|
"epoch": 72,
|
||||||
|
"num_osds": 9,
|
||||||
|
"num_up_osds": 6,
|
||||||
|
"num_in_osds": 9,
|
||||||
|
"full": false,
|
||||||
|
"nearfull": false,
|
||||||
|
"num_remapped_pgs": 16
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"pgmap": {
|
||||||
|
"pgs_by_state": [
|
||||||
|
{
|
||||||
|
"state_name": "active+undersized+degraded",
|
||||||
|
"count": 16
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"state_name": "active+undersized+degraded+remapped+backfill_wait",
|
||||||
|
"count": 14
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"state_name": "active+undersized+degraded+remapped+backfilling",
|
||||||
|
"count": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"num_pgs": 32,
|
||||||
|
"num_pools": 1,
|
||||||
|
"num_objects": 14593,
|
||||||
|
"data_bytes": 61169729807,
|
||||||
|
"bytes_used": 14540595200,
|
||||||
|
"bytes_avail": 14889525248,
|
||||||
|
"bytes_total": 29430120448,
|
||||||
|
"degraded_objects": 14001,
|
||||||
|
"degraded_total": 43779,
|
||||||
|
"degraded_ratio": 0.319811,
|
||||||
|
"misplaced_objects": 9883,
|
||||||
|
"misplaced_total": 43779,
|
||||||
|
"misplaced_ratio": 0.225748
|
||||||
|
},
|
||||||
|
"fsmap": {
|
||||||
|
"epoch": 1,
|
||||||
|
"by_rank": []
|
||||||
|
},
|
||||||
|
"mgrmap": {
|
||||||
|
"epoch": 5,
|
||||||
|
"active_gid": 14097,
|
||||||
|
"active_name": "juju-460e0f-11",
|
||||||
|
"active_addr": "192.168.100.81:6800/204",
|
||||||
|
"available": true,
|
||||||
|
"standbys": [],
|
||||||
|
"modules": [
|
||||||
|
"balancer",
|
||||||
|
"restful",
|
||||||
|
"status"
|
||||||
|
],
|
||||||
|
"available_modules": [
|
||||||
|
"balancer",
|
||||||
|
"dashboard",
|
||||||
|
"influx",
|
||||||
|
"localpool",
|
||||||
|
"prometheus",
|
||||||
|
"restful",
|
||||||
|
"selftest",
|
||||||
|
"status",
|
||||||
|
"zabbix"
|
||||||
|
],
|
||||||
|
"services": {}
|
||||||
|
},
|
||||||
|
"servicemap": {
|
||||||
|
"epoch": 1,
|
||||||
|
"modified": "0.000000",
|
||||||
|
"services": {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
118
unit_tests/ceph_error.json
Normal file
118
unit_tests/ceph_error.json
Normal file
@ -0,0 +1,118 @@
|
|||||||
|
{
|
||||||
|
"health": {
|
||||||
|
"health": {
|
||||||
|
"health_services": [
|
||||||
|
{
|
||||||
|
"mons": [
|
||||||
|
{
|
||||||
|
"name": "juju-460e0f-12",
|
||||||
|
"kb_total": 1829760,
|
||||||
|
"kb_used": 835072,
|
||||||
|
"kb_avail": 994688,
|
||||||
|
"avail_percent": 54,
|
||||||
|
"last_updated": "2018-11-07 18:46:32.308592",
|
||||||
|
"store_stats": {
|
||||||
|
"bytes_total": 15678387,
|
||||||
|
"bytes_sst": 0,
|
||||||
|
"bytes_log": 420953,
|
||||||
|
"bytes_misc": 15257434,
|
||||||
|
"last_updated": "0.000000"
|
||||||
|
},
|
||||||
|
"health": "HEALTH_OK"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"timechecks": {
|
||||||
|
"epoch": 3,
|
||||||
|
"round": 0,
|
||||||
|
"round_status": "finished"
|
||||||
|
},
|
||||||
|
"summary": [
|
||||||
|
{
|
||||||
|
"severity": "HEALTH_ERR",
|
||||||
|
"summary": "6 pgs are stuck inactive for more than 300 seconds"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"severity": "HEALTH_WARN",
|
||||||
|
"summary": "7 pgs peering"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"severity": "HEALTH_WARN",
|
||||||
|
"summary": "6 pgs stuck inactive"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"severity": "HEALTH_WARN",
|
||||||
|
"summary": "6 pgs stuck unclean"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"overall_status": "HEALTH_ERR",
|
||||||
|
"detail": []
|
||||||
|
},
|
||||||
|
"fsid": "68a9ca14-e297-11e8-843c-00163e64b0c0",
|
||||||
|
"election_epoch": 3,
|
||||||
|
"quorum": [
|
||||||
|
0
|
||||||
|
],
|
||||||
|
"quorum_names": [
|
||||||
|
"juju-460e0f-12"
|
||||||
|
],
|
||||||
|
"monmap": {
|
||||||
|
"epoch": 1,
|
||||||
|
"fsid": "68a9ca14-e297-11e8-843c-00163e64b0c0",
|
||||||
|
"modified": "2018-11-07 14:17:27.659064",
|
||||||
|
"created": "2018-11-07 14:17:27.659064",
|
||||||
|
"mons": [
|
||||||
|
{
|
||||||
|
"rank": 0,
|
||||||
|
"name": "juju-460e0f-12",
|
||||||
|
"addr": "192.168.100.26:6789\/0"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"osdmap": {
|
||||||
|
"osdmap": {
|
||||||
|
"epoch": 28,
|
||||||
|
"num_osds": 9,
|
||||||
|
"num_up_osds": 9,
|
||||||
|
"num_in_osds": 9,
|
||||||
|
"full": false,
|
||||||
|
"nearfull": false,
|
||||||
|
"num_remapped_pgs": 0
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"pgmap": {
|
||||||
|
"pgs_by_state": [
|
||||||
|
{
|
||||||
|
"state_name": "creating",
|
||||||
|
"count": 113
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"state_name": "active+clean",
|
||||||
|
"count": 64
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"state_name": "activating",
|
||||||
|
"count": 8
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"state_name": "peering",
|
||||||
|
"count": 7
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"version": 7831,
|
||||||
|
"num_pgs": 192,
|
||||||
|
"data_bytes": 1790967809,
|
||||||
|
"bytes_used": 9995157504,
|
||||||
|
"bytes_avail": 9157476352,
|
||||||
|
"bytes_total": 19152633856,
|
||||||
|
"write_bytes_sec": 89844495,
|
||||||
|
"read_op_per_sec": 0,
|
||||||
|
"write_op_per_sec": 21
|
||||||
|
},
|
||||||
|
"fsmap": {
|
||||||
|
"epoch": 1,
|
||||||
|
"by_rank": []
|
||||||
|
}
|
||||||
|
}
|
147
unit_tests/ceph_many_warnings_luminous.json
Normal file
147
unit_tests/ceph_many_warnings_luminous.json
Normal file
@ -0,0 +1,147 @@
|
|||||||
|
{
|
||||||
|
"fsid": "b03a2900-e297-11e8-a7db-00163ed10659",
|
||||||
|
"health": {
|
||||||
|
"checks": {
|
||||||
|
"OBJECT_MISPLACED": {
|
||||||
|
"severity": "HEALTH_WARN",
|
||||||
|
"summary": {
|
||||||
|
"message": "1560/12264 objects misplaced (12.720%)"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"PG_AVAILABILITY": {
|
||||||
|
"severity": "HEALTH_WARN",
|
||||||
|
"summary": {
|
||||||
|
"message": "Reduced data availability: 27 pgs inactive, 30 pgs peering"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"POOL_APP_NOT_ENABLED": {
|
||||||
|
"severity": "HEALTH_WARN",
|
||||||
|
"summary": {
|
||||||
|
"message": "application not enabled on 1 pool(s)"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"TOO_FEW_PGS": {
|
||||||
|
"severity": "HEALTH_WARN",
|
||||||
|
"summary": {
|
||||||
|
"message": "too few PGs per OSD (21 < min 30)"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"status": "HEALTH_WARN"
|
||||||
|
},
|
||||||
|
"election_epoch": 5,
|
||||||
|
"quorum": [
|
||||||
|
0
|
||||||
|
],
|
||||||
|
"quorum_names": [
|
||||||
|
"juju-460e0f-11"
|
||||||
|
],
|
||||||
|
"monmap": {
|
||||||
|
"epoch": 1,
|
||||||
|
"fsid": "b03a2900-e297-11e8-a7db-00163ed10659",
|
||||||
|
"modified": "2018-11-07 14:17:12.324408",
|
||||||
|
"created": "2018-11-07 14:17:12.324408",
|
||||||
|
"features": {
|
||||||
|
"persistent": [
|
||||||
|
"kraken",
|
||||||
|
"luminous"
|
||||||
|
],
|
||||||
|
"optional": []
|
||||||
|
},
|
||||||
|
"mons": [
|
||||||
|
{
|
||||||
|
"rank": 0,
|
||||||
|
"name": "juju-460e0f-11",
|
||||||
|
"addr": "192.168.100.81:6789/0",
|
||||||
|
"public_addr": "192.168.100.81:6789/0"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"osdmap": {
|
||||||
|
"osdmap": {
|
||||||
|
"epoch": 118,
|
||||||
|
"num_osds": 9,
|
||||||
|
"num_up_osds": 9,
|
||||||
|
"num_in_osds": 9,
|
||||||
|
"full": false,
|
||||||
|
"nearfull": false,
|
||||||
|
"num_remapped_pgs": 15
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"pgmap": {
|
||||||
|
"pgs_by_state": [
|
||||||
|
{
|
||||||
|
"state_name": "unknown",
|
||||||
|
"count": 65
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"state_name": "peering",
|
||||||
|
"count": 31
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"state_name": "activating",
|
||||||
|
"count": 17
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"state_name": "activating+remapped",
|
||||||
|
"count": 15
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"num_pgs": 128,
|
||||||
|
"num_pools": 1,
|
||||||
|
"num_objects": 4088,
|
||||||
|
"data_bytes": 17187733578,
|
||||||
|
"bytes_used": 14360064000,
|
||||||
|
"bytes_avail": 15023263744,
|
||||||
|
"bytes_total": 29383327744,
|
||||||
|
"unknown_pgs_ratio": 0.507812,
|
||||||
|
"inactive_pgs_ratio": 0.492188,
|
||||||
|
"misplaced_objects": 1560,
|
||||||
|
"misplaced_total": 12264,
|
||||||
|
"misplaced_ratio": 0.127202,
|
||||||
|
"recovering_objects_per_sec": 14,
|
||||||
|
"recovering_bytes_per_sec": 60779755,
|
||||||
|
"recovering_keys_per_sec": 0,
|
||||||
|
"num_objects_recovered": 113,
|
||||||
|
"num_bytes_recovered": 471859200,
|
||||||
|
"num_keys_recovered": 0,
|
||||||
|
"read_bytes_sec": 0,
|
||||||
|
"write_bytes_sec": 244132150,
|
||||||
|
"read_op_per_sec": 0,
|
||||||
|
"write_op_per_sec": 116
|
||||||
|
},
|
||||||
|
"fsmap": {
|
||||||
|
"epoch": 1,
|
||||||
|
"by_rank": []
|
||||||
|
},
|
||||||
|
"mgrmap": {
|
||||||
|
"epoch": 5,
|
||||||
|
"active_gid": 14097,
|
||||||
|
"active_name": "juju-460e0f-11",
|
||||||
|
"active_addr": "192.168.100.81:6800/204",
|
||||||
|
"available": true,
|
||||||
|
"standbys": [],
|
||||||
|
"modules": [
|
||||||
|
"balancer",
|
||||||
|
"restful",
|
||||||
|
"status"
|
||||||
|
],
|
||||||
|
"available_modules": [
|
||||||
|
"balancer",
|
||||||
|
"dashboard",
|
||||||
|
"influx",
|
||||||
|
"localpool",
|
||||||
|
"prometheus",
|
||||||
|
"restful",
|
||||||
|
"selftest",
|
||||||
|
"status",
|
||||||
|
"zabbix"
|
||||||
|
],
|
||||||
|
"services": {}
|
||||||
|
},
|
||||||
|
"servicemap": {
|
||||||
|
"epoch": 1,
|
||||||
|
"modified": "0.000000",
|
||||||
|
"services": {}
|
||||||
|
}
|
||||||
|
}
|
@ -1,177 +1,202 @@
|
|||||||
{
|
{
|
||||||
"health": {
|
|
||||||
"health": {
|
"health": {
|
||||||
"health_services": [
|
"health": {
|
||||||
{
|
"health_services": [
|
||||||
"mons": [
|
{
|
||||||
|
"mons": [
|
||||||
|
{
|
||||||
|
"name": "juju-c62a41-21-lxd-0",
|
||||||
|
"kb_total": 334602320,
|
||||||
|
"kb_used": 2127960,
|
||||||
|
"kb_avail": 315454468,
|
||||||
|
"avail_percent": 94,
|
||||||
|
"last_updated": "2018-11-08 09:47:09.932189",
|
||||||
|
"store_stats": {
|
||||||
|
"bytes_total": 34880542,
|
||||||
|
"bytes_sst": 0,
|
||||||
|
"bytes_log": 1647123,
|
||||||
|
"bytes_misc": 33233419,
|
||||||
|
"last_updated": "0.000000"
|
||||||
|
},
|
||||||
|
"health": "HEALTH_OK"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "juju-c62a41-24-lxd-0",
|
||||||
|
"kb_total": 334602320,
|
||||||
|
"kb_used": 2128116,
|
||||||
|
"kb_avail": 315454312,
|
||||||
|
"avail_percent": 94,
|
||||||
|
"last_updated": "2018-11-08 09:47:16.418007",
|
||||||
|
"store_stats": {
|
||||||
|
"bytes_total": 36811676,
|
||||||
|
"bytes_sst": 0,
|
||||||
|
"bytes_log": 3574345,
|
||||||
|
"bytes_misc": 33237331,
|
||||||
|
"last_updated": "0.000000"
|
||||||
|
},
|
||||||
|
"health": "HEALTH_OK"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "juju-c62a41-25-lxd-0",
|
||||||
|
"kb_total": 334602320,
|
||||||
|
"kb_used": 2128860,
|
||||||
|
"kb_avail": 315453568,
|
||||||
|
"avail_percent": 94,
|
||||||
|
"last_updated": "2018-11-08 09:47:21.198816",
|
||||||
|
"store_stats": {
|
||||||
|
"bytes_total": 37388424,
|
||||||
|
"bytes_sst": 0,
|
||||||
|
"bytes_log": 4151569,
|
||||||
|
"bytes_misc": 33236855,
|
||||||
|
"last_updated": "0.000000"
|
||||||
|
},
|
||||||
|
"health": "HEALTH_OK"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"timechecks": {
|
||||||
|
"epoch": 14,
|
||||||
|
"round": 4480,
|
||||||
|
"round_status": "finished",
|
||||||
|
"mons": [
|
||||||
|
{
|
||||||
|
"name": "juju-c62a41-21-lxd-0",
|
||||||
|
"skew": 0.000000,
|
||||||
|
"latency": 0.000000,
|
||||||
|
"health": "HEALTH_OK"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "juju-c62a41-24-lxd-0",
|
||||||
|
"skew": 0.000282,
|
||||||
|
"latency": 0.000989,
|
||||||
|
"health": "HEALTH_OK"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "juju-c62a41-25-lxd-0",
|
||||||
|
"skew": -0.001223,
|
||||||
|
"latency": 0.000776,
|
||||||
|
"health": "HEALTH_OK"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"summary": [
|
||||||
{
|
{
|
||||||
"name": "node1",
|
"severity": "HEALTH_WARN",
|
||||||
"kb_total": 140956600,
|
"summary": "19 pgs backfill_wait"
|
||||||
"kb_used": 15916132,
|
|
||||||
"kb_avail": 117857208,
|
|
||||||
"avail_percent": 83,
|
|
||||||
"last_updated": "2017-05-17 03:23:11.248297",
|
|
||||||
"store_stats": {
|
|
||||||
"bytes_total": 140014259,
|
|
||||||
"bytes_sst": 0,
|
|
||||||
"bytes_log": 13670758,
|
|
||||||
"bytes_misc": 126343501,
|
|
||||||
"last_updated": "0.000000"
|
|
||||||
},
|
|
||||||
"health": "HEALTH_OK"
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "node2",
|
"severity": "HEALTH_WARN",
|
||||||
"kb_total": 70395920,
|
"summary": "4 pgs backfilling"
|
||||||
"kb_used": 10532504,
|
|
||||||
"kb_avail": 56264436,
|
|
||||||
"avail_percent": 79,
|
|
||||||
"last_updated": "2017-05-17 03:23:16.952673",
|
|
||||||
"store_stats": {
|
|
||||||
"bytes_total": 315512452,
|
|
||||||
"bytes_sst": 0,
|
|
||||||
"bytes_log": 21691698,
|
|
||||||
"bytes_misc": 293820754,
|
|
||||||
"last_updated": "0.000000"
|
|
||||||
},
|
|
||||||
"health": "HEALTH_OK"
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "juju-machine-85-lxc-10",
|
"severity": "HEALTH_WARN",
|
||||||
"kb_total": 131927524,
|
"summary": "1 pgs peering"
|
||||||
"kb_used": 79521024,
|
},
|
||||||
"kb_avail": 45954016,
|
{
|
||||||
"avail_percent": 34,
|
"severity": "HEALTH_WARN",
|
||||||
"last_updated": "2017-05-17 03:23:13.794034",
|
"summary": "24 pgs stuck unclean"
|
||||||
"store_stats": {
|
},
|
||||||
"bytes_total": 89036349,
|
{
|
||||||
"bytes_sst": 0,
|
"severity": "HEALTH_WARN",
|
||||||
"bytes_log": 21055337,
|
"summary": "recovery 17386\/112794 objects misplaced (15.414%)"
|
||||||
"bytes_misc": 67981012,
|
},
|
||||||
"last_updated": "0.000000"
|
{
|
||||||
},
|
"severity": "HEALTH_WARN",
|
||||||
"health": "HEALTH_OK"
|
"summary": "pool pool1 has many more objects per pg than average (too few pgs?)"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"severity": "HEALTH_WARN",
|
||||||
|
"summary": "nodeep-scrub flag(s) set"
|
||||||
}
|
}
|
||||||
]
|
],
|
||||||
}
|
"overall_status": "HEALTH_WARN",
|
||||||
]
|
"detail": []
|
||||||
},
|
},
|
||||||
"timechecks": {
|
"fsid": "66af7af5-2f60-4e0e-94dc-49f49bd37284",
|
||||||
"epoch": 280,
|
"election_epoch": 14,
|
||||||
"round": 19874,
|
"quorum": [
|
||||||
"round_status": "finished",
|
0,
|
||||||
"mons": [
|
1,
|
||||||
{
|
2
|
||||||
"name": "node1",
|
|
||||||
"skew": "0.000000",
|
|
||||||
"latency": "0.000000",
|
|
||||||
"health": "HEALTH_OK"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "node2",
|
|
||||||
"skew": "-0.000000",
|
|
||||||
"latency": "0.000866",
|
|
||||||
"health": "HEALTH_OK"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "juju-machine-85-lxc-10",
|
|
||||||
"skew": "-0.000000",
|
|
||||||
"latency": "0.018848",
|
|
||||||
"health": "HEALTH_OK"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"summary": [
|
|
||||||
{
|
|
||||||
"severity": "HEALTH_WARN",
|
|
||||||
"summary": "nodeep-scrub flag(s) set"
|
|
||||||
}
|
|
||||||
],
|
],
|
||||||
"overall_status": "HEALTH_WARN",
|
"quorum_names": [
|
||||||
"detail": []
|
"juju-c62a41-21-lxd-0",
|
||||||
},
|
"juju-c62a41-24-lxd-0",
|
||||||
"fsid": "some_fsid",
|
"juju-c62a41-25-lxd-0"
|
||||||
"election_epoch": 280,
|
],
|
||||||
"quorum": [
|
"monmap": {
|
||||||
0,
|
"epoch": 2,
|
||||||
1,
|
"fsid": "66af7af5-2f60-4e0e-94dc-49f49bd37284",
|
||||||
2
|
"modified": "2018-10-31 15:37:56.902830",
|
||||||
],
|
"created": "2018-10-31 15:37:40.288870",
|
||||||
"quorum_names": [
|
"mons": [
|
||||||
"node1",
|
{
|
||||||
"node2",
|
"rank": 0,
|
||||||
"juju-machine-85-lxc-10"
|
"name": "juju-c62a41-21-lxd-0",
|
||||||
],
|
"addr": "100.84.195.4:6789\/0"
|
||||||
"monmap": {
|
},
|
||||||
"epoch": 3,
|
{
|
||||||
"fsid": "some_fsid",
|
"rank": 1,
|
||||||
"modified": "2016-11-25 00:08:51.235813",
|
"name": "juju-c62a41-24-lxd-0",
|
||||||
"created": "0.000000",
|
"addr": "100.84.196.4:6789\/0"
|
||||||
"mons": [
|
},
|
||||||
{
|
{
|
||||||
"rank": 0,
|
"rank": 2,
|
||||||
"name": "node1",
|
"name": "juju-c62a41-25-lxd-0",
|
||||||
"addr": "10.24.0.15:6789/0"
|
"addr": "100.84.196.5:6789\/0"
|
||||||
},
|
}
|
||||||
{
|
]
|
||||||
"rank": 1,
|
},
|
||||||
"name": "node2",
|
|
||||||
"addr": "10.24.0.17:6789/0"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"rank": 2,
|
|
||||||
"name": "juju-machine-85-lxc-10",
|
|
||||||
"addr": "10.24.0.195:6789/0"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"osdmap": {
|
|
||||||
"osdmap": {
|
"osdmap": {
|
||||||
"epoch": 37820,
|
"osdmap": {
|
||||||
"num_osds": 46,
|
"epoch": 316,
|
||||||
"num_up_osds": 46,
|
"num_osds": 48,
|
||||||
"num_in_osds": 46,
|
"num_up_osds": 48,
|
||||||
"full": false,
|
"num_in_osds": 48,
|
||||||
"nearfull": false
|
"full": false,
|
||||||
|
"nearfull": false,
|
||||||
|
"num_remapped_pgs": 22
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"pgmap": {
|
||||||
|
"pgs_by_state": [
|
||||||
|
{
|
||||||
|
"state_name": "active+clean",
|
||||||
|
"count": 3448
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"state_name": "active+remapped+wait_backfill",
|
||||||
|
"count": 19
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"state_name": "active+remapped+backfilling",
|
||||||
|
"count": 4
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"state_name": "peering",
|
||||||
|
"count": 1
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"version": 141480,
|
||||||
|
"num_pgs": 3472,
|
||||||
|
"data_bytes": 157009583781,
|
||||||
|
"bytes_used": 487185850368,
|
||||||
|
"bytes_avail": 75282911256576,
|
||||||
|
"bytes_total": 75770097106944,
|
||||||
|
"misplaced_objects": 17386,
|
||||||
|
"misplaced_total": 112794,
|
||||||
|
"misplaced_ratio": 0.154139,
|
||||||
|
"recovering_objects_per_sec": 436,
|
||||||
|
"recovering_bytes_per_sec": 1832614589,
|
||||||
|
"recovering_keys_per_sec": 0,
|
||||||
|
"num_objects_recovered": 446,
|
||||||
|
"num_bytes_recovered": 1870659584,
|
||||||
|
"num_keys_recovered": 0
|
||||||
|
},
|
||||||
|
"fsmap": {
|
||||||
|
"epoch": 1,
|
||||||
|
"by_rank": []
|
||||||
}
|
}
|
||||||
},
|
|
||||||
"pgmap": {
|
|
||||||
"pgs_by_state": [
|
|
||||||
{
|
|
||||||
"state_name": "active+clean",
|
|
||||||
"count": 1988
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"state_name": "active+remapped+wait_backfill",
|
|
||||||
"count": 3
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"state_name": "active+remapped+backfilling",
|
|
||||||
"count": 1
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"version": 58873447,
|
|
||||||
"num_pgs": 1992,
|
|
||||||
"data_bytes": 35851846298041,
|
|
||||||
"bytes_used": 107730678743040,
|
|
||||||
"bytes_avail": 63413590548480,
|
|
||||||
"bytes_total": 171144269291520,
|
|
||||||
"degraded_objects": 0,
|
|
||||||
"degraded_total": 25759217,
|
|
||||||
"degraded_ratio": 0,
|
|
||||||
"recovering_objects_per_sec": 17,
|
|
||||||
"recovering_bytes_per_sec": 72552794,
|
|
||||||
"recovering_keys_per_sec": 0,
|
|
||||||
"read_bytes_sec": 23935944,
|
|
||||||
"write_bytes_sec": 7024503,
|
|
||||||
"op_per_sec": 5332
|
|
||||||
},
|
|
||||||
"mdsmap": {
|
|
||||||
"epoch": 1,
|
|
||||||
"up": 0,
|
|
||||||
"in": 0,
|
|
||||||
"max": 1,
|
|
||||||
"by_rank": []
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
102
unit_tests/ceph_nodeepscrub_luminous.json
Normal file
102
unit_tests/ceph_nodeepscrub_luminous.json
Normal file
@ -0,0 +1,102 @@
|
|||||||
|
{
|
||||||
|
"fsid": "b03a2900-e297-11e8-a7db-00163ed10659",
|
||||||
|
"health": {
|
||||||
|
"checks": {
|
||||||
|
"OSDMAP_FLAGS": {
|
||||||
|
"severity": "HEALTH_WARN",
|
||||||
|
"summary": {
|
||||||
|
"message": "nodeep-scrub flag(s) set"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"status": "HEALTH_WARN"
|
||||||
|
},
|
||||||
|
"election_epoch": 5,
|
||||||
|
"quorum": [
|
||||||
|
0
|
||||||
|
],
|
||||||
|
"quorum_names": [
|
||||||
|
"juju-460e0f-11"
|
||||||
|
],
|
||||||
|
"monmap": {
|
||||||
|
"epoch": 1,
|
||||||
|
"fsid": "b03a2900-e297-11e8-a7db-00163ed10659",
|
||||||
|
"modified": "2018-11-07 14:17:12.324408",
|
||||||
|
"created": "2018-11-07 14:17:12.324408",
|
||||||
|
"features": {
|
||||||
|
"persistent": [
|
||||||
|
"kraken",
|
||||||
|
"luminous"
|
||||||
|
],
|
||||||
|
"optional": []
|
||||||
|
},
|
||||||
|
"mons": [
|
||||||
|
{
|
||||||
|
"rank": 0,
|
||||||
|
"name": "juju-460e0f-11",
|
||||||
|
"addr": "192.168.100.81:6789/0",
|
||||||
|
"public_addr": "192.168.100.81:6789/0"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"osdmap": {
|
||||||
|
"osdmap": {
|
||||||
|
"epoch": 518,
|
||||||
|
"num_osds": 9,
|
||||||
|
"num_up_osds": 9,
|
||||||
|
"num_in_osds": 9,
|
||||||
|
"full": false,
|
||||||
|
"nearfull": false,
|
||||||
|
"num_remapped_pgs": 0
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"pgmap": {
|
||||||
|
"pgs_by_state": [
|
||||||
|
{
|
||||||
|
"state_name": "active+clean",
|
||||||
|
"count": 128
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"num_pgs": 128,
|
||||||
|
"num_pools": 1,
|
||||||
|
"num_objects": 14896,
|
||||||
|
"data_bytes": 62440603919,
|
||||||
|
"bytes_used": 14225776640,
|
||||||
|
"bytes_avail": 9450938368,
|
||||||
|
"bytes_total": 23676715008
|
||||||
|
},
|
||||||
|
"fsmap": {
|
||||||
|
"epoch": 1,
|
||||||
|
"by_rank": []
|
||||||
|
},
|
||||||
|
"mgrmap": {
|
||||||
|
"epoch": 5,
|
||||||
|
"active_gid": 14097,
|
||||||
|
"active_name": "juju-460e0f-11",
|
||||||
|
"active_addr": "192.168.100.81:6800/204",
|
||||||
|
"available": true,
|
||||||
|
"standbys": [],
|
||||||
|
"modules": [
|
||||||
|
"balancer",
|
||||||
|
"restful",
|
||||||
|
"status"
|
||||||
|
],
|
||||||
|
"available_modules": [
|
||||||
|
"balancer",
|
||||||
|
"dashboard",
|
||||||
|
"influx",
|
||||||
|
"localpool",
|
||||||
|
"prometheus",
|
||||||
|
"restful",
|
||||||
|
"selftest",
|
||||||
|
"status",
|
||||||
|
"zabbix"
|
||||||
|
],
|
||||||
|
"services": {}
|
||||||
|
},
|
||||||
|
"servicemap": {
|
||||||
|
"epoch": 1,
|
||||||
|
"modified": "0.000000",
|
||||||
|
"services": {}
|
||||||
|
}
|
||||||
|
}
|
@ -48,7 +48,7 @@ CHARM_CONFIG = {'config-flags': '',
|
|||||||
'nagios_degraded_thresh': '1',
|
'nagios_degraded_thresh': '1',
|
||||||
'nagios_misplaced_thresh': '10',
|
'nagios_misplaced_thresh': '10',
|
||||||
'nagios_recovery_rate': '1',
|
'nagios_recovery_rate': '1',
|
||||||
'nagios_ignore_nodeepscub': False,
|
'nagios_raise_nodeepscrub': True,
|
||||||
'disable-pg-max-object-skew': False}
|
'disable-pg-max-object-skew': False}
|
||||||
|
|
||||||
|
|
||||||
|
@ -32,6 +32,7 @@ class NagiosTestCase(unittest.TestCase):
|
|||||||
ceph_version = check_ceph_status.get_ceph_version()
|
ceph_version = check_ceph_status.get_ceph_version()
|
||||||
self.assertEqual(ceph_version, [10, 2, 9])
|
self.assertEqual(ceph_version, [10, 2, 9])
|
||||||
|
|
||||||
|
# All OK, pre-luminous
|
||||||
@patch('check_ceph_status.get_ceph_version')
|
@patch('check_ceph_status.get_ceph_version')
|
||||||
def test_health_ok(self, mock_ceph_version, mock_subprocess):
|
def test_health_ok(self, mock_ceph_version, mock_subprocess):
|
||||||
mock_ceph_version.return_value = [10, 2, 9]
|
mock_ceph_version.return_value = [10, 2, 9]
|
||||||
@ -42,6 +43,84 @@ class NagiosTestCase(unittest.TestCase):
|
|||||||
check_output = check_ceph_status.check_ceph_status(args)
|
check_output = check_ceph_status.check_ceph_status(args)
|
||||||
self.assertRegex(check_output, r"^All OK$")
|
self.assertRegex(check_output, r"^All OK$")
|
||||||
|
|
||||||
|
# Warning, pre-luminous
|
||||||
|
@patch('check_ceph_status.get_ceph_version')
|
||||||
|
def test_health_warn(self, mock_ceph_version, mock_subprocess):
|
||||||
|
mock_ceph_version.return_value = [10, 2, 9]
|
||||||
|
with open('unit_tests/ceph_warn.json') as f:
|
||||||
|
tree = f.read()
|
||||||
|
mock_subprocess.return_value = tree.encode('UTF-8')
|
||||||
|
args = check_ceph_status.parse_args("")
|
||||||
|
self.assertRaises(check_ceph_status.WarnError,
|
||||||
|
lambda: check_ceph_status.check_ceph_status(args))
|
||||||
|
|
||||||
|
# Error, pre-luminous, health_critical status
|
||||||
|
@patch('check_ceph_status.get_ceph_version')
|
||||||
|
def test_health_err(self, mock_ceph_version, mock_subprocess):
|
||||||
|
mock_ceph_version.return_value = [10, 2, 9]
|
||||||
|
with open('unit_tests/ceph_crit.json') as f:
|
||||||
|
tree = f.read()
|
||||||
|
mock_subprocess.return_value = tree.encode('UTF-8')
|
||||||
|
args = check_ceph_status.parse_args("")
|
||||||
|
self.assertRaises(check_ceph_status.CriticalError,
|
||||||
|
lambda: check_ceph_status.check_ceph_status(args))
|
||||||
|
|
||||||
|
# Error, pre-luminous, overall HEALTH_ERR
|
||||||
|
@patch('check_ceph_status.get_ceph_version')
|
||||||
|
def test_health_crit(self, mock_ceph_version, mock_subprocess):
|
||||||
|
mock_ceph_version.return_value = [10, 2, 9]
|
||||||
|
with open('unit_tests/ceph_error.json') as f:
|
||||||
|
tree = f.read()
|
||||||
|
mock_subprocess.return_value = tree.encode('UTF-8')
|
||||||
|
args = check_ceph_status.parse_args("")
|
||||||
|
self.assertRaises(check_ceph_status.CriticalError,
|
||||||
|
lambda: check_ceph_status.check_ceph_status(args))
|
||||||
|
|
||||||
|
# Error, pre-luminous, because misplaced ratio is too big
|
||||||
|
@patch('check_ceph_status.get_ceph_version')
|
||||||
|
def test_health_crit_misplaced(self, mock_ceph_version, mock_subprocess):
|
||||||
|
mock_ceph_version.return_value = [10, 2, 9]
|
||||||
|
with open('unit_tests/ceph_params.json') as f:
|
||||||
|
tree = f.read()
|
||||||
|
mock_subprocess.return_value = tree.encode('UTF-8')
|
||||||
|
args = check_ceph_status.parse_args(['--misplaced_thresh', '0.1'])
|
||||||
|
self.assertRaises(check_ceph_status.CriticalError,
|
||||||
|
lambda: check_ceph_status.check_ceph_status(args))
|
||||||
|
|
||||||
|
# Error, pre-luminous, because recovery rate is too low
|
||||||
|
@patch('check_ceph_status.get_ceph_version')
|
||||||
|
def test_health_crit_recovery(self, mock_ceph_version, mock_subprocess):
|
||||||
|
mock_ceph_version.return_value = [10, 2, 9]
|
||||||
|
with open('unit_tests/ceph_params.json') as f:
|
||||||
|
tree = f.read()
|
||||||
|
mock_subprocess.return_value = tree.encode('UTF-8')
|
||||||
|
args = check_ceph_status.parse_args(['--recovery_rate', '400'])
|
||||||
|
self.assertRaises(check_ceph_status.CriticalError,
|
||||||
|
lambda: check_ceph_status.check_ceph_status(args))
|
||||||
|
|
||||||
|
# Warning, pre-luminous, deepscrub
|
||||||
|
@patch('check_ceph_status.get_ceph_version')
|
||||||
|
def test_health_warn_deepscrub(self, mock_ceph_version, mock_subprocess):
|
||||||
|
mock_ceph_version.return_value = [10, 2, 9]
|
||||||
|
with open('unit_tests/ceph_nodeepscrub.json') as f:
|
||||||
|
tree = f.read()
|
||||||
|
mock_subprocess.return_value = tree.encode('UTF-8')
|
||||||
|
args = check_ceph_status.parse_args("")
|
||||||
|
self.assertRaises(check_ceph_status.WarnError,
|
||||||
|
lambda: check_ceph_status.check_ceph_status(args))
|
||||||
|
|
||||||
|
# Error, pre-luminous, deepscrub
|
||||||
|
@patch('check_ceph_status.get_ceph_version')
|
||||||
|
def test_health_crit_deepscrub(self, mock_ceph_version, mock_subprocess):
|
||||||
|
mock_ceph_version.return_value = [10, 2, 9]
|
||||||
|
with open('unit_tests/ceph_nodeepscrub.json') as f:
|
||||||
|
tree = f.read()
|
||||||
|
mock_subprocess.return_value = tree.encode('UTF-8')
|
||||||
|
args = check_ceph_status.parse_args(['--raise_nodeepscrub'])
|
||||||
|
self.assertRaises(check_ceph_status.CriticalError,
|
||||||
|
lambda: check_ceph_status.check_ceph_status(args))
|
||||||
|
|
||||||
|
# All OK, luminous
|
||||||
@patch('check_ceph_status.get_ceph_version')
|
@patch('check_ceph_status.get_ceph_version')
|
||||||
def test_health_ok_luminous(self, mock_ceph_version, mock_subprocess):
|
def test_health_ok_luminous(self, mock_ceph_version, mock_subprocess):
|
||||||
mock_ceph_version.return_value = [12, 2, 0]
|
mock_ceph_version.return_value = [12, 2, 0]
|
||||||
@ -52,62 +131,80 @@ class NagiosTestCase(unittest.TestCase):
|
|||||||
check_output = check_ceph_status.check_ceph_status(args)
|
check_output = check_ceph_status.check_ceph_status(args)
|
||||||
self.assertRegex(check_output, r"^All OK$")
|
self.assertRegex(check_output, r"^All OK$")
|
||||||
|
|
||||||
|
# Warning, luminous
|
||||||
@patch('check_ceph_status.get_ceph_version')
|
@patch('check_ceph_status.get_ceph_version')
|
||||||
def test_health_warn(self, mock_ceph_version, mock_subprocess):
|
def test_health_warn_luminous(self, mock_ceph_version, mock_subprocess):
|
||||||
mock_ceph_version.return_value = [10, 2, 9]
|
|
||||||
with open('unit_tests/ceph_warn.json') as f:
|
|
||||||
tree = f.read()
|
|
||||||
mock_subprocess.return_value = tree.encode('UTF-8')
|
|
||||||
args = check_ceph_status.parse_args(['--degraded_thresh', '1'])
|
|
||||||
self.assertRaises(check_ceph_status.WarnError,
|
|
||||||
lambda: check_ceph_status.check_ceph_status(args))
|
|
||||||
|
|
||||||
@patch('check_ceph_status.get_ceph_version')
|
|
||||||
def test_health_crit(self, mock_ceph_version, mock_subprocess):
|
|
||||||
mock_ceph_version.return_value = [10, 2, 9]
|
|
||||||
with open('unit_tests/ceph_crit.json') as f:
|
|
||||||
tree = f.read()
|
|
||||||
mock_subprocess.return_value = tree.encode('UTF-8')
|
|
||||||
args = check_ceph_status.parse_args(['--degraded_thresh', '1'])
|
|
||||||
self.assertRaises(check_ceph_status.CriticalError,
|
|
||||||
lambda: check_ceph_status.check_ceph_status(args))
|
|
||||||
|
|
||||||
@patch('check_ceph_status.get_ceph_version')
|
|
||||||
def test_health_crit_luminous(self, mock_ceph_version, mock_subprocess):
|
|
||||||
mock_ceph_version.return_value = [12, 2, 0]
|
mock_ceph_version.return_value = [12, 2, 0]
|
||||||
with open('unit_tests/ceph_crit_luminous.json') as f:
|
with open('unit_tests/ceph_many_warnings_luminous.json') as f:
|
||||||
tree = f.read()
|
tree = f.read()
|
||||||
mock_subprocess.return_value = tree.encode('UTF-8')
|
mock_subprocess.return_value = tree.encode('UTF-8')
|
||||||
args = check_ceph_status.parse_args(['--degraded_thresh', '1'])
|
args = check_ceph_status.parse_args("")
|
||||||
self.assertRaises(check_ceph_status.CriticalError,
|
|
||||||
lambda: check_ceph_status.check_ceph_status(args))
|
|
||||||
|
|
||||||
@patch('check_ceph_status.get_ceph_version')
|
|
||||||
def test_health_lotsdegraded(self, mock_ceph_version, mock_subprocess):
|
|
||||||
mock_ceph_version.return_value = [10, 2, 9]
|
|
||||||
with open('unit_tests/ceph_params.json') as f:
|
|
||||||
tree = f.read()
|
|
||||||
mock_subprocess.return_value = tree.encode('UTF-8')
|
|
||||||
args = check_ceph_status.parse_args(['--degraded_thresh', '1'])
|
|
||||||
self.assertRaises(check_ceph_status.CriticalError,
|
|
||||||
lambda: check_ceph_status.check_ceph_status(args))
|
|
||||||
|
|
||||||
@patch('check_ceph_status.get_ceph_version')
|
|
||||||
def test_health_nodeepscrub(self, mock_ceph_version, mock_subprocess):
|
|
||||||
mock_ceph_version.return_value = [10, 2, 9]
|
|
||||||
with open('unit_tests/ceph_nodeepscrub.json') as f:
|
|
||||||
tree = f.read()
|
|
||||||
mock_subprocess.return_value = tree.encode('UTF-8')
|
|
||||||
args = check_ceph_status.parse_args(['--degraded_thresh', '1'])
|
|
||||||
self.assertRaises(check_ceph_status.CriticalError,
|
|
||||||
lambda: check_ceph_status.check_ceph_status(args))
|
|
||||||
|
|
||||||
@patch('check_ceph_status.get_ceph_version')
|
|
||||||
def test_health_nodeepscrubok(self, mock_ceph_version, mock_subprocess):
|
|
||||||
mock_ceph_version.return_value = [10, 2, 9]
|
|
||||||
with open('unit_tests/ceph_nodeepscrub.json') as f:
|
|
||||||
tree = f.read()
|
|
||||||
mock_subprocess.return_value = tree.encode('UTF-8')
|
|
||||||
args = check_ceph_status.parse_args(['--ignore_nodeepscrub'])
|
|
||||||
self.assertRaises(check_ceph_status.WarnError,
|
self.assertRaises(check_ceph_status.WarnError,
|
||||||
lambda: check_ceph_status.check_ceph_status(args))
|
lambda: check_ceph_status.check_ceph_status(args))
|
||||||
|
|
||||||
|
# Error, luminous, because of overall status
|
||||||
|
|
||||||
|
# Error, luminous, because misplaced ratio is too big
|
||||||
|
@patch('check_ceph_status.get_ceph_version')
|
||||||
|
def test_health_critical_misplaced_luminous(self,
|
||||||
|
mock_ceph_version,
|
||||||
|
mock_subprocess):
|
||||||
|
mock_ceph_version.return_value = [12, 2, 0]
|
||||||
|
with open('unit_tests/ceph_many_warnings_luminous.json') as f:
|
||||||
|
tree = f.read()
|
||||||
|
mock_subprocess.return_value = tree.encode('UTF-8')
|
||||||
|
args = check_ceph_status.parse_args(['--misplaced_thresh', '0.1'])
|
||||||
|
self.assertRaises(check_ceph_status.CriticalError,
|
||||||
|
lambda: check_ceph_status.check_ceph_status(args))
|
||||||
|
|
||||||
|
# Error, luminous, because degraded ratio is too big
|
||||||
|
@patch('check_ceph_status.get_ceph_version')
|
||||||
|
def test_health_critical_degraded_luminous(self,
|
||||||
|
mock_ceph_version,
|
||||||
|
mock_subprocess):
|
||||||
|
mock_ceph_version.return_value = [12, 2, 0]
|
||||||
|
with open('unit_tests/ceph_degraded_luminous.json') as f:
|
||||||
|
tree = f.read()
|
||||||
|
mock_subprocess.return_value = tree.encode('UTF-8')
|
||||||
|
args = check_ceph_status.parse_args(['--degraded_thresh', '0.1'])
|
||||||
|
self.assertRaises(check_ceph_status.CriticalError,
|
||||||
|
lambda: check_ceph_status.check_ceph_status(args))
|
||||||
|
|
||||||
|
# Error, luminous, because recovery rate is too low
|
||||||
|
@patch('check_ceph_status.get_ceph_version')
|
||||||
|
def test_health_critical_recovery_luminous(self,
|
||||||
|
mock_ceph_version,
|
||||||
|
mock_subprocess):
|
||||||
|
mock_ceph_version.return_value = [12, 2, 0]
|
||||||
|
with open('unit_tests/ceph_many_warnings_luminous.json') as f:
|
||||||
|
tree = f.read()
|
||||||
|
mock_subprocess.return_value = tree.encode('UTF-8')
|
||||||
|
args = check_ceph_status.parse_args(['--recovery_rate', '20'])
|
||||||
|
self.assertRaises(check_ceph_status.CriticalError,
|
||||||
|
lambda: check_ceph_status.check_ceph_status(args))
|
||||||
|
|
||||||
|
# Warning, luminous, deepscrub
|
||||||
|
@patch('check_ceph_status.get_ceph_version')
|
||||||
|
def test_health_warn_deepscrub_luminous(self,
|
||||||
|
mock_ceph_version,
|
||||||
|
mock_subprocess):
|
||||||
|
mock_ceph_version.return_value = [12, 2, 0]
|
||||||
|
with open('unit_tests/ceph_nodeepscrub_luminous.json') as f:
|
||||||
|
tree = f.read()
|
||||||
|
mock_subprocess.return_value = tree.encode('UTF-8')
|
||||||
|
args = check_ceph_status.parse_args("")
|
||||||
|
self.assertRaises(check_ceph_status.WarnError,
|
||||||
|
lambda: check_ceph_status.check_ceph_status(args))
|
||||||
|
|
||||||
|
# Error, luminous, deepscrub
|
||||||
|
@patch('check_ceph_status.get_ceph_version')
|
||||||
|
def test_health_crit_deepscrub_luminous(self,
|
||||||
|
mock_ceph_version,
|
||||||
|
mock_subprocess):
|
||||||
|
mock_ceph_version.return_value = [12, 2, 0]
|
||||||
|
with open('unit_tests/ceph_nodeepscrub_luminous.json') as f:
|
||||||
|
tree = f.read()
|
||||||
|
mock_subprocess.return_value = tree.encode('UTF-8')
|
||||||
|
args = check_ceph_status.parse_args(['--raise_nodeepscrub'])
|
||||||
|
self.assertRaises(check_ceph_status.CriticalError,
|
||||||
|
lambda: check_ceph_status.check_ceph_status(args))
|
||||||
|
Loading…
Reference in New Issue
Block a user