122 lines
3.8 KiB
Python
Executable File
122 lines
3.8 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
# Copyright (C) 2021 Canonical
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
import json
|
|
import os
|
|
import sys
|
|
import time
|
|
|
|
|
|
# Exit statuses this check hands to sys.exit(); the numeric value doubles as
# the index into EXIT_CODE_TEXT.
EXIT_OK, EXIT_WARN, EXIT_CRIT, EXIT_UNKNOWN = range(4)
# Human-readable label printed for each exit status above.
EXIT_CODE_TEXT = ["OK", "WARN", "CRITICAL", "UNKNOWN"]

# JSON file describing the current OSD tree; read by get_osd_tree().
CURRENT_OSD_COUNT_FILE = "/var/lib/nagios/current-ceph-osd-count.json"
|
|
|
|
|
|
class CriticalError(Exception):
    """Signal a critical failure detected by one of the checks in this file.

    The exception message carries the human-readable reason.
    """
|
|
|
|
|
|
def check_file_freshness(filename, newer_than=3600):
    """Check a file exists, is readable and is newer than <n> seconds.

    Returns ``None`` when every check passes.

    :param filename: The filename to check
    :type filename: str
    :param newer_than: The file should be newer than n seconds, default 3600
    :type newer_than: int
    :raises CriticalError: If the file does not exist, is not readable,
        cannot be stat'ed, is older than <n> seconds, or has a modification
        time in the future
    """
    # First check the file exists and is readable
    if not os.path.exists(filename):
        raise CriticalError("%s: does not exist." % (filename))
    if not os.access(filename, os.R_OK):
        raise CriticalError("%s: is not readable." % (filename))

    # Then ensure the file is up-to-date enough.  The file may still vanish
    # (or become inaccessible) between the checks above and this stat()
    # call; report that as the CriticalError callers are told to expect
    # rather than leaking a raw OSError.
    try:
        mtime = os.stat(filename).st_mtime
    except OSError as err:
        raise CriticalError("%s: could not be inspected (%s)."
                            % (filename, err)) from err

    last_modified = time.time() - mtime
    if last_modified > newer_than:
        raise CriticalError("%s: was last modified on %s and is too old "
                            "(> %s seconds)."
                            % (filename, time.ctime(mtime), newer_than))
    # A negative age means the mtime lies ahead of the current clock.
    if last_modified < 0:
        raise CriticalError("%s: was last modified on %s which is in the "
                            "future."
                            % (filename, time.ctime(mtime)))
|
|
|
|
|
|
def check_ceph_osd_count(host_osd_count_report):
    """Compare the expected OSDs per host against the current OSD tree.

    The expected state is loaded as JSON from ``host_osd_count_report`` and
    is treated as a mapping of host name to a list of OSD ids.  The current
    state comes from ``get_osd_tree()``.  A host is only inspected further
    when it currently has fewer OSDs than expected; in that case the
    expected ids that are absent from the current tree are reported.

    :param host_osd_count_report: Path of the JSON file holding the
        expected host to OSD ids mapping
    :type host_osd_count_report: str
    :returns: The exit code (EXIT_OK or EXIT_CRIT) and the list of error
        messages that were collected
    :rtype: Tuple[int, List[str]]
    :raises CriticalError: Propagated from get_osd_tree() when the current
        OSD file fails its freshness check
    """
    with open(host_osd_count_report, "r") as f:
        expected_osd_map = json.load(f)

    current_osd_map = get_osd_tree()

    exit_code = EXIT_OK
    err_msgs = []
    for host, osd_list in expected_osd_map.items():
        if host in current_osd_map:
            current_osds = current_osd_map[host]
        else:
            # An expected host that is absent from the tree is a failure on
            # its own, even when no OSDs were expected on it; otherwise the
            # check would print an error message with an OK status.
            err_msgs.append("Missing host {}".format(host))
            exit_code = EXIT_CRIT
            current_osds = []

        # Only the count is compared here: a host with at least as many
        # OSDs as expected is accepted even if the ids differ.
        if len(osd_list) <= len(current_osds):
            continue

        missing_osds = set(osd_list) - set(current_osds)
        if missing_osds:
            # Sort so the message is stable between runs.
            try:
                ordered_osds = sorted(missing_osds)
            except TypeError:
                # Ids of mixed types (e.g. int and str) are not comparable.
                ordered_osds = sorted(missing_osds, key=str)
            osd_ids = [str(osd) for osd in ordered_osds]
            err_msgs.append("Missing osds on "
                            "{}: {}".format(host,
                                            ", ".join(osd_ids)))
            exit_code = EXIT_CRIT

    return (exit_code, err_msgs)
|
|
|
|
|
|
def get_osd_tree():
    """Build the host to OSD map from CURRENT_OSD_COUNT_FILE.

    The file must pass check_file_freshness() before it is parsed as JSON.
    Only entries of the top-level "nodes" list whose "type" is "host" are
    kept.

    :return: Mapping of each host node's "name" to its "children" value.
    :rtype: dict
    :raises CriticalError: If CURRENT_OSD_COUNT_FILE fails the freshness
        check
    """
    check_file_freshness(CURRENT_OSD_COUNT_FILE)
    with open(CURRENT_OSD_COUNT_FILE, "r") as tree_file:
        osd_tree = json.load(tree_file)

    return {
        entry["name"]: entry["children"]
        for entry in osd_tree["nodes"]
        if entry["type"] == "host"
    }
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: takes the expected host/OSD report file as the
    # only argument, prints a one-line status and exits with the matching
    # EXIT_* code.
    if len(sys.argv) < 2:
        # Without this guard a missing argument raises IndexError and the
        # interpreter exits with status 1, which equals EXIT_WARN.
        print("UNKNOWN: usage: {} <host-osd-report-file>".format(sys.argv[0]))
        sys.exit(EXIT_UNKNOWN)

    host_osd_report = sys.argv[1]
    if not os.path.isfile(host_osd_report):
        print("UNKNOWN: report file missing: {}".format(host_osd_report))
        sys.exit(EXIT_UNKNOWN)

    try:
        (exit_code, err_msgs) = check_ceph_osd_count(host_osd_report)
    except CriticalError as err:
        # Raised when the current OSD file is missing, unreadable or stale.
        # Left uncaught it would end in a traceback and exit status 1.
        print("{} {}".format(EXIT_CODE_TEXT[EXIT_CRIT], err))
        sys.exit(EXIT_CRIT)
    except (OSError, ValueError, KeyError, TypeError, AttributeError) as err:
        # Unreadable files, invalid JSON (JSONDecodeError is a ValueError)
        # or JSON with an unexpected structure: the state cannot be judged.
        print("{}: unable to evaluate osd count: {!r}".format(
            EXIT_CODE_TEXT[EXIT_UNKNOWN], err))
        sys.exit(EXIT_UNKNOWN)

    print("{} {}".format(EXIT_CODE_TEXT[exit_code],
                         ", ".join(err_msgs)))
    sys.exit(exit_code)
|