From 9203e69983fcaf45d5927a31d84449218e8cf81c Mon Sep 17 00:00:00 2001 From: Martin Magr Date: Wed, 14 Oct 2020 00:10:09 +0200 Subject: [PATCH] Return details in output of container health check This patch reformats the check-container-health script for sensubility to output JSON-formatted data instead of semicolon-separated data. It also removes the calculation of duration for each container health check (HC) to keep the runtime shorter. Change-Id: I18bcde4b6031c79deae3f6c9ee6f2c4bb754be88 (cherry picked from commit f84655ed550f59580628646e158ca23585a13b63) --- .../monitoring/collectd_check_health.py | 45 ++++++++----------- 1 file changed, 18 insertions(+), 27 deletions(-) diff --git a/container_config_scripts/monitoring/collectd_check_health.py b/container_config_scripts/monitoring/collectd_check_health.py index eea75ec31b..4dbbdfebbc 100755 --- a/container_config_scripts/monitoring/collectd_check_health.py +++ b/container_config_scripts/monitoring/collectd_check_health.py @@ -14,10 +14,11 @@ # License for the specific language governing permissions and limitations # under the License. 
-import datetime +import json import re import sys + HCLOG = '/var/log/collectd/healthchecks.stdout' START_RE = re.compile( r'(?P\w{3} \d{2} \d{2}\:\d{2}\:\d{2}) (?P[\w\-\.\:]*) systemd\[.*\]: Started /usr/bin/podman healthcheck run (?P\w*)') @@ -55,38 +56,28 @@ def process_healthcheck_output(path_to_log): continue item = data[pid_map[match.group('pid')]] item['result'] = match.group('result') - if 'timestamp_start' not in item: - continue - try: - start = datetime.datetime.strptime(item['timestamp_start'], - '%b %d %H:%M:%S') - end = datetime.datetime.strptime(match.group('timestamp'), - '%b %d %H:%M:%S') - item['duration'] = (end - start).seconds - except Exception as ex: - err = "[WARN] Failure during calculating duration: {}" - print(err.format(ex)) - continue - logfile.truncate() + item['timestamp_end'] = match.group('timestamp') # truncate the file with open(HCLOG, "w") as logfile: pass - unhealthy = [] - for container in data.values(): - if 'result' not in container: + rc, output = 0, [] + for cid, item in data.items(): + if 'result' not in item: continue - if container['result'] == 'healthy': - continue - log = ('{container_name}: Container health check on host {host} ' - 'results as {result} after {duration}s.') - unhealthy.append(log.format(**container)) - return unhealthy + if item['result'] != 'healthy' and rc != 2: + rc = 2 if item['result'] == 'unhealthy' else 1 + output.append({ + 'container': cid, + 'service': item['container_name'], + 'status': item['result'], + 'healthy': int(item['result'] == 'healthy'), + }) + return rc, output if __name__ == "__main__": - unhealthy = process_healthcheck_output(HCLOG) - if unhealthy: - print(' ; '.join(unhealthy)) - sys.exit(2) + rc, status = process_healthcheck_output(HCLOG) + print(json.dumps(status)) + sys.exit(rc)