|
|
|
@ -14,10 +14,11 @@
|
|
|
|
|
# License for the specific language governing permissions and limitations
|
|
|
|
|
# under the License.
|
|
|
|
|
|
|
|
|
|
import datetime
|
|
|
|
|
import json
|
|
|
|
|
import re
|
|
|
|
|
import sys
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Path of the healthcheck log file. The script entry point passes it to
# process_healthcheck_output(), and it is also opened in "w" mode to truncate it.
HCLOG = '/var/log/collectd/healthchecks.stdout'
|
|
|
|
|
# Matches a log line in which systemd reports "Started /usr/bin/podman
# healthcheck run <id>". Named groups: `timestamp` (three word characters,
# a two-digit day, then HH:MM:SS), `host`, and `container_id`.
# NOTE(review): the day is matched with \d{2}; if the log pads single-digit
# days with a space (e.g. "Jan  1"), those lines would not match - confirm
# against the actual log format.
START_RE = re.compile(
|
|
|
|
|
r'(?P<timestamp>\w{3} \d{2} \d{2}\:\d{2}\:\d{2}) (?P<host>[\w\-\.\:]*) systemd\[.*\]: Started /usr/bin/podman healthcheck run (?P<container_id>\w*)')
|
|
|
|
@ -55,38 +56,28 @@ def process_healthcheck_output(path_to_log):
|
|
|
|
|
continue
|
|
|
|
|
item = data[pid_map[match.group('pid')]]
|
|
|
|
|
item['result'] = match.group('result')
|
|
|
|
|
if 'timestamp_start' not in item:
|
|
|
|
|
continue
|
|
|
|
|
try:
|
|
|
|
|
start = datetime.datetime.strptime(item['timestamp_start'],
|
|
|
|
|
'%b %d %H:%M:%S')
|
|
|
|
|
end = datetime.datetime.strptime(match.group('timestamp'),
|
|
|
|
|
'%b %d %H:%M:%S')
|
|
|
|
|
item['duration'] = (end - start).seconds
|
|
|
|
|
except Exception as ex:
|
|
|
|
|
err = "[WARN] Failure during calculating duration: {}"
|
|
|
|
|
print(err.format(ex))
|
|
|
|
|
continue
|
|
|
|
|
logfile.truncate()
|
|
|
|
|
item['timestamp_end'] = match.group('timestamp')
|
|
|
|
|
|
|
|
|
|
# truncate the file
|
|
|
|
|
with open(HCLOG, "w") as logfile:
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
unhealthy = []
|
|
|
|
|
for container in data.values():
|
|
|
|
|
if 'result' not in container:
|
|
|
|
|
continue
|
|
|
|
|
if container['result'] == 'healthy':
|
|
|
|
|
rc, output = 0, []
|
|
|
|
|
for cid, item in data.items():
|
|
|
|
|
if 'result' not in item:
|
|
|
|
|
continue
|
|
|
|
|
log = ('{container_name}: Container health check on host {host} '
|
|
|
|
|
'results as {result} after {duration}s.')
|
|
|
|
|
unhealthy.append(log.format(**container))
|
|
|
|
|
return unhealthy
|
|
|
|
|
if item['result'] != 'healthy' and rc != 2:
|
|
|
|
|
rc = 2 if item['result'] == 'unhealthy' else 1
|
|
|
|
|
output.append({
|
|
|
|
|
'container': cid,
|
|
|
|
|
'service': item['container_name'],
|
|
|
|
|
'status': item['result'],
|
|
|
|
|
'healthy': int(item['result'] == 'healthy'),
|
|
|
|
|
})
|
|
|
|
|
return rc, output
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Script entry point: runs process_healthcheck_output() on HCLOG, prints the
# result and exits with a status code.
# NOTE(review): this span looks like diff residue that interleaves two versions
# of the entry point with indentation lost - confirm which lines are live
# against the real file before changing anything here.
if __name__ == "__main__":
|
|
|
|
|
# Variant 1: treats the return value as a list of message strings; when it is
# non-empty, prints the messages joined with ' ; ' and exits with status 2.
unhealthy = process_healthcheck_output(HCLOG)
|
|
|
|
|
if unhealthy:
|
|
|
|
|
print(' ; '.join(unhealthy))
|
|
|
|
|
sys.exit(2)
|
|
|
|
|
# Variant 2: unpacks an (rc, status) pair, prints `status` serialized as JSON
# and uses `rc` as the process exit status.
rc, status = process_healthcheck_output(HCLOG)
|
|
|
|
|
print(json.dumps(status))
|
|
|
|
|
sys.exit(rc)
|
|
|
|
|