Merge "Return details in output of container health check" into stable/ussuri

This commit is contained in:
Zuul 2020-10-20 00:54:15 +00:00 committed by Gerrit Code Review
commit 83b50464c2
1 changed file with 18 additions and 27 deletions

View File

@@ -14,10 +14,11 @@
# License for the specific language governing permissions and limitations # License for the specific language governing permissions and limitations
# under the License. # under the License.
import datetime import json
import re import re
import sys import sys
HCLOG = '/var/log/collectd/healthchecks.stdout' HCLOG = '/var/log/collectd/healthchecks.stdout'
START_RE = re.compile( START_RE = re.compile(
r'(?P<timestamp>\w{3} \d{2} \d{2}\:\d{2}\:\d{2}) (?P<host>[\w\-\.\:]*) systemd\[.*\]: Started /usr/bin/podman healthcheck run (?P<container_id>\w*)') r'(?P<timestamp>\w{3} \d{2} \d{2}\:\d{2}\:\d{2}) (?P<host>[\w\-\.\:]*) systemd\[.*\]: Started /usr/bin/podman healthcheck run (?P<container_id>\w*)')
@@ -55,38 +56,28 @@ def process_healthcheck_output(path_to_log):
continue continue
item = data[pid_map[match.group('pid')]] item = data[pid_map[match.group('pid')]]
item['result'] = match.group('result') item['result'] = match.group('result')
if 'timestamp_start' not in item: item['timestamp_end'] = match.group('timestamp')
continue
try:
start = datetime.datetime.strptime(item['timestamp_start'],
'%b %d %H:%M:%S')
end = datetime.datetime.strptime(match.group('timestamp'),
'%b %d %H:%M:%S')
item['duration'] = (end - start).seconds
except Exception as ex:
err = "[WARN] Failure during calculating duration: {}"
print(err.format(ex))
continue
logfile.truncate()
# truncate the file # truncate the file
with open(HCLOG, "w") as logfile: with open(HCLOG, "w") as logfile:
pass pass
unhealthy = [] rc, output = 0, []
for container in data.values(): for cid, item in data.items():
if 'result' not in container: if 'result' not in item:
continue continue
if container['result'] == 'healthy': if item['result'] != 'healthy' and rc != 2:
continue rc = 2 if item['result'] == 'unhealthy' else 1
log = ('{container_name}: Container health check on host {host} ' output.append({
'results as {result} after {duration}s.') 'container': cid,
unhealthy.append(log.format(**container)) 'service': item['container_name'],
return unhealthy 'status': item['result'],
'healthy': int(item['result'] == 'healthy'),
})
return rc, output
if __name__ == "__main__": if __name__ == "__main__":
unhealthy = process_healthcheck_output(HCLOG) rc, status = process_healthcheck_output(HCLOG)
if unhealthy: print(json.dumps(status))
print(' ; '.join(unhealthy)) sys.exit(rc)
sys.exit(2)