Browse Source

Return details in output of container health check

This patch reformats the check-container-health script for sensubility to
output JSON-formatted data instead of semicolon-separated data. It also removes
the calculation of duration for each container health check to keep the runtime shorter.

Change-Id: I18bcde4b6031c79deae3f6c9ee6f2c4bb754be88
changes/61/757961/2
Martin Magr 8 months ago
parent
commit
f84655ed55
1 changed file with 18 additions and 27 deletions
  1. +18
    -27
      container_config_scripts/monitoring/collectd_check_health.py

+ 18
- 27
container_config_scripts/monitoring/collectd_check_health.py View File

@ -14,10 +14,11 @@
# License for the specific language governing permissions and limitations
# under the License.
import datetime
import json
import re
import sys
HCLOG = '/var/log/collectd/healthchecks.stdout'
START_RE = re.compile(
r'(?P<timestamp>\w{3} \d{2} \d{2}\:\d{2}\:\d{2}) (?P<host>[\w\-\.\:]*) systemd\[.*\]: Started /usr/bin/podman healthcheck run (?P<container_id>\w*)')
@ -55,38 +56,28 @@ def process_healthcheck_output(path_to_log):
continue
item = data[pid_map[match.group('pid')]]
item['result'] = match.group('result')
if 'timestamp_start' not in item:
continue
try:
start = datetime.datetime.strptime(item['timestamp_start'],
'%b %d %H:%M:%S')
end = datetime.datetime.strptime(match.group('timestamp'),
'%b %d %H:%M:%S')
item['duration'] = (end - start).seconds
except Exception as ex:
err = "[WARN] Failure during calculating duration: {}"
print(err.format(ex))
continue
logfile.truncate()
item['timestamp_end'] = match.group('timestamp')
# truncate the file
with open(HCLOG, "w") as logfile:
pass
unhealthy = []
for container in data.values():
if 'result' not in container:
continue
if container['result'] == 'healthy':
rc, output = 0, []
for cid, item in data.items():
if 'result' not in item:
continue
log = ('{container_name}: Container health check on host {host} '
'results as {result} after {duration}s.')
unhealthy.append(log.format(**container))
return unhealthy
if item['result'] != 'healthy' and rc != 2:
rc = 2 if item['result'] == 'unhealthy' else 1
output.append({
'container': cid,
'service': item['container_name'],
'status': item['result'],
'healthy': int(item['result'] == 'healthy'),
})
return rc, output
if __name__ == "__main__":
unhealthy = process_healthcheck_output(HCLOG)
if unhealthy:
print(' ; '.join(unhealthy))
sys.exit(2)
rc, status = process_healthcheck_output(HCLOG)
print(json.dumps(status))
sys.exit(rc)

Loading…
Cancel
Save