Revert "Adapt container health check for built-in podman health checks"

This reverts commit 31a1f9c8ed.

In the Train release, health checks are still scheduled and executed by systemd,
so there is no need to adapt to podman-managed health checks.

Change-Id: I1e43a1ee5a72afabb0f3ba650c9dd40d0a29d6ac
This commit is contained in:
Martin Magr 2020-10-09 17:09:26 +02:00
parent b28d02caac
commit 0a10aaba1b
2 changed files with 13 additions and 101 deletions

View File

@ -1,92 +0,0 @@
#!/usr/bin/env python3
#
# Copyright 2018 Red Hat Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import datetime
import re
import sys
# Log file this script parses for health check records.
HCLOG = '/var/log/collectd/healthchecks.stdout'

# Record emitted when systemd starts "podman healthcheck run <container_id>".
START_RE = re.compile(
    r'(?P<timestamp>\w{3} \d{2} \d{2}\:\d{2}\:\d{2}) '
    r'(?P<host>[\w\-\.\:]*) '
    r'systemd\[.*\]: Started /usr/bin/podman healthcheck run '
    r'(?P<container_id>\w*)'
)

# podman "container exec" record; carries the podman pid, the container id
# and the container name.
EXEC_RE = re.compile(
    r'(?P<timestamp>\w{3} \d{2} \d{2}\:\d{2}\:\d{2}) '
    r'(?P<host>[\w\-\.\:]*) '
    r'podman\[(?P<pid>\d*)\]: '
    r'(?P<trash>.*) container exec (?P<container_id>\w*) '
    r'\(.*name=(?P<container_name>\w*).*\)'
)

# podman verdict record ("healthy" / "unhealthy"), identified by podman pid.
RESULT_RE = re.compile(
    r'(?P<timestamp>\w{3} \d{2} \d{2}\:\d{2}\:\d{2}) '
    r'(?P<host>[\w\-\.\:]*) '
    r'podman\[(?P<pid>\d*)\]: '
    r'(?P<result>(un)?healthy)'
)
def process_healthcheck_output(path_to_log):
    """Parse the health check log and report unhealthy containers.

    The log is read line by line and three kinds of records are correlated:

    * ``START_RE``: systemd started ``podman healthcheck run`` for a
      container id; the timestamp is remembered as the start of the check.
    * ``EXEC_RE``: podman ``container exec`` record; links the podman pid
      to the container id and provides the container name.
    * ``RESULT_RE``: the ``healthy``/``unhealthy`` verdict, attributed to a
      container through the podman pid seen in an earlier exec record.

    Once the whole file has been read, the file at ``path_to_log`` is
    emptied.

    :param path_to_log: path of the log file to parse; it is opened for
                        both reading and writing
    :returns: list of messages, one for each container whose last recorded
              result is not ``healthy``
    :raises OSError: if the log file cannot be opened in ``r+`` mode
    """
    data = {}
    pid_map = {}
    with open(path_to_log, "r+") as logfile:
        for line in logfile:
            match = START_RE.search(line)
            if match:
                item = data.setdefault(match.group('container_id'), {})
                item['timestamp_start'] = match.group('timestamp')
                item['host'] = match.group('host')
                continue

            match = EXEC_RE.search(line)
            if match:
                item = data.setdefault(match.group('container_id'), {})
                item['container_name'] = match.group('container_name')
                item['host'] = match.group('host')
                item['pid'] = match.group('pid')
                pid_map[match.group('pid')] = match.group('container_id')
                continue

            match = RESULT_RE.search(line)
            # A verdict can only be attributed through a pid that was seen
            # in an earlier exec record.
            if not match or match.group('pid') not in pid_map:
                continue
            item = data[pid_map[match.group('pid')]]
            item['result'] = match.group('result')
            if 'timestamp_start' not in item:
                # No start record, so the duration cannot be computed.
                continue
            try:
                # The log timestamps carry no year; both values are parsed
                # with the same default year before being subtracted.
                start = datetime.datetime.strptime(item['timestamp_start'],
                                                   '%b %d %H:%M:%S')
                end = datetime.datetime.strptime(match.group('timestamp'),
                                                 '%b %d %H:%M:%S')
                item['duration'] = (end - start).seconds
            except ValueError as ex:
                err = "[WARN] Failure during calculating duration: {}"
                print(err.format(ex))

        # Empty the file that was actually read. A bare truncate() here
        # would cut at the current (end-of-file) position and be a no-op,
        # so rewind first.
        logfile.seek(0)
        logfile.truncate()

    unhealthy = []
    for container in data.values():
        if container.get('result', 'healthy') == 'healthy':
            continue
        log = '{container_name}: Container health check on host {host} '
        if 'duration' in container:
            log += 'results as {result} after {duration}s.'
        else:
            # Duration is unknown (missing start record or unparsable
            # timestamp); still report the container instead of failing
            # with KeyError.
            log += 'results as {result}.'
        unhealthy.append(log.format(**container))
    return unhealthy
if __name__ == "__main__":
    # Script entry point: print the collected reports joined by " ; " and
    # exit with status 2 when at least one container is unhealthy.
    unhealthy = process_healthcheck_output(HCLOG)
    if len(unhealthy) > 0:
        report = ' ; '.join(unhealthy)
        print(report)
        sys.exit(2)

View File

@ -330,7 +330,18 @@ parameters:
default: true default: true
CollectdContainerHealthCheckCommand: CollectdContainerHealthCheckCommand:
type: string type: string
default: "/scripts/collectd_check_health.py" default: |
output=""
while read line ; do
i=$(echo $line | awk '//{gsub(/:/, "", $0); print $5}')
log=$(echo $line | awk '{split($0,a,/:\s+Error:\s+/); print a[2]}')
log=${log:0:-1}
output+=" ; ${i}: ${log}"
done < <(egrep "^[a-zA-Z]{3}\s+[0-9]{2}\s+[0-9\:]{8}\s+.*\s+.*:\s+[Ee]rror\:" /var/log/collectd/healthchecks.log)
truncate -s0 /var/log/collectd/healthchecks.log
if [ ! -z "${output}" ]; then
echo ${output:3} && exit 2;
fi
CollectdContainerHealthCheckInterval: CollectdContainerHealthCheckInterval:
type: number type: number
description: The frequency in seconds the docker health check is executed. description: The frequency in seconds the docker health check is executed.
@ -629,12 +640,6 @@ outputs:
- path: /var/log/collectd - path: /var/log/collectd
owner: collectd:collectd owner: collectd:collectd
recurse: true recurse: true
container_config_scripts:
map_merge:
- {get_attr: [ContainersCommon, container_config_scripts]}
- collectd_check_health.py:
mode: "0755"
content: { get_file: ../../container_config_scripts/monitoring/collectd_check_health.py }
docker_config: docker_config:
step_5: step_5:
collectd: collectd:
@ -656,7 +661,6 @@ outputs:
- /var/lib/config-data/puppet-generated/collectd:/var/lib/kolla/config_files/src:ro - /var/lib/config-data/puppet-generated/collectd:/var/lib/kolla/config_files/src:ro
- /var/log/containers/collectd:/var/log/collectd:rw,z - /var/log/containers/collectd:/var/log/collectd:rw,z
- /var/run/:/var/run:rw - /var/run/:/var/run:rw
- /var/lib/container-config-scripts:/scripts:ro
- /sys/fs/cgroup:/sys/fs/cgroup:ro - /sys/fs/cgroup:/sys/fs/cgroup:ro
environment: environment:
KOLLA_CONFIG_STRATEGY: COPY_ALWAYS KOLLA_CONFIG_STRATEGY: COPY_ALWAYS
@ -684,7 +688,7 @@ outputs:
copy: copy:
dest: /etc/rsyslog.d/openstack-healthcheck.conf dest: /etc/rsyslog.d/openstack-healthcheck.conf
content: | content: |
if ($programname startswith 'podman' and ($msg contains 'container exec' or $msg contains 'healthy')) or ($programname startswith 'systemd' and $msg contains 'podman healthcheck run') then -/var/log/containers/collectd/healthchecks.stdout if $programname startswith 'healthcheck_' then -/var/log/containers/collectd/healthchecks.log
& stop & stop
- name: Remove healthcheck log - name: Remove healthcheck log
when: when: