Merge "Revert "Adapt container health check for built-in podman health checks"" into stable/train
This commit is contained in:
commit
1115e028df
@ -1,92 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
#
|
|
||||||
# Copyright 2018 Red Hat Inc.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
|
||||||
# not use this file except in compliance with the License. You may obtain
|
|
||||||
# a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
||||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
||||||
# License for the specific language governing permissions and limitations
|
|
||||||
# under the License.
|
|
||||||
|
|
||||||
import datetime
|
|
||||||
import re
|
|
||||||
import sys
|
|
||||||
|
|
||||||
# Log file the rsyslog rule writes podman/systemd health check messages to.
HCLOG = '/var/log/collectd/healthchecks.stdout'

# All three patterns start with a year-less syslog timestamp. Syslog pads
# single-digit days with a space ("Jan  5 10:00:00"), so the day field has to
# accept both "05" and " 5".

# systemd announcing that a health check for a container was started.
START_RE = re.compile(
    r'(?P<timestamp>\w{3} [ \d]\d \d{2}\:\d{2}\:\d{2}) '
    r'(?P<host>[\w\-\.\:]*) systemd\[.*\]: '
    r'Started /usr/bin/podman healthcheck run (?P<container_id>\w*)')
# podman "container exec" event; ties the podman PID to a container id/name.
# Container names may contain '-' and '.', not only word characters.
EXEC_RE = re.compile(
    r'(?P<timestamp>\w{3} [ \d]\d \d{2}\:\d{2}\:\d{2}) '
    r'(?P<host>[\w\-\.\:]*) podman\[(?P<pid>\d*)\]: (?P<trash>.*) '
    r'container exec (?P<container_id>\w*) '
    r'\(.*name=(?P<container_name>[\w\-\.]*).*\)')
# podman reporting the outcome ("healthy"/"unhealthy") under the same PID.
RESULT_RE = re.compile(
    r'(?P<timestamp>\w{3} [ \d]\d \d{2}\:\d{2}\:\d{2}) '
    r'(?P<host>[\w\-\.\:]*) podman\[(?P<pid>\d*)\]: '
    r'(?P<result>(un)?healthy)')

# Leap year prepended to the year-less timestamps before parsing. Without an
# explicit year strptime assumes 1900, which has no Feb 29.
_PARSE_YEAR = '2000'


def _parse_timestamp(stamp):
    """Parse a year-less syslog timestamp such as "Jan  5 10:00:00".

    Raises ValueError when the text does not match the expected format.
    """
    return datetime.datetime.strptime('{} {}'.format(_PARSE_YEAR, stamp),
                                      '%Y %b %d %H:%M:%S')


def process_healthcheck_output(path_to_log):
    """Process saved output of health checks and return unhealthy containers.

    The log is read line by line. START lines record when a check began,
    EXEC lines map the podman PID to the container, and RESULT lines (looked
    up through that PID) carry the outcome. Once everything has been read,
    the log given by ``path_to_log`` is emptied.

    :param path_to_log: path of the health check log; it has to exist and be
                        writable because it is opened in "r+" mode.
    :returns: list of messages, one per container whose last seen result is
              not "healthy". The check duration is included only when both
              the START line and parsable timestamps were available.
    :raises OSError: if the log file cannot be opened for reading/writing.
    """
    data = {}
    pid_map = {}
    with open(path_to_log, "r+") as logfile:
        for line in logfile:
            match = START_RE.search(line)
            if match:
                item = data.setdefault(match.group('container_id'), {})
                item['timestamp_start'] = match.group('timestamp')
                item['host'] = match.group('host')
                continue

            match = EXEC_RE.search(line)
            if match:
                item = data.setdefault(match.group('container_id'), {})
                item['container_name'] = match.group('container_name')
                item['host'] = match.group('host')
                item['pid'] = match.group('pid')
                pid_map[match.group('pid')] = match.group('container_id')
                continue

            match = RESULT_RE.search(line)
            if not match:
                continue
            # A result can only be attributed through a previously seen
            # EXEC line with the same PID; otherwise it is ignored.
            container_id = pid_map.get(match.group('pid'))
            if container_id is None:
                continue
            item = data[container_id]
            item['result'] = match.group('result')
            if 'timestamp_start' not in item:
                continue
            try:
                start = _parse_timestamp(item['timestamp_start'])
                end = _parse_timestamp(match.group('timestamp'))
            except ValueError as ex:
                err = "[WARN] Failure during calculating duration: {}"
                print(err.format(ex))
                continue
            item['duration'] = (end - start).seconds

        # Empty the log that was just processed. The position is at EOF
        # after the loop, so rewind first; truncating at EOF changes nothing.
        logfile.seek(0)
        logfile.truncate()

    with_duration = ('{container_name}: Container health check on host '
                     '{host} results as {result} after {duration}s.')
    # Used when the START line was missing or a timestamp was unparsable,
    # so that the unhealthy container is still reported.
    without_duration = ('{container_name}: Container health check on host '
                        '{host} results as {result}.')

    unhealthy = []
    for container in data.values():
        if container.get('result', 'healthy') == 'healthy':
            continue
        log = with_duration if 'duration' in container else without_duration
        unhealthy.append(log.format(**container))
    return unhealthy
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Script entry point: parse the default health check log and, when any
    # container is reported, print all messages on one line separated by
    # ' ; ' and exit with status 2. Otherwise fall through silently.
    failed_checks = process_healthcheck_output(HCLOG)
    if failed_checks:
        summary = ' ; '.join(failed_checks)
        print(summary)
        sys.exit(2)
|
|
@ -330,7 +330,18 @@ parameters:
|
|||||||
default: true
|
default: true
|
||||||
CollectdContainerHealthCheckCommand:
|
CollectdContainerHealthCheckCommand:
|
||||||
type: string
|
type: string
|
||||||
default: "/scripts/collectd_check_health.py"
|
default: |
|
||||||
|
output=""
|
||||||
|
while read line ; do
|
||||||
|
i=$(echo $line | awk '//{gsub(/:/, "", $0); print $5}')
|
||||||
|
log=$(echo $line | awk '{split($0,a,/:\s+Error:\s+/); print a[2]}')
|
||||||
|
log=${log:0:-1}
|
||||||
|
output+=" ; ${i}: ${log}"
|
||||||
|
done < <(egrep "^[a-zA-Z]{3}\s+[0-9]{2}\s+[0-9\:]{8}\s+.*\s+.*:\s+[Ee]rror\:" /var/log/collectd/healthchecks.log)
|
||||||
|
truncate -s0 /var/log/collectd/healthchecks.log
|
||||||
|
if [ ! -z "${output}" ]; then
|
||||||
|
echo ${output:3} && exit 2;
|
||||||
|
fi
|
||||||
CollectdContainerHealthCheckInterval:
|
CollectdContainerHealthCheckInterval:
|
||||||
type: number
|
type: number
|
||||||
description: The frequency in seconds the docker health check is executed.
|
description: The frequency in seconds the docker health check is executed.
|
||||||
@ -644,12 +655,6 @@ outputs:
|
|||||||
- path: /var/log/collectd
|
- path: /var/log/collectd
|
||||||
owner: collectd:collectd
|
owner: collectd:collectd
|
||||||
recurse: true
|
recurse: true
|
||||||
container_config_scripts:
|
|
||||||
map_merge:
|
|
||||||
- {get_attr: [ContainersCommon, container_config_scripts]}
|
|
||||||
- collectd_check_health.py:
|
|
||||||
mode: "0755"
|
|
||||||
content: { get_file: ../../container_config_scripts/monitoring/collectd_check_health.py }
|
|
||||||
docker_config:
|
docker_config:
|
||||||
step_5:
|
step_5:
|
||||||
collectd:
|
collectd:
|
||||||
@ -671,7 +676,6 @@ outputs:
|
|||||||
- /var/lib/config-data/puppet-generated/collectd:/var/lib/kolla/config_files/src:ro
|
- /var/lib/config-data/puppet-generated/collectd:/var/lib/kolla/config_files/src:ro
|
||||||
- /var/log/containers/collectd:/var/log/collectd:rw,z
|
- /var/log/containers/collectd:/var/log/collectd:rw,z
|
||||||
- /var/run/:/var/run:rw
|
- /var/run/:/var/run:rw
|
||||||
- /var/lib/container-config-scripts:/scripts:ro
|
|
||||||
- /sys/fs/cgroup:/sys/fs/cgroup:ro
|
- /sys/fs/cgroup:/sys/fs/cgroup:ro
|
||||||
environment:
|
environment:
|
||||||
KOLLA_CONFIG_STRATEGY: COPY_ALWAYS
|
KOLLA_CONFIG_STRATEGY: COPY_ALWAYS
|
||||||
@ -699,7 +703,7 @@ outputs:
|
|||||||
copy:
|
copy:
|
||||||
dest: /etc/rsyslog.d/openstack-healthcheck.conf
|
dest: /etc/rsyslog.d/openstack-healthcheck.conf
|
||||||
content: |
|
content: |
|
||||||
if ($programname startswith 'podman' and ($msg contains 'container exec' or $msg contains 'healthy')) or ($programname startswith 'systemd' and $msg contains 'podman healthcheck run') then -/var/log/containers/collectd/healthchecks.stdout
|
if $programname startswith 'healthcheck_' then -/var/log/containers/collectd/healthchecks.log
|
||||||
& stop
|
& stop
|
||||||
- name: Remove healthcheck log
|
- name: Remove healthcheck log
|
||||||
when:
|
when:
|
||||||
|
Loading…
Reference in New Issue
Block a user