Browse Source

Revert "Adapt container health check for built-in podman health checks"

This reverts commit 31a1f9c8ed.

In the Train release, health checks are still scheduled and executed by
systemd, so there is no need to adapt to podman-managed health checks.

Change-Id: I1e43a1ee5a72afabb0f3ba650c9dd40d0a29d6ac
changes/28/757128/1
Martin Magr 8 months ago
parent
commit
0a10aaba1b
2 changed files with 13 additions and 101 deletions
  1. +0
    -92
      container_config_scripts/monitoring/collectd_check_health.py
  2. +13
    -9
      deployment/metrics/collectd-container-puppet.yaml

+ 0
- 92
container_config_scripts/monitoring/collectd_check_health.py View File

@ -1,92 +0,0 @@
#!/usr/bin/env python3
#
# Copyright 2018 Red Hat Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import datetime
import re
import sys
# Default location of the saved health check output that this script consumes.
HCLOG = '/var/log/collectd/healthchecks.stdout'

# strptime format of the timestamp captured by the patterns below.
_TIMESTAMP_FORMAT = '%b %d %H:%M:%S'

# systemd announcing that a "podman healthcheck run <container_id>" started.
START_RE = re.compile(
    r'(?P<timestamp>\w{3} \d{2} \d{2}\:\d{2}\:\d{2}) '
    r'(?P<host>[\w\-\.\:]*) systemd\[.*\]: '
    r'Started /usr/bin/podman healthcheck run (?P<container_id>\w*)')

# podman reporting the "container exec" event; ties a podman pid to a
# container id and container name.
EXEC_RE = re.compile(
    r'(?P<timestamp>\w{3} \d{2} \d{2}\:\d{2}\:\d{2}) '
    r'(?P<host>[\w\-\.\:]*) podman\[(?P<pid>\d*)\]: '
    r'(?P<trash>.*) container exec (?P<container_id>\w*) '
    r'\(.*name=(?P<container_name>\w*).*\)')

# podman reporting the verdict ("healthy" / "unhealthy") for a given pid.
RESULT_RE = re.compile(
    r'(?P<timestamp>\w{3} \d{2} \d{2}\:\d{2}\:\d{2}) '
    r'(?P<host>[\w\-\.\:]*) podman\[(?P<pid>\d*)\]: '
    r'(?P<result>(un)?healthy)')


def process_healthcheck_output(path_to_log):
    """Process saved health check output and report unhealthy containers.

    The log is scanned line by line for three kinds of records: the start
    of a health check (START_RE), the exec event that maps a podman pid to
    a container (EXEC_RE) and the result reported by that pid (RESULT_RE).
    Once the whole file has been read it is emptied, so each call only
    reports records written since the previous call.

    :param path_to_log: path of the log file to read and then empty; it
                        must already exist and be writable
    :returns: list of human readable messages, one per container whose
              last seen result was not 'healthy'; empty when all is fine
    :raises OSError: if the log file cannot be opened for reading/writing
    """
    data = {}
    pid_map = {}
    with open(path_to_log, "r+") as logfile:
        for line in logfile:
            match = START_RE.search(line)
            if match:
                item = data.setdefault(match.group('container_id'), {})
                item['timestamp_start'] = match.group('timestamp')
                item['host'] = match.group('host')
                continue

            match = EXEC_RE.search(line)
            if match:
                item = data.setdefault(match.group('container_id'), {})
                item['container_name'] = match.group('container_name')
                item['host'] = match.group('host')
                item['pid'] = match.group('pid')
                pid_map[match.group('pid')] = match.group('container_id')
                continue

            match = RESULT_RE.search(line)
            # A result can only be attributed through a pid seen in an
            # earlier exec record; anything else is ignored.
            if not match or match.group('pid') not in pid_map:
                continue
            item = data[pid_map[match.group('pid')]]
            item['result'] = match.group('result')
            if 'timestamp_start' not in item:
                # No start record seen, so the duration is unknown.
                continue
            try:
                start = datetime.datetime.strptime(item['timestamp_start'],
                                                   _TIMESTAMP_FORMAT)
                end = datetime.datetime.strptime(match.group('timestamp'),
                                                 _TIMESTAMP_FORMAT)
            except ValueError as ex:
                err = "[WARN] Failure during calculating duration: {}"
                print(err.format(ex))
                continue
            item['duration'] = (end - start).seconds

        # Empty the file that was actually read. After iterating, the file
        # position is at EOF, so truncate() alone would keep all content;
        # rewind first.
        logfile.seek(0)
        logfile.truncate()

    unhealthy = []
    for container in data.values():
        if container.get('result', 'healthy') == 'healthy':
            continue
        if 'duration' in container:
            log = ('{container_name}: Container health check on host {host} '
                   'results as {result} after {duration}s.')
        else:
            # The duration is missing when no start record was logged or
            # its timestamp could not be parsed; still report the failure
            # instead of crashing on the missing key.
            log = ('{container_name}: Container health check on host {host} '
                   'results as {result}.')
        unhealthy.append(log.format(**container))
    return unhealthy
if __name__ == "__main__":
    # Script entry point: print every unhealthy container report on a single
    # line, separated by ' ; ', and exit with status 2 when there is at
    # least one; otherwise fall through and exit normally.
    report = ' ; '.join(process_healthcheck_output(HCLOG))
    if report:
        print(report)
        sys.exit(2)

+ 13
- 9
deployment/metrics/collectd-container-puppet.yaml View File

@ -330,7 +330,18 @@ parameters:
default: true
CollectdContainerHealthCheckCommand:
type: string
default: "/scripts/collectd_check_health.py"
default: |
output=""
while read line ; do
i=$(echo $line | awk '//{gsub(/:/, "", $0); print $5}')
log=$(echo $line | awk '{split($0,a,/:\s+Error:\s+/); print a[2]}')
log=${log:0:-1}
output+=" ; ${i}: ${log}"
done < <(egrep "^[a-zA-Z]{3}\s+[0-9]{2}\s+[0-9\:]{8}\s+.*\s+.*:\s+[Ee]rror\:" /var/log/collectd/healthchecks.log)
truncate -s0 /var/log/collectd/healthchecks.log
if [ ! -z "${output}" ]; then
echo ${output:3} && exit 2;
fi
CollectdContainerHealthCheckInterval:
type: number
description: The frequency in seconds the docker health check is executed.
@ -629,12 +640,6 @@ outputs:
- path: /var/log/collectd
owner: collectd:collectd
recurse: true
container_config_scripts:
map_merge:
- {get_attr: [ContainersCommon, container_config_scripts]}
- collectd_check_health.py:
mode: "0755"
content: { get_file: ../../container_config_scripts/monitoring/collectd_check_health.py }
docker_config:
step_5:
collectd:
@ -656,7 +661,6 @@ outputs:
- /var/lib/config-data/puppet-generated/collectd:/var/lib/kolla/config_files/src:ro
- /var/log/containers/collectd:/var/log/collectd:rw,z
- /var/run/:/var/run:rw
- /var/lib/container-config-scripts:/scripts:ro
- /sys/fs/cgroup:/sys/fs/cgroup:ro
environment:
KOLLA_CONFIG_STRATEGY: COPY_ALWAYS
@ -684,7 +688,7 @@ outputs:
copy:
dest: /etc/rsyslog.d/openstack-healthcheck.conf
content: |
if ($programname startswith 'podman' and ($msg contains 'container exec' or $msg contains 'healthy')) or ($programname startswith 'systemd' and $msg contains 'podman healthcheck run') then -/var/log/containers/collectd/healthchecks.stdout
if $programname startswith 'healthcheck_' then -/var/log/containers/collectd/healthchecks.log
& stop
- name: Remove healthcheck log
when:


Loading…
Cancel
Save