From 0a10aaba1bb4265fc2ce7d98b19938de6279fad6 Mon Sep 17 00:00:00 2001 From: Martin Magr Date: Fri, 9 Oct 2020 17:09:26 +0200 Subject: [PATCH] Revert "Adapt container health check for built-in podman health checks" This reverts commit 31a1f9c8ed4ad8a212e146cc59e5273048570b32. In train health checks are still scheduled and executed by systemd. So there is no need for adaptation to podman managed health checks. Change-Id: I1e43a1ee5a72afabb0f3ba650c9dd40d0a29d6ac --- .../monitoring/collectd_check_health.py | 92 ------------------- .../metrics/collectd-container-puppet.yaml | 22 +++-- 2 files changed, 13 insertions(+), 101 deletions(-) delete mode 100755 container_config_scripts/monitoring/collectd_check_health.py diff --git a/container_config_scripts/monitoring/collectd_check_health.py b/container_config_scripts/monitoring/collectd_check_health.py deleted file mode 100755 index eea75ec31b..0000000000 --- a/container_config_scripts/monitoring/collectd_check_health.py +++ /dev/null @@ -1,92 +0,0 @@ -#!/usr/bin/env python3 -# -# Copyright 2018 Red Hat Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import datetime -import re -import sys - -HCLOG = '/var/log/collectd/healthchecks.stdout' -START_RE = re.compile( - r'(?P\w{3} \d{2} \d{2}\:\d{2}\:\d{2}) (?P[\w\-\.\:]*) systemd\[.*\]: Started /usr/bin/podman healthcheck run (?P\w*)') -EXEC_RE = re.compile( - r'(?P\w{3} \d{2} \d{2}\:\d{2}\:\d{2}) (?P[\w\-\.\:]*) podman\[(?P\d*)\]: (?P.*) container exec (?P\w*) \(.*name=(?P\w*).*\)') -RESULT_RE = re.compile( - r'(?P\w{3} \d{2} \d{2}\:\d{2}\:\d{2}) (?P[\w\-\.\:]*) podman\[(?P\d*)\]: (?P(un)?healthy)') - - -def process_healthcheck_output(path_to_log): - """Process saved output of health checks and returns list of unhealthy - containers. - """ - data = {} - pid_map = {} - with open(path_to_log, "r+") as logfile: - for line in logfile: - match = START_RE.search(line) - if match: - item = data.setdefault(match.group('container_id'), {}) - item['timestamp_start'] = match.group('timestamp') - item['host'] = match.group('host') - continue - match = EXEC_RE.search(line) - if match: - item = data.setdefault(match.group('container_id'), {}) - item['container_name'] = match.group('container_name') - item['host'] = match.group('host') - item['pid'] = match.group('pid') - pid_map[match.group('pid')] = match.group('container_id') - continue - match = RESULT_RE.search(line) - if match: - if match.group('pid') not in pid_map: - continue - item = data[pid_map[match.group('pid')]] - item['result'] = match.group('result') - if 'timestamp_start' not in item: - continue - try: - start = datetime.datetime.strptime(item['timestamp_start'], - '%b %d %H:%M:%S') - end = datetime.datetime.strptime(match.group('timestamp'), - '%b %d %H:%M:%S') - item['duration'] = (end - start).seconds - except Exception as ex: - err = "[WARN] Failure during calculating duration: {}" - print(err.format(ex)) - continue - logfile.truncate() - - # truncate the file - with open(HCLOG, "w") as logfile: - pass - - unhealthy = [] - for container in data.values(): - if 'result' not in container: - continue - if container['result'] == 'healthy': - continue - log = ('{container_name}: Container health check on host {host} ' - 'results as {result} after {duration}s.') - unhealthy.append(log.format(**container)) - return unhealthy - - -if __name__ == "__main__": - unhealthy = process_healthcheck_output(HCLOG) - if unhealthy: - print(' ; '.join(unhealthy)) - sys.exit(2) diff --git a/deployment/metrics/collectd-container-puppet.yaml b/deployment/metrics/collectd-container-puppet.yaml index 7c11f7bcf7..73099a14bb 100644 --- a/deployment/metrics/collectd-container-puppet.yaml +++ b/deployment/metrics/collectd-container-puppet.yaml @@ -330,7 +330,18 @@ parameters: default: true CollectdContainerHealthCheckCommand: type: string - default: "/scripts/collectd_check_health.py" + default: | + output="" + while read line ; do + i=$(echo $line | awk '//{gsub(/:/, "", $0); print $5}') + log=$(echo $line | awk '{split($0,a,/:\s+Error:\s+/); print a[2]}') + log=${log:0:-1} + output+=" ; ${i}: ${log}" + done < <(egrep "^[a-zA-Z]{3}\s+[0-9]{2}\s+[0-9\:]{8}\s+.*\s+.*:\s+[Ee]rror\:" /var/log/collectd/healthchecks.log) + truncate -s0 /var/log/collectd/healthchecks.log + if [ ! -z "${output}" ]; then + echo ${output:3} && exit 2; + fi CollectdContainerHealthCheckInterval: type: number description: The frequency in seconds the docker health check is executed. @@ -629,12 +640,6 @@ outputs: - path: /var/log/collectd owner: collectd:collectd recurse: true - container_config_scripts: - map_merge: - - {get_attr: [ContainersCommon, container_config_scripts]} - - collectd_check_health.py: - mode: "0755" - content: { get_file: ../../container_config_scripts/monitoring/collectd_check_health.py } docker_config: step_5: collectd: @@ -656,7 +661,6 @@ outputs: - /var/lib/config-data/puppet-generated/collectd:/var/lib/kolla/config_files/src:ro - /var/log/containers/collectd:/var/log/collectd:rw,z - /var/run/:/var/run:rw - - /var/lib/container-config-scripts:/scripts:ro - /sys/fs/cgroup:/sys/fs/cgroup:ro environment: KOLLA_CONFIG_STRATEGY: COPY_ALWAYS @@ -684,7 +688,7 @@ outputs: copy: dest: /etc/rsyslog.d/openstack-healthcheck.conf content: | - if ($programname startswith 'podman' and ($msg contains 'container exec' or $msg contains 'healthy')) or ($programname startswith 'systemd' and $msg contains 'podman healthcheck run') then -/var/log/containers/collectd/healthchecks.stdout + if $programname startswith 'healthcheck_' then -/var/log/containers/collectd/healthchecks.log & stop - name: Remove healthcheck log when: