Merge "Return details in output of container health check" into stable/train
This commit is contained in:
commit
834dc3fdaa
|
@ -0,0 +1,70 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# Copyright 2018 Red Hat Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
# not use this file except in compliance with the License. You may obtain
|
||||||
|
# a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
# License for the specific language governing permissions and limitations
|
||||||
|
# under the License.
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Path of the health check log that is processed when the script is run
# directly.
HCLOG = '/var/log/collectd/healthchecks.log'

# Matches any health check log entry of the form
# " healthcheck_<service>[<id>]".
SERVICE_REGX = re.compile(r"""
    \shealthcheck_(?P<service_name>\w+)   # service
    \[(?P<id>\d+)\]                       # pid
""", re.VERBOSE)

# Matches a health check log entry that reports an error, i.e.
# " healthcheck_<service>[<id>]: Error: <message>".
# NOTE: with re.VERBOSE an unescaped space in the pattern is ignored, so the
# whitespace between "Error:" and the message is matched explicitly with \s*
# to keep it out of the captured ``error`` group.
ERROR_REGX = re.compile(r"""
    \shealthcheck_(?P<service_name>\w+)   # service
    \[(?P<id>\d+)\]                       # pid
    :\s[Ee]rror:\s*(?P<error>.+)          # error
""", re.VERBOSE)
|
||||||
|
|
||||||
|
|
||||||
|
def process_healthcheck_output(logfile):
    """Summarise the saved output of health checks and reset the log.

    Every line of ``logfile`` is scanned for ``healthcheck_<service>[<id>]``
    entries. A service is recorded as healthy the first time it is seen and
    is switched to unhealthy as soon as one of its lines carries an error
    message; a later line without an error does not switch it back. After
    the log has been read it is truncated, so each call only reports entries
    written since the previous call.

    :param logfile: path of the health check log to read and truncate
    :returns: tuple ``(ret_code, output)``; ``ret_code`` is 2 when at least
              one service is unhealthy and 0 otherwise, ``output`` is a list
              with one dict per service holding the keys ``service``,
              ``container`` (the number found in the square brackets),
              ``status`` and ``healthy``
    :raises OSError: when ``logfile`` cannot be opened for reading or writing
    """
    data = {}
    with open(logfile, 'r') as logs:
        for line in logs:
            # First sighting of a service: assume it is healthy.
            match = SERVICE_REGX.search(line)
            if match and match.group('service_name') not in data:
                data[match.group('service_name')] = {
                    'service': match.group('service_name'),
                    'container': match.group('id'),
                    'status': 'healthy',
                    'healthy': 1,
                }
            # An error entry always overrides whatever was recorded before.
            match = ERROR_REGX.search(line)
            if match:
                data[match.group('service_name')] = {
                    'service': match.group('service_name'),
                    'container': match.group('id'),
                    'status': 'unhealthy',
                    'healthy': 0,
                }

    # Truncate the log so the next run only sees new entries.
    with open(logfile, 'w'):
        pass

    # Signal failure (2) only when some service is NOT healthy; the check
    # used to be inverted and flagged healthy services instead.
    output = list(data.values())
    ret_code = 2 if any(opt['healthy'] < 1 for opt in output) else 0
    return ret_code, output
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Print the per-service report as JSON, then exit with the check result.
    EXIT_STATUS, REPORT = process_healthcheck_output(HCLOG)
    print(json.dumps(REPORT))
    raise SystemExit(EXIT_STATUS)
|
|
@ -330,18 +330,7 @@ parameters:
|
||||||
default: true
|
default: true
|
||||||
CollectdContainerHealthCheckCommand:
|
CollectdContainerHealthCheckCommand:
|
||||||
type: string
|
type: string
|
||||||
default: |
|
default: /scripts/collectd_check_health.py
|
||||||
output=""
|
|
||||||
while read line ; do
|
|
||||||
i=$(echo $line | awk '//{gsub(/:/, "", $0); print $5}')
|
|
||||||
log=$(echo $line | awk '{split($0,a,/:\s+Error:\s+/); print a[2]}')
|
|
||||||
log=${log:0:-1}
|
|
||||||
output+=" ; ${i}: ${log}"
|
|
||||||
done < <(egrep "^[a-zA-Z]{3}\s+[0-9]{2}\s+[0-9\:]{8}\s+.*\s+.*:\s+[Ee]rror\:" /var/log/collectd/healthchecks.log)
|
|
||||||
truncate -s0 /var/log/collectd/healthchecks.log
|
|
||||||
if [ ! -z "${output}" ]; then
|
|
||||||
echo ${output:3} && exit 2;
|
|
||||||
fi
|
|
||||||
CollectdContainerHealthCheckInterval:
|
CollectdContainerHealthCheckInterval:
|
||||||
type: number
|
type: number
|
||||||
description: The interval, in seconds, at which the docker health check is executed.
|
description: The interval, in seconds, at which the docker health check is executed.
|
||||||
|
@ -666,6 +655,12 @@ outputs:
|
||||||
- path: /var/log/collectd
|
- path: /var/log/collectd
|
||||||
owner: collectd:collectd
|
owner: collectd:collectd
|
||||||
recurse: true
|
recurse: true
|
||||||
|
container_config_scripts:
|
||||||
|
map_merge:
|
||||||
|
- {get_attr: [ContainersCommon, container_config_scripts]}
|
||||||
|
- collectd_check_health.py:
|
||||||
|
mode: "0755"
|
||||||
|
content: { get_file: ../../container_config_scripts/monitoring/collectd_check_health.py }
|
||||||
docker_config:
|
docker_config:
|
||||||
step_5:
|
step_5:
|
||||||
collectd:
|
collectd:
|
||||||
|
@ -687,6 +682,7 @@ outputs:
|
||||||
- /var/lib/config-data/puppet-generated/collectd:/var/lib/kolla/config_files/src:ro
|
- /var/lib/config-data/puppet-generated/collectd:/var/lib/kolla/config_files/src:ro
|
||||||
- /var/log/containers/collectd:/var/log/collectd:rw,z
|
- /var/log/containers/collectd:/var/log/collectd:rw,z
|
||||||
- /var/run/:/var/run:rw
|
- /var/run/:/var/run:rw
|
||||||
|
- /var/lib/container-config-scripts:/scripts:ro
|
||||||
- /sys/fs/cgroup:/sys/fs/cgroup:ro
|
- /sys/fs/cgroup:/sys/fs/cgroup:ro
|
||||||
environment:
|
environment:
|
||||||
KOLLA_CONFIG_STRATEGY: COPY_ALWAYS
|
KOLLA_CONFIG_STRATEGY: COPY_ALWAYS
|
||||||
|
|
Loading…
Reference in New Issue