Improve Swift healthchecks
This patch adds checks for the replicators.
It also removes some unused or invalid code from other checks. Checking
the modification time of the recon files is not enough: these files may
also be changed by other Swift processes, so they are not a good
indicator of stuck processes.
Co-Authored-By: Christian Schwede <cschwede@redhat.com>
Co-Authored-By: Emilien Macchi <emilien@redhat.com>
Change-Id: Ib15f1ec4766bf4d64a2860422c230e4d514bc224
(cherry picked from commit 0862fcf98f)
This commit is contained in:
parent
a0c7845485
commit
7948344c88
|
@ -1002,15 +1002,6 @@ RUN mkdir -p /openstack && \
|
|||
chmod a+rx /openstack/healthcheck
|
||||
{% endblock %}
|
||||
|
||||
# Note(mmagr): Below block is currently noop because swift-proxy-server kolla image is used
|
||||
# for swift_object_expirer container, but it will allow smooth transition
|
||||
# to usage of correct image for the service in future.
|
||||
{% block swift_object_expirer_footer %}
|
||||
RUN mkdir -p /openstack && \
|
||||
ln -s /usr/share/openstack-tripleo-common/healthcheck/swift-object-expirer /openstack/healthcheck && \
|
||||
chmod a+rx /openstack/healthcheck
|
||||
{% endblock %}
|
||||
|
||||
{% block swift_proxy_server_footer %}
|
||||
RUN mkdir -p /openstack && \
|
||||
ln -s /usr/share/openstack-tripleo-common/healthcheck/swift-proxy /openstack/healthcheck && \
|
||||
|
|
|
@ -109,3 +109,16 @@ get_url_from_vhost () {
|
|||
fi
|
||||
echo ${proto}://${server_name}:${bind_port}${wsgi_alias}
|
||||
}
|
||||
|
||||
# Verify that a Swift replicator finished a replication pass recently.
#
# Globals:
#   conf  (read) - config file passed to get_config_val to look up "interval"
#   cache (read) - recon cache file searched for "replication_last"
# Arguments:
#   $1 - service name without the "swift-" prefix, e.g. account-replicator
# Outputs:
#   On failure, a one-line explanation on stdout.
# Returns:
#   0 when the service is not running, when no usable timestamp can be read,
#   or when the last pass is recent enough. Calls "exit 1" (terminating the
#   calling script) when the last pass is older than the interval.
check_swift_interval () {
    local service=$1
    local interval last now

    # "ps -e" prints only the short command name, which Linux truncates to 15
    # characters, so long names such as swift-account-replicator never
    # matched. Search the full command line instead; the [s] bracket keeps
    # grep from matching its own entry in the process list.
    if ! ps -ef | grep --quiet "[s]wift-${service}"; then
        return 0
    fi

    interval=$(get_config_val "$conf" "$service" interval 300)

    # Keep only the integer part of the first "replication_last" entry.
    last=$(grep -o '"replication_last": [0-9]*' "$cache" \
        | head -n 1 \
        | cut -f 2 -d " ")

    case "$last" in
        ''|*[!0-9]*)
            # No usable timestamp: there is nothing to compare against, so
            # report healthy as before, but without the expr/test syntax
            # errors that an empty value used to trigger.
            return 0
            ;;
    esac

    now=$(date +%s)
    if [ $((now - last)) -gt "$interval" ]; then
        echo "Last replication run did not finish within interval of $interval seconds."
        exit 1
    fi
}
|
||||
|
|
|
@ -0,0 +1,8 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Source the shared healthcheck helpers; HEALTHCHECK_SCRIPTS overrides the
# default directory. common.sh is expected to provide check_swift_interval.
. ${HEALTHCHECK_SCRIPTS:-/usr/share/openstack-tripleo-common/healthcheck}/common.sh
|
||||
|
||||
# Config file that check_swift_interval passes to get_config_val to look up
# the replicator's "interval" setting.
conf=/etc/swift/account-server.conf
|
||||
# Recon cache file that check_swift_interval searches for "replication_last".
cache=/var/cache/swift/account.recon
|
||||
|
||||
# Prints a message and exits 1 when the last recorded replication is older
# than the configured interval.
check_swift_interval account-replicator
|
|
@ -19,17 +19,4 @@ if ps -e | grep --quiet swift-account-server; then
|
|||
bind_host="[${bind_host}]"
|
||||
fi
|
||||
healthcheck_curl http://${bind_host}:${bind_port}/healthcheck
|
||||
else
|
||||
if ps -e | grep --quiet swift-account-auditor; then
|
||||
interval=$(get_config_val $conf account-auditor interval 1800)
|
||||
elif ps -e | grep --quiet swift-account-reaper; then
|
||||
interval=$(get_config_val $conf account-reaper interval 3600)
|
||||
else
|
||||
interval=$(get_config_val $conf account-replicator interval 30)
|
||||
fi
|
||||
|
||||
if ! healthcheck_file_modification $cache $interval; then
|
||||
echo "Cache file $cache was not updated within interval of $interval seconds."
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
|
|
@ -0,0 +1,8 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Source the shared healthcheck helpers; HEALTHCHECK_SCRIPTS overrides the
# default directory. common.sh is expected to provide check_swift_interval.
. ${HEALTHCHECK_SCRIPTS:-/usr/share/openstack-tripleo-common/healthcheck}/common.sh
|
||||
|
||||
# Config file that check_swift_interval passes to get_config_val to look up
# the replicator's "interval" setting.
conf=/etc/swift/container-server.conf
|
||||
# Recon cache file that check_swift_interval searches for "replication_last".
cache=/var/cache/swift/container.recon
|
||||
|
||||
# Prints a message and exits 1 when the last recorded replication is older
# than the configured interval.
check_swift_interval container-replicator
|
|
@ -19,17 +19,4 @@ if ps -e | grep --quiet swift-container-server; then
|
|||
bind_host="[${bind_host}]"
|
||||
fi
|
||||
healthcheck_curl http://${bind_host}:${bind_port}/healthcheck
|
||||
else
|
||||
if ps -e | grep --quiet swift-account-auditor; then
|
||||
interval=$(get_config_val $conf container-auditor interval 1800)
|
||||
elif ps -e | grep --quiet swift-account-reaper; then
|
||||
interval=$(get_config_val $conf container-replicator interval 30)
|
||||
else
|
||||
interval=$(get_config_val $conf container-updater interval 300)
|
||||
fi
|
||||
|
||||
if ! healthcheck_file_modification $cache $interval; then
|
||||
echo "Cache file $cache was not updated within interval of $interval seconds."
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
|
|
@ -1,12 +0,0 @@
|
|||
#!/bin/sh
|
||||
|
||||
. ${HEALTHCHECK_SCRIPTS:-/usr/share/openstack-tripleo-common/healthcheck}/common.sh
|
||||
|
||||
conf=/etc/swift/object-expirer.conf
|
||||
cache=/var/cache/swift/object.recon
|
||||
interval=$(get_config_val $conf object-expirer interval 300)
|
||||
|
||||
if ! healthcheck_file_modification $cache $interval; then
|
||||
echo "Cache file $cache was not updated within interval of $interval seconds."
|
||||
exit 1
|
||||
fi
|
|
@ -0,0 +1,8 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Source the shared healthcheck helpers; HEALTHCHECK_SCRIPTS overrides the
# default directory. common.sh is expected to provide check_swift_interval.
. ${HEALTHCHECK_SCRIPTS:-/usr/share/openstack-tripleo-common/healthcheck}/common.sh
|
||||
|
||||
# Config file that check_swift_interval passes to get_config_val to look up
# the replicator's "interval" setting.
conf=/etc/swift/object-server.conf
|
||||
# Recon cache file that check_swift_interval searches for "replication_last".
cache=/var/cache/swift/object.recon
|
||||
|
||||
# Prints a message and exits 1 when the last recorded replication is older
# than the configured interval.
check_swift_interval object-replicator
|
|
@ -19,26 +19,4 @@ if ps -e | grep --quiet swift-object-server; then
|
|||
bind_host="[${bind_host}]"
|
||||
fi
|
||||
healthcheck_curl http://${bind_host}:${bind_port}/healthcheck
|
||||
elif ps -e | grep --quiet rsync; then
|
||||
process='rsync'
|
||||
args="${@:-873}"
|
||||
|
||||
if ! healthcheck_listen $process $args; then
|
||||
ports=${args// /,}
|
||||
echo "There is no $process process, listening on port(s) $ports, running in the container."
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
if ps -e | grep --quiet swift-account-auditor; then
|
||||
interval=$(get_config_val $conf object-auditor interval 300)
|
||||
elif ps -e | grep --quiet swift-account-reaper; then
|
||||
interval=$(get_config_val $conf object-replicator interval 300)
|
||||
else
|
||||
interval=$(get_config_val $conf object-updater interval 300)
|
||||
fi
|
||||
|
||||
if ! healthcheck_file_modification $cache $interval; then
|
||||
echo "Cache file $cache was not updated within interval of $interval seconds."
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
|
|
@ -18,15 +18,4 @@ if pgrep -f swift-proxy-server; then
|
|||
bind_host="[${bind_host}]"
|
||||
fi
|
||||
healthcheck_curl http://${bind_host}:${bind_port}/healthcheck
|
||||
else
|
||||
# TODO(mmagr): Remove this once swift_object_expirer container will start
|
||||
# using swift-object-expirer kolla image
|
||||
conf=/etc/swift/object-expirer.conf
|
||||
cache=/var/cache/swift/object.recon
|
||||
interval=$(get_config_val $conf object-expirer interval 300)
|
||||
|
||||
if ! healthcheck_file_modification $cache $interval; then
|
||||
echo "Cache file $cache was not updated within interval of $interval seconds."
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
features:
|
||||
- |
|
||||
Adds additional healthchecks for Swift to monitor account,
|
||||
container and object replicators as well as the rsync process.
|
Loading…
Reference in New Issue