tripleo-heat-templates/container_config_scripts/pacemaker_wait_bundle.sh

334 lines
12 KiB
Bash
Executable File

#!/bin/bash
# ----
# Wait for an OCF resource or a bundle to be restarted
# ----
# e.g.:
# M/S OCF: $0 galera galera-bundle Master
# clone OCF: $0 rabbitmq rabbitmq-bundle Started
# A/P M/S OCF: $0 redis redis-bundle Slave Master
# A/P bundle: $0 openstack-cinder-volume openstack-cinder-volume _ Started
# clone bundle: $0 haproxy-bundle haproxy-bundle Started
# design note 1:
# - this script is called during a minor update; it is called
# once per node that hosts a service replica.
# - the purpose of this script is to ensure that restarting the
# service replica locally won't disrupt the service availability
# for the end user. To reach that goal, the script waits until the
# service is restarted locally or globally and reaches a given
# target state (i.e. Started, Slave or Master).
# design note 2:
# - we don't want to track restart error: our only job is to ensure
# service restart synchronization, not service health.
# - In particular, we don't want to error out in case the resource
# cannot be restarted locally, because that would make the minor
# update fail, even if potentially other replicas still provide
# the service.
# design note 3:
# - we can bail out early if we determine that the resource can't
# be restarted automatically by pacemaker (e.g. it is "blocked",
# unmanaged or disabled).
# Print a message on stdout, prefixed with the current UTC date.
# Arguments: $1 - text of the message
# Outputs:   a single "<date>: <message>" line on stdout
log() {
    local msg=$1
    printf '%s: %s\n' "$(date -u)" "${msg}"
}
# Print the command-line synopsis and terminate the script.
# Arguments: none
# Outputs:   the usage line, on stderr
# Returns:   never returns; exits the shell with status 1
usage() {
    # Diagnostics belong on stderr: ">&2" redirects echo's stdout there
    # ("2>&1" would leave the message on stdout).
    echo >&2 "Usage: $0 NAME BUNDLE_NAME ROLE_LOCAL [ROLE_ANYWHERE] [HOST] [TIMEOUT]"
    exit 1
}
# Tell whether the installed pacemaker schemas know the "Promoted" role.
# Arguments: none
# Returns:   the status of grep: 0 when one of the
#            /usr/share/pacemaker/resources-*.rng files contains the
#            Promoted value, non-zero otherwise
pacemaker_supports_promoted() {
    # The Promoted token only shows up in recent pacemaker versions
    local -r token="<value>Promoted</value>"
    grep -q -w "${token}" /usr/share/pacemaker/resources-*.rng
}
#
# Utility functions to detect stuck resources
#
# Print the failcount of every replica of the bundle on host ${HOST}.
# Globals:   BUNDLE_CONTAINER_ENGINE, BUNDLE_REPLICAS, BUNDLE_NAME,
#            HOST (all read)
# Arguments: none
# Outputs:   the output of one crm_failcount query per replica
#            (<bundle>-<engine>-<index>, index 0..BUNDLE_REPLICAS-1)
bundle_failures_locally() {
    local engine=$BUNDLE_CONTAINER_ENGINE
    local replicas=$BUNDLE_REPLICAS
    local replica_name
    # keep the loop counter local so it cannot clobber a global $i
    local i
    # an empty replica count evaluates to 0 here: no query is issued
    for ((i = 0; i < replicas; i++)); do
        replica_name=${BUNDLE_NAME}-${engine}-${i}
        crm_failcount -q -G -r "${replica_name}" -N "${HOST}"
    done
}
# Print the failcount of every replica of the bundle, without
# restricting the query to a given node.
# Globals:   BUNDLE_CONTAINER_ENGINE, BUNDLE_REPLICAS, BUNDLE_NAME (read)
# Arguments: none
# Outputs:   the output of one crm_failcount query per replica
#            (<bundle>-<engine>-<index>, index 0..BUNDLE_REPLICAS-1)
bundle_failures_globally() {
    local engine=$BUNDLE_CONTAINER_ENGINE
    local replicas=$BUNDLE_REPLICAS
    # keep the loop counter local so it cannot clobber a global $i
    local i
    # an empty replica count evaluates to 0 here: no query is issued
    for ((i = 0; i < replicas; i++)); do
        crm_failcount -q -G -r "${BUNDLE_NAME}-${engine}-${i}"
    done
}
# Print the number of running replicas of the bundle, i.e. the number
# of nodes listed under the bundle's container (docker/podman)
# resources in the crm_mon XML output.
# Globals:   BUNDLE_CONTAINER_ENGINE, BUNDLE_NAME, OCF (read)
# Arguments: none
# Outputs:   the result of the XPath count() expression, as printed by
#            xmllint
bundle_running_globally() {
    local engine=$BUNDLE_CONTAINER_ENGINE
    local agent="${OCF}:heartbeat:${engine}"
    local count_xpath="count(//resources/bundle[@id='${BUNDLE_NAME}']/replica/resource[@resource_agent='${agent}']/node)"
    crm_mon --as-xml | xmllint --xpath "${count_xpath}" -
}
# Print the failcount of the OCF resource ${NAME} on every bundle node
# (<bundle>-<index>, index 0..BUNDLE_REPLICAS-1).
# Globals:   BUNDLE_REPLICAS, BUNDLE_NAME, NAME (read)
# Arguments: none
# Outputs:   the output of one crm_failcount query per bundle node
ocf_failures_globally() {
    local replicas=$BUNDLE_REPLICAS
    local bundle_node
    # keep the loop counter local so it cannot clobber a global $i
    local i
    # an empty replica count evaluates to 0 here: no query is issued
    for ((i = 0; i < replicas; i++)); do
        bundle_node=${BUNDLE_NAME}-${i}
        crm_failcount -q -G -r "${NAME}" -N "${bundle_node}"
    done
}
# Tell whether the resource can no longer be restarted on ${HOST}.
# Globals:   NAME, BUNDLE_NAME, HOST, OCF, BUNDLE_REPLICAS (read)
# Arguments: none
# Returns:   0 when the resource is considered failed locally,
#            non-zero otherwise
did_resource_failed_locally() {
    local failures
    local running
    local remotehost
    local remote_xpath
    if [ "${NAME}" != "${BUNDLE_NAME}" ]; then
        # if we're dealing with an ocf resource, it is running on a
        # pacemaker_remote rather than on the real host, and the
        # failcounts are thus associated to the pcmk remote. Replace
        # the host's name with the pcmk remote's name.
        remote_xpath="string(//resources/bundle[@id='${BUNDLE_NAME}']/replica/resource/node[@name='${HOST}']/../../resource[@resource_agent='${OCF}:pacemaker:remote']/@id)"
        remotehost=$(crm_mon --as-xml | xmllint --xpath "${remote_xpath}" -)
        if [ -n "${remotehost}" ]; then
            crm_failcount -q -G -r "${NAME}" -N "${remotehost}" | grep -q -w INFINITY
            return $?
        fi
        # If no pcmk remote is currently running, the failcount from
        # the ocf resource is useless, compute the failcount from the
        # bundle case instead (computed below).
    fi
    # for bundles, pacemaker can run any bundle replica locally
    # (e.g. galera-bundle-docker-{0,1,2}), and a failure happens when
    # there are no more replica to try.
    # That is, when _at least_ one replica failed locally, and all the
    # others either failed or are currently running elsewhere.
    failures=$(bundle_failures_locally | grep -c -w INFINITY)
    running=$(bundle_running_globally)
    # When crm_mon or xmllint fails nothing is printed; count that as
    # "no replica running" instead of feeding an empty operand to the
    # arithmetic expansion below.
    running=${running:-0}
    [ "${failures}" -gt 0 ] \
        && [ "$((failures + running))" -ge "${BUNDLE_REPLICAS}" ]
}
# Tell whether the resource failed on every replica of the cluster.
# Globals:   NAME, BUNDLE_NAME, OCF, BUNDLE_REPLICAS (read)
# Arguments: none
# Returns:   0 when the number of INFINITY failcounts equals
#            ${BUNDLE_REPLICAS}, non-zero otherwise
did_resource_failed_globally() {
    local remotecount
    local failures
    local remote_xpath
    # By default the verdict is computed from the bundle replicas:
    # the bundle failed if all ${BUNDLE_REPLICAS} replicas failed
    local failcounts=bundle_failures_globally

    if [ "${NAME}" != "${BUNDLE_NAME}" ]; then
        # The state of an ocf resource is only meaningful when the
        # pcmkremotes are started, so count them first.
        remote_xpath="count(//resources/bundle[@id='${BUNDLE_NAME}']/replica/resource[@resource_agent='${OCF}:pacemaker:remote']/node)"
        remotecount=$(crm_mon --as-xml | xmllint --xpath "${remote_xpath}" -)
        if [ "${remotecount}" != "0" ]; then
            # the ocf resource failed if it failed to start on
            # all ${BUNDLE_REPLICAS} bundle nodes
            failcounts=ocf_failures_globally
        fi
        # otherwise no pcmkremote is running: keep checking the
        # bundle state instead of the ocf resource
    fi

    failures=$(${failcounts} | grep -c -w INFINITY)
    test $failures -eq $BUNDLE_REPLICAS
}
# Input validation
#
# Positional arguments:
#   NAME BUNDLE_NAME ROLE_LOCAL [ROLE_ANYWHERE] [HOST] [TIMEOUT]
# Every validation error is reported on stderr and ends the script
# with status 1.
NAME=$1
if [ -z "${NAME}" ]; then
    echo >&2 "Error: argument NAME must not be empty"
    exit 1
fi
BUNDLE_NAME=$2
if [ -z "${BUNDLE_NAME}" ]; then
    echo >&2 "Error: argument BUNDLE_NAME must not be empty"
    exit 1
fi
# A role given as "_" stands for "no role requested"
ROLE_LOCAL=$3
if [ "${ROLE_LOCAL}" = "_" ]; then
    ROLE_LOCAL=""
fi
ROLE_ANYWHERE=$4
if [ "${ROLE_ANYWHERE}" = "_" ]; then
    ROLE_ANYWHERE=""
fi
if [ -z "${ROLE_LOCAL}" ]; then
    # at least one of the two roles is needed to know what to wait for
    if [ -z "${ROLE_ANYWHERE}" ]; then
        echo >&2 "Error: either ROLE_LOCAL or ROLE_ANYWHERE must be non empty"
        exit 1
    fi
else
    case "${ROLE_LOCAL}" in
        Started|Slave|Master|Unpromoted|Promoted)
            ;;
        *)
            echo >&2 "Error: argument ROLE_LOCAL must be either 'Started' 'Slave' 'Master' 'Unpromoted' or 'Promoted'"
            exit 1
            ;;
    esac
fi
# ROLE_ANYWHERE is optional: an empty value is accepted
case "${ROLE_ANYWHERE}" in
    ""|Started|Slave|Master|Unpromoted|Promoted)
        ;;
    *)
        echo >&2 "Error: argument ROLE_ANYWHERE must be either 'Started' 'Slave' 'Master' 'Unpromoted' or 'Promoted'"
        exit 1
        ;;
esac
# Ensure compatibility with pacemaker 2.1: translate the legacy role
# names to the new ones when pacemaker knows about "Promoted".
# OCF is the prefix used to build the resource_agent names matched by
# the XPath queries ("${OCF}:heartbeat:...", "${OCF}:pacemaker:remote").
if pacemaker_supports_promoted; then
    ROLE_LOCAL=${ROLE_LOCAL/Master/Promoted}
    ROLE_LOCAL=${ROLE_LOCAL/Slave/Unpromoted}
    ROLE_ANYWHERE=${ROLE_ANYWHERE/Master/Promoted}
    ROLE_ANYWHERE=${ROLE_ANYWHERE/Slave/Unpromoted}
    OCF="ocf"
else
    OCF="ocf:"
fi
HOST=${5:-$(facter hostname)}
# NOTE(review): __PCMKTIMEOUT__ looks like a placeholder substituted
# before the script is deployed - confirm against the template that
# installs this file.
TIMEOUT=${6:-__PCMKTIMEOUT__}
# Configure the search
# ----
# Note: we can't use crm_resource in all searches because we can't
# easily extract the host the OCF resources run on (crm_resource
# returns the pcmk-remote nodes rather than the hosts)
# So instead, we implement various searches with XPath directly.
if [ "${BUNDLE_NAME}" != "${NAME}" ]; then
    # ocf resource
    local_resource_xpath="//bundle/replica/resource[@resource_agent='${OCF}:pacemaker:remote']/node[@name='${HOST}']/../../resource[@id='${NAME}']"
    any_resource_xpath="//bundle//resource[@id='${NAME}']"
    replicas_xpath="//bundle/primitive[@id='${BUNDLE_NAME}']/../*[boolean(@image) and boolean(@replicas)]"
else
    # bundle resource
    local_resource_xpath="//bundle[@id='${NAME}']/replica/resource/node[@name='${HOST}']/../../resource"
    any_resource_xpath="//bundle[@id='${NAME}']//resource"
    replicas_xpath="//bundle[@id='${BUNDLE_NAME}']/*[boolean(@image) and boolean(@replicas)]"
fi
# The element of the bundle definition that carries both an image and
# a replicas attribute: its name is the container engine, its replicas
# attribute the number of replicas.
bundle_def_xpath="//bundle[@id='${BUNDLE_NAME}']/*[boolean(@image) and boolean(@replicas)]"
# Query the CIB only once, so that the engine and the replica count
# are extracted from the same snapshot of the configuration.
cib_xml=$(cibadmin -Q)
BUNDLE_CONTAINER_ENGINE=$(printf '%s\n' "${cib_xml}" | xmllint --xpath "name(${bundle_def_xpath})" -)
BUNDLE_REPLICAS=$(printf '%s\n' "${cib_xml}" | xmllint --xpath "string(${bundle_def_xpath}/@replicas)" -)
unset cib_xml
# The wait algorithm follows a two-stage approach
#  1. Depending on how the script is called, we first check whether
#     the resource is restarted locally. An A/P resource may be
#     restarted elsewhere in the cluster.
#  2. If needed, check whether the A/P resource has restarted
#     elsewhere. For A/P M/S resources, in case the resource is
#     restarted as Slave locally, ensure a Master is available.
#
# success and bailout follow the shell convention: 0 means "true".
# timeout is the remaining time budget, shared by both stages.
success=1
bailout=1
timeout=$TIMEOUT
role=""

# Stage 1: local check
if [ -n "$ROLE_LOCAL" ]; then
    log "Waiting until ${NAME} has restarted on ${HOST} and is in state ${ROLE_LOCAL}"
    log "Will probe resource state with the following XPath pattern: ${local_resource_xpath}"
    while [ $timeout -gt 0 ] && [ $bailout -ne 0 ] && [ $success -ne 0 ]; do
        # Probe the local resource and extract its current role
        resource=$(crm_mon -r --as-xml | xmllint --xpath "${local_resource_xpath}" - 2>/dev/null)
        role=$(echo "${resource}" | sed -n -e 's/.*\Wrole="\([^"]*\)".*/\1/p')
        # The checks are ordered: conditions that prevent pacemaker
        # from ever restarting the resource come before the role test.
        if [ "$(crm_resource --meta -r ${NAME} -g is-managed 2>/dev/null)" = "false" ]; then
            log "${NAME} is unmanaged, will never reach target role. Bailing out"
            bailout=0
        elif [ "$(crm_resource --meta -r ${NAME} -g target-role 2>/dev/null)" = "Stopped" ]; then
            log "${NAME} is disabled, will never reach target role. Bailing out"
            bailout=0
        elif echo "${resource}" | grep -q -w '\Wblocked="true"'; then
            log "${NAME} is blocked, will never reach target role. Bailing out"
            bailout=0
        elif did_resource_failed_locally; then
            log "${NAME} is in failed state, will never reach target role. Bailing out"
            bailout=0
        elif [ "$role" = "$ROLE_LOCAL" ] \
            || { [ -n "$ROLE_ANYWHERE" ] && [ "$role" = "$ROLE_ANYWHERE" ]; }; then
            # restarted in the local role, or (A/P) directly in the
            # role expected anywhere in the cluster
            success=0
        else
            log "Waiting for ${NAME} to transition to role ${ROLE_LOCAL} on ${HOST}"
        fi
        # Pause and consume the time budget only when this iteration
        # reached no verdict; otherwise the loop condition ends the wait.
        if [ $bailout -ne 0 ] && [ $success -ne 0 ]; then
            sleep 4
            timeout=$((timeout - 4))
        fi
    done
fi

# Stage 2: global check
# Entered only when there is time left, a cluster-wide role was
# requested, and stage 1 did not already observe that role.
if [ $timeout -gt 0 ] && [ -n "$ROLE_ANYWHERE" ] && [ "$role" != "$ROLE_ANYWHERE" ]; then
    log "Wait until ${NAME} is restarted anywhere in the cluster in state ${ROLE_ANYWHERE}"
    log "Will probe resource state with the following XPath pattern: ${any_resource_xpath}"
    # start from a clean verdict, whatever stage 1 concluded
    success=1
    bailout=1
    while [ $timeout -gt 0 ] && [ $bailout -ne 0 ] && [ $success -ne 0 ]; do
        resources=$(crm_mon -r --as-xml | xmllint --xpath "${any_resource_xpath}" - 2>/dev/null)
        if [ "$(crm_resource --meta -r ${NAME} -g is-managed 2>/dev/null)" = "false" ]; then
            log "${NAME} is unmanaged, will never reach target role. Bailing out"
            bailout=0
        elif [ "$(crm_resource --meta -r ${NAME} -g target-role 2>/dev/null)" = "Stopped" ]; then
            log "${NAME} is disabled, will never reach target role. Bailing out"
            bailout=0
        elif ! (echo "${resources}" | grep -q -w '\Wblocked="false"'); then
            # no probed resource reports blocked="false"
            log "${NAME} blocked, will never reach target role. Bailing out"
            bailout=0
        elif did_resource_failed_globally; then
            log "${NAME} is in failed state, will never reach target role. Bailing out"
            bailout=0
        elif echo "${resources}" | grep -q -w "\Wrole=\"${ROLE_ANYWHERE}\""; then
            success=0
        else
            log "Waiting for ${NAME} to transition to role ${ROLE_ANYWHERE} anywhere in the cluster"
        fi
        # Pause and consume the time budget only when this iteration
        # reached no verdict; otherwise the loop condition ends the wait.
        if [ $bailout -ne 0 ] && [ $success -ne 0 ]; then
            sleep 4
            timeout=$((timeout - 4))
        fi
    done
fi

# Report why the wait ended, then its outcome
if [ $timeout -le 0 ]; then
    log "Timeout reached after ${TIMEOUT}s while waiting for ${NAME} to be restarted"
elif [ $bailout -le 0 ]; then
    log "Restart monitoring for ${NAME} cancelled"
fi
if [ $success -ne 0 ]; then
    log "${NAME} was not restarted properly"
else
    log "${NAME} successfully restarted"
fi

# Don't block minor update or stack update if the wait was unsuccessful
exit 0