128c2bcc25
Pacemaker 2.1 changed the naming convention around multi-state resource roles and the OCF resource agent names. Adapt our resource restart scripts so that they parse the proper data from the CIB. Change-Id: Ieade3444e44e305f507c057991e02048ab5f3b3a Closes-Bug: #1942771
#!/bin/bash

# ----
# Wait for an OCF resource or a bundle to be restarted
# ----
# e.g.:
#   M/S OCF:      $0 galera galera-bundle Master
#   clone OCF:    $0 rabbitmq rabbitmq-bundle Started
#   A/P M/S OCF:  $0 redis redis-bundle Slave Master
#   A/P bundle:   $0 openstack-cinder-volume openstack-cinder-volume _ Started
#   clone bundle: $0 haproxy-bundle haproxy-bundle Started

# design note 1:
# - this script is called during a minor update; it is called
#   once per node that hosts a service replica.
# - the purpose of this script is to ensure that restarting the
#   service replica locally won't disrupt the service availability
#   for the end user. To reach that goal, the script waits until the
#   service is restarted locally or globally and reaches a given
#   target state (i.e. Started, Slave or Master).
# design note 2:
# - we don't want to track restart errors: our only job is to ensure
#   service restart synchronization, not service health.
# - in particular, we don't want to error out in case the resource
#   cannot be restarted locally, because that would make the minor
#   update fail, even if potentially other replicas still provide
#   the service.
# design note 3:
# - we can bail out early if we determine that the resource can't
#   be restarted automatically by pacemaker (e.g. it is "blocked",
#   unmanaged or disabled).

log() {
    local msg=$1
    echo "$(date -u): $msg"
}

usage() {
    echo >&2 "Usage: $0 NAME BUNDLE_NAME ROLE_LOCAL [ROLE_ANYWHERE] [HOST] [TIMEOUT]"
    exit 1
}

pacemaker_supports_promoted() {
    # The Promoted token is only matched in recent pacemaker versions
    grep -wq "<value>Promoted</value>" /usr/share/pacemaker/resources-*.rng
}
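
# Background for the check above: Pacemaker 2.1 renamed the Master/Slave
# roles to Promoted/Unpromoted, and its RelaxNG schemas shipped under
# /usr/share/pacemaker enumerate the allowed role values, roughly:
#   <value>Promoted</value>
#   <value>Unpromoted</value>
# On older versions only Master/Slave appear in the schemas, so the grep
# fails and we keep using the legacy role names.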

#
# Utility functions to detect stuck resources
#

bundle_failures_locally() {
    local engine=$BUNDLE_CONTAINER_ENGINE
    local replicas=$BUNDLE_REPLICAS
    local last=$(($replicas - 1))
    local replica_name
    for i in $(seq 0 $last); do
        replica_name=${BUNDLE_NAME}-${engine}-${i}
        crm_failcount -q -G -r $replica_name -N $HOST
    done
}
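
# e.g. with BUNDLE_NAME=galera-bundle, engine=podman and 3 replicas, the
# loop above queries the failcounts of galera-bundle-podman-{0,1,2} on
# $HOST; crm_failcount -q -G prints one value per call, either a number
# or INFINITY (typically after a fatal start failure on that node).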

bundle_failures_globally() {
    local engine=$BUNDLE_CONTAINER_ENGINE
    local replicas=$BUNDLE_REPLICAS
    local last=$(($replicas - 1))
    for i in $(seq 0 $last); do
        crm_failcount -q -G -r ${BUNDLE_NAME}-${engine}-${i}
    done
}

bundle_running_globally() {
    local engine=$BUNDLE_CONTAINER_ENGINE
    # return the number of running bundle replicas, i.e. the number of
    # docker/podman resource replicas currently running in the cluster
    crm_mon --as-xml | xmllint --xpath "count(//resources/bundle[@id='${BUNDLE_NAME}']/replica/resource[@resource_agent='${OCF}:heartbeat:${engine}']/node)" -
}
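
# The XPath above counts <node/> children, which crm_mon only emits for
# replicas that are actually running somewhere. A matching fragment of
# the crm_mon XML looks roughly like (attributes elided):
#   <bundle id="galera-bundle">
#     <replica>
#       <resource resource_agent="ocf:heartbeat:podman" ...>
#         <node name="controller-0" .../>
#       </resource>
#     </replica>
#   </bundle>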

ocf_failures_globally() {
    local replicas=$BUNDLE_REPLICAS
    local last=$(($replicas - 1))
    local bundle_node
    for i in $(seq 0 $last); do
        bundle_node=${BUNDLE_NAME}-${i}
        crm_failcount -q -G -r $NAME -N $bundle_node
    done
}
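
# OCF resources inside a bundle run on pacemaker-remote nodes named
# ${BUNDLE_NAME}-0 .. ${BUNDLE_NAME}-(replicas-1), so the failcount of
# the OCF resource is tracked per remote node, not per physical host.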

did_resource_failed_locally() {
    local failures
    local running
    local remotehost
    if [ "${NAME}" != "${BUNDLE_NAME}" ]; then
        # if we're dealing with an ocf resource, it is running on a
        # pacemaker_remote rather than on the real host, and the
        # failcounts are thus associated with the pcmk remote. Replace
        # the host's name with the pcmk remote's name.
        remotehost=$(crm_mon --as-xml | xmllint --xpath "string(//resources/bundle[@id='${BUNDLE_NAME}']/replica/resource/node[@name='${HOST}']/../../resource[@resource_agent='${OCF}:pacemaker:remote']/@id)" -)
        if [ -n "${remotehost}" ]; then
            crm_failcount -q -G -r $NAME -N $remotehost | grep -q -w INFINITY
            return $?
        fi
        # If no pcmk remote is currently running, the failcount from
        # the ocf resource is useless; compute the failcount as in the
        # bundle case instead (computed below).
    fi

    # for bundles, pacemaker can run any bundle replica locally
    # (e.g. galera-bundle-docker-{0,1,2}), and a failure happens when
    # there are no more replicas to try.
    # That is, when _at least_ one replica failed locally, and all the
    # others either failed or are currently running elsewhere.
    failures=$(bundle_failures_locally $HOST | grep -c -w INFINITY)
    running=$(bundle_running_globally)
    test $failures -gt 0 && \
        test $(( $failures + $running )) -ge $BUNDLE_REPLICAS
}
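
# Worked example for the local-failure condition above, with
# BUNDLE_REPLICAS=3: if galera-bundle-podman-0 has an INFINITY failcount
# on this host (failures=1) and the two other replicas are running on
# other nodes (running=2), then 1+2 >= 3: no replica is left for
# pacemaker to try locally, so the local restart is considered failed.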

did_resource_failed_globally() {
    local remotecount
    local failures
    if [ "${NAME}" != "${BUNDLE_NAME}" ]; then
        # we check the state of an ocf resource only if the
        # pcmkremotes are started
        remotecount=$(crm_mon --as-xml | xmllint --xpath "count(//resources/bundle[@id='${BUNDLE_NAME}']/replica/resource[@resource_agent='${OCF}:pacemaker:remote']/node)" -)
        if [ "${remotecount}" = "0" ]; then
            # no pcmkremote is running, so check the bundle state
            # instead of checking the ocf resource;
            # the bundle failed if all ${BUNDLE_REPLICAS} replicas failed
            failures=$(bundle_failures_globally | grep -c -w INFINITY)
            test $failures -eq $BUNDLE_REPLICAS
        else
            # the ocf resource failed if it failed to start on
            # all ${BUNDLE_REPLICAS} bundle nodes
            failures=$(ocf_failures_globally | grep -c -w INFINITY)
            test $failures -eq $BUNDLE_REPLICAS
        fi
    else
        # the bundle failed if all ${BUNDLE_REPLICAS} replicas failed
        failures=$(bundle_failures_globally | grep -c -w INFINITY)
        test $failures -eq $BUNDLE_REPLICAS
    fi
}

# Input validation
#

NAME=$1
if [ -z "${NAME}" ]; then
    echo >&2 "Error: argument NAME must not be empty"
    exit 1
fi

BUNDLE_NAME=$2
if [ -z "${BUNDLE_NAME}" ]; then
    echo >&2 "Error: argument BUNDLE_NAME must not be empty"
    exit 1
fi

ROLE_LOCAL=$3
if [ "${ROLE_LOCAL}" = "_" ]; then
    ROLE_LOCAL=""
fi

ROLE_ANYWHERE=$4
if [ "${ROLE_ANYWHERE}" = "_" ]; then
    ROLE_ANYWHERE=""
fi

if [ -z "${ROLE_LOCAL}" ]; then
    if [ -z "${ROLE_ANYWHERE}" ]; then
        echo >&2 "Error: either ROLE_LOCAL or ROLE_ANYWHERE must be non-empty"
        exit 1
    fi
else
    if ! (echo "${ROLE_LOCAL}" | grep -q -x -E "(Started|Slave|Master|Unpromoted|Promoted)"); then
        echo >&2 "Error: argument ROLE_LOCAL must be one of 'Started', 'Slave', 'Master', 'Unpromoted' or 'Promoted'"
        exit 1
    fi
fi

if [ -n "${ROLE_ANYWHERE}" ] && ! (echo "${ROLE_ANYWHERE}" | grep -q -x -E "(Started|Slave|Master|Unpromoted|Promoted)"); then
    echo >&2 "Error: argument ROLE_ANYWHERE must be one of 'Started', 'Slave', 'Master', 'Unpromoted' or 'Promoted'"
    exit 1
fi

# Ensure compatibility with pacemaker 2.1
if pacemaker_supports_promoted; then
    ROLE_LOCAL=$(echo "$ROLE_LOCAL" | sed -e 's/Master/Promoted/' -e 's/Slave/Unpromoted/')
    ROLE_ANYWHERE=$(echo "$ROLE_ANYWHERE" | sed -e 's/Master/Promoted/' -e 's/Slave/Unpromoted/')
    OCF="ocf"
else
    OCF="ocf:"
fi
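
# The ${OCF} prefix absorbs a formatting change in the crm_mon XML
# output: on pacemaker < 2.1 the resource_agent attribute appears as
# "ocf::heartbeat:podman" (two colons), on 2.1+ as
# "ocf:heartbeat:podman". Every XPath below matches "${OCF}:...", so
# OCF is "ocf:" on old versions and "ocf" on new ones.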

HOST=${5:-$(facter hostname)}
TIMEOUT=${6:-__PCMKTIMEOUT__}

# Configure the search
# ----
# Note: we can't use crm_resource in all searches because we can't
# easily extract the host the OCF resources run on (crm_resource
# returns the pcmk-remote nodes rather than the hosts).
# So instead, we implement the various searches with XPath directly.

if [ "${BUNDLE_NAME}" != "${NAME}" ]; then
    # ocf resource
    local_resource_xpath="//bundle/replica/resource[@resource_agent='${OCF}:pacemaker:remote']/node[@name='${HOST}']/../../resource[@id='${NAME}']"
    any_resource_xpath="//bundle//resource[@id='${NAME}']"
    replicas_xpath="//bundle/primitive[@id='${BUNDLE_NAME}']/../*[boolean(@image) and boolean(@replicas)]"
else
    # bundle resource
    local_resource_xpath="//bundle[@id='${NAME}']/replica/resource/node[@name='${HOST}']/../../resource"
    any_resource_xpath="//bundle[@id='${NAME}']//resource"
    replicas_xpath="//bundle[@id='${BUNDLE_NAME}']/*[boolean(@image) and boolean(@replicas)]"
fi

bundle_def_xpath="//bundle[@id='${BUNDLE_NAME}']/*[boolean(@image) and boolean(@replicas)]"
BUNDLE_CONTAINER_ENGINE=$(cibadmin -Q | xmllint --xpath "name(${bundle_def_xpath})" -)
BUNDLE_REPLICAS=$(cibadmin -Q | xmllint --xpath "string(${bundle_def_xpath}/@replicas)" -)
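
# e.g. given a CIB bundle definition like
#   <bundle id="galera-bundle">
#     <podman image="..." replicas="3" .../>
#   </bundle>
# the two queries above yield BUNDLE_CONTAINER_ENGINE=podman and
# BUNDLE_REPLICAS=3.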

# The wait algorithm follows a two-stage approach:
# 1. Depending on how the script is called, we first check whether
#    the resource is restarted locally. An A/P resource may be
#    restarted elsewhere in the cluster.
# 2. If needed, check whether the A/P resource has restarted
#    elsewhere. For A/P M/S resources, in case the resource is
#    restarted as Slave locally, ensure a Master is available.

success=1
bailout=1
timeout=$TIMEOUT
role=""

# Stage 1: local check
if [ -n "$ROLE_LOCAL" ]; then
    log "Waiting until ${NAME} has restarted on ${HOST} and is in state ${ROLE_LOCAL}"
    log "Will probe resource state with the following XPath pattern: ${local_resource_xpath}"

    while [ $timeout -gt 0 ] && [ $bailout -ne 0 ] && [ $success -ne 0 ]; do
        resource=$(crm_mon -r --as-xml | xmllint --xpath "${local_resource_xpath}" - 2>/dev/null)
        role=$(echo "${resource}" | sed -ne 's/.*\Wrole="\([^"]*\)".*/\1/p')

        if [ "$(crm_resource --meta -r ${NAME} -g is-managed 2>/dev/null)" = "false" ]; then
            log "${NAME} is unmanaged, will never reach target role. Bailing out"
            bailout=0
            continue
        elif [ "$(crm_resource --meta -r ${NAME} -g target-role 2>/dev/null)" = "Stopped" ]; then
            log "${NAME} is disabled, will never reach target role. Bailing out"
            bailout=0
            continue
        elif echo "${resource}" | grep -q -w "\Wblocked=\"true\""; then
            log "${NAME} is blocked, will never reach target role. Bailing out"
            bailout=0
            continue
        elif did_resource_failed_locally; then
            log "${NAME} is in failed state, will never reach target role. Bailing out"
            bailout=0
            continue
        elif [ "$role" = "$ROLE_LOCAL" ]; then
            success=0
            continue
        elif [ -n "$ROLE_ANYWHERE" ] && [ "$role" = "$ROLE_ANYWHERE" ]; then
            # A/P: we are restarted in the expected state
            success=0
            continue
        else
            log "Waiting for ${NAME} to transition to role ${ROLE_LOCAL} on ${HOST}"
        fi

        if [ $bailout -ne 0 ] && [ $success -ne 0 ]; then
            sleep 4
            timeout=$((timeout-4))
        fi
    done
fi

# Stage 2: global check
if [ $timeout -gt 0 ] && [ -n "$ROLE_ANYWHERE" ] && [ "$role" != "$ROLE_ANYWHERE" ]; then
    log "Waiting until ${NAME} is restarted anywhere in the cluster in state ${ROLE_ANYWHERE}"
    log "Will probe resource state with the following XPath pattern: ${any_resource_xpath}"

    success=1
    bailout=1
    while [ $timeout -gt 0 ] && [ $bailout -ne 0 ] && [ $success -ne 0 ]; do
        resources=$(crm_mon -r --as-xml | xmllint --xpath "${any_resource_xpath}" - 2>/dev/null)
        if [ "$(crm_resource --meta -r ${NAME} -g is-managed 2>/dev/null)" = "false" ]; then
            log "${NAME} is unmanaged, will never reach target role. Bailing out"
            bailout=0
            continue
        elif [ "$(crm_resource --meta -r ${NAME} -g target-role 2>/dev/null)" = "Stopped" ]; then
            log "${NAME} is disabled, will never reach target role. Bailing out"
            bailout=0
            continue
        elif ! (echo "${resources}" | grep -q -w "\Wblocked=\"false\""); then
            log "${NAME} is blocked, will never reach target role. Bailing out"
            bailout=0
            continue
        elif did_resource_failed_globally; then
            log "${NAME} is in failed state, will never reach target role. Bailing out"
            bailout=0
            continue
        elif echo "${resources}" | grep -q -w "\Wrole=\"${ROLE_ANYWHERE}\""; then
            success=0
            continue
        else
            log "Waiting for ${NAME} to transition to role ${ROLE_ANYWHERE} anywhere in the cluster"
        fi

        if [ $bailout -ne 0 ] && [ $success -ne 0 ]; then
            sleep 4
            timeout=$((timeout-4))
        fi
    done
fi

if [ $timeout -le 0 ]; then
    log "Timeout reached after ${TIMEOUT}s while waiting for ${NAME} to be restarted"
elif [ $bailout -le 0 ]; then
    log "Restart monitoring for ${NAME} cancelled"
fi

if [ $success -eq 0 ]; then
    log "${NAME} successfully restarted"
else
    log "${NAME} was not restarted properly"
fi

# Don't block the minor update or stack update if the wait was unsuccessful
exit 0