#!/bin/bash

# ----
# Wait for an OCF resource or a bundle to be restarted
# ----
# e.g.:
#   M/S OCF:      $0 galera galera-bundle Master
#   clone OCF:    $0 rabbitmq rabbitmq-bundle Started
#   A/P M/S OCF:  $0 redis redis-bundle Slave Master
#   A/P bundle:   $0 openstack-cinder-volume openstack-cinder-volume _ Started
#   clone bundle: $0 haproxy-bundle haproxy-bundle Started

# design note 1:
#  - this script is called during a minor update; it is called
#    once per node that hosts a service replica.
#  - the purpose of this script is to ensure that restarting the
#    service replica locally won't disrupt the service availability
#    for the end user. To reach that goal, the script waits until the
#    service is restarted locally or globally and reaches a given
#    target state (i.e. Started, Slave or Master).
# design note 2:
#  - we don't want to track restart errors: our only job is to ensure
#    service restart synchronization, not service health.
#  - in particular, we don't want to error out in case the resource
#    cannot be restarted locally, because that would make the minor
#    update fail even though other replicas may still provide the
#    service.
# design note 3:
#  - we can bail out early if we determine that the resource can't
#    be restarted automatically by pacemaker (e.g. it is "blocked",
#    unmanaged or disabled).

log() {
    local msg=$1
    echo "$(date -u): $msg"
}

usage() {
    echo >&2 "Usage: $0 NAME BUNDLE_NAME ROLE_LOCAL [ROLE_ANYWHERE] [HOST] [TIMEOUT]"
    exit 1
}

pacemaker_supports_promoted() {
    # The Promoted token is only matched in recent pacemaker versions
    grep -wq "<value>Promoted</value>" /usr/share/pacemaker/resources-*.rng
}
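
# Note: pacemaker 2.1 renamed the Master/Slave roles to
# Promoted/Unpromoted; probing the RNG schemas above tells us which
# vocabulary the local pacemaker understands.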

#
# Utility functions to detect stuck resources
#

bundle_failures_locally() {
    local host=${1:-$HOST}
    local engine=$BUNDLE_CONTAINER_ENGINE
    local replicas=$BUNDLE_REPLICAS
    local last=$(($replicas - 1))
    local replica_name
    for i in $(seq 0 $last); do
        replica_name=${BUNDLE_NAME}-${engine}-${i}
        crm_failcount -q -G -r $replica_name -N $host
    done
}
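
# Illustrative output for a 3-replica bundle whose first replica
# failed on this host (one failcount per line):
#   INFINITY
#   0
#   0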

bundle_failures_globally() {
    local engine=$BUNDLE_CONTAINER_ENGINE
    local replicas=$BUNDLE_REPLICAS
    local last=$(($replicas - 1))
    for i in $(seq 0 $last); do
        crm_failcount -q -G -r ${BUNDLE_NAME}-${engine}-${i}
    done
}

bundle_running_globally() {
    local engine=$BUNDLE_CONTAINER_ENGINE
    # return the number of running bundle replicas, i.e. the number of
    # docker/podman resource replicas currently running in the cluster
    crm_mon --as-xml | xmllint --xpath "count(//resources/bundle[@id='${BUNDLE_NAME}']/replica/resource[@resource_agent='${OCF}:heartbeat:${engine}']/node)" -
}
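
# e.g. prints "3" when all the replicas of a 3-replica bundle are
# currently running somewhere in the cluster (illustrative)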

ocf_failures_globally() {
    local replicas=$BUNDLE_REPLICAS
    local last=$(($replicas - 1))
    local bundle_node
    for i in $(seq 0 $last); do
        bundle_node=${BUNDLE_NAME}-${i}
        crm_failcount -q -G -r $NAME -N $bundle_node
    done
}
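
# e.g. with NAME=galera and BUNDLE_NAME=galera-bundle, this queries the
# failcount of "galera" on the bundle nodes galera-bundle-0,
# galera-bundle-1, ... which host the galera ocf resource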

did_resource_failed_locally() {
    local failures
    local running
    local remotehost
    if [ "${NAME}" != "${BUNDLE_NAME}" ]; then
        # if we're dealing with an ocf resource, it runs on a
        # pacemaker_remote rather than on the real host, and the
        # failcounts are thus associated with the pcmk remote. Replace
        # the host's name with the pcmk remote's name.
        remotehost=$(crm_mon --as-xml | xmllint --xpath "string(//resources/bundle[@id='${BUNDLE_NAME}']/replica/resource/node[@name='${HOST}']/../../resource[@resource_agent='${OCF}:pacemaker:remote']/@id)" -)
        if [ -n "${remotehost}" ]; then
            crm_failcount -q -G -r $NAME -N $remotehost | grep -q -w INFINITY
            return $?
        fi
        # If no pcmk remote is currently running, the failcount of the
        # ocf resource is useless; fall through and compute the
        # failcount of the bundle instead.
    fi

    # for bundles, pacemaker can run any bundle replica locally
    # (e.g. galera-bundle-docker-{0,1,2}), and a failure happens when
    # there are no more replicas to try.
    # That is, when _at least_ one replica failed locally, and all the
    # others either failed or are currently running elsewhere.
    failures=$(bundle_failures_locally $HOST | grep -c -w INFINITY)
    running=$(bundle_running_globally)
    test $failures -gt 0 && \
        test $(( $failures + $running )) -ge $BUNDLE_REPLICAS
}
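
# Worked example (illustrative): with BUNDLE_REPLICAS=3, if one replica
# failed locally (failures=1) and the two others run on other nodes
# (running=2), then 1 + 2 >= 3: there is no replica left to try here,
# so the resource is considered stuck locally.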

did_resource_failed_globally() {
    local remotecount
    local failures
    if [ "${NAME}" != "${BUNDLE_NAME}" ]; then
        # we check the state of an ocf resource only if the
        # pcmkremotes are started
        remotecount=$(crm_mon --as-xml | xmllint --xpath "count(//resources/bundle[@id='${BUNDLE_NAME}']/replica/resource[@resource_agent='${OCF}:pacemaker:remote']/node)" -)
        if [ "${remotecount}" = "0" ]; then
            # no pcmkremote is running, so check the bundle state
            # instead of checking the ocf resource;
            # the bundle failed if all ${BUNDLE_REPLICAS} replicas failed
            failures=$(bundle_failures_globally | grep -c -w INFINITY)
            test $failures -eq $BUNDLE_REPLICAS
        else
            # the ocf resource failed if it failed to start on
            # all ${BUNDLE_REPLICAS} bundle nodes
            failures=$(ocf_failures_globally | grep -c -w INFINITY)
            test $failures -eq $BUNDLE_REPLICAS
        fi
    else
        # the bundle failed if all ${BUNDLE_REPLICAS} replicas failed
        failures=$(bundle_failures_globally | grep -c -w INFINITY)
        test $failures -eq $BUNDLE_REPLICAS
    fi
}

#
# Input validation
#

NAME=$1
if [ -z "${NAME}" ]; then
    echo >&2 "Error: argument NAME must not be empty"
    exit 1
fi

BUNDLE_NAME=$2
if [ -z "${BUNDLE_NAME}" ]; then
    echo >&2 "Error: argument BUNDLE_NAME must not be empty"
    exit 1
fi

ROLE_LOCAL=$3
if [ "${ROLE_LOCAL}" = "_" ]; then
    ROLE_LOCAL=""
fi

ROLE_ANYWHERE=$4
if [ "${ROLE_ANYWHERE}" = "_" ]; then
    ROLE_ANYWHERE=""
fi

if [ -z "${ROLE_LOCAL}" ]; then
    if [ -z "${ROLE_ANYWHERE}" ]; then
        echo >&2 "Error: either ROLE_LOCAL or ROLE_ANYWHERE must be non-empty"
        exit 1
    fi
else
    if ! echo "${ROLE_LOCAL}" | grep -q -x -E "(Started|Slave|Master|Unpromoted|Promoted)"; then
        echo >&2 "Error: argument ROLE_LOCAL must be one of 'Started', 'Slave', 'Master', 'Unpromoted' or 'Promoted'"
        exit 1
    fi
fi

if [ -n "${ROLE_ANYWHERE}" ] && ! echo "${ROLE_ANYWHERE}" | grep -q -x -E "(Started|Slave|Master|Unpromoted|Promoted)"; then
    echo >&2 "Error: argument ROLE_ANYWHERE must be one of 'Started', 'Slave', 'Master', 'Unpromoted' or 'Promoted'"
    exit 1
fi

# Ensure compatibility with pacemaker 2.1
if pacemaker_supports_promoted; then
    ROLE_LOCAL=$(echo "$ROLE_LOCAL" | sed -e 's/Master/Promoted/' -e 's/Slave/Unpromoted/')
    ROLE_ANYWHERE=$(echo "$ROLE_ANYWHERE" | sed -e 's/Master/Promoted/' -e 's/Slave/Unpromoted/')
    OCF="ocf"
else
    OCF="ocf:"
fi
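
# Note: ${OCF} is expanded below as "${OCF}:heartbeat:..." and
# "${OCF}:pacemaker:remote"; older pacemaker versions report agents as
# "ocf::heartbeat:..." while recent ones use "ocf:heartbeat:...",
# hence the trailing colon in the legacy branch.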

HOST=${5:-$(facter hostname)}
TIMEOUT=${6:-__PCMKTIMEOUT__}
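
# Note: __PCMKTIMEOUT__ is a placeholder, presumably substituted with a
# concrete timeout (in seconds) when this script is deployed.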

# Configure the search
# ----
# Note: we can't use crm_resource for all the searches because we can't
# easily extract the host that an OCF resource runs on (crm_resource
# returns the pcmk-remote nodes rather than the hosts).
# So instead, we implement the various searches with XPath directly.
if [ "${BUNDLE_NAME}" != "${NAME}" ]; then
    # ocf resource
    local_resource_xpath="//bundle/replica/resource[@resource_agent='${OCF}:pacemaker:remote']/node[@name='${HOST}']/../../resource[@id='${NAME}']"
    any_resource_xpath="//bundle//resource[@id='${NAME}']"
    replicas_xpath="//bundle/primitive[@id='${BUNDLE_NAME}']/../*[boolean(@image) and boolean(@replicas)]"
else
    # bundle resource
    local_resource_xpath="//bundle[@id='${NAME}']/replica/resource/node[@name='${HOST}']/../../resource"
    any_resource_xpath="//bundle[@id='${NAME}']//resource"
    replicas_xpath="//bundle[@id='${BUNDLE_NAME}']/*[boolean(@image) and boolean(@replicas)]"
fi
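
# For reference, those XPaths match crm_mon XML shaped roughly like the
# following (illustrative, abridged):
#   <bundle id="galera-bundle" ...>
#     <replica id="0">
#       <resource id="galera-bundle-podman-0" resource_agent="ocf:heartbeat:podman" ...>
#         <node name="controller-0" .../>
#       </resource>
#       <resource id="galera" role="Promoted" ...>
#         <node name="galera-bundle-0" .../>
#       </resource>
#       <resource id="galera-bundle-0" resource_agent="ocf:pacemaker:remote" ...>
#         <node name="controller-0" .../>
#       </resource>
#     </replica>
#   </bundle>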

bundle_def_xpath="//bundle[@id='${BUNDLE_NAME}']/*[boolean(@image) and boolean(@replicas)]"
BUNDLE_CONTAINER_ENGINE=$(cibadmin -Q | xmllint --xpath "name(${bundle_def_xpath})" -)
BUNDLE_REPLICAS=$(cibadmin -Q | xmllint --xpath "string(${bundle_def_xpath}/@replicas)" -)
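
# e.g. given a CIB bundle definition like (illustrative):
#   <bundle id="galera-bundle">
#     <podman image="..." replicas="3" .../>
#     ...
#   </bundle>
# BUNDLE_CONTAINER_ENGINE resolves to "podman" and BUNDLE_REPLICAS to "3"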

# The wait algorithm follows a two-stage approach:
#  1. Depending on how the script is called, first check whether
#     the resource is restarted locally. An A/P resource may be
#     restarted elsewhere in the cluster.
#  2. If needed, check whether the A/P resource has restarted
#     elsewhere. For A/P M/S resources, in case the resource is
#     restarted as Slave locally, ensure a Master is available.
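
# success and bailout follow the shell convention: 0 means "yes"/"done",
# 1 means "not yet"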
success=1
bailout=1
timeout=$TIMEOUT
role=""

# Stage 1: local check
if [ -n "$ROLE_LOCAL" ]; then
    log "Waiting until ${NAME} has restarted on ${HOST} and is in state ${ROLE_LOCAL}"
    log "Will probe resource state with the following XPath pattern: ${local_resource_xpath}"

    while [ $timeout -gt 0 ] && [ $bailout -ne 0 ] && [ $success -ne 0 ]; do
        resource=$(crm_mon -r --as-xml | xmllint --xpath "${local_resource_xpath}" - 2>/dev/null)
        role=$(echo "${resource}" | sed -ne 's/.*\Wrole="\([^"]*\)".*/\1/p')

        if [ "$(crm_resource --meta -r ${NAME} -g is-managed 2>/dev/null)" = "false" ]; then
            log "${NAME} is unmanaged, will never reach target role. Bailing out"
            bailout=0
            continue
        elif [ "$(crm_resource --meta -r ${NAME} -g target-role 2>/dev/null)" = "Stopped" ]; then
            log "${NAME} is disabled, will never reach target role. Bailing out"
            bailout=0
            continue
        elif echo "${resource}" | grep -q -w "\Wblocked=\"true\""; then
            log "${NAME} is blocked, will never reach target role. Bailing out"
            bailout=0
            continue
        elif did_resource_failed_locally; then
            log "${NAME} is in failed state, will never reach target role. Bailing out"
            bailout=0
            continue
        elif [ "$role" = "$ROLE_LOCAL" ]; then
            success=0
            continue
        elif [ -n "$ROLE_ANYWHERE" ] && [ "$role" = "$ROLE_ANYWHERE" ]; then
            # A/P: we are restarted in the expected state
            success=0
            continue
        else
            log "Waiting for ${NAME} to transition to role ${ROLE_LOCAL} on ${HOST}"
        fi

        if [ $bailout -ne 0 ] && [ $success -ne 0 ]; then
            sleep 4
            timeout=$((timeout-4))
        fi
    done
fi

# Stage 2: global check
if [ $timeout -gt 0 ] && [ -n "$ROLE_ANYWHERE" ] && [ "$role" != "$ROLE_ANYWHERE" ]; then
    log "Waiting until ${NAME} is restarted anywhere in the cluster in state ${ROLE_ANYWHERE}"
    log "Will probe resource state with the following XPath pattern: ${any_resource_xpath}"

    success=1
    bailout=1
    while [ $timeout -gt 0 ] && [ $bailout -ne 0 ] && [ $success -ne 0 ]; do
        resources=$(crm_mon -r --as-xml | xmllint --xpath "${any_resource_xpath}" - 2>/dev/null)
        if [ "$(crm_resource --meta -r ${NAME} -g is-managed 2>/dev/null)" = "false" ]; then
            log "${NAME} is unmanaged, will never reach target role. Bailing out"
            bailout=0
            continue
        elif [ "$(crm_resource --meta -r ${NAME} -g target-role 2>/dev/null)" = "Stopped" ]; then
            log "${NAME} is disabled, will never reach target role. Bailing out"
            bailout=0
            continue
        elif ! echo "${resources}" | grep -q -w "\Wblocked=\"false\""; then
            log "${NAME} is blocked, will never reach target role. Bailing out"
            bailout=0
            continue
        elif did_resource_failed_globally; then
            log "${NAME} is in failed state, will never reach target role. Bailing out"
            bailout=0
            continue
        elif echo "${resources}" | grep -q -w "\Wrole=\"${ROLE_ANYWHERE}\""; then
            success=0
            continue
        else
            log "Waiting for ${NAME} to transition to role ${ROLE_ANYWHERE} anywhere in the cluster"
        fi

        if [ $bailout -ne 0 ] && [ $success -ne 0 ]; then
            sleep 4
            timeout=$((timeout-4))
        fi
    done
fi

if [ $timeout -le 0 ]; then
    log "Timeout reached after ${TIMEOUT}s while waiting for ${NAME} to be restarted"
elif [ $bailout -le 0 ]; then
    log "Restart monitoring for ${NAME} cancelled"
fi

if [ $success -eq 0 ]; then
    log "${NAME} successfully restarted"
else
    log "${NAME} was not restarted properly"
fi
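
# Illustrative log output for a successful local restart (abridged):
#   Tue Jan  1 00:00:00 UTC 2030: Waiting until galera has restarted on controller-0 and is in state Promoted
#   Tue Jan  1 00:00:04 UTC 2030: Waiting for galera to transition to role Promoted on controller-0
#   Tue Jan  1 00:00:08 UTC 2030: galera successfully restarted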

# Don't block the minor update or the stack update if the wait was
# unsuccessful: exit 0 unconditionally (see design note 2)
exit 0