Damien Ciabrini cb55cc8ce5 Serialize shutdown of pacemaker nodes
When running minor update in a composable HA, different
roles could run ansible tasks concurrently. However,
there is currently a race when pacemaker nodes are
stopped in parallel [1,2], that could cause nodes to
incorrectly stop themselves once they reconnect to the
cluster.

To prevent concurrent shutdown, use a cluster-wide lock
to signal that one node is about to shut down, and block
the others until the node disconnects from the cluster.

Tested the minor update in a composable HA environment:
  . when run with "openstack update run", every role
    is updated sequentially, and the shutdown lock
    doesn't interfere.
  . when running multiple ansible tasks in parallel
    "openstack update run --limit role<X>", pacemaker
    nodes are correctly stopped sequentially thanks
    to the shutdown lock.
  . when updating an existing overcloud, the new
    locking script used in the review is correctly
    injected on the overcloud, thanks to [3].

[1] https://bugzilla.redhat.com/show_bug.cgi?id=1791841
[2] https://bugzilla.redhat.com/show_bug.cgi?id=1872404
[3] I2ac6bb98e1d4183327e888240fc8d5a70e0d6fcb

Closes-Bug: #1904193
Change-Id: I0e041c6a95a7f53019967f9263df2326b1408c6f
2020-12-24 14:06:32 +01:00

121 lines
3.9 KiB
Bash
Executable File

#!/bin/bash
# pacemaker_mutex_shutdown.sh --acquire
# pacemaker_mutex_shutdown.sh --release
set -u
# Print the command-line help on stdout: a one-line summary, the two
# supported invocations (prefixed with the script name), and a trailing
# blank line.
usage() {
    printf '%s\n' \
        "Shutdown a cluster node in a coordinated way across the cluster" \
        "Usage:" \
        " $0 --acquire # prevent other node from shutting down until we hold the lock" \
        " $0 --release # release the lock, other node can compete for the shutdown lock" \
        ""
}
# Print a message on stdout, prefixed with the current UTC date.
# Arguments:
#   $1 - message text
log() {
    local stamp
    stamp=$(date -u)
    printf '%s: %s\n' "${stamp}" "$1"
}
# Print a message on stderr, prefixed with the current UTC date, then
# terminate the script with exit status 1.
# Arguments:
#   $1 - message text
error() {
    local stamp
    stamp=$(date -u)
    printf '%s: %s\n' "${stamp}" "$1" >&2
    exit 1
}
# Loop until we hold the lock. The lock has a TTL, so we're guaranteed to get it eventually
#
# Arguments:
#   $1 - name of the lock
#   $2 - name of the node requesting the lock
#   $3 - TTL handed to the lock script when acquiring
# Returns:
#   0 once the lock is acquired. Unrecoverable errors reported by the lock
#   script are fatal: error() is called, which exits the script.
shutdown_lock_acquire() {
    local lockname=$1
    local requester=$2
    local ttl=$3
    local lock_script=/var/lib/container-config-scripts/pacemaker_resource_lock.sh
    local rc=1
    local current_owner
    local owner_stopped
    local owner_rc
    local release_rc

    log "Acquiring the shutdown lock"
    while [ "$rc" -ne 0 ]; do
        "$lock_script" --acquire-once "$lockname" "$requester" "$ttl"
        rc=$?
        if [ "$rc" -eq 0 ]; then
            break
        fi
        if [ "$rc" -eq 2 ]; then
            error "Could not acquire the shutdown lock due to unrecoverable error (rc: $rc), bailing out"
        fi

        # The lock is held by another node.
        current_owner=$("$lock_script" --owner "$lockname")
        owner_rc=$?
        if [ "$owner_rc" -eq 2 ]; then
            error "Could not get the shutdown lock owner due to unrecoverable error (rc: $owner_rc), bailing out"
        fi
        if [ "$owner_rc" -eq 0 ]; then
            # If the owner is marked as offline, that means it has shutdown and
            # we can clean the lock preemptively and try to acquire it.
            owner_stopped=$(crm_mon -1X | xmllint --xpath 'count(//nodes/node[@name="'"${current_owner}"'" and @online="false" and @unclean="false"])' -)
            if [ "${owner_stopped}" = "1" ]; then
                log "Shutdown lock held by stopped node '${current_owner}', lock can be released"
                "$lock_script" --release "$lockname" "$current_owner"
                release_rc=$?
                # Do not silently retry forever when the stale lock cannot be
                # removed: without this check the loop would spin with no
                # delay. A return code of 1 only means the lock was not held
                # by that node anymore, so retrying right away is fine.
                if [ "$release_rc" -gt 1 ]; then
                    error "Could not release the shutdown lock held by stopped node '${current_owner}' due to unrecoverable error (rc: $release_rc), bailing out"
                fi
                continue
            fi
        fi

        log "Shutdown lock held by another node (rc: $rc), retrying"
        sleep 10
    done
    log "Shutdown lock acquired"
    return 0
}
# Release the lock if we still own it. Not owning it anymore is not fatal
#
# Arguments:
#   $1 - name of the lock
#   $2 - name of the node releasing the lock
# Returns:
#   0 when the lock was released or was no longer held. Any other outcome
#   of the lock script is fatal: error() is called, which exits the script.
shutdown_lock_release() {
    local lockname=$1
    local requester=$2
    local rc

    log "Releasing the shutdown lock"
    # Quoted so that each value reaches the lock script as exactly one argument.
    /var/lib/container-config-scripts/pacemaker_resource_lock.sh --release "$lockname" "$requester"
    rc=$?
    case "$rc" in
        0) log "Shutdown lock released" ;;
        1) log "Shutdown lock no longer held, nothing to do" ;;
        *) error "Could not release the shutdown lock due to unrecoverable error (rc: $rc), bailing out" ;;
    esac
    return 0
}
# Entry point: read the requested action, look up the local cluster node
# name, then acquire or release the cluster-wide shutdown lock.

# Default to an empty string: the script runs under "set -u", so a bare $1
# would abort with "unbound variable" before the check below could report
# the missing action.
ACTION=${1:-}
if [ -z "$ACTION" ]; then
    error "Action must be specified"
fi

# Showing the help does not need a running cluster, so serve it before
# trying to contact the cluster.
if [ "$ACTION" = "--help" ]; then
    usage
    exit 0
fi

LOCK_NAME=tripleo-shutdown-lock
# The lock owner is the name of the local node as reported by the cluster.
LOCK_OWNER=$(crm_node -n 2>/dev/null)
rc=$?
if [ "$rc" -ne 0 ]; then
    if [ "$rc" -eq 102 ]; then
        log "Cluster is not running locally, no need to aquire the shutdown lock"
        exit 0
    else
        error "Unexpected error while connecting to the cluster (rc: $rc), bailing out"
    fi
fi

# We start with a very high TTL, long enough to accommodate a cluster stop.
# As soon as the node goes offline, the other competing nodes are entitled
# to steal the lock, so they should never wait that long in practice.
LOCK_TTL=600

case "$ACTION" in
    --acquire|-a) shutdown_lock_acquire "${LOCK_NAME}" "${LOCK_OWNER}" "${LOCK_TTL}" ;;
    --release|-r) shutdown_lock_release "${LOCK_NAME}" "${LOCK_OWNER}" ;;
    *) error "Invalid action" ;;
esac
exit $?