From cb55cc8ce538f3e075754b0328872ec075f4cde9 Mon Sep 17 00:00:00 2001
From: Damien Ciabrini
Date: Thu, 22 Oct 2020 12:23:54 +0200
Subject: [PATCH] Serialize shutdown of pacemaker nodes

When running a minor update in a composable HA deployment, different
roles can run their ansible tasks concurrently. However, there is
currently a race when pacemaker nodes are stopped in parallel [1,2],
which can cause nodes to incorrectly stop themselves once they
reconnect to the cluster.

To prevent concurrent shutdown, use a cluster-wide lock to signal
that one node is about to shut down, and block the others until that
node disconnects from the cluster.

Tested the minor update in a composable HA environment:
  . when run with "openstack update run", every role is updated
    sequentially, and the shutdown lock doesn't interfere.
  . when running multiple ansible tasks in parallel with
    "openstack update run --limit role", pacemaker nodes are correctly
    stopped sequentially thanks to the shutdown lock.
  . when updating an existing overcloud, the new locking script used
    in the review is correctly injected on the overcloud, thanks
    to [3].

[1] https://bugzilla.redhat.com/show_bug.cgi?id=1791841
[2] https://bugzilla.redhat.com/show_bug.cgi?id=1872404
[3] I2ac6bb98e1d4183327e888240fc8d5a70e0d6fcb

Closes-Bug: #1904193
Change-Id: I0e041c6a95a7f53019967f9263df2326b1408c6f
---
 .../pacemaker_mutex_shutdown.sh               | 120 ++++++++++++++++++
 .../pacemaker_resource_lock.sh                |  34 ++++-
 deployment/containers-common.yaml             |   3 +
 .../pacemaker/pacemaker-baremetal-puppet.yaml |   6 +
 4 files changed, 161 insertions(+), 2 deletions(-)
 create mode 100755 container_config_scripts/pacemaker_mutex_shutdown.sh

diff --git a/container_config_scripts/pacemaker_mutex_shutdown.sh b/container_config_scripts/pacemaker_mutex_shutdown.sh
new file mode 100755
index 0000000000..9de8f3a90c
--- /dev/null
+++ b/container_config_scripts/pacemaker_mutex_shutdown.sh
@@ -0,0 +1,120 @@
+#!/bin/bash
+
+# pacemaker_mutex_shutdown.sh --acquire
+# pacemaker_mutex_shutdown.sh --release
+
+set -u
+
+usage() {
+    echo "Shutdown a cluster node in a coordinated way across the cluster"
+    echo "Usage:"
+    echo "   $0 --acquire   # prevent other nodes from shutting down until we hold the lock"
+    echo "   $0 --release   # release the lock; other nodes can then compete for the shutdown lock"
+    echo
+}
+
+log() {
+    echo "$(date -u): $1"
+}
+
+error() {
+    echo "$(date -u): $1" 1>&2
+    exit 1
+}
+
+# Loop until we hold the lock. The lock has a TTL, so we're guaranteed to get it eventually
+shutdown_lock_acquire() {
+    local lockname=$1
+    local requester=$2
+    local ttl=$3
+    local rc=1
+    local current_owner
+    local owner_stopped
+    local owner_rc
+
+    log "Acquiring the shutdown lock"
+    while [ $rc -ne 0 ]; do
+        /var/lib/container-config-scripts/pacemaker_resource_lock.sh --acquire-once $lockname $requester $ttl
+        rc=$?
+        if [ $rc -ne 0 ]; then
+            if [ $rc -eq 2 ]; then
+                error "Could not acquire the shutdown lock due to unrecoverable error (rc: $rc), bailing out"
+            else
+                # The lock is held by another node.
+                current_owner=$(/var/lib/container-config-scripts/pacemaker_resource_lock.sh --owner $lockname)
+                owner_rc=$?
+                if [ $owner_rc -eq 2 ]; then
+                    error "Could not get the shutdown lock owner due to unrecoverable error (rc: $owner_rc), bailing out"
+                fi
+                if [ $owner_rc -eq 0 ]; then
+                    # If the owner is marked as offline, that means it has shut down and
+                    # we can clean the lock preemptively and try to acquire it.
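+                    # crm_mon -1X dumps the cluster status as XML; the XPath count() below is 1
+                    # only when the owner node is reported cleanly offline (online="false", unclean="false").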
+                    owner_stopped=$(crm_mon -1X | xmllint --xpath 'count(//nodes/node[@name="'${current_owner}'" and @online="false" and @unclean="false"])' -)
+                    if [ "${owner_stopped}" = "1" ]; then
+                        log "Shutdown lock held by stopped node '${current_owner}', lock can be released"
+                        /var/lib/container-config-scripts/pacemaker_resource_lock.sh --release $lockname $current_owner
+                        continue
+                    fi
+                fi
+                log "Shutdown lock held by another node (rc: $rc), retrying"
+                sleep 10
+            fi
+        fi
+    done
+    log "Shutdown lock acquired"
+    return 0
+}
+
+
+# Release the lock if we still own it. Not owning it anymore is not fatal
+shutdown_lock_release() {
+    local lockname=$1
+    local requester=$2
+    local rc
+
+    log "Releasing the shutdown lock"
+    /var/lib/container-config-scripts/pacemaker_resource_lock.sh --release $lockname $requester
+    rc=$?
+    if [ $rc -ne 0 ]; then
+        if [ $rc -gt 1 ]; then
+            error "Could not release the shutdown lock due to unrecoverable error (rc: $rc), bailing out"
+        else
+            log "Shutdown lock no longer held, nothing to do"
+        fi
+    else
+        log "Shutdown lock released"
+    fi
+    return 0
+}
+
+
+ACTION=${1:-}
+if [ -z "$ACTION" ]; then
+    error "Action must be specified"
+fi
+
+LOCK_NAME=tripleo-shutdown-lock
+LOCK_OWNER=$(crm_node -n 2>/dev/null)
+rc=$?
+if [ $rc -ne 0 ]; then
+    if [ $rc -eq 102 ]; then
+        log "Cluster is not running locally, no need to acquire the shutdown lock"
+        exit 0
+    else
+        error "Unexpected error while connecting to the cluster (rc: $rc), bailing out"
+    fi
+fi
+
+# We start with a very high TTL, long enough to accommodate a cluster stop.
+# As soon as the node goes offline, the other competing nodes are entitled
+# to steal the lock, so they should never wait that long in practice.
+LOCK_TTL=600
+
+
+case $ACTION in
+    --help) usage; exit 0;;
+    --acquire|-a) shutdown_lock_acquire ${LOCK_NAME} ${LOCK_OWNER} ${LOCK_TTL};;
+    --release|-r) shutdown_lock_release ${LOCK_NAME} ${LOCK_OWNER};;
+    *) error "Invalid action";;
+esac
+exit $?
diff --git a/container_config_scripts/pacemaker_resource_lock.sh b/container_config_scripts/pacemaker_resource_lock.sh
index eb2b36162b..fc4bf91250 100755
--- a/container_config_scripts/pacemaker_resource_lock.sh
+++ b/container_config_scripts/pacemaker_resource_lock.sh
@@ -213,6 +213,29 @@ lock_release() {
 }
 
 
+# Retrieve the owner of a lock from the CIB
+# this is a read-only operation, so no need to log debug info
+lock_get_owner() {
+    local lockname=$1
+    local rc
+    local lock
+    local owner
+
+    lock=$(lock_get $lockname)
+    rc=$?
+    if [ $rc -ne 0 ] && [ $rc -ne $CIB_ENOTFOUND ]; then
+        return 2
+    fi
+
+    if [ -z "$lock" ]; then
+        return 1
+    else
+        lock_owner $lock
+        return 0
+    fi
+}
+
+
 ACTION=$1
 LOCKNAME=$2
 REQUESTER=$3
@@ -223,8 +246,13 @@ if [ -z "$ACTION" ]; then
 fi
 
 if [ $ACTION != "--help" ]; then
-    if [ -z "$LOCKNAME" ] || [ -z "$REQUESTER" ]; then
-        error "You must specific a lock name and a requester"
+    if [ -z "$LOCKNAME" ]; then
+        error "You must specify a lock name"
+    fi
+    if [ $ACTION != "--owner" ] && [ $ACTION != "-o" ]; then
+        if [ -z "$REQUESTER" ]; then
+            error "You must specify a lock requester"
+        fi
     fi
 fi
 
@@ -232,6 +260,8 @@ case $ACTION in
     --help) usage; exit 0;;
     --acquire|-a) try_action lock_acquire $LOCKNAME $REQUESTER $TTL;;
     --release|-r) try_action lock_release $LOCKNAME $REQUESTER;;
+    --acquire-once|-A) lock_acquire $LOCKNAME $REQUESTER $TTL;;
+    --owner|-o) lock_get_owner $LOCKNAME;;
     *) error "Invalid action";;
 esac
 exit $?
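
Note for reviewers: a minimal sketch of the return-code contract that
pacemaker_mutex_shutdown.sh relies on when driving the lock helper above
(0 = lock acquired/released, 2 = unrecoverable CIB error, any other
non-zero = lock currently held or not found). The controller-0/controller-1
node names are illustrative only:

    # on controller-0: take the lock once, without retrying internally
    /var/lib/container-config-scripts/pacemaker_resource_lock.sh --acquire-once tripleo-shutdown-lock controller-0 600
    echo $?    # 0: lock acquired with a 600s TTL

    # on controller-1: the same call now returns non-zero, so the caller asks who owns the lock
    /var/lib/container-config-scripts/pacemaker_resource_lock.sh --acquire-once tripleo-shutdown-lock controller-1 600
    echo $?    # non-zero (and not 2): lock already held, retry later
    /var/lib/container-config-scripts/pacemaker_resource_lock.sh --owner tripleo-shutdown-lock
    # prints "controller-0"; rc 1 means no lock exists, rc 2 means a CIB error and the caller bails out
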
diff --git a/deployment/containers-common.yaml b/deployment/containers-common.yaml
index 8b119b8eb4..cb3ae68286 100644
--- a/deployment/containers-common.yaml
+++ b/deployment/containers-common.yaml
@@ -127,6 +127,9 @@ outputs:
       pacemaker_mutex_restart_bundle.sh:
         mode: "0755"
         content: { get_file: ../container_config_scripts/pacemaker_mutex_restart_bundle.sh }
+      pacemaker_mutex_shutdown.sh:
+        mode: "0755"
+        content: { get_file: ../container_config_scripts/pacemaker_mutex_shutdown.sh }
 
   volumes_base:
     description: Base volume list
diff --git a/deployment/pacemaker/pacemaker-baremetal-puppet.yaml b/deployment/pacemaker/pacemaker-baremetal-puppet.yaml
index b7c5624cd1..447706960e 100644
--- a/deployment/pacemaker/pacemaker-baremetal-puppet.yaml
+++ b/deployment/pacemaker/pacemaker-baremetal-puppet.yaml
@@ -370,9 +370,15 @@ outputs:
                 echo "Could not retrieve and clear location constraint for VIP $v" 2>&1
               fi
             done
+        - name: Acquire the cluster shutdown lock to stop pacemaker cluster
+          when: step|int == 1
+          command: systemd-cat -t ha-shutdown /var/lib/container-config-scripts/pacemaker_mutex_shutdown.sh --acquire
         - name: Stop pacemaker cluster
           when: step|int == 1
           pacemaker_cluster: state=offline
         - name: Start pacemaker cluster
           when: step|int == 4
           pacemaker_cluster: state=online
+        - name: Release the cluster shutdown lock
+          when: step|int == 4
+          command: systemd-cat -t ha-shutdown /var/lib/container-config-scripts/pacemaker_mutex_shutdown.sh --release
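
For context, a rough sketch of the sequence each pacemaker node now follows
during a minor update, assuming the scripts are deployed under
/var/lib/container-config-scripts on the overcloud nodes. The pcs commands
stand in for what the pacemaker_cluster Ansible module does; they are
illustrative, not part of this change:

    # step 1 update_tasks: block until this node owns the cluster-wide shutdown
    # lock, then leave the cluster; other nodes stay blocked in --acquire until
    # this node is seen offline by the cluster
    systemd-cat -t ha-shutdown /var/lib/container-config-scripts/pacemaker_mutex_shutdown.sh --acquire
    pcs cluster stop     # equivalent of pacemaker_cluster: state=offline

    # ... intermediate steps: packages and containers are updated while the node
    # is out of the cluster ...

    # step 4 update_tasks: rejoin the cluster, then let the next node compete for the lock
    pcs cluster start    # equivalent of pacemaker_cluster: state=online
    systemd-cat -t ha-shutdown /var/lib/container-config-scripts/pacemaker_mutex_shutdown.sh --release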