Serialize shutdown of pacemaker nodes

When running a minor update in a composable HA environment, different roles could run ansible tasks concurrently. However, there is currently a race when pacemaker nodes are stopped in parallel [1,2], that could cause nodes to incorrectly stop themselves once they reconnect to the cluster. To prevent concurrent shutdown, use a cluster-wide lock to signal that one node is about to shut down, and block the others until the node disconnects from the cluster. Tested the minor update in a composable HA environment: . when run with "openstack update run", every role is updated sequentially, and the shutdown lock doesn't interfere. . when running multiple ansible tasks in parallel "openstack update run --limit role<X>", pacemaker nodes are correctly stopped sequentially thanks to the shutdown lock. . when updating an existing overcloud, the new locking script used in the review is correctly injected on the overcloud, thanks to [3]. [1] https://bugzilla.redhat.com/show_bug.cgi?id=1791841 [2] https://bugzilla.redhat.com/show_bug.cgi?id=1872404 [3] I2ac6bb98e1d4183327e888240fc8d5a70e0d6fcb Closes-Bug: #1904193 Change-Id: I0e041c6a95a7f53019967f9263df2326b1408c6f
2020-10-22 12:23:54 +02:00 · 2020-10-22 12:23:54 +02:00 · cb55cc8ce5
parent 93a6f9d4cf
commit cb55cc8ce5
4 changed files with 161 additions and 2 deletions
--- a/container_config_scripts/pacemaker_mutex_shutdown.sh
+++ b/container_config_scripts/pacemaker_mutex_shutdown.sh
@ -0,0 +1,120 @@
 #!/bin/bash
 # pacemaker_mutex_shutdown.sh --acquire
 # pacemaker_mutex_shutdown.sh --release
 set -u
 # Print the command-line help of this script on stdout.
 # The script name shown in the help is taken from $0.
 usage() {
    printf '%s\n' \
        "Shutdown a cluster node in a coordinated way across the cluster" \
        "Usage:" \
        "   $0 --acquire # prevent other node from shutting down until we hold the lock" \
        "   $0 --release # release the lock, other node can compete for the shutdown lock" \
        ""
 }
 # Write an informational message on stdout, prefixed with the current
 # UTC date as printed by 'date -u'.
 #   $1 - message to print
 log() {
    local stamp
    stamp=$(date -u)
    printf '%s: %s\n' "$stamp" "$1"
 }
 # Report a fatal problem on stderr, prefixed with the current UTC date,
 # then terminate the script with exit status 1.
 #   $1 - message to print
 error() {
    printf '%s: %s\n' "$(date -u)" "$1" >&2
    exit 1
 }
 # Loop until we hold the lock. The lock has a TTL, so we're guaranteed to get it eventually
 #
 # Arguments:
 #   $1 - name of the lock
 #   $2 - requester of the lock (the node that wants to shut down)
 #   $3 - TTL handed over to the lock script when acquiring
 # Returns:
 #   0 once the lock is held. The script is terminated (via error) when the
 #   lock script reports an unrecoverable error (rc 2) while acquiring the
 #   lock or while querying its owner.
 shutdown_lock_acquire() {
    local lockname=$1
    local requester=$2
    local ttl=$3
    local lock_script=/var/lib/container-config-scripts/pacemaker_resource_lock.sh
    local rc=1
    local current_owner
    local owner_stopped
    local owner_rc
    local release_rc
    log "Acquiring the shutdown lock"
    while [ "$rc" -ne 0 ]; do
        "$lock_script" --acquire-once "$lockname" "$requester" "$ttl"
        rc=$?
        if [ "$rc" -eq 0 ]; then
            break
        fi
        if [ "$rc" -eq 2 ]; then
            error "Could not acquire the shutdown lock due to unrecoverable error (rc: $rc), bailing out"
        fi
        # The lock is held by another node.
        current_owner=$("$lock_script" --owner "$lockname")
        owner_rc=$?
        if [ "$owner_rc" -eq 2 ]; then
            error "Could not get the shutdown lock owner due to unrecoverable error (rc: $owner_rc), bailing out"
        fi
        if [ "$owner_rc" -eq 0 ]; then
            # If the owner is marked as offline, that means it has shutdown and
            # we can clean the lock preemptively and try to acquire it.
            # The xpath counts the nodes named after the owner that are both
            # offline and clean; the whole expression is passed as one word.
            owner_stopped=$(crm_mon -1X | xmllint --xpath "count(//nodes/node[@name=\"${current_owner}\" and @online=\"false\" and @unclean=\"false\"])" -)
            if [ "${owner_stopped}" = "1" ]; then
                log "Shutdown lock held by stopped node '${current_owner}', lock can be released"
                "$lock_script" --release "$lockname" "$current_owner"
                release_rc=$?
                # rc 0 (released) or 1 (lock not held by that owner anymore):
                # retry to acquire right away.
                if [ "$release_rc" -le 1 ]; then
                    continue
                fi
                # Any other rc means the release itself failed: back off
                # before retrying instead of looping with no delay.
                log "Could not release the shutdown lock held by stopped node '${current_owner}' (rc: $release_rc), retrying"
                sleep 10
                continue
            fi
        fi
        log "Shutdown lock held by another node (rc: $rc), retrying"
        sleep 10
    done
    log "Shutdown lock acquired"
    return 0
 }
 # Release the lock if we still own it. Not owning it anymore is not fatal
 #
 # Arguments:
 #   $1 - name of the lock
 #   $2 - requester that is expected to own the lock
 # Returns:
 #   0 when the lock was released (lock script rc 0) or was no longer held
 #   (rc 1). Any other rc from the lock script is treated as unrecoverable
 #   and terminates the script via error.
 shutdown_lock_release() {
    local lockname=$1
    local requester=$2
    local rc
    log "Releasing the shutdown lock"
    /var/lib/container-config-scripts/pacemaker_resource_lock.sh --release "$lockname" "$requester"
    rc=$?
    if [ "$rc" -eq 0 ]; then
        log "Shutdown lock released"
    elif [ "$rc" -eq 1 ]; then
        log "Shutdown lock no longer held, nothing to do"
    else
        error "Could not release the shutdown lock due to unrecoverable error (rc: $rc), bailing out"
    fi
    return 0
 }
 # Entry point: parse the requested action and run it.
 # The ':-' default is required because the script runs with 'set -u':
 # a bare $1 would abort with "unbound variable" before the missing
 # action could be reported below.
 ACTION=${1:-}
 if [ -z "$ACTION" ]; then
    error "Action must be specified"
 fi
 # Printing the help does not need the cluster, so serve it before
 # querying the local node name.
 if [ "$ACTION" = "--help" ]; then
    usage
    exit 0
 fi
 LOCK_NAME=tripleo-shutdown-lock
 # The lock is requested on behalf of the local node, as named by crm_node.
 LOCK_OWNER=$(crm_node -n 2>/dev/null)
 rc=$?
 if [ "$rc" -ne 0 ]; then
    # rc 102 from crm_node is handled as "cluster not running on this node":
    # nothing to coordinate, so this is not an error.
    if [ "$rc" -eq 102 ]; then
        log "Cluster is not running locally, no need to aquire the shutdown lock"
        exit 0
    else
        error "Unexpected error while connecting to the cluster (rc: $rc), bailing out"
    fi
 fi
 # We start with a very high TTL, that is long enough to accommodate a cluster stop.
 # As soon as the node goes offline, the other competing nodes are entitled
 # to steal the lock, so they should never wait that long in practice.
 LOCK_TTL=600
 case "$ACTION" in
    --acquire|-a) shutdown_lock_acquire "${LOCK_NAME}" "${LOCK_OWNER}" "${LOCK_TTL}";;
    --release|-r) shutdown_lock_release "${LOCK_NAME}" "${LOCK_OWNER}";;
    *) error "Invalid action";;
 esac
 exit $?
--- a/container_config_scripts/pacemaker_resource_lock.sh
+++ b/container_config_scripts/pacemaker_resource_lock.sh
@ -213,6 +213,29 @@ lock_release() {
 }
 # Look up a lock in the CIB and hand it over to lock_owner to report
 # who owns it. Read-only operation, hence no debug logging here.
 #   $1 - name of the lock
 # Return codes:
 #   0 - a lock was found and passed to lock_owner
 #   1 - no lock was found
 #   2 - lock_get failed with an error other than CIB_ENOTFOUND
 lock_get_owner() {
    local lockname=$1
    local rc lock owner
    lock=$(lock_get $lockname)
    rc=$?
    if [ $rc -ne 0 ] && [ $rc -ne $CIB_ENOTFOUND ]; then
        return 2
    fi
    # Nothing stored under that name: there is no owner to report
    [ -n "$lock" ] || return 1
    lock_owner $lock
    return 0
 }
 ACTION=$1
 LOCKNAME=$2
 REQUESTER=$3
@ -223,8 +246,13 @@ if [ -z "$ACTION" ]; then
 fi
 if [ $ACTION != "--help" ]; then
-    if [ -z "$LOCKNAME" ] || [ -z "$REQUESTER" ]; then
+    if [ -z "$LOCKNAME" ]; then
-        error "You must specific a lock name and a requester"
+        error "You must specific a lock name"
    fi
    if [ $ACTION != "--owner" ] && [ $ACTION != "-o" ]; then
        if [ -z "$REQUESTER" ]; then
            error "You must specific a lock requester"
        fi
    fi
 fi
@ -232,6 +260,8 @@ case $ACTION in
    --help) usage; exit 0;;
    --acquire|-a) try_action lock_acquire $LOCKNAME $REQUESTER $TTL;;
    --release|-r) try_action lock_release $LOCKNAME $REQUESTER;;
    --acquire-once|-A) lock_acquire $LOCKNAME $REQUESTER $TTL;;
    --owner|-o) lock_get_owner $LOCKNAME;;
    *) error "Invalid action";;
 esac
 exit $?
--- a/deployment/containers-common.yaml
+++ b/deployment/containers-common.yaml
@ -127,6 +127,9 @@ outputs:
      pacemaker_mutex_restart_bundle.sh:
        mode: "0755"
        content: { get_file: ../container_config_scripts/pacemaker_mutex_restart_bundle.sh }
      pacemaker_mutex_shutdown.sh:
        mode: "0755"
        content: { get_file: ../container_config_scripts/pacemaker_mutex_shutdown.sh }
  volumes_base:
    description: Base volume list
--- a/deployment/pacemaker/pacemaker-baremetal-puppet.yaml
+++ b/deployment/pacemaker/pacemaker-baremetal-puppet.yaml
@ -370,9 +370,15 @@ outputs:
                    echo "Could not retrieve and clear location constraint for VIP $v" 2>&1
                fi
            done
         # Take the cluster shutdown lock before pacemaker is stopped on this
         # node (both tasks run at step 1, in this order). The script is run
         # through systemd-cat with the 'ha-shutdown' identifier.
         - name: Acquire the cluster shutdown lock to stop pacemaker cluster
           when: step|int == 1
           command: systemd-cat -t ha-shutdown /var/lib/container-config-scripts/pacemaker_mutex_shutdown.sh --acquire
         - name: Stop pacemaker cluster
           when: step|int == 1
           pacemaker_cluster: state=offline
         # At step 4 pacemaker is started again, and only then is the
         # shutdown lock acquired at step 1 released.
         - name: Start pacemaker cluster
           when: step|int == 4
           pacemaker_cluster: state=online
         - name: Release the cluster shutdown lock
           when: step|int == 4
           command: systemd-cat -t ha-shutdown /var/lib/container-config-scripts/pacemaker_mutex_shutdown.sh --release