From c8f5fdfc3609d38bea46f96ef378dcb5ecd14a2b Mon Sep 17 00:00:00 2001 From: Damien Ciabrini Date: Wed, 25 Nov 2020 16:01:49 +0100 Subject: [PATCH] HA: reimplement resource locks with cibadmin A resource lock is used as a synchronization point between pacemaker cluster nodes. It is currently implemented by adding an attribute in an offline copy of CIB, and merging the update in the CIB only if no concurrent updates has occurred in the mean time. The problem with that approach is that - even if the concurrency is enforced by pacemaker - the offline CIB contains a snapshot of the cluster state; so pushing back the entire offline CIB pushes old resources' state back into the cluster. This causes additional burden on the cluster and sometimes caused unexpected cluster state transition. Reimplement the locking strategy with cibadmin; It's a much faster approach, that provides the same concurrency guarantees, and only changes one attribute rather than the entire CIB, so it doesn't cause unexpected cluster state transition. Closes-Bug: #1905585 Change-Id: Id10f026c8b31cad7b7313ac9427a99b3e6744788 --- .../pacemaker_resource_lock.sh | 100 +++++++++--------- 1 file changed, 50 insertions(+), 50 deletions(-) diff --git a/container_config_scripts/pacemaker_resource_lock.sh b/container_config_scripts/pacemaker_resource_lock.sh index e0a451a930..eb2b36162b 100755 --- a/container_config_scripts/pacemaker_resource_lock.sh +++ b/container_config_scripts/pacemaker_resource_lock.sh @@ -1,12 +1,7 @@ #!/bin/bash MAX_RETRIES=10 -TMP_CIB=$(mktemp -p /var/lib/pacemaker/cib -t tmpcib.XXXXXXXX) -function finish { - rm -f $TMP_CIB -} -trap finish EXIT -trap exit INT TERM +CIB_ENOTFOUND=105 usage() { echo "Set a global property in the cluster with a validity timestamp." @@ -25,16 +20,39 @@ error() { exit 1 } +lock_create() { + local name=$1 + local data=$2 + # cibadmin won't overwrite a key if someone else succeeded to create it concurrently + cibadmin --sync-call --scope crm_config --create --xml-text "" &>/dev/null + return $? +} + +lock_update() { + local name=$1 + local expected_data=$2 + local new_data=$3 + # we only update the lock we expect to see, so we can't update someone else's lock + cibadmin --sync-call --scope crm_config --modify --xpath "//cluster_property_set/nvpair[@name='${name}' and @value='${expected_data}']/.." --xml-text "" &>/dev/null + return $? +} + +lock_delete() { + local name=$1 + local expected_data=$2 + # we only delete the lock we expect to see, so we can't delete someone else's lock + cibadmin --sync-call --scope crm_config --delete --xpath "//cluster_property_set/nvpair[@name='${name}' and @value='${expected_data}']/.." &>/dev/null + return $? +} lock_get() { - local cib_copy=$1 - local lockname=$2 + local lockname=$1 local res local rc - res=$(pcs -f $cib_copy property show "$lockname") + res=$(cibadmin --query --scope crm_config --xpath "//cluster_property_set/nvpair[@name='$lockname']" 2>/dev/null) rc=$? if [ $rc -eq 0 ]; then - echo "$res" | grep -w "$lockname" | cut -d' ' -f3 + echo "$res" | sed -n 's/.*value="\([^"]*\)".*/\1/p' fi return $rc } @@ -92,8 +110,8 @@ try_action() { done } -# The lock mechanism uses the CIB's num_updates tag to implement -# a conditional store. Cluster-wide locking is guaranteed by pacemaker +# The lock mechanism uses cibadmin's atomic creation so cluster-wide +# state coherency is guaranteed by pacemaker lock_acquire() { local lockname=$1 local requester=$2 @@ -103,24 +121,15 @@ lock_acquire() { local expiry local owner - log "Snapshot the current CIB" - pcs cluster cib > $TMP_CIB - rc=$? - if [ $rc -ne 0 ]; then - log "Could not snapshot the CIB" - return 2 - fi - log "Check whether the lock is already held in the CIB" - lock=$(lock_get $TMP_CIB $lockname) + lock=$(lock_get $lockname) rc=$? - if [ $rc -ne 0 ]; then - log "Could not retrieve info from snapshot CIB" + if [ $rc -ne 0 ] && [ $rc -ne $CIB_ENOTFOUND ]; then + log "Could not retrieve info from the CIB" return 2 fi if [ -n "$lock" ]; then - log "Lock exists, check whether it has expired" lock_has_expired $lock rc=$? if [ $rc -eq 0 ]; then @@ -129,7 +138,7 @@ lock_acquire() { # lock is still held. check whether we're the owner owner=$(lock_owner $lock) if [ "$owner" = "$requester" ];then - log "Already own the lock, acquiring attempt will just reconfigure the TTL" + log "Requester already owns the lock, acquiring attempt will just reconfigure the TTL" else log "Lock is held by someone else ($owner)" return 1 @@ -139,20 +148,24 @@ lock_acquire() { log "Lock is not held yet" fi - log "Prepare the snapshot CIB to acquire the lock" + # prepare the lock info expiry=$(($(date +%s) + $ttl)) - pcs -f $TMP_CIB property set "$lockname"="$requester:$expiry" --force - # Store Conditional: only works if no update have been pushed in the meantime" - log "Try to push the CIB to signal lock is acquired" - pcs cluster cib-push $TMP_CIB - rc=$? + if [ -n "$lock" ]; then + log "Attempting to update the lock" + lock_update $lockname "$lock" "$requester:$expiry" + rc=$? + else + log "Attempting to acquire the lock" + lock_create $lockname "$requester:$expiry" + rc=$? + fi if [ $rc -eq 0 ]; then log "Lock '$lockname' acquired by '$requester', valid until $(date -d @$expiry)" return 0 else - log "CIB changed since snapshot, lock cannot be acquired" + log "CIB changed, lock cannot be acquired" return 3 fi } @@ -167,19 +180,11 @@ lock_release() { local lock local owner - log "Snapshot the current CIB" - pcs cluster cib > $TMP_CIB - rc=$? - if [ $rc -ne 0 ]; then - log "Could not snapshot the CIB" - return 2 - fi - log "Check whether the lock is already held in the CIB" - lock=$(lock_get $TMP_CIB $lockname) + lock=$(lock_get $lockname) rc=$? - if [ $rc -ne 0 ]; then - log "Could not retrieve info from snapshot CIB" + if [ $rc -ne 0 ] && [ $rc -ne $CIB_ENOTFOUND ]; then + log "Could not retrieve info from the CIB" return 2 fi @@ -195,19 +200,14 @@ lock_release() { fi fi - log "Prepare the snapshot CIB to release the lock" - pcs -f $TMP_CIB property set "$lockname"="" - - # Store Conditional: only works if no update have been pushed in the meantime" - log "Try to push the CIB to signal lock is released" - pcs cluster cib-push $TMP_CIB + lock_delete $lockname "$lock" rc=$? if [ $rc -eq 0 ]; then log "Lock '$lockname' released by '$requester'" return 0 else - log "CIB changed since snapshot, lock cannot be released" + log "CIB deletion error, lock cannot be released" return 3 fi }