Browse Source

Rolling certificate update for HA services

(manually squashed the subsequent fix [1] into a single commit)
(also manually squashed [2] because of #1906505)

There are certain HA clustered services (e.g. galera) that don't
have the ability natively to reload their TLS certificate without
being restarted. If too many replicas are restarted concurrently
this might result in full service disruption.

To ensure service availability, provide a means to ensure that
only one service replica is restarted at a time in the cluster.
This works by using pacemaker's CIB to implement a cluster-wide
restart lock for a service. The lock has a TTL so it's guaranteed
to be eventually released without requiring complex contingency
cleanup in case of failures.

Tested locally by running the following:
1. force recreate certificate on all nodes at once for galera
   (ipa-cert resubmit -i mysql), and verify that the resources
   restart one after the other

2. create a lock manually in pacemaker, recreate certificate for
   galera on all nodes, and verify that no resource is restarted
   before the manually created lock expires.

3. create a lock manually, let it expires, recreate a certificate,
   and verify that the resource is restarted appropriately and the
   lock gets cleaned up from pacemaker once the restart finished.

[1] Id10f026c8b31cad7b7313ac9427a99b3e6744788
[2] I17f1364932e43b8487515084e41b525e186888db

Related-Bug: #1904193
Closes-Bug: #1885113
Change-Id: Ib2b62e33b34cf72edfdae6299cf432259bf960a2
(cherry picked from commit 0f54889408)
(cherry picked from commit c8f5fdfc36)
(cherry picked from commit 8b16911cc2)
(cherry picked from commit 8968c7efd6)
changes/30/760130/4
Damien Ciabrini 1 year ago
parent
commit
916b9385c6
  1. 90
      container_config_scripts/pacemaker_mutex_restart_bundle.sh
  2. 237
      container_config_scripts/pacemaker_resource_lock.sh
  3. 9
      deployment/certs/certmonger-user-baremetal-puppet.yaml
  4. 6
      deployment/containers-common.yaml

90
container_config_scripts/pacemaker_mutex_restart_bundle.sh

@ -0,0 +1,90 @@
#!/bin/bash
# pacemaker_mutex_restart_bundle.sh --lock mysql galera galera-bundle Master _
# pacemaker_mutex_restart_bundle.sh --lock ovn_dbs ovndb_servers ovn-dbs-bundle Slave Master
set -u
usage() {
echo "Restart a clustered resource in a coordinated way across the cluster"
echo "Usage:"
echo " $0 --lock <tripleo-service> <pcmk-resource> <pcmk-bundle> <target-state-local> <target-state-cluster>"
echo
}
log() {
echo "$(date -u): $1"
}
error() {
echo "$(date -u): $1" 1>&2
exit 1
}
ACTION=$1
case $ACTION in
--help) usage; exit 0;;
--lock) ;;
*) error "Unknown action '$ACTION'";;
esac
TRIPLEO_SERVICE=$2
LOCK_NAME=${TRIPLEO_SERVICE}-restart-lock
LOCK_OWNER=$(crm_node -n 2>/dev/null)
rc=$?
if [ $rc -ne 0 ]; then
if [ $rc -eq 102 ]; then
log "Cluster is not running locally, no need to restart resource $TRIPLEO_SERVICE"
exit 0
else
error "Unexpected error while connecting to the cluster (rc: $rc), bailing out"
fi
fi
RESOURCE_NAME=$3
BUNDLE_NAME=$4
WAIT_TARGET_LOCAL=$5
WAIT_TARGET_ANYWHERE=${6:-_}
# The lock TTL should accomodate for the resource start/promote timeout
if [ "$RESOURCE_NAME" != "$BUNDLE_NAME" ]; then
if [ "$WAIT_TARGET_LOCAL" = "Master" ] || [ "$WAIT_TARGET_ANYWHERE" = "Master" ]; then
rsc_op="promote"
else
rsc_op="start"
fi
# <op id="galera-promote-interval-0s" interval="0s" name="promote" on-fail="block" timeout="300s"/>
PCMK_TTL=$(cibadmin -Q | xmllint -xpath "string(//primitive[@id='${RESOURCE_NAME}']/operations/op[@name='${rsc_op}']/@timeout)" - | sed 's/s$//')
LOCK_TTL=$((PCMK_TTL + 30))
else
# The podman RA's default start timeout
LOCK_TTL=90
fi
log "Acquire a ${LOCK_TTL}s restart lock for service $TRIPLEO_SERVICE before restarting it"
# Loop until we hold the lock. The lock has a TTL, so we're guaranteed to get it eventually
rc=1
while [ $rc -ne 0 ]; do
/var/lib/container-config-scripts/pacemaker_resource_lock.sh --acquire $LOCK_NAME $LOCK_OWNER $LOCK_TTL
rc=$?
if [ $rc != 0 ]; then
if [ $rc -gt 1 ]; then
error "Could not acquire lock due to unrecoverable error (rc: $rc), bailing out"
else
log "Could not acquire lock, retrying"
sleep 10
fi
fi
done
log "Restart the service $TRIPLEO_SERVICE locally"
# Reuse the local restart script in t-h-t (driven by env var TRIPLEO_MINOR_UPDATE)
TRIPLEO_MINOR_UPDATE=true /var/lib/container-config-scripts/pacemaker_restart_bundle.sh $TRIPLEO_SERVICE $RESOURCE_NAME $BUNDLE_NAME $WAIT_TARGET_LOCAL $WAIT_TARGET_ANYWHERE
# If we reached this point, always try to release the lock
log "Release the restart lock for service $TRIPLEO_SERVICE"
/var/lib/container-config-scripts/pacemaker_resource_lock.sh --release $LOCK_NAME $LOCK_OWNER
rc=$?
if [ $rc -ne 0 ] && [ $rc -ne 1 ]; then
error "Could not release held lock (rc: $rc)"
fi

237
container_config_scripts/pacemaker_resource_lock.sh

@ -0,0 +1,237 @@
#!/bin/bash
MAX_RETRIES=10
CIB_ENOTFOUND=105
usage() {
echo "Set a global property in the cluster with a validity timestamp."
echo "Usage:"
echo " $0 --acquire <lock_name> <lock_owner> <lock_ttl_in_seconds>"
echo " $0 --release <lock_name> <lock_owner>"
echo
}
log() {
echo "$(date -u): $1" 1>&2
}
error() {
echo "$(date -u): $1" 1>&2
exit 1
}
lock_create() {
local name=$1
local data=$2
# cibadmin won't overwrite a key if someone else succeeded to create it concurrently
cibadmin --sync-call --scope crm_config --create --xml-text "<cluster_property_set id='${name}'><nvpair id='${name}-pair' name='${name}' value='${data}'/></cluster_property_set>" &>/dev/null
return $?
}
lock_update() {
local name=$1
local expected_data=$2
local new_data=$3
# we only update the lock we expect to see, so we can't update someone else's lock
cibadmin --sync-call --scope crm_config --modify --xpath "//cluster_property_set/nvpair[@name='${name}' and @value='${expected_data}']/.." --xml-text "<nvpair id='${name}-pair' name='${name}' value='${new_data}'/>" &>/dev/null
return $?
}
lock_delete() {
local name=$1
local expected_data=$2
# we only delete the lock we expect to see, so we can't delete someone else's lock
cibadmin --sync-call --scope crm_config --delete --xpath "//cluster_property_set/nvpair[@name='${name}' and @value='${expected_data}']/.." &>/dev/null
return $?
}
lock_get() {
local lockname=$1
local res
local rc
res=$(cibadmin --query --scope crm_config --xpath "//cluster_property_set/nvpair[@name='$lockname']" 2>/dev/null)
rc=$?
if [ $rc -eq 0 ]; then
echo "$res" | sed -n 's/.*value="\([^"]*\)".*/\1/p'
fi
return $rc
}
lock_owner() {
local lock=$1
echo "$lock" | cut -d':' -f1
}
lock_has_expired() {
local lock=$1
local expiry=$(echo "$lock" | cut -d':' -f2)
local now=$(date +%s)
test $now -ge $expiry
}
# Perform a lock action and restart if the CIB has been modified before
# committing the lock action
try_action() {
local fun=$1
local lock=$2
local requester=$3
local args=${4:-}
local tries=$MAX_RETRIES
local rc=1
if [ "$fun" = "lock_acquire" ] || [ "$fun" = "lock_release" ]; then
log "Try running $fun"
else
return 2
fi
while [ $rc -ne 0 ]; do
$fun $lock $requester $args
rc=$?
if [ $rc -eq 0 ]; then
log "Operation $1 succeeded"
return 0
elif [ $rc -eq 3 ]; then
# rc == 3 -> CIB changed before push
if [ $tries -eq 0 ]; then
log "Failed to commit after $MAX_RETRIES retries. Bailing out."
return 2
else
log "Failed to commit. Retrying operation."
tries=$(($tries - 1))
fi
elif [ $rc -eq 2 ]; then
# rc == 2 -> unrecoverable cib error (e.g. pacemaker down)
log "Unexpected failure. Bailing out"
return $rc
else
# rc == 1 -> lock error (not owner, lock doesn't exists)
return $rc
fi
done
}
# The lock mechanism uses cibadmin's atomic creation so cluster-wide
# state coherency is guaranteed by pacemaker
lock_acquire() {
local lockname=$1
local requester=$2
local ttl=$3
local rc
local lock
local expiry
local owner
log "Check whether the lock is already held in the CIB"
lock=$(lock_get $lockname)
rc=$?
if [ $rc -ne 0 ] && [ $rc -ne $CIB_ENOTFOUND ]; then
log "Could not retrieve info from the CIB"
return 2
fi
if [ -n "$lock" ]; then
lock_has_expired $lock
rc=$?
if [ $rc -eq 0 ]; then
log "Lock has expired, now available for being held"
else
# lock is still held. check whether we're the owner
owner=$(lock_owner $lock)
if [ "$owner" = "$requester" ];then
log "Requester already owns the lock, acquiring attempt will just reconfigure the TTL"
else
log "Lock is held by someone else ($owner)"
return 1
fi
fi
else
log "Lock is not held yet"
fi
# prepare the lock info
expiry=$(($(date +%s) + $ttl))
if [ -n "$lock" ]; then
log "Attempting to update the lock"
lock_update $lockname "$lock" "$requester:$expiry"
rc=$?
else
log "Attempting to acquire the lock"
lock_create $lockname "$requester:$expiry"
rc=$?
fi
if [ $rc -eq 0 ]; then
log "Lock '$lockname' acquired by '$requester', valid until $(date -d @$expiry)"
return 0
else
log "CIB changed, lock cannot be acquired"
return 3
fi
}
# The lock mechanism uses the CIB's num_updates tag to implement
# a conditional store. Cluster-wide locking is guaranteed by pacemaker
lock_release() {
local lockname=$1
local requester=$2
local rc
local lock
local owner
log "Check whether the lock is already held in the CIB"
lock=$(lock_get $lockname)
rc=$?
if [ $rc -ne 0 ] && [ $rc -ne $CIB_ENOTFOUND ]; then
log "Could not retrieve info from the CIB"
return 2
fi
if [ -z "$lock" ]; then
log "Lock doesn't exist. Nothing to release"
return 0
else
log "Lock exists, check whether we're the owner"
owner=$(lock_owner $lock)
if [ "$owner" != "$requester" ];then
log "Lock is held by someone else ($owner), will not unlock"
return 1
fi
fi
lock_delete $lockname "$lock"
rc=$?
if [ $rc -eq 0 ]; then
log "Lock '$lockname' released by '$requester'"
return 0
else
log "CIB deletion error, lock cannot be released"
return 3
fi
}
ACTION=$1
LOCKNAME=$2
REQUESTER=$3
TTL=${4:-60}
if [ -z "$ACTION" ]; then
error "Action must be specified"
fi
if [ $ACTION != "--help" ]; then
if [ -z "$LOCKNAME" ] || [ -z "$REQUESTER" ]; then
error "You must specific a lock name and a requester"
fi
fi
case $ACTION in
--help) usage; exit 0;;
--acquire|-a) try_action lock_acquire $LOCKNAME $REQUESTER $TTL;;
--release|-r) try_action lock_release $LOCKNAME $REQUESTER;;
*) error "Invalid action";;
esac
exit $?

9
deployment/certs/certmonger-user-baremetal-puppet.yaml

@ -72,3 +72,12 @@ outputs:
- {}
step_config: |
include ::tripleo::profile::base::certmonger_user
host_prep_tasks:
- name: create certificate rotation script for HA services
copy:
dest: /usr/bin/certmonger-ha-resource-refresh.sh
setype: certmonger_unconfined_exec_t
mode: "0700"
content: |
#!/bin/bash
/var/lib/container-config-scripts/pacemaker_mutex_restart_bundle.sh --lock $* 2>&1 | logger -t certmonger

6
deployment/containers-common.yaml

@ -121,6 +121,12 @@ outputs:
wait-port-and-run.sh:
mode: "0755"
content: { get_file: ../container_config_scripts/wait-port-and-run.sh }
pacemaker_resource_lock.sh:
mode: "0755"
content: { get_file: ../container_config_scripts/pacemaker_resource_lock.sh }
pacemaker_mutex_restart_bundle.sh:
mode: "0755"
content: { get_file: ../container_config_scripts/pacemaker_mutex_restart_bundle.sh }
volumes_base:
description: Base volume list

Loading…
Cancel
Save