From fffbdc0df115ed7f442620e4fff22fcacd595e70 Mon Sep 17 00:00:00 2001 From: Damien Ciabrini Date: Mon, 3 Aug 2020 18:59:44 +0200 Subject: [PATCH] Fix HA resource restart when no replicas are running When the helper script pacemaker_restart_bundle.sh is called during a stack update, it restarts the pacemaker resource via a "pcs resource restart <name>". When all the replicas are stopped due to a previous error, pcs won't restart them because there is nothing to stop. In that case, one must use "pcs resource cleanup <name>". (cherry picked from commit ba471ee461b125e2aa53c485ab61dc467bf7d858) Closes-Bug: #1889395 Change-Id: I1790444d289d057e9a3f612c53efe485080978b5 --- container_config_scripts/pacemaker_restart_bundle.sh | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/container_config_scripts/pacemaker_restart_bundle.sh b/container_config_scripts/pacemaker_restart_bundle.sh index 949167f734..665fe1ebc0 100755 --- a/container_config_scripts/pacemaker_restart_bundle.sh +++ b/container_config_scripts/pacemaker_restart_bundle.sh @@ -39,8 +39,15 @@ if [ x"${TRIPLEO_MINOR_UPDATE,,}" != x"true" ]; then HOSTNAME=$(/bin/hostname -s) SERVICE_NODEID=$(/bin/hiera -c /etc/puppet/hiera.yaml "${TRIPLEO_SERVICE}_short_bootstrap_node_name") if [[ "${HOSTNAME,,}" == "${SERVICE_NODEID,,}" ]]; then - echo "$(date -u): Restarting ${BUNDLE_NAME} globally" - /sbin/pcs resource restart --wait=__PCMKTIMEOUT__ $BUNDLE_NAME + replicas_running=$(crm_resource -Q -r $BUNDLE_NAME --locate 2>&1 | wc -l) + if [ "$replicas_running" != "0" ]; then + echo "$(date -u): Restarting ${BUNDLE_NAME} globally" + /sbin/pcs resource restart --wait=__PCMKTIMEOUT__ $BUNDLE_NAME + else + echo "$(date -u): ${BUNDLE_NAME} is not running anywhere," \ + "cleaning up to restart it globally if necessary" + /sbin/pcs resource cleanup $BUNDLE_NAME + fi else echo "$(date -u): Skipping global restart of ${BUNDLE_NAME} on ${HOSTNAME} it will be restarted by node ${SERVICE_NODEID}" fi