#!/bin/bash

set -u

# Restart a pacemaker bundle so that it picks up updated configuration.
#
# Usage:
#   pacemaker_restart_bundle.sh TRIPLEO_SERVICE RESOURCE_NAME BUNDLE_NAME \
#       WAIT_TARGET_LOCAL [WAIT_TARGET_ANYWHERE]
#
# Examples:
# ./pacemaker_restart_bundle.sh mysql galera galera-bundle Master _
# ./pacemaker_restart_bundle.sh redis redis redis-bundle Slave Master
# ./pacemaker_restart_bundle.sh ovn_dbs ovndb_servers ovn-dbs-bundle Slave Master
#
# Environment:
#   TRIPLEO_MINOR_UPDATE
#       "true" (case-insensitive) selects the minor-update workflow, where
#       the bundle is only restarted on the local node. Defaults to "false".
#   TRIPLEO_HA_WRAPPER_RESOURCE_EXISTS
#       "false" (case-insensitive) means initial deployment: nothing is
#       restarted. Defaults to "false".
#
# NOTE(review): __PCMKTIMEOUT__ looks like a template placeholder that is
# presumably substituted before this script is installed -- confirm against
# whatever renders this file.

# Fail with a readable message rather than the bare "unbound variable"
# error that 'set -u' would otherwise produce on the assignments below.
if [ "$#" -lt 4 ]; then
    echo "Usage: $0 TRIPLEO_SERVICE RESOURCE_NAME BUNDLE_NAME" \
         "WAIT_TARGET_LOCAL [WAIT_TARGET_ANYWHERE]" >&2
    exit 1
fi

RESTART_SCRIPTS_DIR=$(dirname "$0")
TRIPLEO_SERVICE=$1
RESOURCE_NAME=$2
BUNDLE_NAME=$3
WAIT_TARGET_LOCAL=$4
WAIT_TARGET_ANYWHERE=${5:-_}
TRIPLEO_MINOR_UPDATE="${TRIPLEO_MINOR_UPDATE:-false}"
TRIPLEO_HA_WRAPPER_RESOURCE_EXISTS="${TRIPLEO_HA_WRAPPER_RESOURCE_EXISTS:-false}"

# Tell whether a bundle is eligible for a restart.
# Arguments: $1 - name of the pacemaker bundle resource
# Returns:   0 unless the bundle's is-managed meta attribute is "false" or
#            its target-role meta attribute is "Stopped"; non-zero otherwise.
bundle_can_be_restarted() {
    local bundle=$1
    # As long as the resource bundle is managed by pacemaker and is
    # not meant to stay stopped, no matter the state of any inner
    # pcmk_remote or ocf resource, we should restart it to give it a
    # chance to read the new config.
    # Errors from crm_resource are discarded on purpose: a query that
    # yields no value compares as "not false" / "not Stopped", so the
    # bundle is treated as restartable.
    [ "$(crm_resource --meta -r "$bundle" -g is-managed 2>/dev/null)" != "false" ] && \
    [ "$(crm_resource --meta -r "$bundle" -g target-role 2>/dev/null)" != "Stopped" ]
}

# Send a message to syslog under the "pcmkrestart" tag.
# Arguments: $@ - message fragments, joined with single spaces. Callers
#            split long messages over several arguments, so every argument
#            must be logged, not only the first one.
log() {
    logger -t pcmkrestart "$*"
}

HOSTNAME=$(/bin/hostname -s)
if [[ "${TRIPLEO_MINOR_UPDATE,,}" != "true" ]]; then
    if [[ "${TRIPLEO_HA_WRAPPER_RESOURCE_EXISTS,,}" == "false" ]]; then
        # Do not restart during initial deployment, as the resource
        # has just been created.
        SERVICE_NODEID=$(/bin/hiera -c /etc/puppet/hiera.yaml "${TRIPLEO_SERVICE}_short_bootstrap_node_name")
        # Only the bootstrap node logs the skip; every node exits successfully.
        if [[ "${HOSTNAME,,}" == "${SERVICE_NODEID,,}" ]]; then
            log "Initial deployment, skipping the restart of ${BUNDLE_NAME}"
        fi
        exit 0
    else
        # During a stack update, this script is called in parallel on
        # every node the resource runs on, after the service's configs
        # have been updated on all nodes. So we need to run pcs only
        # once (e.g. on the service's bootstrap node).
        if bundle_can_be_restarted "${BUNDLE_NAME}"; then
            SERVICE_NODEID=$(/bin/hiera -c /etc/puppet/hiera.yaml "${TRIPLEO_SERVICE}_short_bootstrap_node_name")
            if [[ "${HOSTNAME,,}" == "${SERVICE_NODEID,,}" ]]; then
                # Number of output lines printed by the locate query; zero
                # lines is taken to mean the bundle runs nowhere.
                replicas_running=$(crm_resource -Q -r "$BUNDLE_NAME" --locate 2>&1 | wc -l)
                if [ "$replicas_running" != "0" ]; then
                    log "Restarting ${BUNDLE_NAME} globally. Stopping:"
                    /sbin/pcs resource disable --wait=__PCMKTIMEOUT__ "$BUNDLE_NAME"
                    log "Restarting ${BUNDLE_NAME} globally. Starting:"
                    /sbin/pcs resource enable --wait=__PCMKTIMEOUT__ "$BUNDLE_NAME"
                else
                    log "${BUNDLE_NAME} is not running anywhere," \
                        "cleaning up to restart it globally if necessary"
                    /sbin/pcs resource cleanup "$BUNDLE_NAME"
                fi
            else
                log "Skipping global restart of ${BUNDLE_NAME} on ${HOSTNAME} it will be restarted by node ${SERVICE_NODEID}"
            fi
        else
            log "No global restart needed for ${BUNDLE_NAME}."
        fi
    fi
else
    # During a minor update workflow however, a host gets fully
    # updated before updating the next one. So unlike stack
    # update, at the time this script is called, the service's
    # configs aren't updated on all nodes yet. So only restart the
    # resource locally, where it's guaranteed that the config is
    # up to date.
    HOST=$(facter hostname)

    if bundle_can_be_restarted "${BUNDLE_NAME}"; then
        # if the resource is running locally, restart it
        if crm_resource -r "$BUNDLE_NAME" --locate 2>&1 | grep -w -q "${HOST}"; then
            log "Restarting ${BUNDLE_NAME} locally on '${HOST}'"
            /sbin/pcs resource restart "$BUNDLE_NAME" "${HOST}"
        else
            # At this point, if no resource is running locally, it's
            # either because a) it has failed previously, or b) because
            # it's an A/P resource running elsewhere.
            # By cleaning up resource, we ensure that a) it will try to
            # restart, or b) it won't do anything if the resource is
            # already running elsewhere.
            log "${BUNDLE_NAME} is currently not running on '${HOST}'," \
                "cleaning up its state to restart it if necessary"
            /sbin/pcs resource cleanup "$BUNDLE_NAME" node="${HOST}"
        fi

        # Wait until the resource is in the expected target state
        "$RESTART_SCRIPTS_DIR/pacemaker_wait_bundle.sh" \
            "$RESOURCE_NAME" "$BUNDLE_NAME" \
            "$WAIT_TARGET_LOCAL" "$WAIT_TARGET_ANYWHERE" \
            "${HOST}" __PCMKTIMEOUT__
    else
        log "No restart needed for ${BUNDLE_NAME}."
    fi
fi