Merge "CentOS 9: support restart of HA resources"
This commit is contained in:
commit
1e1b6d125c
@ -21,6 +21,11 @@ error() {
|
||||
exit 1
|
||||
}
|
||||
|
||||
# Tell whether the installed pacemaker uses the "Promoted" role name.
# Takes no arguments and prints nothing (grep runs with -q).
# Returns: grep's exit status, i.e. 0 when the whole-word string
#   "<value>Promoted</value>" is found in at least one of the
#   /usr/share/pacemaker/resources-*.rng files, non-zero otherwise
#   (including when no such file can be read).
pacemaker_supports_promoted() {
|
||||
# The Promoted token is only matched in recent pacemaker versions
# NOTE(review): presumably "recent" means pacemaker >= 2.1 -- confirm
|
||||
grep -wq "<value>Promoted</value>" /usr/share/pacemaker/resources-*.rng
|
||||
}
|
||||
|
||||
ACTION=$1
|
||||
case $ACTION in
|
||||
--help) usage; exit 0;;
|
||||
@ -46,9 +51,17 @@ BUNDLE_NAME=$4
|
||||
WAIT_TARGET_LOCAL=$5
|
||||
WAIT_TARGET_ANYWHERE=${6:-_}
|
||||
|
||||
if pacemaker_supports_promoted; then
|
||||
WAIT_TARGET_LOCAL=$(echo "$5" | sed -e 's/Master/Promoted/' -e 's/Slave/Unpromoted/')
|
||||
WAIT_TARGET_ANYWHERE=$(echo "${6:-_}" | sed -e 's/Master/Promoted/' -e 's/Slave/Unpromoted/')
|
||||
promoted_role="Promoted"
|
||||
else
|
||||
promoted_role="Master"
|
||||
fi
|
||||
|
||||
# The lock TTL should accommodate the resource start/promote timeout
|
||||
if [ "$RESOURCE_NAME" != "$BUNDLE_NAME" ]; then
|
||||
if [ "$WAIT_TARGET_LOCAL" = "Master" ] || [ "$WAIT_TARGET_ANYWHERE" = "Master" ]; then
|
||||
if [ "$WAIT_TARGET_LOCAL" = "$promoted_role" ] || [ "$WAIT_TARGET_ANYWHERE" = "$promoted_role" ]; then
|
||||
rsc_op="promote"
|
||||
else
|
||||
rsc_op="start"
|
||||
|
@ -40,6 +40,10 @@ usage() {
|
||||
exit 1
|
||||
}
|
||||
|
||||
# Tell whether the installed pacemaker uses the "Promoted" role name.
# Takes no arguments and prints nothing (grep runs with -q).
# Returns: grep's exit status, i.e. 0 when the whole-word string
#   "<value>Promoted</value>" is found in at least one of the
#   /usr/share/pacemaker/resources-*.rng files, non-zero otherwise
#   (including when no such file can be read).
pacemaker_supports_promoted() {
|
||||
# The Promoted token is only matched in recent pacemaker versions
# NOTE(review): presumably "recent" means pacemaker >= 2.1 -- confirm
|
||||
grep -wq "<value>Promoted</value>" /usr/share/pacemaker/resources-*.rng
|
||||
}
|
||||
|
||||
#
|
||||
# Utility functions to detect stuck resources
|
||||
@ -69,7 +73,7 @@ bundle_running_globally() {
|
||||
local engine=$BUNDLE_CONTAINER_ENGINE
|
||||
# return the number of running bundle replicas, i.e. the number of
|
||||
# docker/podman resource replicas currently running in the cluster
|
||||
crm_mon --as-xml | xmllint --xpath "count(//resources/bundle[@id='${BUNDLE_NAME}']/replica/resource[@resource_agent='ocf::heartbeat:${engine}']/node)" -
|
||||
crm_mon --as-xml | xmllint --xpath "count(//resources/bundle[@id='${BUNDLE_NAME}']/replica/resource[@resource_agent='${OCF}:heartbeat:${engine}']/node)" -
|
||||
}
|
||||
|
||||
ocf_failures_globally() {
|
||||
@ -91,7 +95,7 @@ did_resource_failed_locally() {
|
||||
# pacemaker_remote rather than on the real host, and the
|
||||
# failcounts are thus associated to the pcmk remote. Replace
|
||||
# the host's name with the pcmk remote's name.
|
||||
remotehost=$(crm_mon --as-xml | xmllint --xpath "string(//resources/bundle[@id='${BUNDLE_NAME}']/replica/resource/node[@name='${HOST}']/../../resource[@resource_agent='ocf::pacemaker:remote']/@id)" -)
|
||||
remotehost=$(crm_mon --as-xml | xmllint --xpath "string(//resources/bundle[@id='${BUNDLE_NAME}']/replica/resource/node[@name='${HOST}']/../../resource[@resource_agent='${OCF}:pacemaker:remote']/@id)" -)
|
||||
if [ -n "${remotehost}" ]; then
|
||||
crm_failcount -q -G -r $NAME -N $remotehost | grep -q -w INFINITY
|
||||
return $?
|
||||
@ -118,7 +122,7 @@ did_resource_failed_globally() {
|
||||
if [ "${NAME}" != "${BUNDLE_NAME}" ]; then
|
||||
# we check the state of an ocf resource only if the
|
||||
# pcmkremotes are started
|
||||
remotecount=$(crm_mon --as-xml | xmllint --xpath "count(//resources/bundle[@id='${BUNDLE_NAME}']/replica/resource[@resource_agent='ocf::pacemaker:remote']/node)" -)
|
||||
remotecount=$(crm_mon --as-xml | xmllint --xpath "count(//resources/bundle[@id='${BUNDLE_NAME}']/replica/resource[@resource_agent='${OCF}:pacemaker:remote']/node)" -)
|
||||
if [ "${remotecount}" = "0" ]; then
|
||||
# no pcmkremote is running, so check the bundle state
|
||||
# instead of checking the ocf resource
|
||||
@ -170,17 +174,26 @@ if [ -z "${ROLE_LOCAL}" ]; then
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
if !(echo "${ROLE_LOCAL}" | grep -q -x -E "(Started|Slave|Master)"); then
|
||||
echo 2>&1 "Error: argument ROLE_LOCAL must be either 'Started' 'Slave' or 'Master'"
|
||||
if !(echo "${ROLE_LOCAL}" | grep -q -x -E "(Started|Slave|Master|Unpromoted|Promoted)"); then
|
||||
echo 2>&1 "Error: argument ROLE_LOCAL must be either 'Started' 'Slave' 'Master' 'Unpromoted' or 'Promoted'"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ -n "${ROLE_ANYWHERE}" ] && !(echo "${ROLE_ANYWHERE}" | grep -q -x -E "(Started|Slave|Master)"); then
|
||||
echo 2>&1 "Error: argument ROLE_ANYWHERE must be either 'Started' 'Slave' or 'Master'"
|
||||
if [ -n "${ROLE_ANYWHERE}" ] && !(echo "${ROLE_ANYWHERE}" | grep -q -x -E "(Started|Slave|Master|Unpromoted|Promoted)"); then
|
||||
echo 2>&1 "Error: argument ROLE_ANYWHERE must be either 'Started' 'Slave' 'Master' 'Unpromoted' or 'Promoted'"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Ensure compatibility with pacemaker 2.1
|
||||
if pacemaker_supports_promoted; then
|
||||
ROLE_LOCAL=$(echo "$ROLE_LOCAL" | sed -e 's/Master/Promoted/' -e 's/Slave/Unpromoted/')
|
||||
ROLE_ANYWHERE=$(echo "$ROLE_ANYWHERE" | sed -e 's/Master/Promoted/' -e 's/Slave/Unpromoted/')
|
||||
OCF="ocf"
|
||||
else
|
||||
OCF="ocf:"
|
||||
fi
|
||||
|
||||
HOST=${5:-$(facter hostname)}
|
||||
TIMEOUT=${6:-__PCMKTIMEOUT__}
|
||||
|
||||
@ -194,7 +207,7 @@ TIMEOUT=${6:-__PCMKTIMEOUT__}
|
||||
|
||||
if [ "${BUNDLE_NAME}" != "${NAME}" ]; then
|
||||
# ocf resource
|
||||
local_resource_xpath="//bundle/replica/resource[@resource_agent='ocf::pacemaker:remote']/node[@name='${HOST}']/../../resource[@id='${NAME}']"
|
||||
local_resource_xpath="//bundle/replica/resource[@resource_agent='${OCF}:pacemaker:remote']/node[@name='${HOST}']/../../resource[@id='${NAME}']"
|
||||
any_resource_xpath="//bundle//resource[@id='${NAME}']"
|
||||
replicas_xpath="//bundle/primitive[@id='${BUNDLE_NAME}']/../*[boolean(@image) and boolean(@replicas)]"
|
||||
else
|
||||
|
Loading…
Reference in New Issue
Block a user