CentOS 9: support restart of HA resources
Pacemaker 2.1 changed the naming conventions for multi-state resource roles (Master/Slave became Promoted/Unpromoted) and for OCF resource agent names (the "ocf::" prefix became "ocf:"). Adapt our resource restart scripts so that they parse the proper data from the CIB. Change-Id: Ieade3444e44e305f507c057991e02048ab5f3b3a Closes-Bug: #1942771
This commit is contained in:
parent
ab8666fafe
commit
128c2bcc25
|
@ -21,6 +21,11 @@ error() {
|
||||||
exit 1
|
exit 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Report whether the installed pacemaker knows the "Promoted" role name.
#
# Arguments: $1 - directory holding the pacemaker RelaxNG schemas
#                 (optional, defaults to /usr/share/pacemaker)
# Returns:   0 if any resources-*.rng schema in that directory contains
#            <value>Promoted</value>; non-zero otherwise, including when
#            no schema file can be read.
pacemaker_supports_promoted() {
    local schema_dir="${1:-/usr/share/pacemaker}"
    # The Promoted token is only matched in recent pacemaker versions.
    # stderr is discarded on purpose: when the glob matches no file, grep
    # would otherwise print a "No such file" error although the non-zero
    # exit status already conveys "not supported".
    grep -wq "<value>Promoted</value>" "${schema_dir}"/resources-*.rng 2>/dev/null
}
|
||||||
|
|
||||||
ACTION=$1
|
ACTION=$1
|
||||||
case $ACTION in
|
case $ACTION in
|
||||||
--help) usage; exit 0;;
|
--help) usage; exit 0;;
|
||||||
|
@ -46,9 +51,17 @@ BUNDLE_NAME=$4
|
||||||
WAIT_TARGET_LOCAL=$5
|
WAIT_TARGET_LOCAL=$5
|
||||||
WAIT_TARGET_ANYWHERE=${6:-_}
|
WAIT_TARGET_ANYWHERE=${6:-_}
|
||||||
|
|
||||||
|
if pacemaker_supports_promoted; then
|
||||||
|
WAIT_TARGET_LOCAL=$(echo "$5" | sed -e 's/Master/Promoted/' -e 's/Slave/Unpromoted/')
|
||||||
|
WAIT_TARGET_ANYWHERE=$(echo "${6:-_}" | sed -e 's/Master/Promoted/' -e 's/Slave/Unpromoted/')
|
||||||
|
promoted_role="Promoted"
|
||||||
|
else
|
||||||
|
promoted_role="Master"
|
||||||
|
fi
|
||||||
|
|
||||||
# The lock TTL should accommodate the resource start/promote timeout
|
# The lock TTL should accommodate the resource start/promote timeout
|
||||||
if [ "$RESOURCE_NAME" != "$BUNDLE_NAME" ]; then
|
if [ "$RESOURCE_NAME" != "$BUNDLE_NAME" ]; then
|
||||||
if [ "$WAIT_TARGET_LOCAL" = "Master" ] || [ "$WAIT_TARGET_ANYWHERE" = "Master" ]; then
|
if [ "$WAIT_TARGET_LOCAL" = "$promoted_role" ] || [ "$WAIT_TARGET_ANYWHERE" = "$promoted_role" ]; then
|
||||||
rsc_op="promote"
|
rsc_op="promote"
|
||||||
else
|
else
|
||||||
rsc_op="start"
|
rsc_op="start"
|
||||||
|
|
|
@ -40,6 +40,10 @@ usage() {
|
||||||
exit 1
|
exit 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Tell whether this host's pacemaker schemas define the "Promoted" role.
#
# Arguments: $1 - directory containing the pacemaker RelaxNG schemas
#                 (optional, defaults to /usr/share/pacemaker)
# Returns:   0 when at least one resources-*.rng file in that directory
#            contains <value>Promoted</value>; non-zero otherwise, which
#            includes the case where no schema file can be read.
pacemaker_supports_promoted() {
    local rng_dir="${1:-/usr/share/pacemaker}"
    # The Promoted token is only matched in recent pacemaker versions.
    # grep's stderr is intentionally dropped: an unmatched glob makes grep
    # complain about a missing file, while its exit status is all the
    # callers look at.
    grep -wq "<value>Promoted</value>" "${rng_dir}"/resources-*.rng 2>/dev/null
}
|
||||||
|
|
||||||
#
|
#
|
||||||
# Utility functions to detect stuck resources
|
# Utility functions to detect stuck resources
|
||||||
|
@ -69,7 +73,7 @@ bundle_running_globally() {
|
||||||
local engine=$BUNDLE_CONTAINER_ENGINE
|
local engine=$BUNDLE_CONTAINER_ENGINE
|
||||||
# return the number of running bundle replicas, i.e. the number of
|
# return the number of running bundle replicas, i.e. the number of
|
||||||
# docker/podman resource replicas currently running in the cluster
|
# docker/podman resource replicas currently running in the cluster
|
||||||
crm_mon --as-xml | xmllint --xpath "count(//resources/bundle[@id='${BUNDLE_NAME}']/replica/resource[@resource_agent='ocf::heartbeat:${engine}']/node)" -
|
crm_mon --as-xml | xmllint --xpath "count(//resources/bundle[@id='${BUNDLE_NAME}']/replica/resource[@resource_agent='${OCF}:heartbeat:${engine}']/node)" -
|
||||||
}
|
}
|
||||||
|
|
||||||
ocf_failures_globally() {
|
ocf_failures_globally() {
|
||||||
|
@ -91,7 +95,7 @@ did_resource_failed_locally() {
|
||||||
# pacemaker_remote rather than on the real host, and the
|
# pacemaker_remote rather than on the real host, and the
|
||||||
# failcounts are thus associated to the pcmk remote. Replace
|
# failcounts are thus associated to the pcmk remote. Replace
|
||||||
# the host's name with the pcmk remote's name.
|
# the host's name with the pcmk remote's name.
|
||||||
remotehost=$(crm_mon --as-xml | xmllint --xpath "string(//resources/bundle[@id='${BUNDLE_NAME}']/replica/resource/node[@name='${HOST}']/../../resource[@resource_agent='ocf::pacemaker:remote']/@id)" -)
|
remotehost=$(crm_mon --as-xml | xmllint --xpath "string(//resources/bundle[@id='${BUNDLE_NAME}']/replica/resource/node[@name='${HOST}']/../../resource[@resource_agent='${OCF}:pacemaker:remote']/@id)" -)
|
||||||
if [ -n "${remotehost}" ]; then
|
if [ -n "${remotehost}" ]; then
|
||||||
crm_failcount -q -G -r $NAME -N $remotehost | grep -q -w INFINITY
|
crm_failcount -q -G -r $NAME -N $remotehost | grep -q -w INFINITY
|
||||||
return $?
|
return $?
|
||||||
|
@ -118,7 +122,7 @@ did_resource_failed_globally() {
|
||||||
if [ "${NAME}" != "${BUNDLE_NAME}" ]; then
|
if [ "${NAME}" != "${BUNDLE_NAME}" ]; then
|
||||||
# we check the state of an ocf resource only if the
|
# we check the state of an ocf resource only if the
|
||||||
# pcmkremotes are started
|
# pcmkremotes are started
|
||||||
remotecount=$(crm_mon --as-xml | xmllint --xpath "count(//resources/bundle[@id='${BUNDLE_NAME}']/replica/resource[@resource_agent='ocf::pacemaker:remote']/node)" -)
|
remotecount=$(crm_mon --as-xml | xmllint --xpath "count(//resources/bundle[@id='${BUNDLE_NAME}']/replica/resource[@resource_agent='${OCF}:pacemaker:remote']/node)" -)
|
||||||
if [ "${remotecount}" = "0" ]; then
|
if [ "${remotecount}" = "0" ]; then
|
||||||
# no pcmkremote is running, so check the bundle state
|
# no pcmkremote is running, so check the bundle state
|
||||||
# instead of checking the ocf resource
|
# instead of checking the ocf resource
|
||||||
|
@ -170,17 +174,26 @@ if [ -z "${ROLE_LOCAL}" ]; then
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
if !(echo "${ROLE_LOCAL}" | grep -q -x -E "(Started|Slave|Master)"); then
|
if !(echo "${ROLE_LOCAL}" | grep -q -x -E "(Started|Slave|Master|Unpromoted|Promoted)"); then
|
||||||
echo 2>&1 "Error: argument ROLE_LOCAL must be either 'Started' 'Slave' or 'Master'"
|
echo 2>&1 "Error: argument ROLE_LOCAL must be either 'Started' 'Slave' 'Master' 'Unpromoted' or 'Promoted'"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ -n "${ROLE_ANYWHERE}" ] && !(echo "${ROLE_ANYWHERE}" | grep -q -x -E "(Started|Slave|Master)"); then
|
if [ -n "${ROLE_ANYWHERE}" ] && !(echo "${ROLE_ANYWHERE}" | grep -q -x -E "(Started|Slave|Master|Unpromoted|Promoted)"); then
|
||||||
echo 2>&1 "Error: argument ROLE_ANYWHERE must be either 'Started' 'Slave' or 'Master'"
|
echo 2>&1 "Error: argument ROLE_ANYWHERE must be either 'Started' 'Slave' 'Master' 'Unpromoted' or 'Promoted'"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Ensure compatibility with pacemaker 2.1
|
||||||
|
if pacemaker_supports_promoted; then
|
||||||
|
ROLE_LOCAL=$(echo "$ROLE_LOCAL" | sed -e 's/Master/Promoted/' -e 's/Slave/Unpromoted/')
|
||||||
|
ROLE_ANYWHERE=$(echo "$ROLE_ANYWHERE" | sed -e 's/Master/Promoted/' -e 's/Slave/Unpromoted/')
|
||||||
|
OCF="ocf"
|
||||||
|
else
|
||||||
|
OCF="ocf:"
|
||||||
|
fi
|
||||||
|
|
||||||
HOST=${5:-$(facter hostname)}
|
HOST=${5:-$(facter hostname)}
|
||||||
TIMEOUT=${6:-__PCMKTIMEOUT__}
|
TIMEOUT=${6:-__PCMKTIMEOUT__}
|
||||||
|
|
||||||
|
@ -194,7 +207,7 @@ TIMEOUT=${6:-__PCMKTIMEOUT__}
|
||||||
|
|
||||||
if [ "${BUNDLE_NAME}" != "${NAME}" ]; then
|
if [ "${BUNDLE_NAME}" != "${NAME}" ]; then
|
||||||
# ocf resource
|
# ocf resource
|
||||||
local_resource_xpath="//bundle/replica/resource[@resource_agent='ocf::pacemaker:remote']/node[@name='${HOST}']/../../resource[@id='${NAME}']"
|
local_resource_xpath="//bundle/replica/resource[@resource_agent='${OCF}:pacemaker:remote']/node[@name='${HOST}']/../../resource[@id='${NAME}']"
|
||||||
any_resource_xpath="//bundle//resource[@id='${NAME}']"
|
any_resource_xpath="//bundle//resource[@id='${NAME}']"
|
||||||
replicas_xpath="//bundle/primitive[@id='${BUNDLE_NAME}']/../*[boolean(@image) and boolean(@replicas)]"
|
replicas_xpath="//bundle/primitive[@id='${BUNDLE_NAME}']/../*[boolean(@image) and boolean(@replicas)]"
|
||||||
else
|
else
|
||||||
|
|
Loading…
Reference in New Issue