7f81f3aea2
Previously, the backup restoration phase was not considered part of the State Snapshot Transfer, as only the backup creation and transportation processes are checked for this purpose. To cover the missing phase as well, it is more reasonable to monitor the appropriate process that controls the entire State Snapshot Transfer. Closes-Bug: 1660275 Change-Id: Ie98af501c1cd130098381a8463452f892898470b Signed-off-by: Gabor Orosz <gabor.orosz@ericsson.com>
857 lines
28 KiB
Bash
Executable File
857 lines
28 KiB
Bash
Executable File
#!/bin/bash
# Authors:      Bartosz Kupidura (Mirantis): Rewrite RA to support mysql/galera
#               Sergii Golovatiuk (Mirantis): Rewrite RA to support mysql/galera
#               Alan Robertson: DB2 Script
#               Jakub Janczak: rewrite as MySQL
#               Andrew Beekhof: cleanup and import
#               Sebastian Reitenbach: add OpenBSD defaults, more cleanup
#               Narayan Newton: add Gentoo/Debian defaults
#               Marian Marinov, Florian Haas: add replication capability
#               Yves Trudeau, Baron Schwartz: add VIP support and improve replication
#
# Support:      openstack@lists.launchpad.net
# License:      GNU General Public License (GPL)
#
# (c) 2002-2005 International Business Machines, Inc.
#     2005-2010 Linux-HA contributors
#     2014 Mirantis Inc.
#######################################################################
# Initialization:
# Source the OCF shell helper library and the Fuel-specific helper library.
# Both locations can be overridden through the environment.
: "${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}"
. "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs"
: "${OCF_FUEL_FUNCTIONS_DIR=${OCF_ROOT}/resource.d/fuel}"
. "${OCF_FUEL_FUNCTIONS_DIR}/ocf-fuel-funcs"

#######################################################################
# Fill in some defaults if no values are specified
OCF_RESKEY_binary_default="/usr/bin/mysqld_safe"
OCF_RESKEY_client_binary_default="/usr/bin/mysql"
OCF_RESKEY_config_default="/etc/mysql/my.cnf"
OCF_RESKEY_datadir_default="/var/lib/mysql"
OCF_RESKEY_user_default="mysql"
OCF_RESKEY_group_default="mysql"
OCF_RESKEY_pid_default="${HA_RSCTMP}/${__SCRIPT_NAME}/${__SCRIPT_NAME}.pid"
OCF_RESKEY_socket_default="/var/lib/mysql/mysql.sock"
OCF_RESKEY_test_user_default="root"
OCF_RESKEY_test_passwd_default=""
OCF_RESKEY_test_conf_default=""
OCF_RESKEY_additional_parameters_default=""
OCF_RESKEY_master_timeout_default="300"

# "${VAR=default}" assigns only when VAR is unset, so an explicitly empty
# value supplied by the cluster manager is preserved.
: "${HA_LOGTAG="ocf-mysql-wss"}"
: "${HA_LOGFACILITY="daemon"}"
: "${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}"
MYSQL_BINDIR="$(dirname "${OCF_RESKEY_binary}")"

: "${OCF_RESKEY_client_binary=${OCF_RESKEY_client_binary_default}}"

: "${OCF_RESKEY_config=${OCF_RESKEY_config_default}}"
: "${OCF_RESKEY_datadir=${OCF_RESKEY_datadir_default}}"

: "${OCF_RESKEY_user=${OCF_RESKEY_user_default}}"
: "${OCF_RESKEY_group=${OCF_RESKEY_group_default}}"

: "${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}"
: "${OCF_RESKEY_socket=${OCF_RESKEY_socket_default}}"

: "${OCF_RESKEY_test_user=${OCF_RESKEY_test_user_default}}"
: "${OCF_RESKEY_test_passwd=${OCF_RESKEY_test_passwd_default}}"
: "${OCF_RESKEY_test_conf=${OCF_RESKEY_test_conf_default}}"

: "${OCF_RESKEY_additional_parameters=${OCF_RESKEY_additional_parameters_default}}"
: "${OCF_RESKEY_master_timeout=${OCF_RESKEY_master_timeout_default}}"

#######################################################################
# Convenience variables
MYSQL=$OCF_RESKEY_client_binary
HOSTNAME=$(uname -n)
MYSQL_OPTIONS_LOCAL="-S $OCF_RESKEY_socket --connect_timeout=10"
# The option strings below are expanded unquoted by their users, so they are
# word-split on whitespace: values containing spaces are not supported.
# NOTE(review): without test_conf the password is passed on the client's
# command line; prefer test_conf to keep it out of the process list.
if [ -n "${OCF_RESKEY_test_conf}" ]; then
    MYSQL_OPTIONS_TEST="--defaults-extra-file=${OCF_RESKEY_test_conf} ${MYSQL_OPTIONS_LOCAL}"
else
    MYSQL_OPTIONS_TEST="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user --password=$OCF_RESKEY_test_passwd"
fi
#######################################################################
# Print a short usage summary for this resource agent to stdout.
usage() {
    cat <<UEND
usage: $0 (start|stop|meta-data|validate-all|monitor)

$0 manages a MySQL Database as an HA resource.

The 'start' operation starts the database.
The 'stop' operation stops the database.
The 'monitor' operation reports whether the database seems to be working
The 'validate-all' operation reports whether the parameters are valid

UEND
}
# Print the OCF resource-agent metadata (XML) to stdout.
# Parameter defaults are taken from the OCF_RESKEY_*_default variables.
meta_data() {
    cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="mysql" version="0.1">
<version>0.1</version>
<longdesc lang="en">
Resource script for MySQL
</longdesc>
<shortdesc lang="en">Resource script for MySQL</shortdesc>
<parameters>
<parameter name="binary" unique="0" required="0">
<longdesc lang="en">
Location of the MySQL server binary
</longdesc>
<shortdesc lang="en">MySQL server binary</shortdesc>
<content type="string" default="${OCF_RESKEY_binary_default}" />
</parameter>
<parameter name="client_binary" unique="0" required="0">
<longdesc lang="en">
Location of the MySQL client binary
</longdesc>
<shortdesc lang="en">MySQL client binary</shortdesc>
<content type="string" default="${OCF_RESKEY_client_binary_default}" />
</parameter>
<parameter name="config" unique="0" required="0">
<longdesc lang="en">
Configuration file
</longdesc>
<shortdesc lang="en">MySQL config</shortdesc>
<content type="string" default="${OCF_RESKEY_config_default}" />
</parameter>
<parameter name="datadir" unique="0" required="0">
<longdesc lang="en">
Directory containing databases
</longdesc>
<shortdesc lang="en">Data directory</shortdesc>
<content type="string" default="${OCF_RESKEY_datadir_default}" />
</parameter>
<parameter name="user" unique="0" required="0">
<longdesc lang="en">
User running MySQL daemon
</longdesc>
<shortdesc lang="en">MySQL user</shortdesc>
<content type="string" default="${OCF_RESKEY_user_default}" />
</parameter>
<parameter name="group" unique="0" required="0">
<longdesc lang="en">
Group running MySQL daemon (for logfile and directory permissions)
</longdesc>
<shortdesc lang="en">MySQL group</shortdesc>
<content type="string" default="${OCF_RESKEY_group_default}"/>
</parameter>
<parameter name="pid" unique="0" required="0">
<longdesc lang="en">
The pidfile to be used for mysqld.
</longdesc>
<shortdesc lang="en">MySQL pid file</shortdesc>
<content type="string" default="${OCF_RESKEY_pid_default}"/>
</parameter>
<parameter name="socket" unique="0" required="0">
<longdesc lang="en">
The socket to be used for mysqld.
</longdesc>
<shortdesc lang="en">MySQL socket</shortdesc>
<content type="string" default="${OCF_RESKEY_socket_default}"/>
</parameter>
<parameter name="test_user" unique="0" required="0">
<longdesc lang="en">
MySQL test user, must have select privilege on 'show status'
</longdesc>
<shortdesc lang="en">MySQL test user</shortdesc>
<content type="string" default="${OCF_RESKEY_test_user_default}" />
</parameter>
<parameter name="test_passwd" unique="0" required="0">
<longdesc lang="en">
MySQL test user password
</longdesc>
<shortdesc lang="en">MySQL test user password</shortdesc>
<content type="string" default="${OCF_RESKEY_test_passwd_default}" />
</parameter>
<parameter name="test_conf" unique="0" required="0">
<longdesc lang="en">
MySQL test user conf file to override user/pass
</longdesc>
<shortdesc lang="en">MySQL test user conf file</shortdesc>
<content type="string" default="${OCF_RESKEY_test_conf_default}" />
</parameter>
<parameter name="additional_parameters" unique="0" required="0">
<longdesc lang="en">
Additional parameters which are passed to the mysqld on startup.
(e.g. --skip-external-locking or --skip-grant-tables)
</longdesc>
<shortdesc lang="en">Additional parameters to pass to mysqld</shortdesc>
<content type="string" default="${OCF_RESKEY_additional_parameters_default}"/>
</parameter>
<parameter name="master_timeout" unique="0" required="0">
<longdesc lang="en">
How long we should wait for galera master. If master not come up before timeout,
RA will choose new master from already running nodes. This value can by changed by crm_attribute:
# crm_attribute --name galera_master_timeout --update 500
Remember to remove this after maintenance. USE WITH CAUTION!
Remember to change timeout for start operation. Start timeout should be bigger than master_timeout
</longdesc>
<shortdesc lang="en">Galera master timeout</shortdesc>
<content type="integer" default="${OCF_RESKEY_master_timeout_default}"/>
</parameter>
</parameters>
<actions>
<action name="start" timeout="330" />
<action name="stop" timeout="120" />
<action name="monitor" timeout="30" interval="20" depth="0" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="10" />
</actions>
</resource-agent>
END
}
# Convenience functions
#######################################################################

# Print the members of the current cluster partition, as reported by
# `crm_node --partition`, on a single space-separated line.
# Prints an empty line when no node is reported.
nodes_in_cluster_online() {
    local LH="${LL} nodes_in_cluster_online():"
    local NODES

    NODES=$(crm_node --partition | sed -e '/(null)/d')
    if [ -n "$NODES" ]; then
        # ocf_log takes the priority as its first argument
        ocf_log info "${LH} Online Nodes in cluster: ${NODES}"
        # Deliberately unquoted: collapses newlines into single spaces
        echo $NODES
    else
        ocf_log info "${LH} No online nodes in cluster"
        echo
    fi
}
# Print every node name listed by `crm_node --list` (second column) on a
# single space-separated line. Prints an empty line when none is reported.
nodes_in_cluster() {
    local LH="${LL} nodes_in_cluster():"
    local NODES

    #Ubuntu doesn't like \w
    NODES=$(crm_node --list | awk '/^[a-zA-Z0-9]/ {print $2}' | sed -e '/(null)/d')
    if [ -n "$NODES" ]; then
        # ocf_log takes the priority as its first argument
        ocf_log info "${LH} Nodes in cluster: ${NODES}"
        # Deliberately unquoted: collapses newlines into single spaces
        echo $NODES
    else
        ocf_log info "${LH} No nodes in cluster"
        echo
    fi
}
# Validate the format of a Galera GTID.
# Arguments: $1 - GTID to check
# Returns:   0 when $1 matches UUID_REGEX, 1 when it is empty or does not.
# Valid values are:
#   XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX:123 - standard cluster-id:commit-id
#   XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX:-1  - standard non initialized cluster,
#                                              00000000-0000-0000-0000-000000000000:-1
validate_gtid() {
    local LH="${LL} validate_gtid():"
    local status_loglevel="err"

    if [ -z "$1" ]; then
        ocf_log $status_loglevel "${LH} No GTID provided"
        return 1
    fi

    # printf with a quoted argument keeps the value intact (no word
    # splitting, no globbing, no echo option parsing).
    if ! printf '%s\n' "$1" | grep -q -E -- "${UUID_REGEX}"; then
        ocf_log $status_loglevel "${LH} GTID have wrong format: $1"
        return 1
    fi

    ocf_log info "${LH} GTID OK: $1"
    return 0
}
# Determine the local Galera GTID and publish it as the node's "gtid"
# attribute in the CIB when it has changed.
# The GTID comes from the running server's wsrep status variables, or, when
# the server is not running, from `--wsrep-recover` output with
# grastate.dat as a fallback.
# Outputs: the new GTID when it was changed and is valid, otherwise the
#          value currently stored in the CIB (as printed by get_node_gtid).
# Returns: 0 when the attribute was updated, 1 when unchanged or invalid.
update_node_gtid() {
    local LH="${LL} update_node_gtid():"
    local status_loglevel="err"
    local GTID
    local GTID_current
    local CLUSTER_ID
    local COMMIT_ID

    # Set loglevel to info during probe
    if ocf_is_probe; then
        status_loglevel="info"
    fi

    if mysql_status $status_loglevel 1; then
        # MYSQL_OPTIONS_TEST is an option string and must be word-split
        CLUSTER_ID=$($MYSQL $MYSQL_OPTIONS_TEST -s -N \
            -e "SHOW STATUS LIKE 'wsrep_local_state_uuid'" | awk '{print $NF}')
        COMMIT_ID=$($MYSQL $MYSQL_OPTIONS_TEST -s -N \
            -e "SHOW STATUS LIKE 'wsrep_last_committed'" | awk '{print $NF}')
        GTID="$CLUSTER_ID:$COMMIT_ID"
    else
        # More than one output line may carry a position; keep only the
        # first so the attribute value is always a single GTID.
        GTID=$("${OCF_RESKEY_binary}" --wsrep-recover 2>&1 | \
            grep -e 'Recovered position' -e 'wsrep_start_position' | \
            grep -Eo "${UUID_REGEX}" | head -n 1)
        if [ -z "${GTID}" ] && [ -r "${OCF_RESKEY_datadir}/grastate.dat" ]; then
            GTID=$(awk '/uuid/ { uuid = $NF} /seqno/ { seqno = $NF} END {print uuid":"seqno}' \
                "${OCF_RESKEY_datadir}/grastate.dat")
        fi
    fi

    GTID_current=$(get_node_gtid "$HOSTNAME")
    if [ "${GTID}" != "${GTID_current}" ]; then
        if validate_gtid "${GTID}"; then
            ocf_log info "${LH} Galera GTID: ${GTID}"
            crm_attribute --quiet --node "$HOSTNAME" --lifetime reboot --name gtid \
                --update "$GTID"
            echo "${GTID}"
            return 0
        fi
    fi
    echo "${GTID_current}"
    return 1
}
# Flag this node as a Primary Component by setting its "is_pc" node
# attribute (reboot lifetime) to 'true'.
update_node_pc() {
    local LH="${LL} update_node_pc():"

    ocf_log info "${LH} Setting node PC flag to true"
    crm_attribute --quiet --node "$HOSTNAME" --lifetime reboot --name is_pc --update 'true'
}
# Remove this node's "is_pc" (Primary Component) attribute.
clear_node_pc() {
    # Own log prefix; without it the caller's LH would be picked up through
    # dynamic scoping and the message attributed to the wrong function.
    local LH="${LL} clear_node_pc():"

    ocf_log info "${LH} Cleaning up is_pc attribute"
    crm_attribute --quiet --node "$HOSTNAME" --lifetime reboot --name is_pc \
        --delete
}
# Print how many seconds to wait for a Galera master.
# The cluster attribute galera_master_timeout takes precedence; the
# master_timeout resource parameter is used as the query default and as a
# fallback when the query yields nothing usable.
get_master_timeout() {
    local LH="${LL} get_master_timeout():"
    local timeout

    timeout=$(crm_attribute --quiet --name galera_master_timeout \
        --query --default="$OCF_RESKEY_master_timeout" -q | sed -e '/(null)/d')

    # Callers compare the value numerically; never hand them an empty or
    # non-numeric string.
    case "$timeout" in
        ''|*[!0-9]*) timeout=$OCF_RESKEY_master_timeout ;;
    esac

    ocf_log info "${LH} Setting timeout $timeout"
    echo $timeout
}
# Print the "gtid" node attribute stored in the CIB for a node.
# Arguments: $1 - node name
# Outputs:   the GTID, or "0" when no GTID is set, the stored value has a
#            wrong format, or no node name was given.
get_node_gtid() {
    local LH="${LL} get_node_gtid():"
    local GTID

    # Without a node name "--node" would swallow the following option
    if [ -z "$1" ]; then
        ocf_log info "${LH} No GTID for $1"
        echo 0
        return
    fi

    GTID=$(crm_attribute --quiet --node "$1" --lifetime reboot --query \
        --name gtid 2> /dev/null | sed -e '/(null)/d')

    if [ -n "$GTID" ] && validate_gtid "$GTID"; then
        ocf_log info "${LH} Galera GTID: ${GTID}"
        echo $GTID
    else
        ocf_log info "${LH} No GTID for $1"
        echo 0
    fi
}
# Print the GTID of a node like get_node_gtid does, but when no GTID is
# found ("0") retry once after a random 1-10 second sleep.
# Arguments: $1 - node name
get_node_gtid_with_retry() {
    local GTID
    local NODE="$1"

    GTID=$(get_node_gtid "$NODE")

    if [ "$GTID" = "0" ]; then
        sleep $(( ( RANDOM % 10 ) + 1 ))
        GTID=$(get_node_gtid "$NODE")
    fi

    echo $GTID
}
# Decide whether a new Galera master has to be elected.
# An election is needed when this partition has quorum (or the cluster has
# a single node) and crm_resource locates no running instance of the
# resource.
# Returns: 0 when an election is needed, 1 otherwise.
check_if_reelection_needed() {
    local LH="${LL} check_if_reelection_needed()"
    local PARTITION_WITH_QUORUM
    local RESOURCE_NAME
    local NODE_COUNT
    local RUNNING_INSTANCES

    PARTITION_WITH_QUORUM=$(crm_node -q | sed -e '/(null)/d')
    # Strip the clone instance suffix (":N") from the resource name
    RESOURCE_NAME=$(echo "$OCF_RESOURCE_INSTANCE" | cut -f1 -d":")
    NODE_COUNT=$(nodes_in_cluster | wc -w)

    # Two separate tests: an empty quorum answer must not turn the whole
    # condition into a syntax error and hide the single-node case.
    if [ "${PARTITION_WITH_QUORUM}" = "1" ] || [ "${NODE_COUNT:-0}" -eq 1 ]; then
        RUNNING_INSTANCES=$(crm_resource \
            --quiet --locate --resource "$RESOURCE_NAME" 2> /dev/null | sed -e '/(null)/d' | wc -l)
        if [ "$RUNNING_INSTANCES" -lt 1 ]; then
            ocf_log info "${LH} Election is needed"
            return 0
        fi
    fi

    ocf_log info "${LH} Election was done"
    return 1
}
# Check whether this node runs mysqld as the seed of a new cluster, i.e.
# a mysqld process for our datadir was started with wsrep-new-cluster.
# When it does, the node is flagged as Primary Component (update_node_pc).
# Returns: 0 when running a new cluster as a seed node, 1 otherwise.
#          Nothing is printed on stdout.
check_if_new_cluster() {
    local LH="${LL} check_if_new_cluster()"
    local pid

    # Match a mysqld pid by the datadir and a new cluster sign, exclude position recovery
    pid=$(ps -C mysqld -o pid= -o command= -o args= | \
        grep -e "${OCF_RESKEY_datadir}.*wsrep-new-cluster" -e "wsrep-new-cluster.*${OCF_RESKEY_datadir}" | \
        awk '!/wsrep.recover|defunct/ {print $1}')
    if [ -z "${pid}" ]; then
        ocf_log info "${LH} Running cluster"
        return 1
    fi

    update_node_pc
    ocf_log info "${LH} New cluster"
    return 0
}
# Elect the Galera master among the given nodes and print its name.
# The master is the node that
#   - has a positive allocation score for this resource in crm_simulate,
#   - carries the cluster UUID seen on most nodes, and
#   - has the highest seqno among those.
# Arguments: node names (as separate words and/or one whitespace separated
#            string)
# Outputs:   the chosen node name, or an empty line when none qualifies.
get_master() {
    local LH="${LL} get_master()"
    local NODES=$*
    local POSSIBLE_MASTERS
    local -A TMP
    local MASTER_GTID
    local GTID
    local NODE
    local NODE_SCORE
    local SCORES
    local LATEST_SEQNO=-1
    local SEQNO
    local MASTER

    # Ensure the same nodes list to reach a consensus for the choosen master across all of the nodes
    NODES=$(printf -- '%s\n' ${NODES} | sort -u)
    # Form a hash of keys as node names, values as GTID:SEQNO
    for NODE in $NODES; do
        # Try and get a gtid with a retry when the GTID=0 to make sure there
        # is plenty of time if multiple nodes are starting at the same time.
        GTID=$(get_node_gtid_with_retry "$NODE")
        TMP[$NODE]=$GTID
    done

    # Find possible masters
    # Cut the seqnums off the stored GTID:SEQNO pairs, then find the most seen GTID for the nodes.
    # The counts from "uniq -c" have to be sorted (highest first) before the
    # top entry is taken; ties are broken by UUID so every node agrees.
    MASTER_GTID=$(printf -- '%s\n' "${TMP[@]%:*}" | grep -vE -e "^0$" -e "$ZEROID" | \
        sort | uniq -c | sort -k1,1nr -k2,2 | awk 'NR == 1 {print $2}')
    [ "${MASTER_GTID}" ] || MASTER_GTID=$ZEROID
    ocf_log info "${LH} The most seen GTID is: ${MASTER_GTID}"

    # The allocation scores do not depend on the node being examined:
    # query them once instead of once per node.
    SCORES=$(crm_simulate -Ls)
    for NODE in $NODES; do
        NODE_SCORE=$(printf '%s\n' "$SCORES" | \
            awk "/${OCF_RESOURCE_INSTANCE}/ && /clone_color/ && ! /${OCF_RESOURCE_INSTANCE}:/ && /${NODE}/ {print \$NF}")
        if [[ $NODE_SCORE =~ ^-?[0-9]+$ && $NODE_SCORE -le 0 || $NODE_SCORE = "-INFINITY" || -z $NODE_SCORE ]]; then
            ocf_log info "${LH} Skipping node $NODE as it is not eligible for running the resource. Its score is ${NODE_SCORE:-NULL}"
            continue
        fi
        ocf_log info "${LH} Node's ${NODE} score: ${NODE_SCORE}, GTID/SEQNUM: ${TMP[$NODE]}"
        # Filter node names with the most seen GTID as possible masters and find the latest SEQNO
        if [ "${MASTER_GTID}" = "${TMP[$NODE]%:*}" ]; then
            POSSIBLE_MASTERS="$POSSIBLE_MASTERS $NODE"
            SEQNO=${TMP[$NODE]#*:}
            [ "$SEQNO" -gt "$LATEST_SEQNO" ] && LATEST_SEQNO=$SEQNO
        fi
    done
    ocf_log info "${LH} Possible masters: $POSSIBLE_MASTERS"

    # Cut the gtids off the stored GTID:SEQNO pairs, then
    # filter the master, which is one who has the latest SEQNO from the possible masters
    for NODE in $POSSIBLE_MASTERS; do
        if [ "${LATEST_SEQNO}" = "${TMP[$NODE]#*:}" ]; then
            MASTER=$NODE
            break
        fi
    done
    # An empty key is not a valid associative array subscript
    ocf_log info "${LH} Choosed master: ${MASTER} with GTID: ${MASTER:+${TMP[$MASTER]}}"
    echo "$MASTER"
}
# Find the best master and print its GTID.
# Polls every 10 seconds until the master timeout (get_master_timeout)
# expires.
# Returns:
#   0 - this node is the best master (Primary Component); its GTID is printed
#   1 - another node is the master and no re-election is needed (the master's
#       GTID is printed), or the timeout expired (nothing is printed)
# Exits with OCF_ERR_GENERIC when another node is the master while this node
# runs a new cluster and more than one online node has is_pc set
# (split-brain).
# NOTE(review): when called inside $(...) that exit only ends the subshell
# and is seen by the caller as status 1.
check_if_galera_pc() {
    local LH="${LL} check_if_galera_pc():"
    local NODES
    local MASTER
    local timeout
    local GTID
    local node
    local is_pc
    local pcnum=0

    timeout=$(get_master_timeout)

    ocf_log info "${LH} Checking if Primary Component"

    while [ "$timeout" -gt 0 ]; do
        NODES=$(nodes_in_cluster_online)
        MASTER=$(get_master "$NODES")
        GTID=$(get_node_gtid "$MASTER")
        if [ "$MASTER" = "$HOSTNAME" ]; then
            ocf_log info "${LH} I'm Primary Component. Join me! My GTID: ${GTID}"
            echo "${GTID}"
            return 0
        fi

        if ! check_if_reelection_needed; then
            ocf_log info "${LH} My neighbour is Primary Component with GTID: ${GTID}"
            if check_if_new_cluster; then
                # Count the online nodes that claim to be a Primary Component
                for node in ${NODES}; do
                    is_pc=$(crm_attribute --quiet --node "${node}" --lifetime reboot --query --name is_pc | sed -e '/(null)/d')
                    if [ "${is_pc}" = "true" ]; then
                        pcnum=$((pcnum + 1))
                    fi
                    if [ ${pcnum} -gt 1 ]; then
                        ocf_log err "${LH} But I'm running a new cluster, this is a split-brain!"
                        exit $OCF_ERR_GENERIC
                    fi
                done
            fi
            echo "${GTID}"
            return 1
        fi

        sleep 10
        timeout=$((timeout - 10))
        ocf_log info "${LH} Waiting for master. ${timeout} seconds left"
    done

    ocf_log info "${LH} ${HOSTNAME} is not Primary Component"
    return 1
}
# Functions invoked by resource manager actions

# Validate the resource configuration: the server and client binaries, the
# config file, the data directory, and the configured user and group.
# Returns: OCF_SUCCESS, or OCF_ERR_INSTALLED when something is missing.
mysql_validate() {
    local LH="${LL} mysql_validate()"

    check_binary "$OCF_RESKEY_binary"
    check_binary "$OCF_RESKEY_client_binary"

    if [ ! -f "$OCF_RESKEY_config" ]; then
        ocf_log err "${LH} Config $OCF_RESKEY_config doesn't exist"
        return $OCF_ERR_INSTALLED
    fi

    if [ ! -d "$OCF_RESKEY_datadir" ]; then
        ocf_log err "${LH} Datadir $OCF_RESKEY_datadir doesn't exist"
        return $OCF_ERR_INSTALLED
    fi

    if ! getent passwd "$OCF_RESKEY_user" >/dev/null 2>&1; then
        ocf_log err "${LH} User $OCF_RESKEY_user doesn't exist"
        return $OCF_ERR_INSTALLED
    fi

    if ! getent group "$OCF_RESKEY_group" >/dev/null 2>&1; then
        ocf_log err "${LH} Group $OCF_RESKEY_group doesn't exist"
        return $OCF_ERR_INSTALLED
    fi

    return $OCF_SUCCESS
}
# Check whether a State Snapshot Transfer is in progress: a mysqld process
# for our datadir exists and the wsrep_sst_<method> script, named after the
# wsrep_sst_method setting in the config file, is running.
# Arguments: $1 - log level for the verdict (default: info)
# Returns:   OCF_SUCCESS when an SST is running, OCF_ERR_GENERIC otherwise.
check_if_sst() {
    local LH="${LL} check_if_sst():"
    local loglevel=${1:-'info'}
    local pid
    local wsrep_sst_method
    local wsrep_sst_command
    local wsrep_sst_pid

    # Match a MySQLd pid by the datadir, exclude position recovery
    pid=$(ps -C mysqld -o pid= -o command= -o args= | grep "${OCF_RESKEY_datadir}" | \
        awk '!/wsrep.recover|defunct/ {print $1}')
    if [ "${pid}" ] ; then
        ocf_log info "${LH} MySQL process ${pid} found"
        # MySQLd's running and may be blocked, check for signs of SST.
        # Accept both "key = value" and "key=value"; drop a trailing comment;
        # the last occurrence in the file wins.
        wsrep_sst_method=$(awk -F= '
            /^[[:space:]]*wsrep_sst_method[[:space:]]*=/ {
                value = $2
                sub(/#.*/, "", value)
                gsub(/[[:space:]]/, "", value)
            }
            END { print value }' "${OCF_RESKEY_config}")
        wsrep_sst_command="wsrep_sst_${wsrep_sst_method}"
        wsrep_sst_pid=$(ps -C "${wsrep_sst_command}" -o pid= -o command= | \
            awk '!/defunct/ {print $1}' | head -1)
        if [ "${wsrep_sst_pid}" ]; then
            ocf_log $loglevel "${LH} SST is in progress"
            return $OCF_SUCCESS
        fi
    fi
    ocf_log $loglevel "${LH} No signs of SST found"
    return $OCF_ERR_GENERIC
}
# Check whether the MySQL server is running, based on its pid file.
# Arguments: $1 - log level for the status messages (default: info)
#            $2 - number of attempts to find the pid file (default: 3)
#            $3 - seconds to sleep after a failed attempt (default: 2)
# Returns:   OCF_SUCCESS when the pid file exists and /proc/<pid> is present,
#            OCF_NOT_RUNNING otherwise.
mysql_status() {
    local LH="${LL} mysql_status():"
    local loglevel=${1:-'info'}
    local count=${2:-3}
    local sleeptime=${3:-2}
    local pid

    while [ "$count" -gt 0 ]; do
        if [ -f "$OCF_RESKEY_pid" ]; then
            ocf_log info "${LH} MySQL PID found"
            break
        fi

        count=$(( count-1 ))
        ocf_log $loglevel "${LH} PIDFile ${OCF_RESKEY_pid} of MySQL server not found. Sleeping for $sleeptime seconds. ${count} retries left"
        sleep "$sleeptime"
    done

    # The counter only reaches zero when the pid file never showed up
    if [ "$count" -eq 0 ]; then
        ocf_log $loglevel "${LH} MySQL is not running"
        return $OCF_NOT_RUNNING
    fi

    pid=$(cat "$OCF_RESKEY_pid")
    if [ -n "$pid" ] && [ -d "/proc/$pid" ]; then
        ocf_log $loglevel "${LH} MySQL is running"
        return $OCF_SUCCESS
    fi

    ocf_log $loglevel "${LH} MySQL is not running"
    return $OCF_NOT_RUNNING
}
# Monitor action.
# Reports success while an SST is in progress. Otherwise the server must be
# running, connected to the cluster (wsrep_connected=ON) and, when its
# state is Synced/Donor/Desync, ready (wsrep_ready=ON). A node that turns
# out to be the master must also carry the GTID it just published.
# Returns: OCF_SUCCESS, the mysql_status result when the server is not
#          running, or OCF_ERR_GENERIC.
mysql_monitor() {
    local LH="${LL} mysql_monitor():"
    local rc
    local status_loglevel="err"
    local WSREP_CONNECTED
    local WSREP_LOCAL_STATE_COMMENT
    local WSREP_READY
    local MGTID
    local GTID

    # Set loglevel to info during probe
    if ocf_is_probe; then
        status_loglevel="info"
    fi

    # A node receiving a state snapshot is considered healthy
    check_if_sst
    rc=$?
    [ $rc -eq $OCF_SUCCESS ] && return $rc

    mysql_status $status_loglevel
    rc=$?
    [ $rc -eq $OCF_SUCCESS ] || return $rc

    GTID=$(update_node_gtid)

    # MYSQL_OPTIONS_TEST is an option string and must be word-split
    WSREP_CONNECTED=$($MYSQL $MYSQL_OPTIONS_TEST -s -N \
        -e "SHOW STATUS LIKE 'wsrep_connected'" | awk '{print $NF}')

    if [ "$WSREP_CONNECTED" != "ON" ]; then
        ocf_log err "${LH} MySQL is not connected to the cluster"
        return $OCF_ERR_GENERIC
    fi

    WSREP_LOCAL_STATE_COMMENT=$($MYSQL $MYSQL_OPTIONS_TEST -s -N \
        -e "SHOW STATUS LIKE 'wsrep_local_state_comment'" | awk '{print $NF}')

    case "$WSREP_LOCAL_STATE_COMMENT" in
        *Synced*|*Donor*|*Desync*)
            WSREP_READY=$($MYSQL $MYSQL_OPTIONS_TEST -s -N \
                -e "SHOW STATUS LIKE 'wsrep_ready'" | awk '{print $NF}')

            if [ "$WSREP_READY" != "ON" ]; then
                ocf_log err "${LH} MySQL synced but not ready"
                return $OCF_ERR_GENERIC
            fi
            ;;
        Initialized)
            ocf_log err "${LH} MySQL lost quorum or uninitialized"
            return $OCF_ERR_GENERIC
            ;;
    esac

    # Check if this node is the master and is running the most recent GTID
    check_if_new_cluster
    MGTID=$(check_if_galera_pc)
    rc=$?
    if [ $rc -eq 0 ] && [ "${MGTID}" != "${GTID}" ]; then
        ocf_log err "${LH} I'm a master, and my GTID: ${GTID}, which was not expected"
        return $OCF_ERR_GENERIC
    fi
    ocf_log debug "${LH} MySQL monitor succeeded"
    return $OCF_SUCCESS
}
# Start action.
# Prepares the socket and pid directories, publishes the local GTID, and
# bootstraps a new cluster (--wsrep-new-cluster) when an election is needed
# and this node wins it; otherwise the server joins the existing cluster.
# Then waits until either an SST is in progress or the server is running.
# Returns: OCF_SUCCESS; exits with OCF_ERR_PERM when the pid or socket
#          directory is not writable by the configured user.
mysql_start() {
    local LH="${LL} mysql_start():"
    local socket_dir
    local pid_dir
    local rc
    local dir
    local mysql_extra_params
    local set_pc=0

    if mysql_status info 1; then
        ocf_log info "${LH} MySQL already running"
        return $OCF_SUCCESS
    fi

    socket_dir="$( dirname "${OCF_RESKEY_socket}" )"
    if [ ! -d "${socket_dir}" ] ; then
        ocf_log info "${LH} Create socket dir: ${socket_dir} and chown to ${OCF_RESKEY_user}:${OCF_RESKEY_group}"
        mkdir -p "${socket_dir}"
        chown "${OCF_RESKEY_user}:${OCF_RESKEY_group}" "${socket_dir}"
        chmod 755 "${socket_dir}"
    fi

    # check and make PID file dir
    pid_dir="$( dirname "${OCF_RESKEY_pid}" )"
    if [ ! -d "${pid_dir}" ] ; then
        ocf_log info "${LH} Create PID dir: ${pid_dir} and chown to ${OCF_RESKEY_user}:${OCF_RESKEY_group}"
        mkdir -p "${pid_dir}"
        chown -R "${OCF_RESKEY_user}:${OCF_RESKEY_group}" "${pid_dir}"
        chmod 755 "${pid_dir}"
    fi

    # set user/group for datadir
    chown -R "${OCF_RESKEY_user}:${OCF_RESKEY_group}" "${OCF_RESKEY_datadir}"

    # Regardless of whether we just created the directory or it
    # already existed, check whether it is writable by the configured
    # user
    for dir in "$pid_dir" "$socket_dir"; do
        if ! /usr/bin/sudo -n -u "$OCF_RESKEY_user" /usr/bin/test -w "$dir"; then
            ocf_log err "${LH} Directory $dir is not writable by $OCF_RESKEY_user"
            exit $OCF_ERR_PERM
        fi
    done

    # NOTE(review): fixed file name in a world-writable directory; whoever
    # can create it controls the SQL executed at startup.
    if [ -f /tmp/wsrep-init-file ]; then
        mysql_extra_params="--init-file=/tmp/wsrep-init-file"
    else
        mysql_extra_params=""
    fi

    update_node_gtid
    if check_if_reelection_needed; then
        # Bootstrap a new cluster only when this node won the election
        if check_if_galera_pc; then
            mysql_extra_params="$mysql_extra_params --wsrep-new-cluster"
            set_pc=1
        fi
    fi

    clear_node_pc
    ocf_log info "${LH} Starting MySQL"
    # The two parameter strings are deliberately unquoted: each may hold
    # several options. The exit status of a background job is not available
    # here, so a failed launch is only noticed by the wait loop below.
    "${OCF_RESKEY_binary}" \
        --pid-file="$OCF_RESKEY_pid" \
        --socket="$OCF_RESKEY_socket" \
        --datadir="$OCF_RESKEY_datadir" \
        --user="$OCF_RESKEY_user" $OCF_RESKEY_additional_parameters \
        $mysql_extra_params >/dev/null 2>&1 &

    # Spin waiting for the server to come up or exit, if SST's in progress
    # Let the CRM/LRM time us out if required.
    while :; do
        check_if_sst
        rc=$?
        [ $rc -eq $OCF_SUCCESS ] && break

        if mysql_status info 1; then
            break
        fi
        sleep 3
    done

    ocf_log info "${LH} MySQL started"
    [ ${set_pc} -eq 1 ] && update_node_pc
    # Refresh the GTID only when the server is up (no SST in progress)
    [ $rc -ne $OCF_SUCCESS ] && update_node_gtid
    return $OCF_SUCCESS
}
# Remove the node's gtid and is_pc attributes and delete the lock, socket
# and pid files left behind by the server.
mysql_cleanup() {
    local LH="${LL} mysql_cleanup():"

    ocf_log info "${LH} Cleaning up gtid attribute"
    crm_attribute --quiet --node "$HOSTNAME" --lifetime reboot --name gtid \
        --delete

    clear_node_pc
    ocf_log debug "${LH} Delete lock file: /var/lock/subsys/mysqld"
    rm -f /var/lock/subsys/mysqld

    ocf_log debug "${LH} Delete sock file: ${OCF_RESKEY_socket}"
    rm -f "$OCF_RESKEY_socket"

    # The pid file may already be gone; that is not worth an error message
    ocf_log debug "${LH} Delete pid file: ${OCF_RESKEY_pid} with content $(cat "${OCF_RESKEY_pid}" 2>/dev/null)"
    rm -f "$OCF_RESKEY_pid"
}
# Stop action.
# Asks proc_stop to terminate mysqld with SIGTERM, then cleans up the node
# attributes and leftover files.
# The shutdown budget is the operation timeout (OCF_RESKEY_CRM_meta_timeout,
# in milliseconds) minus 5 seconds, or 15 seconds when no timeout is given;
# a fifth of it is passed to proc_stop together with the constant 5.
# NOTE(review): presumably proc_stop retries that many times, 5 seconds
# apart — confirm against ocf-fuel-funcs.
# Returns: OCF_SUCCESS, always.
mysql_stop() {
    local LH="${LL} mysql_stop():"
    local shutdown_timeout=15

    ocf_log info "${LH}"

    if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
        shutdown_timeout=$(( (OCF_RESKEY_CRM_meta_timeout / 1000) - 5 ))
    fi

    proc_stop "${OCF_RESKEY_pid}" "mysqld.*${OCF_RESKEY_datadir}" SIGTERM 5 $(( shutdown_timeout / 5 ))
    mysql_cleanup
    return $OCF_SUCCESS
}
##########################################################################
# If the file DEBUG_LOG exists, is writable and is not a symlink, make this
# resource agent easy to debug: append all output (stdout and stderr) to it
# and enable command tracing. Otherwise output is left untouched.
# NOTE(review): the log directory is meant to be owned by root with
# permissions 0700, but this is not verified here. The dump below includes
# every OCF_ variable, test_passwd among them.
##########################################################################
DEBUG_LOG="/tmp/mysql.ocf.ra.debug/log"
# GTID format: <cluster uuid>:<seqno>, where seqno is a number or -1
UUID_REGEX="\w{8}-\w{4}-\w{4}-\w{4}-\w{12}:([[:digit:]]+|-1)"
# Cluster UUID of a node that was never initialized
ZEROID="00000000-0000-0000-0000-000000000000"
if [ -n "${DEBUG_LOG}" ] && [ -w "${DEBUG_LOG}" ] && [ ! -L "${DEBUG_LOG}" ]; then
    DEBUG_LOG_DIR="${DEBUG_LOG%/*}"
    if [ -d "${DEBUG_LOG_DIR}" ]; then
        exec 9>>"$DEBUG_LOG"
        exec 1>&9 2>&9
        date '+%Y%m%d %H:%M:%S' >&9
        echo "$*" >&9
        env | grep OCF_ | sort >&9
        set -x
    else
        exec 9>/dev/null
    fi
fi
# Actions that need neither a valid configuration nor a log prefix
case "$1" in
    meta-data)
        meta_data
        exit $OCF_SUCCESS
        ;;
    usage|help)
        usage
        exit $OCF_SUCCESS
        ;;
esac

# Prefix used by every function for its log messages
export LL="${OCF_RESOURCE_INSTANCE}:"

mysql_validate
rc=$?
if [ $rc -ne 0 ]; then
    # With an invalid configuration the resource cannot be running here:
    # stop succeeds and monitor reports "not running".
    case "$1" in
        stop)    exit $OCF_SUCCESS ;;
        monitor) exit $OCF_NOT_RUNNING ;;
        *)       exit $rc ;;
    esac
fi

# What kind of method was invoked?
# The status of the selected action becomes the exit status of the script.
case "$1" in
    start)        mysql_start ;;
    stop)         mysql_stop ;;
    monitor)      mysql_monitor ;;
    validate-all) exit $OCF_SUCCESS ;;
    *)
        usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac
# vim: set ts=4 sw=4 tw=0 et :