#!/bin/bash
# Authors:  Bartosz Kupidura (Mirantis): Rewrite RA to support mysql/galera
#           Sergii Golovatiuk (Mirantis): Rewrite RA to support mysql/galera
#           Alan Robertson: DB2 Script
#           Jakub Janczak: rewrite as MySQL
#           Andrew Beekhof: cleanup and import
#           Sebastian Reitenbach: add OpenBSD defaults, more cleanup
#           Narayan Newton: add Gentoo/Debian defaults
#           Marian Marinov, Florian Haas: add replication capability
#           Yves Trudeau, Baron Schwartz: add VIP support and improve replication
#
# Support:  openstack@lists.launchpad.net
# License:  GNU General Public License (GPL)
#
# (c) 2002-2005 International Business Machines, Inc.
#     2005-2010 Linux-HA contributors
#     2014 Mirantis Inc.
#######################################################################
# Initialization:
#
# Source the generic OCF shell helpers and the Fuel helper library.
# Both directories can be overridden through the environment.

: "${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}"
. "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs"
: "${OCF_FUEL_FUNCTIONS_DIR=${OCF_ROOT}/resource.d/fuel}"
. "${OCF_FUEL_FUNCTIONS_DIR}/ocf-fuel-funcs"

#######################################################################
# Fill in some defaults if no values are specified
OCF_RESKEY_binary_default="/usr/bin/mysqld_safe"
OCF_RESKEY_client_binary_default="/usr/bin/mysql"
OCF_RESKEY_config_default="/etc/mysql/my.cnf"
OCF_RESKEY_datadir_default="/var/lib/mysql"
OCF_RESKEY_user_default="mysql"
OCF_RESKEY_group_default="mysql"
# HA_RSCTMP and __SCRIPT_NAME are not set in this file; presumably they come
# from the sourced ocf-shellfuncs.
OCF_RESKEY_pid_default="${HA_RSCTMP}/${__SCRIPT_NAME}/${__SCRIPT_NAME}.pid"
OCF_RESKEY_socket_default="/var/lib/mysql/mysql.sock"
OCF_RESKEY_test_user_default="root"
OCF_RESKEY_test_passwd_default=""
OCF_RESKEY_test_conf_default=""
OCF_RESKEY_additional_parameters_default=""
OCF_RESKEY_master_timeout_default="300"

# The "=" form (not ":=") only assigns when the variable is unset, so an
# explicitly empty value supplied by the cluster manager is kept as-is.
: "${HA_LOGTAG=ocf-mysql-wss}"
: "${HA_LOGFACILITY=daemon}"
: "${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}"
MYSQL_BINDIR="$(dirname "${OCF_RESKEY_binary}")"

: "${OCF_RESKEY_client_binary=${OCF_RESKEY_client_binary_default}}"

: "${OCF_RESKEY_config=${OCF_RESKEY_config_default}}"
: "${OCF_RESKEY_datadir=${OCF_RESKEY_datadir_default}}"

: "${OCF_RESKEY_user=${OCF_RESKEY_user_default}}"
: "${OCF_RESKEY_group=${OCF_RESKEY_group_default}}"

: "${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}"
: "${OCF_RESKEY_socket=${OCF_RESKEY_socket_default}}"

: "${OCF_RESKEY_test_user=${OCF_RESKEY_test_user_default}}"
: "${OCF_RESKEY_test_passwd=${OCF_RESKEY_test_passwd_default}}"
: "${OCF_RESKEY_test_conf=${OCF_RESKEY_test_conf_default}}"

: "${OCF_RESKEY_additional_parameters=${OCF_RESKEY_additional_parameters_default}}"
: "${OCF_RESKEY_master_timeout=${OCF_RESKEY_master_timeout_default}}"

#######################################################################
# Convenience variables
MYSQL=$OCF_RESKEY_client_binary
HOSTNAME=$(uname -n)
# These option strings are expanded UNQUOTED by their users so that they
# split into separate client arguments.
MYSQL_OPTIONS_LOCAL="-S $OCF_RESKEY_socket --connect_timeout=10"
if [ -n "${OCF_RESKEY_test_conf}" ]; then
    MYSQL_OPTIONS_TEST="--defaults-extra-file=${OCF_RESKEY_test_conf} ${MYSQL_OPTIONS_LOCAL}"
else
    # NOTE(review): the password is passed on the client command line here and
    # is therefore visible in the process list; prefer test_conf where possible.
    MYSQL_OPTIONS_TEST="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user --password=$OCF_RESKEY_test_passwd"
fi
#######################################################################

# Print a short usage summary for the agent on stdout.
usage() {
    cat <<UEND
usage: $0 (start|stop|meta-data|validate-all|monitor)

$0 manages a MySQL Database as an HA resource.

The 'start' operation starts the database.
The 'stop' operation stops the database.
The 'monitor' operation reports whether the database seems to be working
The 'validate-all' operation reports whether the parameters are valid

UEND
}

# Print the OCF resource-agent metadata (XML) on stdout.
# The heredoc is unquoted on purpose: the default="..." attributes are filled
# in from the OCF_RESKEY_*_default variables.
meta_data() {
    cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="mysql" version="0.1">
<version>0.1</version>
<longdesc lang="en">
Resource script for MySQL
</longdesc>
<shortdesc lang="en">Resource script for MySQL</shortdesc>
<parameters>
<parameter name="binary" unique="0" required="0">
<longdesc lang="en">
Location of the MySQL server binary
</longdesc>
<shortdesc lang="en">MySQL server binary</shortdesc>
<content type="string" default="${OCF_RESKEY_binary_default}" />
</parameter>
<parameter name="client_binary" unique="0" required="0">
<longdesc lang="en">
Location of the MySQL client binary
</longdesc>
<shortdesc lang="en">MySQL client binary</shortdesc>
<content type="string" default="${OCF_RESKEY_client_binary_default}" />
</parameter>
<parameter name="config" unique="0" required="0">
<longdesc lang="en">
Configuration file
</longdesc>
<shortdesc lang="en">MySQL config</shortdesc>
<content type="string" default="${OCF_RESKEY_config_default}" />
</parameter>
<parameter name="datadir" unique="0" required="0">
<longdesc lang="en">
Directory containing databases
</longdesc>
<shortdesc lang="en">Data directory</shortdesc>
<content type="string" default="${OCF_RESKEY_datadir_default}" />
</parameter>
<parameter name="user" unique="0" required="0">
<longdesc lang="en">
User running MySQL daemon
</longdesc>
<shortdesc lang="en">MySQL user</shortdesc>
<content type="string" default="${OCF_RESKEY_user_default}" />
</parameter>
<parameter name="group" unique="0" required="0">
<longdesc lang="en">
Group running MySQL daemon (for logfile and directory permissions)
</longdesc>
<shortdesc lang="en">MySQL group</shortdesc>
<content type="string" default="${OCF_RESKEY_group_default}"/>
</parameter>
<parameter name="pid" unique="0" required="0">
<longdesc lang="en">
The pidfile to be used for mysqld.
</longdesc>
<shortdesc lang="en">MySQL pid file</shortdesc>
<content type="string" default="${OCF_RESKEY_pid_default}"/>
</parameter>
<parameter name="socket" unique="0" required="0">
<longdesc lang="en">
The socket to be used for mysqld.
</longdesc>
<shortdesc lang="en">MySQL socket</shortdesc>
<content type="string" default="${OCF_RESKEY_socket_default}"/>
</parameter>
<parameter name="test_user" unique="0" required="0">
<longdesc lang="en">
MySQL test user, must have select privilege on 'show status'
</longdesc>
<shortdesc lang="en">MySQL test user</shortdesc>
<content type="string" default="${OCF_RESKEY_test_user_default}" />
</parameter>
<parameter name="test_passwd" unique="0" required="0">
<longdesc lang="en">
MySQL test user password
</longdesc>
<shortdesc lang="en">MySQL test user password</shortdesc>
<content type="string" default="${OCF_RESKEY_test_passwd_default}" />
</parameter>
<parameter name="test_conf" unique="0" required="0">
<longdesc lang="en">
MySQL test user conf file to override user/pass
</longdesc>
<shortdesc lang="en">MySQL test user conf file</shortdesc>
<content type="string" default="${OCF_RESKEY_test_conf_default}" />
</parameter>
<parameter name="additional_parameters" unique="0" required="0">
<longdesc lang="en">
Additional parameters which are passed to the mysqld on startup.
(e.g. --skip-external-locking or --skip-grant-tables)
</longdesc>
<shortdesc lang="en">Additional parameters to pass to mysqld</shortdesc>
<content type="string" default="${OCF_RESKEY_additional_parameters_default}"/>
</parameter>
<parameter name="master_timeout" unique="0" required="0">
<longdesc lang="en">
How long we should wait for galera master. If master not come up before timeout,
RA will choose new master from already running nodes. This value can by changed by crm_attribute:
# crm_attribute --name galera_master_timeout --update 500
Remember to remove this after maintenance. USE WITH CAUTION!
Remember to change timeout for start operation. Start timeout should be bigger than master_timeout
</longdesc>
<shortdesc lang="en">Galera master timeout</shortdesc>
<content type="integer" default="${OCF_RESKEY_master_timeout_default}"/>
</parameter>
</parameters>
<actions>
<action name="start" timeout="330" />
<action name="stop" timeout="120" />
<action name="monitor" timeout="30" interval="20" depth="0" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="10" />
</actions>
</resource-agent>
END
}
# Convenience functions
#######################################################################

# Print whatever `crm_node --partition` reports (minus "(null)" lines) as one
# space-separated line on stdout; prints an empty line when nothing is left.
nodes_in_cluster_online() {
    local LH="${LL} nodes_in_cluster_online():"
    local NODES

    NODES=$(crm_node --partition | sed -e '/(null)/d')
    if [ -n "$NODES" ]; then
        # ocf_log takes the severity as its first argument.
        ocf_log info "${LH} Online Nodes in cluster: ${NODES}"
        # Deliberately unquoted: collapses newlines and repeated blanks so the
        # caller receives a single space-separated list.
        # shellcheck disable=SC2086
        echo $NODES
    else
        ocf_log info "${LH} No online nodes in cluster"
        echo
    fi
}

# Print the second column of every `crm_node --list` line that starts with an
# alphanumeric character (minus "(null)" entries) as one space-separated line
# on stdout; prints an empty line when nothing is left.
nodes_in_cluster() {
    local LH="${LL} nodes_in_cluster():"
    local NODES

    #Ubuntu doesn't like \w
    NODES=$(crm_node --list | awk '/^[a-zA-Z0-9]/ {print $2}' | sed -e '/(null)/d')
    if [ -n "$NODES" ]; then
        # ocf_log takes the severity as its first argument.
        ocf_log info "${LH} Nodes in cluster: ${NODES}"
        # Deliberately unquoted: collapses newlines into single spaces.
        # shellcheck disable=SC2086
        echo $NODES
    else
        ocf_log info "${LH} No nodes in cluster"
        echo
    fi
}

#Validate if GTID have correct format (return 0), else return 1
#valid values are:
#XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX:123 - standard cluster-id:commit-id
#XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX:-1 - standard non initialized cluster, 00000000-0000-0000-0000-000000000000:-1
#
# Arguments: $1 - the GTID string to check
# Globals:   UUID_REGEX (read) - extended regex describing one GTID
validate_gtid() {
    local LH="${LL} validate_gtid():"
    local status_loglevel="err"

    if [ -z "$1" ]; then
        ocf_log "$status_loglevel" "${LH} No GTID provided"
        return 1
    fi

    # -x makes the whole line have to match, so text surrounding an otherwise
    # well-formed GTID is rejected instead of silently accepted.
    if ! printf '%s\n' "$1" | grep -q -x -E "${UUID_REGEX}"; then
        ocf_log "$status_loglevel" "${LH} GTID have wrong format: $1"
        return 1
    fi

    ocf_log info "${LH} GTID OK: $1"
    return 0
}

#Get galera GTID from local mysql instance.
#If changed, update it in CIB, then return 0 and new GTID
#If unchanged or bad value, return 1 and the current GTID from CIB
#
# When mysql_status reports a running server the GTID is built from the
# wsrep_local_state_uuid and wsrep_last_committed status variables; otherwise
# it is taken from `--wsrep-recover` output, falling back to grastate.dat.
update_node_gtid() {
    local LH="${LL} update_node_gtid():"
    local status_loglevel="err"
    local GTID
    local GTID_current
    local CLUSTER_ID
    local COMMIT_ID
    local grastate="${OCF_RESKEY_datadir}/grastate.dat"

    # Set loglevel to info during probe
    if ocf_is_probe; then
        status_loglevel="info"
    fi

    if mysql_status "$status_loglevel" 1; then
        # MYSQL_OPTIONS_TEST is an option list and must be word-split.
        # shellcheck disable=SC2086
        CLUSTER_ID=$("$MYSQL" $MYSQL_OPTIONS_TEST -s -N \
            -e "SHOW STATUS LIKE 'wsrep_local_state_uuid'" | awk '{print $NF}')
        # shellcheck disable=SC2086
        COMMIT_ID=$("$MYSQL" $MYSQL_OPTIONS_TEST -s -N \
            -e "SHOW STATUS LIKE 'wsrep_last_committed'" | awk '{print $NF}')
        GTID="$CLUSTER_ID:$COMMIT_ID"
    else
        GTID=$("${OCF_RESKEY_binary}" --wsrep-recover 2>&1 \
            | grep -e 'Recovered position' -e 'wsrep_start_position' \
            | grep -Eo "${UUID_REGEX}")
        # Only consult grastate.dat when it is readable; a missing file simply
        # leaves GTID empty, which is rejected by validate_gtid below.
        if [ -z "${GTID}" ] && [ -r "${grastate}" ]; then
            GTID=$(awk '/uuid/ { uuid = $NF} /seqno/ { seqno = $NF} END {print uuid":"seqno}' \
                "${grastate}")
        fi
    fi

    GTID_current=$(get_node_gtid "$HOSTNAME")
    if [ "${GTID}" != "${GTID_current}" ] && validate_gtid "${GTID}"; then
        ocf_log info "${LH} Galera GTID: ${GTID}"
        crm_attribute --quiet --node "$HOSTNAME" --lifetime reboot --name gtid \
            --update "$GTID"
        echo "${GTID}"
        return 0
    fi

    echo "${GTID_current}"
    return 1
}

# Set the reboot-lifetime node attribute is_pc to 'true' for this node.
update_node_pc() {
    local LH="${LL} update_node_pc():"

    ocf_log info "${LH} Setting node PC flag to true"
    crm_attribute --quiet --node "$HOSTNAME" --lifetime reboot --name is_pc --update 'true'
}

# Delete the reboot-lifetime node attribute is_pc for this node.
clear_node_pc() {
    # Own log prefix; previously the caller's LH (or nothing) was picked up
    # through dynamic scoping.
    local LH="${LL} clear_node_pc():"

    ocf_log info "${LH} Cleaning up is_pc attribute"
    crm_attribute --quiet --node "$HOSTNAME" --lifetime reboot --name is_pc \
        --delete
}

# Print the number of seconds to wait for a galera master.
# The cluster attribute galera_master_timeout takes precedence; the resource
# parameter master_timeout is used as default and as fallback when the query
# does not yield a plain non-negative integer.
get_master_timeout() {
    local LH="${LL} get_master_timeout():"
    local timeout

    timeout=$(crm_attribute --quiet --name galera_master_timeout \
        --query --default="$OCF_RESKEY_master_timeout" | sed -e '/(null)/d')
    # Drop stray whitespace before judging the value.
    timeout=${timeout//[[:space:]]/}

    case "$timeout" in
        '' | *[!0-9]*)
            # Callers feed the value to an integer test, so never hand back
            # an empty or non-numeric string.
            ocf_log warn "${LH} Unusable timeout '${timeout}', using ${OCF_RESKEY_master_timeout}"
            timeout=$OCF_RESKEY_master_timeout
            ;;
    esac

    ocf_log info "${LH} Setting timeout $timeout"
    echo "$timeout"
}

#Get gtid attribute for $1 node, "0" means no GTID set or wrong format for GTID
#
# Arguments: $1 - node name
# Outputs:   the node's gtid attribute, or 0, on stdout
get_node_gtid() {
    local LH="${LL} get_node_gtid():"
    local GTID=""

    # Without a node name there is nothing to query; an empty argument would
    # otherwise end up as the value of --node.
    if [ -n "$1" ]; then
        GTID=$(crm_attribute --quiet --node "$1" --lifetime reboot --query \
            --name gtid 2> /dev/null | sed -e '/(null)/d')
    fi

    if [ -n "$GTID" ] && validate_gtid "$GTID"; then
        ocf_log info "${LH} Galera GTID: ${GTID}"
        echo "$GTID"
        return 0
    fi

    ocf_log info "${LH} No GTID for $1"
    echo 0
}

#Get the node gtid; when it is not found ("0"), retry once after a random
#1-10 second sleep.
#
# Arguments: $1 - node name
# Outputs:   result of get_node_gtid on stdout
get_node_gtid_with_retry() {
    local GTID
    local NODE="$1"

    GTID=$(get_node_gtid "$NODE")

    if [ "$GTID" = "0" ]; then
        sleep $(( (RANDOM % 10) + 1 ))
        GTID=$(get_node_gtid "$NODE")
    fi

    echo "$GTID"
}

# Decide whether a master (re)election has to take place.
# Returns 0 ("election is needed") when `crm_node -q` prints 1 or
# nodes_in_cluster lists exactly one node, AND `crm_resource --locate` reports
# no line for this resource. Returns 1 otherwise.
check_if_reelection_needed() {
    local LH="${LL} check_if_reelection_needed()"
    local PARTITION_WITH_QUORUM
    local RESOURCE_NAME
    local NODE_COUNT
    local RUNNING_INSTANCES

    PARTITION_WITH_QUORUM=$(crm_node -q | sed -e '/(null)/d')
    # Strip the clone instance suffix (everything from the first ':').
    RESOURCE_NAME=${OCF_RESOURCE_INSTANCE%%:*}
    NODE_COUNT=$(nodes_in_cluster | wc -w)

    # Defaults and quoting keep the tests well-formed when crm_node prints
    # nothing; unusable output counts as "no quorum" without shell noise.
    if [ "${PARTITION_WITH_QUORUM:-0}" -eq 1 ] 2>/dev/null \
        || [ "${NODE_COUNT:-0}" -eq 1 ]; then
        RUNNING_INSTANCES=$(crm_resource \
            --quiet --locate --resource "$RESOURCE_NAME" | sed -e '/(null)/d' | wc -l)
        if [ "${RUNNING_INSTANCES:-0}" -lt 1 ]; then
            ocf_log info "${LH} Election is needed"
            return 0
        fi
    fi

    ocf_log info "${LH} Election was done"
    return 1
}

# Return 0 if a local mysqld for our datadir was started with
# wsrep-new-cluster (i.e. this node runs as a seed node), else return 1.
# On success the node is also flagged via update_node_pc. Nothing is printed
# on stdout; the matched pid is only used for the decision.
check_if_new_cluster() {
    local LH="${LL} check_if_new_cluster()"
    local pid

    # Match a mysqld pid by the datadir and a new cluster sign, exclude position recovery
    pid=$(ps -C mysqld -o pid= -o command= -o args= \
        | grep -e "${OCF_RESKEY_datadir}.*wsrep-new-cluster" -e "wsrep-new-cluster.*${OCF_RESKEY_datadir}" \
        | awk '!/wsrep.recover|defunct/ {print $1}')

    if [ -z "${pid}" ]; then
        ocf_log info "${LH} Running cluster"
        return 1
    fi

    update_node_pc
    ocf_log info "${LH} New cluster"
    return 0
}

# Choose the node that should act as galera master and print its name.
#
# Arguments: node names (any mix of separate words / whitespace-separated)
# Globals:   ZEROID, OCF_RESOURCE_INSTANCE (read)
# Outputs:   name of the chosen node on stdout (empty line if none)
#
# Algorithm:
#   1. collect GTID (uuid:seqno) of every node;
#   2. take the cluster uuid seen on most nodes (ignoring "0" and ZEROID);
#   3. among eligible nodes carrying that uuid pick the highest seqno.
get_master() {
    local LH="${LL} get_master()"
    local NODES=$*
    local POSSIBLE_MASTERS
    local -A TMP
    local MASTER_GTID
    local GTID
    local NODE
    local NODE_SCORE
    local LATEST_SEQNO=-1
    local SEQNO
    local MASTER
    local SCORES

    # Ensure the same nodes list to reach a consensus for the choosen master across all of the nodes
    # shellcheck disable=SC2086
    NODES=$(printf -- '%s\n' ${NODES} | sort -u)
    # Form a hash of keys as node names, values as GTID:SEQNO
    for NODE in $NODES; do
        # Try and get a gtid with a retry when the GTID=0 to make sure there
        # is plenty of time if multiple nodes are starting at the same time.
        GTID=$(get_node_gtid_with_retry "$NODE")
        TMP[$NODE]=$GTID
    done

    # Find possible masters
    # Cut the seqnums off the stored GTID:SEQNO pairs, then find the most seen GTID for the nodes.
    # The counts from `uniq -c` must be sorted numerically (descending) before
    # taking the first line; ties are broken by uuid so every node agrees.
    MASTER_GTID=$(printf -- '%s\n' "${TMP[@]%:*}" \
        | grep -vE -e "^0$" -e "$ZEROID" \
        | sort | uniq -c | sort -k1,1nr -k2,2 \
        | awk 'NR == 1 {print $2}')
    [ "${MASTER_GTID}" ] || MASTER_GTID=$ZEROID
    ocf_log info "${LH} The most seen GTID is: ${MASTER_GTID}"

    # The allocation scores do not depend on the loop variable: query once.
    SCORES=$(crm_simulate -Ls)
    for NODE in $NODES; do
        # Same filter as before: clone_color lines mentioning the resource
        # (but not "<resource>:") and the node; the score is the last field.
        NODE_SCORE=$(printf '%s\n' "$SCORES" \
            | awk -v res="${OCF_RESOURCE_INSTANCE}" -v node="${NODE}" \
                '$0 ~ res && /clone_color/ && $0 !~ (res ":") && $0 ~ node {print $NF}')
        if [[ $NODE_SCORE =~ ^-?[0-9]+$ && $NODE_SCORE -le 0 || $NODE_SCORE = "-INFINITY" || -z $NODE_SCORE ]]; then
            ocf_log info "${LH} Skipping node $NODE as it is not eligible for running the resource. Its score is ${NODE_SCORE:-NULL}"
            continue
        fi
        ocf_log info "${LH} Node's ${NODE} score: ${NODE_SCORE}, GTID/SEQNUM: ${TMP[$NODE]}"
        # Filter node names with the most seen GTID as possible masters and find the latest SEQNO
        if [ "${MASTER_GTID}" = "${TMP[$NODE]%:*}" ]; then
            POSSIBLE_MASTERS="$POSSIBLE_MASTERS $NODE"
            SEQNO=${TMP[$NODE]#*:}
            if [ "$SEQNO" -gt "$LATEST_SEQNO" ]; then
                LATEST_SEQNO=$SEQNO
            fi
        fi
    done
    ocf_log info "${LH} Possible masters: $POSSIBLE_MASTERS"

    # Cut the gtids off the stored GTID:SEQNO pairs, then
    # filter the master, which is one who has the latest SEQNO from the possible masters
    for NODE in $POSSIBLE_MASTERS; do
        if [ "${LATEST_SEQNO}" = "${TMP[$NODE]#*:}" ]; then
            MASTER=$NODE
            break
        fi
    done

    # An empty key is not a valid associative-array subscript, so only look
    # the GTID up when a master was actually found.
    if [ -n "$MASTER" ]; then
        ocf_log info "${LH} Choosed master: ${MASTER} with GTID: ${TMP[$MASTER]}"
    else
        ocf_log info "${LH} Choosed master: none"
    fi
    echo "$MASTER"
}

#Find the best master and print its GTID.
#If the best master is this node, propose it as a prim and return 0.
#If another node is, print its GTID and return 1; additionally, if *this* node
#is also running a new cluster and more than one node carries is_pc=true,
#exit with error for safety concerns (split-brain).
#Returns 1 without output when no master showed up before the timeout.
#
# NOTE(review): the `exit` only terminates the agent when this function is
# called directly; inside $(...) it merely ends the subshell.
check_if_galera_pc() {
    local LH="${LL} check_if_galera_pc():"
    local NODES
    local MASTER
    local timeout
    local GTID
    local node
    local is_pc
    local pcnum=0

    timeout=$(get_master_timeout)

    ocf_log info "${LH} Checking if Primary Component"

    # An empty timeout is treated as 0 instead of breaking the integer test.
    while [ "${timeout:-0}" -gt 0 ]; do
        NODES=$(nodes_in_cluster_online)
        MASTER=$(get_master "$NODES")
        GTID=$(get_node_gtid "$MASTER")
        if [ "$MASTER" = "$HOSTNAME" ]; then
            ocf_log info "${LH} I'm Primary Component. Join me! My GTID: ${GTID}"
            echo "${GTID}"
            return 0
        fi

        if ! check_if_reelection_needed; then
            ocf_log info "${LH} My neighbour is Primary Component with GTID: ${GTID}"
            if check_if_new_cluster; then
                for node in ${NODES}; do
                    is_pc=$(crm_attribute --quiet --node "${node}" --lifetime reboot --query --name is_pc | sed -e '/(null)/d')
                    if [ "${is_pc}" = "true" ]; then
                        pcnum=$((pcnum + 1))
                    fi
                    if [ "${pcnum}" -gt 1 ]; then
                        # The PID was never available here, so it is no longer
                        # advertised in the message.
                        ocf_log err "${LH} But I'm running a new cluster, this is a split-brain!"
                        clear_node_pc
                        exit $OCF_ERR_GENERIC
                    fi
                done
            fi
            echo "${GTID}"
            return 1
        fi

        sleep 10
        timeout=$((timeout - 10))
        ocf_log info "${LH} Waiting for master. ${timeout} seconds left"
    done

    ocf_log info "${LH} ${HOSTNAME} is not Primary Component"
    return 1
}
# Functions invoked by resource manager actions

# Validate the resource parameters.
# Returns OCF_SUCCESS when both binaries pass check_binary, the config file
# and data directory exist, and the configured user and group are known to
# getent; returns OCF_ERR_INSTALLED otherwise.
# NOTE(review): check_binary is defined outside this file and presumably
# exits the agent itself when a binary is missing - confirm.
mysql_validate() {
    local LH="${LL} mysql_validate()"

    check_binary "$OCF_RESKEY_binary"
    check_binary "$OCF_RESKEY_client_binary"

    # Quoting matters: with an empty value an unquoted `[ ! -f ]` is false and
    # the missing config would go unnoticed.
    if [ ! -f "$OCF_RESKEY_config" ]; then
        ocf_log err "${LH} Config $OCF_RESKEY_config doesn't exist"
        return $OCF_ERR_INSTALLED
    fi

    if [ ! -d "$OCF_RESKEY_datadir" ]; then
        ocf_log err "${LH} Datadir $OCF_RESKEY_datadir doesn't exist"
        return $OCF_ERR_INSTALLED
    fi

    if ! getent passwd "$OCF_RESKEY_user" >/dev/null 2>&1; then
        ocf_log err "${LH} User $OCF_RESKEY_user doesn't exist"
        return $OCF_ERR_INSTALLED
    fi

    if ! getent group "$OCF_RESKEY_group" >/dev/null 2>&1; then
        ocf_log err "${LH} Group $OCF_RESKEY_group doesn't exist"
        return $OCF_ERR_INSTALLED
    fi

    return $OCF_SUCCESS
}

# Detect whether a state snapshot transfer (SST) appears to be in progress.
# Returns OCF_SUCCESS when a mysqld for our datadir is found AND a process
# named wsrep_sst_<method> exists (method read from the config file);
# returns OCF_ERR_GENERIC otherwise.
#
# Arguments: $1 - log level for the verdict message (default: info)
check_if_sst() {
    local LH="${LL} check_if_sst():"
    local loglevel=${1:-'info'}
    local pid
    local wsrep_sst_method
    local wsrep_sst_command
    local wsrep_sst_pid

    # Match a MySQLd pid by the datadir, exclude position recovery
    pid=$(ps -C mysqld -o pid= -o command= -o args= | grep "${OCF_RESKEY_datadir}" \
        | awk '!/wsrep.recover|defunct/ {print $1}')
    if [ -n "${pid}" ]; then
        ocf_log info "${LH} MySQL process ${pid} found"
        # MySQLd's running and may be blocked, check for signs of SST.
        # Splitting on '=' and blanks accepts both "key = value" and
        # "key=value" spellings in the config file.
        wsrep_sst_method=$(awk -F '[= \t]+' '/^wsrep_sst_method/ {print $2}' \
            "${OCF_RESKEY_config}")
        wsrep_sst_command="wsrep_sst_${wsrep_sst_method}"
        wsrep_sst_pid=$(ps -C "${wsrep_sst_command}" -o pid= -o command= \
            | awk '!/defunct/ {print $1}' | head -1)
        if [ -n "${wsrep_sst_pid}" ]; then
            ocf_log "$loglevel" "${LH} SST is in progress"
            return $OCF_SUCCESS
        fi
    fi

    ocf_log "$loglevel" "${LH} No signs of SST found"
    return $OCF_ERR_GENERIC
}

# Report whether MySQL is running, judged by the pid file and /proc/<pid>.
#
# Arguments: $1 - log level for status messages (default: info)
#            $2 - how many times to look for the pid file (default: 3)
#            $3 - seconds to sleep after each failed look-up (default: 2)
# Returns:   OCF_SUCCESS if the pid file names an existing process,
#            OCF_NOT_RUNNING otherwise.
mysql_status() {
    local LH="${LL} mysql_status():"
    local loglevel=${1:-'info'}
    local count=${2:-3}
    local sleeptime=${3:-2}
    local pid=""

    while [ "$count" -gt 0 ]; do
        if [ -f "$OCF_RESKEY_pid" ]; then
            ocf_log info "${LH} MySQL PID found"
            break
        fi

        count=$(( count - 1 ))
        ocf_log "$loglevel" "${LH} PIDFile ${OCF_RESKEY_pid} of MySQL server not found. Sleeping for $sleeptime seconds. ${count} retries left"
        sleep "$sleeptime"
    done

    # count reaches 0 only when every look-up failed (a non-positive initial
    # count never looks at all).
    if [ "$count" -le 0 ]; then
        ocf_log "$loglevel" "${LH} MySQL is not running"
        return $OCF_NOT_RUNNING
    fi

    # First word of the first line; the file may vanish between the check
    # above and this read, which simply leaves pid empty.
    { read -r pid _ < "$OCF_RESKEY_pid"; } 2>/dev/null
    if [ -n "$pid" ] && [ -d "/proc/$pid" ]; then
        ocf_log "$loglevel" "${LH} MySQL is running"
        return $OCF_SUCCESS
    fi

    ocf_log "$loglevel" "${LH} MySQL is not running"
    return $OCF_NOT_RUNNING
}

# OCF monitor action.
# Returns OCF_SUCCESS while an SST is in progress or when the server is
# connected, ready and - if this node is the chosen master - at the expected
# GTID. Returns the mysql_status code when the server is not running and
# OCF_ERR_GENERIC for any failed wsrep check.
mysql_monitor() {
    local LH="${LL} mysql_monitor():"
    local rc
    local status_loglevel="err"
    local WSREP_CONNECTED
    local WSREP_LOCAL_STATE_COMMENT
    local WSREP_READY
    local MGTID
    local GTID

    # Set loglevel to info during probe
    if ocf_is_probe; then
        status_loglevel="info"
    fi

    # A node receiving/serving a state transfer is considered healthy.
    check_if_sst
    rc=$?
    [ $rc -eq $OCF_SUCCESS ] && return $rc

    mysql_status "$status_loglevel"
    rc=$?
    [ $rc -eq $OCF_SUCCESS ] || return $rc

    GTID=$(update_node_gtid)

    # MYSQL_OPTIONS_TEST is an option list and must be word-split.
    # shellcheck disable=SC2086
    WSREP_CONNECTED=$("$MYSQL" $MYSQL_OPTIONS_TEST -s -N \
        -e "SHOW STATUS LIKE 'wsrep_connected'" | awk '{print $NF}')

    if [ "$WSREP_CONNECTED" != "ON" ]; then
        ocf_log err "${LH} MySQL is not connected to the cluster (wsrep_connected=${WSREP_CONNECTED:-unknown})"
        return $OCF_ERR_GENERIC
    fi

    # shellcheck disable=SC2086
    WSREP_LOCAL_STATE_COMMENT=$("$MYSQL" $MYSQL_OPTIONS_TEST -s -N \
        -e "SHOW STATUS LIKE 'wsrep_local_state_comment'" | awk '{print $NF}')

    if [[ "$WSREP_LOCAL_STATE_COMMENT" =~ Synced|Donor|Desync ]]; then
        # shellcheck disable=SC2086
        WSREP_READY=$("$MYSQL" $MYSQL_OPTIONS_TEST -s -N \
            -e "SHOW STATUS LIKE 'wsrep_ready'" | awk '{print $NF}')

        if [ "$WSREP_READY" != "ON" ]; then
            ocf_log err "${LH} MySQL synced but not ready"
            return $OCF_ERR_GENERIC
        fi
    elif [[ "$WSREP_LOCAL_STATE_COMMENT" == 'Initialized' ]]; then
        ocf_log err "${LH} MySQL lost quorum or uninitialized"
        return $OCF_ERR_GENERIC
    fi

    # Check if this node is the master and is running the most recent GTID
    check_if_new_cluster
    # NOTE(review): running check_if_galera_pc in a command substitution means
    # an `exit` inside it only ends the subshell and shows up here as a
    # non-zero rc, which is indistinguishable from "not the master".
    MGTID=$(check_if_galera_pc)
    rc=$?
    if [ $rc -eq 0 ] && [ "${MGTID}" != "${GTID}" ]; then
        ocf_log err "${LH} I'm a master, and my GTID: ${GTID}, which was not expected"
        return $OCF_ERR_GENERIC
    fi
    ocf_log debug "${LH} MySQL monitor succeeded"
    return $OCF_SUCCESS
}

# OCF start action.
# Prepares the socket and pid directories, fixes datadir ownership, decides
# whether this node has to bootstrap a new cluster (--wsrep-new-cluster),
# launches the server in the background and then waits until either an SST
# is detected or mysql_status reports the server as running.
# Returns OCF_SUCCESS; exits with OCF_ERR_PERM when a runtime directory is
# not writable by the configured user.
mysql_start() {
    local LH="${LL} mysql_start():"
    local socket_dir
    local pid_dir
    local rc
    local dir
    local mysql_extra_params

    if mysql_status info 1; then
        ocf_log info "${LH} MySQL already running"
        return $OCF_SUCCESS
    fi

    socket_dir="$(dirname "${OCF_RESKEY_socket}")"
    if [ ! -d "${socket_dir}" ]; then
        ocf_log info "${LH} Create socket dir: ${socket_dir} and chown to ${OCF_RESKEY_user}:${OCF_RESKEY_group}"
        mkdir -p "${socket_dir}"
        chown "${OCF_RESKEY_user}:${OCF_RESKEY_group}" "${socket_dir}"
        chmod 755 "${socket_dir}"
    fi

    # check and make PID file dir
    pid_dir="$(dirname "${OCF_RESKEY_pid}")"
    if [ ! -d "${pid_dir}" ]; then
        ocf_log info "${LH} Create PID dir: ${pid_dir} and chown to ${OCF_RESKEY_user}:${OCF_RESKEY_group}"
        mkdir -p "${pid_dir}"
        chown -R "${OCF_RESKEY_user}:${OCF_RESKEY_group}" "${pid_dir}"
        chmod 755 "${pid_dir}"
    fi

    # set user/group for datadir
    chown -R "$OCF_RESKEY_user:$OCF_RESKEY_group" "$OCF_RESKEY_datadir"

    # Regardless of whether we just created the directory or it
    # already existed, check whether it is writable by the configured
    # user
    for dir in "$pid_dir" "$socket_dir"; do
        if ! /usr/bin/sudo -n -u "$OCF_RESKEY_user" /usr/bin/test -w "$dir"; then
            ocf_log err "${LH} Directory $dir is not writable by $OCF_RESKEY_user"
            exit $OCF_ERR_PERM
        fi
    done

    # NOTE(review): a fixed, world-writable location is trusted here; anyone
    # able to create /tmp/wsrep-init-file can feed SQL to the server at start.
    if [ -f /tmp/wsrep-init-file ]; then
        mysql_extra_params="--init-file=/tmp/wsrep-init-file"
    else
        mysql_extra_params=""
    fi

    # Both helpers print a GTID for callers that capture it; here only their
    # side effects / status matter. They must stay in the current shell (no
    # command substitution) so that an `exit` inside them ends the agent.
    update_node_gtid >/dev/null
    check_if_reelection_needed
    rc=$?

    if [ $rc -eq 0 ]; then
        check_if_galera_pc >/dev/null
        rc=$?

        if [ $rc -eq 0 ]; then
            mysql_extra_params="$mysql_extra_params --wsrep-new-cluster"
            update_node_pc
        else
            clear_node_pc
        fi
    fi

    ocf_log info "${LH} Starting MySQL"
    # The two trailing parameter strings are option lists and are split on
    # purpose. The launch status cannot be checked here: `$?` after `&` is
    # always 0, so failures are detected by the wait loop / operation timeout.
    # shellcheck disable=SC2086
    "${OCF_RESKEY_binary}" \
        --pid-file="$OCF_RESKEY_pid" \
        --socket="$OCF_RESKEY_socket" \
        --datadir="$OCF_RESKEY_datadir" \
        --user="$OCF_RESKEY_user" $OCF_RESKEY_additional_parameters \
        $mysql_extra_params >/dev/null 2>&1 &

    # Spin waiting for the server to come up or exit, if SST's in progress
    # Let the CRM/LRM time us out if required.
    while :; do
        check_if_sst
        rc=$?
        [ $rc -eq $OCF_SUCCESS ] && break

        if mysql_status info 1; then
            break
        fi
        sleep 3
    done

    ocf_log info "${LH} MySQL started"
    return $OCF_SUCCESS
}

# Remove everything a stopped instance leaves behind: the gtid and is_pc node
# attributes, the subsys lock file, the socket and the pid file.
mysql_cleanup() {
    local LH="${LL} mysql_cleanup():"
    local pid_content=""

    ocf_log info "${LH} Cleaning up gtid attribute"
    crm_attribute --quiet --node "$HOSTNAME" --lifetime reboot --name gtid \
        --delete

    clear_node_pc
    ocf_log debug "${LH} Delete lock file: /var/lock/subsys/mysqld"
    rm -f /var/lock/subsys/mysqld

    ocf_log debug "${LH} Delete sock file: ${OCF_RESKEY_socket}"
    rm -f -- "$OCF_RESKEY_socket"

    # The pid file is usually gone already after a clean shutdown; only read
    # it for the log message when it is still there.
    if [ -r "${OCF_RESKEY_pid}" ]; then
        pid_content=$(cat "${OCF_RESKEY_pid}")
    fi
    ocf_log debug "${LH} Delete pid file: ${OCF_RESKEY_pid} with content ${pid_content}"
    rm -f -- "$OCF_RESKEY_pid"
}

# OCF stop action.
# Sends SIGTERM through proc_stop and then cleans up. The time budget is
# derived from the operation timeout (milliseconds) minus a 5 second margin;
# 15 seconds are used when no timeout is provided.
# NOTE(review): proc_stop is defined outside this file; its last two
# arguments are presumably "sleep seconds" and "retry count", and its return
# status is not evaluated - OCF_SUCCESS is returned unconditionally.
mysql_stop() {
    local LH="${LL} mysql_stop():"
    local shutdown_timeout=15
    local retries

    ocf_log info "${LH} Stopping MySQL"

    if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
        shutdown_timeout=$(( (OCF_RESKEY_CRM_meta_timeout / 1000) - 5 ))
    fi

    # Very short operation timeouts would otherwise yield zero attempts.
    retries=$(( shutdown_timeout / 5 ))
    if [ "$retries" -lt 1 ]; then
        retries=1
    fi

    proc_stop "${OCF_RESKEY_pid}" "mysqld.*${OCF_RESKEY_datadir}" SIGTERM 5 "$retries"
    mysql_cleanup
    return $OCF_SUCCESS
}

##########################################################################
# If DEBUG_LOG is set, make this resource agent easy to debug: set up the
# debug log and direct all output to it. Otherwise, redirect to /dev/null.
# The log directory must be a directory owned by the invoking user (root for
# a cluster-managed agent), with permissions 0700, and the log must be
# writable and not a symlink.
#
# These requirements are enforced because the log lives under /tmp and
# receives the OCF_* environment, which may include the test password.
##########################################################################
DEBUG_LOG="/tmp/mysql.ocf.ra.debug/log"
# One GTID: <cluster uuid>:<seqno>, seqno being a number or -1.
UUID_REGEX="\w{8}-\w{4}-\w{4}-\w{4}-\w{12}:([[:digit:]]+|-1)"
# Cluster uuid of a not yet initialized node.
ZEROID="00000000-0000-0000-0000-000000000000"
DEBUG_LOG_DIR="${DEBUG_LOG%/*}"
if [ -n "${DEBUG_LOG}" ] && [ -w "${DEBUG_LOG}" ] && [ ! -L "${DEBUG_LOG}" ] \
    && [ -d "${DEBUG_LOG_DIR}" ] && [ ! -L "${DEBUG_LOG_DIR}" ] \
    && [ -O "${DEBUG_LOG_DIR}" ] \
    && [ "$(stat -c '%a' "${DEBUG_LOG_DIR}" 2>/dev/null)" = "700" ]; then
    exec 9>>"$DEBUG_LOG"
    exec 1>&9 2>&9
    date '+%Y%m%d %H:%M:%S' >&9
    echo "$*" >&9
    env | grep OCF_ | sort >&9
    set -x
else
    exec 9>/dev/null
fi

# Actions that need no validation, plus early rejection of unknown actions so
# that they always report OCF_ERR_UNIMPLEMENTED instead of a validation error.
case "$1" in
    meta-data)
        meta_data
        exit $OCF_SUCCESS
        ;;
    usage|help)
        usage
        exit $OCF_SUCCESS
        ;;
    start|stop|monitor|validate-all)
        ;;
    *)
        usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac

# Log prefix used by every function below.
export LL="${OCF_RESOURCE_INSTANCE}:"

mysql_validate
rc=$?
if [ "$rc" -ne 0 ]; then
    # An unusable installation counts as "stopped" for stop and monitor.
    case "$1" in
        stop)    exit $OCF_SUCCESS ;;
        monitor) exit $OCF_NOT_RUNNING ;;
        *)       exit $rc ;;
    esac
fi

# What kind of method was invoked?
case "$1" in
    start)        mysql_start ;;
    stop)         mysql_stop ;;
    monitor)      mysql_monitor ;;
    validate-all) exit $OCF_SUCCESS ;;
esac
# Hand the status of the executed action back to the cluster manager.
exit $?
# vim: set ts=4 sw=4 tw=0 et :