ad9097a1bb
When Corosync notifies that a particular node in its cluster is dead, the rabbit-fence daemon fences the failed node in the RabbitMQ cluster as well:
* It casts disconnect failed_node & forget_cluster_node for the rest of the nodes in the RabbitMQ cluster.
* Does not fence alive nodes with mnesia running.
* Does not fence already forgotten nodes; that means that only the first node to detect a 'dead event' will issue the fencing action, while the rest of the cluster nodes will ignore it.
* Requires corosync compiled with the --enable-dbus option; ensures corosync-notifyd and dbus (messagebus) are running.
* Contains temporary hacks in the corosync-notifyd init.d script to work around upstream bugs https://bugs.launchpad.net/ubuntu/+source/corosync/+bug/1437368, https://bugs.launchpad.net/ubuntu/+source/corosync/+bug/1437359
* Installs init.d and upstart scripts for the rabbit-fence daemon and enables it after the puppet Rabbitmq class is evaluated.
Note: system events may be monitored with dbus-monitor --system
Note: If the corosync package is updated with apt-get, the corosync-notifyd service will be affected by the mentioned Ubuntu upstream bugs again and will not start as a result. Make sure to back up the init script for corosync-notifyd prior to issuing the update and restore it once the update is done.
Doc-Impact: ops guide
Closes-bug: #1437348
Related blueprint rabbitmq-pacemaker-multimaster-clone
Change-Id: I691363386efe01421acc317ef6371ce45a0d4d11
65 lines
963 B
Bash
65 lines
963 B
Bash
#!/bin/bash
|
|
# rabbit-fence RabbitMQ fence
|
|
#
|
|
# chkconfig: 2345 24 79
|
|
# description: Starts/Stops RabbitMQ fence daemon
|
|
#
|
|
# processname: rabbit-fence.py
|
|
|
|
# Source function library.
|
|
. /etc/rc.d/init.d/functions
|
|
|
|
prog="rabbit-fence"
|
|
script="/usr/bin/${prog}.py"
|
|
piddir="/var/run/rabbitmq"
|
|
pidfile="${piddir}/${prog}.pid"
|
|
|
|
[ -x $script ] || exit 0
|
|
[ -d $piddir ] || exit 0
|
|
|
|
start() {
|
|
exec $script
|
|
return $?
|
|
}
|
|
|
|
stop() {
|
|
PID=$(cat $pidfile)
|
|
kill $PID
|
|
retval=$?
|
|
[ $retval -eq 0 ] && (rm -f $pidfile || true)
|
|
return $retval
|
|
}
|
|
|
|
rh_status() {
|
|
status -p $pidfile $prog
|
|
}
|
|
|
|
rh_status_q() {
|
|
rh_status >/dev/null 2>&1
|
|
}
|
|
|
|
restart() {
|
|
stop
|
|
start
|
|
}
|
|
|
|
case "$1" in
|
|
start)
|
|
rh_status_q && exit 0
|
|
$1
|
|
;;
|
|
stop)
|
|
rh_status_q || exit 0
|
|
$1
|
|
;;
|
|
restart)
|
|
$1
|
|
;;
|
|
status)
|
|
rh_status
|
|
;;
|
|
*)
|
|
echo Usage: start|stop|restart|status
|
|
exit 2
|
|
esac
|