fuel-library/files/rabbit-fence/rabbit-fence.py
Vladimir Kuklin 710553c1cc All the work done for fuel-library packaging
1) Package fuel library into three different
packages:
RPM: fuel-library6.1
ALL: fuel-ha-utils, fuel-misc

2) Install packages onto slave nodes
implements blueprint: package-fuel-components

Change-Id: Ie759857fb94db9aa94aaeaeda2c6ab5bb159cc9e
2015-05-15 02:44:53 +03:00

159 lines
4.8 KiB
Python

#!/usr/bin/env python
usage = """Help: this daemon fences dead rabbitmq nodes"""
import daemon
import daemon.pidlockfile
import dbus
import dbus.decorators
import dbus.mainloop.glib
import gobject
import logging
import logging.handlers
import os
import pwd
import signal
import socket
import subprocess
import sys
import time
# Values copied into the environment that the rabbitmqctl subprocesses
# are started with (see the ``env`` dict built in the __main__ section).
USER = 'rabbitmq'
LOGNAME = USER
HOME = '/var/lib/rabbitmq'
PWD = HOME
MAIL = '/var/spool/mail/%s' % USER
def catchall_signal_lh(*args, **kwargs):
def bash_command(cmd):
p = subprocess.Popen(cmd, env=env, shell=True,
stderr=subprocess.PIPE,
stdout=subprocess.PIPE)
out, err = p.communicate()
my_logger.debug('Command %s' % cmd)
if out != '':
my_logger.debug(' Stdout: %s' % out)
if err != '':
my_logger.debug(' Stderr: %s' % err)
return out.strip()
message = kwargs['message']
action = args[3]
if kwargs['type'] == 'NodeStateChange' and action == 'left':
node = args[0]
this_node = socket.gethostname().split('.')[0]
node_name = node.split('.')[0]
node_to_remove = 'rabbit@%s' % node_name
my_logger.info("Got %s that left cluster" % node)
my_logger.debug(kwargs)
for arg in message.get_args_list():
my_logger.debug(" " + str(arg))
my_logger.info("Preparing to fence node %s from rabbit cluster"
% node_to_remove)
if node == '' or node_name == this_node:
my_logger.debug('Ignoring the node %s' % node_to_remove)
return
# NOTE(bogdando) when the rabbit node went down, its status
# remains 'running' for a while, so few retries are required
count = 0
while True:
cmd = ('rabbitmqctl eval '
'"mnesia:system_info(running_db_nodes)."'
'| grep -o %s') % node_to_remove
results = bash_command(cmd)
is_running = results != ''
if not is_running or count >= 5:
break
count += 1
time.sleep(10)
if is_running:
my_logger.warn('Ignoring alive node %s' % node_to_remove)
return
cmd = ('rabbitmqctl eval '
'"mnesia:system_info(db_nodes)."'
'| grep -o %s') % node_to_remove
results = bash_command(cmd)
in_cluster = results != ''
if not in_cluster:
my_logger.debug('Ignoring forgotten node %s' % node_to_remove)
return
my_logger.info('Disconnecting node %s' % node_to_remove)
cmd = ('rabbitmqctl eval "disconnect_node'
'(list_to_atom(\\"%s\\"))."') % node_to_remove
bash_command(cmd)
my_logger.info('Forgetting cluster node %s' % node_to_remove)
cmd = ('rabbitmqctl forget_cluster_node %s') % node_to_remove
bash_command(cmd)
def sigterm_handler(_signo, _stack_frame):
    """Signal handler: log the SIGTERM and exit with status 0.

    Both arguments (signal number and current stack frame) are unused.
    """
    my_logger.info("Caught SIGTERM, terminating...")
    # Same effect as sys.exit(0): unwinds via SystemExit.
    raise SystemExit(0)
def main():
    """Subscribe to org.corosync DBus signals and run the main loop.

    Installs the GLib main loop as the default DBus main loop, connects
    to the system bus and registers ``catchall_signal_lh`` for signals of
    the ``org.corosync`` interface. Exits with status 1 if the initial
    DBus object lookup fails. SIGTERM is routed to ``sigterm_handler``.
    Does not return while the main loop is running.
    """
    my_logger.info('Starting rabbit fence script main loop')
    dbus.mainloop.glib.DBusGMainLoop(set_as_default=True)
    system_bus = dbus.SystemBus()

    try:
        system_bus.get_object("org.freedesktop.DBus", "/org/corosync")
    except dbus.DBusException:
        my_logger.exception("Cannot get the DBus object")
        sys.exit(1)

    # The handler receives the signal name as ``type`` and the raw DBus
    # message as ``message`` keyword arguments.
    receiver_options = {
        'member_keyword': "type",
        'message_keyword': "message",
        'dbus_interface': "org.corosync",
    }
    system_bus.add_signal_receiver(catchall_signal_lh, **receiver_options)

    signal.signal(signal.SIGTERM, sigterm_handler)

    gobject.MainLoop().run()
if __name__ == '__main__':
    # Logging goes to the local syslog socket, 'daemon' facility.
    my_logger = logging.getLogger('rabbit-fence')
    my_logger.setLevel(logging.DEBUG)
    lh = logging.handlers.SysLogHandler(address='/dev/log',
                                        facility='daemon')
    lh.setFormatter(logging.Formatter(
        '%(name)-12s %(asctime)s %(levelname)-8s %(message)s'))
    my_logger.addHandler(lh)

    # The daemon is started with the uid/gid of the rabbitmq account.
    rabbit_pwd = pwd.getpwnam('rabbitmq')

    # Environment handed to the rabbitmqctl subprocesses: a copy of the
    # current one with the rabbitmq account values filled in.
    env = os.environ.copy()
    env.update(USER=USER, MAIL=MAIL, PWD=PWD, HOME=HOME, LOGNAME=LOGNAME)

    pidfile = daemon.pidlockfile.TimeoutPIDLockFile(
        '/var/run/rabbitmq/rabbit-fence.pid', 10)

    try:
        # The syslog socket descriptor is listed in files_preserve so
        # that logging keeps working inside the daemon context.
        context = daemon.DaemonContext(
            files_preserve=[lh.socket.fileno()],
            pidfile=pidfile,
            uid=rabbit_pwd.pw_uid,
            gid=rabbit_pwd.pw_gid,
            umask=0o022)
        with context:
            main()
    except Exception:
        my_logger.exception("A generic exception caught!")
        sys.exit(1)