configure mysql_monitor agent

Define a location rule to ensure the VIP always runs on a node that has a writable Percona instance.
This commit is contained in:
Felipe Reyes 2015-03-06 12:35:01 -03:00
parent c26565498f
commit 0eda7e7f47
8 changed files with 248 additions and 22 deletions

View File

@ -2,3 +2,6 @@ bin
.coverage
.pydevproject
.project
*.pyc
*.pyo
__pycache__

View File

@ -9,6 +9,10 @@ lint:
unit_test:
@$(PYTHON) /usr/bin/nosetests --nologcapture unit_tests
functional_test:
@echo Starting amulet tests...
@juju test -v -p AMULET_HTTP_PROXY --timeout 900
bin/charm_helpers_sync.py:
@mkdir -p bin
@bzr cat lp:charm-helpers/tools/charm_helpers_sync/charm_helpers_sync.py \

View File

@ -73,17 +73,13 @@ from charmhelpers.contrib.network.ip import (
hooks = Hooks()
LEADER_RES = 'grp_percona_cluster'
RES_MYSQL_PARAMS = ('params config="/etc/mysql/my.cnf" '
'pid="/var/run/mysqld/mysqld.pid" '
'socket="/var/run/mysqld/mysqld.sock" '
'max_slave_lag="60" ' # default is 3600
'binary="/usr/sbin/mysqld" '
'op monitor interval="5s" role="Master" '
'OCF_CHECK_LEVEL="1" '
'op monitor interval="2s" role="Slave" '
'OCF_CHECK_LEVEL="1" '
'op start interval="0" timeout="60s" '
'op stop interval="0" timeout="60s" ')
RES_MONITOR_PARAMS = ('params user="sstuser" password="%(sstpass)s" '
'pid="/var/run/mysqld/mysqld.pid" '
'socket="/var/run/mysqld/mysqld.sock" '
'max_slave_lag="5" '
'cluster_type="pxc" '
'op monitor interval="1s" timeout="30s" '
'OCF_CHECK_LEVEL="1"')
@hooks.hook('install')
@ -402,18 +398,22 @@ def ha_relation_joined():
(vip, vip_cidr, vip_iface)
resources = {'res_mysql_vip': res_mysql_vip,
'res_mysqld': 'ocf:percona:mysql'}
'res_mysql_monitor': 'ocf:percona:mysql_monitor'}
db_helper = get_db_helper()
cfg_passwd = config('sst-password')
sstpsswd = db_helper.get_mysql_password(username='sstuser',
password=cfg_passwd)
resource_params = {'res_mysql_vip': vip_params,
'res_mysqld': RES_MYSQL_PARAMS % {'sstpsswd': sstpsswd}}
'res_mysql_monitor':
RES_MONITOR_PARAMS % {'sstpass': sstpsswd}}
groups = {'grp_percona_cluster': 'res_mysql_vip'}
clones = {'cl_mysqld': 'res_mysqld meta interleave=true'}
clones = {'cl_mysql_monitor': 'res_mysql_monitor meta interleave=true'}
colocations = {'vip_mysqld': 'inf: res_mysqld res_mysql_vip role=Master'}
colocations = {'vip_mysqld': 'inf: grp_percona_cluster cl_mysql_monitor'}
locations = {'loc_percona_cluster':
'grp_percona_cluster rule inf: writable eq 1'}
for rel_id in relation_ids('ha'):
relation_set(relation_id=rel_id,
@ -423,7 +423,8 @@ def ha_relation_joined():
resource_params=resource_params,
groups=groups,
clones=clones,
colocations=colocations)
colocations=colocations,
locations=locations)
@hooks.hook('ha-relation-changed')

View File

@ -234,10 +234,15 @@ def unit_sorted(units):
def install_mysql_ocf():
dest_file = '/usr/lib/ocf/resource.d/percona/mysql'
src_file = os.path.join(charm_dir(), 'ocf/percona/mysql')
dest_dir = '/usr/lib/ocf/resource.d/percona/'
for fname in ['ocf/percona/mysql', 'ocf/percona/mysql_monitor']:
src_file = os.path.join(charm_dir(), fname)
if not os.path.isdir(dest_dir):
os.makedirs(dest_dir)
if not os.path.isdir(os.path.dirname(dest_file)):
os.makedirs(os.path.dirname(dest_file))
if not os.path.exists(dest_file):
shutil.copy(src_file, dest_file)
dest_file = os.path.join(dest_dir, os.path.basename(src_file))
if not os.path.exists(dest_file):
log('Installing %s' % dest_file, level='INFO')
shutil.copy(src_file, dest_file)
else:
log("'%s' already exists, skipping" % dest_file, level='INFO')

20
tests/00-setup.sh Executable file
View File

@ -0,0 +1,20 @@
#!/bin/bash -ex
# The script installs amulet and other tools needed for the amulet tests.
#
# NOTE: the script runs with `set -e` (the -e in the shebang), so any
# unguarded command that exits non-zero aborts the whole script.  The
# package-status probes are therefore wrapped in `if ! ...` instead of
# being run bare and then testing $?, which would never be reached.

# `dpkg -s` exits 0 if the package is installed.
if ! dpkg -s amulet; then
    # Install the Amulet testing harness.
    sudo add-apt-repository -y ppa:juju/stable
    sudo apt-get update
    sudo apt-get install -y amulet juju-core charm-tools
fi

# Runtime dependencies of the test scripts themselves.
PACKAGES="python3 python3-yaml"
for pkg in $PACKAGES; do
    # Probe the package being iterated (was hard-coded to python3,
    # so python3-yaml's status was never actually checked).
    if ! dpkg -s "$pkg"; then
        sudo apt-get install -y -q "$pkg"
    fi
done

29
tests/10-deploy_test.py Executable file
View File

@ -0,0 +1,29 @@
#!/usr/bin/python3
# test percona-cluster (3 nodes)
import basic_deployment
import time
class ThreeNode(basic_deployment.BasicDeployment):
    """Three-unit percona-cluster deployment that powers off the master
    node and checks that pacemaker promotes a different unit."""

    def __init__(self):
        super(ThreeNode, self).__init__(units=3)

    def run(self):
        super(ThreeNode, self).run()
        # Remember which unit currently holds the VIP, then kill that node.
        previous_master = self.master_unit
        previous_master.run('sudo poweroff')
        # Give pacemaker a moment to notice the node vanished and fail over.
        time.sleep(10)
        promoted = self.find_master()
        assert promoted is not None, "master unit not found"
        old_addr = previous_master.info['public-address']
        new_addr = promoted.info['public-address']
        assert new_addr != old_addr
        assert self.is_port_open(address=self.vip), 'cannot connect to vip'
if __name__ == "__main__":
    # Instantiate and drive the scenario directly (not via a test runner).
    ThreeNode().run()

38
tests/20-broken-mysqld.py Executable file
View File

@ -0,0 +1,38 @@
#!/usr/bin/python3
# test percona-cluster (3 nodes)
import basic_deployment
import time
class ThreeNode(basic_deployment.BasicDeployment):
    """Three-unit percona-cluster deployment that stops mysqld on the
    master and checks that pacemaker moves the master role elsewhere."""

    def __init__(self):
        super(ThreeNode, self).__init__(units=3)

    def run(self):
        super(ThreeNode, self).run()
        # Stop mysql on the current master, then wait for a failover.
        old_master = self.master_unit
        print('stopping mysql in %s' % str(self.master_unit.info))
        old_master.run('sudo service mysql stop')
        print('looking for the new master')
        changed = False
        # Poll up to 10 times, 5s apart, for pacemaker to promote a new unit.
        for _ in range(10):
            time.sleep(5)
            candidate = self.find_master()
            if (candidate and candidate.info['unit_name'] !=
                    old_master.info['unit_name']):
                changed = True
                break
        assert changed, "The master didn't change"
        assert self.is_port_open(address=self.vip), 'cannot connect to vip'
assert self.is_port_open(address=self.vip), 'cannot connect to vip'
if __name__ == "__main__":
    # Instantiate and drive the scenario directly (not via a test runner).
    ThreeNode().run()

126
tests/basic_deployment.py Normal file
View File

@ -0,0 +1,126 @@
import amulet
import os
import telnetlib
import unittest
import yaml
class BasicDeployment(unittest.TestCase):
    """Amulet deployment test for percona-cluster related to hacluster.

    Deploys ``units`` percona-cluster units plus hacluster, then verifies
    that the pacemaker resources exist and mysqld is running on every unit.
    The VIP to probe is taken from the constructor argument, the ``VIP``
    environment variable, or a ``local.yaml`` file, in that order.
    """

    def __init__(self, vip=None, units=1):
        # NOTE(review): unittest.TestCase.__init__ is not chained here; the
        # class is driven via run() directly rather than a unittest runner —
        # confirm that is intentional.
        self.units = units
        self.master_unit = None
        self.vip = None
        if vip:
            self.vip = vip
        elif 'VIP' in os.environ:
            self.vip = os.environ.get('VIP')
        elif os.path.isfile('local.yaml'):
            # Fall back to a local.yaml holding a 'vip' key.
            with open('local.yaml', 'rb') as f:
                self.cfg = yaml.safe_load(f.read())
                self.vip = self.cfg.get('vip')
        else:
            # No VIP available — the test cannot run, skip it.
            amulet.raise_status(amulet.SKIP,
                                ("please set ENV variable VIP "
                                 "to run this test"))

    def run(self):
        """Deploy the services and assert the cluster is healthy."""
        # The number of seconds to wait for the environment to setup.
        seconds = 1200

        self.d = amulet.Deployment(series="trusty")
        self.d.add('percona-cluster', units=self.units)
        self.d.add('hacluster')
        self.d.relate('percona-cluster:ha', 'hacluster:ha')

        cfg_percona = {'sst-password': 'ubuntu',
                       'root-password': 't00r',
                       'dataset-size': '128M',
                       'vip': self.vip}

        # Fixed corosync key so the test is deterministic (test-only secret).
        cfg_ha = {'debug': True,
                  'corosync_mcastaddr': '226.94.1.4',
                  'corosync_key': ('xZP7GDWV0e8Qs0GxWThXirNNYlScgi3sRTdZk/IXKD'
                                   'qkNFcwdCWfRQnqrHU/6mb6sz6OIoZzX2MtfMQIDcXu'
                                   'PqQyvKuv7YbRyGHmQwAWDUA4ed759VWAO39kHkfWp9'
                                   'y5RRk/wcHakTcWYMwm70upDGJEP00YT3xem3NQy27A'
                                   'C1w=')}

        self.d.configure('percona-cluster', cfg_percona)
        self.d.configure('hacluster', cfg_ha)

        try:
            self.d.setup(timeout=seconds)
            self.d.sentry.wait(seconds)
        except amulet.helpers.TimeoutError:
            message = 'The environment did not setup in %d seconds.' % seconds
            amulet.raise_status(amulet.SKIP, msg=message)
        except:
            # NOTE(review): bare `except: raise` is a no-op — any other
            # exception propagates unchanged.
            raise

        self.master_unit = self.find_master()
        assert self.master_unit is not None, 'percona-cluster vip not found'

        # Pacemaker configuration must validate cleanly on the master.
        output, code = self.master_unit.run('sudo crm_verify --live-check')
        assert code == 0, "'crm_verify --live-check' failed"

        # Expect the VIP resource plus one mysql_monitor clone per unit.
        resources = ['res_mysql_vip']
        resources += ['res_mysql_monitor:%d' % i for i in range(self.units)]
        assert sorted(self.get_pcmkr_resources()) == sorted(resources)

        for i in range(self.units):
            uid = 'percona-cluster/%d' % i
            unit = self.d.sentry.unit[uid]
            assert self.is_mysqld_running(unit), 'mysql not running: %s' % uid

    def find_master(self):
        """Return the percona-cluster unit holding the VIP, or None."""
        for unit_id, unit in self.d.sentry.unit.items():
            if not unit_id.startswith('percona-cluster/'):
                continue

            # is the vip running here?
            output, code = unit.run('sudo ip a | grep %s' % self.vip)
            print(unit_id)
            print(output)
            if code == 0:
                print('vip(%s) running in %s' % (self.vip, unit_id))
                return unit

    def get_pcmkr_resources(self, unit=None):
        """Return pacemaker resource names via ``crm_resource -l``.

        Runs on *unit* when given, otherwise on the master unit.
        """
        if unit:
            u = unit
        else:
            u = self.master_unit

        output, code = u.run('sudo crm_resource -l')

        assert code == 0, 'could not get "crm resource list"'
        return output.split('\n')

    def is_mysqld_running(self, unit=None):
        """Return True when mysqld has a pid and accepts TCP on 3306."""
        if unit:
            u = unit
        else:
            u = self.master_unit

        output, code = u.run('pidof mysqld')

        if code != 0:
            return False
        # pidof succeeded; also confirm the daemon accepts connections.
        return self.is_port_open(u, '3306')

    def is_port_open(self, unit=None, port='3306', address=None):
        """Return True when a TCP connect to the unit's public address (or
        the explicit *address*) on *port* succeeds, False on timeout.

        NOTE(review): only TimeoutError is caught; a refused connection
        (ConnectionRefusedError) would propagate — confirm that is intended.
        """
        if unit:
            addr = unit.info['public-address']
        elif address:
            addr = address
        else:
            raise Exception('Please provide a unit or address')
        try:
            telnetlib.Telnet(addr, port)
            return True
        except TimeoutError:  # noqa this exception only available in py3
            return False