[hopem,r=]

Add min-cluster-size config option. This allows the charm to wait
for a minimum number of peers to join before bootstrapping
percona and allowing relations to access the database.

Closes-Bug: 1475585
This commit is contained in:
Edward Hope-Morley 2015-07-22 12:17:09 +01:00
parent c5e9a93935
commit 1373073fd2
10 changed files with 440 additions and 73 deletions

View File

@ -13,8 +13,8 @@ test:
@echo Starting amulet tests...
#NOTE(beisner): can remove -v after bug 1320357 is fixed
# https://bugs.launchpad.net/amulet/+bug/1320357
# @juju test -v -p AMULET_HTTP_PROXY,AMULET_OS_VIP --timeout 2700
echo "Tests disables; http://pad.lv/1446169"
@juju test -v -p AMULET_HTTP_PROXY,AMULET_OS_VIP --timeout 2700
#echo "Tests disabled; http://pad.lv/1446169"
bin/charm_helpers_sync.py:
@mkdir -p bin

View File

@ -111,3 +111,9 @@ options:
but also can be set to any specific value for the system.
Suffix this value with 'K','M','G', or 'T' to get the relevant kilo/mega/etc. bytes.
If suffixed with %, one will get that percentage of system total memory devoted.
min-cluster-size:
type: int
default:
description: |
Minimum number of units expected to exist before charm will attempt to
bootstrap percona cluster. If no value is provided this setting is ignored.

View File

@ -1,17 +1,19 @@
#!/usr/bin/python
# TODO: Support changes to root and sstuser passwords
import sys
import json
import os
import socket
import time
from charmhelpers.core.hookenv import (
Hooks, UnregisteredHookError,
is_relation_made,
log,
local_unit,
relation_get,
relation_set,
relation_id,
relation_ids,
related_units,
unit_get,
@ -20,10 +22,13 @@ from charmhelpers.core.hookenv import (
relation_type,
DEBUG,
INFO,
WARNING,
is_leader,
)
from charmhelpers.core.host import (
service,
service_restart,
service_start,
file_hash,
lsb_release,
)
@ -52,6 +57,9 @@ from percona_utils import (
get_db_helper,
mark_seeded, seeded,
install_mysql_ocf,
is_sufficient_peers,
notify_bootstrapped,
is_bootstrapped,
)
from charmhelpers.contrib.database.mysql import (
PerconaClusterHelper,
@ -131,6 +139,57 @@ def render_config(clustered=False, hosts=[]):
render(os.path.basename(MY_CNF), MY_CNF, context, perms=0o444)
def render_config_restart_on_changed(clustered, hosts, bootstrap=False):
    """Render mysql config and restart mysql service if file changes as a
    result.

    If bootstrap is True we do a bootstrap-pxc in order to bootstrap the
    percona cluster. This should only be performed once at cluster creation
    time.

    If percona is already bootstrapped we can get away with just ensuring that
    it is started so long as the new node to be added is guaranteed to have
    been restarted so as to apply the new config.

    :param clustered: True if more than one cluster host is known.
    :param hosts: list of peer host addresses used to render the config.
    :param bootstrap: if True, (re)start via 'bootstrap-pxc' and notify peers.
    :raises Exception: if mysql fails to (re)start after max retries.
    """
    # Only act if rendering actually changed the config file on disk.
    pre_hash = file_hash(MY_CNF)
    render_config(clustered, hosts)
    if file_hash(MY_CNF) != pre_hash:
        if bootstrap:
            service('bootstrap-pxc', 'mysql')
            # Publish the bootstrap uuid to peers and refresh shared-db
            # relations so clients can be granted access.
            notify_bootstrapped()
            update_shared_db_rels()
        else:
            delay = 1
            attempts = 0
            max_retries = 5
            # NOTE(dosaboy): avoid unnecessary restarts. Once mysql is started
            # it needn't be restarted when new units join the cluster since the
            # new units will join and apply their own config.
            if not seeded():
                action = service_restart
            else:
                action = service_start

            # Retry the (re)start with an increasing delay; the while/else
            # only marks this unit seeded once the service start succeeds.
            while not action('mysql'):
                if attempts == max_retries:
                    raise Exception("Failed to start mysql (max retries "
                                    "reached)")

                log("Failed to start mysql - retrying in %ss" % (delay),
                    WARNING)
                time.sleep(delay)
                delay += 2
                attempts += 1
            else:
                mark_seeded()
def update_shared_db_rels():
    """Re-run the shared-db hook for every unit on every shared-db relation.

    Used to (re)publish access details to all database clients.
    """
    for rid in relation_ids('shared-db'):
        for member in related_units(rid):
            shared_db_changed(rid, member)
@hooks.hook('upgrade-charm')
@hooks.hook('config-changed')
def config_changed():
@ -139,33 +198,48 @@ def config_changed():
hosts = get_cluster_hosts()
clustered = len(hosts) > 1
pre_hash = file_hash(MY_CNF)
render_config(clustered, hosts)
if file_hash(MY_CNF) != pre_hash:
bootstrapped = is_bootstrapped()
# NOTE: only configure the cluster if we have sufficient peers. This only
# applies if min-cluster-size is provided and is used to avoid extraneous
# configuration changes and premature bootstrapping as the cluster is
# deployed.
if is_sufficient_peers():
try:
# NOTE(jamespage): try with leadership election
if clustered and not is_leader() and not seeded():
# Bootstrap node into seeded cluster
service_restart('mysql')
mark_seeded()
elif not clustered:
# Restart with new configuration
service_restart('mysql')
if not clustered:
render_config_restart_on_changed(clustered, hosts)
elif clustered and is_leader():
log("Leader unit - bootstrap required=%s" % (not bootstrapped),
DEBUG)
render_config_restart_on_changed(clustered, hosts,
bootstrap=not bootstrapped)
elif bootstrapped:
log("Cluster is bootstrapped - configuring mysql on this node",
DEBUG)
render_config_restart_on_changed(clustered, hosts)
else:
log("Not configuring", DEBUG)
except NotImplementedError:
# NOTE(jamespage): fallback to legacy behaviour.
oldest = oldest_peer(peer_units())
if clustered and not oldest and not seeded():
# Bootstrap node into seeded cluster
service_restart('mysql')
mark_seeded()
elif not clustered:
# Restart with new configuration
service_restart('mysql')
if not clustered:
render_config_restart_on_changed(clustered, hosts)
elif clustered and oldest:
log("Leader unit - bootstrap required=%s" % (not bootstrapped),
DEBUG)
render_config_restart_on_changed(clustered, hosts,
bootstrap=not bootstrapped)
elif bootstrapped:
log("Cluster is bootstrapped - configuring mysql on this node",
DEBUG)
render_config_restart_on_changed(clustered, hosts)
else:
log("Not configuring", DEBUG)
# Notify any changes to the access network
for r_id in relation_ids('shared-db'):
for unit in related_units(r_id):
shared_db_changed(r_id, unit)
update_shared_db_rels()
# (re)install pcmkr agent
install_mysql_ocf()
@ -176,15 +250,20 @@ def config_changed():
@hooks.hook('cluster-relation-joined')
def cluster_joined(relation_id=None):
def cluster_joined():
if config('prefer-ipv6'):
addr = get_ipv6_addr(exc_list=[config('vip')])[0]
relation_settings = {'private-address': addr,
'hostname': socket.gethostname()}
log("Setting cluster relation: '%s'" % (relation_settings),
level=INFO)
relation_set(relation_id=relation_id,
relation_settings=relation_settings)
relation_set(relation_settings=relation_settings)
# Ensure all new peers are aware
cluster_state_uuid = relation_get('bootstrap-uuid', unit=local_unit())
if cluster_state_uuid:
notify_bootstrapped(cluster_rid=relation_id(),
cluster_uuid=cluster_state_uuid)
@hooks.hook('cluster-relation-departed')
@ -282,10 +361,15 @@ def configure_db_for_hosts(hosts, database, username, db_helper):
# TODO: This could be a hook common between mysql and percona-cluster
@hooks.hook('shared-db-relation-changed')
def shared_db_changed(relation_id=None, unit=None):
if not is_bootstrapped():
log("Percona cluster not yet bootstrapped - deferring shared-db rel "
"until bootstrapped", DEBUG)
return
if not is_elected_leader(DC_RESOURCE_NAME):
# NOTE(jamespage): relation level data candidate
log('Service is peered, clearing shared-db relation'
' as this service unit is not the leader')
log('Service is peered, clearing shared-db relation '
'as this service unit is not the leader')
relation_clear(relation_id)
# Each unit needs to set the db information otherwise if the unit
# with the info dies the settings die with it Bug# 1355848
@ -419,7 +503,7 @@ def ha_relation_joined():
resources = {'res_mysql_vip': res_mysql_vip,
'res_mysql_monitor': 'ocf:percona:mysql_monitor'}
db_helper = get_db_helper()
sstpsswd = config('sst-password')
resource_params = {'res_mysql_vip': vip_params,
'res_mysql_monitor':
@ -451,9 +535,7 @@ def ha_relation_changed():
if (clustered and is_elected_leader(DC_RESOURCE_NAME)):
log('Cluster configured, notifying other services')
# Tell all related services to start using the VIP
for r_id in relation_ids('shared-db'):
for unit in related_units(r_id):
shared_db_changed(r_id, unit)
update_shared_db_rels()
for r_id in relation_ids('db'):
for unit in related_units(r_id):
db_changed(r_id, unit, admin=False)
@ -465,9 +547,7 @@ def ha_relation_changed():
@hooks.hook('leader-settings-changed')
def leader_settings_changed():
# Notify any changes to data in leader storage
for r_id in relation_ids('shared-db'):
for unit in related_units(r_id):
shared_db_changed(r_id, unit)
update_shared_db_rels()
@hooks.hook('nrpe-external-master-relation-joined',

View File

@ -5,6 +5,8 @@ import socket
import tempfile
import os
import shutil
import uuid
from charmhelpers.core.host import (
lsb_release
)
@ -20,6 +22,14 @@ from charmhelpers.core.hookenv import (
config,
log,
DEBUG,
INFO,
WARNING,
ERROR,
is_leader,
)
from charmhelpers.contrib.hahelpers.cluster import (
oldest_peer,
peer_units,
)
from charmhelpers.fetch import (
apt_install,
@ -32,6 +42,11 @@ from charmhelpers.contrib.database.mysql import (
MySQLHelper,
)
# NOTE: python-mysqldb is installed by charmhelpers.contrib.database.mysql so
# hence why we import here
from MySQLdb import (
OperationalError
)
PACKAGES = [
'percona-xtradb-cluster-server-5.5',
@ -90,6 +105,29 @@ def get_host_ip(hostname=None):
return answers[0].address
def is_sufficient_peers():
    """If min-cluster-size has been provided, check that we have sufficient
    number of peers to proceed with bootstrapping percona cluster.

    :returns: True when no minimum is configured or enough units (peers plus
              this unit) are present, otherwise False.
    """
    min_size = config('min-cluster-size')
    if not min_size:
        # No minimum configured - nothing to wait for.
        return True

    size = 0
    for rid in relation_ids('cluster'):
        size = len(related_units(rid))

    # Include this unit
    size += 1
    if min_size > size:
        log("Insufficient number of units to configure percona cluster "
            "(expected=%s, got=%s)" % (min_size, size), level=INFO)
        return False

    log("Sufficient units available to configure percona cluster "
        "(>=%s)" % (min_size), level=DEBUG)
    return True
def get_cluster_hosts():
hosts_map = {}
hostname = get_host_ip()
@ -246,3 +284,86 @@ def install_mysql_ocf():
shutil.copy(src_file, dest_file)
else:
log("'%s' already exists, skipping" % dest_file, level='INFO')
def get_wsrep_value(key):
    """Fetch a 'show status' value from the local mysql instance.

    :param key: name of the status variable e.g. 'wsrep_ready'.
    :returns: the value as reported by mysql, or None if we could not
              connect to the database or the query failed.
    """
    m_helper = get_db_helper()
    try:
        m_helper.connect(password=m_helper.get_mysql_root_password())
    except OperationalError:
        log("Could not connect to db", DEBUG)
        return None

    cursor = m_helper.connection.cursor()
    ret = None
    try:
        # key is always an internally supplied status-variable name, never
        # user input.
        cursor.execute("show status like '%s'" % (key))
        ret = cursor.fetchall()
    except Exception:
        # Fix: previously ERROR was passed as the interpolation argument of
        # log() so the key was never substituted into the message.
        log("Failed to get '%s'" % (key), ERROR)
        return None
    finally:
        cursor.close()

    if ret:
        return ret[0][1]

    return None
def is_bootstrapped():
    """Determine whether the percona cluster has been bootstrapped.

    Returns True if any unit (peer or this one) has published a
    'bootstrap-uuid' on the cluster relation.  Failing that, if this unit is
    the leader and wsrep reports the node as ready, the cluster uuid is
    broadcast to peers and True is returned.

    :returns: True if the cluster is considered bootstrapped, else False.
    """
    # Never consider the cluster bootstrapped before the configured
    # min-cluster-size has been reached.
    if not is_sufficient_peers():
        return False

    # Collect bootstrap uuids published by all units (including ourselves).
    uuids = []
    rids = relation_ids('cluster') or []
    for rid in rids:
        units = related_units(rid)
        units.append(local_unit())
        for unit in units:
            # NOTE(review): 'id' shadows the builtin - consider renaming.
            id = relation_get('bootstrap-uuid', unit=unit, rid=rid)
            if id:
                uuids.append(id)

    if uuids:
        # All units should agree on a single cluster uuid; warn if not.
        if len(set(uuids)) > 1:
            log("Found inconsistent bootstrap uuids - %s" % (uuids), WARNING)

        return True

    try:
        if not is_leader():
            return False
    except:
        # NOTE(review): presumably catching NotImplementedError from older
        # juju without leadership - falls back to oldest-peer election; a
        # bare except also swallows any other error here.
        oldest = oldest_peer(peer_units())
        if not oldest:
            return False

    # If this is the leader but we have not yet broadcast the cluster uuid
    # then do so now.
    wsrep_ready = get_wsrep_value('wsrep_ready') or ""
    if wsrep_ready.lower() in ['on', 'ready']:
        cluster_state_uuid = get_wsrep_value('wsrep_cluster_state_uuid')
        if cluster_state_uuid:
            notify_bootstrapped(cluster_uuid=cluster_state_uuid)
            return True

    return False
def notify_bootstrapped(cluster_rid=None, cluster_uuid=None):
    """Publish the cluster's bootstrap uuid on the peer relation(s).

    :param cluster_rid: restrict notification to this relation id; by
                        default all 'cluster' relations are notified.
    :param cluster_uuid: uuid to publish; if not given it is read from
                         wsrep, falling back to a freshly generated uuid.
    """
    rids = [cluster_rid] if cluster_rid else relation_ids('cluster')

    log("Notifying peers that percona is bootstrapped", DEBUG)
    if not cluster_uuid:
        cluster_uuid = get_wsrep_value('wsrep_cluster_state_uuid')
        if not cluster_uuid:
            cluster_uuid = str(uuid.uuid4())
            log("Could not determine cluster uuid so using '%s' instead" %
                (cluster_uuid), INFO)

    for rid in rids:
        relation_set(relation_id=rid, **{'bootstrap-uuid': cluster_uuid})

View File

@ -19,7 +19,7 @@ class ThreeNode(basic_deployment.BasicDeployment):
new_master = self.find_master()
assert new_master is not None, "master unit not found"
assert (new_master.info['public-address'] !=
old_master.info['public-address'])
old_master.info['public-address'])
assert self.is_port_open(address=self.vip), 'cannot connect to vip'

View File

@ -0,0 +1,17 @@
#!/usr/bin/env python
# test percona-cluster (1 node)
import basic_deployment


class SingleNode(basic_deployment.BasicDeployment):
    """Amulet test: deploy a single percona-cluster unit."""

    def __init__(self):
        super(SingleNode, self).__init__(units=1)

    def run(self):
        super(SingleNode, self).run()
        # Even a lone unit must bootstrap itself (wsrep_ready on/ready).
        assert self.is_pxc_bootstrapped(), "Cluster not bootstrapped"


if __name__ == "__main__":
    t = SingleNode()
    t.run()

View File

@ -0,0 +1,41 @@
#!/usr/bin/env python
# test percona-cluster (2 units, min-cluster-size=3 i.e. insufficient peers
# to allow the cluster to bootstrap)
import basic_deployment


class MultiNode(basic_deployment.BasicDeployment):
    """Amulet test: two units with min-cluster-size deliberately set to 3."""

    def __init__(self):
        super(MultiNode, self).__init__(units=2)

    def _get_configs(self):
        """Configure all of the services."""
        # min-cluster-size is larger than the number of deployed units so
        # the charm should hold off bootstrapping percona.
        cfg_percona = {'sst-password': 'ubuntu',
                       'root-password': 't00r',
                       'dataset-size': '512M',
                       'vip': self.vip,
                       'min-cluster-size': 3}

        cfg_ha = {'debug': True,
                  'corosync_mcastaddr': '226.94.1.4',
                  'corosync_key': ('xZP7GDWV0e8Qs0GxWThXirNNYlScgi3sRTdZk/IXKD'
                                   'qkNFcwdCWfRQnqrHU/6mb6sz6OIoZzX2MtfMQIDcXu'
                                   'PqQyvKuv7YbRyGHmQwAWDUA4ed759VWAO39kHkfWp9'
                                   'y5RRk/wcHakTcWYMwm70upDGJEP00YT3xem3NQy27A'
                                   'C1w=')}

        configs = {'percona-cluster': cfg_percona}
        if self.units > 1:
            configs['hacluster'] = cfg_ha

        return configs

    def run(self):
        super(MultiNode, self).run()
        # With insufficient peers no cluster forms, so a reported cluster
        # size of 1 is expected.  NOTE(review): assumes mysql still answers
        # status queries in this state - confirm against charm behaviour.
        got = self.get_cluster_size()
        msg = "Percona cluster unexpected size (wanted=%s, got=%s)" % (1, got)
        assert got == '1', msg


if __name__ == "__main__":
    t = MultiNode()
    t.run()

View File

@ -0,0 +1,43 @@
#!/usr/bin/env python
# test percona-cluster (3 units, min-cluster-size=3 i.e. exactly enough
# peers to allow the cluster to bootstrap)
import basic_deployment


class MultiNode(basic_deployment.BasicDeployment):
    """Amulet test: three units satisfying min-cluster-size of 3."""

    def __init__(self):
        super(MultiNode, self).__init__(units=3)

    def _get_configs(self):
        """Configure all of the services."""
        # min-cluster-size equals the number of deployed units so the charm
        # should proceed to bootstrap percona.
        cfg_percona = {'sst-password': 'ubuntu',
                       'root-password': 't00r',
                       'dataset-size': '512M',
                       'vip': self.vip,
                       'min-cluster-size': 3}

        cfg_ha = {'debug': True,
                  'corosync_mcastaddr': '226.94.1.4',
                  'corosync_key': ('xZP7GDWV0e8Qs0GxWThXirNNYlScgi3sRTdZk/IXKD'
                                   'qkNFcwdCWfRQnqrHU/6mb6sz6OIoZzX2MtfMQIDcXu'
                                   'PqQyvKuv7YbRyGHmQwAWDUA4ed759VWAO39kHkfWp9'
                                   'y5RRk/wcHakTcWYMwm70upDGJEP00YT3xem3NQy27A'
                                   'C1w=')}

        configs = {'percona-cluster': cfg_percona}
        if self.units > 1:
            configs['hacluster'] = cfg_ha

        return configs

    def run(self):
        super(MultiNode, self).run()
        msg = "Percona cluster failed to bootstrap"
        assert self.is_pxc_bootstrapped(), msg

        # All three units should have joined a single cluster.
        got = self.get_cluster_size()
        msg = "Percona cluster unexpected size (wanted=%s, got=%s)" % (3, got)
        assert got == '3', msg


if __name__ == "__main__":
    t = MultiNode()
    t.run()

View File

@ -1,8 +1,8 @@
import amulet
import re
import os
import time
import telnetlib
import unittest
import yaml
from charmhelpers.contrib.openstack.amulet.deployment import (
OpenStackAmuletDeployment
@ -17,19 +17,21 @@ class BasicDeployment(OpenStackAmuletDeployment):
self.units = units
self.master_unit = None
self.vip = None
if vip:
self.vip = vip
elif 'AMULET_OS_VIP' in os.environ:
self.vip = os.environ.get('AMULET_OS_VIP')
elif os.path.isfile('local.yaml'):
with open('local.yaml', 'rb') as f:
self.cfg = yaml.safe_load(f.read())
if units > 1:
if vip:
self.vip = vip
elif 'AMULET_OS_VIP' in os.environ:
self.vip = os.environ.get('AMULET_OS_VIP')
elif os.path.isfile('local.yaml'):
with open('local.yaml', 'rb') as f:
self.cfg = yaml.safe_load(f.read())
self.vip = self.cfg.get('vip')
else:
amulet.raise_status(amulet.SKIP,
("please set the vip in local.yaml or env var "
"AMULET_OS_VIP to run this test suite"))
self.vip = self.cfg.get('vip')
else:
amulet.raise_status(amulet.SKIP,
("Please set the vip in local.yaml or "
"env var AMULET_OS_VIP to run this test "
"suite"))
def _add_services(self):
"""Add services
@ -40,16 +42,20 @@ class BasicDeployment(OpenStackAmuletDeployment):
"""
this_service = {'name': 'percona-cluster',
'units': self.units}
other_services = [{'name': 'hacluster'}]
other_services = []
if self.units > 1:
other_services.append({'name': 'hacluster'})
super(BasicDeployment, self)._add_services(this_service,
other_services)
def _add_relations(self):
"""Add all of the relations for the services."""
relations = {'percona-cluster:ha': 'hacluster:ha'}
super(BasicDeployment, self)._add_relations(relations)
if self.units > 1:
relations = {'percona-cluster:ha': 'hacluster:ha'}
super(BasicDeployment, self)._add_relations(relations)
def _configure_services(self):
def _get_configs(self):
"""Configure all of the services."""
cfg_percona = {'sst-password': 'ubuntu',
'root-password': 't00r',
@ -64,45 +70,55 @@ class BasicDeployment(OpenStackAmuletDeployment):
'y5RRk/wcHakTcWYMwm70upDGJEP00YT3xem3NQy27A'
'C1w=')}
configs = {'percona-cluster': cfg_percona,
'hacluster': cfg_ha}
super(BasicDeployment, self)._configure_services(configs)
configs = {'percona-cluster': cfg_percona}
if self.units > 1:
configs['hacluster'] = cfg_ha
return configs
def _configure_services(self):
super(BasicDeployment, self)._configure_services(self._get_configs())
def run(self):
# The number of seconds to wait for the environment to setup.
seconds = 1200
self._add_services()
self._add_relations()
self._configure_services()
self._deploy()
i = 0
while i < 30 and not self.master_unit:
self.master_unit = self.find_master()
i += 1
time.sleep(10)
if self.units > 1:
i = 0
while i < 30 and not self.master_unit:
self.master_unit = self.find_master()
i += 1
time.sleep(10)
assert self.master_unit is not None, 'percona-cluster vip not found'
msg = 'percona-cluster vip not found'
assert self.master_unit is not None, msg
output, code = self.master_unit.run('sudo crm_verify --live-check')
assert code == 0, "'crm_verify --live-check' failed"
_, code = self.master_unit.run('sudo crm_verify --live-check')
assert code == 0, "'crm_verify --live-check' failed"
resources = ['res_mysql_vip']
resources += ['res_mysql_monitor:%d' % i for i in range(self.units)]
resources = ['res_mysql_vip']
resources += ['res_mysql_monitor:%d' %
i for i in range(self.units)]
assert sorted(self.get_pcmkr_resources()) == sorted(resources)
assert sorted(self.get_pcmkr_resources()) == sorted(resources)
else:
self.master_unit = self.find_master(ha=False)
for i in range(self.units):
uid = 'percona-cluster/%d' % i
unit = self.d.sentry.unit[uid]
assert self.is_mysqld_running(unit), 'mysql not running: %s' % uid
def find_master(self):
def find_master(self, ha=True):
for unit_id, unit in self.d.sentry.unit.items():
if not unit_id.startswith('percona-cluster/'):
continue
if not ha:
return unit
# is the vip running here?
output, code = unit.run('sudo ip a | grep "inet %s/"' % self.vip)
print('---')
@ -130,13 +146,37 @@ class BasicDeployment(OpenStackAmuletDeployment):
else:
u = self.master_unit
output, code = u.run('pidof mysqld')
_, code = u.run('pidof mysqld')
if code != 0:
print("ERROR: command returned non-zero '%s'" % (code))
return False
return self.is_port_open(u, '3306')
def get_wsrep_value(self, attr, unit=None):
if unit:
u = unit
else:
u = self.master_unit
cmd = ("mysql -uroot -pt00r -e\"show status like '%s';\"| "
"grep %s" % (attr, attr))
output, code = u.run(cmd)
if code != 0:
print("ERROR: command returned non-zero '%s'" % (code))
return ""
value = re.search(r"^.+?\s+(.+)", output).group(1)
print("%s = %s" % (attr, value))
return value
def is_pxc_bootstrapped(self, unit=None):
value = self.get_wsrep_value('wsrep_ready', unit)
return value.lower() in ['on', 'ready']
    def get_cluster_size(self, unit=None):
        """Return wsrep_cluster_size as reported by mysql on `unit`
        (defaults to the master unit), as a string e.g. '3'.
        """
        return self.get_wsrep_value('wsrep_cluster_size', unit)
def is_port_open(self, unit=None, port='3306', address=None):
if unit:
addr = unit.info['public-address']
@ -144,8 +184,10 @@ class BasicDeployment(OpenStackAmuletDeployment):
addr = address
else:
raise Exception('Please provide a unit or address')
try:
telnetlib.Telnet(addr, port)
return True
except TimeoutError: # noqa this exception only available in py3
print("ERROR: could not connect to %s:%s" % (addr, port))
return False

View File

@ -128,3 +128,20 @@ class UtilsTests(unittest.TestCase):
'0.0.0.0': 'hostB'})
mock_rel_get.assert_called_with(rid=2, unit=4)
self.assertEqual(hosts, ['hostA', 'hostB'])
    @mock.patch.object(percona_utils, 'related_units')
    @mock.patch.object(percona_utils, 'relation_ids')
    @mock.patch.object(percona_utils, 'config')
    def test_is_sufficient_peers(self, mock_config, mock_relation_ids,
                                 mock_related_units):
        """Exercise is_sufficient_peers() across the min-cluster-size cases."""
        # min-cluster-size unset -> always sufficient.
        _config = {'min-cluster-size': None}
        mock_config.side_effect = lambda key: _config.get(key)
        self.assertTrue(percona_utils.is_sufficient_peers())

        # One peer + this unit = 2 < 3 -> insufficient.
        mock_relation_ids.return_value = ['cluster:0']
        mock_related_units.return_value = ['test/0']
        _config = {'min-cluster-size': 3}
        self.assertFalse(percona_utils.is_sufficient_peers())

        # Two peers + this unit = 3 -> sufficient.
        mock_related_units.return_value = ['test/0', 'test/1']
        self.assertTrue(percona_utils.is_sufficient_peers())