Cluster peers notify readiness

The percona-cluster charm was rendering its configuration, and
therefore restarting, after each peer joined. This caused race
conditions with the leader handing out client credentials.

This is due to two parallel but conflicting goals: building the
cluster as quickly as possible, while also delaying client relations
until the full cluster is completely ready.

The charm relied on bootstrap_uuid being set among peers. However,
bootstrap_uuid is intentionally set early in pursuit of the first goal,
building the cluster quickly, and does not signify true readiness.

This change adds another peer relation setting, "ready", which each
node sets once it has bootstrapped, its hacluster relation is complete,
and it sees a sufficient number of peers.
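
For illustration, the notifying side is essentially one relation-set per
peer relation, mirroring the set_ready_on_peers() helper added in this
change (a minimal sketch, assuming the standard charmhelpers hook
environment provides relation_ids/relation_set):

    from charmhelpers.core.hookenv import relation_ids, relation_set

    def set_ready_on_peers():
        """Tell every peer on the 'cluster' relation that this unit is
        clustered and ready to serve clients."""
        for relid in relation_ids('cluster'):
            relation_set(relation_id=relid, ready=True)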

The cluster is considered ready once min-cluster-size peers have
indicated readiness. This fulfils the second goal: delaying client
relations until the full cluster is ready.
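
As a rough illustration of that check, readiness reduces to a small pure
function over the peers' "ready" flags. cluster_is_ready() below is a
hypothetical stand-alone distillation of the cluster_ready() helper in
the diff, not the charm's actual API:

    def cluster_is_ready(peer_ready_flags, min_cluster_size):
        """Return True once at least min_cluster_size units (including
        this one) have published a truthy 'ready' flag.

        peer_ready_flags maps unit name -> the value of its 'ready'
        relation setting, or None if the unit has not set it yet.
        """
        if len(peer_ready_flags) < min_cluster_size:
            return False
        return all(peer_ready_flags.values())

    # For example, with min-cluster-size = 3:
    cluster_is_ready({'pxc/0': True, 'pxc/1': True, 'pxc/2': None}, 3)  # False
    cluster_is_ready({'pxc/0': True, 'pxc/1': True, 'pxc/2': True}, 3)  # True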

Change-Id: I0998407fcb5efbdb0f7734ac39363e8d41088c79
Closes-Bug: #1775682
Author: David Ames
Date: 2018-06-13 09:40:58 -07:00
parent 78d1df2d50
commit 055d2bb17f
4 changed files with 153 additions and 27 deletions

@@ -111,6 +111,8 @@ from percona_utils import (
cluster_wait,
get_wsrep_provider_options,
get_server_id,
is_sufficient_peers,
set_ready_on_peers,
)
from charmhelpers.core.unitdata import kv
@@ -379,9 +381,9 @@ def config_changed():
DEBUG)
render_config_restart_on_changed(hosts,
bootstrap=not leader_bootstrapped)
elif leader_bootstrapped:
elif leader_bootstrapped and is_sufficient_peers():
# Speed up cluster process by bootstrapping when the leader has
# bootstrapped
# bootstrapped if we have expected number of peers
if leader_ip not in hosts:
# Fix Bug #1738896
hosts = [leader_ip] + hosts
@@ -421,6 +423,7 @@ def config_changed():
# bootstrapped
if is_bootstrapped():
update_root_password()
set_ready_on_peers()
@hooks.hook('cluster-relation-joined')
@@ -451,7 +454,7 @@ def cluster_changed():
inc_list = []
for attr in rdata.iterkeys():
if attr not in ['hostname', 'private-address', 'cluster-address',
'public-address']:
'public-address', 'ready']:
inc_list.append(attr)
peer_echo(includes=inc_list)

@@ -502,13 +502,7 @@ def is_bootstrapped():
@returns boolean
"""
min_size = config('min-cluster-size')
if not min_size:
units = 1
for relation_id in relation_ids('cluster'):
units += len(related_units(relation_id))
min_size = units
min_size = get_min_cluster_size()
if not is_sufficient_peers():
return False
elif min_size > 1:
@@ -541,6 +535,9 @@ def is_bootstrapped():
else:
log("All {} percona units reporting clustered".format(min_size),
DEBUG)
elif not seeded():
# Single unit deployment but not yet bootstrapped
return False
# Set INITIAL_CLUSTERED_KEY as the cluster has fully bootstrapped
kvstore = kv()
@@ -699,7 +696,10 @@ def charm_check_func():
else:
return ('waiting', 'Unit waiting on hacluster relation')
else:
return ('active', 'Unit is ready')
if seeded():
return ('active', 'Unit is ready')
else:
return ('waiting', 'Unit waiting to bootstrap')
@cached
@@ -866,6 +866,22 @@ def get_cluster_host_ip():
return cluster_addr
def get_min_cluster_size():
""" Get the minimum cluster size
If the config value is set use that, if not count the number of units on
the cluster relation.
"""
min_cluster_size = config('min-cluster-size')
if not min_cluster_size:
units = 1
for relation_id in relation_ids('cluster'):
units += len(related_units(relation_id))
min_cluster_size = units
return min_cluster_size
def cluster_ready():
"""Determine if each node in the cluster is ready to respond to client
requests.
@@ -883,7 +899,25 @@ def cluster_ready():
DEBUG)
return False
return is_bootstrapped()
min_cluster_size = get_min_cluster_size()
# Single unit deployment return state of seeded
if int(min_cluster_size) == 1:
return seeded()
peers = {}
relation_id = relation_ids('cluster')[0]
units = related_units(relation_id) or []
if local_unit() not in units:
units.append(local_unit())
for unit in units:
peers[unit] = relation_get(attribute='ready',
rid=relation_id,
unit=unit)
if len(peers) >= min_cluster_size:
return all(peers.values())
return False
def client_node_is_ready():
@@ -1052,3 +1086,12 @@ def get_server_id():
server_id = MAX_SERVER_ID
return server_id
def set_ready_on_peers():
""" Set ready on peers
Notify peers this unit is clustered and ready to serve clients
"""
for relid in relation_ids('cluster'):
relation_set(relation_id=relid, ready=True)

@@ -41,6 +41,7 @@ TO_PATCH = ['log', 'config',
'leader_get',
'relation_clear',
'is_relation_made',
'is_sufficient_peers',
'peer_retrieve_by_prefix',
'client_node_is_ready',
'relation_set',
@@ -294,6 +295,7 @@ class TestConfigChanged(CharmTestCase):
'is_bootstrapped',
'clustered_once',
'is_leader',
'is_sufficient_peers',
'render_config_restart_on_changed',
'update_client_db_relations',
'install_mysql_ocf',
@@ -307,6 +309,7 @@ class TestConfigChanged(CharmTestCase):
'install_percona_xtradb_cluster',
'get_cluster_hosts',
'leader_get',
'set_ready_on_peers',
]
def setUp(self):
@@ -399,12 +402,26 @@ class TestConfigChanged(CharmTestCase):
self.render_config_restart_on_changed.assert_not_called()
self.update_bootstrap_uuid.assert_not_called()
# Leader is bootstrapped, no peers
# Leader is bootstrapped, insufficient peers
# Do not render
self.is_sufficient_peers.return_value = False
self.is_leader_bootstrapped.return_value = True
self.render_config_restart_on_changed.reset_mock()
self.install_percona_xtradb_cluster.reset_mock()
hooks.config_changed()
self.install_percona_xtradb_cluster.assert_called_once_with()
self.render_config_restart_on_changed.assert_not_called()
self.update_bootstrap_uuid.assert_not_called()
# Leader is bootstrapped, sufficient peers
# Use the leader node and render.
self.is_sufficient_peers.return_value = True
self.is_leader_bootstrapped.return_value = True
self.get_cluster_hosts.return_value = []
self.render_config_restart_on_changed.reset_mock()
self.install_percona_xtradb_cluster.reset_mock()
hooks.config_changed()
self.render_config_restart_on_changed.assert_called_once_with(
['10.10.10.10'])
@@ -436,6 +453,7 @@ class TestConfigChanged(CharmTestCase):
# In none of the prior scenarios should update_root_password have been
# called. is_bootstrapped was defaulted to False
self.update_root_password.assert_not_called()
self.set_ready_on_peers.assert_not_called()
# Leader present, leader bootstrapped, cluster bootstrapped
self.is_bootstrapped.return_value = True
@@ -449,6 +467,7 @@ class TestConfigChanged(CharmTestCase):
['10.10.10.20', '10.10.10.30', '10.10.10.10'])
self.update_bootstrap_uuid.assert_called_once()
self.update_root_password.assert_called_once()
self.set_ready_on_peers.assert_called_once()
class TestInstallPerconaXtraDB(CharmTestCase):

@@ -358,6 +358,25 @@ class UtilsTests(CharmTestCase):
with self.assertRaises(ValueError):
percona_utils.get_wsrep_provider_options()
def test_set_ready_on_peers(self):
self.relation_ids.return_value = ["rel:1"]
percona_utils.set_ready_on_peers()
self.relation_set.assert_called_with(relation_id="rel:1", ready=True)
def test_get_min_cluster_size(self):
_config = {}
self.config.side_effect = lambda key: _config.get(key)
self.relation_ids.return_value = ["rel:1"]
self.related_units.return_value = []
self.assertEqual(percona_utils.get_min_cluster_size(), 1)
self.related_units.return_value = ['unit/2', 'unit/9', 'unit/21']
self.assertEqual(percona_utils.get_min_cluster_size(), 4)
_config = {'min-cluster-size': 3}
self.config.side_effect = lambda key: _config.get(key)
self.assertEqual(percona_utils.get_min_cluster_size(), 3)
class UtilsTestsStatus(CharmTestCase):
@@ -374,12 +393,15 @@ class UtilsTestsStatus(CharmTestCase):
'is_unit_paused_set',
'is_clustered',
'distributed_wait',
'cluster_ready',
]
def setUp(self):
super(UtilsTestsStatus, self).setUp(percona_utils, self.TO_PATCH)
def test_single_unit(self):
@mock.patch.object(percona_utils, 'seeded')
def test_single_unit(self, mock_seeded):
mock_seeded.return_value = True
self.config.return_value = None
self.is_sufficient_peers.return_value = True
stat, _ = percona_utils.charm_check_func()
@@ -549,13 +571,35 @@ class UtilsTestsCTC(CharmTestCase):
# ports=None whilst port checks are disabled.
f.assert_called_once_with('assessor', services='s1', ports=None)
@mock.patch.object(percona_utils, 'get_min_cluster_size')
@mock.patch.object(percona_utils, 'seeded')
@mock.patch.object(percona_utils, 'is_sufficient_peers')
def test_is_bootstrapped(self, mock_is_sufficient_peers):
def test_is_bootstrapped(self, mock_is_sufficient_peers, mock_seeded,
mock_get_min_cluster_size):
kvstore = mock.MagicMock()
kvstore.get.return_value = False
self.kv.return_value = kvstore
mock_get_min_cluster_size.return_value = 1
# Single unit not yet seeded
self.relation_ids.return_value = []
mock_is_sufficient_peers.return_value = True
mock_seeded.return_value = False
self.assertFalse(percona_utils.is_bootstrapped())
kvstore.set.assert_not_called()
# Single unit seeded
self.relation_ids.return_value = []
mock_is_sufficient_peers.return_value = True
mock_seeded.return_value = True
self.assertTrue(percona_utils.is_bootstrapped())
kvstore.set.assert_called_once_with(key='initial-cluster-complete',
value=True)
# Not sufficient number of peers
kvstore.reset_mock()
mock_get_min_cluster_size.return_value = 3
self.relation_ids.return_value = ['cluster:0']
mock_is_sufficient_peers.return_value = False
self.assertFalse(percona_utils.is_bootstrapped())
@@ -605,26 +649,43 @@ class UtilsTestsCTC(CharmTestCase):
self.config.side_effect = lambda key: _config.get(key)
self.assertTrue(percona_utils.is_bootstrapped())
# Assume single unit no-min-cluster-size
mock_is_sufficient_peers.return_value = True
self.relation_ids.return_value = []
self.related_units.return_value = []
self.relation_get.return_value = None
_config = {'min-cluster-size': None}
@mock.patch.object(percona_utils, 'seeded')
def test_cluster_ready(self, mock_seeded):
# Single unit not seeded
_config = {}
mock_seeded.return_value = False
self.config.side_effect = lambda key: _config.get(key)
self.assertTrue(percona_utils.is_bootstrapped())
self.relation_ids.return_value = ['rel:1']
self.related_units.return_value = []
self.assertFalse(percona_utils.cluster_ready())
# Single unit seeded
_config = {}
mock_seeded.return_value = True
self.config.side_effect = lambda key: _config.get(key)
self.relation_ids.return_value = ['rel:1']
self.related_units.return_value = []
self.assertTrue(percona_utils.cluster_ready())
@mock.patch.object(percona_utils, 'is_bootstrapped')
def test_cluster_ready(self, mock_is_bootstrapped):
# When VIP configured check is_clustered
mock_is_bootstrapped.return_value = True
_config = {'vip': '10.10.10.10', 'min-cluster-size': 3}
self.config.side_effect = lambda key: _config.get(key)
# HACluster not ready
self.is_clustered.return_value = False
self.assertFalse(percona_utils.cluster_ready())
# HACluster ready
# HACluster ready peers not ready
self.is_clustered.return_value = True
self.related_units.return_value = ['unit/1', 'unit/2']
self.relation_get.return_value = None
self.assertFalse(percona_utils.cluster_ready())
# HACluster ready one peer ready one not
self.relation_get.side_effect = [True, True, None]
self.assertFalse(percona_utils.cluster_ready())
# HACluster ready one all peers ready
self.relation_get.side_effect = [True, True, True]
self.assertTrue(percona_utils.cluster_ready())
@mock.patch.object(percona_utils, 'cluster_ready')