Notify bootstrapped action

It turns out a subsequent required step after a cold boot bootstrap is
notifying the cluster of the new bootstrap UUID.

The notify-bootstrapped action should be run on a different node than
the one which ran the bootstrap-pxc action.

This action will ensure the cluster converges on the correct bootstrap
UUID.

A subsequent patch stacked on this one will include tests for the new
cold boot actions.

Change-Id: Idee12d5f7e28498c5ab6ccb9605f751c6427ac30
Partial-Bug: #1744393
This commit is contained in:
David Ames 2019-07-12 16:16:46 -07:00
parent b97a0971c2
commit b8c2213dfb
6 changed files with 154 additions and 46 deletions

View File

@ -29,6 +29,8 @@ bootstrap-pxc:
Bootstrap this unit of Percona.
*WARNING* This action will bootstrap this unit of Percona cluster. This
should only occur in a recovery scenario. Make sure this unit has the
highest sequence number in grstate.dat or data loss may occur.
highest sequence number in grastate.dat or data loss may occur.
See upstream Percona documentation for context
https://www.percona.com/blog/2014/09/01/galera-replication-how-to-recover-a-pxc-cluster/
notify-bootstrapped:
descripttion: Notify the cluster of the new bootstrap uuid.

View File

@ -112,16 +112,28 @@ def backup(args):
def bootstrap_pxc(args):
""" Force a bootstrap on this node
This action will run bootstrap-pxc on this node bootstrapping the cluster.
This action should only be run after a cold start requiring a bootstrap.
This action should only be run on the node with the highest sequence number
as displayed in workgoup status and found in grastate.dat.
If this unit has the highest sequence number and is not the juju leader
node, a subsequent action run of notify-bootstrapped is required.
"""
try:
# Force safe to bootstrap
percona_utils.set_grstate_safe_to_bootstrap()
percona_utils.set_grastate_safe_to_bootstrap()
# Boostrap this node
percona_utils.bootstrap_pxc()
except (percona_utils.GRStateFileNotFound, OSError) as e:
percona_utils.notify_bootstrapped()
except (percona_utils.GRAStateFileNotFound, OSError) as e:
action_set({
'output': e.output,
'return-code': e.returncode})
action_fail("The GRState file does not exist or cannot be written to.")
action_fail("The GRAState file does not exist or cannot "
"be written to.")
except (subprocess.CalledProcessError, Exception) as e:
action_set({
'output': e.output,
@ -130,16 +142,27 @@ def bootstrap_pxc(args):
action_fail("The bootstrap-pxc failed. "
"See traceback in show-action-output")
action_set({
'output': "Bootstrap succeded. "
'output': "Bootstrap succeeded. "
"Wait for the other units to run update-status"})
percona_utils.assess_status(percona_utils.register_configs())
def notify_bootstrapped(args):
"""Notify the cluster of the new bootstrap cluster UUID.
As a consequence of timing, this action will often need to be executed
after the bootstrap-pxc action. It will need to be run on a different unit
than was bootstrap-pxc was executed on.
"""
percona_utils.notify_bootstrapped()
# A dictionary of all the defined actions to callables (which take
# parsed arguments).
ACTIONS = {"pause": pause, "resume": resume, "backup": backup,
"complete-cluster-series-upgrade": complete_cluster_series_upgrade,
"bootstrap-pxc": bootstrap_pxc}
"bootstrap-pxc": bootstrap_pxc,
"notify-bootstrapped": notify_bootstrapped}
def main(args):

1
actions/notify-bootstrapped Symbolic link
View File

@ -0,0 +1 @@
actions.py

View File

@ -106,6 +106,7 @@ from percona_utils import (
get_db_helper,
mark_seeded, seeded,
install_mysql_ocf,
maybe_notify_bootstrapped,
notify_bootstrapped,
is_bootstrapped,
clustered_once,
@ -145,6 +146,7 @@ from percona_utils import (
get_slave_status,
delete_replication_user,
list_replication_users,
check_mysql_connection,
)
from charmhelpers.core.unitdata import kv
@ -331,7 +333,8 @@ def render_config_restart_on_changed(hosts, bootstrap=False):
def update_client_db_relations():
""" Upate client db relations IFF ready
"""
if leader_node_is_ready() or client_node_is_ready():
if ((leader_node_is_ready() or
client_node_is_ready()) and check_mysql_connection()):
for r_id in relation_ids('shared-db'):
for unit in related_units(r_id):
shared_db_changed(r_id, unit)
@ -633,6 +636,8 @@ def cluster_changed():
peer_echo(includes=inc_list)
# NOTE(jamespage): deprecated - leader-election
maybe_notify_bootstrapped()
cluster_joined()
config_changed()
@ -951,6 +956,9 @@ def ha_relation_changed():
@hooks.hook('leader-settings-changed')
def leader_settings_changed():
'''Re-trigger install once leader has seeded passwords into install'''
maybe_notify_bootstrapped()
config_changed()
# NOTE(tkurek): re-set 'master' relation data
if relation_ids('master'):

View File

@ -113,8 +113,8 @@ class DesyncedException(Exception):
pass
class GRStateFileNotFound(Exception):
"""Raised when the grstate file does not exist"""
class GRAStateFileNotFound(Exception):
"""Raised when the grastate file does not exist"""
pass
@ -548,8 +548,9 @@ def is_bootstrapped():
DEBUG)
return False
elif len(set(uuids)) > 1:
raise Exception("Found inconsistent bootstrap uuids: "
"{}".format((uuids)))
log("Found inconsistent bootstrap uuids: "
"{}".format(uuids), level=WARNING)
return False
else:
log("All {} percona units reporting clustered".format(min_size),
DEBUG)
@ -693,7 +694,8 @@ def charm_check_func():
not check_mysql_connection()):
return ('blocked',
'MySQL is down. Sequence Number: {}. Safe To Bootstrap: {}'
.format(get_grstate_seqno(), get_grstate_safe_to_bootstrap()))
.format(get_grastate_seqno(),
get_grastate_safe_to_bootstrap()))
@retry_on_exception(num_retries=10,
base_delay=2,
@ -1489,58 +1491,89 @@ def check_mysql_connection():
return False
def get_grstate_seqno():
def get_grastate_seqno():
"""Get GR State safe sequence number.
Read the grstate yaml file to determine the sequence number for this
Read the grastate yaml file to determine the sequence number for this
instance.
:returns: int Sequence Number
"""
grstate_file = os.path.join(resolve_data_dir(), "grastate.dat")
if os.path.exists(grstate_file):
with open(grstate_file, 'r') as f:
grstate = yaml.safe_load(f)
return grstate.get("seqno")
grastate_file = os.path.join(resolve_data_dir(), "grastate.dat")
if os.path.exists(grastate_file):
with open(grastate_file, 'r') as f:
grastate = yaml.safe_load(f)
return grastate.get("seqno")
def get_grstate_safe_to_bootstrap():
def get_grastate_safe_to_bootstrap():
"""Get GR State safe to bootstrap.
Read the grstate yaml file to determine if it is safe to bootstrap from
Read the grastate yaml file to determine if it is safe to bootstrap from
this instance.
:returns: int Safe to bootstrap 0 or 1
"""
grstate_file = os.path.join(resolve_data_dir(), "grastate.dat")
if os.path.exists(grstate_file):
with open(grstate_file, 'r') as f:
grstate = yaml.safe_load(f)
return grstate.get("safe_to_bootstrap")
grastate_file = os.path.join(resolve_data_dir(), "grastate.dat")
if os.path.exists(grastate_file):
with open(grastate_file, 'r') as f:
grastate = yaml.safe_load(f)
return grastate.get("safe_to_bootstrap")
def set_grstate_safe_to_bootstrap():
def set_grastate_safe_to_bootstrap():
"""Set GR State safe to bootstrap.
Update the grstate yaml file to indicate it is safe to bootstrap from
Update the grastate yaml file to indicate it is safe to bootstrap from
this instance.
:side effect: Writes the grstate.dat file.
:raises GRStateFileNotFound: If grstate.dat file does not exist.
:side effect: Writes the grastate.dat file.
:raises GRAStateFileNotFound: If grastate.dat file does not exist.
:returns: None
"""
grstate_file = os.path.join(resolve_data_dir(), "grastate.dat")
if not os.path.exists(grstate_file):
raise GRStateFileNotFound("{} file does not exist"
.format(grstate_file))
with open(grstate_file, 'r') as f:
grstate = yaml.safe_load(f)
grastate_file = os.path.join(resolve_data_dir(), "grastate.dat")
if not os.path.exists(grastate_file):
raise GRAStateFileNotFound("{} file does not exist"
.format(grastate_file))
with open(grastate_file, 'r') as f:
grastate = yaml.safe_load(f)
# Force safe to bootstrap
grstate["safe_to_bootstrap"] = 1
grastate["safe_to_bootstrap"] = 1
with open(grstate_file, 'w') as f:
f.write(yaml.dump(grstate))
with open(grastate_file, 'w') as f:
f.write(yaml.dump(grastate))
def maybe_notify_bootstrapped():
"""Maybe notify bootstrapped.
In the event of a subsequent bootstrap after deploy time, as in the case of
a cold start, it is necessary to re-notify the cluster relation of the new
bootstrap UUID.
This function checks that the cluster has been clustered before and
notified clients, checks for agreement with the leader on the bootstrap
UUID and calls notify_bootstrapped to inform the cluster peers of the UUID.
:side effect: calls kv()
:side effect: may call notify_bootstrapped()
:returns: None
"""
if not check_mysql_connection():
log("MySQL is down: deferring notify bootstrapped", DEBUG)
return
kvstore = kv()
# Using INITIAL_CLIENT_UPDATE_KEY as this is a step beyond merely
# clustered, but rather clustered and clients were previously notified.
if kvstore.get(INITIAL_CLIENT_UPDATE_KEY, False):
# Handle a change of bootstrap UUID after cold start bootstrap
lead_cluster_state_uuid = leader_get('bootstrap-uuid')
cluster_state_uuid = get_wsrep_value('wsrep_cluster_state_uuid')
if lead_cluster_state_uuid == cluster_state_uuid:
notify_bootstrapped(cluster_uuid=cluster_state_uuid)

View File

@ -14,6 +14,8 @@ os.environ['JUJU_UNIT_NAME'] = 'percona-cluster/2'
class UtilsTests(CharmTestCase):
TO_PATCH = [
'config',
'kv',
'leader_get',
'log',
'relation_ids',
'related_units',
@ -87,7 +89,6 @@ class UtilsTests(CharmTestCase):
os.remove(tmpfile.name)
self.assertEqual(len(lines), 5)
print("XXX", lines)
self.assertEqual(lines[0], "#somedata\n")
self.assertEqual(lines[1],
"{} {}\n".format(list(_map.keys())[0],
@ -438,18 +439,18 @@ class UtilsTests(CharmTestCase):
@mock.patch("percona_utils.resolve_data_dir")
@mock.patch("percona_utils.os")
def test_get_grstate_seqno(self, _os, _resolve_dd):
def test_get_grastate_seqno(self, _os, _resolve_dd):
_resolve_dd.return_value = "/tmp"
_seqno = "25"
_os.path.exists.return_value = True
self.yaml.safe_load.return_value = {"seqno": _seqno}
with patch_open() as (_open, _file):
_open.return_value = _file
self.assertEqual(_seqno, percona_utils.get_grstate_seqno())
self.assertEqual(_seqno, percona_utils.get_grastate_seqno())
@mock.patch("percona_utils.resolve_data_dir")
@mock.patch("percona_utils.os")
def test_get_grstate_safe_to_bootstrap(self, _os, _resolve_dd):
def test_get_grastate_safe_to_bootstrap(self, _os, _resolve_dd):
_resolve_dd.return_value = "/tmp"
_bootstrap = "0"
_os.path.exists.return_value = True
@ -457,11 +458,11 @@ class UtilsTests(CharmTestCase):
with patch_open() as (_open, _file):
_open.return_value = _file
self.assertEqual(
_bootstrap, percona_utils.get_grstate_safe_to_bootstrap())
_bootstrap, percona_utils.get_grastate_safe_to_bootstrap())
@mock.patch("percona_utils.resolve_data_dir")
@mock.patch("percona_utils.os")
def test_set_grstate_safe_to_bootstrap(self, _os, _resolve_dd):
def test_set_grastate_safe_to_bootstrap(self, _os, _resolve_dd):
_resolve_dd.return_value = "/tmp"
_bootstrap = "0"
_os.path.exists.return_value = True
@ -469,10 +470,50 @@ class UtilsTests(CharmTestCase):
with patch_open() as (_open, _file):
_open.return_value = _file
_file.write = mock.MagicMock()
percona_utils.set_grstate_safe_to_bootstrap()
percona_utils.set_grastate_safe_to_bootstrap()
self.yaml.dump.assert_called_once_with({"safe_to_bootstrap": 1})
_file.write.assert_called_once()
@mock.patch("percona_utils.check_mysql_connection")
@mock.patch("percona_utils.get_wsrep_value")
@mock.patch("percona_utils.notify_bootstrapped")
def test_maybe_notify_bootstrapped(
self, _notify_bootstrapped,
_get_wsrep_value, _check_mysql_connection):
kvstore = mock.MagicMock()
kvstore.get.return_value = True
self.kv.return_value = kvstore
_check_mysql_connection.return_value = False
_uuid = "uuid-uuid"
self.leader_get.return_value = _uuid
_get_wsrep_value.return_value = _uuid
# mysql not runnig
percona_utils.maybe_notify_bootstrapped()
_notify_bootstrapped.assert_not_called()
# No clients initialized
_check_mysql_connection.return_value = True
kvstore.get.return_value = False
percona_utils.maybe_notify_bootstrapped()
_notify_bootstrapped.assert_not_called()
# Differing UUID
_check_mysql_connection.return_value = True
kvstore.get.return_value = True
_get_wsrep_value.return_value = "not-the-same-uuid"
percona_utils.maybe_notify_bootstrapped()
_notify_bootstrapped.assert_not_called()
# Differing UUID
_check_mysql_connection.return_value = True
kvstore.get.return_value = True
_get_wsrep_value.return_value = _uuid
percona_utils.maybe_notify_bootstrapped()
_notify_bootstrapped.assert_called_once_with(cluster_uuid=_uuid)
class UtilsTestsStatus(CharmTestCase):