From b0bb8dfa7a2196d484439b5f8aeb5faa029d45c8 Mon Sep 17 00:00:00 2001
From: Oleksii Grudev
Date: Thu, 23 Jan 2020 18:45:18 +0200
Subject: [PATCH] Prevent splitbrain during full Galera restart
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch introduces a new cluster state, "reboot", which is set by
the leader node so that the other nodes start mysql without the
"--wsrep-new-cluster" option. Before this change the following
situation could occur:

1. All pods go down one by one with some offset.
2. The first and second nodes have the highest seqno.
3. The script on the first node detects there are no active backends
   and starts its timeout loop.
4. The script on the second node detects there are no active backends
   and starts its timeout loop (roughly 20 seconds after the first
   node).
5. The timeout loop finishes on the first node; it checks for the
   highest seqno and lowest hostname and wins the right to start the
   cluster. mysql is started with the "--wsrep-new-cluster" parameter,
   and this node's seqno is set to "-1" after mysql starts.
6. The periodic job syncs the values from the grastate file to the
   configmap.
7. The timeout loop finishes on the second node. It looks for the node
   with the highest seqno and lowest hostname, and since the first
   node's seqno is already "-1", the second node decides that it should
   lead the cluster startup and also runs mysql with the
   "--wsrep-new-cluster" option, which leads to a split brain.

Change-Id: Ic63fd916289cb05411544cb33d5fdeed1352b380
---
 mariadb/templates/bin/_start.py.tpl | 37 ++++++++++++++++++++++++++---
 1 file changed, 34 insertions(+), 3 deletions(-)

diff --git a/mariadb/templates/bin/_start.py.tpl b/mariadb/templates/bin/_start.py.tpl
index b20d55786c..312ad84efb 100644
--- a/mariadb/templates/bin/_start.py.tpl
+++ b/mariadb/templates/bin/_start.py.tpl
@@ -436,7 +436,8 @@ def get_cluster_state():
                     "openstackhelm.openstack.org/cluster.state": state,
                     "openstackhelm.openstack.org/leader.node": leader,
                     "openstackhelm.openstack.org/leader.expiry":
-                        leader_expiry
+                        leader_expiry,
+                    "openstackhelm.openstack.org/reboot.node": ""
                 }
             },
             "data": {}
@@ -685,9 +686,17 @@ def check_if_i_lead():
                 "{1}".format(counter, count))
     max_seqno_nodes = get_nodes_with_highest_seqno()
     leader_node = resolve_leader_node(max_seqno_nodes)
-    if local_hostname == leader_node:
-        logger.info("I lead the cluster")
+    if (local_hostname == leader_node and not check_for_active_nodes()
+            and get_cluster_state() == 'live'):
+        logger.info("I lead the cluster. Setting cluster state to reboot.")
+        set_configmap_annotation(
+            key='openstackhelm.openstack.org/cluster.state', value='reboot')
+        set_configmap_annotation(
+            key='openstackhelm.openstack.org/reboot.node', value=local_hostname)
         return True
+    elif local_hostname == leader_node:
+        logger.info("The cluster is already rebooting")
+        return False
     else:
         logger.info("{0} leads the cluster".format(leader_node))
         return False
@@ -866,6 +875,28 @@ elif get_cluster_state() == 'live':
         while not check_for_active_nodes():
             time.sleep(default_sleep)
         run_mysqld()
+elif get_cluster_state() == 'reboot':
+    reboot_node = get_configmap_value(
+        type='annotation', key='openstackhelm.openstack.org/reboot.node')
+    if reboot_node == local_hostname:
+        logger.info(
+            "Cluster reboot procedure wasn't finished. Trying again.")
+        update_grastate_on_restart()
+        launch_leader_election()
+        launch_cluster_monitor()
+        mysqld_reboot()
+    else:
+        logger.info(
+            "Waiting for the lead node to come online before joining "
+            "it")
+        update_grastate_on_restart()
+        launch_leader_election()
+        launch_cluster_monitor()
+        while not check_for_active_nodes():
+            time.sleep(default_sleep)
+        set_configmap_annotation(
+            key='openstackhelm.openstack.org/cluster.state', value='live')
+        run_mysqld()
 else:
     logger.critical("Dont understand cluster state, exiting with error status")
     sys.exit(1)