[mariadb] Improve leader election on cold start

During cold start we pick the leader node by seqno. When a node is still running or was shut down ungracefully, its seqno may stay at -1 until the periodic task updates it from the local grastate.dat or detects the latest seqno via wsrep_recover. This patch adds an indefinite wait to the leader election function: it waits until all nodes report a seqno other than -1, to make sure we select the leader based on correct data. Change-Id: Id042f6f4c915b21b905bde4d57d40e159d924772
2024-04-04 15:45:40 +00:00 · 2024-04-04 15:45:40 +00:00 · 4aaa5fc778
commit 4aaa5fc778
parent 4ee7ebda43
3 changed files with 25 additions and 16 deletions
--- a/mariadb/Chart.yaml
+++ b/mariadb/Chart.yaml
@ -15,7 +15,7 @@ apiVersion: v1
 appVersion: v10.6.7
 description: OpenStack-Helm MariaDB
 name: mariadb
-version: 0.2.53
+version: 0.2.54
 home: https://mariadb.com/kb/en/
 icon: http://badges.mariadb.org/mariadb-badge-180x60.png
 sources:
--- a/mariadb/templates/bin/_start.py.tpl
+++ b/mariadb/templates/bin/_start.py.tpl
@ -684,20 +684,28 @@ def get_nodes_with_highest_seqno():
    """Find out which node(s) has the highest sequence number and return
    them in an array."""
    logger.info("Getting the node(s) with highest seqno from configmap.")
-    state_configmap = k8s_api_instance.read_namespaced_config_map(
-        name=state_configmap_name, namespace=pod_namespace)
-    state_configmap_dict = state_configmap.to_dict()
-    seqnos = dict()
-    for key, value in list(state_configmap_dict['data'].items()):
-        keyitems = key.split('.')
-        key = keyitems[0]
-        node = keyitems[1]
-        if key == 'seqno':
-            #Explicit casting to integer to have resulting list of integers for correct comparison
-            seqnos[node] = int(value)
-    max_seqno = max(seqnos.values())
-    max_seqno_nodes = sorted([k for k, v in list(seqnos.items()) if v == max_seqno])
-    return max_seqno_nodes
+    # We can proceed only when we get seqno from all nodes, and if seqno is
+    # -1 it means we didn't get it correctly, the shutdown was not clean and we need
+    # to wait for a value taken by wsrep recover.
+    while True:
+        state_configmap = k8s_api_instance.read_namespaced_config_map(
+            name=state_configmap_name, namespace=pod_namespace)
+        state_configmap_dict = state_configmap.to_dict()
+        seqnos = dict()
+        for key, value in list(state_configmap_dict['data'].items()):
+            keyitems = key.split('.')
+            key = keyitems[0]
+            node = keyitems[1]
+            if key == 'seqno':
+                #Explicit casting to integer to have resulting list of integers for correct comparison
+                seqnos[node] = int(value)
+        max_seqno = max(seqnos.values())
+        max_seqno_nodes = sorted([k for k, v in list(seqnos.items()) if v == max_seqno])
+        if [x for x in seqnos.values() if x < 0 ]:
+            logger.info("Thq seqno for some nodes is < 0, can't make a decision about leader. Node seqnums: %s", seqnos)
+            time.sleep(state_configmap_update_period)
+            continue
+        return max_seqno_nodes


 def resolve_leader_node(nodename_array):
@ -727,7 +735,7 @@ def check_if_i_lead():
    # reliably checking in following full restart of cluster.
    count = cluster_leader_ttl / state_configmap_update_period
    counter = 0
-    while counter <= count:
+    while counter < count:
        if check_if_cluster_data_is_fresh():
            counter += 1
        else:
--- a/releasenotes/notes/mariadb.yaml
+++ b/releasenotes/notes/mariadb.yaml
@ -69,4 +69,5 @@ mariadb:
  - 0.2.51 Add 2024.2 overrides
  - 0.2.52 Added SSL support to cluster-wait job
  - 0.2.53 Use constant for mysql binary name
+  - 0.2.54 Improve leader election on cold start
 ...