Merge "Prevent mariadb from split brain while cluster is in reboot state"

This commit is contained in:
Zuul 2021-05-07 19:37:56 +00:00 committed by Gerrit Code Review
commit 6dd39da6ad
3 changed files with 39 additions and 20 deletions

View File

@ -15,7 +15,7 @@ apiVersion: v1
appVersion: v10.2.31
description: OpenStack-Helm MariaDB
name: mariadb
version: 0.2.0
version: 0.2.1
home: https://mariadb.com/kb/en/
icon: http://badges.mariadb.org/mariadb-badge-180x60.png
sources:

View File

@ -17,6 +17,7 @@ limitations under the License.
import errno
import logging
import os
import secrets
import select
import signal
import subprocess # nosec
@ -58,6 +59,8 @@ kubernetes_version = kubernetes.client.VersionApi().get_code().git_version
logger.info("Kubernetes API Version: {0}".format(kubernetes_version))
k8s_api_instance = kubernetes.client.CoreV1Api()
# Setup secrets generator
secretsGen = secrets.SystemRandom()
def check_env_var(env_var):
"""Check if an env var exists.
@ -325,26 +328,33 @@ def safe_update_configmap(configmap_dict, configmap_patch):
# ensure nothing else has modified the confimap since we read it.
configmap_patch['metadata']['resourceVersion'] = configmap_dict[
'metadata']['resource_version']
try:
api_response = k8s_api_instance.patch_namespaced_config_map(
name=state_configmap_name,
namespace=pod_namespace,
body=configmap_patch)
return True
except kubernetes.client.rest.ApiException as error:
if error.status == 409:
# This status code indicates a collision trying to write to the
# config map while another instance is also trying the same.
logger.warning("Collision writing configmap: {0}".format(error))
# This often happens when the replicas were started at the same
# time, and tends to be persistent. Sleep briefly to break the
# synchronization.
time.sleep(1)
return True
else:
logger.error("Failed to set configmap: {0}".format(error))
return error
# Retry up to 8 times in case of 409 only. Each retry has a ~1 second
# sleep in between so do not want to exceed the roughly 10 second
# write interval per cm update.
for i in range(8):
try:
api_response = k8s_api_instance.patch_namespaced_config_map(
name=state_configmap_name,
namespace=pod_namespace,
body=configmap_patch)
return True
except kubernetes.client.rest.ApiException as error:
if error.status == 409:
# This status code indicates a collision trying to write to the
# config map while another instance is also trying the same.
logger.warning("Collision writing configmap: {0}".format(error))
# This often happens when the replicas were started at the same
# time, and tends to be persistent. Sleep with some random
# jitter value briefly to break the synchronization.
naptime = secretsGen.uniform(0.8,1.2)
time.sleep(naptime)
else:
logger.error("Failed to set configmap: {0}".format(error))
return error
logger.info("Retry writing configmap attempt={0} sleep={1}".format(
i+1, naptime))
return True
def set_configmap_annotation(key, value):
"""Update a configmap's annotations via patching.
@ -843,6 +853,14 @@ def run_mysqld(cluster='existing'):
"This is a fresh node joining the cluster for the 1st time, not attempting to set admin passwords"
)
# Node ready to start MariaDB, update cluster state to live and remove
# reboot node info, if set previously.
if cluster == 'new':
set_configmap_annotation(
key='openstackhelm.openstack.org/cluster.state', value='live')
set_configmap_annotation(
key='openstackhelm.openstack.org/reboot.node', value='')
logger.info("Launching MariaDB")
run_cmd_with_logging(mysqld_cmd, logger)

View File

@ -16,4 +16,5 @@ mariadb:
- 0.1.13 Fix race condition for grastate.dat
- 0.1.14 Update mysqld-exporter image to v0.12.1
- 0.2.0 Uplift mariadb version and ubuntu release
- 0.2.1 Prevent potential splitbrain issue if cluster is in reboot state
...