stx-monitor elasticsearch readiness probe enhancements

Modify the readiness probe in the elasticsearch chart so that
two sets of cluster health parameters can be configured.  One set of
cluster health parameters is used when no connection to the master
is possible or there are fewer than 2 elasticsearch data nodes in the
cluster.  The other set is used when connection to the master is
possible and 2 or more elasticsearch data nodes are present in the
cluster.

This is necessary because in various cluster recovery scenarios, and on
AIO-SX, only local node health should be checked, whereas when modifying
elasticsearch data nodes we need to ensure that no recovery is in
progress before advancing to the next data node.

Change-Id: I8125c3c5b87e081a00907c519e3d513c74031c70
Closes-Bug: 1869001
Signed-off-by: Kevin Smith <kevin.smith@windriver.com>
This commit is contained in:
Kevin Smith 2020-03-25 09:27:15 -04:00
parent 687fb8584f
commit bdfd8a2abd
3 changed files with 94 additions and 1 deletions

View File

@ -23,6 +23,7 @@ Patch02: 0002-Add-compatibility-for-k8s-1.16.patch
Patch03: 0003-use-oss-image.patch
Patch04: 0004-Update-to-Elastic-7.4.0-Release.patch
Patch05: 0005-set-initial-masters-to-master-0.patch
Patch06: 0006-readiness-probe-enhancements.patch
BuildRequires: helm
@ -36,6 +37,7 @@ Monitor Helm elasticsearch charts
%patch03 -p1
%patch04 -p1
%patch05 -p1
%patch06 -p1
%build
# initialize helm and build the toolkit

View File

@ -0,0 +1,91 @@
From 36ea0e2a2fd6cf6ac8cb19411c14c5ef4d0618f9 Mon Sep 17 00:00:00 2001
From: Kevin Smith <kevin.smith@windriver.com>
Date: Mon, 23 Mar 2020 10:43:07 -0400
Subject: [PATCH 1/1] readiness probe enhancements
---
elasticsearch/templates/statefulset.yaml | 46 +++++++++++++++++++++++++++-----
elasticsearch/values.yaml | 2 ++
2 files changed, 41 insertions(+), 7 deletions(-)
diff --git a/elasticsearch/templates/statefulset.yaml b/elasticsearch/templates/statefulset.yaml
index e17d39e..483e1f4 100644
--- a/elasticsearch/templates/statefulset.yaml
+++ b/elasticsearch/templates/statefulset.yaml
@@ -194,7 +194,7 @@ spec:
# If the node is starting up wait for the cluster to be ready (request params: '{{ .Values.clusterHealthCheckParams }}' )
# Once it has started only check that the node itself is responding
START_FILE=/tmp/.es_start_file
-
+
http () {
local path="${1}"
if [ -n "${ELASTIC_USERNAME}" ] && [ -n "${ELASTIC_PASSWORD}" ]; then
@@ -209,13 +209,45 @@ spec:
echo 'Elasticsearch is already running, lets check the node is healthy'
http "/"
else
- echo 'Waiting for elasticsearch cluster to become cluster to be ready (request params: "{{ .Values.clusterHealthCheckParams }}" )'
- if http "/_cluster/health?{{ .Values.clusterHealthCheckParams }}" ; then
- touch ${START_FILE}
- exit 0
+ DATA_NODE=$(printenv node.data)
+ if [[ "$DATA_NODE" == true ]]; then
+ # This is a data node, check for health depending on whether we can
+ # reach the master node and how many data nodes there are.
+ DATA_NODE_COUNT=$(http "/_cat/nodes?master_timeout=1s" | grep -c data)
+ echo "data node count = $DATA_NODE_COUNT"
+ if [[ $DATA_NODE_COUNT -gt 1 ]]; then
+ # We connected to master and there is more than one data node.
+ echo 'Waiting for elasticsearch cluster to become ready (request params: "{{ .Values.clusterHealthCheckParams }}" )'
+ if http "/_cluster/health?{{ .Values.clusterHealthCheckParams }}" ; then
+ touch ${START_FILE}
+ exit 0
+ else
+ echo 'Cluster is not yet ready (request params: "{{ .Values.clusterHealthCheckParams }}" )'
+ exit 1
+ fi
+ else
+ # Cannot connect to the master or we are the only data node
+ # found. Could be DOR, AIO-SX, other host is locked and we
+ # experienced a pod restart or other similar scenario.
+ echo "Cannot connect to master or less than 2 data nodes"
+ echo 'Waiting for elasticsearch cluster to become ready (request params: "{{ .Values.clusterHealthCheckParamsBasic }}" )'
+ if http "/_cluster/health?{{ .Values.clusterHealthCheckParamsBasic }}" ; then
+ touch ${START_FILE}
+ exit 0
+ else
+ echo 'Cluster is not yet ready (request params: "{{ .Values.clusterHealthCheckParamsBasic }}" )'
+ exit 1
+ fi
+ fi
else
- echo 'Cluster is not yet ready (request params: "{{ .Values.clusterHealthCheckParams }}" )'
- exit 1
+ echo 'Waiting for elasticsearch cluster to become ready (request params: "{{ .Values.clusterHealthCheckParams }}" )'
+ if http "/_cluster/health?{{ .Values.clusterHealthCheckParams }}" ; then
+ touch ${START_FILE}
+ exit 0
+ else
+ echo 'Cluster is not yet ready (request params: "{{ .Values.clusterHealthCheckParams }}" )'
+ exit 1
+ fi
fi
fi
ports:
diff --git a/elasticsearch/values.yaml b/elasticsearch/values.yaml
index 0d983eb..ebbae6c 100755
--- a/elasticsearch/values.yaml
+++ b/elasticsearch/values.yaml
@@ -197,6 +197,8 @@ readinessProbe:
# https://www.elastic.co/guide/en/elasticsearch/reference/current/cluster-health.html#request-params wait_for_status
clusterHealthCheckParams: "wait_for_status=green&timeout=1s"
+# Used for readiness probe when on a data node and only a basic health check is needed.
+clusterHealthCheckParamsBasic: "local=true"
## Use an alternate scheduler.
## ref: https://kubernetes.io/docs/tasks/administer-cluster/configure-multiple-schedulers/
--
1.8.3.1

View File

@ -106,7 +106,7 @@ data:
esMajorVersion: 7
masterService: 'mon-elasticsearch-data-headless, mon-elasticsearch-master'
podManagementPolicy: OrderedReady
clusterHealthCheckParams: 'local=true'
clusterHealthCheckParams: 'wait_for_no_relocating_shards&wait_for_no_initializing_shards&timeout=1s'
maxUnavailable: 1
extraEnvs:
- name: DATA_PRESTOP_SLEEP