Elasticsearch: Update Rolling Restart Procedure
This change implements the recommended rolling restart procedure[0] for elasticsearch-data pods. [0] https://www.elastic.co/guide/en/elasticsearch/reference/7.x/restart-cluster.html#restart-cluster-rolling Change-Id: I935b3681999e9bda616898f2b5e01f582ee54ed9
This commit is contained in:
parent
b62a46336c
commit
309278389e
@ -34,19 +34,29 @@ function stop () {
|
|||||||
kill -TERM 1
|
kill -TERM 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function wait_to_join() {
|
||||||
|
joined=$(curl -s -K- <<< "--user ${ELASTICSEARCH_USERNAME}:${ELASTICSEARCH_PASSWORD}" "${ELASTICSEARCH_ENDPOINT}/_cat/nodes" | grep -w $NODE_NAME || true )
|
||||||
|
|
||||||
|
while [ -z "$joined" ]; do
|
||||||
|
sleep 5
|
||||||
|
joined=$(curl -s -K- <<< "--user ${ELASTICSEARCH_USERNAME}:${ELASTICSEARCH_PASSWORD}" "${ELASTICSEARCH_ENDPOINT}/_cat/nodes" | grep -w $NODE_NAME || true )
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
function allocate_data_node () {
|
function allocate_data_node () {
|
||||||
CLUSTER_SETTINGS=$(curl -K- <<< "--user ${ELASTICSEARCH_USERNAME}:${ELASTICSEARCH_PASSWORD}" \
|
if [ -f /data/restarting ]; then
|
||||||
"${ELASTICSEARCH_ENDPOINT}/_cluster/settings")
|
rm /data/restarting
|
||||||
if echo "${CLUSTER_SETTINGS}" | grep -E "${NODE_NAME}"; then
|
echo "Node ${NODE_NAME} has restarted. Waiting to rejoin the cluster."
|
||||||
echo "Activate node ${NODE_NAME}"
|
wait_to_join
|
||||||
curl -K- <<< "--user ${ELASTICSEARCH_USERNAME}:${ELASTICSEARCH_PASSWORD}" -XPUT -H 'Content-Type: application/json' \
|
|
||||||
|
echo "Re-enabling Replica Shard Allocation"
|
||||||
|
curl -s -K- <<< "--user ${ELASTICSEARCH_USERNAME}:${ELASTICSEARCH_PASSWORD}" -XPUT -H 'Content-Type: application/json' \
|
||||||
"${ELASTICSEARCH_ENDPOINT}/_cluster/settings" -d "{
|
"${ELASTICSEARCH_ENDPOINT}/_cluster/settings" -d "{
|
||||||
\"transient\" :{
|
\"persistent\": {
|
||||||
\"cluster.routing.allocation.exclude._name\" : null
|
\"cluster.routing.allocation.enable\": null
|
||||||
}
|
}
|
||||||
}"
|
}"
|
||||||
fi
|
fi
|
||||||
echo "Node ${NODE_NAME} is ready to be used"
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function start_master_node () {
|
function start_master_node () {
|
||||||
@ -76,24 +86,37 @@ function start_data_node () {
|
|||||||
allocate_data_node &
|
allocate_data_node &
|
||||||
/usr/local/bin/docker-entrypoint.sh elasticsearch &
|
/usr/local/bin/docker-entrypoint.sh elasticsearch &
|
||||||
function drain_data_node () {
|
function drain_data_node () {
|
||||||
echo "Prepare to migrate data off node ${NODE_NAME}"
|
|
||||||
echo "Move all data from node ${NODE_NAME}"
|
# Implement the Rolling Restart Protocol Described Here:
|
||||||
curl -K- <<< "--user ${ELASTICSEARCH_USERNAME}:${ELASTICSEARCH_PASSWORD}" -XPUT -H 'Content-Type: application/json' \
|
# https://www.elastic.co/guide/en/elasticsearch/reference/7.x/restart-cluster.html#restart-cluster-rolling
|
||||||
|
|
||||||
|
echo "Disabling Replica Shard Allocation"
|
||||||
|
curl -s -K- <<< "--user ${ELASTICSEARCH_USERNAME}:${ELASTICSEARCH_PASSWORD}" -XPUT -H 'Content-Type: application/json' \
|
||||||
"${ELASTICSEARCH_ENDPOINT}/_cluster/settings" -d "{
|
"${ELASTICSEARCH_ENDPOINT}/_cluster/settings" -d "{
|
||||||
\"transient\" :{
|
\"persistent\": {
|
||||||
\"cluster.routing.allocation.exclude._name\" : \"${NODE_NAME}\"
|
\"cluster.routing.allocation.enable\": \"primaries\"
|
||||||
}
|
}
|
||||||
}"
|
}"
|
||||||
echo ""
|
|
||||||
while true ; do
|
# If version < 7.6 use _flush/synced; otherwise use _flush
|
||||||
echo -e "Wait for node ${NODE_NAME} to become empty"
|
# https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-synced-flush-api.html#indices-synced-flush-api
|
||||||
SHARDS_ALLOCATION=$(curl -K- <<< "--user ${ELASTICSEARCH_USERNAME}:${ELASTICSEARCH_PASSWORD}" \
|
|
||||||
-XGET "${ELASTICSEARCH_ENDPOINT}/_cat/shards")
|
version=$(curl -s -K- <<< "--user ${ELASTICSEARCH_USERNAME}:${ELASTICSEARCH_PASSWORD}" "${ELASTICSEARCH_ENDPOINT}/" | jq -r .version.number)
|
||||||
if ! echo "${SHARDS_ALLOCATION}" | grep -E "${NODE_NAME}"; then
|
|
||||||
break
|
if [[ $version =~ "7.1" ]]; then
|
||||||
|
action="_flush/synced"
|
||||||
|
else
|
||||||
|
action="_flush"
|
||||||
fi
|
fi
|
||||||
sleep 5
|
|
||||||
done
|
curl -s -K- <<< "--user ${ELASTICSEARCH_USERNAME}:${ELASTICSEARCH_PASSWORD}" -XPOST "${ELASTICSEARCH_ENDPOINT}/$action"
|
||||||
|
|
||||||
|
# TODO: Check the response of synced flush operations to make sure there are no failures.
|
||||||
|
# Synced flush operations that fail due to pending indexing operations are listed in the response body,
|
||||||
|
# although the request itself still returns a 200 OK status. If there are failures, reissue the request.
|
||||||
|
# (The only side effect of not doing so is slower start up times. See flush documentation linked above)
|
||||||
|
|
||||||
|
touch /data/restarting
|
||||||
echo "Node ${NODE_NAME} is ready to shutdown"
|
echo "Node ${NODE_NAME} is ready to shutdown"
|
||||||
kill -TERM 1
|
kill -TERM 1
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user