[ceph-client] Performance optimizations for the ceph-rbd-pool job

This change attempts to reduce the number of Ceph commands required
in the ceph-rbd-pool job by collecting most pool properties in a
single call and by setting only those properties where the current
value differs from the target value.

Calls to manage_pool() are also run in the background in parallel,
so all pools are configured concurrently instead of serially. The
script waits for all of those calls to complete before proceeding
in order to avoid issues related to the script finishing before all
pools are completely configured.

Change-Id: If105cd7146313ab9074eedc09580671a0eafcec5
This commit is contained in:
Stephen Taylor 2021-08-27 12:28:06 -06:00
parent b1a247e7f5
commit 46c8218fbf
3 changed files with 68 additions and 30 deletions
ceph-client
Chart.yaml
templates/bin/pool
releasenotes/notes

View File

@ -15,6 +15,6 @@ apiVersion: v1
appVersion: v1.0.0 appVersion: v1.0.0
description: OpenStack-Helm Ceph Client description: OpenStack-Helm Ceph Client
name: ceph-client name: ceph-client
version: 0.1.23 version: 0.1.24
home: https://github.com/ceph/ceph-client home: https://github.com/ceph/ceph-client
... ...

View File

@ -29,6 +29,10 @@ if [[ ! -e ${ADMIN_KEYRING} ]]; then
exit 1 exit 1
fi fi
# Block until the process with the given PID has exited.
#
# Arguments: $1 - PID of the process to wait for
#
# Uses the --pid option of tail: following /dev/null never yields any output,
# so tail just sits there and terminates once the given PID is gone.
function wait_for_pid() {
  local pid=$1
  tail --pid="${pid}" -f /dev/null
}
function wait_for_pgs () { function wait_for_pgs () {
echo "#### Start: Checking pgs ####" echo "#### Start: Checking pgs ####"
@ -176,6 +180,21 @@ function unset_cluster_flags () {
fi fi
} }
# Helper function to set pool properties only if the target value differs from
# the current value to optimize performance
#
# Globals:   CLUSTER (read) - cluster name passed to the ceph CLI
# Arguments: $1 - pool name
#            $2 - property name
#            $3 - current value of the property
#            $4 - target value of the property
# Outputs:   on stdout, exactly one line: the value the property holds after the
#            call (the target value, or the current value if the update failed)
# Returns:   0 if nothing had to change or the update succeeded, otherwise the
#            exit status of the failed ceph command
function set_pool_property() {
  local pool_name=$1
  local property_name=$2
  local current_value=$3
  local target_value=$4
  local rc=0

  if [[ "${current_value}" != "${target_value}" ]]; then
    # Callers capture stdout to track the property value, so anything ceph
    # prints is diverted to stderr to keep the captured value clean.
    ceph --cluster "${CLUSTER}" osd pool set "${pool_name}" "${property_name}" "${target_value}" >&2 || rc=$?
    if [[ ${rc} -ne 0 ]]; then
      # The property was not changed; do not pretend that it was.
      printf '%s\n' "${current_value}"
      return "${rc}"
    fi
  fi
  printf '%s\n' "${target_value}"
}
function create_pool () { function create_pool () {
POOL_APPLICATION=$1 POOL_APPLICATION=$1
POOL_NAME=$2 POOL_NAME=$2
@ -194,47 +213,53 @@ function create_pool () {
ceph --cluster "${CLUSTER}" osd pool application enable "${POOL_NAME}" "${POOL_APPLICATION}" ceph --cluster "${CLUSTER}" osd pool application enable "${POOL_NAME}" "${POOL_APPLICATION}"
fi fi
pool_values=$(ceph --cluster "${CLUSTER}" osd pool get "${POOL_NAME}" all -f json)
if [[ $(ceph mgr versions | awk '/version/{print $3}' | cut -d. -f1) -ge 14 ]]; then if [[ $(ceph mgr versions | awk '/version/{print $3}' | cut -d. -f1) -ge 14 ]]; then
if [[ "${ENABLE_AUTOSCALER}" == "true" ]]; then if [[ "${ENABLE_AUTOSCALER}" == "true" ]]; then
pool_values=$(ceph --cluster "${CLUSTER}" osd pool get "${POOL_NAME}" all -f json) pg_num=$(jq -r '.pg_num' <<< "${pool_values}")
pg_num=$(jq '.pg_num' <<< "${pool_values}") pgp_num=$(jq -r '.pgp_num' <<< "${pool_values}")
pg_num_min=$(jq '.pg_num_min' <<< "${pool_values}") pg_num_min=$(jq -r '.pg_num_min' <<< "${pool_values}")
pg_autoscale_mode=$(jq -r '.pg_autoscale_mode' <<< "${pool_values}")
# set pg_num_min to PG_NUM_MIN before enabling autoscaler # set pg_num_min to PG_NUM_MIN before enabling autoscaler
if [[ ${pg_num} -lt ${PG_NUM_MIN} ]]; then if [[ ${pg_num} -lt ${PG_NUM_MIN} ]]; then
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" pg_autoscale_mode off pg_autoscale_mode=$(set_pool_property "${POOL_NAME}" pg_autoscale_mode "${pg_autoscale_mode}" "off")
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" pg_num ${PG_NUM_MIN} pg_num=$(set_pool_property "${POOL_NAME}" pg_num "${pg_num}" "${PG_NUM_MIN}")
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" pgp_num ${PG_NUM_MIN} pgp_num=$(set_pool_property "${POOL_NAME}" pgp_num "${pgp_num}" "${PG_NUM_MIN}")
fi fi
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" pg_num_min ${PG_NUM_MIN} pg_num_min=$(set_pool_property "${POOL_NAME}" pg_num_min "${pg_num_min}" "${PG_NUM_MIN}")
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" pg_autoscale_mode on pg_autoscale_mode=$(set_pool_property "${POOL_NAME}" pg_autoscale_mode "${pg_autoscale_mode}" "on")
else else
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" pg_autoscale_mode off pg_autoscale_mode=$(set_pool_property "${POOL_NAME}" pg_autoscale_mode "${pg_autoscale_mode}" "off")
fi fi
fi fi
# #
# Make sure pool is not protected after creation AND expansion so we can manipulate its settings. # Make sure pool is not protected after creation AND expansion so we can manipulate its settings.
# Final protection settings are applied once parameters (size, pg) have been adjusted. # Final protection settings are applied once parameters (size, pg) have been adjusted.
# #
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" nosizechange false nosizechange=$(jq -r '.nosizechange' <<< "${pool_values}")
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" nopgchange false nopschange=$(jq -r '.nopschange' <<< "${pool_values}")
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" nodelete false nodelete=$(jq -r '.nodelete' <<< "${pool_values}")
# size=$(jq -r '.size' <<< "${pool_values}")
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" size ${POOL_REPLICATION} crush_rule=$(jq -r '.crush_rule' <<< "${pool_values}")
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" crush_rule "${POOL_CRUSH_RULE}" nosizechange=$(set_pool_property "${POOL_NAME}" nosizechange "${nosizechange}" "false")
nopgchange=$(set_pool_property "${POOL_NAME}" nopgchange "${nopgchange}" "false")
nodelete=$(set_pool_property "${POOL_NAME}" nodelete "${nodelete}" "false")
size=$(set_pool_property "${POOL_NAME}" size "${size}" "${POOL_REPLICATION}")
crush_rule=$(set_pool_property "${POOL_NAME}" crush_rule "${crush_rule}" "${POOL_CRUSH_RULE}")
# set pg_num to pool # set pg_num to pool
if [[ ${POOL_PLACEMENT_GROUPS} -gt 0 ]]; then if [[ ${POOL_PLACEMENT_GROUPS} -gt 0 ]]; then
for PG_PARAM in pg_num pgp_num; do pg_num=$(jq -r ".pg_num" <<< "${pool_values}")
CURRENT_PG_VALUE=$(ceph --cluster "${CLUSTER}" osd pool get "${POOL_NAME}" "${PG_PARAM}" | awk "/^${PG_PARAM}:/ { print \$NF }") pgp_num=$(jq -r ".pgp_num" <<< "${pool_values}")
if [ "${POOL_PLACEMENT_GROUPS}" -gt "${CURRENT_PG_VALUE}" ]; then pg_num=$(set_pool_property "${POOL_NAME}" pg_num "${pg_num}" "${POOL_PLACEMENT_GROUPS}")
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" "${PG_PARAM}" "${POOL_PLACEMENT_GROUPS}" pgp_num=$(set_pool_property "${POOL_NAME}" pgp_num "${pgp_num}" "${POOL_PLACEMENT_GROUPS}")
fi
done
fi fi
#This is to handle cluster expansion case where replication may change from initialization #This is to handle cluster expansion case where replication may change from initialization
if [ ${POOL_REPLICATION} -gt 1 ]; then if [ ${POOL_REPLICATION} -gt 1 ]; then
min_size=$(jq -r '.min_size' <<< "${pool_values}")
EXPECTED_POOLMINSIZE=$[${POOL_REPLICATION}-1] EXPECTED_POOLMINSIZE=$[${POOL_REPLICATION}-1]
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" min_size ${EXPECTED_POOLMINSIZE} min_size=$(set_pool_property "${POOL_NAME}" min_size "${min_size}" "${EXPECTED_POOLMINSIZE}")
fi fi
# #
# Handling of .Values.conf.pool.target.protected: # Handling of .Values.conf.pool.target.protected:
@ -251,8 +276,8 @@ function create_pool () {
# - nodelete = Do not allow deletion of the pool # - nodelete = Do not allow deletion of the pool
# #
if [ "x${POOL_PROTECTION}" == "xtrue" ] || [ "x${POOL_PROTECTION}" == "x1" ]; then if [ "x${POOL_PROTECTION}" == "xtrue" ] || [ "x${POOL_PROTECTION}" == "x1" ]; then
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" nosizechange true nosizechange=$(set_pool_property "${POOL_NAME}" nosizechange "${nosizechange}" "true")
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" nodelete true nodelete=$(set_pool_property "${POOL_NAME}" nodelete "${nodelete}" "true")
fi fi
} }
@ -274,7 +299,6 @@ function manage_pool () {
fi fi
fi fi
create_pool "${POOL_APPLICATION}" "${POOL_NAME}" "${POOL_REPLICATION}" "${POOL_PLACEMENT_GROUPS}" "${POOL_CRUSH_RULE}" "${POOL_PROTECTION}" create_pool "${POOL_APPLICATION}" "${POOL_NAME}" "${POOL_REPLICATION}" "${POOL_PLACEMENT_GROUPS}" "${POOL_CRUSH_RULE}" "${POOL_PROTECTION}"
POOL_REPLICAS=$(ceph --cluster "${CLUSTER}" osd pool get "${POOL_NAME}" size | awk '{print $2}')
ceph --cluster "${CLUSTER}" osd pool set-quota "${POOL_NAME}" max_bytes $POOL_QUOTA ceph --cluster "${CLUSTER}" osd pool set-quota "${POOL_NAME}" max_bytes $POOL_QUOTA
} }
@ -326,6 +350,9 @@ if [[ $(ceph mgr versions | awk '/version/{print $3}' | cut -d. -f1) -ge 14 ]] &
disable_autoscaling disable_autoscaling
fi fi
# Track the manage_pool() PIDs in an array so we can wait for them to finish
MANAGE_POOL_PIDS=()
{{- range $pool := .Values.conf.pool.spec -}} {{- range $pool := .Values.conf.pool.spec -}}
{{- with $pool }} {{- with $pool }}
pool_name="{{ .name }}" pool_name="{{ .name }}"
@ -339,14 +366,19 @@ fi
# Set pool_quota to 0 if target_quota is 0 # Set pool_quota to 0 if target_quota is 0
[[ ${target_quota} -eq 0 ]] && pool_quota=0 || pool_quota="$(convert_to_bytes {{ .pool_quota | default 0 }})" [[ ${target_quota} -eq 0 ]] && pool_quota=0 || pool_quota="$(convert_to_bytes {{ .pool_quota | default 0 }})"
{{- if .crush_rule }} {{- if .crush_rule }}
manage_pool {{ .application }} ${pool_name} {{ .replication }} {{ .percent_total_data }} {{ $targetPGperOSD }} {{ .crush_rule }} $pool_quota {{ $targetProtection }} ${cluster_capacity} manage_pool {{ .application }} ${pool_name} {{ .replication }} {{ .percent_total_data }} {{ $targetPGperOSD }} {{ .crush_rule }} $pool_quota {{ $targetProtection }} ${cluster_capacity} &
{{ else }} {{ else }}
manage_pool {{ .application }} ${pool_name} {{ .replication }} {{ .percent_total_data }} {{ $targetPGperOSD }} {{ $crushRuleDefault }} $pool_quota {{ $targetProtection }} ${cluster_capacity} manage_pool {{ .application }} ${pool_name} {{ .replication }} {{ .percent_total_data }} {{ $targetPGperOSD }} {{ $crushRuleDefault }} $pool_quota {{ $targetProtection }} ${cluster_capacity} &
{{- end }} {{- end }}
MANAGE_POOL_PID=$!
MANAGE_POOL_PIDS+=( $MANAGE_POOL_PID )
{{- if .rename }} {{- if .rename }}
# Wait for manage_pool() to finish for this pool before trying to rename the pool
wait_for_pid $MANAGE_POOL_PID
# If a rename value exists, the pool exists, and a pool with the rename value doesn't exist, rename the pool # If a rename value exists, the pool exists, and a pool with the rename value doesn't exist, rename the pool
if [[ -n "$(ceph --cluster ${CLUSTER} osd pool ls | grep ^{{ .name }}$)" ]] && pool_list=$(ceph --cluster ${CLUSTER} osd pool ls)
[[ -z "$(ceph --cluster ${CLUSTER} osd pool ls | grep ^{{ .rename }}$)" ]]; then if [[ -n $(grep ^{{ .name }}$ <<< "${pool_list}") ]] &&
[[ -z $(grep ^{{ .rename }}$ <<< "${pool_list}") ]]; then
ceph --cluster "${CLUSTER}" osd pool rename "{{ .name }}" "{{ .rename }}" ceph --cluster "${CLUSTER}" osd pool rename "{{ .name }}" "{{ .rename }}"
pool_name="{{ .rename }}" pool_name="{{ .rename }}"
fi fi
@ -364,6 +396,11 @@ fi
{{- end }} {{- end }}
{{- end }} {{- end }}
# Wait for all manage_pool() instances to finish before proceeding
for pool_pid in ${MANAGE_POOL_PIDS[@]}; do
wait_for_pid $pool_pid
done
if [[ $(ceph mgr versions | awk '/version/{print $3}' | cut -d. -f1) -ge 14 ]] && [[ "${ENABLE_AUTOSCALER}" == "true" ]]; then if [[ $(ceph mgr versions | awk '/version/{print $3}' | cut -d. -f1) -ge 14 ]] && [[ "${ENABLE_AUTOSCALER}" == "true" ]]; then
enable_autoscaling enable_autoscaling
fi fi

View File

@ -24,4 +24,5 @@ ceph-client:
- 0.1.21 Fix Ceph checkDNS script - 0.1.21 Fix Ceph checkDNS script
- 0.1.22 Set pg_num_min in all cases - 0.1.22 Set pg_num_min in all cases
- 0.1.23 Helm 3 - Fix Job labels - 0.1.23 Helm 3 - Fix Job labels
- 0.1.24 Performance optimizations for the ceph-rbd-pool job
... ...