[ceph-client] Performance optimizations for the ceph-rbd-pool job

This change attempts to reduce the number of Ceph commands required
in the ceph-rbd-pool job by collecting most pool properties in a
single call and by setting only those properties where the current
value differs from the target value.
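
In outline, the pattern is to fetch all of a pool's properties with a
single "osd pool get ... all" call and to issue an "osd pool set" only
when a value actually differs. A minimal sketch of that pattern is shown
below; the pool name ("rbd") and target size (3) are placeholders rather
than values from this change, and CLUSTER is assumed to be set by the
job environment as in the existing script:

  # Fetch every property of the pool once, as JSON
  pool_values=$(ceph --cluster "${CLUSTER}" osd pool get rbd all -f json)
  current_size=$(jq -r '.size' <<< "${pool_values}")
  # Only call "osd pool set" when the current value differs from the target
  if [[ "${current_size}" != "3" ]]; then
    ceph --cluster "${CLUSTER}" osd pool set rbd size 3
  fi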

Calls to manage_pool() are also run as background jobs, so all pools are
configured concurrently instead of serially. The script waits for every
one of those jobs to complete before proceeding, so it cannot finish
before all pools are fully configured.
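
A minimal sketch of this fan-out/wait pattern follows, using a
hypothetical configure_pool function in place of manage_pool() and two
placeholder pool names:

  MANAGE_POOL_PIDS=()
  for pool in pool-a pool-b; do
    # Configure each pool in the background and remember its PID
    configure_pool "${pool}" &
    MANAGE_POOL_PIDS+=( "$!" )
  done
  for pid in "${MANAGE_POOL_PIDS[@]}"; do
    # Block until the background job with this PID has exited
    tail --pid="${pid}" -f /dev/null
  done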

Change-Id: If105cd7146313ab9074eedc09580671a0eafcec5
Author: Stephen Taylor
Date: 2021-08-27 12:28:06 -06:00
parent b1a247e7f5
commit 46c8218fbf
3 changed files with 68 additions and 30 deletions


@@ -15,6 +15,6 @@ apiVersion: v1
appVersion: v1.0.0
description: OpenStack-Helm Ceph Client
name: ceph-client
version: 0.1.23
version: 0.1.24
home: https://github.com/ceph/ceph-client
...


@@ -29,6 +29,10 @@ if [[ ! -e ${ADMIN_KEYRING} ]]; then
exit 1
fi
function wait_for_pid() {
tail --pid=$1 -f /dev/null
}
function wait_for_pgs () {
echo "#### Start: Checking pgs ####"
@@ -176,6 +180,21 @@ function unset_cluster_flags () {
fi
}
# Helper function to set pool properties only if the target value differs from
# the current value to optimize performance
function set_pool_property() {
POOL_NAME=$1
PROPERTY_NAME=$2
CURRENT_PROPERTY_VALUE=$3
TARGET_PROPERTY_VALUE=$4
if [[ "${CURRENT_PROPERTY_VALUE}" != "${TARGET_PROPERTY_VALUE}" ]]; then
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" "${PROPERTY_NAME}" "${TARGET_PROPERTY_VALUE}"
fi
echo "${TARGET_PROPERTY_VALUE}"
}
function create_pool () {
POOL_APPLICATION=$1
POOL_NAME=$2
@@ -194,47 +213,53 @@ function create_pool () {
ceph --cluster "${CLUSTER}" osd pool application enable "${POOL_NAME}" "${POOL_APPLICATION}"
fi
pool_values=$(ceph --cluster "${CLUSTER}" osd pool get "${POOL_NAME}" all -f json)
if [[ $(ceph mgr versions | awk '/version/{print $3}' | cut -d. -f1) -ge 14 ]]; then
if [[ "${ENABLE_AUTOSCALER}" == "true" ]]; then
pool_values=$(ceph --cluster "${CLUSTER}" osd pool get "${POOL_NAME}" all -f json)
pg_num=$(jq '.pg_num' <<< "${pool_values}")
pg_num_min=$(jq '.pg_num_min' <<< "${pool_values}")
pg_num=$(jq -r '.pg_num' <<< "${pool_values}")
pgp_num=$(jq -r '.pgp_num' <<< "${pool_values}")
pg_num_min=$(jq -r '.pg_num_min' <<< "${pool_values}")
pg_autoscale_mode=$(jq -r '.pg_autoscale_mode' <<< "${pool_values}")
# set pg_num_min to PG_NUM_MIN before enabling autoscaler
if [[ ${pg_num} -lt ${PG_NUM_MIN} ]]; then
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" pg_autoscale_mode off
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" pg_num ${PG_NUM_MIN}
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" pgp_num ${PG_NUM_MIN}
pg_autoscale_mode=$(set_pool_property "${POOL_NAME}" pg_autoscale_mode "${pg_autoscale_mode}" "off")
pg_num=$(set_pool_property "${POOL_NAME}" pg_num "${pg_num}" "${PG_NUM_MIN}")
pgp_num=$(set_pool_property "${POOL_NAME}" pgp_num "${pgp_num}" "${PG_NUM_MIN}")
fi
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" pg_num_min ${PG_NUM_MIN}
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" pg_autoscale_mode on
pg_num_min=$(set_pool_property "${POOL_NAME}" pg_num_min "${pg_num_min}" "${PG_NUM_MIN}")
pg_autoscale_mode=$(set_pool_property "${POOL_NAME}" pg_autoscale_mode "${pg_autoscale_mode}" "on")
else
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" pg_autoscale_mode off
pg_autoscale_mode=$(set_pool_property "${POOL_NAME}" pg_autoscale_mode "${pg_autoscale_mode}" "off")
fi
fi
#
# Make sure pool is not protected after creation AND expansion so we can manipulate its settings.
# Final protection settings are applied once parameters (size, pg) have been adjusted.
#
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" nosizechange false
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" nopgchange false
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" nodelete false
#
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" size ${POOL_REPLICATION}
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" crush_rule "${POOL_CRUSH_RULE}"
nosizechange=$(jq -r '.nosizechange' <<< "${pool_values}")
nopgchange=$(jq -r '.nopgchange' <<< "${pool_values}")
nodelete=$(jq -r '.nodelete' <<< "${pool_values}")
size=$(jq -r '.size' <<< "${pool_values}")
crush_rule=$(jq -r '.crush_rule' <<< "${pool_values}")
nosizechange=$(set_pool_property "${POOL_NAME}" nosizechange "${nosizechange}" "false")
nopgchange=$(set_pool_property "${POOL_NAME}" nopgchange "${nopgchange}" "false")
nodelete=$(set_pool_property "${POOL_NAME}" nodelete "${nodelete}" "false")
size=$(set_pool_property "${POOL_NAME}" size "${size}" "${POOL_REPLICATION}")
crush_rule=$(set_pool_property "${POOL_NAME}" crush_rule "${crush_rule}" "${POOL_CRUSH_RULE}")
# Set pg_num and pgp_num for the pool
if [[ ${POOL_PLACEMENT_GROUPS} -gt 0 ]]; then
for PG_PARAM in pg_num pgp_num; do
CURRENT_PG_VALUE=$(ceph --cluster "${CLUSTER}" osd pool get "${POOL_NAME}" "${PG_PARAM}" | awk "/^${PG_PARAM}:/ { print \$NF }")
if [ "${POOL_PLACEMENT_GROUPS}" -gt "${CURRENT_PG_VALUE}" ]; then
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" "${PG_PARAM}" "${POOL_PLACEMENT_GROUPS}"
fi
done
pg_num=$(jq -r ".pg_num" <<< "${pool_values}")
pgp_num=$(jq -r ".pgp_num" <<< "${pool_values}")
pg_num=$(set_pool_property "${POOL_NAME}" pg_num "${pg_num}" "${POOL_PLACEMENT_GROUPS}")
pgp_num=$(set_pool_property "${POOL_NAME}" pgp_num "${pgp_num}" "${POOL_PLACEMENT_GROUPS}")
fi
# This handles the cluster expansion case, where replication may change from initialization
if [ ${POOL_REPLICATION} -gt 1 ]; then
min_size=$(jq -r '.min_size' <<< "${pool_values}")
EXPECTED_POOLMINSIZE=$((POOL_REPLICATION - 1))
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" min_size ${EXPECTED_POOLMINSIZE}
min_size=$(set_pool_property "${POOL_NAME}" min_size "${min_size}" "${EXPECTED_POOLMINSIZE}")
fi
#
# Handling of .Values.conf.pool.target.protected:
@@ -251,8 +276,8 @@ function create_pool () {
# - nodelete = Do not allow deletion of the pool
#
if [ "x${POOL_PROTECTION}" == "xtrue" ] || [ "x${POOL_PROTECTION}" == "x1" ]; then
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" nosizechange true
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" nodelete true
nosizechange=$(set_pool_property "${POOL_NAME}" nosizechange "${nosizechange}" "true")
nodelete=$(set_pool_property "${POOL_NAME}" nodelete "${nodelete}" "true")
fi
}
@@ -274,7 +299,6 @@ function manage_pool () {
fi
fi
create_pool "${POOL_APPLICATION}" "${POOL_NAME}" "${POOL_REPLICATION}" "${POOL_PLACEMENT_GROUPS}" "${POOL_CRUSH_RULE}" "${POOL_PROTECTION}"
POOL_REPLICAS=$(ceph --cluster "${CLUSTER}" osd pool get "${POOL_NAME}" size | awk '{print $2}')
ceph --cluster "${CLUSTER}" osd pool set-quota "${POOL_NAME}" max_bytes $POOL_QUOTA
}
@@ -326,6 +350,9 @@ if [[ $(ceph mgr versions | awk '/version/{print $3}' | cut -d. -f1) -ge 14 ]] &
disable_autoscaling
fi
# Track the manage_pool() PIDs in an array so we can wait for them to finish
MANAGE_POOL_PIDS=()
{{- range $pool := .Values.conf.pool.spec -}}
{{- with $pool }}
pool_name="{{ .name }}"
@@ -339,14 +366,19 @@ fi
# Set pool_quota to 0 if target_quota is 0
[[ ${target_quota} -eq 0 ]] && pool_quota=0 || pool_quota="$(convert_to_bytes {{ .pool_quota | default 0 }})"
{{- if .crush_rule }}
manage_pool {{ .application }} ${pool_name} {{ .replication }} {{ .percent_total_data }} {{ $targetPGperOSD }} {{ .crush_rule }} $pool_quota {{ $targetProtection }} ${cluster_capacity}
manage_pool {{ .application }} ${pool_name} {{ .replication }} {{ .percent_total_data }} {{ $targetPGperOSD }} {{ .crush_rule }} $pool_quota {{ $targetProtection }} ${cluster_capacity} &
{{ else }}
manage_pool {{ .application }} ${pool_name} {{ .replication }} {{ .percent_total_data }} {{ $targetPGperOSD }} {{ $crushRuleDefault }} $pool_quota {{ $targetProtection }} ${cluster_capacity}
manage_pool {{ .application }} ${pool_name} {{ .replication }} {{ .percent_total_data }} {{ $targetPGperOSD }} {{ $crushRuleDefault }} $pool_quota {{ $targetProtection }} ${cluster_capacity} &
{{- end }}
MANAGE_POOL_PID=$!
MANAGE_POOL_PIDS+=( $MANAGE_POOL_PID )
{{- if .rename }}
# Wait for manage_pool() to finish for this pool before trying to rename the pool
wait_for_pid $MANAGE_POOL_PID
# If a rename value exists, the pool exists, and a pool with the rename value doesn't exist, rename the pool
if [[ -n "$(ceph --cluster ${CLUSTER} osd pool ls | grep ^{{ .name }}$)" ]] &&
[[ -z "$(ceph --cluster ${CLUSTER} osd pool ls | grep ^{{ .rename }}$)" ]]; then
pool_list=$(ceph --cluster ${CLUSTER} osd pool ls)
if [[ -n $(grep ^{{ .name }}$ <<< "${pool_list}") ]] &&
[[ -z $(grep ^{{ .rename }}$ <<< "${pool_list}") ]]; then
ceph --cluster "${CLUSTER}" osd pool rename "{{ .name }}" "{{ .rename }}"
pool_name="{{ .rename }}"
fi
@@ -364,6 +396,11 @@ fi
{{- end }}
{{- end }}
# Wait for all manage_pool() instances to finish before proceeding
for pool_pid in "${MANAGE_POOL_PIDS[@]}"; do
wait_for_pid $pool_pid
done
if [[ $(ceph mgr versions | awk '/version/{print $3}' | cut -d. -f1) -ge 14 ]] && [[ "${ENABLE_AUTOSCALER}" == "true" ]]; then
enable_autoscaling
fi


@@ -24,4 +24,5 @@ ceph-client:
- 0.1.21 Fix Ceph checkDNS script
- 0.1.22 Set pg_num_min in all cases
- 0.1.23 Helm 3 - Fix Job labels
- 0.1.24 Performance optimizations for the ceph-rbd-pool job
...