[ceph-client] Separate pool quotas from pg_num calculations

Currently pool quotas and pg_num calculations are both based on
percent_total_data values. This is problematic when the amount of
data allowed in a pool doesn't match the percentage of the cluster's
data expected to be stored in that pool. It is also more intuitive
to define absolute quotas for pools.
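
For illustration, the previous behavior derived each pool's quota from
the cluster capacity, the pool's percent_total_data and the
cluster-wide target quota percentage, so the resulting byte limit was
never stated directly by the operator. A rough sketch of that
arithmetic with hypothetical numbers:

    # Old formula (see the removed POOL_QUOTA calculation below):
    #   quota = capacity * percent_total_data/100 * target_quota/100 / replicas
    capacity=$((100 * 1024 ** 4))   # 100 TiB raw capacity, example value
    python3 -c "print(int(${capacity} * 40 * 100 / 3 / 100 / 100))"
    # -> 14660155037013 bytes (~13.3 TiB) for a 40% pool with 3 replicas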

This change adds an optional pool_quota value that defines an
explicit value in bytes to be used as a pool quota. If pool_quota
is omitted for a given pool, that pool's quota is set to 0 (no
quota).
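
For example, an operator could request a fixed 100GiB quota on the rbd
pool through a values override; a minimal sketch, assuming the
conf.pool.spec layout shown in the chart values further down in this
diff (file path and numbers are illustrative):

    cat > /tmp/ceph-client-quota-overrides.yaml <<'EOF'
    conf:
      pool:
        spec:
          - name: rbd
            application: rbd
            replication: 3
            percent_total_data: 40
            # quoted string; converted to bytes by convert_to_bytes()
            pool_quota: "100GiB"
    EOF
    # Any pool spec without pool_quota ends up with max_bytes 0 (no quota).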

A check_pool_quota_target() Helm test has also been added to verify
that the sum of all pool quotas does not exceed the cluster's target
quota, if one is defined.
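
The added check sums each pool's quota, scaled by its replication
factor, and compares the total against a target derived from the raw
cluster capacity and the expected final OSD count; a minimal plain-bash
sketch of that comparison (all variable names and values below are
illustrative, not the chart's templated code):

    cluster_capacity=109951162777600    # raw bytes, as reported by 'ceph df'
    osd=3 final_osd=5 quota_percent=85  # conf.pool.target.* style inputs
    declare -A pool_quota_bytes=([rbd]=107374182400 [cephfs_data]=53687091200)
    declare -A pool_replication=([rbd]=3 [cephfs_data]=3)
    target_quota=$(python3 -c "print(int(${cluster_capacity} * ${final_osd} / ${osd} * ${quota_percent} / 100))")
    quota_sum=0
    for pool in "${!pool_quota_bytes[@]}"; do
      quota_sum=$((quota_sum + pool_quota_bytes[$pool] * pool_replication[$pool]))
    done
    if [[ ${quota_sum} -gt ${target_quota} ]]; then
      echo "The sum of all pool quotas exceeds the target quota for the cluster"
      exit 1
    fi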

Change-Id: I959fb9e95d8f1e03c36e44aba57c552a315867d0
Authored by Stephen Taylor on 2021-02-10 10:43:42 -07:00; committed by Stephen Taylor
parent 937c984766
commit cf7d665e79
4 changed files with 56 additions and 12 deletions


@@ -15,6 +15,6 @@ apiVersion: v1
appVersion: v1.0.0
description: OpenStack-Helm Ceph Client
name: ceph-client
version: 0.1.9
version: 0.1.10
home: https://github.com/ceph/ceph-client
...


@@ -243,42 +243,73 @@ function manage_pool () {
TOTAL_DATA_PERCENT=$4
TARGET_PG_PER_OSD=$5
POOL_CRUSH_RULE=$6
TARGET_QUOTA=$7
POOL_QUOTA=$7
POOL_PROTECTION=$8
CLUSTER_CAPACITY=$9
TOTAL_OSDS={{.Values.conf.pool.target.osd}}
POOL_PLACEMENT_GROUPS=$(python3 /tmp/pool-calc.py ${POOL_REPLICATION} ${TOTAL_OSDS} ${TOTAL_DATA_PERCENT} ${TARGET_PG_PER_OSD})
create_pool "${POOL_APPLICATION}" "${POOL_NAME}" "${POOL_REPLICATION}" "${POOL_PLACEMENT_GROUPS}" "${POOL_CRUSH_RULE}" "${POOL_PROTECTION}"
POOL_REPLICAS=$(ceph --cluster "${CLUSTER}" osd pool get "${POOL_NAME}" size | awk '{print $2}')
POOL_QUOTA=$(python3 -c "print(int($CLUSTER_CAPACITY * $TOTAL_DATA_PERCENT * $TARGET_QUOTA / $POOL_REPLICAS / 100 / 100))")
ceph --cluster "${CLUSTER}" osd pool set-quota "${POOL_NAME}" max_bytes $POOL_QUOTA
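# Note: a max_bytes value of 0 means no quota is enforced on the pool.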
}
# Helper to convert TiB, TB, GiB, GB, MiB, MB, KiB, KB, or bytes to bytes
function convert_to_bytes() {
value=${1}
value="$(echo "${value}" | sed 's/TiB/ \* 1024GiB/g')"
value="$(echo "${value}" | sed 's/TB/ \* 1000GB/g')"
value="$(echo "${value}" | sed 's/GiB/ \* 1024MiB/g')"
value="$(echo "${value}" | sed 's/GB/ \* 1000MB/g')"
value="$(echo "${value}" | sed 's/MiB/ \* 1024KiB/g')"
value="$(echo "${value}" | sed 's/MB/ \* 1000KB/g')"
value="$(echo "${value}" | sed 's/KiB/ \* 1024/g')"
value="$(echo "${value}" | sed 's/KB/ \* 1000/g')"
python3 -c "print(int(${value}))"
}
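# For example, "100GiB" above is rewritten to "100 * 1024 * 1024 * 1024" and evaluates
# to 107374182400; values with no unit suffix are passed through unchanged.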
set_cluster_flags
unset_cluster_flags
reweight_osds
{{ $targetOSDCount := .Values.conf.pool.target.osd }}
{{ $targetFinalOSDCount := .Values.conf.pool.target.final_osd }}
{{ $targetPGperOSD := .Values.conf.pool.target.pg_per_osd }}
{{ $crushRuleDefault := .Values.conf.pool.default.crush_rule }}
{{ $targetQuota := .Values.conf.pool.target.quota | default 100 }}
{{ $targetProtection := .Values.conf.pool.target.protected | default "false" | quote | lower }}
cluster_capacity=0
if [[ $(ceph -v | awk '/version/{print $3}' | cut -d. -f1) -ge 14 ]]; then
cluster_capacity=$(ceph --cluster "${CLUSTER}" df | grep "TOTAL" | awk '{print $2 substr($3, 1, 1)}' | numfmt --from=iec)
else
cluster_capacity=$(ceph --cluster "${CLUSTER}" df | head -n3 | tail -n1 | awk '{print $1 substr($2, 1, 1)}' | numfmt --from=iec)
fi
cluster_capacity=$(ceph --cluster "${CLUSTER}" df -f json-pretty | grep '"total_bytes":' | head -n1 | awk '{print $2}' | tr -d ',')
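# Read the raw total_bytes field from the JSON output rather than re-parsing
# human-readable units from the plain 'ceph df' table.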
if [[ $(ceph mgr versions | awk '/version/{print $3}' | cut -d. -f1) -eq 14 ]]; then
enable_or_disable_autoscaling
fi
# Check to make sure pool quotas don't exceed the expected cluster capacity in its final state
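# target_quota scales the current raw capacity up to the expected final OSD count and
# applies the allowed-full percentage, i.e. capacity * (final_osd / osd) * (quota / 100).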
target_quota=$(python3 -c "print(int(${cluster_capacity} * {{ $targetFinalOSDCount }} / {{ $targetOSDCount }} * {{ $targetQuota }} / 100))")
quota_sum=0
{{- range $pool := .Values.conf.pool.spec -}}
{{- with $pool }}
# Read the pool quota from the pool spec (no quota if absent)
# Set pool_quota to 0 if target_quota is 0
[[ ${target_quota} -eq 0 ]] && pool_quota=0 || pool_quota="$(convert_to_bytes {{ .pool_quota | default 0 }})"
quota_sum=$(python3 -c "print(int(${quota_sum} + (${pool_quota} * {{ .replication }})))")
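# Each quota is multiplied by the pool's replication factor because pool quotas limit
# stored (logical) bytes while cluster_capacity reflects raw capacity.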
{{- end }}
{{- end }}
if [[ ${quota_sum} -gt ${target_quota} ]]; then
echo "The sum of all pool quotas exceeds the target quota for the cluster"
exit 1
fi
{{- range $pool := .Values.conf.pool.spec -}}
{{- with $pool }}
# Read the pool quota from the pool spec (no quota if absent)
# Set pool_quota to 0 if target_quota is 0
[[ ${target_quota} -eq 0 ]] && pool_quota=0 || pool_quota="$(convert_to_bytes {{ .pool_quota | default 0 }})"
{{- if .crush_rule }}
manage_pool {{ .application }} {{ .name }} {{ .replication }} {{ .percent_total_data }} {{ $targetPGperOSD }} {{ .crush_rule }} {{ $targetQuota }} {{ $targetProtection }} ${cluster_capacity}
manage_pool {{ .application }} {{ .name }} {{ .replication }} {{ .percent_total_data }} {{ $targetPGperOSD }} {{ .crush_rule }} $pool_quota {{ $targetProtection }} ${cluster_capacity}
{{ else }}
manage_pool {{ .application }} {{ .name }} {{ .replication }} {{ .percent_total_data }} {{ $targetPGperOSD }} {{ $crushRuleDefault }} {{ $targetQuota }} {{ $targetProtection }} ${cluster_capacity}
manage_pool {{ .application }} {{ .name }} {{ .replication }} {{ .percent_total_data }} {{ $targetPGperOSD }} {{ $crushRuleDefault }} $pool_quota {{ $targetProtection }} ${cluster_capacity}
{{- end }}
{{- end }}
{{- end }}


@@ -272,8 +272,13 @@ conf:
tunables: null
target:
# NOTE(portdirect): arbitrarily we set the default number of expected OSD's to 5
# to match the number of nodes in the OSH gate (used only for helm tests).
# to match the number of nodes in the OSH gate.
osd: 5
# This is the number of OSDs expected in the final state. This is to allow the above
# target to be smaller initially in the event of a partial deployment. This way
# helm tests can still pass at deployment time and pool quotas can be set based on
# the expected final state (actual target quota = final_osd / osd * quota).
final_osd: 5
# This is just for helm tests, to proceed with the deployment if the specified % of
# OSDs are up and running.
required_percent_of_osds: 75
@@ -282,6 +287,7 @@ conf:
# NOTE(st053q): target quota should be set to the overall cluster full percentage
# to be tolerated as a quota (percent full to allow in order to tolerate some
# level of failure)
# Set target quota to "0" (must be quoted) to remove quotas for all pools
quota: 100
default:
# NOTE(supamatt): Accepted values are taken from `crush_rules` list.
@@ -336,6 +342,10 @@ conf:
application: rbd
replication: 3
percent_total_data: 40
# Example of 100 GiB pool_quota for rbd pool (no pool quota if absent)
# May be specified in TiB, TB, GiB, GB, MiB, MB, KiB, KB, or bytes
# NOTE: This should always be a string value to avoid Helm issues with large integers
# pool_quota: "100GiB"
# NOTE(supamatt): By default the crush rules used to create each pool will be
# taken from the pool default `crush_rule` unless a pool specific `crush_rule`
# is specified. The rule MUST exist for it to be defined here.


@@ -8,4 +8,7 @@ ceph-client:
- 0.1.5 Fix Helm test check_pgs() check for inactive PGs
- 0.1.6 Uplift from Nautilus to Octopus release
- 0.1.7 Don't wait for premerge PGs in the rbd pool job
- 0.1.8 enhance logic to enable the autoscaler for Octopus
- 0.1.9 Revert "[ceph-client] enhance logic to enable the autoscaler for Octopus"
- 0.1.10 Separate pool quotas from pg_num calculations
...