[ceph-client] Separate pool quotas from pg_num calculations

Currently pool quotas and pg_num calculations are both based on
percent_total_data values. This is problematic when the amount of
data allowed in a pool doesn't match the percentage of the cluster's
data expected to be stored in that pool. It is also more intuitive
to define absolute quotas for pools.
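
For illustration, the previous behavior derived each pool's quota from
the cluster capacity, the pool's percent_total_data and the
cluster-wide target quota percentage, so the resulting byte limit was
never stated directly by the operator. A rough sketch of that
arithmetic with hypothetical numbers:

    # Old formula (see the removed POOL_QUOTA calculation below):
    #   quota = capacity * percent_total_data/100 * target_quota/100 / replicas
    capacity=$((100 * 1024 ** 4))   # 100 TiB raw capacity, example value
    python3 -c "print(int(${capacity} * 40 * 100 / 3 / 100 / 100))"
    # -> 14660155037013 bytes (~13.3 TiB) for a 40% pool with 3 replicas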

This change adds an optional pool_quota value that defines an
explicit value in bytes to be used as a pool quota. If pool_quota
is omitted for a given pool, that pool's quota is set to 0 (no
quota).
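
For example, an operator could request a fixed 100GiB quota on the rbd
pool through a values override; a minimal sketch, assuming the
conf.pool.spec layout shown in the chart values further down in this
diff (file path and numbers are illustrative):

    cat > /tmp/ceph-client-quota-overrides.yaml <<'EOF'
    conf:
      pool:
        spec:
          - name: rbd
            application: rbd
            replication: 3
            percent_total_data: 40
            # quoted string; converted to bytes by convert_to_bytes()
            pool_quota: "100GiB"
    EOF
    # Any pool spec without pool_quota ends up with max_bytes 0 (no quota).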

A check_pool_quota_target() Helm test has also been added to verify
that the sum of all pool quotas does not exceed the cluster's target
quota, if one is defined.
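
The added check sums each pool's quota, scaled by its replication
factor, and compares the total against a target derived from the raw
cluster capacity and the expected final OSD count; a minimal plain-bash
sketch of that comparison (all variable names and values below are
illustrative, not the chart's templated code):

    cluster_capacity=109951162777600    # raw bytes, as reported by 'ceph df'
    osd=3 final_osd=5 quota_percent=85  # conf.pool.target.* style inputs
    declare -A pool_quota_bytes=([rbd]=107374182400 [cephfs_data]=53687091200)
    declare -A pool_replication=([rbd]=3 [cephfs_data]=3)
    target_quota=$(python3 -c "print(int(${cluster_capacity} * ${final_osd} / ${osd} * ${quota_percent} / 100))")
    quota_sum=0
    for pool in "${!pool_quota_bytes[@]}"; do
      quota_sum=$((quota_sum + pool_quota_bytes[$pool] * pool_replication[$pool]))
    done
    if [[ ${quota_sum} -gt ${target_quota} ]]; then
      echo "The sum of all pool quotas exceeds the target quota for the cluster"
      exit 1
    fi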

Change-Id: I959fb9e95d8f1e03c36e44aba57c552a315867d0
Authored by Stephen Taylor on 2021-02-10 10:43:42 -07:00; committed by Stephen Taylor
parent 937c984766
commit cf7d665e79
4 changed files with 56 additions and 12 deletions


@@ -15,6 +15,6 @@ apiVersion: v1
appVersion: v1.0.0
description: OpenStack-Helm Ceph Client
name: ceph-client
version: 0.1.9
version: 0.1.10
home: https://github.com/ceph/ceph-client
...


@@ -243,42 +243,73 @@ function manage_pool () {
TOTAL_DATA_PERCENT=$4
TARGET_PG_PER_OSD=$5
POOL_CRUSH_RULE=$6
TARGET_QUOTA=$7
POOL_QUOTA=$7
POOL_PROTECTION=$8
CLUSTER_CAPACITY=$9
TOTAL_OSDS={{.Values.conf.pool.target.osd}}
POOL_PLACEMENT_GROUPS=$(python3 /tmp/pool-calc.py ${POOL_REPLICATION} ${TOTAL_OSDS} ${TOTAL_DATA_PERCENT} ${TARGET_PG_PER_OSD})
create_pool "${POOL_APPLICATION}" "${POOL_NAME}" "${POOL_REPLICATION}" "${POOL_PLACEMENT_GROUPS}" "${POOL_CRUSH_RULE}" "${POOL_PROTECTION}"
POOL_REPLICAS=$(ceph --cluster "${CLUSTER}" osd pool get "${POOL_NAME}" size | awk '{print $2}')
POOL_QUOTA=$(python3 -c "print(int($CLUSTER_CAPACITY * $TOTAL_DATA_PERCENT * $TARGET_QUOTA / $POOL_REPLICAS / 100 / 100))")
ceph --cluster "${CLUSTER}" osd pool set-quota "${POOL_NAME}" max_bytes $POOL_QUOTA
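# Note: a max_bytes value of 0 means no quota is enforced on the pool.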
}
# Helper to convert TiB, TB, GiB, GB, MiB, MB, KiB, KB, or bytes to bytes
function convert_to_bytes() {
value=${1}
value="$(echo "${value}" | sed 's/TiB/ \* 1024GiB/g')"
value="$(echo "${value}" | sed 's/TB/ \* 1000GB/g')"
value="$(echo "${value}" | sed 's/GiB/ \* 1024MiB/g')"
value="$(echo "${value}" | sed 's/GB/ \* 1000MB/g')"
value="$(echo "${value}" | sed 's/MiB/ \* 1024KiB/g')"
value="$(echo "${value}" | sed 's/MB/ \* 1000KB/g')"
value="$(echo "${value}" | sed 's/KiB/ \* 1024/g')"
value="$(echo "${value}" | sed 's/KB/ \* 1000/g')"
python3 -c "print(int(${value}))"
}
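# For example, "100GiB" above is rewritten to "100 * 1024 * 1024 * 1024" and evaluates
# to 107374182400; values with no unit suffix are passed through unchanged.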
set_cluster_flags
unset_cluster_flags
reweight_osds
{{ $targetOSDCount := .Values.conf.pool.target.osd }}
{{ $targetFinalOSDCount := .Values.conf.pool.target.final_osd }}
{{ $targetPGperOSD := .Values.conf.pool.target.pg_per_osd }}
{{ $crushRuleDefault := .Values.conf.pool.default.crush_rule }}
{{ $targetQuota := .Values.conf.pool.target.quota | default 100 }}
{{ $targetProtection := .Values.conf.pool.target.protected | default "false" | quote | lower }}
cluster_capacity=0
if [[ $(ceph -v | awk '/version/{print $3}' | cut -d. -f1) -ge 14 ]]; then
cluster_capacity=$(ceph --cluster "${CLUSTER}" df | grep "TOTAL" | awk '{print $2 substr($3, 1, 1)}' | numfmt --from=iec)
else
cluster_capacity=$(ceph --cluster "${CLUSTER}" df | head -n3 | tail -n1 | awk '{print $1 substr($2, 1, 1)}' | numfmt --from=iec)
fi
cluster_capacity=$(ceph --cluster "${CLUSTER}" df -f json-pretty | grep '"total_bytes":' | head -n1 | awk '{print $2}' | tr -d ',')
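# Read the raw total_bytes field from the JSON output rather than re-parsing
# human-readable units from the plain 'ceph df' table.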
if [[ $(ceph mgr versions | awk '/version/{print $3}' | cut -d. -f1) -eq 14 ]]; then
enable_or_disable_autoscaling
fi
# Check to make sure pool quotas don't exceed the expected cluster capacity in its final state
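# target_quota scales the current raw capacity up to the expected final OSD count and
# applies the allowed-full percentage, i.e. capacity * (final_osd / osd) * (quota / 100).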
target_quota=$(python3 -c "print(int(${cluster_capacity} * {{ $targetFinalOSDCount }} / {{ $targetOSDCount }} * {{ $targetQuota }} / 100))")
quota_sum=0
{{- range $pool := .Values.conf.pool.spec -}}
{{- with $pool }}
# Read the pool quota from the pool spec (no quota if absent)
# Set pool_quota to 0 if target_quota is 0
[[ ${target_quota} -eq 0 ]] && pool_quota=0 || pool_quota="$(convert_to_bytes {{ .pool_quota | default 0 }})"
quota_sum=$(python3 -c "print(int(${quota_sum} + (${pool_quota} * {{ .replication }})))")
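# Each quota is multiplied by the pool's replication factor because pool quotas limit
# stored (logical) bytes while cluster_capacity reflects raw capacity.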
{{- end }}
{{- end }}
if [[ ${quota_sum} -gt ${target_quota} ]]; then
echo "The sum of all pool quotas exceeds the target quota for the cluster"
exit 1
fi
{{- range $pool := .Values.conf.pool.spec -}}
{{- with $pool }}
# Read the pool quota from the pool spec (no quota if absent)
# Set pool_quota to 0 if target_quota is 0
[[ ${target_quota} -eq 0 ]] && pool_quota=0 || pool_quota="$(convert_to_bytes {{ .pool_quota | default 0 }})"
{{- if .crush_rule }}
manage_pool {{ .application }} {{ .name }} {{ .replication }} {{ .percent_total_data }} {{ $targetPGperOSD }} {{ .crush_rule }} {{ $targetQuota }} {{ $targetProtection }} ${cluster_capacity}
manage_pool {{ .application }} {{ .name }} {{ .replication }} {{ .percent_total_data }} {{ $targetPGperOSD }} {{ .crush_rule }} $pool_quota {{ $targetProtection }} ${cluster_capacity}
{{ else }}
manage_pool {{ .application }} {{ .name }} {{ .replication }} {{ .percent_total_data }} {{ $targetPGperOSD }} {{ $crushRuleDefault }} {{ $targetQuota }} {{ $targetProtection }} ${cluster_capacity}
manage_pool {{ .application }} {{ .name }} {{ .replication }} {{ .percent_total_data }} {{ $targetPGperOSD }} {{ $crushRuleDefault }} $pool_quota {{ $targetProtection }} ${cluster_capacity}
{{- end }}
{{- end }}
{{- end }}


@@ -272,8 +272,13 @@ conf:
tunables: null
target:
# NOTE(portdirect): arbitrarily we set the default number of expected OSD's to 5
# to match the number of nodes in the OSH gate (used only for helm tests).
# to match the number of nodes in the OSH gate.
osd: 5
# This is the number of OSDs expected in the final state. This is to allow the above
# target to be smaller initially in the event of a partial deployment. This way
# helm tests can still pass at deployment time and pool quotas can be set based on
# the expected final state (actual target quota = final_osd / osd * quota).
final_osd: 5
# This is just for helm tests, to proceed with the deployment if the specified % of
# OSDs are up and running.
required_percent_of_osds: 75
@@ -282,6 +287,7 @@ conf:
# NOTE(st053q): target quota should be set to the overall cluster full percentage
# to be tolerated as a quota (percent full to allow in order to tolerate some
# level of failure)
# Set target quota to "0" (must be quoted) to remove quotas for all pools
quota: 100
default:
# NOTE(supamatt): Accepted values are taken from `crush_rules` list.
@@ -336,6 +342,10 @@ conf:
application: rbd
replication: 3
percent_total_data: 40
# Example of 100 GiB pool_quota for rbd pool (no pool quota if absent)
# May be specified in TiB, TB, GiB, GB, MiB, MB, KiB, KB, or bytes
# NOTE: This should always be a string value to avoid Helm issues with large integers
# pool_quota: "100GiB"
# NOTE(supamatt): By default the crush rules used to create each pool will be
# taken from the pool default `crush_rule` unless a pool specific `crush_rule`
# is specified. The rule MUST exist for it to be defined here.


@@ -8,4 +8,7 @@ ceph-client:
- 0.1.5 Fix Helm test check_pgs() check for inactive PGs
- 0.1.6 Uplift from Nautilus to Octopus release
- 0.1.7 Don't wait for premerge PGs in the rbd pool job
- 0.1.8 enhance logic to enable the autoscaler for Octopus
- 0.1.9 Revert "[ceph-client] enhance logic to enable the autoscaler for Octopus"
- 0.1.10 Separate pool quotas from pg_num calculations
...