Merge "[ceph-client] Fix a helm test issue and disable PG autoscaler"
commit f79704a8f0
@@ -24,7 +24,37 @@ function check_cluster_status() {
  if [ "x${ceph_health_status}" == "xHEALTH_OK" ]; then
    echo "Ceph status is HEALTH_OK"
  else
-   echo "Ceph cluster status is NOT HEALTH_OK."
+   echo "Ceph cluster status is not HEALTH_OK, checking PG states"
    retries=0
    # If all PGs are active, pass
    # This grep is just as robust as jq and is Ceph-version agnostic unlike jq
    while [[ $(ceph pg ls -f json-pretty | grep '"state":' | grep -v "active") ]] && [[ retries -lt 60 ]]; do
      # If all inactive PGs are peering, wait for peering to complete
      # Run 'ceph pg ls' again before failing in case PG states have changed
      if [[ $(ceph pg ls -f json-pretty | grep '"state":' | grep -v -e "active" -e "peering") ]]; then
        # If inactive PGs aren't peering, fail
        echo "Failure, found inactive PGs that aren't peering"
        exit 1
      fi
      sleep 3
      ((retries=retries+1))
    done
    # If peering PGs haven't gone active after retries have expired, fail
    if [[ retries -ge 60 ]]; then
      echo "PGs appear to be stuck peering"
      exit 1
    fi
  fi
}

function check_recovery_flags() {
  echo "### Start: Checking for flags that will prevent recovery"

  # Ensure there are no flags set that will prevent recovery of degraded PGs
  if [[ $(ceph osd stat | grep "norecover\|nobackfill\|norebalance") ]]; then
    ceph osd stat
    echo "Flags are set that prevent recovery of degraded PGs"
    exit 1
  fi
}
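
The grep-based check above works on the raw JSON because `ceph pg ls -f json-pretty` emits one `"state":` entry per PG; anything not matching "active" is an inactive PG, and only "peering" states are tolerated while the retry budget lasts. A minimal sketch of what the two filter stages see (the sample states are illustrative, not from the source):

  # Hypothetical '"state":' lines from 'ceph pg ls -f json-pretty':
  #   "state": "active+clean",   <- dropped by grep -v "active"
  #   "state": "peering",        <- passes stage 1, dropped by stage 2, so the loop keeps waiting
  #   "state": "down",           <- passes both stages, so the test fails immediately
  ceph pg ls -f json-pretty | grep '"state":' | grep -v "active"                   # stage 1: any inactive PGs?
  ceph pg ls -f json-pretty | grep '"state":' | grep -v -e "active" -e "peering"   # stage 2: any that aren't peering?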
@@ -257,3 +287,4 @@ pool_validation
pool_failuredomain_validation
check_failure_domain_count_per_pool
check_cluster_status
check_recovery_flags
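
If, as the commit subject suggests, the script above is the chart's helm test, the fix can be exercised by re-running the release's tests; the release and namespace names here are placeholders, not from the source:

  # Hypothetical invocation against a deployed ceph-client release
  helm test ceph-client --namespace ceph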
@@ -46,6 +46,17 @@ function wait_for_inactive_pgs () {
  fi
}

function check_recovery_flags () {
  echo "### Start: Checking for flags that will prevent recovery"

  # Ensure there are no flags set that will prevent recovery of degraded PGs
  if [[ $(ceph osd stat | grep "norecover\|nobackfill\|norebalance") ]]; then
    ceph osd stat
    echo "Flags are set that prevent recovery of degraded PGs"
    exit 1
  fi
}

function check_osd_count() {
  echo "#### Start: Checking OSD count ####"
  noup_flag=$(ceph osd stat | awk '/noup/ {print $2}')
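
check_recovery_flags keys off the flags summary that `ceph osd stat` prints. A short sketch of how such a flag comes and goes; the reported output is illustrative of the format, not captured from a real cluster:

  ceph osd set norecover     # 'ceph osd stat' now reports something like: ...; flags norecover
  ceph osd stat              # the check fails while this flag (or nobackfill/norebalance) is set
  ceph osd unset norecover   # clear it so degraded PGs can recover and the check passes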
@@ -119,10 +130,12 @@ function reweight_osds () {
  done
}

-function enable_autoscaling () {
+function enable_or_disable_autoscaling () {
  if [[ "${ENABLE_AUTOSCALER}" == "true" ]]; then
    ceph mgr module enable pg_autoscaler
    ceph config set global osd_pool_default_pg_autoscale_mode on
  else
    ceph mgr module disable pg_autoscaler
  fi
}
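
ENABLE_AUTOSCALER is presumably fed in from the chart (the values.yaml change at the end of this commit flips the matching feature to false). On a Nautilus or later cluster, the resulting state can be verified with standard Ceph CLI calls:

  ceph mgr module ls | grep pg_autoscaler   # is the mgr module enabled?
  ceph osd pool autoscale-status            # per-pool autoscale mode, once the module is on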
@@ -232,7 +245,7 @@ reweight_osds
cluster_capacity=0
if [[ -z "$(ceph osd versions | grep ceph\ version | grep -v nautilus)" ]]; then
  cluster_capacity=$(ceph --cluster "${CLUSTER}" df | grep "TOTAL" | awk '{print $2 substr($3, 1, 1)}' | numfmt --from=iec)
- enable_autoscaling
+ enable_or_disable_autoscaling
else
  cluster_capacity=$(ceph --cluster "${CLUSTER}" df | head -n3 | tail -n1 | awk '{print $1 substr($2, 1, 1)}' | numfmt --from=iec)
fi
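
The awk/numfmt pipeline converts the human-readable capacity from `ceph df` into bytes by gluing the number to the first letter of its unit and letting numfmt parse the IEC suffix. A standalone sketch against a hypothetical Nautilus-style TOTAL line:

  # Hypothetical 'ceph df' summary line:  TOTAL  300 GiB  290 GiB ...
  echo 'TOTAL 300 GiB 290 GiB' | awk '{print $2 substr($3, 1, 1)}'   # -> 300G
  echo '300G' | numfmt --from=iec                                    # -> 322122547200 bytes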
@@ -253,3 +266,4 @@ ceph --cluster "${CLUSTER}" osd crush tunables {{ .Values.conf.pool.crush.tunabl
{{- end }}

wait_for_inactive_pgs
check_recovery_flags
@@ -254,7 +254,7 @@ conf:
  features:
    mds: true
    mgr: true
-   pg_autoscaler: true
+   pg_autoscaler: false
  cluster_flags:
    # List of flags to set or unset separated by spaces
    set: ""
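
With the chart default flipped to false, a deployment that still wants the autoscaler has to opt back in explicitly. A hypothetical override at upgrade time (release name and chart path are placeholders):

  helm upgrade ceph-client ./ceph-client --set conf.features.pg_autoscaler=true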