Ceph-Client: Update, Enable and Cleanup helm tests
- Update ceph-client chart to: 1) Enable the ceph-client helm test by default and rename the enabler key in values.yaml to follow the pattern used in other charts 2) Add the needed dependency for the ceph-client helm tests 3) Update the helm test script to reduce output and improve error messages 4) Remove the unneeded ENV variables SPECS and EXPECTED_POOLMINSIZE - Update gate scripts to run the helm test command Change-Id: I6a0e4f5107e49dac081ac2037bcc0f9c0864793f
This commit is contained in:
parent
defb8b1f23
commit
5985b61286
|
@ -47,34 +47,91 @@ function check_osd_count() {
|
|||
fi
|
||||
}
|
||||
|
||||
#######################################
# Validate the Ceph manager (MGR) daemons: exactly one active MGR plus
# MGR_COUNT - 1 standbys must be reported by "ceph mgr dump".
# Globals:   MGR_COUNT (read) - total number of MGRs expected
# Outputs:   progress and mismatch messages on stdout
# Returns:   0 when the counts match; otherwise exits the script with 1
#######################################
function mgr_validation() {
  local mgr_dump mgr_avl mgr_stdby_count expected_standbys
  # Start from a clean status so a "retcode" left behind by an earlier
  # check cannot fail this one.
  local retcode=0

  echo "#### Start: MGR validation ####"
  mgr_dump=$(ceph mgr dump -f json-pretty)
  echo "Checking for ${MGR_COUNT} MGRs"

  mgr_avl=$(echo "${mgr_dump}" | jq -r '.available')

  if [ "x${mgr_avl}" == "xtrue" ]; then
    echo "Out of ${MGR_COUNT}, 1 MGR is active"

    # Now let's check for standby managers
    mgr_stdby_count=$(echo "${mgr_dump}" | jq -r '.standbys | length')

    # Total MGR count - 1 active = expected standbys
    expected_standbys=$(( MGR_COUNT - 1 ))

    # Only compare when jq produced a number; anything else is a mismatch.
    if [[ "${mgr_stdby_count}" =~ ^[0-9]+$ ]] \
      && [ "${mgr_stdby_count}" -eq "${expected_standbys}" ]; then
      echo "Cluster has 1 Active MGR, $mgr_stdby_count Standbys MGR"
    else
      echo "Cluster Standbys MGR: Expected count= $expected_standbys Available=$mgr_stdby_count"
      retcode=1
    fi
  else
    echo "No Active Manager found, Expected 1 MGR to be active out of ${MGR_COUNT}"
    retcode=1
  fi

  if [ "${retcode}" -ne 0 ]; then
    exit 1
  fi
}
|
||||
|
||||
function pool_validation() {
|
||||
|
||||
echo "#### Start: Checking Ceph pools ####"
|
||||
pool_dump=$(ceph osd pool ls detail -f json-pretty)
|
||||
osd_crush_rule_dump=$(ceph osd crush rule dump -f json-pretty)
|
||||
|
||||
echo "From env variables, RBD pool replication count is: ${RBD}"
|
||||
|
||||
# Assuming all pools have same replication count as RBD
|
||||
# If RBD replication count is greater than 1, POOLMINSIZE should be 1 less than replication count
|
||||
# If RBD replication count is not greater than 1, then POOLMINSIZE should be 1
|
||||
|
||||
if [ ${RBD} -gt 1 ]; then
|
||||
EXPECTED_POOLMINSIZE=$[${RBD}-1]
|
||||
else
|
||||
EXPECTED_POOLMINSIZE=1
|
||||
fi
|
||||
|
||||
echo "EXPECTED_POOLMINSIZE: ${EXPECTED_POOLMINSIZE}"
|
||||
|
||||
expectedCrushRuleId=""
|
||||
nrules=$(echo ${osd_crush_rule_dump} | jq length)
|
||||
nrules=$(echo ${OSD_CRUSH_RULE_DUMP} | jq length)
|
||||
c=$[nrules-1]
|
||||
for n in $(seq 0 ${c})
|
||||
do
|
||||
name=$(echo ${osd_crush_rule_dump} | jq -r .[${n}].rule_name)
|
||||
osd_crush_rule_obj=$(echo ${OSD_CRUSH_RULE_DUMP} | jq -r .[${n}])
|
||||
|
||||
name=$(echo ${osd_crush_rule_obj} | jq -r .rule_name)
|
||||
echo "Expected Crushrule: ${EXPECTED_CRUSHRULE}, Pool Crushmap: ${name}"
|
||||
|
||||
if [ "x${EXPECTED_CRUSHRULE}" == "x${name}" ]; then
|
||||
expectedCrushRuleId=$(echo ${osd_crush_rule_dump} | jq .[${n}].rule_id)
|
||||
expectedCrushRuleId=$(echo ${osd_crush_rule_obj} | jq .rule_id)
|
||||
echo "Checking against rule: id: ${expectedCrushRuleId}, name:${name}"
|
||||
else
|
||||
echo "Didn't match"
|
||||
fi
|
||||
done
|
||||
echo "Checking cluster for size:${RBD}, min_size:${EXPECTED_POOLMINSIZE}, crush_rule:${EXPECTED_CRUSHRULE}, crush_rule_id:${expectedCrushRuleId}"
|
||||
|
||||
npools=$(echo ${pool_dump} | jq length)
|
||||
npools=$(echo ${OSD_POOLS_DETAILS} | jq length)
|
||||
i=$[npools - 1]
|
||||
for n in $(seq 0 ${i})
|
||||
do
|
||||
size=$(echo ${pool_dump} | jq -r ".[${n}][\"size\"]")
|
||||
min_size=$(echo ${pool_dump} | jq -r ".[${n}][\"min_size\"]")
|
||||
pg_num=$(echo ${pool_dump} | jq -r ".[${n}][\"pg_num\"]")
|
||||
pg_placement_num=$(echo ${pool_dump} | jq -r ".[${n}][\"pg_placement_num\"]")
|
||||
crush_rule=$(echo ${pool_dump} | jq -r ".[${n}][\"crush_rule\"]")
|
||||
name=$(echo ${pool_dump} | jq -r ".[${n}][\"pool_name\"]")
|
||||
pool_obj=$(echo ${OSD_POOLS_DETAILS} | jq -r ".[${n}]")
|
||||
|
||||
size=$(echo ${pool_obj} | jq -r .size)
|
||||
min_size=$(echo ${pool_obj} | jq -r .min_size)
|
||||
pg_num=$(echo ${pool_obj} | jq -r .pg_num)
|
||||
pg_placement_num=$(echo ${pool_obj} | jq -r .pg_placement_num)
|
||||
crush_rule=$(echo ${pool_obj} | jq -r .crush_rule)
|
||||
name=$(echo ${pool_obj} | jq -r .pool_name)
|
||||
|
||||
if [ "x${size}" != "x${RBD}" ] || [ "x${min_size}" != "x${EXPECTED_POOLMINSIZE}" ] \
|
||||
|| [ "x${pg_num}" != "x${pg_placement_num}" ] || [ "x${crush_rule}" != "x${expectedCrushRuleId}" ]; then
|
||||
|
@ -88,30 +145,33 @@ function pool_validation() {
|
|||
|
||||
function pool_failuredomain_validation() {
|
||||
echo "#### Start: Checking Pools are configured with specific failure domain ####"
|
||||
osd_pool_ls_details=$(ceph osd pool ls detail -f json-pretty)
|
||||
osd_crush_rule_dump=$(ceph osd crush rule dump -f json-pretty)
|
||||
|
||||
expectedCrushRuleId=""
|
||||
nrules=$(echo ${osd_crush_rule_dump} | jq length)
|
||||
nrules=$(echo ${OSD_CRUSH_RULE_DUMP} | jq length)
|
||||
c=$[nrules-1]
|
||||
for n in $(seq 0 ${c})
|
||||
do
|
||||
name=$(echo ${osd_crush_rule_dump} | jq -r .[${n}].rule_name)
|
||||
osd_crush_rule_obj=$(echo ${OSD_CRUSH_RULE_DUMP} | jq -r .[${n}])
|
||||
|
||||
name=$(echo ${osd_crush_rule_obj} | jq -r .rule_name)
|
||||
|
||||
if [ "x${EXPECTED_CRUSHRULE}" == "x${name}" ]; then
|
||||
expectedCrushRuleId=$(echo ${osd_crush_rule_dump} | jq .[${n}].rule_id)
|
||||
expectedCrushRuleId=$(echo ${osd_crush_rule_obj} | jq .rule_id)
|
||||
echo "Checking against rule: id: ${expectedCrushRuleId}, name:${name}"
|
||||
fi
|
||||
done
|
||||
|
||||
echo "Checking OSD pools are configured with Crush rule name:${EXPECTED_CRUSHRULE}, id:${expectedCrushRuleId}"
|
||||
|
||||
npools=$(echo ${osd_pool_ls_details} | jq length)
|
||||
npools=$(echo ${OSD_POOLS_DETAILS} | jq length)
|
||||
i=$[npools-1]
|
||||
for p in $(seq 0 ${i})
|
||||
do
|
||||
pool_crush_rule_id=$(echo $osd_pool_ls_details | jq -r ".[${p}][\"crush_rule\"]")
|
||||
pool_name=$(echo $osd_pool_ls_details | jq -r ".[${p}][\"pool_name\"]")
|
||||
pool_obj=$(echo ${OSD_POOLS_DETAILS} | jq -r ".[${p}]")
|
||||
|
||||
pool_crush_rule_id=$(echo $pool_obj | jq -r .crush_rule)
|
||||
pool_name=$(echo $pool_obj | jq -r .pool_name)
|
||||
|
||||
if [ "x${pool_crush_rule_id}" == "x${expectedCrushRuleId}" ]; then
|
||||
echo "--> Info: Pool ${pool_name} is configured with the correct rule ${pool_crush_rule_id}"
|
||||
else
|
||||
|
@ -123,59 +183,37 @@ function pool_failuredomain_validation() {
|
|||
|
||||
#######################################
# Verify that every placement group (PG) is in the active+clean state.
# Each entry of "num_pg_by_state" must be named "active+clean" and its
# count must equal the total "num_pgs"; any other state is an error.
# Globals:   PG_STAT (read) - JSON output of "ceph pg stat"; when it is
#            empty or unset the function queries Ceph itself
# Outputs:   success or error message on stdout
# Returns:   0 when all PGs are active+clean; otherwise exits with 1
#######################################
function pg_validation() {
  local pg_stat num_pgs num_states n pg_state active_clean_pg_num

  echo "#### Start: Checking placement groups active+clean ####"

  pg_stat="${PG_STAT:-}"
  if [ -z "${pg_stat}" ]; then
    pg_stat=$(ceph pg stat -f json-pretty)
  fi

  num_pgs=$(echo "${pg_stat}" | jq -r '.num_pgs')
  num_states=$(echo "${pg_stat}" | jq -r '.num_pg_by_state | length')

  # Without a usable state count nothing would be checked at all, so
  # treat that as a failure instead of passing silently.
  if ! [[ "${num_states}" =~ ^[0-9]+$ ]]; then
    echo "Error: PG state not in active+clean status"
    exit 1
  fi

  for (( n = 0; n < num_states; n++ )); do
    pg_state=$(echo "${pg_stat}" | jq -r ".num_pg_by_state[${n}].name")
    if [ "xactive+clean" == "x${pg_state}" ]; then
      active_clean_pg_num=$(echo "${pg_stat}" | jq -r ".num_pg_by_state[${n}].num")
      if [ "${num_pgs}" -eq "${active_clean_pg_num}" ]; then
        echo "Success: All PGs configured (${num_pgs}) are in active+clean status"
      else
        echo "Error: All PGs configured (${num_pgs}) are NOT in active+clean status"
        exit 1
      fi
    else
      echo "Error: PG state not in active+clean status"
      exit 1
    fi
  done
}
|
||||
|
||||
|
||||
#######################################
# Validate the Ceph manager (MGR) daemons: "ceph mgr dump" must report
# an available manager and at least one standby.
# Globals:   MGR_COUNT (read) - only echoed in the progress message
# Outputs:   progress and failure messages on stdout
# Returns:   0 when an active MGR and >= 1 standby exist; otherwise
#            exits the script with 1
#######################################
function mgr_validation() {
  local mgr_dump mgr_avl mgr_active mgr_stdby_count
  # Start from a clean status so a "retcode" left behind by an earlier
  # check cannot fail this one.
  local retcode=0

  echo "#### Start: MGR validation ####"
  mgr_dump=$(ceph mgr dump -f json-pretty)
  echo "Checking for ${MGR_COUNT} MGRs"

  mgr_avl=$(echo "${mgr_dump}" | jq -r '.available')

  if [ "x${mgr_avl}" == "xtrue" ]; then
    mgr_active=$(echo "${mgr_dump}" | jq -r '.active_name')

    # Now check that we have at least one valid standby
    mgr_stdby_count=$(echo "${mgr_dump}" | jq -r '.standbys | length')

    # Only compare when jq produced a number; anything else counts as
    # "no standby".
    if [[ "${mgr_stdby_count}" =~ ^[0-9]+$ ]] \
      && [ "${mgr_stdby_count}" -ge 1 ]; then
      echo "Active manager ${mgr_active} is up and running. ${mgr_stdby_count} standby managers available"
    else
      echo "No standby Manager available"
      retcode=1
    fi
  else
    echo "Manager is not active"
    retcode=1
  fi

  if [ "${retcode}" -ne 0 ]; then
    exit 1
  fi
}
|
||||
|
||||
# Cluster-level checks run first.
check_cluster_status
check_osd_count
mgr_validation

# Capture pool details, CRUSH rules and PG statistics once, as globals,
# before running the pool and PG validations.
OSD_POOLS_DETAILS=$(ceph osd pool ls detail -f json-pretty)
OSD_CRUSH_RULE_DUMP=$(ceph osd crush rule dump -f json-pretty)
PG_STAT=$(ceph pg stat -f json-pretty)

pg_validation
pool_validation
pool_failuredomain_validation
|
||||
|
|
|
@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
|
|||
limitations under the License.
|
||||
*/}}
|
||||
|
||||
{{- if .Values.manifests.pod_test }}
|
||||
{{- if .Values.manifests.helm_tests }}
|
||||
{{- $envAll := . }}
|
||||
{{- $serviceAccountName := printf "%s-%s" $envAll.Release.Name "test" }}
|
||||
{{ tuple $envAll "tests" $serviceAccountName | include "helm-toolkit.snippets.kubernetes_pod_rbac_serviceaccount" }}
|
||||
|
@ -45,12 +45,8 @@ spec:
|
|||
value: {{ .Values.conf.pool.target.osd | quote }}
|
||||
- name: EXPECTED_CRUSHRULE
|
||||
value: {{ .Values.conf.pool.default.crush_rule | default "replicated_rule" | quote }}
|
||||
- name: EXPECTED_POOLMINSIZE
|
||||
value: "2"
|
||||
- name: MGR_COUNT
|
||||
value: {{ .Values.pod.replicas.mgr | default "1" | quote }}
|
||||
- name: SPECS
|
||||
value: {{ include "helm-toolkit.utils.joinListWithComma" .Values.conf.pool.spec }}
|
||||
{{- range $pool := .Values.conf.pool.spec -}}
|
||||
{{- with $pool }}
|
||||
- name: {{ .name | upper | replace "." "_" }}
|
||||
|
|
|
@ -410,6 +410,8 @@ dependencies:
|
|||
services:
|
||||
- endpoint: internal
|
||||
service: ceph_mon
|
||||
- endpoint: internal
|
||||
service: ceph_mgr
|
||||
|
||||
bootstrap:
|
||||
enabled: false
|
||||
|
@ -511,5 +513,5 @@ manifests:
|
|||
job_image_repo_sync: true
|
||||
job_rbd_pool: true
|
||||
service_mgr: true
|
||||
pod_test: false
|
||||
helm_tests: true
|
||||
cronjob_checkPGs: true
|
||||
|
|
|
@ -71,4 +71,4 @@ helm upgrade --install radosgw-osh-infra ./ceph-rgw \
|
|||
#NOTE: Validate Deployment info
|
||||
helm status radosgw-osh-infra
|
||||
|
||||
helm test radosgw-osh-infra
|
||||
helm test radosgw-osh-infra --timeout 900
|
||||
|
|
|
@ -62,4 +62,4 @@ sleep 60 #NOTE(portdirect): Wait for ingress controller to update rules and rest
|
|||
openstack service list
|
||||
openstack endpoint list
|
||||
|
||||
helm test radosgw-openstack
|
||||
helm test radosgw-openstack --timeout 900
|
||||
|
|
|
@ -207,3 +207,5 @@ for CHART in ceph-mon ceph-osd ceph-client ceph-provisioners; do
|
|||
--no-headers | awk '{ print $1; exit }')
|
||||
kubectl exec -n ceph ${MON_POD} -- ceph -s
|
||||
done
|
||||
helm test ceph-osd --timeout 900
|
||||
helm test ceph-client --timeout 900
|
||||
|
|
|
@ -131,3 +131,5 @@ for CHART in ceph-mon ceph-osd ceph-client ceph-provisioners; do
|
|||
--no-headers | awk '{ print $1; exit }')
|
||||
kubectl exec -n ceph ${MON_POD} -- ceph -s
|
||||
done
|
||||
helm test ceph-osd --timeout 900
|
||||
helm test ceph-client --timeout 900
|
||||
|
|
|
@ -158,3 +158,6 @@ for CHART in ceph-mon ceph-osd ceph-client; do
|
|||
--no-headers | awk '{ print $1; exit }')
|
||||
kubectl exec -n tenant-ceph ${MON_POD} -- ceph -s
|
||||
done
|
||||
|
||||
helm test tenant-ceph-osd --timeout 900
|
||||
helm test ceph-client --timeout 900
|
||||
|
|
|
@ -71,4 +71,4 @@ helm upgrade --install radosgw-openstack ./ceph-rgw \
|
|||
#NOTE: Validate Deployment info
|
||||
helm status radosgw-openstack
|
||||
|
||||
helm test radosgw-openstack
|
||||
helm test radosgw-openstack --timeout 900
|
||||
|
|
Loading…
Reference in New Issue