Ceph-Client: Update, Enable and Cleanup helm tests

- Update the ceph-client chart to:
1) Enable the ceph-client helm test by default. Rename the enable
key in values.yaml to follow the pattern used in other charts
2) Add the needed dependency for the ceph-client helm tests
3) Update the helm test script to reduce output and improve
error messages
4) Remove the unneeded ENV variables SPECS and EXPECTED_POOLMINSIZE
- Update gate scripts to run the helm test command (see the usage
sketch below)
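A minimal usage sketch of the renamed toggle and the gate-side invocation; the
release name, namespace and chart path below are assumptions, not taken from this
change:

  # Tests are now rendered by default (manifests.helm_tests: true);
  # override the key to skip the test pod:
  helm upgrade --install ceph-client ./ceph-client \
    --namespace=ceph \
    --set manifests.helm_tests=false

  # Gates run the test with an extended timeout:
  helm test ceph-client --timeout 900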

Change-Id: I6a0e4f5107e49dac081ac2037bcc0f9c0864793f
Renis Makadia 2019-03-25 13:14:33 -07:00 committed by chinasubbareddy mallavarapu
parent defb8b1f23
commit 5985b61286
9 changed files with 108 additions and 65 deletions


@@ -47,34 +47,91 @@ function check_osd_count() {
fi
}
+function mgr_validation() {
+echo "#### Start: MGR validation ####"
+mgr_dump=$(ceph mgr dump -f json-pretty)
+echo "Checking for ${MGR_COUNT} MGRs"
+mgr_avl=$(echo ${mgr_dump} | jq -r '.["available"]')
+if [ "x${mgr_avl}" == "xtrue" ]; then
+mgr_active=$(echo ${mgr_dump} | jq -r '.["active_name"]')
+echo "Out of ${MGR_COUNT}, 1 MGR is active"
+# Now lets check for standby managers
+mgr_stdby_count=$(echo ${mgr_dump} | jq -r '.["standbys"]' | jq length)
+#Total MGR Count - 1 Active = Expected MGRs
+expected_standbys=$(( MGR_COUNT -1 ))
+if [ $mgr_stdby_count -eq $expected_standbys ]
+then
+echo "Cluster has 1 Active MGR, $mgr_stdby_count Standbys MGR"
+else
+echo "Cluster Standbys MGR: Expected count= $expected_standbys Available=$mgr_stdby_count"
+retcode=1
+fi
+else
+echo "No Active Manager found, Expected 1 MGR to be active out of ${MGR_COUNT}"
+retcode=1
+fi
+if [ "x${retcode}" == "x1" ]
+then
+exit 1
+fi
+}
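# Editor's illustration, not part of the chart script: with MGR_COUNT=2 the
# mgr_validation above expects "ceph mgr dump" output shaped roughly like
#   {"available": true, "active_name": "x", "standbys": [{"gid": 4101, "name": "y"}]}
# i.e. one active manager plus (MGR_COUNT - 1) entries under "standbys".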
function pool_validation() {
echo "#### Start: Checking Ceph pools ####"
-pool_dump=$(ceph osd pool ls detail -f json-pretty)
-osd_crush_rule_dump=$(ceph osd crush rule dump -f json-pretty)
echo "From env variables, RBD pool replication count is: ${RBD}"
# Assuming all pools have same replication count as RBD
# If RBD replication count is greater then 1, POOLMINSIZE should be 1 less then replication count
# If RBD replication count is not greate then 1, then POOLMINSIZE should be 1
if [ ${RBD} -gt 1 ]; then
EXPECTED_POOLMINSIZE=$[${RBD}-1]
else
EXPECTED_POOLMINSIZE=1
fi
echo "EXPECTED_POOLMINSIZE: ${EXPECTED_POOLMINSIZE}"
expectedCrushRuleId=""
-nrules=$(echo ${osd_crush_rule_dump} | jq length)
+nrules=$(echo ${OSD_CRUSH_RULE_DUMP} | jq length)
c=$[nrules-1]
for n in $(seq 0 ${c})
do
-name=$(echo ${osd_crush_rule_dump} | jq -r .[${n}].rule_name)
+osd_crush_rule_obj=$(echo ${OSD_CRUSH_RULE_DUMP} | jq -r .[${n}])
+name=$(echo ${osd_crush_rule_obj} | jq -r .rule_name)
echo "Expected Crushrule: ${EXPECTED_CRUSHRULE}, Pool Crushmap: ${name}"
if [ "x${EXPECTED_CRUSHRULE}" == "x${name}" ]; then
-expectedCrushRuleId=$(echo ${osd_crush_rule_dump} | jq .[${n}].rule_id)
+expectedCrushRuleId=$(echo ${osd_crush_rule_obj} | jq .rule_id)
echo "Checking against rule: id: ${expectedCrushRuleId}, name:${name}"
else
echo "Didn't match"
fi
done
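# Editor's illustration, not part of the chart script: each OSD_CRUSH_RULE_DUMP entry
# parsed above looks roughly like {"rule_id": 0, "rule_name": "replicated_rule", ...};
# the loop records the rule_id whose rule_name matches EXPECTED_CRUSHRULE.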
echo "Checking cluster for size:${RBD}, min_size:${EXPECTED_POOLMINSIZE}, crush_rule:${EXPECTED_CRUSHRULE}, crush_rule_id:${expectedCrushRuleId}"
-npools=$(echo ${pool_dump} | jq length)
+npools=$(echo ${OSD_POOLS_DETAILS} | jq length)
i=$[npools - 1]
for n in $(seq 0 ${i})
do
-size=$(echo ${pool_dump} | jq -r ".[${n}][\"size\"]")
-min_size=$(echo ${pool_dump} | jq -r ".[${n}][\"min_size\"]")
-pg_num=$(echo ${pool_dump} | jq -r ".[${n}][\"pg_num\"]")
-pg_placement_num=$(echo ${pool_dump} | jq -r ".[${n}][\"pg_placement_num\"]")
-crush_rule=$(echo ${pool_dump} | jq -r ".[${n}][\"crush_rule\"]")
-name=$(echo ${pool_dump} | jq -r ".[${n}][\"pool_name\"]")
+pool_obj=$(echo ${OSD_POOLS_DETAILS} | jq -r ".[${n}]")
+size=$(echo ${pool_obj} | jq -r .size)
+min_size=$(echo ${pool_obj} | jq -r .min_size)
+pg_num=$(echo ${pool_obj} | jq -r .pg_num)
+pg_placement_num=$(echo ${pool_obj} | jq -r .pg_placement_num)
+crush_rule=$(echo ${pool_obj} | jq -r .crush_rule)
+name=$(echo ${pool_obj} | jq -r .pool_name)
if [ "x${size}" != "x${RBD}" ] || [ "x${min_size}" != "x${EXPECTED_POOLMINSIZE}" ] \
|| [ "x${pg_num}" != "x${pg_placement_num}" ] || [ "x${crush_rule}" != "x${expectedCrushRuleId}" ]; then
@@ -88,30 +145,33 @@ function pool_validation() {
function pool_failuredomain_validation() {
echo "#### Start: Checking Pools are configured with specific failure domain ####"
-osd_pool_ls_details=$(ceph osd pool ls detail -f json-pretty)
-osd_crush_rule_dump=$(ceph osd crush rule dump -f json-pretty)
expectedCrushRuleId=""
-nrules=$(echo ${osd_crush_rule_dump} | jq length)
+nrules=$(echo ${OSD_CRUSH_RULE_DUMP} | jq length)
c=$[nrules-1]
for n in $(seq 0 ${c})
do
-name=$(echo ${osd_crush_rule_dump} | jq -r .[${n}].rule_name)
+osd_crush_rule_obj=$(echo ${OSD_CRUSH_RULE_DUMP} | jq -r .[${n}])
+name=$(echo ${osd_crush_rule_obj} | jq -r .rule_name)
if [ "x${EXPECTED_CRUSHRULE}" == "x${name}" ]; then
-expectedCrushRuleId=$(echo ${osd_crush_rule_dump} | jq .[${n}].rule_id)
+expectedCrushRuleId=$(echo ${osd_crush_rule_obj} | jq .rule_id)
echo "Checking against rule: id: ${expectedCrushRuleId}, name:${name}"
fi
done
echo "Checking OSD pools are configured with Crush rule name:${EXPECTED_CRUSHRULE}, id:${expectedCrushRuleId}"
npools=$(echo ${osd_pool_ls_details} | jq length)
npools=$(echo ${OSD_POOLS_DETAILS} | jq length)
i=$[npools-1]
for p in $(seq 0 ${i})
do
-pool_crush_rule_id=$(echo $osd_pool_ls_details | jq -r ".[${p}][\"crush_rule\"]")
-pool_name=$(echo $osd_pool_ls_details | jq -r ".[${p}][\"pool_name\"]")
+pool_obj=$(echo ${OSD_POOLS_DETAILS} | jq -r ".[${p}]")
+pool_crush_rule_id=$(echo $pool_obj | jq -r .crush_rule)
+pool_name=$(echo $pool_obj | jq -r .pool_name)
if [ "x${pool_crush_rule_id}" == "x${expectedCrushRuleId}" ]; then
echo "--> Info: Pool ${pool_name} is configured with the correct rule ${pool_crush_rule_id}"
else
@@ -123,59 +183,37 @@ function pool_failuredomain_validation() {
function pg_validation() {
echo "#### Start: Checking placement groups active+clean ####"
-osd_pool_ls_details=$(ceph pg stat -f json-pretty)
-num_pgs=$(echo ${osd_pool_ls_details} | jq -r .num_pgs)
-npoolls=$(echo ${osd_pool_ls_details} | jq -r .num_pg_by_state | jq length)
-i=${npoolls-1}
+num_pgs=$(echo ${PG_STAT} | jq -r .num_pgs)
+npoolls=$(echo ${PG_STAT} | jq -r .num_pg_by_state | jq length)
+i=$[npoolls-1]
for n in $(seq 0 ${i})
do
-pg_state=$(echo ${osd_pool_ls_details} | jq -r .num_pg_by_state[${n}].name)
+pg_state=$(echo ${PG_STAT} | jq -r .num_pg_by_state[${n}].name)
if [ "xactive+clean" == "x${pg_state}" ]; then
-active_clean_pg_num=$(echo ${osd_pool_ls_details} | jq -r .num_pg_by_state[${n}].num)
+active_clean_pg_num=$(echo ${PG_STAT} | jq -r .num_pg_by_state[${n}].num)
if [ $num_pgs -eq $active_clean_pg_num ]; then
echo "Success: All PGs configured (${num_pgs}) are in active+clean status"
else
echo "Error: All PGs configured (${num_pgs}) are NOT in active+clean status"
exit 1
fi
else
echo "Error: PG state not in active+clean status"
exit 1
fi
done
}
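# Editor's illustration, not part of the chart script: pg_validation above parses
# "ceph pg stat -f json-pretty" output shaped roughly like
#   {"num_pgs": 40, "num_pg_by_state": [{"name": "active+clean", "num": 40}]}
# and exits non-zero if any other state appears or the counts differ.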
-function mgr_validation() {
-echo "#### Start: MGR validation ####"
-mgr_dump=$(ceph mgr dump -f json-pretty)
-echo "Checking for ${MGR_COUNT} MGRs"
-mgr_avl=$(echo ${mgr_dump} | jq -r '.["available"]')
-if [ "x${mgr_avl}" == "xtrue" ]; then
-mgr_active=$(echo ${mgr_dump} | jq -r '.["active_name"]')
-# Now test to check is we have at least one valid standby
-mgr_stdby_count=$(echo ${mgr_dump} | jq -r '.["standbys"]' | jq length)
-if [ $mgr_stdby_count -ge 1 ]
-then
-echo "Active manager ${mgr_active} is up and running. ${mgr_stdby_count} standby managers available"
-else
-echo "No standby Manager available"
-retcode=1
-fi
-else
-echo "Manager is not active"
-retcode=1
-fi
-if [ "x${retcode}" == "x1" ]
-then
-exit 1
-fi
-}
check_cluster_status
check_osd_count
mgr_validation
+OSD_POOLS_DETAILS=$(ceph osd pool ls detail -f json-pretty)
+OSD_CRUSH_RULE_DUMP=$(ceph osd crush rule dump -f json-pretty)
+PG_STAT=$(ceph pg stat -f json-pretty)
pg_validation
pool_validation
pool_failuredomain_validation
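As a rough manual equivalent of what the now-enabled test pod runs, the same queries
can be issued from a MON pod; the namespace and label selector below are assumptions,
not part of this change:

  MON_POD=$(kubectl get pods -n ceph -l application=ceph,component=mon \
    --no-headers | awk '{ print $1; exit }')
  kubectl exec -n ceph ${MON_POD} -- ceph osd pool ls detail -f json-pretty
  kubectl exec -n ceph ${MON_POD} -- ceph mgr dump -f json-pretty
  kubectl exec -n ceph ${MON_POD} -- ceph pg stat -f json-pretty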


@@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
limitations under the License.
*/}}
-{{- if .Values.manifests.pod_test }}
+{{- if .Values.manifests.helm_tests }}
{{- $envAll := . }}
{{- $serviceAccountName := printf "%s-%s" $envAll.Release.Name "test" }}
{{ tuple $envAll "tests" $serviceAccountName | include "helm-toolkit.snippets.kubernetes_pod_rbac_serviceaccount" }}
@@ -45,12 +45,8 @@ spec:
value: {{ .Values.conf.pool.target.osd | quote }}
- name: EXPECTED_CRUSHRULE
value: {{ .Values.conf.pool.default.crush_rule | default "replicated_rule" | quote }}
-- name: EXPECTED_POOLMINSIZE
-value: "2"
- name: MGR_COUNT
value: {{ .Values.pod.replicas.mgr | default "1" | quote }}
-- name: SPECS
-value: {{ include "helm-toolkit.utils.joinListWithComma" .Values.conf.pool.spec }}
{{- range $pool := .Values.conf.pool.spec -}}
{{- with $pool }}
- name: {{ .name | upper | replace "." "_" }}
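For context, the range block above emits one environment variable per conf.pool.spec
entry, named from the pool name, so a pool entry called "rbd" surfaces to the test
script as ${RBD}. A quick way to inspect what the rendered test pod carries (the pod
name and namespace are assumptions):

  kubectl get pod -n ceph ceph-client-test -o jsonpath='{.spec.containers[0].env}'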


@@ -410,6 +410,8 @@ dependencies:
services:
- endpoint: internal
service: ceph_mon
+- endpoint: internal
+service: ceph_mgr
bootstrap:
enabled: false
@@ -511,5 +513,5 @@ manifests:
job_image_repo_sync: true
job_rbd_pool: true
service_mgr: true
-pod_test: false
+helm_tests: true
cronjob_checkPGs: true


@@ -71,4 +71,4 @@ helm upgrade --install radosgw-osh-infra ./ceph-rgw \
#NOTE: Validate Deployment info
helm status radosgw-osh-infra
-helm test radosgw-osh-infra
+helm test radosgw-osh-infra --timeout 900


@@ -62,4 +62,4 @@ sleep 60 #NOTE(portdirect): Wait for ingress controller to update rules and rest
openstack service list
openstack endpoint list
-helm test radosgw-openstack
+helm test radosgw-openstack --timeout 900


@@ -207,3 +207,5 @@ for CHART in ceph-mon ceph-osd ceph-client ceph-provisioners; do
--no-headers | awk '{ print $1; exit }')
kubectl exec -n ceph ${MON_POD} -- ceph -s
done
+helm test ceph-osd --timeout 900
+helm test ceph-client --timeout 900
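# Editor's sketch, not part of the gate script: if a test fails, the validation output
# lands in the test pod logs; the pod names below assume the usual "<release>-test"
# naming and are not taken from this change.
#   kubectl logs -n ceph ceph-client-test
#   kubectl logs -n ceph ceph-osd-test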


@@ -131,3 +131,5 @@ for CHART in ceph-mon ceph-osd ceph-client ceph-provisioners; do
--no-headers | awk '{ print $1; exit }')
kubectl exec -n ceph ${MON_POD} -- ceph -s
done
+helm test ceph-osd --timeout 900
+helm test ceph-client --timeout 900


@@ -158,3 +158,6 @@ for CHART in ceph-mon ceph-osd ceph-client; do
--no-headers | awk '{ print $1; exit }')
kubectl exec -n tenant-ceph ${MON_POD} -- ceph -s
done
+helm test tenant-ceph-osd --timeout 900
+helm test ceph-client --timeout 900


@@ -71,4 +71,4 @@ helm upgrade --install radosgw-openstack ./ceph-rgw \
#NOTE: Validate Deployment info
helm status radosgw-openstack
-helm test radosgw-openstack
+helm test radosgw-openstack --timeout 900