|
|
|
@ -47,34 +47,91 @@ function check_osd_count() {
|
|
|
|
|
fi
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#######################################
# Validate the Ceph manager (MGR) daemons: one MGR must be available and the
# number of standbys must equal MGR_COUNT - 1.
# Globals:
#   MGR_COUNT (read) - total number of MGRs expected in the cluster
# Arguments:
#   None
# Outputs:
#   Progress and result messages on stdout
# Returns:
#   0 when the check passes. On failure the function calls `exit 1`, which
#   terminates the calling script.
#######################################
function mgr_validation() {
  # Keep retcode local and start from 0 so a stale global value left behind
  # by other code cannot fail an otherwise healthy check.
  local retcode=0
  local mgr_dump mgr_avl mgr_stdby_count expected_standbys

  echo "#### Start: MGR validation ####"
  mgr_dump=$(ceph mgr dump -f json-pretty)
  echo "Checking for ${MGR_COUNT} MGRs"

  # Feed the JSON through a quoted here-string so it is neither word-split
  # nor glob-expanded on its way to jq.
  mgr_avl=$(jq -r '.available' <<<"${mgr_dump}")

  if [ "x${mgr_avl}" == "xtrue" ]; then
    echo "Out of ${MGR_COUNT}, 1 MGR is active"

    # Now let's check for standby managers
    mgr_stdby_count=$(jq -r '.standbys | length' <<<"${mgr_dump}")

    # Total MGR count - 1 active = expected standbys
    expected_standbys=$(( MGR_COUNT - 1 ))

    # Quoted operands: an empty count takes the failure branch instead of
    # breaking the test expression.
    if [ "${mgr_stdby_count}" -eq "${expected_standbys}" ]; then
      echo "Cluster has 1 Active MGR, $mgr_stdby_count Standbys MGR"
    else
      echo "Cluster Standbys MGR: Expected count= $expected_standbys Available=$mgr_stdby_count"
      retcode=1
    fi
  else
    echo "No Active Manager found, Expected 1 MGR to be active out of ${MGR_COUNT}"
    retcode=1
  fi

  if [ "x${retcode}" == "x1" ]; then
    exit 1
  fi
}
|
|
|
|
|
|
|
|
|
|
function pool_validation() {
|
|
|
|
|
|
|
|
|
|
echo "#### Start: Checking Ceph pools ####"
|
|
|
|
|
pool_dump=$(ceph osd pool ls detail -f json-pretty)
|
|
|
|
|
osd_crush_rule_dump=$(ceph osd crush rule dump -f json-pretty)
|
|
|
|
|
|
|
|
|
|
echo "From env variables, RBD pool replication count is: ${RBD}"
|
|
|
|
|
|
|
|
|
|
# Assuming all pools have same replication count as RBD
|
|
|
|
|
# If RBD replication count is greater than 1, POOLMINSIZE should be 1 less than the replication count
|
|
|
|
|
# If RBD replication count is not greater than 1, then POOLMINSIZE should be 1
|
|
|
|
|
|
|
|
|
|
if [ ${RBD} -gt 1 ]; then
|
|
|
|
|
EXPECTED_POOLMINSIZE=$[${RBD}-1]
|
|
|
|
|
else
|
|
|
|
|
EXPECTED_POOLMINSIZE=1
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
echo "EXPECTED_POOLMINSIZE: ${EXPECTED_POOLMINSIZE}"
|
|
|
|
|
|
|
|
|
|
expectedCrushRuleId=""
|
|
|
|
|
nrules=$(echo ${osd_crush_rule_dump} | jq length)
|
|
|
|
|
nrules=$(echo ${OSD_CRUSH_RULE_DUMP} | jq length)
|
|
|
|
|
c=$[nrules-1]
|
|
|
|
|
for n in $(seq 0 ${c})
|
|
|
|
|
do
|
|
|
|
|
name=$(echo ${osd_crush_rule_dump} | jq -r .[${n}].rule_name)
|
|
|
|
|
osd_crush_rule_obj=$(echo ${OSD_CRUSH_RULE_DUMP} | jq -r .[${n}])
|
|
|
|
|
|
|
|
|
|
name=$(echo ${osd_crush_rule_obj} | jq -r .rule_name)
|
|
|
|
|
echo "Expected Crushrule: ${EXPECTED_CRUSHRULE}, Pool Crushmap: ${name}"
|
|
|
|
|
|
|
|
|
|
if [ "x${EXPECTED_CRUSHRULE}" == "x${name}" ]; then
|
|
|
|
|
expectedCrushRuleId=$(echo ${osd_crush_rule_dump} | jq .[${n}].rule_id)
|
|
|
|
|
expectedCrushRuleId=$(echo ${osd_crush_rule_obj} | jq .rule_id)
|
|
|
|
|
echo "Checking against rule: id: ${expectedCrushRuleId}, name:${name}"
|
|
|
|
|
else
|
|
|
|
|
echo "Didn't match"
|
|
|
|
|
fi
|
|
|
|
|
done
|
|
|
|
|
echo "Checking cluster for size:${RBD}, min_size:${EXPECTED_POOLMINSIZE}, crush_rule:${EXPECTED_CRUSHRULE}, crush_rule_id:${expectedCrushRuleId}"
|
|
|
|
|
|
|
|
|
|
npools=$(echo ${pool_dump} | jq length)
|
|
|
|
|
npools=$(echo ${OSD_POOLS_DETAILS} | jq length)
|
|
|
|
|
i=$[npools - 1]
|
|
|
|
|
for n in $(seq 0 ${i})
|
|
|
|
|
do
|
|
|
|
|
size=$(echo ${pool_dump} | jq -r ".[${n}][\"size\"]")
|
|
|
|
|
min_size=$(echo ${pool_dump} | jq -r ".[${n}][\"min_size\"]")
|
|
|
|
|
pg_num=$(echo ${pool_dump} | jq -r ".[${n}][\"pg_num\"]")
|
|
|
|
|
pg_placement_num=$(echo ${pool_dump} | jq -r ".[${n}][\"pg_placement_num\"]")
|
|
|
|
|
crush_rule=$(echo ${pool_dump} | jq -r ".[${n}][\"crush_rule\"]")
|
|
|
|
|
name=$(echo ${pool_dump} | jq -r ".[${n}][\"pool_name\"]")
|
|
|
|
|
pool_obj=$(echo ${OSD_POOLS_DETAILS} | jq -r ".[${n}]")
|
|
|
|
|
|
|
|
|
|
size=$(echo ${pool_obj} | jq -r .size)
|
|
|
|
|
min_size=$(echo ${pool_obj} | jq -r .min_size)
|
|
|
|
|
pg_num=$(echo ${pool_obj} | jq -r .pg_num)
|
|
|
|
|
pg_placement_num=$(echo ${pool_obj} | jq -r .pg_placement_num)
|
|
|
|
|
crush_rule=$(echo ${pool_obj} | jq -r .crush_rule)
|
|
|
|
|
name=$(echo ${pool_obj} | jq -r .pool_name)
|
|
|
|
|
|
|
|
|
|
if [ "x${size}" != "x${RBD}" ] || [ "x${min_size}" != "x${EXPECTED_POOLMINSIZE}" ] \
|
|
|
|
|
|| [ "x${pg_num}" != "x${pg_placement_num}" ] || [ "x${crush_rule}" != "x${expectedCrushRuleId}" ]; then
|
|
|
|
@ -88,30 +145,33 @@ function pool_validation() {
|
|
|
|
|
|
|
|
|
|
function pool_failuredomain_validation() {
|
|
|
|
|
echo "#### Start: Checking Pools are configured with specific failure domain ####"
|
|
|
|
|
osd_pool_ls_details=$(ceph osd pool ls detail -f json-pretty)
|
|
|
|
|
osd_crush_rule_dump=$(ceph osd crush rule dump -f json-pretty)
|
|
|
|
|
|
|
|
|
|
expectedCrushRuleId=""
|
|
|
|
|
nrules=$(echo ${osd_crush_rule_dump} | jq length)
|
|
|
|
|
nrules=$(echo ${OSD_CRUSH_RULE_DUMP} | jq length)
|
|
|
|
|
c=$[nrules-1]
|
|
|
|
|
for n in $(seq 0 ${c})
|
|
|
|
|
do
|
|
|
|
|
name=$(echo ${osd_crush_rule_dump} | jq -r .[${n}].rule_name)
|
|
|
|
|
osd_crush_rule_obj=$(echo ${OSD_CRUSH_RULE_DUMP} | jq -r .[${n}])
|
|
|
|
|
|
|
|
|
|
name=$(echo ${osd_crush_rule_obj} | jq -r .rule_name)
|
|
|
|
|
|
|
|
|
|
if [ "x${EXPECTED_CRUSHRULE}" == "x${name}" ]; then
|
|
|
|
|
expectedCrushRuleId=$(echo ${osd_crush_rule_dump} | jq .[${n}].rule_id)
|
|
|
|
|
expectedCrushRuleId=$(echo ${osd_crush_rule_obj} | jq .rule_id)
|
|
|
|
|
echo "Checking against rule: id: ${expectedCrushRuleId}, name:${name}"
|
|
|
|
|
fi
|
|
|
|
|
done
|
|
|
|
|
|
|
|
|
|
echo "Checking OSD pools are configured with Crush rule name:${EXPECTED_CRUSHRULE}, id:${expectedCrushRuleId}"
|
|
|
|
|
|
|
|
|
|
npools=$(echo ${osd_pool_ls_details} | jq length)
|
|
|
|
|
npools=$(echo ${OSD_POOLS_DETAILS} | jq length)
|
|
|
|
|
i=$[npools-1]
|
|
|
|
|
for p in $(seq 0 ${i})
|
|
|
|
|
do
|
|
|
|
|
pool_crush_rule_id=$(echo $osd_pool_ls_details | jq -r ".[${p}][\"crush_rule\"]")
|
|
|
|
|
pool_name=$(echo $osd_pool_ls_details | jq -r ".[${p}][\"pool_name\"]")
|
|
|
|
|
pool_obj=$(echo ${OSD_POOLS_DETAILS} | jq -r ".[${p}]")
|
|
|
|
|
|
|
|
|
|
pool_crush_rule_id=$(echo $pool_obj | jq -r .crush_rule)
|
|
|
|
|
pool_name=$(echo $pool_obj | jq -r .pool_name)
|
|
|
|
|
|
|
|
|
|
if [ "x${pool_crush_rule_id}" == "x${expectedCrushRuleId}" ]; then
|
|
|
|
|
echo "--> Info: Pool ${pool_name} is configured with the correct rule ${pool_crush_rule_id}"
|
|
|
|
|
else
|
|
|
|
@ -123,59 +183,37 @@ function pool_failuredomain_validation() {
|
|
|
|
|
|
|
|
|
|
#######################################
# Verify that every placement group (PG) reported in PG_STAT is in the
# active+clean state.
# Globals:
#   PG_STAT (read) - JSON text; this function reads `.num_pgs` and the
#                    `.num_pg_by_state[]` entries (`name`, `num`) from it
# Arguments:
#   None
# Outputs:
#   Progress and result messages on stdout
# Returns:
#   0 when the check passes. Calls `exit 1` (terminating the calling script)
#   when the PG total cannot be read, when any reported state is not
#   active+clean, or when the active+clean count differs from the total.
#######################################
function pg_validation() {
  local num_pgs pg_state pg_count

  echo "#### Start: Checking placement groups active+clean ####"

  num_pgs=$(jq -r '.num_pgs' <<<"${PG_STAT}")

  # Without a numeric total there is nothing to compare against; fail loudly
  # rather than letting an empty or malformed PG_STAT pass the check.
  if ! [[ "${num_pgs}" =~ ^[0-9]+$ ]]; then
    echo "Error: Unable to read the number of PGs from PG stats"
    exit 1
  fi

  # One jq pass emits "<state> <count>" per entry. The loop reads from a
  # process substitution (not a pipe) so `exit` terminates the script itself.
  while read -r pg_state pg_count; do
    [[ -n "${pg_state}" ]] || continue
    if [ "xactive+clean" == "x${pg_state}" ]; then
      if [ "${num_pgs}" -eq "${pg_count}" ]; then
        echo "Success: All PGs configured (${num_pgs}) are in active+clean status"
      else
        echo "Error: All PGs configured (${num_pgs}) are NOT in active+clean status"
        exit 1
      fi
    else
      echo "Error: PG state not in active+clean status"
      exit 1
    fi
  done < <(jq -r '.num_pg_by_state[] | "\(.name) \(.num)"' <<<"${PG_STAT}")
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#######################################
# Validate the Ceph manager (MGR) daemons: one MGR must be available and at
# least one standby must be present.
# Globals:
#   MGR_COUNT (read) - number of MGRs, used only in the progress message
# Arguments:
#   None
# Outputs:
#   Progress and result messages on stdout
# Returns:
#   0 when the check passes. On failure the function calls `exit 1`, which
#   terminates the calling script.
#######################################
function mgr_validation() {
  # Keep retcode local and start from 0 so a stale global value left behind
  # by other code cannot fail an otherwise healthy check.
  local retcode=0
  local mgr_dump mgr_avl mgr_active mgr_stdby_count

  echo "#### Start: MGR validation ####"
  mgr_dump=$(ceph mgr dump -f json-pretty)
  echo "Checking for ${MGR_COUNT} MGRs"

  # Feed the JSON through a quoted here-string so it is neither word-split
  # nor glob-expanded on its way to jq.
  mgr_avl=$(jq -r '.available' <<<"${mgr_dump}")

  if [ "x${mgr_avl}" == "xtrue" ]; then
    mgr_active=$(jq -r '.active_name' <<<"${mgr_dump}")

    # Now check whether we have at least one valid standby
    mgr_stdby_count=$(jq -r '.standbys | length' <<<"${mgr_dump}")

    # Quoted operand: an empty count takes the failure branch instead of
    # breaking the test expression.
    if [ "${mgr_stdby_count}" -ge 1 ]; then
      echo "Active manager ${mgr_active} is up and running. ${mgr_stdby_count} standby managers available"
    else
      echo "No standby Manager available"
      retcode=1
    fi
  else
    echo "Manager is not active"
    retcode=1
  fi

  if [ "x${retcode}" == "x1" ]; then
    exit 1
  fi
}
|
|
|
|
|
|
|
|
|
|
# Cluster-level checks.
check_cluster_status
check_osd_count
mgr_validation

# Capture the cluster state once; the validation functions called below read
# these variables. Stop immediately if a query fails so the validators never
# run against empty data.
OSD_POOLS_DETAILS=$(ceph osd pool ls detail -f json-pretty) \
  || { echo "Error: unable to fetch OSD pool details"; exit 1; }
OSD_CRUSH_RULE_DUMP=$(ceph osd crush rule dump -f json-pretty) \
  || { echo "Error: unable to fetch OSD crush rule dump"; exit 1; }
PG_STAT=$(ceph pg stat -f json-pretty) \
  || { echo "Error: unable to fetch PG stats"; exit 1; }

# Placement-group and pool checks.
pg_validation
pool_validation
pool_failuredomain_validation
|
|
|
|
|