[ceph-osd] Remove ceph-mon dependency in ceph-osd liveness probe
It is possible for misbehaving ceph-mon pods to cause the ceph-osd liveness probe to fail for healthy ceph-osd pods, which can cause those healthy pods to be restarted unnecessarily. This change removes the ceph-mon query from the ceph-osd liveness probe so that the probe depends only on ceph-osd state.

Change-Id: I9e1846cfdc5783dbb261583e04ea19df81d143f4
This commit is contained in:
parent
8e27278d68
commit
9a37183b26
@ -15,6 +15,6 @@ apiVersion: v1
|
|||||||
appVersion: v1.0.0
|
appVersion: v1.0.0
|
||||||
description: OpenStack-Helm Ceph OSD
|
description: OpenStack-Helm Ceph OSD
|
||||||
name: ceph-osd
|
name: ceph-osd
|
||||||
version: 0.1.40
|
version: 0.1.41
|
||||||
home: https://github.com/ceph/ceph
|
home: https://github.com/ceph/ceph
|
||||||
...
|
...
|
||||||
|
@ -25,17 +25,13 @@ cond=1
|
|||||||
for sock in $SOCKDIR/$SBASE.*.$SSUFFIX; do
|
for sock in $SOCKDIR/$SBASE.*.$SSUFFIX; do
|
||||||
if [ -S $sock ]; then
|
if [ -S $sock ]; then
|
||||||
OSD_ID=$(echo $sock | awk -F. '{print $2}')
|
OSD_ID=$(echo $sock | awk -F. '{print $2}')
|
||||||
OSD_STATE=$(ceph -f json-pretty --connect-timeout 1 --admin-daemon "${sock}" status|grep state|sed 's/.*://;s/[^a-z]//g')
|
OSD_STATE=$(ceph -f json --connect-timeout 1 --admin-daemon "${sock}" status|jq -r '.state')
|
||||||
NOUP_FLAG=$(ceph --name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/ceph.keyring status | awk '/flags/{print $2}' | grep noup)
|
|
||||||
echo "OSD ${OSD_ID} ${OSD_STATE}";
|
echo "OSD ${OSD_ID} ${OSD_STATE}";
|
||||||
# this might be a stricter check than we actually want. what are the
|
# Succeed if the OSD state is active (running) or preboot (starting)
|
||||||
# other values for the "state" field?
|
if [ "${OSD_STATE}" = "active" ] || [ "${OSD_STATE}" = "preboot" ]; then
|
||||||
if [ "x${OSD_STATE}x" = 'xactivex' ]; then
|
|
||||||
cond=0
|
|
||||||
elif [ "${NOUP_FLAG}" ] && [ "x${OSD_STATE}x" = 'xprebootx' ]; then
|
|
||||||
cond=0
|
cond=0
|
||||||
else
|
else
|
||||||
# one's not ready, so the whole pod's not ready.
|
# Any other state is unexpected and the probe fails
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
|
@ -41,4 +41,5 @@ ceph-osd:
|
|||||||
- 0.1.38 Skip pod wait in post-apply job when disruptive
|
- 0.1.38 Skip pod wait in post-apply job when disruptive
|
||||||
- 0.1.39 Allow for unconditional OSD restart
|
- 0.1.39 Allow for unconditional OSD restart
|
||||||
- 0.1.40 Remove udev interactions from osd-init
|
- 0.1.40 Remove udev interactions from osd-init
|
||||||
|
- 0.1.41 Remove ceph-mon dependency in ceph-osd liveness probe
|
||||||
...
|
...
|
||||||
|
Loading…
Reference in New Issue
Block a user