Merge "Start Ceph processes before start monitoring"

This commit is contained in:
Zuul 2024-11-06 19:10:09 +00:00 committed by Gerrit Code Review
commit a26a728a99
2 changed files with 93 additions and 52 deletions

View File

@@ -305,11 +305,6 @@ has_daemon_running ()
start () start ()
{ {
if [ ! -f ${CEPH_FILE} ]; then
# Ceph is not running on this node, return success
exit 0
fi
local service="$1" local service="$1"
# Evaluate the parameter because of local monitor (controller.${HOSTNAME}) # Evaluate the parameter because of local monitor (controller.${HOSTNAME})
eval service="${service}" eval service="${service}"
@@ -598,6 +593,14 @@ start=$(date +%s%N)
log INFO "action:${args[0]}:start-at:${start: 0:-6} ms" log INFO "action:${args[0]}:start-at:${start: 0:-6} ms"
case "${args[0]}" in case "${args[0]}" in
start) start)
if [ ! -f ${CEPH_FILE} ]; then
# Ceph is not running on this node, return success
exit 0
fi
start ${args[1]}
;;
forcestart)
start ${args[1]} start ${args[1]}
;; ;;
stop) stop)

View File

@@ -6,41 +6,96 @@
INITDIR=/etc/init.d INITDIR=/etc/init.d
LOGFILE=/var/log/ceph/ceph-init.log LOGFILE=/var/log/ceph/ceph-init.log
CEPH_FILE=/var/run/.ceph_started CEPH_STARTED_FLAG=/var/run/.ceph_started
CEPH_CONFIGURED_FLAG=/etc/platform/.node_ceph_configured
CEPH_INIT="${INITDIR}/ceph-init-wrapper"
# Get our nodetype # Get system/node configuration
. /etc/platform/platform.conf . /etc/platform/platform.conf
# Exit immediately if ceph not configured (i.e. no mon in the config file)
if ! grep -q "mon\." /etc/ceph/ceph.conf
then
exit 0
fi
logecho () logecho ()
{ {
echo $1 local head="$(date "+%Y-%m-%d %H:%M:%S.%3N")"
date >> ${LOGFILE} echo "$head ${BASHPID}: $@" >> ${LOGFILE}
echo $1 >> ${LOGFILE} echo "$@"
} }
# Exit immediately if ceph not configured
if [ ! -f "${CEPH_CONFIGURED_FLAG}" ]; then
logecho "Ceph is not configured in this node. Exiting."
exit 0
fi
# If system is an AIO the mtcClient will run this script twice
# from 2 locations on controllers.
# If this is an AIO-DX+ it will also be called on compute nodes,
# which should be avoided since there is nothing to do there.
# So exit the script if it is called from /etc/services.d/worker
if [[ "$system_type" == "All-in-one" ]]; then
dir_path=$(dirname "$(realpath $0)")
if [[ "$dir_path" == "/etc/services.d/worker" ]]; then
logecho "Calling from '${dir_path}' and this is ${system_type^}. Exiting."
exit 0
fi
fi
start () start ()
{ {
# Defer ceph initialization to avoid race conditions. Let SM and Pmon to start the # Start Ceph processes according to the system_type and system_mode.
# processes in the appropriate time. # Set the flag CEPH_STARTED_FLAG to let ceph be monitored by Pmon and SM.
# Set the flag to let ceph start later. # The forcestart action is used to bypass the CEPH_STARTED_FLAG flag check
logecho "Setting flag to enable ceph processes to start." # that is created only after all processes are running to prevent
if [ ! -f ${CEPH_FILE} ]; then # Pmon and SM detecting process failure when monitoring the processes
touch ${CEPH_FILE}
logecho "Starting Ceph processes on ${system_type^} ${system_mode^}"
if [ "${system_type}" == "All-in-one" ] && [ "${system_mode}" == "simplex" ]; then
${CEPH_INIT} forcestart mon
local rc_mon=$?
logecho "RC mon: ${rc_mon}"
${CEPH_INIT} forcestart mds
local rc_mds=$?
logecho "RC mds: ${rc_mds}"
${CEPH_INIT} forcestart osd
local rc_osd=$?
logecho "RC osd: ${rc_osd}"
fi
if [ "${system_type}" == "All-in-one" ] && [ "${system_mode}" != "simplex" ]; then
${CEPH_INIT} forcestart mon.${HOSTNAME}
local rc_mon=$?
logecho "RC mon.${HOSTNAME}: ${rc_mon}"
${CEPH_INIT} forcestart mds
local rc_mds=$?
logecho "RC mds: ${rc_mds}"
fi
if [ "${system_type}" == "Standard" ]; then
${CEPH_INIT} forcestart
local rc_all=$?
logecho "RC all: ${rc_all}"
${CEPH_INIT} forcestart mds
local rc_mds=$?
logecho "RC mds: ${rc_mds}"
fi
logecho "Setting flag to enable ceph processes monitoring"
if [ ! -f ${CEPH_STARTED_FLAG} ]; then
touch ${CEPH_STARTED_FLAG}
fi fi
} }
stop () stop ()
{ {
logecho "Stopping Ceph processes on ${system_type^} ${system_mode^}"
if [ "${system_type}" == "All-in-one" ] && [ "${system_mode}" == "simplex" ]; then if [ "${system_type}" == "All-in-one" ] && [ "${system_mode}" == "simplex" ]; then
# AIO-SX # AIO-SX do not stop services
logecho "Ceph services will continue to run on node" logecho "Ceph services will continue to run"
RC=0
elif [ "$system_type" == "All-in-one" ] && [ "${system_mode}" != "simplex" ]; then elif [ "$system_type" == "All-in-one" ] && [ "${system_mode}" != "simplex" ]; then
# AIO-DX and AIO-DX+ # AIO-DX and AIO-DX+
# Will stop OSDs and MDS processes only. # Will stop OSDs and MDS processes only.
@@ -48,46 +103,30 @@ stop ()
# mon.${hostname} must be running. # mon.${hostname} must be running.
logecho "Ceph services will be stopped, except local ceph monitor" logecho "Ceph services will be stopped, except local ceph monitor"
if [ -f ${CEPH_FILE} ]; then if [ -f ${CEPH_STARTED_FLAG} ]; then
rm -f ${CEPH_FILE} rm -f ${CEPH_STARTED_FLAG}
fi fi
${INITDIR}/ceph-init-wrapper stop osd >> ${LOGFILE} 2>&1 ${CEPH_INIT} stop osd >> ${LOGFILE} 2>&1
local rc_osd=$? local rc_osd=$?
logecho "rc_osd=${rc_osd}" logecho "RC osd: ${rc_osd}"
${INITDIR}/ceph-init-wrapper stop mds >> ${LOGFILE} 2>&1 ${CEPH_INIT} stop mds >> ${LOGFILE} 2>&1
local rc_mds=$? local rc_mds=$?
logecho "rc_mds=${rc_mds}" logecho "RC mds: ${rc_mds}"
RC=0
[ ${rc_osd} -ne 0 ] || [ ${rc_mds} -ne 0 ] && RC=1
else else
# Standard and Standard Dedicated Storage # Standard and Standard Dedicated Storage
logecho "Stopping ceph services..." logecho "Stopping ceph services..."
if [ -f ${CEPH_FILE} ]; then if [ -f ${CEPH_STARTED_FLAG} ]; then
rm -f ${CEPH_FILE} rm -f ${CEPH_STARTED_FLAG}
fi fi
${INITDIR}/ceph-init-wrapper stop >> ${LOGFILE} 2>&1 ${CEPH_INIT} stop >> ${LOGFILE} 2>&1
RC=$?
fi fi
} }
# If system is an AIO the mtcClient will run this script twice
# from 2 locations and this generates some errors.
# So we have to exit the script if is called
# from /etc/services.d/worker in order to be executed once
if [[ "$system_type" == "All-in-one" ]]; then
dir_path=$(dirname "$(realpath $0)")
if [[ "$dir_path" == "/etc/services.d/worker" ]]; then
exit 0
fi
fi
RC=0
case "$1" in case "$1" in
start) start)
start start
@@ -101,5 +140,4 @@ case "$1" in
;; ;;
esac esac
logecho "RC was: $RC" exit 0
exit $RC