Merge "Start Ceph processes before start monitoring"

This commit is contained in:
Zuul 2024-11-06 19:10:09 +00:00 committed by Gerrit Code Review
commit a26a728a99
2 changed files with 93 additions and 52 deletions

View File

@ -305,11 +305,6 @@ has_daemon_running ()
start ()
{
if [ ! -f ${CEPH_FILE} ]; then
# Ceph is not running on this node, return success
exit 0
fi
local service="$1"
# Evaluate the parameter because of local monitor (controller.${HOSTNAME})
eval service="${service}"
@ -598,6 +593,14 @@ start=$(date +%s%N)
log INFO "action:${args[0]}:start-at:${start: 0:-6} ms"
case "${args[0]}" in
start)
if [ ! -f ${CEPH_FILE} ]; then
# Ceph is not running on this node, return success
exit 0
fi
start ${args[1]}
;;
forcestart)
start ${args[1]}
;;
stop)

View File

@ -6,41 +6,96 @@
INITDIR=/etc/init.d
LOGFILE=/var/log/ceph/ceph-init.log
CEPH_FILE=/var/run/.ceph_started
CEPH_STARTED_FLAG=/var/run/.ceph_started
CEPH_CONFIGURED_FLAG=/etc/platform/.node_ceph_configured
CEPH_INIT="${INITDIR}/ceph-init-wrapper"
# Get our nodetype
# Get system/node configuration
. /etc/platform/platform.conf
# Exit immediately if ceph not configured (i.e. no mon in the config file)
if ! grep -q "mon\." /etc/ceph/ceph.conf
then
exit 0
fi
logecho ()
{
echo $1
date >> ${LOGFILE}
echo $1 >> ${LOGFILE}
local head="$(date "+%Y-%m-%d %H:%M:%S.%3N")"
echo "$head ${BASHPID}: $@" >> ${LOGFILE}
echo "$@"
}
# Exit immediately if ceph not configured
if [ ! -f "${CEPH_CONFIGURED_FLAG}" ]; then
logecho "Ceph is not configured in this node. Exiting."
exit 0
fi
# If system is an AIO the mtcClient will run this script twice
# from 2 locations on controllers.
# If this is a AIO DX+ it will also be called on compute nodes
# and it should be avoided since there is nothing to do.
# So exit the script if it is called from /etc//services.d/worker
if [[ "$system_type" == "All-in-one" ]]; then
dir_path=$(dirname "$(realpath $0)")
if [[ "$dir_path" == "/etc/services.d/worker" ]]; then
logecho "Calling from '${dir_path}' and this is ${system_type^}. Exiting."
exit 0
fi
fi
start ()
{
# Defer ceph initialization to avoid race conditions. Let SM and Pmon to start the
# processes in the appropriate time.
# Set the flag to let ceph start later.
logecho "Setting flag to enable ceph processes to start."
if [ ! -f ${CEPH_FILE} ]; then
touch ${CEPH_FILE}
# Start Ceph processes according to the system_type and system_mode.
# Set the flag CEPH_STARTED_FLAG to let ceph to be monitored by Pmon and SM.
# The forcestart action is used to bypass the CEPH_STARTED_FLAG flag check
# that is created only after all processes are running to prevent
# Pmon and SM detecting process failure when monitoring the processes
logecho "Starting Ceph processes on ${system_type^} ${system_mode^}"
if [ "${system_type}" == "All-in-one" ] && [ "${system_mode}" == "simplex" ]; then
${CEPH_INIT} forcestart mon
local rc_mon=$?
logecho "RC mon: ${rc_mon}"
${CEPH_INIT} forcestart mds
local rc_mds=$?
logecho "RC mds: ${rc_mds}"
${CEPH_INIT} forcestart osd
local rc_osd=$?
logecho "RC osd: ${rc_osd}"
fi
if [ "${system_type}" == "All-in-one" ] && [ "${system_mode}" != "simplex" ]; then
${CEPH_INIT} forcestart mon.${HOSTNAME}
local rc_mon=$?
logecho "RC mon.${HOSTNAME}: ${rc_mon}"
${CEPH_INIT} forcestart mds
local rc_mds=$?
logecho "RC mds: ${rc_mds}"
fi
if [ "${system_type}" == "Standard" ]; then
${CEPH_INIT} forcestart
local rc_all=$?
logecho "RC all: ${rc_all}"
${CEPH_INIT} forcestart mds
local rc_mds=$?
logecho "RC mds: ${rc_mds}"
fi
logecho "Setting flag to enable ceph processes monitoring"
if [ ! -f ${CEPH_STARTED_FLAG} ]; then
touch ${CEPH_STARTED_FLAG}
fi
}
stop ()
{
logecho "Stopping Ceph processes on ${system_type^} ${system_mode^}"
if [ "${system_type}" == "All-in-one" ] && [ "${system_mode}" == "simplex" ]; then
# AIO-SX
logecho "Ceph services will continue to run on node"
RC=0
# AIO-SX do not stop services
logecho "Ceph services will continue to run"
elif [ "$system_type" == "All-in-one" ] && [ "${system_mode}" != "simplex" ]; then
# AIO-DX and AIO-DX+
# Will stop OSDs and MDS processes only.
@ -48,46 +103,30 @@ stop ()
# mon.${hostname} must be running.
logecho "Ceph services will be stopped, except local ceph monitor"
if [ -f ${CEPH_FILE} ]; then
rm -f ${CEPH_FILE}
if [ -f ${CEPH_STARTED_FLAG} ]; then
rm -f ${CEPH_STARTED_FLAG}
fi
${INITDIR}/ceph-init-wrapper stop osd >> ${LOGFILE} 2>&1
${CEPH_INIT} stop osd >> ${LOGFILE} 2>&1
local rc_osd=$?
logecho "rc_osd=${rc_osd}"
logecho "RC osd: ${rc_osd}"
${INITDIR}/ceph-init-wrapper stop mds >> ${LOGFILE} 2>&1
${CEPH_INIT} stop mds >> ${LOGFILE} 2>&1
local rc_mds=$?
logecho "rc_mds=${rc_mds}"
logecho "RC mds: ${rc_mds}"
RC=0
[ ${rc_osd} -ne 0 ] || [ ${rc_mds} -ne 0 ] && RC=1
else
# Standard and Standard Dedicated Storage
logecho "Stopping ceph services..."
if [ -f ${CEPH_FILE} ]; then
rm -f ${CEPH_FILE}
if [ -f ${CEPH_STARTED_FLAG} ]; then
rm -f ${CEPH_STARTED_FLAG}
fi
${INITDIR}/ceph-init-wrapper stop >> ${LOGFILE} 2>&1
RC=$?
${CEPH_INIT} stop >> ${LOGFILE} 2>&1
fi
}
# If system is an AIO the mtcClient will run this script twice
# from 2 locations and this generates some errors.
# So we have to exit the script if is called
# from /etc/services.d/worker in order to be executed once
if [[ "$system_type" == "All-in-one" ]]; then
dir_path=$(dirname "$(realpath $0)")
if [[ "$dir_path" == "/etc/services.d/worker" ]]; then
exit 0
fi
fi
RC=0
case "$1" in
start)
start
@ -101,5 +140,4 @@ case "$1" in
;;
esac
logecho "RC was: $RC"
exit $RC
exit 0