Merge "AIO-DX Ceph Optimizations"
This commit is contained in:
commit
e49a70fd01
@ -612,8 +612,10 @@ stop_daemon() {
|
||||
|
||||
## command line options
|
||||
options=
|
||||
IFS=" " read -r -a args <<< "$@"
|
||||
wlog "-" INFO "$@"
|
||||
|
||||
OPTS=$(${GETOPT} -n 'init-ceph' -o 'hvam:c:' -l 'help,verbose,valgrind,novalgrind,allhosts,restart,norestart,btrfs,nobtrfs,fsmount,nofsmount,btrfsumount,fsumount,conf:,cluster:,hostname:' -- "$@")
|
||||
OPTS=$(${GETOPT} -n 'init-ceph' -o 'hvam:c:' -l 'help,verbose,valgrind,novalgrind,allhosts,restart,norestart,btrfs,nobtrfs,fsmount,nofsmount,btrfsumount,fsumount,conf:,cluster:,hostname:' -- "${args[@]}")
|
||||
if [ $? != 0 ]
|
||||
then
|
||||
exit 1
|
||||
@ -735,11 +737,51 @@ if [ "$command" = "stop" -o "$command" = "onestop" ]; then
|
||||
what="$new_order"
|
||||
fi
|
||||
|
||||
# Check if the monitors are up before starting any mds
|
||||
# This is needed only for Standard deployments
|
||||
|
||||
. /etc/platform/platform.conf
|
||||
|
||||
|
||||
# On an AIO-DX system, pmon monitors the ceph-mds process.
|
||||
# If ceph-mon is not running, ceph-mds will hang when starting.
|
||||
# Check if we are trying to bring up ceph-mds and ceph-mon is not ready yet
|
||||
# AIO-DX only: make sure ceph-mds is never started without a monitor.
#
# Reads:  system_type, system_mode, command, what
# Writes: what (reordered / filtered daemon list), EXIT_STATUS (set to 1 when
#         nothing is left to start), plus the scratch globals what_out,
#         what_mds, has_mon, CEPH_STATUS, name and type.
#
# On "start"/"onestart" the daemon list in 'what' is rebuilt so that every
# mds entry is moved to the end of the list (after any mon being started in
# the same call). If no monitor is available the mds entries are dropped.
if [ "${system_type}" == "All-in-one" ] && [ "${system_mode}" == "duplex" ]; then
    if [ "${command}" = "start" ] || [ "${command}" = "onestart" ]; then
        what_out=
        what_mds=

        # A monitor counts as available when one is part of this request
        # (any entry containing "mon") ...
        if [[ ${what} == *mon* ]]; then
            has_mon=1
        else
            # ... or when the status query succeeds.
            # NOTE(review): execute_ceph_cmd is defined elsewhere; a zero exit
            # status is taken here to mean the cluster answered - confirm.
            has_mon=0
            CEPH_STATUS=''
            execute_ceph_cmd CEPH_STATUS "ceph status" "ceph -s"
            if [ $? -eq 0 ]; then
                has_mon=1
            fi
        fi

        # Split the request into mds daemons and everything else. All mds
        # entries are accumulated so none is lost when several are listed.
        for name in ${what}; do
            type="${name:0:3}"
            if [ "${type}" == "mds" ]; then
                what_mds+=" ${name}"
                continue
            fi
            what_out+=" ${name}"
        done

        # Re-append the mds daemons last, and only when a monitor is available.
        if [ ${has_mon} -eq 1 ] && [ -n "${what_mds}" ]; then
            what_out+="${what_mds}"
        fi
        what="${what_out}"
    fi

    # An empty 'what' means only ceph-mds was requested while ceph-mon is not
    # active. ceph-mds cannot be started yet, so report an error.
    if [ -z "${what}" ]; then
        EXIT_STATUS=1
    fi
fi
|
||||
|
||||
# Check if the monitors are up before starting any mds
|
||||
# This is needed only for Standard deployments
|
||||
|
||||
if [ "$system_type" == "Standard" ]; then
|
||||
CEPH_STATUS=''
|
||||
execute_ceph_cmd CEPH_STATUS "ceph status" "ceph -s"
|
||||
@ -999,8 +1041,15 @@ EOF
|
||||
[ -n "$post_stop" ] && do_cmd "$post_stop"
|
||||
[ -n "$lockfile" ] && [ "$?" -eq 0 ] && rm -f $lockfile
|
||||
# flush journal to data disk in background
|
||||
if [ "$type" = "osd" ];then
|
||||
$(/usr/bin/ceph-osd -i $id --flush-journal) &
|
||||
if [ "${type}" = "osd" ];then
|
||||
CMD_OUTPUT=''
|
||||
execute_ceph_cmd CMD_OUTPUT "Ceph Status" "ceph -s"
|
||||
if [ $? == 0 ]; then
|
||||
wlog "${name}" "INFO" "Flushing journal"
|
||||
$(/usr/bin/ceph-osd -i $id --flush-journal) &
|
||||
else
|
||||
wlog "${name}" "INFO" "Skipping journal flush"
|
||||
fi
|
||||
fi
|
||||
wlog $name "INFO" "Process stopped, setting state to $ST_STOPPED"
|
||||
save_proc_state $name $ST_STOPPED
|
||||
|
@ -26,6 +26,7 @@ etc/init.d/ceph
|
||||
etc/init.d/mgr-restful-plugin
|
||||
etc/init.d/ceph-init-wrapper
|
||||
etc/ceph/ceph.conf.pmon
|
||||
etc/ceph/ceph-mds.conf.pmon
|
||||
etc/ceph/ceph.conf
|
||||
etc/services.d/*
|
||||
usr/sbin/ceph-preshutdown.sh
|
||||
|
@ -6,13 +6,15 @@
|
||||
SOURCE1 := ceph.sh
|
||||
SOURCE2 := mgr-restful-plugin.py
|
||||
SOURCE3 := ceph.conf.pmon
|
||||
SOURCE4 := ceph-init-wrapper.sh
|
||||
SOURCE5 := ceph.conf
|
||||
SOURCE6 := ceph-manage-journal.py
|
||||
SOURCE7 := ceph.service
|
||||
SOURCE8 := mgr-restful-plugin.service
|
||||
SOURCE9 := ceph-preshutdown.sh
|
||||
SOURCE10 := starlingx-docker-override.conf
|
||||
SOURCE4 := ceph-mds.conf.pmon
|
||||
SOURCE5 := ceph-init-wrapper.sh
|
||||
SOURCE6 := ceph.conf
|
||||
SOURCE7 := ceph-manage-journal.py
|
||||
SOURCE8 := ceph.service
|
||||
SOURCE9 := mgr-restful-plugin.service
|
||||
SOURCE10 := ceph-preshutdown.sh
|
||||
SOURCE11 := starlingx-docker-override.conf
|
||||
|
||||
|
||||
# Paths
|
||||
export DESTDIR = $(CURDIR)/debian/tmp
|
||||
@ -188,13 +190,14 @@ override_dh_auto_install:
|
||||
install -D -m 750 ${SOURCE1} $(DESTDIR)/${SYSCONFDIR}/services.d/worker/
|
||||
install -D -m 750 ${SOURCE2} $(DESTDIR)/${INITDIR}/mgr-restful-plugin
|
||||
install -D -m 750 ${SOURCE3} $(DESTDIR)/${SYSCONFDIR}/ceph/
|
||||
install -D -m 750 ${SOURCE4} $(DESTDIR)/${INITDIR}/ceph-init-wrapper
|
||||
install -D -m 640 ${SOURCE5} $(DESTDIR)/${SYSCONFDIR}/ceph/
|
||||
install -D -m 700 ${SOURCE6} $(DESTDIR)/${SBINDIR}/ceph-manage-journal
|
||||
install -D -m 644 ${SOURCE7} $(DESTDIR)/${UNITDIR}/ceph.service
|
||||
install -D -m 644 ${SOURCE8} $(DESTDIR)/${UNITDIR}/mgr-restful-plugin.service
|
||||
install -D -m 700 ${SOURCE9} $(DESTDIR)/${SBINDIR}/ceph-preshutdown.sh
|
||||
install -D -m 644 ${SOURCE10} $(DESTDIR)/${UNITDIR}/docker.service.d/starlingx-docker-override.conf
|
||||
install -D -m 750 ${SOURCE4} $(DESTDIR)/${SYSCONFDIR}/ceph/
|
||||
install -D -m 750 ${SOURCE5} $(DESTDIR)/${INITDIR}/ceph-init-wrapper
|
||||
install -D -m 640 ${SOURCE6} $(DESTDIR)/${SYSCONFDIR}/ceph/
|
||||
install -D -m 700 ${SOURCE7} $(DESTDIR)/${SBINDIR}/ceph-manage-journal
|
||||
install -D -m 644 ${SOURCE8} $(DESTDIR)/${UNITDIR}/ceph.service
|
||||
install -D -m 644 ${SOURCE9} $(DESTDIR)/${UNITDIR}/mgr-restful-plugin.service
|
||||
install -D -m 700 ${SOURCE10} $(DESTDIR)/${SBINDIR}/ceph-preshutdown.sh
|
||||
install -D -m 644 ${SOURCE11} $(DESTDIR)/${UNITDIR}/docker.service.d/starlingx-docker-override.conf
|
||||
install -m 750 src/init-radosgw $(DESTDIR)/${INITDIR}/ceph-radosgw
|
||||
sed -i '/### END INIT INFO/a SYSTEMCTL_SKIP_REDIRECT=1' $(DESTDIR)/${INITDIR}/ceph-radosgw
|
||||
install -m 750 src/init-rbdmap $(DESTDIR)/${INITDIR}/rbdmap
|
||||
@ -275,6 +278,7 @@ override_dh_fixperms:
|
||||
-Xceph.sh \
|
||||
-Xmgr-restful-plugin \
|
||||
-Xceph.conf.pmon \
|
||||
-Xceph-mds.conf.pmon \
|
||||
-Xceph-init-wrapper \
|
||||
-Xceph.conf \
|
||||
-Xceph-manage-journal \
|
||||
|
@ -1,6 +1,6 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright (c) 2019 Wind River Systems, Inc.
|
||||
# Copyright (c) 2019-2023 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
@ -14,8 +14,8 @@
|
||||
# "/var/run/.ceph_started" when ceph is running and remove it when
|
||||
# is not.
|
||||
#
|
||||
# The script also extracts one or more ceph process names that are
|
||||
# reported as 'not running' or 'dead' or 'failed' by '/etc/intit.d/ceph status'
|
||||
# The script also extracts one or more ceph process names that are
|
||||
# reported as 'not running' or 'dead' or 'failed' by '/etc/init.d/ceph status'
|
||||
# and writes the names to a text file: /tmp/ceph_status_failure.txt for
|
||||
# pmond to access. The pmond adds the text to logs and alarms. Example of text
|
||||
# samples written to file by this script are:
|
||||
@ -24,7 +24,7 @@
|
||||
# 'mon.storage-0'
|
||||
# 'mon.storage-0, osd.2'
|
||||
#
|
||||
# Moreover, for processes that are reported as 'hung' by '/etc/intit.d/ceph status'
|
||||
# Moreover, for processes that are reported as 'hung' by '/etc/init.d/ceph status'
|
||||
# the script will try to increase their logging to 'debug' for a configurable interval.
|
||||
# With logging increased it will output a few stack traces; then, at the end of this
|
||||
# interval, it dumps its stack core and kills it.
|
||||
@ -43,6 +43,14 @@ CEPH_GET_MON_STATUS_FILE="$VOLATILE_PATH/.ceph_getting_mon_status"
|
||||
CEPH_GET_OSD_STATUS_FILE="$VOLATILE_PATH/.ceph_getting_osd_status"
|
||||
CEPH_STATUS_FAILURE_TEXT_FILE="/tmp/ceph_status_failure.txt"
|
||||
|
||||
# AIO-DX (All-in-one, duplex) only: locations of the marker files that record
# on which controller ceph-mon was last active. The per-host flag name is
# derived from HOSTNAME with its first '-' replaced by '_'.
if [[ "${system_type}" == "All-in-one" && "${system_mode}" == "duplex" ]]; then
    CEPH_MON_LIB_PATH=/var/lib/ceph/mon
    CEPH_LAST_ACTIVE_CONTROLLER_FLAG="${CEPH_MON_LIB_PATH}/.last_ceph_mon_active_${HOSTNAME/-/_}"
    CEPH_LAST_ACTIVE_CONTROLLER_1_FLAG="${CEPH_MON_LIB_PATH}/.last_ceph_mon_active_controller_1"
    CEPH_LAST_ACTIVE_CONTROLLER_0_FLAG="${CEPH_MON_LIB_PATH}/.last_ceph_mon_active_controller_0"
fi
|
||||
|
||||
BINDIR=/usr/bin
|
||||
SBINDIR=/usr/sbin
|
||||
if grep -q "Debian" /etc/os-release; then
|
||||
@ -85,6 +93,114 @@ if [ ! -z $ARGS ]; then
|
||||
args+=("${new_args[@]}")
|
||||
fi
|
||||
|
||||
# Report whether drbd-cephmon is in sync.
# Queries 'drbdadm dstate drbd-cephmon', logs the reported state and compares
# it with "UpToDate/UpToDate".
# Return 0 when the state matches, 1 otherwise.
is_drbd_cephmon_in_sync ()
{
    local dstate
    dstate=$(drbdadm dstate drbd-cephmon)
    wlog "-" INFO "drbd-cephmon status: ${dstate}"

    [ "${dstate}" == "UpToDate/UpToDate" ] && return 0
    return 1
}
|
||||
|
||||
# Report whether the local drbd-cephmon role is Primary.
# The output of 'drbdadm role drbd-cephmon' is searched for 'Primary/';
# the outcome is logged either way.
# Return 0 when Primary, 1 otherwise.
is_drbd_cephmon_primary ()
{
    if ! drbdadm role drbd-cephmon | grep -q 'Primary/'; then
        wlog "-" INFO "drbd-cephmon role is NOT Primary"
        return 1
    fi

    wlog "-" INFO "drbd-cephmon role is Primary"
    return 0
}
|
||||
|
||||
# Report whether the drbd-cephmon partition is mounted.
# Looks up the mount source of ${CEPH_MON_LIB_PATH} with findmnt and checks
# that it contains "drbd"; the outcome is logged either way.
# Return 0 when mounted, 1 otherwise.
is_drbd_cephmon_mounted ()
{
    if ! findmnt -no SOURCE "${CEPH_MON_LIB_PATH}" | grep -q drbd; then
        wlog "-" INFO "drbd-cephmon partition is NOT mounted"
        return 1
    fi

    wlog "-" INFO "drbd-cephmon partition is mounted"
    return 0
}
|
||||
|
||||
# Decide whether ceph-mon may be started on an AIO-DX configuration.
# Must be called only on AIO-DX.
#
# Checks, in order (each polled up to 10 times with a 1 second pause after
# every failed attempt):
#   1. drbd-cephmon has the Primary role
#   2. the drbd-cephmon partition is mounted
#   3. only if the other controller's "last active" flag exists and this
#      controller's does not: drbd-cephmon is in sync
#
# Return 0 when ceph-mon can be started, 1 otherwise.
can_start_ceph_mon ()
{
    local attempts_left
    local ok
    local other_flag

    # 1. drbd-cephmon must be Primary
    ok=0
    attempts_left=10
    while [ ${attempts_left} -gt 0 ]; do
        if is_drbd_cephmon_primary; then
            ok=1
            break
        fi
        sleep 1
        attempts_left=$((attempts_left - 1))
    done
    if [ ${ok} -ne 1 ]; then
        wlog "-" ERROR "drbd-cephmon is not primary, cannot start ceph mon"
        return 1
    fi

    # 2. drbd-cephmon partition must be mounted
    ok=0
    attempts_left=10
    while [ ${attempts_left} -gt 0 ]; do
        if is_drbd_cephmon_mounted; then
            ok=1
            break
        fi
        sleep 1
        attempts_left=$((attempts_left - 1))
    done
    if [ ${ok} -ne 1 ]; then
        wlog "-" ERROR "drbd-cephmon is not mounted, cannot start ceph mon"
        return 1
    fi

    # This controller's flag exists: ceph-mon was last active here, safe to run.
    if [ -f "${CEPH_LAST_ACTIVE_CONTROLLER_FLAG}" ]; then
        return 0
    fi

    # Work out which flag belongs to the other controller.
    if [ "${CEPH_LAST_ACTIVE_CONTROLLER_FLAG}" == "${CEPH_LAST_ACTIVE_CONTROLLER_0_FLAG}" ]; then
        other_flag="${CEPH_LAST_ACTIVE_CONTROLLER_1_FLAG}"
    else
        other_flag="${CEPH_LAST_ACTIVE_CONTROLLER_0_FLAG}"
    fi

    # No flag from the other controller either: nothing forbids starting.
    if [ ! -f "${other_flag}" ]; then
        return 0
    fi

    # 3. ceph-mon was last active on the other controller, so the data must
    #    be in sync before it is safe to run here.
    attempts_left=10
    while [ ${attempts_left} -gt 0 ]; do
        if is_drbd_cephmon_in_sync; then
            return 0
        fi
        sleep 1
        attempts_left=$((attempts_left - 1))
    done

    wlog "-" ERROR "drbd-cephmon is not in sync, cannot start ceph mon"
    return 1
}
|
||||
|
||||
with_service_lock ()
|
||||
{
|
||||
local target="$1"; shift
|
||||
@ -133,9 +249,45 @@ start ()
|
||||
# Ceph is not running on this node, return success
|
||||
exit 0
|
||||
fi
|
||||
wlog "-" INFO "Ceph START $1 command received"
|
||||
with_service_lock "$1" ${CEPH_SCRIPT} start $1
|
||||
wlog "-" INFO "Ceph START $1 command finished."
|
||||
|
||||
local service="$1"
|
||||
|
||||
# For AIO-DX, the mon service has special treatment
|
||||
if [ "${service}" == "mon" ] && [ "${system_type}" == "All-in-one" ] && [ "${system_mode}" == "duplex" ]; then
|
||||
# After the first controller unlock, ceph-mon is started by
|
||||
# puppet-ceph module via sysvinit using /etc/init.d/ceph directly.
|
||||
# Setting the controller-0 flag to the default prevents
|
||||
# another controller from starting before any host-swact.
|
||||
if [ ! -e "${CEPH_MON_LIB_PATH}"/.last_ceph_mon_active_controller_* ]; then
|
||||
touch "${CEPH_LAST_ACTIVE_CONTROLLER_0_FLAG}"
|
||||
fi
|
||||
|
||||
# NOTE: In case of uncontrolled swact, to force start ceph-mon service
|
||||
# it will be needed to rename the flag to the desired controller.
|
||||
can_start_ceph_mon
|
||||
if [ $? -ne 0 ]; then
|
||||
wlog "-" ERROR "Ceph mon cannot be started now."
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# Start the service
|
||||
wlog "-" INFO "Ceph START ${service} command received"
|
||||
with_service_lock "${service}" ${CEPH_SCRIPT} start ${service}
|
||||
wlog "-" INFO "Ceph START ${service} command finished."
|
||||
|
||||
# For AIO-DX, the mon service has special treatment
|
||||
if [ "${service}" == "mon" ] && [ "${system_type}" == "All-in-one" ] && [ "${system_mode}" == "duplex" ]; then
|
||||
# If ceph-mon is successfully running, clear old flags and set the new one
|
||||
# RC global variable is set by the with_service_lock function trying to start ceph-mon
|
||||
if [ ${RC} -eq 0 ]; then
|
||||
# Remove old flags
|
||||
rm -f "${CEPH_LAST_ACTIVE_CONTROLLER_0_FLAG}"
|
||||
rm -f "${CEPH_LAST_ACTIVE_CONTROLLER_1_FLAG}"
|
||||
# Create new flag
|
||||
touch "${CEPH_LAST_ACTIVE_CONTROLLER_FLAG}"
|
||||
fi
|
||||
fi
|
||||
}
|
||||
|
||||
stop ()
|
||||
|
26
ceph/ceph/files/ceph-mds.conf.pmon
Normal file
26
ceph/ceph/files/ceph-mds.conf.pmon
Normal file
@ -0,0 +1,26 @@
|
||||
[process]
|
||||
process = ceph-mds
|
||||
script = /etc/init.d/ceph
|
||||
|
||||
style = lsb
|
||||
severity = major ; minor, major, critical
|
||||
restarts = 5 ; restart retries before error assertion
|
||||
interval = 30 ; number of seconds to wait between restarts
|
||||
|
||||
mode = status ; Monitoring mode: passive (default), active, status or ignore
|
||||
; passive: process death monitoring (default: always)
|
||||
; active : heartbeat monitoring, i.e. request / response messaging
|
||||
; status : determine process health with executing "status" command
|
||||
; "start" is used to start the process(es) again
|
||||
; ignore : do not monitor or stop monitoring
|
||||
|
||||
; Status and Active Monitoring Options
|
||||
|
||||
period = 30 ; monitor period in seconds
|
||||
timeout = 120 ; for active mode, messaging timeout period in seconds, must be shorter than period
|
||||
; for status mode, max amount of time for a command to execute
|
||||
|
||||
; Status Monitoring Options
|
||||
start_arg = start mds ; start argument for the script
|
||||
status_arg = status mds ; status argument for the script
|
||||
status_failure_text = /tmp/ceph_status_failure.txt ; text to be added to alarms or logs, this is optional
|
@ -1,4 +1,8 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright (c) 2023 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
INITDIR=/etc/init.d
|
||||
LOGFILE=/var/log/ceph/ceph-init.log
|
||||
@ -22,20 +26,17 @@ logecho ()
|
||||
|
||||
start ()
|
||||
{
|
||||
SERVICES=""
|
||||
if [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" == "duplex" ]]; then
|
||||
# In an AIO-DX configuration SM manages the floating MON and OSDs. Here
|
||||
# we defer starting OSDs directly via the init script to allow SM to
|
||||
# start them at the appropriate time. This will eliminate a race between
|
||||
# MTC and SM starting OSDs simultaneously. Continue to start MON/MDS
|
||||
# service here so that MDS is operational after the monitor is up.
|
||||
SERVICES="mon mds"
|
||||
if [[ "$system_type" != "All-in-one" ]] || [[ "$system_mode" != "duplex" ]]; then
|
||||
logecho "Starting ceph services..."
|
||||
${INITDIR}/ceph start >> ${LOGFILE} 2>&1
|
||||
RC=$?
|
||||
else
|
||||
# In an AIO-DX configuration SM manages the floating MON and OSDs and pmon manages
|
||||
# the ceph-mds process. Here we defer starting all ceph process to allow SM and pmon
|
||||
# to start them at the appropriate time.
|
||||
RC=0
|
||||
fi
|
||||
|
||||
logecho "Starting ceph ${SERVICES} services..."
|
||||
${INITDIR}/ceph start ${SERVICES} >> ${LOGFILE} 2>&1
|
||||
RC=$?
|
||||
|
||||
if [ ! -f ${CEPH_FILE} ]; then
|
||||
touch ${CEPH_FILE}
|
||||
fi
|
||||
|
Loading…
Reference in New Issue
Block a user