ceph: update for stx 5.0

* inherit stx-metadata so the updated support files are
  installed from the StarlingX 'integ' repo.

* remove the local copy of the StarlingX files.

Story: 2008952
Task: 42576

Signed-off-by: Jackie Huang <jackie.huang@windriver.com>
Change-Id: I41e37d1b0808ad86e2610394691ead37c8875326
Author: Jackie Huang
Date:   2021-06-23 14:55:10 +08:00
Parent: 4538d7841a
Commit: 14e979e087

13 changed files with 39 additions and 2073 deletions
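
The change applies the same pattern to both ceph recipes. A minimal sketch of
that pattern is shown below; it is illustrative only, using a single
placeholder file and install path rather than the full recipe contents:

    # Before: support files carried in the layer and fetched through SRC_URI
    SRC_URI += "file://ceph.conf"
    do_install_append () {
        install -m 0644 ${WORKDIR}/ceph.conf ${D}${sysconfdir}/ceph/
    }

    # After: files come from the StarlingX 'integ' repo via the stx-metadata class
    inherit stx-metadata
    STX_REPO = "integ"
    STX_SUBPATH = "ceph/ceph/files"
    do_install_append () {
        install -m 0644 ${STX_METADATA_PATH}/ceph.conf ${D}${sysconfdir}/ceph/
    }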

ceph recipe (pythonnative):

@@ -113,21 +113,13 @@ SRC_URI = "\
file://0002-zstd-fix-error-for-cross-compile.patch \
file://0003-ceph-add-pybind-support-in-OE.patch \
file://0004-ceph-detect-init-correct-the-installation-for-OE.patch \
\
file://ceph-init-wrapper.sh \
file://ceph-manage-journal.py \
file://ceph-preshutdown.sh \
file://ceph-radosgw.service \
file://ceph.conf \
file://ceph.conf.pmon \
file://ceph.service \
file://ceph.sh \
file://mgr-restful-plugin.py \
file://mgr-restful-plugin.service \
file://starlingx-docker-override.conf \
"
inherit cmake pythonnative python-dir systemd
inherit stx-metadata
STX_REPO = "integ"
STX_SUBPATH = "ceph/ceph/files"
DISTRO_FEATURES_BACKFILL_CONSIDERED_remove = "sysvinit"
@@ -196,31 +188,31 @@ do_install_append () {
install -m 0755 ${D}${libexecdir}/ceph/ceph_common.sh ${D}${libdir}/ceph
install -d ${D}${sysconfdir}/ceph
install -m 0644 ${WORKDIR}/ceph.conf ${D}${sysconfdir}/ceph/
install -m 0644 ${WORKDIR}/ceph-radosgw.service ${D}${systemd_system_unitdir}/ceph-radosgw@.service
install -m 0644 ${WORKDIR}/ceph.service ${D}${systemd_system_unitdir}
install -m 0644 ${WORKDIR}/mgr-restful-plugin.service ${D}${systemd_system_unitdir}
install -m 0644 ${STX_METADATA_PATH}/ceph.conf ${D}${sysconfdir}/ceph/
install -m 0644 ${STX_METADATA_PATH}/ceph-radosgw.service ${D}${systemd_system_unitdir}/ceph-radosgw@.service
install -m 0644 ${STX_METADATA_PATH}/ceph.service ${D}${systemd_system_unitdir}
install -m 0644 ${STX_METADATA_PATH}/mgr-restful-plugin.service ${D}${systemd_system_unitdir}
install -m 0700 ${WORKDIR}/ceph-manage-journal.py ${D}${sbindir}/ceph-manage-journal
install -Dm 0750 ${WORKDIR}/mgr-restful-plugin.py ${D}${sysconfdir}/rc.d/init.d/mgr-restful-plugin
install -Dm 0750 ${WORKDIR}/mgr-restful-plugin.py ${D}${sysconfdir}/init.d/mgr-restful-plugin
install -m 0750 ${WORKDIR}/ceph.conf.pmon ${D}${sysconfdir}/ceph/
install -m 0700 ${STX_METADATA_PATH}/ceph-manage-journal.py ${D}${sbindir}/ceph-manage-journal
install -Dm 0750 ${STX_METADATA_PATH}/mgr-restful-plugin.py ${D}${sysconfdir}/rc.d/init.d/mgr-restful-plugin
install -Dm 0750 ${STX_METADATA_PATH}/mgr-restful-plugin.py ${D}${sysconfdir}/init.d/mgr-restful-plugin
install -m 0750 ${STX_METADATA_PATH}/ceph.conf.pmon ${D}${sysconfdir}/ceph/
install -d -m 0750 ${D}${sysconfdir}/services.d/controller
install -d -m 0750 ${D}${sysconfdir}/services.d/storage
install -d -m 0750 ${D}${sysconfdir}/services.d/worker
install -m 0750 ${WORKDIR}/ceph.sh ${D}${sysconfdir}/services.d/controller
install -m 0750 ${WORKDIR}/ceph.sh ${D}${sysconfdir}/services.d/storage
install -m 0750 ${WORKDIR}/ceph.sh ${D}${sysconfdir}/services.d/worker
install -m 0750 ${STX_METADATA_PATH}/ceph.sh ${D}${sysconfdir}/services.d/controller
install -m 0750 ${STX_METADATA_PATH}/ceph.sh ${D}${sysconfdir}/services.d/storage
install -m 0750 ${STX_METADATA_PATH}/ceph.sh ${D}${sysconfdir}/services.d/worker
install -Dm 0750 ${WORKDIR}/ceph-init-wrapper.sh ${D}${sysconfdir}/rc.d/init.d/ceph-init-wrapper
install -Dm 0750 ${WORKDIR}/ceph-init-wrapper.sh ${D}${sysconfdir}/init.d/ceph-init-wrapper
install -Dm 0750 ${STX_METADATA_PATH}/ceph-init-wrapper.sh ${D}${sysconfdir}/rc.d/init.d/ceph-init-wrapper
install -Dm 0750 ${STX_METADATA_PATH}/ceph-init-wrapper.sh ${D}${sysconfdir}/init.d/ceph-init-wrapper
sed -i -e 's|/usr/lib64|${libdir}|' ${D}${sysconfdir}/rc.d/init.d/ceph-init-wrapper ${D}${sysconfdir}/init.d/ceph-init-wrapper
install -m 0700 ${WORKDIR}/ceph-preshutdown.sh ${D}${sbindir}/ceph-preshutdown.sh
install -m 0700 ${STX_METADATA_PATH}/ceph-preshutdown.sh ${D}${sbindir}/ceph-preshutdown.sh
install -Dm 0644 ${WORKDIR}/starlingx-docker-override.conf ${D}${systemd_system_unitdir}/docker.service.d/starlingx-docker-override.conf
install -Dm 0644 ${STX_METADATA_PATH}/starlingx-docker-override.conf ${D}${systemd_system_unitdir}/docker.service.d/starlingx-docker-override.conf
install -m 0644 -D ${S}/src/etc-rbdmap ${D}${sysconfdir}/ceph/rbdmap
install -m 0644 -D ${S}/etc/sysconfig/ceph ${D}${sysconfdir}/sysconfig/ceph

ceph recipe (python3native):

@@ -1,22 +1,15 @@
FILESEXTRAPATHS_prepend := "${THISDIR}/${BP}:${THISDIR}/files:"
FILESEXTRAPATHS_prepend := "${THISDIR}/${BP}:"
inherit python3native python3-dir
inherit stx-metadata
STX_REPO = "integ"
STX_SUBPATH = "ceph/ceph/files"
DISTRO_FEATURES_BACKFILL_CONSIDERED_remove = "sysvinit"
SRC_URI += "\
file://0001-ceph-rebase-on-stx.3.0-and-warrior.patch \
file://ceph.conf \
file://ceph-init-wrapper.sh \
file://ceph-preshutdown.sh \
file://ceph.service \
file://mgr-restful-plugin.py \
file://starlingx-docker-override.conf \
file://ceph.conf.pmon \
file://ceph-manage-journal.py \
file://ceph-radosgw.service \
file://ceph.sh \
file://mgr-restful-plugin.service \
file://rados.runtime.decode.error.patch \
"
DEPENDS = "boost rdma-core bzip2 curl expat gperf-native \
@@ -55,31 +48,31 @@ EXTRA_OECMAKE = "-DWITH_MANPAGE=OFF \
do_install_append () {
install -d ${D}${sysconfdir}/ceph
install -m 0644 ${WORKDIR}/ceph.conf ${D}${sysconfdir}/ceph/
install -m 0644 ${WORKDIR}/ceph-radosgw.service ${D}${systemd_system_unitdir}/ceph-radosgw@.service
install -m 0644 ${WORKDIR}/ceph.service ${D}${systemd_system_unitdir}
install -m 0644 ${WORKDIR}/mgr-restful-plugin.service ${D}${systemd_system_unitdir}
install -m 0644 ${STX_METADATA_PATH}/ceph.conf ${D}${sysconfdir}/ceph/
install -m 0644 ${STX_METADATA_PATH}/ceph-radosgw.service ${D}${systemd_system_unitdir}/ceph-radosgw@.service
install -m 0644 ${STX_METADATA_PATH}/ceph.service ${D}${systemd_system_unitdir}
install -m 0644 ${STX_METADATA_PATH}/mgr-restful-plugin.service ${D}${systemd_system_unitdir}
install -m 0700 ${WORKDIR}/ceph-manage-journal.py ${D}${sbindir}/ceph-manage-journal
install -Dm 0750 ${WORKDIR}/mgr-restful-plugin.py ${D}${sysconfdir}/rc.d/init.d/mgr-restful-plugin
install -Dm 0750 ${WORKDIR}/mgr-restful-plugin.py ${D}${sysconfdir}/init.d/mgr-restful-plugin
install -m 0750 ${WORKDIR}/ceph.conf.pmon ${D}${sysconfdir}/ceph/
install -m 0700 ${STX_METADATA_PATH}/ceph-manage-journal.py ${D}${sbindir}/ceph-manage-journal
install -Dm 0750 ${STX_METADATA_PATH}/mgr-restful-plugin.py ${D}${sysconfdir}/rc.d/init.d/mgr-restful-plugin
install -Dm 0750 ${STX_METADATA_PATH}/mgr-restful-plugin.py ${D}${sysconfdir}/init.d/mgr-restful-plugin
install -m 0750 ${STX_METADATA_PATH}/ceph.conf.pmon ${D}${sysconfdir}/ceph/
install -d -m 0750 ${D}${sysconfdir}/services.d/controller
install -d -m 0750 ${D}${sysconfdir}/services.d/storage
install -d -m 0750 ${D}${sysconfdir}/services.d/worker
install -m 0750 ${WORKDIR}/ceph.sh ${D}${sysconfdir}/services.d/controller
install -m 0750 ${WORKDIR}/ceph.sh ${D}${sysconfdir}/services.d/storage
install -m 0750 ${WORKDIR}/ceph.sh ${D}${sysconfdir}/services.d/worker
install -m 0750 ${STX_METADATA_PATH}/ceph.sh ${D}${sysconfdir}/services.d/controller
install -m 0750 ${STX_METADATA_PATH}/ceph.sh ${D}${sysconfdir}/services.d/storage
install -m 0750 ${STX_METADATA_PATH}/ceph.sh ${D}${sysconfdir}/services.d/worker
install -Dm 0750 ${WORKDIR}/ceph-init-wrapper.sh ${D}${sysconfdir}/rc.d/init.d/ceph-init-wrapper
install -Dm 0750 ${WORKDIR}/ceph-init-wrapper.sh ${D}${sysconfdir}/init.d/ceph-init-wrapper
install -Dm 0750 ${STX_METADATA_PATH}/ceph-init-wrapper.sh ${D}${sysconfdir}/rc.d/init.d/ceph-init-wrapper
install -Dm 0750 ${STX_METADATA_PATH}/ceph-init-wrapper.sh ${D}${sysconfdir}/init.d/ceph-init-wrapper
sed -i -e 's|/usr/lib64|${libdir}|' ${D}${sysconfdir}/rc.d/init.d/ceph-init-wrapper ${D}${sysconfdir}/init.d/ceph-init-wrapper
install -m 0700 ${WORKDIR}/ceph-preshutdown.sh ${D}${sbindir}/ceph-preshutdown.sh
install -m 0700 ${STX_METADATA_PATH}/ceph-preshutdown.sh ${D}${sbindir}/ceph-preshutdown.sh
install -Dm 0644 ${WORKDIR}/starlingx-docker-override.conf ${D}${systemd_system_unitdir}/docker.service.d/starlingx-docker-override.conf
install -Dm 0644 ${STX_METADATA_PATH}/starlingx-docker-override.conf ${D}${systemd_system_unitdir}/docker.service.d/starlingx-docker-override.conf
install -m 0644 -D ${S}/src/etc-rbdmap ${D}${sysconfdir}/ceph/rbdmap
install -m 0644 -D ${S}/etc/sysconfig/ceph ${D}${sysconfdir}/sysconfig/ceph
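
Both recipes rely on the stx-metadata class to resolve ${STX_METADATA_PATH};
the class itself is not part of this change. A hypothetical sketch of the kind
of resolution such a class could perform is shown below (STX_METADATA_ROOT and
the checkout layout are assumptions, not the actual meta-starlingx
implementation):

    # stx-metadata.bbclass: hypothetical sketch, not the real class.
    # Assumes a shared checkout of the StarlingX repos under STX_METADATA_ROOT.
    STX_METADATA_ROOT ?= "${TOPDIR}/stx-checkout"
    STX_METADATA_PATH = "${STX_METADATA_ROOT}/${STX_REPO}/${STX_SUBPATH}"

    python () {
        import os
        path = d.getVar('STX_METADATA_PATH')
        if path and not os.path.isdir(path):
            # Warn at parse time if the expected files are not present
            bb.warn("stx-metadata: %s not found; is the '%s' repo available?"
                    % (path, d.getVar('STX_REPO')))
    }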

ceph-init-wrapper.sh (deleted):

@@ -1,331 +0,0 @@
#!/bin/bash
#
# Copyright (c) 2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# This script is a helper wrapper for pmon monitoring of ceph
# processes. The "/etc/init.d/ceph" script does not know if ceph is
# running on the node. For example when the node is locked, ceph
# processes are not running. In that case we do not want pmond to
# monitor these processes.
#
# The script "/etc/services.d/<node>/ceph.sh" will create the file
# "/var/run/.ceph_started" when ceph is running and remove it when
# it is not.
#
# The script also extracts one or more ceph process names that are
# reported as 'not running', 'dead' or 'failed' by '/etc/init.d/ceph status'
# and writes the names to a text file: /tmp/ceph_status_failure.txt for
# pmond to access. pmond adds the text to logs and alarms. Examples of text
# written to the file by this script are:
# 'osd.1'
# 'osd.1, osd.2'
# 'mon.storage-0'
# 'mon.storage-0, osd.2'
#
# Moreover, for processes that are reported as 'hung' by '/etc/init.d/ceph status'
# the script will try to increase their logging to 'debug' for a configurable interval.
# With logging increased, it outputs a few stack traces; then, at the end of this
# interval, it dumps the process core and kills it.
#
# Return values:
# zero     - /etc/init.d/ceph returned success or ceph is not running on the node
# non-zero - /etc/init.d/ceph returned a failure or the syntax was invalid
#
source /usr/bin/tsconfig
source /etc/platform/platform.conf
CEPH_SCRIPT="/etc/init.d/ceph"
CEPH_FILE="$VOLATILE_PATH/.ceph_started"
CEPH_GET_MON_STATUS_FILE="$VOLATILE_PATH/.ceph_getting_mon_status"
CEPH_GET_OSD_STATUS_FILE="$VOLATILE_PATH/.ceph_getting_osd_status"
CEPH_STATUS_FAILURE_TEXT_FILE="/tmp/ceph_status_failure.txt"
BINDIR=/usr/bin
SBINDIR=/usr/sbin
LIBDIR=/usr/lib64/ceph
ETCDIR=/etc/ceph
source $LIBDIR/ceph_common.sh
LOG_PATH=/var/log/ceph
LOG_FILE=$LOG_PATH/ceph-process-states.log
LOG_LEVEL=NORMAL # DEBUG
verbose=0
DATA_PATH=$VOLATILE_PATH/ceph_hang # folder where we keep state information
mkdir -p $DATA_PATH # make sure folder exists
MONITORING_INTERVAL=15
TRACE_LOOP_INTERVAL=5
CEPH_STATUS_TIMEOUT=20
LOCK_CEPH_MON_SERVICE_FILE="$VOLATILE_PATH/.ceph_mon_status"
LOCK_CEPH_OSD_SERVICE_FILE="$VOLATILE_PATH/.ceph_osd_status"
LOCK_CEPH_MON_STATUS_FILE="$VOLATILE_PATH/.ceph_mon_service"
LOCK_CEPH_OSD_STATUS_FILE="$VOLATILE_PATH/.ceph_osd_service"
# Seconds to wait for ceph status to finish before
# continuing to execute a service action
MONITOR_STATUS_TIMEOUT=30
MAX_STATUS_TIMEOUT=120
RC=0
# SM can only pass arguments through environment variable
# when ARGS is not empty use it to extend command line arguments
args=("$@")
if [ ! -z $ARGS ]; then
IFS=";" read -r -a new_args <<< "$ARGS"
args+=("${new_args[@]}")
fi
with_service_lock ()
{
local target="$1"; shift
[ -z "${target}" ] && target="mon osd"
# Run in sub-shell so we don't leak file descriptors
# used for locking service actions
(
# Grab service locks
wlog "-" INFO "Grab service locks"
[[ "${target}" == *"mon"* ]] && flock ${LOCK_CEPH_MON_SERVICE_FD}
[[ "${target}" == *"osd"* ]] && flock ${LOCK_CEPH_OSD_SERVICE_FD}
# Try to lock status with a timeout in case status is stuck
wlog "-" INFO "Lock service status"
deadline=$((SECONDS + MAX_STATUS_TIMEOUT + 1))
if [[ "${target}" == *"mon"* ]]; then
flock --exclusive --timeout ${MONITOR_STATUS_TIMEOUT} ${LOCK_CEPH_MON_STATUS_FD}
fi
if [[ "${target}" == *"osd"* ]]; then
timeout=$((deadline - SECONDS))
if [[ $timeout -gt 0 ]]; then
flock --exclusive --timeout ${timeout} ${LOCK_CEPH_OSD_STATUS_FD}
fi
fi
# Close lock file descriptors so they are
# not inherited by the spawned process then
# run service action
wlog "-" INFO "Run service action: $@"
"$@" {LOCK_CEPH_MON_SERVICE_FD}>&- \
{LOCK_CEPH_MON_STATUS_FD}>&- \
{LOCK_CEPH_OSD_SERVICE_FD}>&- \
{LOCK_CEPH_OSD_STATUS_FD}>&-
) {LOCK_CEPH_MON_SERVICE_FD}>${LOCK_CEPH_MON_SERVICE_FILE} \
{LOCK_CEPH_MON_STATUS_FD}>${LOCK_CEPH_MON_STATUS_FILE} \
{LOCK_CEPH_OSD_SERVICE_FD}>${LOCK_CEPH_OSD_SERVICE_FILE} \
{LOCK_CEPH_OSD_STATUS_FD}>${LOCK_CEPH_OSD_STATUS_FILE}
RC=$?
}
start ()
{
if [ ! -f ${CEPH_FILE} ]; then
# Ceph is not running on this node, return success
exit 0
fi
wlog "-" INFO "Ceph START $1 command received"
with_service_lock "$1" ${CEPH_SCRIPT} start $1
wlog "-" INFO "Ceph START $1 command finished."
}
stop ()
{
wlog "-" INFO "Ceph STOP $1 command received."
with_service_lock "$1" ${CEPH_SCRIPT} stop $1
wlog "-" INFO "Ceph STOP $1 command finished."
}
restart ()
{
if [ ! -f ${CEPH_FILE} ]; then
# Ceph is not running on this node, return success
exit 0
fi
wlog "-" INFO "Ceph RESTART $1 command received."
with_service_lock "$1" ${CEPH_SCRIPT} restart $1
wlog "-" INFO "Ceph RESTART $1 command finished."
}
log_and_restart_blocked_osds ()
{
# Log info about the blocked osd daemons and then restart them
local names=$1
local message=$2
for name in $names; do
wlog $name "INFO" "$message"
${CEPH_SCRIPT} restart $name
done
}
log_and_kill_hung_procs ()
{
# Log info about the hung processes and then kill them; later on pmon will restart them
local names=$1
for name in $names; do
type=`echo $name | cut -c 1-3` # e.g. 'mon', if $name is 'mon1'
id=`echo $name | cut -c 4- | sed 's/^\\.//'`
get_conf run_dir "/var/run/ceph" "run dir"
get_conf pid_file "$run_dir/$type.$id.pid" "pid file"
pid=$(cat $pid_file)
wlog $name "INFO" "Dealing with hung process (pid:$pid)"
# monitoring interval
wlog $name "INFO" "Increasing log level"
execute_ceph_cmd ret $name "ceph daemon $name config set debug_$type 20/20"
monitoring=$MONITORING_INTERVAL
while [ $monitoring -gt 0 ]; do
if [ $(($monitoring % $TRACE_LOOP_INTERVAL)) -eq 0 ]; then
date=$(date "+%Y-%m-%d_%H-%M-%S")
log_file="$LOG_PATH/hang_trace_${name}_${pid}_${date}.log"
wlog $name "INFO" "Dumping stack trace to: $log_file"
$(pstack $pid >$log_file) &
fi
let monitoring-=1
sleep 1
done
wlog $name "INFO" "Trigger core dump"
kill -ABRT $pid &>/dev/null
rm -f $pid_file # process is dead, core dump is archiving, preparing for restart
# Wait for pending systemd core dumps
sleep 2 # hope systemd_coredump has started meanwhile
deadline=$(( $(date '+%s') + 300 ))
while [[ $(date '+%s') -lt "${deadline}" ]]; do
systemd_coredump_pid=$(pgrep -f "systemd-coredump.*${pid}.*ceph-${type}")
[[ -z "${systemd_coredump_pid}" ]] && break
wlog $name "INFO" "systemd-coredump ceph-${type} in progress: pid ${systemd_coredump_pid}"
sleep 2
done
kill -KILL $pid &>/dev/null
done
}
status ()
{
local target="$1" # no shift here
[ -z "${target}" ] && target="mon osd"
if [ ! -f ${CEPH_FILE} ]; then
# Ceph is not running on this node, return success
exit 0
fi
if [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" != "simplex" ]] && [[ "$1" == "osd" ]]; then
timeout $CEPH_STATUS_TIMEOUT ceph -s
if [ "$?" -ne 0 ]; then
# Ceph cluster is not accessible. Don't panic, controller swact
# may be in progress.
wlog "-" INFO "Ceph is down, ignoring OSD status."
exit 0
fi
fi
# Report success while ceph mon is running a service action
# otherwise mark ceph mon status is in progress
exec {LOCK_CEPH_MON_STATUS_FD}>${LOCK_CEPH_MON_STATUS_FILE}
if [[ "${target}" == *"mon"* ]]; then
flock --shared --nonblock ${LOCK_CEPH_MON_SERVICE_FILE} true
if [[ $? -ne 0 ]]; then
exit 0
fi
# Lock will be released when script exits
flock --shared ${LOCK_CEPH_MON_STATUS_FD}
fi
# Report success while ceph mon is running a service action
# otherwise mark ceph osd status is in progress
exec {LOCK_CEPH_OSD_STATUS_FD}>${LOCK_CEPH_OSD_STATUS_FILE}
if [[ "${target}" == *"osd"* ]]; then
flock --shared --nonblock ${LOCK_CEPH_OSD_SERVICE_FILE} true
if [[ $? -ne 0 ]]; then
exit 0
fi
# Lock will be released when script exits
flock --shared ${LOCK_CEPH_OSD_STATUS_FD}
fi
result=`${CEPH_SCRIPT} status $1 {LOCK_CEPH_MON_STATUS_FD}>&- {LOCK_CEPH_OSD_STATUS_FD}>&-`
RC=$?
if [ "$RC" -ne 0 ]; then
erred_procs=`echo "$result" | sort | uniq | awk ' /not running|dead|failed/ {printf "%s ", $1}' | sed 's/://g' | sed 's/, $//g'`
hung_procs=`echo "$result" | sort | uniq | awk ' /hung/ {printf "%s ", $1}' | sed 's/://g' | sed 's/, $//g'`
blocked_ops_procs=`echo "$result" | sort | uniq | awk ' /blocked ops/ {printf "%s ", $1}' | sed 's/://g' | sed 's/, $//g'`
stuck_peering_procs=`echo "$result" | sort | uniq | awk ' /stuck peering/ {printf "%s ", $1}' | sed 's/://g' | sed 's/, $//g'`
invalid=0
host=`hostname`
if [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" != "simplex" ]]; then
# On 2 node configuration we have a floating monitor
host="controller"
fi
for i in $(echo $erred_procs $hung_procs); do
if [[ "$i" =~ osd.?[0-9]?[0-9]|mon.$host ]]; then
continue
else
invalid=1
fi
done
log_and_restart_blocked_osds "$blocked_ops_procs"\
"Restarting OSD with blocked operations"
log_and_restart_blocked_osds "$stuck_peering_procs"\
"Restarting OSD stuck peering"
log_and_kill_hung_procs $hung_procs
rm -f $CEPH_STATUS_FAILURE_TEXT_FILE
if [ $invalid -eq 0 ]; then
text=""
for i in $erred_procs; do
text+="$i, "
done
for i in $hung_procs; do
text+="$i (process hang), "
done
echo "$text" | tr -d '\n' > $CEPH_STATUS_FAILURE_TEXT_FILE
else
echo "$host: '${CEPH_SCRIPT} status $1' result contains invalid process names: $erred_procs"
echo "Undetermined osd or monitor id" > $CEPH_STATUS_FAILURE_TEXT_FILE
fi
fi
if [[ $RC == 0 ]] && [[ "$1" == "mon" ]] && [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" != "simplex" ]]; then
# SM needs exit code != 0 from 'status mon' argument of the init script on
# standby controller otherwise it thinks that the monitor is running and
# tries to stop it.
# '/etc/init.d/ceph status mon' checks the status of monitors configured in
# /etc/ceph/ceph.conf and if it should be running on current host.
# If it should not be running it just exits with code 0. This is what
# happens on the standby controller.
# When floating monitor is running on active controller /var/lib/ceph/mon of
# standby is not mounted (Ceph monitor partition is DRBD synced).
test -e "/var/lib/ceph/mon/ceph-controller"
if [ "$?" -ne 0 ]; then
exit 3
fi
fi
}
case "${args[0]}" in
start)
start ${args[1]}
;;
stop)
stop ${args[1]}
;;
restart)
restart ${args[1]}
;;
status)
status ${args[1]}
;;
*)
echo "Usage: $0 {start|stop|restart|status} [{mon|osd|osd.<number>|mon.<hostname>}]"
exit 1
;;
esac
exit $RC

ceph-manage-journal.py (deleted):

@@ -1,334 +0,0 @@
#!/usr/bin/python
#
# Copyright (c) 2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import ast
import os
import os.path
import re
import subprocess
import sys
DEVICE_NAME_NVME = "nvme"
#########
# Utils #
#########
def command(arguments, **kwargs):
"""Execute a command and capture stdout, stderr & return code"""
process = subprocess.Popen(
arguments,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
**kwargs)
out, err = process.communicate()
return out, err, process.returncode
def get_input(arg, valid_keys):
"""Convert the input to a dict and perform basic validation"""
json_string = arg.replace("\\n", "\n")
try:
input_dict = ast.literal_eval(json_string)
if not all(k in input_dict for k in valid_keys):
return None
except Exception:
return None
return input_dict
def get_partition_uuid(dev):
output, _, _ = command(['blkid', dev])
try:
return re.search('PARTUUID=\"(.+?)\"', output).group(1)
except AttributeError:
return None
def device_path_to_device_node(device_path):
try:
output, _, _ = command(["udevadm", "settle", "-E", device_path])
out, err, retcode = command(["readlink", "-f", device_path])
out = out.rstrip()
except Exception as e:
return None
return out
###########################################
# Manage Journal Disk Partitioning Scheme #
###########################################
DISK_BY_PARTUUID = "/dev/disk/by-partuuid/"
JOURNAL_UUID = '45b0969e-9b03-4f30-b4c6-b4b80ceff106' # Type of a journal partition
def is_partitioning_correct(disk_path, partition_sizes):
"""Validate the existence and size of journal partitions"""
# Obtain the device node from the device path.
disk_node = device_path_to_device_node(disk_path)
# Check that partition table format is GPT
output, _, _ = command(["udevadm", "settle", "-E", disk_node])
output, _, _ = command(["parted", "-s", disk_node, "print"])
if not re.search('Partition Table: gpt', output):
print("Format of disk node %s is not GPT, zapping disk" % disk_node)
return False
# Check each partition size
partition_index = 1
for size in partition_sizes:
# Check that each partition size matches the one in input
if DEVICE_NAME_NVME in disk_node:
partition_node = '{}p{}'.format(disk_node, str(partition_index))
else:
partition_node = '{}{}'.format(disk_node, str(partition_index))
output, _, _ = command(["udevadm", "settle", "-E", partition_node])
cmd = ["parted", "-s", partition_node, "unit", "MiB", "print"]
output, _, _ = command(cmd)
regex = ("^Disk " + str(partition_node) + ":\\s*" +
str(size) + "[\\.0]*MiB")
if not re.search(regex, output, re.MULTILINE):
print("Journal partition %(node)s size is not %(size)s, "
"zapping disk" % {"node": partition_node, "size": size})
return False
partition_index += 1
output, _, _ = command(["udevadm", "settle", "-t", "10"])
return True
def create_partitions(disk_path, partition_sizes):
"""Recreate partitions"""
# Obtain the device node from the device path.
disk_node = device_path_to_device_node(disk_path)
# Issue: After creating a new partition table on a device, Udev does not
# always remove old symlinks (i.e. to previous partitions on that device).
# Also, even if links are erased before zapping the disk, some of them will
# be recreated even though there is no partition to back them!
# Therefore, we have to remove the links AFTER we erase the partition table
# Issue: DISK_BY_PARTUUID directory is not present at all if there are no
# GPT partitions on the storage node so nothing to remove in this case
links = []
if os.path.isdir(DISK_BY_PARTUUID):
links = [os.path.join(DISK_BY_PARTUUID, l) for l in os.listdir(DISK_BY_PARTUUID)
if os.path.islink(os.path.join(DISK_BY_PARTUUID, l))]
# Erase all partitions on current node by creating a new GPT table
_, err, ret = command(["parted", "-s", disk_node, "mktable", "gpt"])
if ret:
print("Error erasing partition table of %(node)s\n"
"Return code: %(ret)s reason: %(reason)s" %
{"node": disk_node, "ret": ret, "reason": err})
exit(1)
# Erase old symlinks
for l in links:
if disk_node in os.path.realpath(l):
os.remove(l)
# Create partitions in order
used_space_mib = 1 # leave 1 MB at the beginning of the disk
num = 1
for size in partition_sizes:
cmd = ['parted', '-s', disk_node, 'unit', 'mib',
'mkpart', 'primary',
str(used_space_mib), str(used_space_mib + size)]
_, err, ret = command(cmd)
parms = {"disk_node": disk_node,
"start": used_space_mib,
"end": used_space_mib + size,
"reason": err}
print("Created partition from start=%(start)s MiB to end=%(end)s MiB"
" on %(disk_node)s" % parms)
if ret:
print("Failed to create partition with "
"start=%(start)s, end=%(end)s "
"on %(disk_node)s reason: %(reason)s" % parms)
exit(1)
# Set partition type to ceph journal
# noncritical operation, it makes 'ceph-disk list' output correct info
cmd = ['sgdisk',
'--change-name={num}:ceph journal'.format(num=num),
'--typecode={num}:{uuid}'.format(
num=num,
uuid=JOURNAL_UUID,
),
disk_node]
_, err, ret = command(cmd)
if ret:
print("WARNING: Failed to set partition name and typecode")
used_space_mib += size
num += 1
###########################
# Manage Journal Location #
###########################
OSD_PATH = "/var/lib/ceph/osd/"
def mount_data_partition(data_path, osdid):
"""Mount an OSD data partition and return the mounted path"""
# Obtain the device node from the device path.
data_node = device_path_to_device_node(data_path)
mount_path = OSD_PATH + "ceph-" + str(osdid)
output, _, _ = command(['mount'])
regex = "^" + data_node + ".*" + mount_path
if not re.search(regex, output, re.MULTILINE):
cmd = ['mount', '-t', 'xfs', data_node, mount_path]
_, _, ret = command(cmd)
params = {"node": data_node, "path": mount_path}
if ret:
print("Failed to mount %(node)s to %(path)s, aborting" % params)
exit(1)
else:
print("Mounted %(node)s to %(path)s" % params)
return mount_path
def is_location_correct(path, journal_path, osdid):
"""Check if location points to the correct device"""
# Obtain the device node from the device path.
journal_node = device_path_to_device_node(journal_path)
cur_node = os.path.realpath(path + "/journal")
if cur_node == journal_node:
return True
else:
return False
def fix_location(mount_point, journal_path, osdid):
"""Move the journal to the new partition"""
# Obtain the device node from the device path.
journal_node = device_path_to_device_node(journal_path)
# Fix symlink
path = mount_point + "/journal" # 'journal' symlink path used by ceph-osd
journal_uuid = get_partition_uuid(journal_node)
new_target = DISK_BY_PARTUUID + journal_uuid
params = {"path": path, "target": new_target}
try:
if os.path.lexists(path):
os.unlink(path) # delete the old symlink
os.symlink(new_target, path)
print("Symlink created: %(path)s -> %(target)s" % params)
except:
print("Failed to create symlink: %(path)s -> %(target)s" % params)
exit(1)
# Fix journal_uuid
path = mount_point + "/journal_uuid"
try:
with open(path, 'w') as f:
f.write(journal_uuid)
except Exception as ex:
# The operation is noncritical, it only makes 'ceph-disk list'
# display complete output. We log and continue.
params = {"path": path, "uuid": journal_uuid}
print("WARNING: Failed to set uuid of %(path)s to %(uuid)s" % params)
# Clean the journal partition
# even after erasing the partition table, a journal previously present here
# would be reused. Journals are always bigger than 100MB.
command(['dd', 'if=/dev/zero', 'of=%s' % journal_node,
'bs=1M', 'count=100'])
# Format the journal
cmd = ['/usr/bin/ceph-osd', '-i', str(osdid),
'--pid-file', '/var/run/ceph/osd.%s.pid' % osdid,
'-c', '/etc/ceph/ceph.conf',
'--cluster', 'ceph',
'--mkjournal']
out, err, ret = command(cmd)
params = {"journal_node": journal_node,
"osdid": osdid,
"ret": ret,
"reason": err}
if not ret:
print("Prepared new journal partition: %(journal_node)s "
"for osd id: %(osdid)s" % params)
else:
print("Error initializing journal node: "
"%(journal_node)s for osd id: %(osdid)s "
"ceph-osd return code: %(ret)s reason: %(reason)s" % params)
########
# Main #
########
def main(argv):
# parse and validate arguments
err = False
partitions = None
location = None
if len(argv) != 2:
err = True
elif argv[0] == "partitions":
valid_keys = ['disk_path', 'journals']
partitions = get_input(argv[1], valid_keys)
if not partitions:
err = True
elif not isinstance(partitions['journals'], list):
err = True
elif argv[0] == "location":
valid_keys = ['data_path', 'journal_path', 'osdid']
location = get_input(argv[1], valid_keys)
if not location:
err = True
elif not isinstance(location['osdid'], int):
err = True
else:
err = True
if err:
print("Command intended for internal use only")
exit(-1)
if partitions:
# Recreate partitions only if the existing ones don't match input
if not is_partitioning_correct(partitions['disk_path'],
partitions['journals']):
create_partitions(partitions['disk_path'], partitions['journals'])
else:
print("Partition table for %s is correct, "
"no need to repartition" %
device_path_to_device_node(partitions['disk_path']))
elif location:
# we need the data partition mounted & we can leave it mounted
mount_point = mount_data_partition(location['data_path'],
location['osdid'])
# Update journal location only if the link points to another partition
if not is_location_correct(mount_point,
location['journal_path'],
location['osdid']):
print("Fixing journal location for "
"OSD id: %(id)s" % {"node": location['data_path'],
"id": location['osdid']})
fix_location(mount_point,
location['journal_path'],
location['osdid'])
else:
print("Journal location for %s is correct, "
"no need to change it" % location['data_path'])
main(sys.argv[1:])

ceph-preshutdown.sh (deleted):

@@ -1,30 +0,0 @@
#!/bin/bash
#
# Copyright (c) 2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
script=$(basename $0)
# Set nullglob so wildcards will return empty string if no match
shopt -s nullglob
for dev in /dev/rbd[0-9]*; do
for mnt in $(mount | awk -v dev=$dev '($1 == dev) {print $3}'); do
logger -t ${script} "Unmounting $mnt"
/usr/bin/umount $mnt
done
logger -t ${script} "Unmounted $dev"
done
for dev in /dev/rbd[0-9]*; do
/usr/bin/rbd unmap -o force $dev
logger -t ${script} "Unmapped $dev"
done
lsmod | grep -q '^rbd\>' && /usr/sbin/modprobe -r rbd
lsmod | grep -q '^libceph\>' && /usr/sbin/modprobe -r libceph
exit 0

ceph-radosgw.service (deleted):

@@ -1,18 +0,0 @@
[Unit]
Description=radosgw RESTful rados gateway
After=network.target
#After=remote-fs.target nss-lookup.target network-online.target time-sync.target
#Wants=network-online.target
[Service]
Type=forking
Restart=no
KillMode=process
RemainAfterExit=yes
ExecStart=/etc/rc.d/init.d/ceph-radosgw start
ExecStop=/etc/rc.d/init.d/ceph-radosgw stop
ExecReload=/etc/rc.d/init.d/ceph-radosgw reload
[Install]
WantedBy=multi-user.target

ceph.conf (deleted):

@@ -1,58 +0,0 @@
[global]
# Unique ID for the cluster.
fsid = %CLUSTER_UUID%
# Public network the monitor is connected to, e.g. 128.224.0.0/16
#public network = 127.0.0.1/24
# For version 0.55 and beyond, you must explicitly enable
# or disable authentication with "auth" entries in [global].
auth_cluster_required = cephx
auth_service_required = cephx
auth_client_required = cephx
osd_journal_size = 1024
# Uncomment the following line if you are mounting with ext4
# filestore xattr use omap = true
# Number of replicas of objects. Write an object 2 times.
# Cluster cannot reach an active + clean state until there's enough OSDs
# to handle the number of copies of an object. In this case, it requires
# at least 2 OSDs
osd_pool_default_size = 2
# Allow writing one copy in a degraded state.
osd_pool_default_min_size = 1
# Ensure you have a realistic number of placement groups. We recommend
# approximately 100 per OSD. E.g., total number of OSDs multiplied by 100
# divided by the number of replicas (i.e., osd pool default size). So for
# 2 OSDs and osd pool default size = 2, we'd recommend approximately
# (100 * 2) / 2 = 100.
osd_pool_default_pg_num = 64
osd_pool_default_pgp_num = 64
osd_crush_chooseleaf_type = 1
setuser match path = /var/lib/ceph/$type/$cluster-$id
# Override Jewel default of 2 reporters. StarlingX has replication factor 2
mon_osd_min_down_reporters = 1
# Use Hammer's report interval default value
osd_mon_report_interval_max = 120
# Configure max PGs per OSD to cover worst-case scenario of all possible
# StarlingX deployments i.e. AIO-SX with one OSD. Otherwise using
# the default value provided by Ceph Mimic leads to "too many PGs per OSD"
# health warning as the pools needed by stx-openstack are being created.
mon_max_pg_per_osd = 2048
osd_max_pg_per_osd_hard_ratio = 1.2
[osd]
osd_mkfs_type = xfs
osd_mkfs_options_xfs = "-f"
osd_mount_options_xfs = "rw,noatime,inode64,logbufs=8,logbsize=256k"
[mon]
mon warn on legacy crush tunables = false
# Quiet new warnings on move to Hammer
mon pg warn max per osd = 2048
mon pg warn max object skew = 0
mgr initial modules = restful

ceph.conf.pmon (deleted):

@@ -1,26 +0,0 @@
[process]
process = ceph
script = /etc/init.d/ceph-init-wrapper
style = lsb
severity = major ; minor, major, critical
restarts = 3 ; restart retries before error assertion
interval = 30 ; number of seconds to wait between restarts
mode = status ; Monitoring mode: passive (default) or active
; passive: process death monitoring (default: always)
; active : heartbeat monitoring, i.e. request / response messaging
; status : determine process health with executing "status" command
; "start" is used to start the process(es) again
; ignore : do not monitor or stop monitoring
; Status and Active Monitoring Options
period = 30 ; monitor period in seconds
timeout = 120 ; for active mode, messaging timeout period in seconds, must be shorter than period
; for status mode, max amount of time for a command to execute
; Status Monitoring Options
start_arg = start ; start argument for the script
status_arg = status ; status argument for the script
status_failure_text = /tmp/ceph_status_failure.txt ; text to be added to alarms or logs, this is optional

ceph.service (deleted):

@@ -1,16 +0,0 @@
[Unit]
Description=StarlingX Ceph Startup
After=network.target
[Service]
Type=forking
Restart=no
KillMode=process
RemainAfterExit=yes
ExecStart=/etc/rc.d/init.d/ceph start
ExecStop=/etc/rc.d/init.d/ceph stop
PIDFile=/var/run/ceph/ceph.pid
[Install]
WantedBy=multi-user.target

ceph.sh (deleted):

@@ -1,67 +0,0 @@
#!/bin/bash
INITDIR=/etc/init.d
LOGFILE=/var/log/ceph/ceph-init.log
CEPH_FILE=/var/run/.ceph_started
# Get our nodetype
. /etc/platform/platform.conf
# Exit immediately if ceph not configured (i.e. no mon in the config file)
if ! grep -q "mon\." /etc/ceph/ceph.conf
then
exit 0
fi
logecho ()
{
echo $1
date >> ${LOGFILE}
echo $1 >> ${LOGFILE}
}
start ()
{
logecho "Starting ceph services..."
${INITDIR}/ceph start >> ${LOGFILE} 2>&1
RC=$?
if [ ! -f ${CEPH_FILE} ]; then
touch ${CEPH_FILE}
fi
}
stop ()
{
if [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" == "simplex" ]]; then
logecho "Ceph services will continue to run on node"
exit 0
fi
logecho "Stopping ceph services..."
if [ -f ${CEPH_FILE} ]; then
rm -f ${CEPH_FILE}
fi
${INITDIR}/ceph stop >> ${LOGFILE} 2>&1
RC=$?
}
RC=0
case "$1" in
start)
start
;;
stop)
stop
;;
*)
echo "Usage: $0 {start|stop}"
exit 1
;;
esac
logecho "RC was: $RC"
exit $RC

mgr-restful-plugin.py (deleted): file diff suppressed because it is too large.

mgr-restful-plugin.service (deleted):

@@ -1,15 +0,0 @@
[Unit]
Description=Ceph MGR RESTful API Plugin
After=network-online.target sw-patch.service
[Service]
Type=forking
Restart=no
KillMode=process
RemainAfterExit=yes
ExecStart=/etc/rc.d/init.d/mgr-restful-plugin start
ExecStop=/etc/rc.d/init.d/mgr-restful-plugin stop
ExecReload=/etc/rc.d/init.d/mgr-restful-plugin reload
[Install]
WantedBy=multi-user.target

starlingx-docker-override.conf (deleted):

@@ -1,3 +0,0 @@
[Service]
ExecStopPost=/usr/sbin/ceph-preshutdown.sh