ceph: add bbappend to port StarlingX ceph packages
The ceph recipe lives in meta-virtualization; however, StarlingX has made many modifications to ceph, so extra effort is needed to port them using a bbappend. Signed-off-by: Litao Gao <litao.gao@windriver.com>
This commit is contained in:
parent
4882b1767a
commit
6a96c4fe72
85
recipes-core/stx-integ-ceph/ceph_13.2.2.bbappend
Normal file
85
recipes-core/stx-integ-ceph/ceph_13.2.2.bbappend
Normal file
@ -0,0 +1,85 @@
|
||||
FILESEXTRAPATHS_prepend := "${THISDIR}/patches:${THISDIR}/files:"
|
||||
# StarlingX patch plus the configuration, init and helper files that are
# installed by do_install_append below.
#
# NOTE: the "_append" override concatenates without inserting any separator,
# so the value must start with whitespace; otherwise the first entry can be
# glued onto the last item of the base recipe's SRC_URI.
SRC_URI_append = " \
    file://0001-Add-hooks-for-orderly-shutdown-on-controller.patch \
    file://ceph.conf \
    file://ceph-init-wrapper.sh \
    file://ceph-preshutdown.sh \
    file://ceph.service \
    file://mgr-restful-plugin.py \
    file://starlingx-docker-override.conf \
    file://ceph.conf.pmon \
    file://ceph-manage-journal.py \
    file://ceph-radosgw.service \
    file://ceph.sh \
    file://mgr-restful-plugin.service \
    "
|
||||
|
||||
|
||||
do_install_append () {
    # Cluster configuration file.
    install -d ${D}${sysconfdir}/ceph
    install -m 0644 ${WORKDIR}/ceph.conf ${D}${sysconfdir}/ceph/

    # systemd units; the radosgw unit is installed under a template name.
    install -m 0644 ${WORKDIR}/ceph-radosgw.service ${D}${systemd_system_unitdir}/ceph-radosgw@.service
    for unit in ceph.service mgr-restful-plugin.service; do
        install -m 0644 ${WORKDIR}/$unit ${D}${systemd_system_unitdir}
    done

    # Helper scripts and the pmon configuration.
    install -m 0700 ${WORKDIR}/ceph-manage-journal.py ${D}${sbindir}/ceph-manage-journal
    install -D -m 0750 ${WORKDIR}/mgr-restful-plugin.py ${D}${sysconfdir}/rc.d/init.d/mgr-restful-plugin
    install -m 0750 ${WORKDIR}/ceph.conf.pmon ${D}${sysconfdir}/ceph/

    # The same ceph.sh script is installed into each node-type directory.
    for node in controller storage worker; do
        install -d -m 0750 ${D}${sysconfdir}/services.d/$node
        install -m 0750 ${WORKDIR}/ceph.sh ${D}${sysconfdir}/services.d/$node
    done

    install -D -m 0750 ${WORKDIR}/ceph-init-wrapper.sh ${D}${sysconfdir}/rc.d/init.d/ceph-init-wrapper
    install -m 0700 ${WORKDIR}/ceph-preshutdown.sh ${D}${sbindir}/ceph-preshutdown.sh

    # systemd drop-in for docker.service.
    install -D -m 0644 ${WORKDIR}/starlingx-docker-override.conf ${D}${systemd_system_unitdir}/docker.service.d/starlingx-docker-override.conf

    # Files taken from the ceph source tree.
    install -D -m 0644 ${S}/src/etc-rbdmap ${D}${sysconfdir}/ceph/rbdmap
    install -D -m 0644 ${S}/etc/sysconfig/ceph ${D}${sysconfdir}/sysconfig/ceph
    install -D -m 0644 ${S}/src/logrotate.conf ${D}${sysconfdir}/logrotate.d/ceph

    install -D -m 0644 ${S}/COPYING ${D}${docdir}/ceph/COPYING
    install -D -m 0644 ${S}/etc/sysctl/90-ceph-osd.conf ${D}${libdir}/sysctl.d/90-ceph-osd.conf
    for rules in 50-rbd.rules 60-ceph-by-parttypeuuid.rules; do
        install -D -m 0644 ${S}/udev/$rules ${D}${libdir}/udev/rules.d/$rules
    done

    # Runtime, log and state directories.
    for dir in ceph run/ceph log/ceph \
               lib/ceph/tmp lib/ceph/mon lib/ceph/osd lib/ceph/mds \
               lib/ceph/mgr lib/ceph/radosgw \
               lib/ceph/bootstrap-osd lib/ceph/bootstrap-mds \
               lib/ceph/bootstrap-rgw lib/ceph/bootstrap-mgr \
               lib/ceph/bootstrap-rbd; do
        mkdir -p ${D}${localstatedir}/$dir
    done

    # SysV init scripts; SYSTEMCTL_SKIP_REDIRECT=1 is inserted right after the
    # LSB header of the radosgw script.
    install -D -m 0750 ${S}/src/init-radosgw ${D}${sysconfdir}/rc.d/init.d/ceph-radosgw
    sed -i '/### END INIT INFO/a SYSTEMCTL_SKIP_REDIRECT=1' ${D}${sysconfdir}/rc.d/init.d/ceph-radosgw
    install -D -m 0750 ${S}/src/init-rbdmap ${D}${sysconfdir}/rc.d/init.d/rbdmap
    for initdir in rc.d/init.d init.d; do
        install -D -m 0750 ${B}/bin/init-ceph ${D}${sysconfdir}/$initdir/ceph
    done
    install -d -m 0750 ${D}${localstatedir}/log/radosgw
}
|
||||
|
||||
# Append the linker flags to the target compiler architecture flags.
TARGET_CC_ARCH += "${LDFLAGS}"

# The shell scripts installed by this bbappend use a /bin/bash shebang.
RDEPENDS_${PN} += "bash"

# Package the extra directories and unit files installed by do_install_append.
FILES_${PN} += " \
    ${localstatedir} \
    ${libdir} \
    ${docdir} \
    ${systemd_system_unitdir}/mgr-restful-plugin.service \
    ${systemd_system_unitdir}/ceph-radosgw@.service \
    ${systemd_system_unitdir}/ceph.service \
    ${systemd_system_unitdir}/docker.service.d \
    /run \
    "
|
282
recipes-core/stx-integ-ceph/files/ceph-init-wrapper.sh
Executable file
282
recipes-core/stx-integ-ceph/files/ceph-init-wrapper.sh
Executable file
@ -0,0 +1,282 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright (c) 2019 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# This script is a helper wrapper for pmon monitoring of ceph
|
||||
# processes. The "/etc/init.d/ceph" script does not know if ceph is
|
||||
# running on the node. For example when the node is locked, ceph
|
||||
# processes are not running. In that case we do not want pmond to
|
||||
# monitor these processes.
|
||||
#
|
||||
# The script "/etc/services.d/<node>/ceph.sh" will create the file
|
||||
# "/var/run/.ceph_started" when ceph is running and remove it when
|
||||
# is not.
|
||||
#
|
||||
# The script also extracts one or more ceph process names that are
|
||||
# reported as 'not running' or 'dead' or 'failed' by '/etc/init.d/ceph status'
|
||||
# and writes the names to a text file: /tmp/ceph_status_failure.txt for
|
||||
# pmond to access. The pmond adds the text to logs and alarms. Example of text
|
||||
# samples written to file by this script are:
|
||||
# 'osd.1'
|
||||
# 'osd.1, osd.2'
|
||||
# 'mon.storage-0'
|
||||
# 'mon.storage-0, osd.2'
|
||||
#
|
||||
# Moreover, for processes that are reported as 'hung' by '/etc/init.d/ceph status'
|
||||
# the script will try to increase their logging to 'debug' for a configurable interval.
|
||||
# With logging increased it outputs a few stack traces; then, at the end of this
|
||||
# interval, it dumps its stack core and kills it.
|
||||
#
|
||||
# Return values:
|
||||
# zero - /etc/init.d/ceph returned success or ceph is not running on the node
|
||||
# non-zero - /etc/init.d/ceph returned a failure or invalid syntax
|
||||
#
|
||||
|
||||
# External settings. VOLATILE_PATH, system_type and system_mode are used by
# this script but not defined in it; presumably they come from these two
# files (TODO confirm).
source /usr/bin/tsconfig
source /etc/platform/platform.conf

# Real ceph init script and the flag/checkpoint files used by this wrapper.
CEPH_SCRIPT="/etc/init.d/ceph"
CEPH_FILE="$VOLATILE_PATH/.ceph_started"
CEPH_RESTARTING_FILE="$VOLATILE_PATH/.ceph_restarting"
CEPH_GET_STATUS_FILE="$VOLATILE_PATH/.ceph_getting_status"
CEPH_STATUS_FAILURE_TEXT_FILE="/tmp/ceph_status_failure.txt"

BINDIR=/usr/bin
SBINDIR=/usr/sbin
LIBDIR=/usr/lib64/ceph
ETCDIR=/etc/ceph
# Quoted so the path is passed as a single word whatever it contains.
source "$LIBDIR/ceph_common.sh"

LOG_PATH=/var/log/ceph
LOG_FILE=$LOG_PATH/ceph-process-states.log
LOG_LEVEL=NORMAL  # DEBUG
verbose=0

DATA_PATH=$VOLATILE_PATH/ceph_hang    # folder where we keep state information
mkdir -p "$DATA_PATH"                 # make sure folder exists

# Intervals and timeouts, in seconds.
MONITORING_INTERVAL=15
TRACE_LOOP_INTERVAL=5
GET_STATUS_TIMEOUT=120
CEPH_STATUS_TIMEOUT=20

WAIT_FOR_CMD=1

# Exit code of the wrapper; updated by start() and status().
RC=0
|
||||
|
||||
# Arguments of the wrapper: the positional parameters, optionally followed by
# the entries of the ARGS environment variable (a ';'-separated list).
args=("$@")

# The expansion is quoted: with an unquoted multi-word or glob-bearing $ARGS
# the '[' test fails with a syntax error and the extra arguments are lost.
if [[ -n "${ARGS:-}" ]]; then
    IFS=";" read -r -a new_args <<< "$ARGS"
    args+=("${new_args[@]}")
fi
|
||||
|
||||
wait_for_status ()
{
    # Block while the status checkpoint file exists, polling once per second
    # for at most $GET_STATUS_TIMEOUT seconds.
    timeout=$GET_STATUS_TIMEOUT
    until [[ ! -f "${CEPH_GET_STATUS_FILE}" ]] || (( timeout <= 0 )); do
        sleep 1
        timeout=$(( timeout - 1 ))
    done

    # Budget used up: warn and remove the checkpoint file so we can continue.
    if (( timeout == 0 )); then
        wlog "-" "WARN" "Getting status takes more than ${GET_STATUS_TIMEOUT}s, continuing"
        rm -f $CEPH_GET_STATUS_FILE
    fi
}
|
||||
|
||||
start ()
{
    # Ceph is not running on this node, return success
    [ -f ${CEPH_FILE} ] || exit 0

    # Let a status query in progress finish, then start the requested
    # daemon(s) and record the init script's exit code in RC.
    wait_for_status
    ${CEPH_SCRIPT} start $1
    RC=$?
}
|
||||
|
||||
stop ()
{
    # Optional daemon selector, forwarded to the init script as given.
    local target=$1

    # Let a status query in progress finish before stopping.
    wait_for_status
    ${CEPH_SCRIPT} stop $target
}
|
||||
|
||||
restart ()
{
    # Ceph is not running on this node, return success
    [ -f ${CEPH_FILE} ] || exit 0

    wait_for_status

    # The restarting flag exists only while the init script runs; status()
    # checks for it and skips reporting when it is present.
    touch $CEPH_RESTARTING_FILE
    ${CEPH_SCRIPT} restart $1
    rm -f $CEPH_RESTARTING_FILE
}
|
||||
|
||||
log_and_restart_blocked_osds ()
{
    # Log info about the blocked osd daemons and then restart them.
    #
    # Arguments: names of the OSDs to restart (e.g. "osd.1"), given either as
    #            separate words or as one space-separated string.
    #
    # "$*" is used instead of "$1": when the list is passed unquoted it
    # arrives as several arguments and "$1" would only hold the first name.
    local names="$*"
    local name
    for name in $names; do
        wlog "$name" "INFO" "Restarting OSD with blocked operations"
        ${CEPH_SCRIPT} restart "$name"
    done
}
|
||||
|
||||
log_and_kill_hung_procs ()
{
    # Log info about the hung processes and then kill them; later on pmon will restart them
    #
    # Arguments: names of the hung daemons (e.g. "osd.1" "mon.storage-0"),
    #            given either as separate words or as one space-separated string.
    #
    # "$*" is used instead of "$1": when the list is passed unquoted it
    # arrives as several arguments and "$1" would only hold the first name.
    local names="$*"
    for name in $names; do
        # name/type/id/pid are deliberately not declared local here.
        # NOTE(review): get_conf/execute_ceph_cmd are defined outside this
        # script and may read them - confirm before narrowing their scope.
        type=${name:0:3}    # first three characters, e.g. 'mon' for 'mon.storage-0'
        id=${name:3}
        id=${id#.}          # drop the dot separating type and id, if present
        get_conf run_dir "/var/run/ceph" "run dir"
        get_conf pid_file "$run_dir/$type.$id.pid" "pid file"
        pid=$(cat "$pid_file")
        wlog "$name" "INFO" "Dealing with hung process (pid:$pid)"

        # monitoring interval
        wlog "$name" "INFO" "Increasing log level"
        execute_ceph_cmd ret "$name" "ceph daemon $name config set debug_$type 20/20"
        monitoring=$MONITORING_INTERVAL
        while [ "$monitoring" -gt 0 ]; do
            # Dump a stack trace every TRACE_LOOP_INTERVAL seconds.
            if [ $(( monitoring % TRACE_LOOP_INTERVAL )) -eq 0 ]; then
                date=$(date "+%Y-%m-%d_%H-%M-%S")
                log_file="$LOG_PATH/hang_trace_${name}_${pid}_${date}.log"
                wlog "$name" "INFO" "Dumping stack trace to: $log_file"
                # Run pstack directly in the background; wrapping it in $( )
                # would try to execute its (redirected, empty) output.
                pstack "$pid" >"$log_file" &
            fi
            monitoring=$(( monitoring - 1 ))
            sleep 1
        done
        wlog "$name" "INFO" "Trigger core dump"
        kill -ABRT "$pid" &>/dev/null
        rm -f "$pid_file" # process is dead, core dump is archiving, preparing for restart
        # Wait for pending systemd core dumps
        sleep 2 # hope systemd_coredump has started meanwhile
        deadline=$(( $(date '+%s') + 300 ))
        while [[ $(date '+%s') -lt "${deadline}" ]]; do
            systemd_coredump_pid=$(pgrep -f "systemd-coredump.*${pid}.*ceph-${type}")
            [[ -z "${systemd_coredump_pid}" ]] && break
            wlog "$name" "INFO" "systemd-coredump ceph-${type} in progress: pid ${systemd_coredump_pid}"
            sleep 2
        done
        kill -KILL "$pid" &>/dev/null
    done
}
|
||||
|
||||
|
||||
# Print on one line, space-separated, the first field (with every ':' removed)
# of each distinct line of the status output in $2 that matches the awk
# regular expression in $1.
_ceph_status_names ()
{
    echo "$2" | sort -u \
        | awk -v pattern="$1" '$0 ~ pattern {printf "%s ", $1}' \
        | sed 's/://g' | sed 's/, $//g'
}

# Query '${CEPH_SCRIPT} status $1', store its exit code in RC and, on failure,
# write the names of the failed/hung processes to
# $CEPH_STATUS_FAILURE_TEXT_FILE. OSDs reported with blocked ops are restarted
# and hung processes are killed.
#
# Exits the script with 0 when ceph is not running on this node, when a
# restart is in progress, or (All-in-one duplex, 'osd') when the cluster is
# not reachable; exits with 3 in the standby-controller case described below.
status ()
{
    if [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" != "simplex" ]] && [[ "$1" == "osd" ]]; then
        if ! timeout $CEPH_STATUS_TIMEOUT ceph -s; then
            # Ceph cluster is not accessible. Don't panic, controller swact
            # may be in progress.
            wlog "-" INFO "Ceph is down, ignoring OSD status."
            exit 0
        fi
    fi

    if [ -f "${CEPH_RESTARTING_FILE}" ]; then
        # Ceph is restarting, we don't report state changes on the first pass
        rm -f "${CEPH_RESTARTING_FILE}"
        exit 0
    fi

    if [ ! -f "${CEPH_FILE}" ]; then
        # Ceph is not running on this node, return success
        exit 0
    fi

    # Make sure the script does not 'exit' between here and the 'rm -f' below
    # or the checkpoint file will be left behind
    touch -f "${CEPH_GET_STATUS_FILE}"
    result=$(${CEPH_SCRIPT} status $1)
    RC=$?
    if [ "$RC" -ne 0 ]; then
        erred_procs=$(_ceph_status_names 'not running|dead|failed' "$result")
        hung_procs=$(_ceph_status_names 'hung' "$result")
        blocked_ops_procs=$(_ceph_status_names 'blocked ops' "$result")
        invalid=0
        host=$(hostname)
        if [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" != "simplex" ]]; then
            # On 2 node configuration we have a floating monitor
            host="controller"
        fi
        # Every reported name must look like an OSD or like this host's monitor.
        for i in $erred_procs $hung_procs; do
            if [[ ! "$i" =~ osd.?[0-9]?[0-9]|mon.$host ]]; then
                invalid=1
            fi
        done

        # The lists are passed quoted, as one space-separated argument, so the
        # helpers receive every name and not just the first one.
        log_and_restart_blocked_osds "$blocked_ops_procs"
        log_and_kill_hung_procs "$hung_procs"

        rm -f "$CEPH_STATUS_FAILURE_TEXT_FILE"
        if [ "$invalid" -eq 0 ]; then
            text=""
            for i in $erred_procs; do
                text+="$i, "
            done
            for i in $hung_procs; do
                text+="$i (process hang), "
            done
            # Drop the trailing separator so the text matches the documented
            # samples (e.g. 'osd.1, osd.2').
            text=${text%, }
            printf '%s' "$text" > "$CEPH_STATUS_FAILURE_TEXT_FILE"
        else
            echo "$host: '${CEPH_SCRIPT} status $1' result contains invalid process names: $erred_procs"
            echo "Undetermined osd or monitor id" > "$CEPH_STATUS_FAILURE_TEXT_FILE"
        fi
    fi

    rm -f "${CEPH_GET_STATUS_FILE}"

    if [[ $RC == 0 ]] && [[ "$1" == "mon" ]] && [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" != "simplex" ]]; then
        # SM needs exit code != 0 from 'status mon' argument of the init script on
        # standby controller otherwise it thinks that the monitor is running and
        # tries to stop it.
        # '/etc/init.d/ceph status mon' checks the status of monitors configured in
        # /etc/ceph/ceph.conf and if it should be running on current host.
        # If it should not be running it just exits with code 0. This is what
        # happens on the standby controller.
        # When floating monitor is running on active controller /var/lib/ceph/mon of
        # standby is not mounted (Ceph monitor partition is DRBD synced).
        if [ ! -e "/var/lib/ceph/mon/ceph-controller" ]; then
            exit 3
        fi
    fi
}
|
||||
|
||||
|
||||
# Dispatch to the function named like the requested action; the optional
# second argument selects the daemon(s) and is forwarded as given.
case "${args[0]}" in
    start|stop|restart|status)
        "${args[0]}" ${args[1]}
        ;;
    *)
        echo "Usage: $0 {start|stop|restart|status} [{mon|osd|osd.<number>|mon.<hostname>}]"
        exit 1
        ;;
esac

exit $RC
|
334
recipes-core/stx-integ-ceph/files/ceph-manage-journal.py
Normal file
334
recipes-core/stx-integ-ceph/files/ceph-manage-journal.py
Normal file
@ -0,0 +1,334 @@
|
||||
#!/usr/bin/python
|
||||
#
|
||||
# Copyright (c) 2019 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
import ast
|
||||
import os
|
||||
import os.path
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
DEVICE_NAME_NVME = "nvme"
|
||||
|
||||
#########
|
||||
# Utils #
|
||||
#########
|
||||
|
||||
|
||||
def command(arguments, **kwargs):
    """Execute a command and capture stdout, stderr & return code.

    :param arguments: the command and its arguments, passed to
        subprocess.Popen
    :param kwargs: extra keyword arguments forwarded to subprocess.Popen
    :returns: tuple (stdout, stderr, returncode)
    """
    # Read the pipes in text mode unless the caller decides otherwise: the
    # callers run str regular expressions over the output, which raises
    # TypeError on the bytes objects Python 3 returns by default.
    kwargs.setdefault("universal_newlines", True)
    process = subprocess.Popen(
        arguments,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        **kwargs)
    out, err = process.communicate()
    return out, err, process.returncode
|
||||
|
||||
|
||||
def get_input(arg, valid_keys):
    """Parse the input string into a Python literal and validate its keys.

    Two-character "\\n" sequences in arg are turned into real newlines
    before the string is evaluated with ast.literal_eval.

    :returns: the parsed object, or None when parsing fails or when any of
        valid_keys is missing from it
    """
    literal = arg.replace("\\n", "\n")
    try:
        parsed = ast.literal_eval(literal)
        for key in valid_keys:
            if key not in parsed:
                return None
    except Exception:
        return None

    return parsed
|
||||
|
||||
|
||||
def get_partition_uuid(dev):
    """Return the PARTUUID that blkid reports for dev, or None if absent."""
    output, _, _ = command(['blkid', dev])
    match = re.search('PARTUUID=\"(.+?)\"', output)
    if match is None:
        return None
    return match.group(1)
|
||||
|
||||
|
||||
def device_path_to_device_node(device_path):
    """Resolve a device path to its device node with 'readlink -f'.

    'udevadm settle -E <device_path>' is run first.

    :returns: the resolved path with trailing whitespace stripped, or None
        if running the commands raises an exception
    """
    try:
        command(["udevadm", "settle", "-E", device_path])
        resolved = command(["readlink", "-f", device_path])[0].rstrip()
    except Exception:
        return None

    return resolved
|
||||
|
||||
|
||||
###########################################
|
||||
# Manage Journal Disk Partitioning Scheme #
|
||||
###########################################
|
||||
|
||||
DISK_BY_PARTUUID = "/dev/disk/by-partuuid/"
|
||||
JOURNAL_UUID = '45b0969e-9b03-4f30-b4c6-b4b80ceff106' # Type of a journal partition
|
||||
|
||||
|
||||
def is_partitioning_correct(disk_path, partition_sizes):
    """Validate the existence and size of journal partitions.

    :param disk_path: device path of the journal disk
    :param partition_sizes: expected size of each partition in MiB, in
        partition-number order
    :returns: True when the disk has a GPT partition table and partition
        N reports the N-th expected size, False otherwise
    """
    # Obtain the device node from the device path.
    disk_node = device_path_to_device_node(disk_path)

    # Check that partition table format is GPT
    command(["udevadm", "settle", "-E", disk_node])
    table, _, _ = command(["parted", "-s", disk_node, "print"])
    if re.search('Partition Table: gpt', table) is None:
        print("Format of disk node %s is not GPT, zapping disk" % disk_node)
        return False

    # NVMe partition nodes are named <disk>p<N>, other disks use <disk><N>.
    separator = 'p' if DEVICE_NAME_NVME in disk_node else ''

    # Check that each partition size matches the one in input
    for index, size in enumerate(partition_sizes, start=1):
        partition_node = '{}{}{}'.format(disk_node, separator, index)

        command(["udevadm", "settle", "-E", partition_node])
        listing, _, _ = command(
            ["parted", "-s", partition_node, "unit", "MiB", "print"])

        expected = ("^Disk " + str(partition_node) + ":\\s*" +
                    str(size) + "[\\.0]*MiB")
        if re.search(expected, listing, re.MULTILINE) is None:
            print("Journal partition %(node)s size is not %(size)s, "
                  "zapping disk" % {"node": partition_node, "size": size})
            return False

    command(["udevadm", "settle", "-t", "10"])
    return True
|
||||
|
||||
|
||||
def create_partitions(disk_path, partition_sizes):
    """Recreate the journal partitions on a disk.

    Writes a new GPT partition table to the disk, removes the
    /dev/disk/by-partuuid symlinks that resolve to it, then creates one
    partition per entry of partition_sizes, in order, starting 1 MiB into
    the disk. Each partition is also given the ceph journal name and type
    code (best effort).

    :param disk_path: device path of the journal disk
    :param partition_sizes: partition sizes in MiB, in creation order

    Exits the process with status 1 if the partition table or any of the
    partitions cannot be created.
    """

    # Obtain the device node from the device path.
    disk_node = device_path_to_device_node(disk_path)

    # Issue: After creating a new partition table on a device, Udev does not
    # always remove old symlinks (i.e. to previous partitions on that device).
    # Also, even if links are erased before zapping the disk, some of them will
    # be recreated even though there is no partition to back them!
    # Therefore, we have to remove the links AFTER we erase the partition table
    # Issue: DISK_BY_PARTUUID directory is not present at all if there are no
    # GPT partitions on the storage node so nothing to remove in this case
    links = []
    if os.path.isdir(DISK_BY_PARTUUID):
        links = [os.path.join(DISK_BY_PARTUUID, entry)
                 for entry in os.listdir(DISK_BY_PARTUUID)
                 if os.path.islink(os.path.join(DISK_BY_PARTUUID, entry))]

    # Erase all partitions on current node by creating a new GPT table
    _, err, ret = command(["parted", "-s", disk_node, "mktable", "gpt"])
    if ret:
        print("Error erasing partition table of %(node)s\n"
              "Return code: %(ret)s reason: %(reason)s" %
              {"node": disk_node, "ret": ret, "reason": err})
        sys.exit(1)

    # Erase old symlinks
    for link in links:
        if disk_node in os.path.realpath(link):
            os.remove(link)

    # Create partitions in order
    used_space_mib = 1  # leave 1 MB at the beginning of the disk
    for num, size in enumerate(partition_sizes, start=1):
        cmd = ['parted', '-s', disk_node, 'unit', 'mib',
               'mkpart', 'primary',
               str(used_space_mib), str(used_space_mib + size)]
        _, err, ret = command(cmd)
        parms = {"disk_node": disk_node,
                 "start": used_space_mib,
                 "end": used_space_mib + size,
                 "reason": err}
        # Check the result before reporting success, so a failed mkpart is
        # never logged as "Created partition ...".
        if ret:
            print("Failed to create partition with "
                  "start=%(start)s, end=%(end)s "
                  "on %(disk_node)s reason: %(reason)s" % parms)
            sys.exit(1)
        print("Created partition from start=%(start)s MiB to end=%(end)s MiB"
              " on %(disk_node)s" % parms)

        # Set partition type to ceph journal
        # noncritical operation, it makes 'ceph-disk list' output correct info
        cmd = ['sgdisk',
               '--change-name={num}:ceph journal'.format(num=num),
               '--typecode={num}:{uuid}'.format(
                   num=num,
                   uuid=JOURNAL_UUID,
               ),
               disk_node]
        _, err, ret = command(cmd)
        if ret:
            print("WARNING: Failed to set partition name and typecode")
        used_space_mib += size
|
||||
|
||||
|
||||
###########################
|
||||
# Manage Journal Location #
|
||||
###########################
|
||||
|
||||
OSD_PATH = "/var/lib/ceph/osd/"
|
||||
|
||||
|
||||
def mount_data_partition(data_path, osdid):
    """Mount an OSD data partition and return the mounted path.

    The partition is mounted (as xfs) on OSD_PATH + "ceph-<osdid>" unless
    the output of 'mount' already lists it there.

    :param data_path: device path of the OSD data partition
    :param osdid: id of the OSD
    :returns: the mount path

    Exits the process with status 1 if the mount command fails.
    """

    # Obtain the device node from the device path.
    data_node = device_path_to_device_node(data_path)

    mount_path = OSD_PATH + "ceph-" + str(osdid)
    output, _, _ = command(['mount'])
    # Escape both paths so their characters are matched literally.
    regex = "^" + re.escape(data_node) + ".*" + re.escape(mount_path)
    if not re.search(regex, output, re.MULTILINE):
        cmd = ['mount', '-t', 'xfs', data_node, mount_path]
        _, _, ret = command(cmd)
        params = {"node": data_node, "path": mount_path}
        if ret:
            # "%(path)s": without the trailing conversion type the format
            # string is invalid and raises ValueError instead of printing.
            print("Failed to mount %(node)s to %(path)s, aborting" % params)
            sys.exit(1)
        else:
            print("Mounted %(node)s to %(path)s" % params)
    return mount_path
|
||||
|
||||
|
||||
def is_location_correct(path, journal_path, osdid):
    """Check if the journal link points to the correct device.

    :param path: directory that contains the 'journal' entry
    :param journal_path: device path of the expected journal partition
    :param osdid: not used by this check
    :returns: True when the real path of <path>/journal equals the device
        node of journal_path, False otherwise
    """

    # Obtain the device node from the device path.
    journal_node = device_path_to_device_node(journal_path)

    return os.path.realpath(path + "/journal") == journal_node
|
||||
|
||||
|
||||
def fix_location(mount_point, journal_path, osdid):
    """Move the journal to the new partition.

    Re-points <mount_point>/journal at the by-partuuid link of the journal
    partition, rewrites <mount_point>/journal_uuid (best effort), zeroes
    the first 100 MiB of the journal partition and runs
    'ceph-osd --mkjournal' for the OSD.

    :param mount_point: path where the OSD data partition is mounted
    :param journal_path: device path of the new journal partition
    :param osdid: id of the OSD

    Exits the process with status 1 if the partition UUID cannot be
    determined or the symlink cannot be created.
    """

    # Obtain the device node from the device path.
    journal_node = device_path_to_device_node(journal_path)

    # Fix symlink
    path = mount_point + "/journal"  # 'journal' symlink path used by ceph-osd
    journal_uuid = get_partition_uuid(journal_node)
    if journal_uuid is None:
        # Without a PARTUUID there is no by-partuuid link to point at; fail
        # with a message instead of a TypeError on the concatenation below.
        print("Failed to determine partition UUID of %s" % journal_node)
        sys.exit(1)
    new_target = DISK_BY_PARTUUID + journal_uuid
    params = {"path": path, "target": new_target}
    try:
        if os.path.lexists(path):
            os.unlink(path)  # delete the old symlink
        os.symlink(new_target, path)
        print("Symlink created: %(path)s -> %(target)s" % params)
    except OSError:
        print("Failed to create symlink: %(path)s -> %(target)s" % params)
        sys.exit(1)

    # Fix journal_uuid
    path = mount_point + "/journal_uuid"
    try:
        with open(path, 'w') as f:
            f.write(journal_uuid)
    except Exception:
        # The operation is noncritical, it only makes 'ceph-disk list'
        # display complete output. We log and continue.
        params = {"path": path, "uuid": journal_uuid}
        print("WARNING: Failed to set uuid of %(path)s to %(uuid)s" % params)

    # Clean the journal partition
    # even if erasing the partition table, if another journal was present here
    # it's going to be reused. Journals are always bigger than 100MB.
    command(['dd', 'if=/dev/zero', 'of=%s' % journal_node,
             'bs=1M', 'count=100'])

    # Format the journal
    cmd = ['/usr/bin/ceph-osd', '-i', str(osdid),
           '--pid-file', '/var/run/ceph/osd.%s.pid' % osdid,
           '-c', '/etc/ceph/ceph.conf',
           '--cluster', 'ceph',
           '--mkjournal']
    out, err, ret = command(cmd)
    params = {"journal_node": journal_node,
              "osdid": osdid,
              "ret": ret,
              "reason": err}
    if not ret:
        print("Prepared new journal partition: %(journal_node)s "
              "for osd id: %(osdid)s" % params)
    else:
        print("Error initializing journal node: "
              "%(journal_node)s for osd id: %(osdid)s "
              "ceph-osd return code: %(ret)s reason: %(reason)s" % params)
|
||||
|
||||
|
||||
########
|
||||
# Main #
|
||||
########
|
||||
|
||||
def main(argv):
    """Entry point: validate the arguments and run the requested action.

    :param argv: two items - the action ("partitions" or "location") and
        a Python-literal dict with the parameters of that action:
        'disk_path' and 'journals' (a list) for "partitions";
        'data_path', 'journal_path' and 'osdid' (an int) for "location".

    Exits the process with status -1 when the arguments are invalid.
    """
    # parse and validate arguments
    err = False
    partitions = None
    location = None
    if len(argv) != 2:
        err = True
    elif argv[0] == "partitions":
        valid_keys = ['disk_path', 'journals']
        partitions = get_input(argv[1], valid_keys)
        if not partitions:
            err = True
        elif not isinstance(partitions['journals'], list):
            err = True
    elif argv[0] == "location":
        valid_keys = ['data_path', 'journal_path', 'osdid']
        location = get_input(argv[1], valid_keys)
        if not location:
            err = True
        elif not isinstance(location['osdid'], int):
            err = True
    else:
        err = True
    if err:
        print("Command intended for internal use only")
        sys.exit(-1)

    if partitions:
        # Recreate partitions only if the existing ones don't match input
        if not is_partitioning_correct(partitions['disk_path'],
                                       partitions['journals']):
            create_partitions(partitions['disk_path'], partitions['journals'])
        else:
            print("Partition table for %s is correct, "
                  "no need to repartition" %
                  device_path_to_device_node(partitions['disk_path']))
    elif location:
        # we need to have the data partition mounted & we can let it mounted
        mount_point = mount_data_partition(location['data_path'],
                                           location['osdid'])
        # Update journal location only if link point to another partition
        if not is_location_correct(mount_point,
                                   location['journal_path'],
                                   location['osdid']):
            print("Fixing journal location for "
                  "OSD id: %(id)s" % {"id": location['osdid']})
            fix_location(mount_point,
                         location['journal_path'],
                         location['osdid'])
        else:
            # A space follows the comma so the two parts of the message do
            # not run together.
            print("Journal location for %s is correct, "
                  "no need to change it" % location['data_path'])
|
||||
|
||||
|
||||
# Run only when executed as a script, so importing the module has no side
# effects.
if __name__ == "__main__":
    main(sys.argv[1:])
|
30
recipes-core/stx-integ-ceph/files/ceph-preshutdown.sh
Normal file
30
recipes-core/stx-integ-ceph/files/ceph-preshutdown.sh
Normal file
@ -0,0 +1,30 @@
|
||||
#!/bin/bash
#
# Copyright (c) 2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# Unmount every filesystem mounted from a /dev/rbd* device, unmap the rbd
# devices and unload the rbd and libceph kernel modules. Always exits 0.
#
# NOTE(review): umount, rbd and modprobe are called by absolute /usr path -
# confirm these locations match the target image.

script=$(basename "$0")

# Set nullglob so wildcards will return empty string if no match
shopt -s nullglob

for dev in /dev/rbd[0-9]*; do
    # Collect the mount points first, then unmount them one by one.
    mapfile -t mounts < <(mount | awk -v dev="$dev" '($1 == dev) {print $3}')
    for mnt in "${mounts[@]}"; do
        logger -t "${script}" "Unmounting $mnt"
        /usr/bin/umount "$mnt"
    done
    logger -t "${script}" "Unmounted $dev"
done

for dev in /dev/rbd[0-9]*; do
    /usr/bin/rbd unmap -o force "$dev"
    logger -t "${script}" "Unmapped $dev"
done

# Unload the kernel modules only if they are currently loaded.
lsmod | grep -q '^rbd\>' && /usr/sbin/modprobe -r rbd
lsmod | grep -q '^libceph\>' && /usr/sbin/modprobe -r libceph

exit 0
|
||||
|
18
recipes-core/stx-integ-ceph/files/ceph-radosgw.service
Normal file
18
recipes-core/stx-integ-ceph/files/ceph-radosgw.service
Normal file
@ -0,0 +1,18 @@
|
||||
[Unit]
|
||||
Description=radosgw RESTful rados gateway
|
||||
After=network.target
|
||||
#After=remote-fs.target nss-lookup.target network-online.target time-sync.target
|
||||
#Wants=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=forking
|
||||
Restart=no
|
||||
KillMode=process
|
||||
RemainAfterExit=yes
|
||||
ExecStart=/etc/rc.d/init.d/ceph-radosgw start
|
||||
ExecStop=/etc/rc.d/init.d/ceph-radosgw stop
|
||||
ExecReload=/etc/rc.d/init.d/ceph-radosgw reload
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
58
recipes-core/stx-integ-ceph/files/ceph.conf
Normal file
58
recipes-core/stx-integ-ceph/files/ceph.conf
Normal file
@ -0,0 +1,58 @@
|
||||
[global]
|
||||
# Unique ID for the cluster.
|
||||
fsid = %CLUSTER_UUID%
|
||||
# Public network where the monitor is connected to, e.g. 128.224.0.0/16
|
||||
#public network = 127.0.0.1/24
|
||||
# For version 0.55 and beyond, you must explicitly enable
|
||||
# or disable authentication with "auth" entries in [global].
|
||||
auth_cluster_required = cephx
|
||||
auth_service_required = cephx
|
||||
auth_client_required = cephx
|
||||
osd_journal_size = 1024
|
||||
|
||||
# Uncomment the following line if you are mounting with ext4
|
||||
# filestore xattr use omap = true
|
||||
|
||||
# Number of replicas of objects. Write an object 2 times.
|
||||
# Cluster cannot reach an active + clean state until there's enough OSDs
|
||||
# to handle the number of copies of an object. In this case, it requires
|
||||
# at least 2 OSDs
|
||||
osd_pool_default_size = 2
|
||||
|
||||
# Allow writing one copy in a degraded state.
|
||||
osd_pool_default_min_size = 1
|
||||
|
||||
# Ensure you have a realistic number of placement groups. We recommend
|
||||
# approximately 100 per OSD. E.g., total number of OSDs multiplied by 100
|
||||
# divided by the number of replicas (i.e., osd pool default size). So for
|
||||
# 2 OSDs and osd pool default size = 2, we'd recommend approximately
|
||||
# (100 * 2) / 2 = 100.
|
||||
osd_pool_default_pg_num = 64
|
||||
osd_pool_default_pgp_num = 64
|
||||
osd_crush_chooseleaf_type = 1
|
||||
setuser match path = /var/lib/ceph/$type/$cluster-$id
|
||||
|
||||
# Override Jewel default of 2 reporters. StarlingX has replication factor 2
|
||||
mon_osd_min_down_reporters = 1
|
||||
|
||||
# Use Hammer's report interval default value
|
||||
osd_mon_report_interval_max = 120
|
||||
|
||||
# Configure max PGs per OSD to cover worst-case scenario of all possible
|
||||
# StarlingX deployments i.e. AIO-SX with one OSD. Otherwise using
|
||||
# the default value provided by Ceph Mimic leads to "too many PGs per OSD"
|
||||
# health warning as the pools needed by stx-openstack are being created.
|
||||
mon_max_pg_per_osd = 2048
|
||||
osd_max_pg_per_osd_hard_ratio = 1.2
|
||||
|
||||
[osd]
|
||||
osd_mkfs_type = xfs
|
||||
osd_mkfs_options_xfs = "-f"
|
||||
osd_mount_options_xfs = "rw,noatime,inode64,logbufs=8,logbsize=256k"
|
||||
|
||||
[mon]
|
||||
mon warn on legacy crush tunables = false
|
||||
# Quiet new warnings on move to Hammer
|
||||
mon pg warn max per osd = 2048
|
||||
mon pg warn max object skew = 0
|
||||
mgr initial modules = restful
|
26
recipes-core/stx-integ-ceph/files/ceph.conf.pmon
Normal file
26
recipes-core/stx-integ-ceph/files/ceph.conf.pmon
Normal file
@ -0,0 +1,26 @@
|
||||
[process]
|
||||
process = ceph
|
||||
script = /etc/init.d/ceph-init-wrapper
|
||||
|
||||
style = lsb
|
||||
severity = major ; minor, major, critical
|
||||
restarts = 3 ; restart retries before error assertion
|
||||
interval = 30 ; number of seconds to wait between restarts
|
||||
|
||||
mode = status ; Monitoring mode: passive (default) or active
|
||||
; passive: process death monitoring (default: always)
|
||||
; active : heartbeat monitoring, i.e. request / response messaging
|
||||
; status : determine process health with executing "status" command
|
||||
; "start" is used to start the process(es) again
|
||||
; ignore : do not monitor or stop monitoring
|
||||
|
||||
; Status and Active Monitoring Options
|
||||
|
||||
period = 30 ; monitor period in seconds
|
||||
timeout = 120 ; for active mode, messaging timeout period in seconds, must be shorter than period
|
||||
; for status mode, max amount of time for a command to execute
|
||||
|
||||
; Status Monitoring Options
|
||||
start_arg = start ; start argument for the script
|
||||
status_arg = status ; status argument for the script
|
||||
status_failure_text = /tmp/ceph_status_failure.txt ; text to be added to alarms or logs, this is optional
|
16
recipes-core/stx-integ-ceph/files/ceph.service
Normal file
16
recipes-core/stx-integ-ceph/files/ceph.service
Normal file
@ -0,0 +1,16 @@
|
||||
[Unit]
|
||||
Description=StarlingX Ceph Startup
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
Type=forking
|
||||
Restart=no
|
||||
KillMode=process
|
||||
RemainAfterExit=yes
|
||||
ExecStart=/etc/rc.d/init.d/ceph start
|
||||
ExecStop=/etc/rc.d/init.d/ceph stop
|
||||
PIDFile=/var/run/ceph/ceph.pid
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
67
recipes-core/stx-integ-ceph/files/ceph.sh
Normal file
67
recipes-core/stx-integ-ceph/files/ceph.sh
Normal file
@ -0,0 +1,67 @@
|
||||
#!/bin/bash
#
# Start/stop hook for the ceph services.
# Usage: ceph.sh {start|stop}

# Directory holding the ceph init script that this hook drives.
INITDIR=/etc/init.d
# Log file receiving this hook's messages and the init script's output.
LOGFILE=/var/log/ceph/ceph-init.log
# Flag file marking that this hook has started ceph.
CEPH_FILE=/var/run/.ceph_started

# Get our nodetype
source /etc/platform/platform.conf

# Exit immediately if ceph not configured (i.e. no mon in the config file)
grep -q "mon\." /etc/ceph/ceph.conf || exit 0
|
||||
|
||||
#######################################
# Print a message on stdout and append it to the log file, preceded by a
# line holding the current date.
# Globals:   LOGFILE (read) - path of the log file appended to
# Arguments: $1 - message text
# Outputs:   the message on stdout; date line + message appended to LOGFILE
#######################################
logecho ()
{
    # Quote the message so embedded whitespace and glob characters are
    # emitted verbatim instead of being word-split / pathname-expanded.
    printf '%s\n' "$1"
    # Quote the redirect target so a path containing whitespace does not
    # fail with "ambiguous redirect".
    date >> "${LOGFILE}"
    printf '%s\n' "$1" >> "${LOGFILE}"
}
|
||||
|
||||
#######################################
# Start the ceph services through the init script and create the
# "ceph started" flag file.
# Globals:   INITDIR (read)   - directory containing the ceph init script
#            LOGFILE (read)   - log file receiving the init script output
#            CEPH_FILE (read) - flag file created after the start attempt
#            RC (written)     - exit status of "${INITDIR}/ceph start"
# Arguments: none
#######################################
start ()
{
    logecho "Starting ceph services..."
    # All expansions are quoted so paths containing whitespace still work.
    "${INITDIR}/ceph" start >> "${LOGFILE}" 2>&1
    RC=$?

    # The flag file is created whatever RC is; an existing file is left
    # untouched.
    if [ ! -f "${CEPH_FILE}" ]; then
        touch "${CEPH_FILE}"
    fi
}
|
||||
|
||||
#######################################
# Stop the ceph services through the init script and remove the
# "ceph started" flag file.
# Globals:   system_type, system_mode (read) - presumably provided by the
#              sourced /etc/platform/platform.conf; verify against that file
#            INITDIR (read)   - directory containing the ceph init script
#            LOGFILE (read)   - log file receiving the init script output
#            CEPH_FILE (read) - flag file removed before stopping
#            RC (written)     - exit status of "${INITDIR}/ceph stop"
# Arguments: none
# Note:      on an All-in-one simplex system this exits the whole script
#            with status 0 without stopping anything.
#######################################
stop ()
{
    if [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" == "simplex" ]]; then
        logecho "Ceph services will continue to run on node"
        exit 0
    fi

    logecho "Stopping ceph services..."

    # All expansions are quoted so paths containing whitespace still work.
    if [ -f "${CEPH_FILE}" ]; then
        rm -f "${CEPH_FILE}"
    fi

    "${INITDIR}/ceph" stop >> "${LOGFILE}" 2>&1
    RC=$?
}
|
||||
|
||||
# Exit status reported by this script; start/stop overwrite it with the
# status of the ceph init script.
RC=0

# Dispatch on the requested action; anything else prints usage and fails.
if [[ "$1" == "start" ]]; then
    start
elif [[ "$1" == "stop" ]]; then
    stop
else
    echo "Usage: $0 {start|stop}"
    exit 1
fi

logecho "RC was: $RC"
exit $RC
|
1056
recipes-core/stx-integ-ceph/files/mgr-restful-plugin.py
Normal file
1056
recipes-core/stx-integ-ceph/files/mgr-restful-plugin.py
Normal file
File diff suppressed because it is too large
Load Diff
15
recipes-core/stx-integ-ceph/files/mgr-restful-plugin.service
Normal file
15
recipes-core/stx-integ-ceph/files/mgr-restful-plugin.service
Normal file
@ -0,0 +1,15 @@
|
||||
[Unit]
|
||||
Description=Ceph MGR RESTful API Plugin
|
||||
After=network-online.target sw-patch.service
|
||||
|
||||
[Service]
|
||||
Type=forking
|
||||
Restart=no
|
||||
KillMode=process
|
||||
RemainAfterExit=yes
|
||||
ExecStart=/etc/rc.d/init.d/mgr-restful-plugin start
|
||||
ExecStop=/etc/rc.d/init.d/mgr-restful-plugin stop
|
||||
ExecReload=/etc/rc.d/init.d/mgr-restful-plugin reload
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
@ -0,0 +1,3 @@
|
||||
[Service]
|
||||
ExecStopPost=/usr/sbin/ceph-preshutdown.sh
|
||||
|
@ -0,0 +1,59 @@
|
||||
From 03340eaf0004e3cc8e3f8991ea96a46757d92830 Mon Sep 17 00:00:00 2001
|
||||
From: Don Penney <don.penney@windriver.com>
|
||||
Date: Sat, 26 Jan 2019 13:34:55 -0500
|
||||
Subject: [PATCH] Add hooks for orderly shutdown on controller
|
||||
|
||||
Hook the ceph init script to add systemd overrides to define
|
||||
an orderly shutdown for StarlingX controllers.
|
||||
|
||||
Signed-off-by: Don Penney <don.penney@windriver.com>
|
||||
---
|
||||
src/init-ceph.in | 32 ++++++++++++++++++++++++++++++++
|
||||
1 file changed, 32 insertions(+)
|
||||
|
||||
diff --git a/src/init-ceph.in b/src/init-ceph.in
|
||||
index 1fdb4b3..515d818 100644
|
||||
--- a/src/init-ceph.in
|
||||
+++ b/src/init-ceph.in
|
||||
@@ -861,6 +861,38 @@ for name in $what; do
|
||||
fi
|
||||
fi
|
||||
|
||||
+ . /etc/platform/platform.conf
|
||||
+ if [ "${nodetype}" = "controller" ]; then
|
||||
+ # StarlingX: Hook the transient services launched by systemd-run
|
||||
+ # to allow for proper cleanup and orderly shutdown
|
||||
+
|
||||
+ # Set nullglob so wildcards will return empty string if no match
|
||||
+ shopt -s nullglob
|
||||
+
|
||||
+ OSD_SERVICES=$(for svc in /run/systemd/system/ceph-osd*.service; do basename $svc; done | xargs echo)
|
||||
+ for d in /run/systemd/system/ceph-osd*.d; do
|
||||
+ cat <<EOF > $d/starlingx-overrides.conf
|
||||
+[Unit]
|
||||
+Before=docker.service
|
||||
+After=sm-shutdown.service
|
||||
+
|
||||
+EOF
|
||||
+ done
|
||||
+
|
||||
+ for d in /run/systemd/system/ceph-mon*.d; do
|
||||
+ cat <<EOF > $d/starlingx-overrides.conf
|
||||
+[Unit]
|
||||
+Before=docker.service
|
||||
+After=sm-shutdown.service ${OSD_SERVICES}
|
||||
+
|
||||
+EOF
|
||||
+ done
|
||||
+
|
||||
+ shopt -u nullglob
|
||||
+
|
||||
+ systemctl daemon-reload
|
||||
+ fi
|
||||
+
|
||||
[ -n "$post_start" ] && do_cmd "$post_start"
|
||||
[ -n "$lockfile" ] && [ "$?" -eq 0 ] && touch $lockfile
|
||||
;;
|
||||
--
|
||||
1.8.3.1
|
||||
|
Loading…
x
Reference in New Issue
Block a user