Run processes without screen

This introduces new run_process() and screen_service() functions and sets the
groundwork to change how DevStack starts services.  screen_service() is simply a
direct call to the screen portion of the old screen_it() function and is intended
to run commands that only need to run under screen, such as log file watchers.

run_process() is a replacement for screen_it() (which remains until all of the
services are updated).  The usage is similar but requires updates to every current
screen_it() call to remove everything that requires the command to be interpreted
by a shell.

The old run_process() and _run_process() functions are still present as
old_run_process() and _old_run_process() to support the deprecated screen_it()
function.  These will all go away in the future once all services have been
confirmed to have been changed over.

There is a similar new set of stop process functions stop_process() and
screen_stop_service().  The old screen_stop() will also remain for the deprecation
period.

As an initial test/demostration this review also includes the changes for
lib/cinder to demonstrate what is required for every service.

I included the scripts I used to test this; tests/fake-service.sh and
tests/run-process.sh are quite rough around the edges and may bite.  They should
mature into productive members of the testing ecosystem someday.

Change-Id: I03322bf0208353ebd267811735c66f13a516637b
This commit is contained in:
Dean Troyer 2014-08-27 14:13:58 -05:00 committed by Chris Dent
parent 4bd4264960
commit 3159a821c2
4 changed files with 279 additions and 51 deletions

View File

@ -1135,8 +1135,8 @@ function zypper_install {
# fork. It includes the dirty work of closing extra filehandles and preparing log
# files to produce the same logs as screen_it(). The log filename is derived
# from the service name and global-and-now-misnamed ``SCREEN_LOGDIR``
# Uses globals ``CURRENT_LOG_TIME``, ``SCREEN_LOGDIR``
# _run_process service "command-line"
# Uses globals ``CURRENT_LOG_TIME``, ``SCREEN_LOGDIR``, ``SCREEN_NAME``, ``SERVICE_DIR``
# _old_run_process service "command-line"
function _run_process {
local service=$1
local command="$2"
@ -1155,8 +1155,12 @@ function _run_process {
export PYTHONUNBUFFERED=1
fi
exec /bin/bash -c "$command"
die "$service exec failure: $command"
# Run under ``setsid`` to force the process to become a session and group leader.
# The pid saved can be used with pkill -g to get the entire process group.
setsid $command & echo $! >$SERVICE_DIR/$SCREEN_NAME/$1.pid
# Just silently exit this process
exit 0
}
# Helper to remove the ``*.failure`` files under ``$SERVICE_DIR/$SCREEN_NAME``.
@ -1184,61 +1188,63 @@ function is_running {
return $exitcode
}
# run_process() launches a child process that closes all file descriptors and
# then exec's the passed in command. This is meant to duplicate the semantics
# of screen_it() without screen. PIDs are written to
# ``$SERVICE_DIR/$SCREEN_NAME/$service.pid``
# Run a single service under screen or directly
# If the command includes shell metachatacters (;<>*) it must be run using a shell
# run_process service "command-line"
function run_process {
local service=$1
local command="$2"
# Spawn the child process
_run_process "$service" "$command" &
echo $!
if is_service_enabled $service; then
if [[ "$USE_SCREEN" = "True" ]]; then
screen_service "$service" "$command"
else
# Spawn directly without screen
_run_process "$service" "$command" &
fi
fi
}
# Helper to launch a service in a named screen
# Uses globals ``CURRENT_LOG_TIME``, ``SCREEN_NAME``, ``SCREEN_LOGDIR``,
# ``SERVICE_DIR``, ``USE_SCREEN``
# screen_it service "command-line"
function screen_it {
# screen_service service "command-line"
# Run a command in a shell in a screen window
function screen_service {
local service=$1
local command="$2"
SCREEN_NAME=${SCREEN_NAME:-stack}
SERVICE_DIR=${SERVICE_DIR:-${DEST}/status}
USE_SCREEN=$(trueorfalse True $USE_SCREEN)
if is_service_enabled $1; then
if is_service_enabled $service; then
# Append the service to the screen rc file
screen_rc "$1" "$2"
screen_rc "$service" "$command"
if [[ "$USE_SCREEN" = "True" ]]; then
screen -S $SCREEN_NAME -X screen -t $1
screen -S $SCREEN_NAME -X screen -t $service
if [[ -n ${SCREEN_LOGDIR} ]]; then
screen -S $SCREEN_NAME -p $1 -X logfile ${SCREEN_LOGDIR}/screen-${1}.${CURRENT_LOG_TIME}.log
screen -S $SCREEN_NAME -p $1 -X log on
ln -sf ${SCREEN_LOGDIR}/screen-${1}.${CURRENT_LOG_TIME}.log ${SCREEN_LOGDIR}/screen-${1}.log
fi
# sleep to allow bash to be ready to be send the command - we are
# creating a new window in screen and then sends characters, so if
# bash isn't running by the time we send the command, nothing happens
sleep 3
NL=`echo -ne '\015'`
# This fun command does the following:
# - the passed server command is backgrounded
# - the pid of the background process is saved in the usual place
# - the server process is brought back to the foreground
# - if the server process exits prematurely the fg command errors
# and a message is written to stdout and the service failure file
# The pid saved can be used in screen_stop() as a process group
# id to kill off all child processes
screen -S $SCREEN_NAME -p $1 -X stuff "$2 & echo \$! >$SERVICE_DIR/$SCREEN_NAME/$1.pid; fg || echo \"$1 failed to start\" | tee \"$SERVICE_DIR/$SCREEN_NAME/$1.failure\"$NL"
else
# Spawn directly without screen
run_process "$1" "$2" >$SERVICE_DIR/$SCREEN_NAME/$1.pid
if [[ -n ${SCREEN_LOGDIR} ]]; then
screen -S $SCREEN_NAME -p $service -X logfile ${SCREEN_LOGDIR}/screen-${service}.${CURRENT_LOG_TIME}.log
screen -S $SCREEN_NAME -p $service -X log on
ln -sf ${SCREEN_LOGDIR}/screen-${service}.${CURRENT_LOG_TIME}.log ${SCREEN_LOGDIR}/screen-${service}.log
fi
# sleep to allow bash to be ready to be send the command - we are
# creating a new window in screen and then sends characters, so if
# bash isn't running by the time we send the command, nothing happens
sleep 3
NL=`echo -ne '\015'`
# This fun command does the following:
# - the passed server command is backgrounded
# - the pid of the background process is saved in the usual place
# - the server process is brought back to the foreground
# - if the server process exits prematurely the fg command errors
# and a message is written to stdout and the service failure file
# The pid saved can be used in screen_stop() as a process group
# id to kill off all child processes
screen -S $SCREEN_NAME -p $service -X stuff "$command & echo \$! >$SERVICE_DIR/$SCREEN_NAME/${service}.pid; fg || echo \"$service failed to start\" | tee \"$SERVICE_DIR/$SCREEN_NAME/${service}.failure\"$NL"
fi
}
@ -1276,20 +1282,40 @@ function screen_rc {
# that did not leave a PID behind
# Uses globals ``SCREEN_NAME``, ``SERVICE_DIR``, ``USE_SCREEN``
# screen_stop service
function screen_stop {
function screen_stop_service {
local service=$1
SCREEN_NAME=${SCREEN_NAME:-stack}
SERVICE_DIR=${SERVICE_DIR:-${DEST}/status}
USE_SCREEN=$(trueorfalse True $USE_SCREEN)
if is_service_enabled $1; then
if is_service_enabled $service; then
# Clean up the screen window
screen -S $SCREEN_NAME -p $service -X kill
fi
}
# Stop a service process
# If a PID is available use it, kill the whole process group via TERM
# If screen is being used kill the screen window; this will catch processes
# that did not leave a PID behind
# Uses globals ``SERVICE_DIR``, ``USE_SCREEN``
# stop_process service
function stop_process {
local service=$1
SERVICE_DIR=${SERVICE_DIR:-${DEST}/status}
USE_SCREEN=$(trueorfalse True $USE_SCREEN)
if is_service_enabled $service; then
# Kill via pid if we have one available
if [[ -r $SERVICE_DIR/$SCREEN_NAME/$1.pid ]]; then
pkill -TERM -P -$(cat $SERVICE_DIR/$SCREEN_NAME/$1.pid)
rm $SERVICE_DIR/$SCREEN_NAME/$1.pid
if [[ -r $SERVICE_DIR/$SCREEN_NAME/$service.pid ]]; then
pkill -g $(cat $SERVICE_DIR/$SCREEN_NAME/$service.pid)
rm $SERVICE_DIR/$SCREEN_NAME/$service.pid
fi
if [[ "$USE_SCREEN" = "True" ]]; then
# Clean up the screen window
screen -S $SCREEN_NAME -p $1 -X kill
screen_stop_service $service
fi
fi
}
@ -1325,6 +1351,80 @@ function service_check {
}
# Deprecated Functions
# --------------------
# _old_run_process() is designed to be backgrounded by old_run_process() to simulate a
# fork. It includes the dirty work of closing extra filehandles and preparing log
# files to produce the same logs as screen_it(). The log filename is derived
# from the service name and global-and-now-misnamed ``SCREEN_LOGDIR``
# Uses globals ``CURRENT_LOG_TIME``, ``SCREEN_LOGDIR``, ``SCREEN_NAME``, ``SERVICE_DIR``
# _old_run_process service "command-line"
function _old_run_process {
local service=$1
local command="$2"
# Undo logging redirections and close the extra descriptors
exec 1>&3
exec 2>&3
exec 3>&-
exec 6>&-
if [[ -n ${SCREEN_LOGDIR} ]]; then
exec 1>&${SCREEN_LOGDIR}/screen-${1}.${CURRENT_LOG_TIME}.log 2>&1
ln -sf ${SCREEN_LOGDIR}/screen-${1}.${CURRENT_LOG_TIME}.log ${SCREEN_LOGDIR}/screen-${1}.log
# TODO(dtroyer): Hack to get stdout from the Python interpreter for the logs.
export PYTHONUNBUFFERED=1
fi
exec /bin/bash -c "$command"
die "$service exec failure: $command"
}
# old_run_process() launches a child process that closes all file descriptors and
# then exec's the passed in command. This is meant to duplicate the semantics
# of screen_it() without screen. PIDs are written to
# ``$SERVICE_DIR/$SCREEN_NAME/$service.pid`` by the spawned child process.
# old_run_process service "command-line"
function old_run_process {
local service=$1
local command="$2"
# Spawn the child process
_old_run_process "$service" "$command" &
echo $!
}
# Compatibility for existing start_XXXX() functions
# Uses global ``USE_SCREEN``
# screen_it service "command-line"
function screen_it {
if is_service_enabled $1; then
# Append the service to the screen rc file
screen_rc "$1" "$2"
if [[ "$USE_SCREEN" = "True" ]]; then
screen_service "$1" "$2"
else
# Spawn directly without screen
old_run_process "$1" "$2" >$SERVICE_DIR/$SCREEN_NAME/$1.pid
fi
fi
}
# Compatibility for existing stop_XXXX() functions
# Stop a service in screen
# If a PID is available use it, kill the whole process group via TERM
# If screen is being used kill the screen window; this will catch processes
# that did not leave a PID behind
# screen_stop service
function screen_stop {
# Clean up the screen window
stop_process $1
}
# Python Functions
# ================

View File

@ -431,15 +431,15 @@ function start_cinder {
sudo tgtadm --mode system --op update --name debug --value on
fi
screen_it c-api "cd $CINDER_DIR && $CINDER_BIN_DIR/cinder-api --config-file $CINDER_CONF"
run_process c-api "$CINDER_BIN_DIR/cinder-api --config-file $CINDER_CONF"
echo "Waiting for Cinder API to start..."
if ! wait_for_service $SERVICE_TIMEOUT $CINDER_SERVICE_PROTOCOL://$CINDER_SERVICE_HOST:$CINDER_SERVICE_PORT; then
die $LINENO "c-api did not start"
fi
screen_it c-sch "cd $CINDER_DIR && $CINDER_BIN_DIR/cinder-scheduler --config-file $CINDER_CONF"
screen_it c-bak "cd $CINDER_DIR && $CINDER_BIN_DIR/cinder-backup --config-file $CINDER_CONF"
screen_it c-vol "cd $CINDER_DIR && $CINDER_BIN_DIR/cinder-volume --config-file $CINDER_CONF"
run_process c-sch "$CINDER_BIN_DIR/cinder-scheduler --config-file $CINDER_CONF"
run_process c-bak "$CINDER_BIN_DIR/cinder-backup --config-file $CINDER_CONF"
run_process c-vol "$CINDER_BIN_DIR/cinder-volume --config-file $CINDER_CONF"
# NOTE(jdg): For cinder, startup order matters. To ensure that repor_capabilities is received
# by the scheduler start the cinder-volume service last (or restart it) after the scheduler

19
tests/fake-service.sh Executable file
View File

@ -0,0 +1,19 @@
#!/bin/bash
# fake-service.sh - a fake service for start/stop testing
# $1 - sleep time
SLEEP_TIME=${1:-3}
LOG=/tmp/fake-service.log
TIMESTAMP_FORMAT=${TIMESTAMP_FORMAT:-"%F-%H%M%S"}
# duplicate output
exec 1> >(tee -a ${LOG})
echo ""
echo "Starting fake-service for ${SLEEP_TIME}"
while true; do
echo "$(date +${TIMESTAMP_FORMAT}) [$$]"
sleep ${SLEEP_TIME}
done

109
tests/run-process.sh Executable file
View File

@ -0,0 +1,109 @@
#!/bin/bash
# tests/exec.sh - Test DevStack screen_it() and screen_stop()
#
# exec.sh start|stop|status
#
# Set USE_SCREEN to change the default
#
# This script emulates the basic exec envirnment in ``stack.sh`` to test
# the process spawn and kill operations.
if [[ -z $1 ]]; then
echo "$0 start|stop"
exit 1
fi
TOP_DIR=$(cd $(dirname "$0")/.. && pwd)
source $TOP_DIR/functions
USE_SCREEN=${USE_SCREEN:-False}
ENABLED_SERVICES=fake-service
SERVICE_DIR=/tmp
SCREEN_NAME=test
SCREEN_LOGDIR=${SERVICE_DIR}/${SCREEN_NAME}
# Kill background processes on exit
trap clean EXIT
clean() {
local r=$?
jobs -p
kill >/dev/null 2>&1 $(jobs -p)
exit $r
}
# Exit on any errors so that errors don't compound
trap failed ERR
failed() {
local r=$?
jobs -p
kill >/dev/null 2>&1 $(jobs -p)
set +o xtrace
[ -n "$LOGFILE" ] && echo "${0##*/} failed: full log in $LOGFILE"
exit $r
}
function status {
if [[ -r $SERVICE_DIR/$SCREEN_NAME/fake-service.pid ]]; then
pstree -pg $(cat $SERVICE_DIR/$SCREEN_NAME/fake-service.pid)
fi
ps -ef | grep fake
}
function setup_screen {
if [[ ! -d $SERVICE_DIR/$SCREEN_NAME ]]; then
rm -rf $SERVICE_DIR/$SCREEN_NAME
mkdir -p $SERVICE_DIR/$SCREEN_NAME
fi
if [[ "$USE_SCREEN" == "True" ]]; then
# Create a new named screen to run processes in
screen -d -m -S $SCREEN_NAME -t shell -s /bin/bash
sleep 1
# Set a reasonable status bar
if [ -z "$SCREEN_HARDSTATUS" ]; then
SCREEN_HARDSTATUS='%{= .} %-Lw%{= .}%> %n%f %t*%{= .}%+Lw%< %-=%{g}(%{d}%H/%l%{g})'
fi
screen -r $SCREEN_NAME -X hardstatus alwayslastline "$SCREEN_HARDSTATUS"
fi
# Clear screen rc file
SCREENRC=$TOP_DIR/tests/$SCREEN_NAME-screenrc
if [[ -e $SCREENRC ]]; then
echo -n > $SCREENRC
fi
}
# Mimic logging
# Set up output redirection without log files
# Copy stdout to fd 3
exec 3>&1
if [[ "$VERBOSE" != "True" ]]; then
# Throw away stdout and stderr
#exec 1>/dev/null 2>&1
:
fi
# Always send summary fd to original stdout
exec 6>&3
if [[ "$1" == "start" ]]; then
echo "Start service"
setup_screen
screen_it fake-service "$TOP_DIR/tests/fake-service.sh"
sleep 1
status
elif [[ "$1" == "stop" ]]; then
echo "Stop service"
screen_stop fake-service
status
elif [[ "$1" == "status" ]]; then
status
else
echo "Unknown command"
exit 1
fi