Remove sm-watchdog service since NFS is now stable
sm-watchdog was introduced as a workaround for NFS hangs. A clean fix has since been provided, but sm-watchdog was never removed. Test plan: [centos] build, install and unlock. [debian] build, install and unlock. Story: 2010087 Task: 46007 Signed-off-by: Davi Frossard <dbarrosf@windriver.com> Change-Id: I29fffff4e8982dc504f104f49c6586f7c74527fb
This commit is contained in:
parent
924c088f3a
commit
bd9e560d4b
@ -44,7 +44,6 @@
|
||||
sm-tools: true
|
||||
sm-api: true
|
||||
sm-eru: true
|
||||
sm-watchdog: true
|
||||
mysql: false
|
||||
postgresql: true
|
||||
tls-proxy: false
|
||||
|
@ -156,14 +156,9 @@ function cleanup_sm_common {
|
||||
$STX_INST_DIR/lib64/libsm_common.so.* \
|
||||
$STX_BIN_DIR/sm-eru \
|
||||
$STX_BIN_DIR/sm-eru-dump \
|
||||
$STX_BIN_DIR/sm-watchdog \
|
||||
$STX_SM_VAR_DIR/watchdog/modules/libsm_watchdog_nfs.so.* \
|
||||
$STX_SYSCONFDIR/systemd/system/sm-eru.service \
|
||||
$STX_SYSCONFDIR/systemd/system/sm-watchdog.service \
|
||||
$STX_SYSCONFDIR/pmon.d/sm-eru.conf \
|
||||
$STX_SYSCONFDIR/pmon.d/sm-watchdog.conf \
|
||||
$STX_SYSCONFDIR/init.d/sm-eru \
|
||||
$STX_SYSCONFDIR/init.d/sm-watchdog \
|
||||
/etc/ld.so.conf.d/stx-ha.conf
|
||||
|
||||
popd
|
||||
@ -190,7 +185,6 @@ function configure_ha {
|
||||
|
||||
if is_service_enabled sm-common; then
|
||||
config_eru
|
||||
config_watchdog
|
||||
fi
|
||||
|
||||
if is_service_enabled sm-daemon; then
|
||||
@ -215,12 +209,6 @@ function config_eru {
|
||||
iniset -sudo ${STX_SYSCONFDIR}/systemd/system/devstack@sm-eru.service "Service" "PIDFile" "/var/run/sm-eru.pid"
|
||||
}
|
||||
|
||||
# Rewrite the SM_WATCHDOG path in the installed sm-watchdog init script so it
# points under $STX_INST_DIR/bin, then set Type=forking and the PIDFile in the
# [Service] section of the devstack@sm-watchdog unit.
function config_watchdog {
    local init_script=$STX_SYSCONFDIR/init.d/sm-watchdog
    local unit_file=${STX_SYSCONFDIR}/systemd/system/devstack@sm-watchdog.service

    sudo sed -i "s%SM_WATCHDOG=\"/usr/bin/\${SM_WATCHDOG_NAME}\"%SM_WATCHDOG=\"$STX_INST_DIR/bin/\${SM_WATCHDOG_NAME}\"%" $init_script

    iniset -sudo $unit_file "Service" "Type" "forking"
    iniset -sudo $unit_file "Service" "PIDFile" "/var/run/sm-watchdog.pid"
}
|
||||
|
||||
function create_sm_accounts {
|
||||
create_service_user "smapi"
|
||||
get_or_create_service "smapi" "servicemanagement" "Service Management"
|
||||
@ -340,15 +328,11 @@ function install_sm_common {
|
||||
|
||||
install_sm_common_libs
|
||||
|
||||
sudo install -m 0755 -p -D -t $STX_SM_VAR_DIR/watchdog/modules src/libsm_watchdog_nfs.so.${STX_SM_COMMON_VERSION}
|
||||
sudo cp -P src/libsm_watchdog_nfs.so src/libsm_watchdog_nfs.so.${STX_SM_COMMON_VERSION%%.*} $STX_SM_VAR_DIR/watchdog/modules
|
||||
|
||||
# scripts/
|
||||
(cd scripts; sudo make DEST_DIR= UNIT_DIR=$STX_SYSCONFDIR/systemd/system install)
|
||||
|
||||
sudo install -m 750 -p -D src/sm_eru $STX_BIN_DIR/sm-eru
|
||||
sudo install -m 750 -p -D src/sm_eru_dump $STX_BIN_DIR/sm-eru-dump
|
||||
sudo install -m 750 -p -D src/sm_watchdog $STX_BIN_DIR/sm-watchdog
|
||||
|
||||
echo $STX_INST_DIR/lib64 | sudo tee /etc/ld.so.conf.d/stx-ha.conf
|
||||
sudo ldconfig
|
||||
@ -411,10 +395,6 @@ function start_eru {
|
||||
run_process sm-eru "${STX_SYSCONFDIR}/init.d/sm-eru start" root root
|
||||
}
|
||||
|
||||
# Start sm-watchdog by handing its init script's "start" action to
# run_process, with "root root" as the trailing arguments.
function start_watchdog {
    local start_cmd="${STX_SYSCONFDIR}/init.d/sm-watchdog start"

    run_process sm-watchdog "$start_cmd" root root
}
|
||||
|
||||
function start_ha {
|
||||
if is_service_enabled sm-daemon; then
|
||||
start_sm
|
||||
@ -426,14 +406,12 @@ function start_ha {
|
||||
|
||||
if is_service_enabled sm-common; then
|
||||
start_eru
|
||||
start_watchdog
|
||||
fi
|
||||
}
|
||||
|
||||
function stop_ha {
|
||||
if is_service_enabled sm-common; then
|
||||
stop_process sm-eru
|
||||
stop_process sm-watchdog
|
||||
fi
|
||||
|
||||
if is_service_enabled sm-api; then
|
||||
|
@ -16,14 +16,10 @@ install:
|
||||
install -m 750 -d $(DEST_DIR)/usr/bin
|
||||
install -m 750 -p -D $(BUILDSUBDIR)/src/sm_eru $(DEST_DIR)/$(BIN_DIR)/sm-eru
|
||||
install -m 750 -p -D $(BUILDSUBDIR)/src/sm_eru_dump $(DEST_DIR)/$(BIN_DIR)/sm-eru-dump
|
||||
install -m 750 -p -D $(BUILDSUBDIR)/src/sm_watchdog $(DEST_DIR)/$(BIN_DIR)/sm-watchdog
|
||||
install -m 644 -p -D $(BUILDSUBDIR)/scripts/sm-eru.service $(DEST_DIR)/$(UNIT_DIR)/sm-eru.service
|
||||
install -m 644 -p -D $(BUILDSUBDIR)/scripts/sm-watchdog.service $(DEST_DIR)/$(UNIT_DIR)/sm-watchdog.service
|
||||
install -m 750 -d $(DEST_DIR)/$(ETC_DIR)/pmon.d
|
||||
install -m 640 -p -D $(BUILDSUBDIR)/scripts/sm-eru.conf $(DEST_DIR)/$(ETC_DIR)/pmon.d/sm-eru.conf
|
||||
install -m 640 -p -D $(BUILDSUBDIR)/scripts/sm-watchdog.conf $(DEST_DIR)/$(ETC_DIR)/pmon.d/sm-watchdog.conf
|
||||
install -m 750 -p -D $(BUILDSUBDIR)/scripts/sm-eru $(DEST_DIR)/$(ETC_DIR)/init.d/sm-eru
|
||||
install -m 750 -p -D $(BUILDSUBDIR)/scripts/sm-watchdog $(DEST_DIR)/$(ETC_DIR)/init.d/sm-watchdog
|
||||
|
||||
clean:
|
||||
@( cd src; make clean )
|
||||
|
@ -91,9 +91,6 @@ MAJOR=`echo $VER | awk -F . '{print $1}'`
|
||||
MINOR=`echo $VER | awk -F . '{print $2}'`
|
||||
make DEST_DIR=%{buildroot} BIN_DIR=%{_bindir} UNIT_DIR=%{_unitdir} LIB_DIR=%{_libdir} INC_DIR=%{_includedir} BUILDSUBDIR=%{_buildsubdir} VER=$VER VER_MJR=$MAJOR install
|
||||
|
||||
%post
|
||||
/usr/bin/systemctl enable sm-watchdog.service >/dev/null 2>&1
|
||||
|
||||
%post -n sm-eru
|
||||
/usr/bin/systemctl enable sm-eru.service >/dev/null 2>&1
|
||||
|
||||
@ -101,10 +98,6 @@ make DEST_DIR=%{buildroot} BIN_DIR=%{_bindir} UNIT_DIR=%{_unitdir} LIB_DIR=%{_li
|
||||
%files
|
||||
%license LICENSE
|
||||
%defattr(-,root,root,-)
|
||||
/etc/init.d/sm-watchdog
|
||||
/etc/pmon.d/sm-watchdog.conf
|
||||
/usr/bin/sm-watchdog
|
||||
/usr/lib/systemd/system/sm-watchdog.service
|
||||
|
||||
#%{_unitdir}/*
|
||||
#%{_bindir}/*
|
||||
@ -113,10 +106,6 @@ make DEST_DIR=%{buildroot} BIN_DIR=%{_bindir} UNIT_DIR=%{_unitdir} LIB_DIR=%{_li
|
||||
|
||||
%files libs
|
||||
%{_libdir}/*.so.*
|
||||
%dir "/var/lib/sm"
|
||||
%dir "/var/lib/sm/watchdog"
|
||||
%dir "/var/lib/sm/watchdog/modules"
|
||||
/var/lib/sm/watchdog/modules/*.so.*
|
||||
|
||||
%files -n sm-eru
|
||||
%defattr(-,root,root,-)
|
||||
@ -135,18 +124,14 @@ make DEST_DIR=%{buildroot} BIN_DIR=%{_bindir} UNIT_DIR=%{_unitdir} LIB_DIR=%{_li
|
||||
#"/usr/lib64/.debug/libsm_common.so.1.0.0"
|
||||
#%dir "/usr/bin/.debug"
|
||||
#"/usr/bin/.debug/sm-eru-dump"
|
||||
#"/usr/bin/.debug/sm-watchdog"
|
||||
#"/usr/bin/.debug/sm-eru"
|
||||
#%dir "/usr/src/debug/sm-common"
|
||||
#%dir "/usr/src/debug/sm-common/1.0.0-r7"
|
||||
#%dir "/usr/src/debug/sm-common/1.0.0-r7/src"
|
||||
#/usr/src/debug/sm-common/1.0.0-r7/src/*.h
|
||||
#/usr/src/debug/sm-common/1.0.0-r7/src/*.c
|
||||
#%dir "/var/lib/sm/watchdog/modules/.debug"
|
||||
#"/var/lib/sm/watchdog/modules/.debug/libsm_watchdog_nfs.so.1.0.0"
|
||||
|
||||
%files dev
|
||||
%defattr(-,root,root,-)
|
||||
%{_includedir}/*
|
||||
%{_libdir}/*.so
|
||||
/var/lib/sm/watchdog/modules/libsm_watchdog_nfs.so
|
||||
|
@ -23,11 +23,8 @@ override_dh_auto_install:
|
||||
# Prevents dh_fixperms from changing the permissions defined in the makefiles
|
||||
override_dh_fixperms:
|
||||
dh_fixperms \
|
||||
-Xsm-watchdog* \
|
||||
-Xlibsm_common.so.* \
|
||||
-Xlibsm_watchdog_nfs.so.* \
|
||||
-Xsm-eru*
|
||||
|
||||
override_dh_installsystemd:
|
||||
dh_installsystemd -psm-common sm-watchdog.service
|
||||
dh_installsystemd -psm-eru sm-eru.service
|
||||
|
@ -1,3 +1,2 @@
|
||||
usr/include/*
|
||||
usr/lib/*.so
|
||||
var/lib/sm/watchdog/modules/libsm_watchdog_nfs.so
|
||||
|
@ -1,3 +1 @@
|
||||
/var/lib/sm
|
||||
/var/lib/sm/watchdog
|
||||
/var/lib/sm/watchdog/modules
|
||||
|
@ -1,2 +1 @@
|
||||
usr/lib/*.so.*
|
||||
var/lib/sm/watchdog/modules/*.so.*
|
||||
|
@ -1,5 +1 @@
|
||||
etc/init.d/sm-watchdog
|
||||
etc/pmon.d/sm-watchdog.conf
|
||||
usr/bin/sm-watchdog
|
||||
lib/systemd/system/sm-watchdog.service
|
||||
debian/systemd/00-sm-common.preset etc/systemd/system-preset
|
||||
|
@ -1 +0,0 @@
|
||||
enable sm-watchdog.service
|
@ -72,19 +72,6 @@ MAJOR=`echo $VER | awk -F . '{print $1}'`
|
||||
MINOR=`echo $VER | awk -F . '{print $2}'`
|
||||
make DEST_DIR=%{buildroot} BIN_DIR=%{_bindir} UNIT_DIR=%{_unitdir} LIB_DIR=%{_libdir} INC_DIR=%{_includedir} BUILDSUBDIR=%{_buildsubdir} VER=$VER VER_MJR=$MAJOR install
|
||||
|
||||
%pre
|
||||
%service_add_pre sm-watchdog.service sm-watchdog.target
|
||||
|
||||
%preun
|
||||
%service_del_preun sm-watchdog.service sm-watchdog.target
|
||||
|
||||
%post
|
||||
%service_add_post sm-watchdog.service sm-watchdog.target
|
||||
/usr/bin/systemctl enable sm-watchdog.service
|
||||
|
||||
%postun
|
||||
%service_del_postun sm-watchdog.service sm-watchdog.target
|
||||
|
||||
%pre -n sm-eru
|
||||
%service_add_pre sm-eru.service sm-eru.target
|
||||
|
||||
@ -108,17 +95,10 @@ make DEST_DIR=%{buildroot} BIN_DIR=%{_bindir} UNIT_DIR=%{_unitdir} LIB_DIR=%{_li
|
||||
%files
|
||||
%license LICENSE
|
||||
%defattr(-,root,root,-)
|
||||
%{_sysconfdir}/init.d/sm-watchdog
|
||||
%config %{_sysconfdir}/pmon.d/sm-watchdog.conf
|
||||
%{_bindir}/sm-watchdog
|
||||
%{_unitdir}/sm-watchdog.service
|
||||
|
||||
%files libs
|
||||
%{_libdir}/*.so.*
|
||||
%dir %{_sharedstatedir}/sm
|
||||
%dir %{_sharedstatedir}/sm/watchdog
|
||||
%dir %{_sharedstatedir}/sm/watchdog/modules
|
||||
%{_sharedstatedir}/sm/watchdog/modules/*.so.*
|
||||
|
||||
%files -n sm-eru
|
||||
%defattr(-,root,root,-)
|
||||
@ -134,6 +114,5 @@ make DEST_DIR=%{buildroot} BIN_DIR=%{_bindir} UNIT_DIR=%{_unitdir} LIB_DIR=%{_li
|
||||
%defattr(-,root,root,-)
|
||||
%{_includedir}/*
|
||||
%{_libdir}/*.so
|
||||
%{_sharedstatedir}/sm/watchdog/modules/libsm_watchdog_nfs.so
|
||||
|
||||
%changelog
|
||||
|
@ -6,7 +6,7 @@ install:
|
||||
install -d $(DEST_DIR)$(UNIT_DIR)
|
||||
install -m 644 *.service $(DEST_DIR)$(UNIT_DIR)
|
||||
install -d $(DEST_DIR)/etc/init.d
|
||||
install sm-watchdog sm-eru $(DEST_DIR)/etc/init.d
|
||||
install sm-eru $(DEST_DIR)/etc/init.d
|
||||
install -d $(DEST_DIR)/etc/pmon.d
|
||||
install *.conf $(DEST_DIR)/etc/pmon.d
|
||||
|
||||
|
@ -1,131 +0,0 @@
|
||||
#! /bin/sh
#
# Copyright (c) 2014 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# chkconfig: - 87 87
# processname: sm-watchdog
# description: Service Management Watchdog
#
### BEGIN INIT INFO
# Description: sm-watchdog
#
# Short-Description: Service Management Watchdog
# Provides: sm-watchdog
# Required-Start: $network
# Should-Start: $syslog
# Required-Stop: $network
# Default-Start: 3 5
# Default-Stop: 0 6
### END INIT INFO

# Init script for the sm-watchdog daemon.
#
# Usage: sm-watchdog { start | stop | status | restart | reload | force-reload }
#
# Exit status:
#   0  success (also returned for an unknown action, after printing usage)
#   1  start failed, or status found a pid file whose process is gone
#   3  status: no pid recorded
#   5  the sm-watchdog binary is missing
#   7  stop: the process was still running after the shutdown timeout
#
# NOTE: this script runs under /bin/sh, so only POSIX redirections are used.
# The bash-only "&>" form is parsed by POSIX shells such as dash as
# "run in background" followed by a separate redirect, which makes $? always 0.

. /etc/init.d/functions

RETVAL=0

SM_WATCHDOG_NAME="sm-watchdog"
SM_WATCHDOG="/usr/bin/${SM_WATCHDOG_NAME}"
SM_WATCHDOG_PIDFILE="/var/run/${SM_WATCHDOG_NAME}.pid"

if [ ! -e "${SM_WATCHDOG}" ]
then
    logger "${SM_WATCHDOG} is missing"
    exit 5
fi

PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin

case "$1" in
    start)
        echo -n "Starting ${SM_WATCHDOG_NAME}: "
        if [ -n "$(pidof ${SM_WATCHDOG})" ]
        then
            # PMOND might have restarted SM-WATCHDOG already.
            RETVAL=0
        else
            start-stop-daemon --start -b -x ${SM_WATCHDOG}
            RETVAL=$?
        fi
        if [ ${RETVAL} -eq 0 ]
        then
            echo "OK"
        else
            echo "FAIL"
            RETVAL=1
        fi
        ;;

    stop)
        echo -n "Stopping ${SM_WATCHDOG_NAME}: "
        if [ -n "$(pidof ${SM_WATCHDOG})" ]
        then
            killproc ${SM_WATCHDOG}
        fi

        # Poll once per second until the process is gone or the timeout
        # expires; pidof exits 1 when no matching process exists.
        SHUTDOWN_TIMEOUT=5
        count=0
        while [ ${count} -lt ${SHUTDOWN_TIMEOUT} ]
        do
            pidof ${SM_WATCHDOG} > /dev/null 2>&1
            rc=$?
            if [ ${rc} -eq 1 ]
            then
                echo "OK"
                break
            fi
            count=$((count + 1))
            sleep 1
        done

        # Final check: anything still running at this point is a failed stop.
        pidof ${SM_WATCHDOG} > /dev/null 2>&1
        rc=$?
        if [ ${rc} -eq 0 ]
        then
            echo "FAIL"
            RETVAL=7
        fi

        rm -f ${SM_WATCHDOG_PIDFILE}
        ;;

    status)
        pid=$(cat ${SM_WATCHDOG_PIDFILE} 2>/dev/null)
        if [ -n "${pid}" ]
        then
            if ps -p ${pid} > /dev/null 2>&1
            then
                echo "${SM_WATCHDOG_NAME} is running"
                RETVAL=0
            else
                echo "${SM_WATCHDOG_NAME} is not running but has pid file"
                RETVAL=1
            fi
        else
            echo "${SM_WATCHDOG_NAME} is not running"
            RETVAL=3
        fi
        ;;

    restart)
        $0 stop
        sleep 1
        $0 start
        ;;

    reload)
        echo "${SM_WATCHDOG_NAME} reload"
        $0 restart
        ;;

    force-reload)
        echo "${SM_WATCHDOG_NAME} force-reload"
        $0 restart
        ;;

    *)
        echo "usage: $0 { start | stop | status | restart | reload | force-reload }"
        ;;
esac

exit ${RETVAL}
|
@ -1,15 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2014 Wind River Systems, Inc.
|
||||
;
|
||||
; SPDX-License-Identifier: Apache-2.0
|
||||
;
|
||||
[process]
|
||||
process = sm-watchdog
|
||||
pidfile = /var/run/sm-watchdog.pid
|
||||
script = /etc/init.d/sm-watchdog
|
||||
style = lsb ; lsb
|
||||
severity = major ; minor, major, critical
|
||||
restarts = 3 ; restarts before error assertion
|
||||
startuptime = 5 ; seconds to wait after process start
|
||||
interval = 5 ; number of seconds to wait between restarts
|
||||
debounce = 20 ; number of seconds to wait before degrade clear
|
@ -1,15 +0,0 @@
|
||||
[Unit]
|
||||
Description=Service Management Watchdog
|
||||
After=network-online.target syslog-ng.service config.service
|
||||
Before=sm.service pmon.service
|
||||
|
||||
[Service]
|
||||
Type=forking
|
||||
RemainAfterExit=yes
|
||||
User=root
|
||||
ExecStart=/etc/init.d/sm-watchdog start
|
||||
ExecStop=/etc/init.d/sm-watchdog stop
|
||||
PIDFile=/var/run/sm-watchdog.pid
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
@ -34,7 +34,7 @@ EXTRACCFLAGS+= -Wformat -Wformat-security
|
||||
LDLIBS= -lsqlite3 -lglib-2.0 -lgmodule-2.0 -luuid -lrt -lpthread
|
||||
LDFLAGS = -shared -rdynamic
|
||||
|
||||
build: libsm_common.so libsm_watchdog_nfs.so sm_watchdog sm_eru sm_eru_dump
|
||||
build: libsm_common.so sm_eru sm_eru_dump
|
||||
|
||||
.c.o:
|
||||
$(CXX) $(INCLUDES) $(CCFLAGS) $(EXTRACCFLAGS) -c $< -o $@
|
||||
@ -48,18 +48,6 @@ libsm_common.so.$(VER_MJR): libsm_common.so.$(VER)
|
||||
libsm_common.so.$(VER): ${OBJS}
|
||||
$(CXX) ${LDFLAGS} -Wl,--start-group $(LDLIBS) -Wl,-soname,libsm_common.so.$(VER_MJR) -o $@ $^
|
||||
|
||||
libsm_watchdog_nfs.so: libsm_watchdog_nfs.so.$(VER_MJR)
|
||||
ln -sf $^ $@
|
||||
|
||||
libsm_watchdog_nfs.so.$(VER_MJR): libsm_watchdog_nfs.so.$(VER)
|
||||
ln -sf $^ $@
|
||||
|
||||
libsm_watchdog_nfs.so.$(VER): libsm_common.so.$(VER) libsm_common.so
|
||||
$(CXX) $(INCLUDES) $(CCFLAGS) $(EXTRACCFLAGS) sm_watchdog_nfs.c ${LDFLAGS} $(LDLIBS) -L./ -lsm_common -Wl,-soname,libsm_watchdog_nfs.so.$(VER_MJR) -o $@
|
||||
|
||||
sm_watchdog: libsm_common.so
|
||||
$(CXX) $(INCLUDES) $(CCFLAGS) $(EXTRACCFLAGS) $(OBJS) sm_watchdog_module.c sm_watchdog_process.c sm_watchdog_main.c $(LDLIBS) -L./ -lsm_common -o sm_watchdog
|
||||
|
||||
sm_eru: libsm_common.so
|
||||
$(CXX) $(INCLUDES) $(CCFLAGS) $(EXTRACCFLAGS) $(OBJS) sm_eru_process.c sm_eru_main.c $(LDLIBS) -L./ -lsm_common -o sm_eru
|
||||
|
||||
@ -71,15 +59,12 @@ install:
|
||||
# renamed with '-' like they are in the bitbake file.
|
||||
#
|
||||
# install -d $(DEST_DIR)$(BIN_DIR)
|
||||
# install sm_watchdog sm_eru sm_eru_dump $(DEST_DIR)$(BIN_DIR)
|
||||
# install sm_eru sm_eru_dump $(DEST_DIR)$(BIN_DIR)
|
||||
install -d $(DEST_DIR)$(LIB_DIR)
|
||||
install libsm_common.so.${VER} $(DEST_DIR)$(LIB_DIR)
|
||||
cp -P libsm_common.so libsm_common.so.$(VER_MJR) $(DEST_DIR)$(LIB_DIR)
|
||||
install -d $(DEST_DIR)$(INC_DIR)
|
||||
install -m 644 *.h $(DEST_DIR)$(INC_DIR)
|
||||
install -d $(DEST_DIR)/var/lib/sm/watchdog/modules
|
||||
install libsm_watchdog_nfs.so.${VER} $(DEST_DIR)/var/lib/sm/watchdog/modules
|
||||
cp -P libsm_watchdog_nfs.so libsm_watchdog_nfs.so.${VER_MJR} $(DEST_DIR)/var/lib/sm/watchdog/modules
|
||||
|
||||
clean:
|
||||
rm -f *.o *.so *.so.*
|
||||
|
@ -77,15 +77,12 @@ extern "C" {
|
||||
|
||||
#define SM_PROCESS_PID_FILENAME "/var/run/sm.pid"
|
||||
#define SM_TRAP_PROCESS_PID_FILENAME "/var/run/sm-trap.pid"
|
||||
#define SM_WATCHDOG_PROCESS_PID_FILENAME "/var/run/sm-watchdog.pid"
|
||||
#define SM_ERU_PROCESS_PID_FILENAME "/var/run/sm-eru.pid"
|
||||
|
||||
#define SM_BOOT_COMPLETE_FILENAME "/var/run/sm_boot_complete"
|
||||
|
||||
#define SM_INDICATE_DEGRADED_FILENAME "/var/run/.sm_degraded"
|
||||
|
||||
#define SM_WATCHDOG_HEARTBEAT_FILENAME "/var/run/.sm_watchdog_heartbeat"
|
||||
|
||||
#define SM_DUMP_DATA_FILE "/tmp/sm_data_dump.txt"
|
||||
|
||||
#define SM_TROUBLESHOOT_LOG_FILE "/var/log/sm-troubleshoot.log"
|
||||
|
@ -15,9 +15,6 @@
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
#include <signal.h>
|
||||
#include <time.h>
|
||||
#include <utime.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
// ****************************************************************************
|
||||
@ -127,77 +124,3 @@ SmErrorT sm_utils_clear_degraded( void )
|
||||
return( SM_OKAY );
|
||||
}
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// Utils - Watchdog Heartbeat
|
||||
// ==========================
|
||||
void sm_utils_watchdog_heartbeat( void )
|
||||
{
|
||||
struct utimbuf file_times;
|
||||
struct timespec ts_mono;
|
||||
|
||||
clock_gettime( CLOCK_MONOTONIC_RAW, &ts_mono );
|
||||
|
||||
memset( &file_times, 0, sizeof(struct utimbuf) );
|
||||
|
||||
file_times.actime = ts_mono.tv_sec;
|
||||
file_times.modtime = ts_mono.tv_sec;
|
||||
|
||||
if( 0 > access( SM_WATCHDOG_HEARTBEAT_FILENAME, F_OK ) )
|
||||
{
|
||||
int fd = open( SM_WATCHDOG_HEARTBEAT_FILENAME, O_RDWR | O_CREAT,
|
||||
S_IRUSR | S_IRGRP | S_IROTH | O_CLOEXEC );
|
||||
if( 0 > fd )
|
||||
{
|
||||
DPRINTFE( "Failed to create/open watchdog heartbeat, error=%s.",
|
||||
strerror(errno) );
|
||||
return;
|
||||
}
|
||||
|
||||
close( fd );
|
||||
}
|
||||
|
||||
if( 0 > utime( SM_WATCHDOG_HEARTBEAT_FILENAME, &file_times ) )
|
||||
{
|
||||
DPRINTFE( "Failed to update watchdog heartbeat timings, error=%s.",
|
||||
strerror(errno) );
|
||||
return;
|
||||
}
|
||||
}
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// Utils - Watchdog Delayed
|
||||
// =========================
|
||||
bool sm_utils_watchdog_delayed( int max_delay_secs )
|
||||
{
|
||||
struct stat stat_data;
|
||||
|
||||
if( 0 == access( SM_WATCHDOG_HEARTBEAT_FILENAME, F_OK ) )
|
||||
{
|
||||
int elapsed_secs;
|
||||
struct timespec ts_mono;
|
||||
|
||||
clock_gettime( CLOCK_MONOTONIC_RAW, &ts_mono );
|
||||
|
||||
if( 0 > stat( SM_WATCHDOG_HEARTBEAT_FILENAME, &stat_data ) )
|
||||
{
|
||||
DPRINTFE( "Stat failed on file (%s), error=%s.",
|
||||
SM_WATCHDOG_HEARTBEAT_FILENAME, strerror( errno ) );
|
||||
return( false );
|
||||
}
|
||||
|
||||
// Make sure that the elapsed seconds drift is in a valid range.
|
||||
elapsed_secs = ts_mono.tv_sec - stat_data.st_mtime;
|
||||
if(( max_delay_secs < elapsed_secs )&&( elapsed_secs <= 300 ))
|
||||
{
|
||||
DPRINTFI( "SM-Watchdog has been delayed by more than %d "
|
||||
"seconds, elapsed_secs=%d", max_delay_secs,
|
||||
elapsed_secs );
|
||||
return( true );
|
||||
}
|
||||
}
|
||||
|
||||
return( false );
|
||||
}
|
||||
// ****************************************************************************
|
||||
|
@ -50,18 +50,6 @@ extern SmErrorT sm_utils_indicate_degraded( void );
|
||||
extern SmErrorT sm_utils_clear_degraded( void );
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// Utils - Watchdog Heartbeat
|
||||
// ==========================
|
||||
extern void sm_utils_watchdog_heartbeat( void );
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// Utils - Watchdog Delayed
|
||||
// =========================
|
||||
extern bool sm_utils_watchdog_delayed( int max_delay_secs );
|
||||
// ****************************************************************************
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@ -1,49 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2014 Wind River Systems, Inc.
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <libgen.h>
|
||||
#include <errno.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "sm_types.h"
|
||||
#include "sm_debug.h"
|
||||
#include "sm_watchdog_process.h"
|
||||
|
||||
// ****************************************************************************
|
||||
// Main - Thread
|
||||
// =============
|
||||
int main( int argc, char *argv[], char *envp[] )
|
||||
{
|
||||
SmErrorT error;
|
||||
|
||||
error = sm_debug_initialize();
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
printf( "Debug initialization failed, error=%s.\n",
|
||||
sm_error_str( error ) );
|
||||
return( EXIT_FAILURE );
|
||||
}
|
||||
|
||||
error = sm_watchdog_process_main( argc, argv, envp );
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
printf( "Process failure, error=%s.\n", sm_error_str( error ) );
|
||||
return( EXIT_FAILURE );
|
||||
}
|
||||
|
||||
error = sm_debug_finalize();
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
printf( "Debug finalization failed, error=%s.\n",
|
||||
sm_error_str( error ) );
|
||||
}
|
||||
|
||||
return( EXIT_SUCCESS );
|
||||
}
|
||||
// ****************************************************************************
|
@ -1,247 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2014 Wind River Systems, Inc.
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
#include "sm_watchdog_module.h"
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <glib.h>
|
||||
#include <gmodule.h>
|
||||
|
||||
#include "sm_types.h"
|
||||
#include "sm_list.h"
|
||||
#include "sm_timer.h"
|
||||
#include "sm_debug.h"
|
||||
|
||||
#define SM_WATCHDOG_MODULE_FILENAME_MAX_SIZE 128
|
||||
#define SM_WATCHDOG_MODULE_PATH "/var/lib/sm/watchdog/modules"
|
||||
#define SM_WATCHDOG_MODULE_DO_CHECK_FUNC "sm_watchdog_module_do_check"
|
||||
#define SM_WATCHDOG_MODULE_INITIALIZE_FUNC "sm_watchdog_module_initialize"
|
||||
#define SM_WATCHDOG_MODULE_FINALIZE_FUNC "sm_watchdog_module_finalize"
|
||||
|
||||
typedef void (*SmWatchdogModuleDoCheckT) (void);
|
||||
typedef bool (*SmWatchdogModuleInitializeT) (int* do_check_in_ms);
|
||||
typedef bool (*SmWatchdogModuleFinalizeT) (void);
|
||||
|
||||
// Bookkeeping record for one loaded watchdog module; instances are stored on
// the file-scope _modules list.
typedef struct
|
||||
{
|
||||
// Module file name as given to the loader; also passed as the name when the
// do-check timer is registered.
gchar filename[SM_WATCHDOG_MODULE_FILENAME_MAX_SIZE];
|
||||
// Handle returned by g_module_open() for this module.
GModule* glibmod;
|
||||
// Timer interval in milliseconds, filled in by the module's initialize hook.
int do_check_in_ms;
|
||||
// Id of the registered do-check timer; used to map a timer callback back to
// its module.
SmTimerIdT do_check_timer_id;
|
||||
// Optional hooks resolved with g_module_symbol(); left NULL (the record is
// zero-filled on allocation) when the module does not export them.
SmWatchdogModuleDoCheckT do_check;
|
||||
SmWatchdogModuleInitializeT initialize;
|
||||
SmWatchdogModuleFinalizeT finalize;
|
||||
} SmWatchdogModuleT;
|
||||
|
||||
static SmListT* _modules = NULL;
|
||||
|
||||
// ****************************************************************************
|
||||
// Watchdog Module - Do Check Timer
|
||||
// ================================
|
||||
// Timer callback shared by all watchdog modules.
//
// Looks up the module whose do_check_timer_id equals timer_id and invokes
// its do_check hook.  user_data is not used.
//
// Returns true when a matching module with a do_check hook was called, and
// false when no module owns the timer or the owner has no do_check hook.
// NOTE(review): the return value presumably tells the timer service whether
// to keep the timer armed -- confirm against sm_timer.
static bool sm_watchdog_module_do_check_timer( SmTimerIdT timer_id,
    int64_t user_data )
{
    SmListT* entry = NULL;
    SmListEntryDataPtrT entry_data;
    SmWatchdogModuleT* module = NULL;
    // Set only on an actual match.  The loop cursor cannot be used for this:
    // after an unmatched walk it still points at the last list entry, which
    // would be treated as the timer's owner.
    SmWatchdogModuleT* owner = NULL;

    SM_LIST_FOREACH( _modules, entry, entry_data )
    {
        module = (SmWatchdogModuleT*) entry_data;
        if( NULL == module )
        {
            continue;
        }

        if( timer_id == module->do_check_timer_id )
        {
            DPRINTFD( "Found do-check timer for module (%s).",
                      g_module_name(module->glibmod) );
            owner = module;
            break;
        }
    }

    if( NULL == owner )
    {
        DPRINTFE( "Module not found for do-check timer." );
        return( false );
    }

    if( NULL == owner->do_check )
    {
        return( false );
    }

    DPRINTFD( "Calling do-check for module (%s).",
              g_module_name(owner->glibmod) );
    owner->do_check();
    return( true );
}
|
||||
// ****************************************************************************
|
||||
|
||||
// ***************************************************************************
|
||||
// Watchdog Module - Load
|
||||
// ======================
|
||||
static SmErrorT sm_watchdog_module_load( const gchar* filename )
|
||||
{
|
||||
gchar* filepath;
|
||||
SmWatchdogModuleT* module;
|
||||
|
||||
module = (SmWatchdogModuleT*) malloc( sizeof(SmWatchdogModuleT) );
|
||||
if( NULL == module )
|
||||
{
|
||||
DPRINTFE( "Failed to allocate watchdog module." );
|
||||
return( SM_FAILED );
|
||||
}
|
||||
|
||||
memset( module, 0, sizeof(SmWatchdogModuleT) );
|
||||
|
||||
g_snprintf(module->filename, SM_WATCHDOG_MODULE_FILENAME_MAX_SIZE,
|
||||
"%s", filename);
|
||||
|
||||
filepath = g_module_build_path( SM_WATCHDOG_MODULE_PATH, filename );
|
||||
|
||||
module->glibmod = g_module_open( filepath, G_MODULE_BIND_LAZY );
|
||||
if( NULL == module->glibmod )
|
||||
{
|
||||
DPRINTFE( "Failed to open module (%s).", filepath );
|
||||
free( module );
|
||||
g_free( filepath );
|
||||
return( SM_FAILED );
|
||||
}
|
||||
|
||||
g_free( filepath );
|
||||
|
||||
g_module_symbol( module->glibmod, SM_WATCHDOG_MODULE_INITIALIZE_FUNC,
|
||||
(gpointer*) &(module->initialize) );
|
||||
|
||||
g_module_symbol( module->glibmod, SM_WATCHDOG_MODULE_FINALIZE_FUNC,
|
||||
(gpointer*) &(module->finalize) );
|
||||
|
||||
g_module_symbol( module->glibmod, SM_WATCHDOG_MODULE_DO_CHECK_FUNC,
|
||||
(gpointer*) &(module->do_check) );
|
||||
|
||||
SM_LIST_PREPEND( _modules, (SmListEntryDataPtrT) module );
|
||||
|
||||
return( SM_OKAY );
|
||||
}
|
||||
// ***************************************************************************
|
||||
|
||||
// ***************************************************************************
|
||||
// Watchdog Module - Load All
|
||||
// ==========================
|
||||
SmErrorT sm_watchdog_module_load_all( void )
|
||||
{
|
||||
const gchar* file;
|
||||
GDir* directory;
|
||||
GError* g_error;
|
||||
SmListT* entry = NULL;
|
||||
SmListEntryDataPtrT entry_data;
|
||||
SmWatchdogModuleT* module;
|
||||
SmErrorT error;
|
||||
|
||||
directory = g_dir_open( SM_WATCHDOG_MODULE_PATH, 0, &g_error );
|
||||
if( NULL == directory )
|
||||
{
|
||||
DPRINTFE( "Failed to open directory( %s), error=%s",
|
||||
SM_WATCHDOG_MODULE_PATH, g_error->message );
|
||||
g_error_free( g_error );
|
||||
return( SM_FAILED );
|
||||
}
|
||||
|
||||
file = g_dir_read_name( directory );
|
||||
while( NULL != file )
|
||||
{
|
||||
DPRINTFI( "Loading module (%s).", file );
|
||||
|
||||
error = sm_watchdog_module_load( file );
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
DPRINTFE( "Failed to load module (%s), error=%s.",
|
||||
file, sm_error_str(error) );
|
||||
}
|
||||
|
||||
file = g_dir_read_name( directory );
|
||||
}
|
||||
|
||||
g_dir_close( directory );
|
||||
|
||||
SM_LIST_FOREACH( _modules, entry, entry_data )
|
||||
{
|
||||
module = (SmWatchdogModuleT*) entry_data;
|
||||
if( NULL == module )
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if( NULL != module->initialize )
|
||||
{
|
||||
DPRINTFI( "Initializing module (%s).",
|
||||
g_module_name(module->glibmod) );
|
||||
|
||||
if( !(module->initialize( &(module->do_check_in_ms) )) )
|
||||
{
|
||||
DPRINTFE( "Failed to initialize %s.",
|
||||
g_module_name(module->glibmod) );
|
||||
return( SM_FAILED );
|
||||
}
|
||||
|
||||
error = sm_timer_register( module->filename,
|
||||
module->do_check_in_ms,
|
||||
sm_watchdog_module_do_check_timer,
|
||||
0, &(module->do_check_timer_id) );
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
DPRINTFE( "Failed to create module (%s) do-check timer, "
|
||||
"error=%s.", g_module_name(module->glibmod),
|
||||
sm_error_str( error ) );
|
||||
return( error );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return( SM_OKAY );
|
||||
}
|
||||
// ***************************************************************************
|
||||
|
||||
// ***************************************************************************
|
||||
// Watchdog Module - Unload All
|
||||
// ============================
|
||||
SmErrorT sm_watchdog_module_unload_all( void )
|
||||
{
|
||||
SmListT* entry = NULL;
|
||||
SmListEntryDataPtrT entry_data;
|
||||
SmWatchdogModuleT* module;
|
||||
|
||||
SM_LIST_FOREACH( _modules, entry, entry_data )
|
||||
{
|
||||
module = (SmWatchdogModuleT*) entry_data;
|
||||
if( NULL == module )
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if( NULL != module->finalize )
|
||||
{
|
||||
DPRINTFI( "Finalizing module (%s).",
|
||||
g_module_name(module->glibmod) );
|
||||
|
||||
if( !(module->finalize()) )
|
||||
{
|
||||
DPRINTFE( "Failed to finalize %s.",
|
||||
g_module_name(module->glibmod) );
|
||||
}
|
||||
}
|
||||
|
||||
g_module_close( module->glibmod );
|
||||
}
|
||||
|
||||
SM_LIST_CLEANUP_ALL( _modules );
|
||||
|
||||
return( SM_OKAY );
|
||||
}
|
||||
// ***************************************************************************
|
@ -1,31 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2014 Wind River Systems, Inc.
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
#ifndef __SM_WATCHDOG_MODULE_H__
|
||||
#define __SM_WATCHDOG_MODULE_H__
|
||||
|
||||
#include "sm_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// ****************************************************************************
|
||||
// Watchdog Module - Load All
|
||||
// ==========================
|
||||
extern SmErrorT sm_watchdog_module_load_all( void );
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// Watchdog Module - Unload All
|
||||
// ============================
|
||||
extern SmErrorT sm_watchdog_module_unload_all( void );
|
||||
// ****************************************************************************
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // __SM_WATCHDOG_MODULE_H__
|
@ -1,608 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2014 Wind River Systems, Inc.
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
#include "sm_watchdog_nfs.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
#include <signal.h>
|
||||
#include <errno.h>
|
||||
#include <sched.h>
|
||||
#include <pthread.h>
|
||||
#include <dirent.h>
|
||||
#include <limits.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/resource.h>
|
||||
|
||||
#include "sm_types.h"
|
||||
#include "sm_time.h"
|
||||
#include "sm_debug.h"
|
||||
#include "sm_node_utils.h"
|
||||
#include "sm_node_stats.h"
|
||||
|
||||
// Process name that identifies an NFS server thread; compared against the
// name reported in the process status.
#define SM_WATCHDOG_NFS_THREAD_NAME                 "(nfsd)"
// Sentinel stored in _nfs_reboot_inprogress once a reboot has been launched.
#define SM_WATCHDOG_NFS_REBOOT_INPROGRESS           0xA5A5A5A5
// Capacity of the blocked-thread tracking table.
#define SM_WATCHDOG_NFS_MAX_BLOCKED_THREADS         32
// Check interval (milliseconds) handed back by sm_watchdog_module_initialize.
#define SM_WATCHDOG_NFS_CHECK_IN_MS                 10000
// Milliseconds a thread may stay blocked before a reboot is issued; a
// warning is logged once half of this has elapsed.
#define SM_WATCHDOG_NFS_MAX_UNINTERRUPTIBLE_SLEEP   60000
// Milliseconds the reboot child waits before executing /sbin/reboot.
#define SM_WATCHDOG_NFS_DELAY_REBOOT_IN_MS          60000
// Milliseconds the escalation child waits before triggering sysrq 'b'.
#define SM_WATCHDOG_NFS_DELAY_REBOOT_FORCE_IN_MS    480000
// File that receives the diagnostics gathered when a hang is detected.
#define SM_WATCHDOG_NFS_DEBUG_FILE                  "/var/log/nfs.debug"
|
||||
|
||||
typedef struct
|
||||
{
|
||||
bool inuse;
|
||||
bool stale;
|
||||
int pid;
|
||||
SmTimeT timestamp;
|
||||
SmNodeProcessStatusT status;
|
||||
} SmWatchDogNfsBlockedInfoT;
|
||||
|
||||
// Reboot latch: set to SM_WATCHDOG_NFS_REBOOT_INPROGRESS by
// sm_watchdog_nfs_do_reboot once the reboot children have been forked, and
// reset to 0 by the module initialize/finalize entry points.
static uint32_t _nfs_reboot_inprogress;
|
||||
|
||||
static SmWatchDogNfsBlockedInfoT
|
||||
_nfs_blocked_threads[SM_WATCHDOG_NFS_MAX_BLOCKED_THREADS];
|
||||
|
||||
// ****************************************************************************
|
||||
// Watchdog NFS - Find Blocked Thread
|
||||
// ==================================
|
||||
static SmWatchDogNfsBlockedInfoT* sm_watchdog_nfs_find_blocked_thread( int pid )
|
||||
{
|
||||
SmWatchDogNfsBlockedInfoT* entry;
|
||||
|
||||
int thread_i;
|
||||
for( thread_i=0; SM_WATCHDOG_NFS_MAX_BLOCKED_THREADS > thread_i;
|
||||
++thread_i )
|
||||
{
|
||||
entry = &(_nfs_blocked_threads[thread_i]);
|
||||
|
||||
if( entry->inuse )
|
||||
{
|
||||
if( pid == entry->pid )
|
||||
{
|
||||
return( entry );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return( NULL );
|
||||
}
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// Watchdog NFS - Add Blocked Thread
|
||||
// =================================
|
||||
static void sm_watchdog_nfs_add_blocked_thread( int pid,
|
||||
SmNodeProcessStatusT* status )
|
||||
{
|
||||
SmWatchDogNfsBlockedInfoT* entry;
|
||||
|
||||
int thread_i;
|
||||
for( thread_i=0; SM_WATCHDOG_NFS_MAX_BLOCKED_THREADS > thread_i;
|
||||
++thread_i )
|
||||
{
|
||||
entry = &(_nfs_blocked_threads[thread_i]);
|
||||
|
||||
if( !(entry->inuse) )
|
||||
{
|
||||
entry->inuse = true;
|
||||
entry->stale = false;
|
||||
entry->pid = pid;
|
||||
sm_time_get( &(entry->timestamp) );
|
||||
memcpy( &(entry->status), status, sizeof(SmNodeProcessStatusT) );
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
DPRINTFE( "Not enough room for all the NFS blocked threads." );
|
||||
}
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// Watchdog NFS - Delete Blocked Thread
|
||||
// ====================================
|
||||
static void sm_watchdog_nfs_delete_blocked_thread( int pid )
|
||||
{
|
||||
SmWatchDogNfsBlockedInfoT* entry;
|
||||
|
||||
entry = sm_watchdog_nfs_find_blocked_thread( pid );
|
||||
if( NULL != entry )
|
||||
{
|
||||
memset( entry, 0, sizeof(SmWatchDogNfsBlockedInfoT) );
|
||||
entry->inuse = false;
|
||||
}
|
||||
}
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// Watchdog NFS - Do Reboot
|
||||
// ========================
|
||||
static void sm_watchdog_nfs_do_reboot( void )
|
||||
{
|
||||
char cmd[2048];
|
||||
pid_t reboot_pid;
|
||||
pid_t reboot_force_pid;
|
||||
pid_t sm_troubleshoot_pid;
|
||||
pid_t collect_pid;
|
||||
SmWatchDogNfsBlockedInfoT* entry;
|
||||
SmErrorT error;
|
||||
|
||||
if( SM_WATCHDOG_NFS_REBOOT_INPROGRESS == _nfs_reboot_inprogress )
|
||||
{
|
||||
DPRINTFD( "Reboot already inprogress." );
|
||||
return;
|
||||
}
|
||||
|
||||
// Fork child to do the reboot.
|
||||
reboot_pid = fork();
|
||||
if( 0 > reboot_pid )
|
||||
{
|
||||
DPRINTFE( "Failed to fork process for reboot, error=%s.",
|
||||
strerror( errno ) );
|
||||
return;
|
||||
|
||||
} else if( 0 == reboot_pid ) {
|
||||
// Child process.
|
||||
long ms_expired;
|
||||
char reboot_cmd[] = "reboot";
|
||||
char* reboot_argv[] = {reboot_cmd, NULL};
|
||||
char* reboot_env[] = {NULL};
|
||||
struct rlimit file_limits;
|
||||
SmTimeT timestamp;
|
||||
|
||||
setpgid( 0, 0 );
|
||||
|
||||
if( 0 == getrlimit( RLIMIT_NOFILE, &file_limits ) )
|
||||
{
|
||||
unsigned int fd_i;
|
||||
for( fd_i=0; fd_i < file_limits.rlim_cur; ++fd_i )
|
||||
{
|
||||
close( fd_i );
|
||||
}
|
||||
|
||||
open( "/dev/null", O_RDONLY ); // stdin
|
||||
open( "/dev/null", O_WRONLY ); // stdout
|
||||
open( "/dev/null", O_WRONLY ); // stderr
|
||||
}
|
||||
|
||||
sm_time_get( ×tamp );
|
||||
|
||||
while( true )
|
||||
{
|
||||
ms_expired = sm_time_get_elapsed_ms( ×tamp );
|
||||
if( SM_WATCHDOG_NFS_DELAY_REBOOT_IN_MS < ms_expired )
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
sleep( 10 ); // 10 seconds
|
||||
}
|
||||
|
||||
execve( "/sbin/reboot", reboot_argv, reboot_env );
|
||||
|
||||
// Shouldn't get this far, else there was an error.
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
// Fork child to do reboot force.
|
||||
reboot_force_pid = fork();
|
||||
if( 0 > reboot_force_pid )
|
||||
{
|
||||
DPRINTFE( "Failed to fork process for reboot escalation, "
|
||||
"error=%s.", strerror( errno ) );
|
||||
return;
|
||||
|
||||
} else if( 0 == reboot_force_pid ) {
|
||||
// Child process.
|
||||
long ms_expired;
|
||||
int sysrq_handler_fd;
|
||||
int sysrq_tigger_fd;
|
||||
struct rlimit file_limits;
|
||||
SmTimeT timestamp;
|
||||
|
||||
setpgid( 0, 0 );
|
||||
|
||||
if( 0 == getrlimit( RLIMIT_NOFILE, &file_limits ) )
|
||||
{
|
||||
unsigned int fd_i;
|
||||
for( fd_i=0; fd_i < file_limits.rlim_cur; ++fd_i )
|
||||
{
|
||||
close( fd_i );
|
||||
}
|
||||
|
||||
open( "/dev/null", O_RDONLY ); // stdin
|
||||
open( "/dev/null", O_WRONLY ); // stdout
|
||||
open( "/dev/null", O_WRONLY ); // stderr
|
||||
}
|
||||
|
||||
sm_time_get( ×tamp );
|
||||
|
||||
while( true )
|
||||
{
|
||||
ms_expired = sm_time_get_elapsed_ms( ×tamp );
|
||||
if( SM_WATCHDOG_NFS_DELAY_REBOOT_FORCE_IN_MS < ms_expired )
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
sleep( 10 ); // 10 seconds
|
||||
}
|
||||
|
||||
// Enable sysrq handling.
|
||||
sysrq_handler_fd = open( "/proc/sys/kernel/sysrq", O_RDWR | O_CLOEXEC );
|
||||
if( 0 > sysrq_handler_fd )
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
write( sysrq_handler_fd, "1", 1 );
|
||||
close( sysrq_handler_fd );
|
||||
|
||||
// Trigger sysrq command.
|
||||
sysrq_tigger_fd = open( "/proc/sysrq-trigger", O_RDWR | O_CLOEXEC );
|
||||
if( 0 > sysrq_tigger_fd )
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
write( sysrq_tigger_fd, "b", 1 );
|
||||
close( sysrq_tigger_fd );
|
||||
|
||||
exit( EXIT_SUCCESS );
|
||||
}
|
||||
|
||||
_nfs_reboot_inprogress = SM_WATCHDOG_NFS_REBOOT_INPROGRESS;
|
||||
|
||||
// Fork child to do the sm-troubleshoot.
|
||||
sm_troubleshoot_pid = fork();
|
||||
if( 0 > sm_troubleshoot_pid )
|
||||
{
|
||||
DPRINTFE( "Failed to fork process for sm-trouble, error=%s.",
|
||||
strerror( errno ) );
|
||||
|
||||
} else if( 0 == sm_troubleshoot_pid ) {
|
||||
// Child process.
|
||||
char cmd[] = "sm-troubleshoot";
|
||||
char log_file[] = SM_TROUBLESHOOT_LOG_FILE;
|
||||
char* argv[] = {cmd, log_file, NULL};
|
||||
char* env[] = {NULL};
|
||||
struct rlimit file_limits;
|
||||
|
||||
setpgid( 0, 0 );
|
||||
|
||||
if( 0 == getrlimit( RLIMIT_NOFILE, &file_limits ) )
|
||||
{
|
||||
unsigned int fd_i;
|
||||
for( fd_i=0; fd_i < file_limits.rlim_cur; ++fd_i )
|
||||
{
|
||||
close( fd_i );
|
||||
}
|
||||
|
||||
open( "/dev/null", O_RDONLY ); // stdin
|
||||
open( "/dev/null", O_WRONLY ); // stdout
|
||||
open( "/dev/null", O_WRONLY ); // stderr
|
||||
}
|
||||
|
||||
execve( SM_TROUBLESHOOT_SCRIPT, argv, env );
|
||||
|
||||
// Shouldn't get this far, else there was an error.
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
// Fork child to run collect.
|
||||
collect_pid = fork();
|
||||
if( 0 > collect_pid )
|
||||
{
|
||||
DPRINTFE( "Failed to fork process for collect, error=%s.",
|
||||
strerror( errno ) );
|
||||
|
||||
} else if( 0 == collect_pid ) {
|
||||
// Child process.
|
||||
char cmd[] = "collect";
|
||||
char* argv[] = {cmd, NULL};
|
||||
char* env[] = {NULL};
|
||||
struct rlimit file_limits;
|
||||
|
||||
setpgid( 0, 0 );
|
||||
|
||||
if( 0 == getrlimit( RLIMIT_NOFILE, &file_limits ) )
|
||||
{
|
||||
unsigned int fd_i;
|
||||
for( fd_i=0; fd_i < file_limits.rlim_cur; ++fd_i )
|
||||
{
|
||||
close( fd_i );
|
||||
}
|
||||
|
||||
open( "/dev/null", O_RDONLY ); // stdin
|
||||
open( "/dev/null", O_WRONLY ); // stdout
|
||||
open( "/dev/null", O_WRONLY ); // stderr
|
||||
}
|
||||
|
||||
execve( "/usr/local/sbin/collect", argv, env );
|
||||
|
||||
// Shouldn't get this far, else there was an error.
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
error = sm_node_utils_set_unhealthy();
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
DPRINTFE( "Failed to set node unhealthy, error=%s.",
|
||||
sm_error_str(error) );
|
||||
}
|
||||
|
||||
DPRINTFI( "*******************************************************" );
|
||||
DPRINTFI( "** Issuing a reboot of the system, NFS hang detected **" );
|
||||
DPRINTFI( "*******************************************************" );
|
||||
|
||||
DPRINTFI( "Reboot (%i) process created.", (int) reboot_pid );
|
||||
DPRINTFI( "Reboot force (%i) process created.", (int) reboot_force_pid );
|
||||
DPRINTFI( "SM troubleshoot (%i) process created.", (int) sm_troubleshoot_pid );
|
||||
DPRINTFI( "Collect (%i) process created.", (int) collect_pid );
|
||||
|
||||
snprintf( cmd, sizeof(cmd),
|
||||
"date >> %s; "
|
||||
"echo \"*******************************************\" >> %s; "
|
||||
"echo \"NFS HANG DETECTED\" >> %s", SM_WATCHDOG_NFS_DEBUG_FILE,
|
||||
SM_WATCHDOG_NFS_DEBUG_FILE, SM_WATCHDOG_NFS_DEBUG_FILE );
|
||||
system( cmd );
|
||||
|
||||
int thread_i;
|
||||
for( thread_i=0; SM_WATCHDOG_NFS_MAX_BLOCKED_THREADS > thread_i;
|
||||
++thread_i )
|
||||
{
|
||||
entry = &(_nfs_blocked_threads[thread_i]);
|
||||
|
||||
if( entry->inuse )
|
||||
{
|
||||
snprintf( cmd, sizeof(cmd),
|
||||
"date >> %s; "
|
||||
"echo \"cat /proc/%i/sched\" >> %s; "
|
||||
"cat /proc/%i/sched >> %s", SM_WATCHDOG_NFS_DEBUG_FILE,
|
||||
entry->pid, SM_WATCHDOG_NFS_DEBUG_FILE, entry->pid,
|
||||
SM_WATCHDOG_NFS_DEBUG_FILE );
|
||||
system( cmd );
|
||||
|
||||
snprintf( cmd, sizeof(cmd),
|
||||
"date >> %s; "
|
||||
"echo \"cat /proc/%i/stack\" >> %s; "
|
||||
"cat /proc/%i/stack >> %s", SM_WATCHDOG_NFS_DEBUG_FILE,
|
||||
entry->pid, SM_WATCHDOG_NFS_DEBUG_FILE, entry->pid,
|
||||
SM_WATCHDOG_NFS_DEBUG_FILE );
|
||||
system( cmd );
|
||||
}
|
||||
}
|
||||
|
||||
snprintf( cmd, sizeof(cmd),
|
||||
"echo \"*******************************************\" >> %s",
|
||||
SM_WATCHDOG_NFS_DEBUG_FILE );
|
||||
system( cmd );
|
||||
}
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// Watchdog NFS - Search
|
||||
// =====================
|
||||
static void sm_watchdog_nfs_search( const char dir_name[] )
|
||||
{
|
||||
bool is_dir;
|
||||
DIR* dir;
|
||||
char path[PATH_MAX];
|
||||
int path_len;
|
||||
SmNodeProcessStatusT status;
|
||||
SmErrorT error;
|
||||
|
||||
dir = opendir( dir_name );
|
||||
if( NULL == dir )
|
||||
{
|
||||
DPRINTFE( "Failed to open directory (%s), error=%s.", dir_name,
|
||||
strerror( errno ) );
|
||||
return;
|
||||
}
|
||||
|
||||
struct dirent* entry;
|
||||
for( entry = readdir( dir ); NULL != entry; entry = readdir( dir ) )
|
||||
{
|
||||
is_dir = false;
|
||||
|
||||
path_len = snprintf( path, sizeof(path), "%s/%s", dir_name,
|
||||
entry->d_name );
|
||||
if( PATH_MAX <= path_len )
|
||||
{
|
||||
DPRINTFE( "Path (%s/%s) is too long, max_len=%i.",
|
||||
dir_name, entry->d_name, path_len );
|
||||
break;
|
||||
}
|
||||
|
||||
if( 0 != (DT_REG & entry->d_type) )
|
||||
{
|
||||
if( '.' != entry->d_name[0] )
|
||||
{
|
||||
struct stat stat_data;
|
||||
|
||||
if( 0 > lstat( path, &stat_data ) )
|
||||
{
|
||||
DPRINTFE( "Stat on (%s) failed, error=%s.", entry->d_name,
|
||||
strerror( errno ) );
|
||||
continue;
|
||||
}
|
||||
|
||||
is_dir = S_ISDIR( stat_data.st_mode );
|
||||
}
|
||||
} else if( 0 != (DT_DIR & entry->d_type) ) {
|
||||
if(( 0 != strcmp( ".", entry->d_name ) )&&
|
||||
( 0 != strcmp( "..", entry->d_name ) ))
|
||||
{
|
||||
is_dir = true;
|
||||
}
|
||||
}
|
||||
|
||||
if( is_dir )
|
||||
{
|
||||
long val;
|
||||
char* end;
|
||||
|
||||
val = strtol( entry->d_name, &end, 10 );
|
||||
if(( ERANGE == errno )&&
|
||||
(( LONG_MIN == val ) ||( LONG_MAX == val )))
|
||||
{
|
||||
DPRINTFD( "Directory (%s) name out of range.",
|
||||
entry->d_name );
|
||||
continue;
|
||||
}
|
||||
|
||||
if( end == entry->d_name )
|
||||
{
|
||||
DPRINTFD( "Directory (%s) is not a pid directory.",
|
||||
entry->d_name );
|
||||
continue;
|
||||
}
|
||||
|
||||
error = sm_node_stats_get_process_status( val, &status );
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
if( SM_NOT_FOUND == error )
|
||||
{
|
||||
DPRINTFD( "Failed to get %ld pid status, error=%s.",
|
||||
val, sm_error_str(error) );
|
||||
} else {
|
||||
DPRINTFE( "Failed to get %ld pid status, error=%s.",
|
||||
val, sm_error_str(error) );
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
DPRINTFD( "Looking at pid=%i, name=%s", status.pid, status.name );
|
||||
|
||||
if( 0 != strcmp( SM_WATCHDOG_NFS_THREAD_NAME, status.name ) )
|
||||
{
|
||||
DPRINTFD( "Process (%s) not an nfs thread, pid=%i.",
|
||||
status.name, status.pid );
|
||||
continue;
|
||||
}
|
||||
|
||||
DPRINTFD( "NFS thread, pid=%i, state=%c, block_start_ns=%lld.",
|
||||
status.pid, status.state, status.block_start_ns );
|
||||
|
||||
if(( 0 != status.block_start_ns )&&( 'D' == status.state ))
|
||||
{
|
||||
SmWatchDogNfsBlockedInfoT* entry;
|
||||
|
||||
entry = sm_watchdog_nfs_find_blocked_thread( (int) val );
|
||||
if( NULL == entry )
|
||||
{
|
||||
sm_watchdog_nfs_add_blocked_thread( (int) val, &status );
|
||||
|
||||
} else if( status.block_start_ns == entry->status.block_start_ns ) {
|
||||
long ms_expired;
|
||||
|
||||
entry->stale = false;
|
||||
ms_expired = sm_time_get_elapsed_ms( &(entry->timestamp) );
|
||||
if( SM_WATCHDOG_NFS_MAX_UNINTERRUPTIBLE_SLEEP < ms_expired )
|
||||
{
|
||||
sm_watchdog_nfs_do_reboot();
|
||||
DPRINTFI( "Rebooting stuck nfs thread (%i).",
|
||||
(int) val );
|
||||
break;
|
||||
} else {
|
||||
if( (SM_WATCHDOG_NFS_MAX_UNINTERRUPTIBLE_SLEEP/2)
|
||||
< ms_expired )
|
||||
{
|
||||
DPRINTFI( "WARNING: NFS thread, pid=%i, state=%c, "
|
||||
"block_start_ns=%lld, elapsed_ms=%ld.",
|
||||
status.pid, status.state,
|
||||
status.block_start_ns, ms_expired );
|
||||
}
|
||||
}
|
||||
} else {
|
||||
sm_watchdog_nfs_delete_blocked_thread( (int) val );
|
||||
sm_watchdog_nfs_add_blocked_thread( (int) val, &status );
|
||||
}
|
||||
} else {
|
||||
sm_watchdog_nfs_delete_blocked_thread( (int) val );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
closedir( dir );
|
||||
}
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// Watchdog NFS - Do Check
|
||||
// =======================
|
||||
void sm_watchdog_module_do_check( void )
|
||||
{
|
||||
DPRINTFD( "NFS do check called." );
|
||||
|
||||
if( SM_WATCHDOG_NFS_REBOOT_INPROGRESS != _nfs_reboot_inprogress )
|
||||
{
|
||||
int thread_i;
|
||||
SmWatchDogNfsBlockedInfoT* entry;
|
||||
|
||||
// Mark entries as stale.
|
||||
for( thread_i=0; SM_WATCHDOG_NFS_MAX_BLOCKED_THREADS > thread_i;
|
||||
++thread_i )
|
||||
{
|
||||
entry = &(_nfs_blocked_threads[thread_i]);
|
||||
|
||||
if( entry->inuse )
|
||||
{
|
||||
entry->stale = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Audit NFS threads.
|
||||
sm_watchdog_nfs_search( "/proc" );
|
||||
|
||||
// Cleanup stale entries.
|
||||
for( thread_i=0; SM_WATCHDOG_NFS_MAX_BLOCKED_THREADS > thread_i;
|
||||
++thread_i )
|
||||
{
|
||||
entry = &(_nfs_blocked_threads[thread_i]);
|
||||
|
||||
if(( entry->inuse )&&( entry->stale ))
|
||||
{
|
||||
memset( entry, 0, sizeof(SmWatchDogNfsBlockedInfoT) );
|
||||
entry->inuse = false;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
DPRINTFD( "Reboot inprogress." );
|
||||
}
|
||||
}
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// Watchdog NFS - Initialize
|
||||
// =========================
|
||||
bool sm_watchdog_module_initialize( int* do_check_in_ms )
|
||||
{
|
||||
*do_check_in_ms = SM_WATCHDOG_NFS_CHECK_IN_MS;
|
||||
_nfs_reboot_inprogress = 0;
|
||||
memset( &_nfs_blocked_threads, 0, sizeof(_nfs_blocked_threads) );
|
||||
return( true );
|
||||
}
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// Watchdog NFS - Finalize
|
||||
// =======================
|
||||
bool sm_watchdog_module_finalize( void )
|
||||
{
|
||||
_nfs_reboot_inprogress = 0;
|
||||
memset( &_nfs_blocked_threads, 0, sizeof(_nfs_blocked_threads) );
|
||||
return( true );
|
||||
}
|
||||
// ****************************************************************************
|
@ -1,37 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2014 Wind River Systems, Inc.
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
#ifndef __SM_WATCHDOG_NFS_H__
|
||||
#define __SM_WATCHDOG_NFS_H__
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// ****************************************************************************
|
||||
// Watchdog NFS - Do Check
|
||||
// =======================
|
||||
// Audits the NFS threads found under /proc and releases tracking entries
// that were not refreshed by the audit; does nothing but log once a reboot
// is in progress.
extern void sm_watchdog_module_do_check( void );
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// Watchdog NFS - Initialize
|
||||
// =========================
|
||||
// Resets the module state and stores the module's check interval, in
// milliseconds, in *do_check_in_ms. The NFS implementation always returns
// true.
extern bool sm_watchdog_module_initialize( int* do_check_in_ms );
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// Watchdog NFS - Finalize
|
||||
// =======================
|
||||
// Resets the module state. The NFS implementation always returns true.
extern bool sm_watchdog_module_finalize( void );
|
||||
// ****************************************************************************
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // __SM_WATCHDOG_NFS_H__
|
@ -1,241 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2014 Wind River Systems, Inc.
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
#include "sm_watchdog_process.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include <string.h>
|
||||
#include <signal.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include <time.h>
|
||||
#include <sched.h>
|
||||
#include <limits.h>
|
||||
#include <errno.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/wait.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/resource.h>
|
||||
#include <sys/select.h>
|
||||
#include <getopt.h>
|
||||
|
||||
#include "sm_limits.h"
|
||||
#include "sm_types.h"
|
||||
#include "sm_debug.h"
|
||||
#include "sm_utils.h"
|
||||
#include "sm_selobj.h"
|
||||
#include "sm_time.h"
|
||||
#include "sm_timer.h"
|
||||
#include "sm_node_stats.h"
|
||||
#include "sm_watchdog_module.h"
|
||||
|
||||
// Main-loop period in milliseconds: passed to sm_timer_initialize and
// sm_selobj_dispatch, and used as the minimum spacing between heartbeats.
#define SM_WATCHDOG_PROCESS_TICK_INTERVAL_IN_MS 1000
|
||||
|
||||
// Run flag of the main loop: cleared by the signal handler on SIGINT,
// SIGTERM or SIGQUIT. It is written asynchronously from a signal handler and
// polled by the main loop, so it must be volatile as well as sig_atomic_t.
static volatile sig_atomic_t _stay_on = 1;
|
||||
|
||||
// ****************************************************************************
|
||||
// Watchdog Process - Signal Handler
|
||||
// =================================
|
||||
static void sm_watchdog_process_signal_handler( int signum )
|
||||
{
|
||||
switch( signum )
|
||||
{
|
||||
case SIGINT:
|
||||
case SIGTERM:
|
||||
case SIGQUIT:
|
||||
_stay_on = 0;
|
||||
break;
|
||||
|
||||
case SIGCONT:
|
||||
DPRINTFD( "Ignoring signal SIGCONT (%i).", signum );
|
||||
break;
|
||||
|
||||
default:
|
||||
DPRINTFD( "Signal (%i) ignored.", signum );
|
||||
break;
|
||||
}
|
||||
}
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// Watchdog Process - Setup Signal Handler
|
||||
// =======================================
|
||||
static void sm_watchdog_process_setup_signal_handler( void )
|
||||
{
|
||||
struct sigaction sa;
|
||||
|
||||
memset( &sa, 0, sizeof(sa) );
|
||||
sa.sa_handler = sm_watchdog_process_signal_handler;
|
||||
|
||||
sigaction( SIGINT, &sa, NULL );
|
||||
sigaction( SIGTERM, &sa, NULL );
|
||||
sigaction( SIGQUIT, &sa, NULL );
|
||||
sigaction( SIGCONT, &sa, NULL );
|
||||
|
||||
signal( SIGCHLD, SIG_IGN );
|
||||
}
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// Watchdog Process - Initialize
|
||||
// =============================
|
||||
static SmErrorT sm_watchdog_process_initialize( void )
|
||||
{
|
||||
SmErrorT error;
|
||||
|
||||
error = sm_selobj_initialize();
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
DPRINTFE( "Failed to initialize selection object module, error=%s.",
|
||||
sm_error_str( error ) );
|
||||
return( error );
|
||||
}
|
||||
|
||||
error = sm_timer_initialize( SM_WATCHDOG_PROCESS_TICK_INTERVAL_IN_MS );
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
DPRINTFE( "Failed to initialize timer module, error=%s.",
|
||||
sm_error_str( error ) );
|
||||
return( error );
|
||||
}
|
||||
|
||||
error = sm_node_stats_initialize();
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
DPRINTFE( "Failed to initialize node stats, error=%s.",
|
||||
sm_error_str( error ) );
|
||||
return( error );
|
||||
}
|
||||
|
||||
return( SM_OKAY );
|
||||
}
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// Watchdog Process - Finalize
|
||||
// ===========================
|
||||
static SmErrorT sm_watchdog_process_finalize( void )
|
||||
{
|
||||
SmErrorT error;
|
||||
|
||||
error = sm_node_stats_finalize();
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
DPRINTFE( "Failed to finialize node stats, error=%s.",
|
||||
sm_error_str( error ) );
|
||||
}
|
||||
|
||||
error = sm_timer_finalize();
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
DPRINTFE( "Failed to finalize timer module, error=%s.",
|
||||
sm_error_str( error ) );
|
||||
}
|
||||
|
||||
error = sm_selobj_finalize();
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
DPRINTFE( "Failed to finalize selection object module, error=%s.",
|
||||
sm_error_str( error ) );
|
||||
}
|
||||
|
||||
return( SM_OKAY );
|
||||
}
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// Watchdog Process - Main
|
||||
// =======================
|
||||
SmErrorT sm_watchdog_process_main( int argc, char *argv[], char *envp[] )
|
||||
{
|
||||
long ms_expired;
|
||||
SmTimeT watchdog_heartbeat_time_prev;
|
||||
SmErrorT error;
|
||||
|
||||
sm_watchdog_process_setup_signal_handler();
|
||||
|
||||
DPRINTFI( "Starting" );
|
||||
|
||||
if( sm_utils_process_running( SM_WATCHDOG_PROCESS_PID_FILENAME ) )
|
||||
{
|
||||
DPRINTFI( "Already running an instance of sm-watchdog." );
|
||||
return( SM_OKAY );
|
||||
}
|
||||
|
||||
if( !sm_utils_set_pid_file( SM_WATCHDOG_PROCESS_PID_FILENAME ) )
|
||||
{
|
||||
DPRINTFE( "Failed to write pid file for sm-watchdog, error=%s.",
|
||||
strerror(errno) );
|
||||
return( SM_FAILED );
|
||||
}
|
||||
|
||||
error = sm_watchdog_process_initialize();
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
DPRINTFE( "Failed initialize process, error=%s.",
|
||||
sm_error_str(error) );
|
||||
return( error );
|
||||
}
|
||||
|
||||
error = sm_watchdog_module_load_all();
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
DPRINTFE( "Failed load modules, error=%s.",
|
||||
sm_error_str(error) );
|
||||
return( error );
|
||||
}
|
||||
|
||||
DPRINTFI( "Started." );
|
||||
|
||||
sm_time_get( &watchdog_heartbeat_time_prev );
|
||||
sm_utils_watchdog_heartbeat();
|
||||
|
||||
while( _stay_on )
|
||||
{
|
||||
error = sm_selobj_dispatch( SM_WATCHDOG_PROCESS_TICK_INTERVAL_IN_MS );
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
DPRINTFE( "Selection object dispatch failed, error=%s.",
|
||||
sm_error_str(error) );
|
||||
break;
|
||||
}
|
||||
|
||||
ms_expired = sm_time_get_elapsed_ms( &watchdog_heartbeat_time_prev );
|
||||
if( SM_WATCHDOG_PROCESS_TICK_INTERVAL_IN_MS <= ms_expired )
|
||||
{
|
||||
if( sm_timer_scheduling_on_time() )
|
||||
{
|
||||
sm_utils_watchdog_heartbeat();
|
||||
sm_time_get( &watchdog_heartbeat_time_prev );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
DPRINTFI( "Shutting down." );
|
||||
|
||||
error = sm_watchdog_module_unload_all();
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
DPRINTFE( "Failed unload modules, error=%s.",
|
||||
sm_error_str(error) );
|
||||
}
|
||||
|
||||
error = sm_watchdog_process_finalize();
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
DPRINTFE( "Failed to finalize process, error=%s.",
|
||||
sm_error_str( error ) );
|
||||
}
|
||||
|
||||
DPRINTFI( "Shutdown complete." );
|
||||
|
||||
return( SM_OKAY );
|
||||
}
|
||||
// ****************************************************************************
|
@ -1,25 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2014 Wind River Systems, Inc.
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
#ifndef __SM_WATCHDOG_PROCESS_H__
|
||||
#define __SM_WATCHDOG_PROCESS_H__
|
||||
|
||||
#include "sm_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// ****************************************************************************
|
||||
// Watchdog Process - Main
|
||||
// =======================
|
||||
// Runs the sm-watchdog process until a termination signal is received.
// Returns SM_OKAY after a normal shutdown or when another instance is
// already running, SM_FAILED when the pid file cannot be written, or the
// error reported by process initialization / module loading.
extern SmErrorT sm_watchdog_process_main( int argc, char *argv[], char *envp[] );
|
||||
// ****************************************************************************
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // __SM_WATCHDOG_PROCESS_H__
|
@ -1,6 +1,6 @@
|
||||
[Unit]
|
||||
Description=Service Management Unit
|
||||
After=network-online.target syslog-ng.service config.service sm-watchdog.service systemd-udev-settle.service drbd.service
|
||||
After=network-online.target syslog-ng.service config.service systemd-udev-settle.service drbd.service
|
||||
Before=sm-shutdown.service sm-api.service pmon.service
|
||||
|
||||
[Service]
|
||||
|
@ -67,9 +67,6 @@ timeout --signal KILL 5s pmap -x `cat /var/run/sm-trap.pid`
|
||||
delimiter "pmap -x cat /var/run/sm-eru.pid"
|
||||
timeout --signal KILL 5s pmap -x `cat /var/run/sm-eru.pid`
|
||||
|
||||
delimiter "pmap -x cat /var/run/sm-watchdog.pid"
|
||||
timeout --signal KILL 5s pmap -x `cat /var/run/sm-watchdog.pid`
|
||||
|
||||
delimiter "top -b -n 1 -H -c"
|
||||
timeout --signal KILL 5s top -b -n 1 -H -c
|
||||
|
||||
|
@ -19,14 +19,11 @@
|
||||
#include <sys/resource.h>
|
||||
|
||||
#include "sm_types.h"
|
||||
#include "sm_utils.h"
|
||||
#include "sm_debug.h"
|
||||
#include "sm_sha512.h"
|
||||
#include "sm_service_action_table.h"
|
||||
#include "sm_service_action_result_table.h"
|
||||
|
||||
#define SM_SERVICE_ACTION_MAX_DELAY_IN_SECS 4
|
||||
#define SM_SERVICE_ACTION_TIMER_SKEW_IN_MS 60000
|
||||
#define SM_SERVICE_ACTION_VALIDATE_TIMER_IN_MS 60000
|
||||
|
||||
// ****************************************************************************
|
||||
@ -839,15 +836,6 @@ SmErrorT sm_service_action_run( char service_name[], char instance_name[],
|
||||
*process_id = (int) pid;
|
||||
*timeout_in_ms = action_data->timeout_in_secs * 1000;
|
||||
|
||||
if( sm_utils_watchdog_delayed( SM_SERVICE_ACTION_MAX_DELAY_IN_SECS ) )
|
||||
{
|
||||
DPRINTFI( "Service (%s) timeout %d secs increased by %d ms, "
|
||||
"sm-watchdog delayed.", action_data->service_name,
|
||||
action_data->timeout_in_secs,
|
||||
SM_SERVICE_ACTION_TIMER_SKEW_IN_MS );
|
||||
*timeout_in_ms += SM_SERVICE_ACTION_TIMER_SKEW_IN_MS;
|
||||
}
|
||||
|
||||
DPRINTFD( "Child process (%i) created for service (%s).", *process_id,
|
||||
action_data->service_name );
|
||||
}
|
||||
|
@ -40,9 +40,7 @@ typedef struct
|
||||
SmServiceGroupNotificationT service_group_notification;
|
||||
} SmNotificationEnvT;
|
||||
|
||||
#define SM_NOTIFICATION_SCRIPT_MAX_DELAY_IN_SECS 4
|
||||
#define SM_NOTIFICATION_SCRIPT_TIMEOUT_IN_MS 30000
|
||||
#define SM_NOTIFICATION_SCRIPT_TIMER_SKEW_IN_MS 60000
|
||||
#define SM_NOTIFICATION_SCRIPT_SUCCESS 0
|
||||
#define SM_NOTIFICATION_SCRIPT_TIMEOUT -65534
|
||||
#define SM_NOTIFICATION_SCRIPT_FAILURE -65535
|
||||
@ -712,14 +710,6 @@ SmErrorT sm_service_group_notification_notify( SmServiceGroupT* service_group,
|
||||
snprintf( timer_name, sizeof(timer_name), "%s %s notification ",
|
||||
service_group->name, notification_str );
|
||||
|
||||
if( sm_utils_watchdog_delayed( SM_NOTIFICATION_SCRIPT_MAX_DELAY_IN_SECS ) )
|
||||
{
|
||||
DPRINTFI( "Notification timeout %d secs increased by %d ms, "
|
||||
"sm-watchdog delayed.", timeout_in_ms,
|
||||
SM_NOTIFICATION_SCRIPT_TIMER_SKEW_IN_MS );
|
||||
timeout_in_ms += SM_NOTIFICATION_SCRIPT_TIMER_SKEW_IN_MS;
|
||||
}
|
||||
|
||||
error = sm_timer_register( timer_name, timeout_in_ms,
|
||||
sm_service_group_notification_timeout,
|
||||
service_group->id, &timer_id );
|
||||
|
Loading…
Reference in New Issue
Block a user