Add /var/crash dump management to maintenance.
The Linux kernel can be configured to perform a crash dump and reboot in response to specific, typically serious, events. A crash dump event produces a crash dump report bundle (directory) of files that represent the state of the kernel at the time of the event; useful for post-event root cause analysis. The kernel directs new crash dump bundles to /var/crash/<dated vmcore bundle>. Crash dump bundles are quite large and, if too many occur, can fill up their target filesystem. This update adds crash dump bundle management to maintenance with a new crashDumpMgr service script and installs a crash dump logrotate configuration file to compress/preserve the first crash bundle and compress/rotate all subsequent bundles. With repeated crash dumps and the help of background log rotation, this update produces the following compressed crash dump bundles: controller-1:~$ ls -lrth /var/log/crash total 238M -rw-r--r-- 1 root 77M <date> vmcore_first.tar.1.gz -rw-r--r-- 1 root 75M <date> vmcore.tar.1.gz Change-Id: I2741e610c6c417d7fc14dfada283a1edacd9327f Partial-Fix: 1898602 Signed-off-by: Eric MacDonald <eric.macdonald@windriver.com>
This commit is contained in:
parent
f5725ad694
commit
85f605a762
@ -349,6 +349,8 @@ install -m 700 -p -D %{_buildsubdir}/alarm/scripts/mtcalarm.init %{buildroot}%{_
|
|||||||
# TODO: Init hack. Should move to proper module
|
# TODO: Init hack. Should move to proper module
|
||||||
install -m 755 -p -D %{_buildsubdir}/scripts/hwclock.sh %{buildroot}%{_sysconfdir}/init.d/hwclock.sh
|
install -m 755 -p -D %{_buildsubdir}/scripts/hwclock.sh %{buildroot}%{_sysconfdir}/init.d/hwclock.sh
|
||||||
install -m 644 -p -D %{_buildsubdir}/scripts/hwclock.service %{buildroot}%{_unitdir}/hwclock.service
|
install -m 644 -p -D %{_buildsubdir}/scripts/hwclock.service %{buildroot}%{_unitdir}/hwclock.service
|
||||||
|
install -m 755 -p -D %{_buildsubdir}/scripts/crashDumpMgr %{buildroot}%{_sysconfdir}/init.d/crashDumpMgr
|
||||||
|
install -m 644 -p -D %{_buildsubdir}/scripts/crashDumpMgr.service %{buildroot}%{_unitdir}/crashDumpMgr.service
|
||||||
|
|
||||||
# systemd service files
|
# systemd service files
|
||||||
install -m 644 -p -D %{_buildsubdir}/fsmon/scripts/fsmon.service %{buildroot}%{_unitdir}/fsmon.service
|
install -m 644 -p -D %{_buildsubdir}/fsmon/scripts/fsmon.service %{buildroot}%{_unitdir}/fsmon.service
|
||||||
@ -395,6 +397,7 @@ install -m 644 -p -D %{_buildsubdir}/lmon/scripts/lmon.pmon.conf %{buildroot}%{l
|
|||||||
|
|
||||||
# log rotation
|
# log rotation
|
||||||
install -m 755 -d %{buildroot}%{_sysconfdir}/logrotate.d
|
install -m 755 -d %{buildroot}%{_sysconfdir}/logrotate.d
|
||||||
|
install -m 644 -p -D %{_buildsubdir}/scripts/crashdump.logrotate %{buildroot}%{local_etc_logrotated}/crashdump.logrotate
|
||||||
install -m 644 -p -D %{_buildsubdir}/scripts/mtce.logrotate %{buildroot}%{local_etc_logrotated}/mtce.logrotate
|
install -m 644 -p -D %{_buildsubdir}/scripts/mtce.logrotate %{buildroot}%{local_etc_logrotated}/mtce.logrotate
|
||||||
install -m 644 -p -D %{_buildsubdir}/hostw/scripts/hostw.logrotate %{buildroot}%{local_etc_logrotated}/hostw.logrotate
|
install -m 644 -p -D %{_buildsubdir}/hostw/scripts/hostw.logrotate %{buildroot}%{local_etc_logrotated}/hostw.logrotate
|
||||||
install -m 644 -p -D %{_buildsubdir}/pmon/scripts/pmon.logrotate %{buildroot}%{local_etc_logrotated}/pmon.logrotate
|
install -m 644 -p -D %{_buildsubdir}/pmon/scripts/pmon.logrotate %{buildroot}%{local_etc_logrotated}/pmon.logrotate
|
||||||
@ -424,6 +427,7 @@ install -m 755 -d %{buildroot}/var/run
|
|||||||
/bin/systemctl enable rsyncd.service
|
/bin/systemctl enable rsyncd.service
|
||||||
/bin/systemctl enable goenabled.service
|
/bin/systemctl enable goenabled.service
|
||||||
/bin/systemctl enable mtcalarm.service
|
/bin/systemctl enable mtcalarm.service
|
||||||
|
/bin/systemctl enable crashDumpMgr.service
|
||||||
|
|
||||||
%post -n mtce-hostw
|
%post -n mtce-hostw
|
||||||
/bin/systemctl enable hostw.service
|
/bin/systemctl enable hostw.service
|
||||||
@ -470,6 +474,7 @@ install -m 755 -d %{buildroot}/var/run
|
|||||||
%{local_etc_logrotated}/fsmon.logrotate
|
%{local_etc_logrotated}/fsmon.logrotate
|
||||||
%{local_etc_logrotated}/mtce.logrotate
|
%{local_etc_logrotated}/mtce.logrotate
|
||||||
%{local_etc_logrotated}/mtcalarm.logrotate
|
%{local_etc_logrotated}/mtcalarm.logrotate
|
||||||
|
%{local_etc_logrotated}/crashdump.logrotate
|
||||||
|
|
||||||
# Maintenance start/stop services scripts
|
# Maintenance start/stop services scripts
|
||||||
%{local_etc_servicesd}/controller/mtcTest
|
%{local_etc_servicesd}/controller/mtcTest
|
||||||
@ -490,6 +495,7 @@ install -m 755 -d %{buildroot}/var/run
|
|||||||
%{_sysconfdir}/init.d/mtcClient
|
%{_sysconfdir}/init.d/mtcClient
|
||||||
%{_sysconfdir}/init.d/mtcalarm
|
%{_sysconfdir}/init.d/mtcalarm
|
||||||
%{_sysconfdir}/init.d/hwclock.sh
|
%{_sysconfdir}/init.d/hwclock.sh
|
||||||
|
%{_sysconfdir}/init.d/crashDumpMgr
|
||||||
|
|
||||||
%{_unitdir}/runservices.service
|
%{_unitdir}/runservices.service
|
||||||
%{_unitdir}/goenabled.service
|
%{_unitdir}/goenabled.service
|
||||||
@ -499,6 +505,7 @@ install -m 755 -d %{buildroot}/var/run
|
|||||||
%{_unitdir}/mtcClient.service
|
%{_unitdir}/mtcClient.service
|
||||||
%{_unitdir}/hbsClient.service
|
%{_unitdir}/hbsClient.service
|
||||||
%{_unitdir}/hwclock.service
|
%{_unitdir}/hwclock.service
|
||||||
|
%{_unitdir}/crashDumpMgr.service
|
||||||
|
|
||||||
# Binaries
|
# Binaries
|
||||||
%{local_bindir}/mtcAgent
|
%{local_bindir}/mtcAgent
|
||||||
|
131
mtce/src/scripts/crashDumpMgr
Normal file
131
mtce/src/scripts/crashDumpMgr
Normal file
@ -0,0 +1,131 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
#
|
||||||
|
# Copyright (c) 2020 Wind River Systems, Inc.
|
||||||
|
#
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
#
|
||||||
|
|
||||||
|
# chkconfig: 2345 98 2
|
||||||
|
#
|
||||||
|
### BEGIN INIT INFO
|
||||||
|
# Provides: crashDumpMgr
|
||||||
|
# Required-Start: $null
|
||||||
|
# Required-Stop: $null
|
||||||
|
# Default-Start: 3 5
|
||||||
|
# Default-Stop: 0 1 2 6
|
||||||
|
# Short-Description: Maintenance 'Crash Dump' Manager script
|
||||||
|
### END INIT INFO
|
||||||
|
|
||||||
|
CRASHDUMPMGR_TAG=${CRASHDUMPMGR_TAG:-"crashDumpMgr"}
|
||||||
|
|
||||||
|
RETVAL=0
|
||||||
|
|
||||||
|
#############################################################################
|
||||||
|
# Log message to syslog
|
||||||
|
#############################################################################
|
||||||
|
|
||||||
|
function log()
|
||||||
|
{
|
||||||
|
logger -t ${CRASHDUMPMGR_TAG} $@
|
||||||
|
}
|
||||||
|
|
||||||
|
#############################################################################
|
||||||
|
#
|
||||||
|
# Name : manage_crash_dumps
|
||||||
|
#
|
||||||
|
# Purpose: Prevent crash dumps from filling up the root fs
|
||||||
|
#
|
||||||
|
# The kernel directs new crash dump bundles to
|
||||||
|
# /var/crash/<dated vmcore bundle>. Crash dump
|
||||||
|
# bundles are quite large and, if too many occur,
|
||||||
|
# can fill up its target filesystem.
|
||||||
|
#
|
||||||
|
# This function nicely tars a crash bundle found in /var/crash
|
||||||
|
# to /var/log/crash.
|
||||||
|
#
|
||||||
|
# The first bundle is tar'ed as vmcore_first.tar and preserved.
|
||||||
|
# Subsequent crash bundles are nicely tar'ed as vmcore.tar
|
||||||
|
#
|
||||||
|
# Save the crash dump vmcore summary for all crash dumps.
|
||||||
|
#
|
||||||
|
# Assumptions: logration is used to compress these bundles in the background
|
||||||
|
#
|
||||||
|
############################################################################
|
||||||
|
|
||||||
|
function manage_crash_dumps()
|
||||||
|
{
|
||||||
|
CRASH_DIR="/var/crash"
|
||||||
|
CRASH_BUNDLE_DIR="/var/log/crash"
|
||||||
|
OTHER_BUNDLE="${CRASH_BUNDLE_DIR}/vmcore.tar"
|
||||||
|
FIRST_BUNDLE="${CRASH_BUNDLE_DIR}/vmcore_first.tar"
|
||||||
|
FIRST_BUNDLE_ROTATED="${CRASH_BUNDLE_DIR}/vmcore_first.tar.1.gz"
|
||||||
|
CRASH_BUNDLE_SUMMARY="vmcore-dmesg.txt"
|
||||||
|
|
||||||
|
# tar command and nice levels
|
||||||
|
TAR_CMD="tar -cf"
|
||||||
|
NICE_CMD="/usr/bin/nice -n19"
|
||||||
|
IONICE_CMD="/usr/bin/ionice -c2 -n7"
|
||||||
|
|
||||||
|
log "managing ${CRASH_DIR}"
|
||||||
|
cleanup=false
|
||||||
|
|
||||||
|
# create dir if it does not exist
|
||||||
|
if [ ! -d ${CRASH_BUNDLE_DIR} ] ; then
|
||||||
|
mkdir ${CRASH_BUNDLE_DIR}
|
||||||
|
fi
|
||||||
|
|
||||||
|
for entry in ${CRASH_DIR}/*
|
||||||
|
do
|
||||||
|
if [ -d ${entry} ] ; then
|
||||||
|
if [ -e ${entry}/vmcore ] ; then
|
||||||
|
|
||||||
|
# save the crash dump vmcore summary for all crash dumps
|
||||||
|
cp -a ${entry}/${CRASH_BUNDLE_SUMMARY} ${CRASH_DIR}/$(basename ${entry})_${CRASH_BUNDLE_SUMMARY}
|
||||||
|
|
||||||
|
if [ "${cleanup}" != true ] ; then
|
||||||
|
if [ -e ${FIRST_BUNDLE} -o -e ${FIRST_BUNDLE_ROTATED} ] ; then
|
||||||
|
if [ ! -e ${OTHER_BUNDLE} ] ; then
|
||||||
|
log "creating bundle from ${entry}"
|
||||||
|
${IONICE_CMD} ${NICE_CMD} ${TAR_CMD} ${OTHER_BUNDLE} -C ${CRASH_DIR} $(basename ${entry})
|
||||||
|
cleanup=true
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
log "creating first bundle from ${entry}"
|
||||||
|
${IONICE_CMD} ${NICE_CMD} ${TAR_CMD} ${FIRST_BUNDLE} -C ${CRASH_DIR} $(basename ${entry})
|
||||||
|
cleanup=true
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
log "removing ${entry}"
|
||||||
|
rm -rf "${entry}"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
# service case
|
||||||
|
case "$1" in
|
||||||
|
start)
|
||||||
|
manage_crash_dumps
|
||||||
|
;;
|
||||||
|
|
||||||
|
stop)
|
||||||
|
log "stop"
|
||||||
|
;;
|
||||||
|
|
||||||
|
restart)
|
||||||
|
log "restart"
|
||||||
|
stop
|
||||||
|
start
|
||||||
|
;;
|
||||||
|
|
||||||
|
status)
|
||||||
|
log "status"
|
||||||
|
;;
|
||||||
|
|
||||||
|
*)
|
||||||
|
log "usage: $0 { start | stop | status | restart }"
|
||||||
|
RETVAL=1
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
exit $RETVAL
|
13
mtce/src/scripts/crashDumpMgr.service
Normal file
13
mtce/src/scripts/crashDumpMgr.service
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
[Unit]
|
||||||
|
Description=Crash Dump Manager
|
||||||
|
After=network.target
|
||||||
|
Before=sshd.service
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Type=oneshot
|
||||||
|
RemainAfterExit=no
|
||||||
|
ExecStart=/etc/init.d/crashDumpMgr start
|
||||||
|
ExecStop=/etc/init.d/crashDumpMgr stop
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=multi-user.target
|
14
mtce/src/scripts/crashdump.logrotate
Normal file
14
mtce/src/scripts/crashdump.logrotate
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
/var/log/crash/vmcore.tar
|
||||||
|
/var/log/crash/vmcore_first.tar
|
||||||
|
{
|
||||||
|
nodateext
|
||||||
|
size 1K
|
||||||
|
start 1
|
||||||
|
rotate 1
|
||||||
|
missingok
|
||||||
|
notifempty
|
||||||
|
compress
|
||||||
|
postrotate
|
||||||
|
rm -f $1
|
||||||
|
endscript
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user