Add /var/crash dump management to maintenance.
The Linux kernel can be configured to perform a crash dump and reboot in response to specific, typically serious, events. A crash dump event produces a crash dump report bundle (a directory) of files that represent the state of the kernel at the time of the event; this is useful for post-event root cause analysis. The kernel directs new crash dump bundles to /var/crash/<dated vmcore bundle>. Crash dump bundles are quite large and, if too many occur, can fill up their target filesystem. This update adds crash dump bundle management to maintenance with a new crashDumpMgr service script and installs a crash dump logrotate configuration file to compress/preserve the first crash bundle and compress/rotate all subsequent bundles. With repeated crash dumps and the help of background logrotate runs, this update produces the following compressed crash dump bundles: controller-1:~$ ls -lrth /var/log/crash total 238M -rw-r--r-- 1 root 77M <date> vmcore_first.tar.1.gz -rw-r--r-- 1 root 75M <date> vmcore.tar.1.gz Change-Id: I2741e610c6c417d7fc14dfada283a1edacd9327f Partial-Fix: 1898602 Signed-off-by: Eric MacDonald <eric.macdonald@windriver.com>
This commit is contained in:
parent
f5725ad694
commit
85f605a762
@ -349,6 +349,8 @@ install -m 700 -p -D %{_buildsubdir}/alarm/scripts/mtcalarm.init %{buildroot}%{_
|
||||
# TODO: Init hack. Should move to proper module
|
||||
install -m 755 -p -D %{_buildsubdir}/scripts/hwclock.sh %{buildroot}%{_sysconfdir}/init.d/hwclock.sh
|
||||
install -m 644 -p -D %{_buildsubdir}/scripts/hwclock.service %{buildroot}%{_unitdir}/hwclock.service
|
||||
install -m 755 -p -D %{_buildsubdir}/scripts/crashDumpMgr %{buildroot}%{_sysconfdir}/init.d/crashDumpMgr
|
||||
install -m 644 -p -D %{_buildsubdir}/scripts/crashDumpMgr.service %{buildroot}%{_unitdir}/crashDumpMgr.service
|
||||
|
||||
# systemd service files
|
||||
install -m 644 -p -D %{_buildsubdir}/fsmon/scripts/fsmon.service %{buildroot}%{_unitdir}/fsmon.service
|
||||
@ -395,6 +397,7 @@ install -m 644 -p -D %{_buildsubdir}/lmon/scripts/lmon.pmon.conf %{buildroot}%{l
|
||||
|
||||
# log rotation
|
||||
install -m 755 -d %{buildroot}%{_sysconfdir}/logrotate.d
|
||||
install -m 644 -p -D %{_buildsubdir}/scripts/crashdump.logrotate %{buildroot}%{local_etc_logrotated}/crashdump.logrotate
|
||||
install -m 644 -p -D %{_buildsubdir}/scripts/mtce.logrotate %{buildroot}%{local_etc_logrotated}/mtce.logrotate
|
||||
install -m 644 -p -D %{_buildsubdir}/hostw/scripts/hostw.logrotate %{buildroot}%{local_etc_logrotated}/hostw.logrotate
|
||||
install -m 644 -p -D %{_buildsubdir}/pmon/scripts/pmon.logrotate %{buildroot}%{local_etc_logrotated}/pmon.logrotate
|
||||
@ -424,6 +427,7 @@ install -m 755 -d %{buildroot}/var/run
|
||||
/bin/systemctl enable rsyncd.service
|
||||
/bin/systemctl enable goenabled.service
|
||||
/bin/systemctl enable mtcalarm.service
|
||||
/bin/systemctl enable crashDumpMgr.service
|
||||
|
||||
%post -n mtce-hostw
|
||||
/bin/systemctl enable hostw.service
|
||||
@ -470,6 +474,7 @@ install -m 755 -d %{buildroot}/var/run
|
||||
%{local_etc_logrotated}/fsmon.logrotate
|
||||
%{local_etc_logrotated}/mtce.logrotate
|
||||
%{local_etc_logrotated}/mtcalarm.logrotate
|
||||
%{local_etc_logrotated}/crashdump.logrotate
|
||||
|
||||
# Maintenance start/stop services scripts
|
||||
%{local_etc_servicesd}/controller/mtcTest
|
||||
@ -490,6 +495,7 @@ install -m 755 -d %{buildroot}/var/run
|
||||
%{_sysconfdir}/init.d/mtcClient
|
||||
%{_sysconfdir}/init.d/mtcalarm
|
||||
%{_sysconfdir}/init.d/hwclock.sh
|
||||
%{_sysconfdir}/init.d/crashDumpMgr
|
||||
|
||||
%{_unitdir}/runservices.service
|
||||
%{_unitdir}/goenabled.service
|
||||
@ -499,6 +505,7 @@ install -m 755 -d %{buildroot}/var/run
|
||||
%{_unitdir}/mtcClient.service
|
||||
%{_unitdir}/hbsClient.service
|
||||
%{_unitdir}/hwclock.service
|
||||
%{_unitdir}/crashDumpMgr.service
|
||||
|
||||
# Binaries
|
||||
%{local_bindir}/mtcAgent
|
||||
|
131
mtce/src/scripts/crashDumpMgr
Normal file
131
mtce/src/scripts/crashDumpMgr
Normal file
@ -0,0 +1,131 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright (c) 2020 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
# chkconfig: 2345 98 2
|
||||
#
|
||||
### BEGIN INIT INFO
|
||||
# Provides: crashDumpMgr
|
||||
# Required-Start: $null
|
||||
# Required-Stop: $null
|
||||
# Default-Start: 3 5
|
||||
# Default-Stop: 0 1 2 6
|
||||
# Short-Description: Maintenance 'Crash Dump' Manager script
|
||||
### END INIT INFO
|
||||
|
||||
# Tag handed to 'logger -t' by log(). An existing non-empty value of
# CRASHDUMPMGR_TAG takes precedence over the "crashDumpMgr" default.
CRASHDUMPMGR_TAG=${CRASHDUMPMGR_TAG:-"crashDumpMgr"}
|
||||
|
||||
# Exit status of the script; the usage branch of the service case sets it
# to 1 and the final 'exit' reports it.
RETVAL=0
|
||||
|
||||
#############################################################################
# Log message to syslog
#
# Globals  : CRASHDUMPMGR_TAG (read) - tag passed to 'logger -t'
# Arguments: $@ - the message; every argument is handed to logger exactly
#                 as received (no word splitting, no glob expansion)
#############################################################################

function log()
{
    # '--' ends logger's option parsing so that a message beginning with
    # a hyphen is logged instead of being interpreted as an option.
    logger -t "${CRASHDUMPMGR_TAG}" -- "$@"
}
|
||||
|
||||
#############################################################################
#
# Name   : manage_crash_dumps
#
# Purpose: Prevent crash dumps from filling up the root fs
#
# The kernel directs new crash dump bundles to
# /var/crash/<dated vmcore bundle>. Crash dump
# bundles are quite large and, if too many occur,
# can fill up their target filesystem.
#
# This function nicely tars a crash bundle found in /var/crash
# to /var/log/crash.
#
# The first bundle is tar'ed as vmcore_first.tar and preserved.
# Subsequent crash bundles are nicely tar'ed as vmcore.tar
#
# Save the crash dump vmcore summary for all crash dumps.
#
# At most one bundle is created per call. Crash directories that are not
# selected for bundling are removed. A crash directory whose tar fails is
# kept (and the partial tar file deleted) so the dump is not lost and a
# truncated archive is never mistaken for a valid bundle.
#
# Arguments: $1 - optional ; crash dump directory    (default /var/crash)
#            $2 - optional ; bundle output directory (default /var/log/crash)
#
# Assumptions: logrotate is used to compress these bundles in the background
#
############################################################################

function manage_crash_dumps()
{
    local crash_dir="${1:-/var/crash}"
    local bundle_dir="${2:-/var/log/crash}"
    local other_bundle="${bundle_dir}/vmcore.tar"
    local first_bundle="${bundle_dir}/vmcore_first.tar"
    local first_bundle_rotated="${bundle_dir}/vmcore_first.tar.1.gz"
    local summary="vmcore-dmesg.txt"

    # tar run at the lowest cpu and best-effort io priority
    local -a bundle_cmd=(/usr/bin/ionice -c2 -n7 /usr/bin/nice -n19 tar -cf)

    local entry=""
    local name=""
    local target=""
    local bundled=false

    log "managing ${crash_dir}"

    # create dir if it does not exist ; a failure here is not fatal, the
    # tar below then fails and the crash directories are left in place
    if [ ! -d "${bundle_dir}" ] ; then
        mkdir -p "${bundle_dir}" || log "failed to create ${bundle_dir}"
    fi

    for entry in "${crash_dir}"/* ; do

        # only directories that contain a vmcore are crash bundles ;
        # this also skips the literal pattern when nothing matches
        [ -d "${entry}" ] || continue
        [ -e "${entry}/vmcore" ] || continue

        name="${entry##*/}"

        # save the crash dump vmcore summary for all crash dumps
        cp -a "${entry}/${summary}" "${crash_dir}/${name}_${summary}"

        # decide which bundle file, if any, this entry is tar'ed to
        target=""
        if [ "${bundled}" != true ] ; then
            if [ -e "${first_bundle}" ] || [ -e "${first_bundle_rotated}" ] ; then
                if [ ! -e "${other_bundle}" ] ; then
                    log "creating bundle from ${entry}"
                    target="${other_bundle}"
                fi
            else
                log "creating first bundle from ${entry}"
                target="${first_bundle}"
            fi
        fi

        if [ -n "${target}" ] ; then
            if "${bundle_cmd[@]}" "${target}" -C "${crash_dir}" -- "${name}" ; then
                bundled=true
            else
                # keep the crash directory and drop the partial archive
                log "failed to create ${target} ; keeping ${entry}"
                rm -f -- "${target}"
                continue
            fi
        fi

        log "removing ${entry}"
        rm -rf -- "${entry:?}"
    done
}
|
||||
|
||||
# service case
#
#   start   - run manage_crash_dumps
#   stop    - nothing to stop ; the request is only logged
#   restart - logged, then handled as stop followed by start
#   status  - the request is only logged
#   other   - usage is logged and the script exits with status 1
case "$1" in
    start)
        manage_crash_dumps
        ;;

    stop)
        log "stop"
        ;;

    restart)
        log "restart"
        # 'stop' and 'start' are labels of this case statement, not
        # functions or commands, so their actions are performed directly.
        log "stop"
        manage_crash_dumps
        ;;

    status)
        log "status"
        ;;

    *)
        log "usage: $0 { start | stop | status | restart }"
        RETVAL=1
        ;;
esac

exit "${RETVAL}"
|
13
mtce/src/scripts/crashDumpMgr.service
Normal file
13
mtce/src/scripts/crashDumpMgr.service
Normal file
@ -0,0 +1,13 @@
|
||||
# systemd unit that invokes the crashDumpMgr init script: 'start' via
# ExecStart and 'stop' via ExecStop.
[Unit]
|
||||
Description=Crash Dump Manager
|
||||
# Ordering: started after network.target and before sshd.service.
After=network.target
|
||||
Before=sshd.service
|
||||
|
||||
[Service]
|
||||
# NOTE(review): oneshot combined with RemainAfterExit=no means the unit is
# not left active once ExecStart has finished - confirm when ExecStop is
# run in that case against the systemd.service documentation.
Type=oneshot
|
||||
RemainAfterExit=no
|
||||
ExecStart=/etc/init.d/crashDumpMgr start
|
||||
ExecStop=/etc/init.d/crashDumpMgr stop
|
||||
|
||||
[Install]
|
||||
# When enabled, the unit is wanted by multi-user.target.
WantedBy=multi-user.target
|
14
mtce/src/scripts/crashdump.logrotate
Normal file
14
mtce/src/scripts/crashdump.logrotate
Normal file
@ -0,0 +1,14 @@
|
||||
# Rotation rules for the two crash bundle tar files written by crashDumpMgr.
/var/log/crash/vmcore.tar
|
||||
/var/log/crash/vmcore_first.tar
|
||||
{
|
||||
# numeric suffixes rather than date suffixes
nodateext
|
||||
# size threshold for rotation
size 1K
|
||||
# numbering of rotated files begins at 1, and one rotated copy is kept;
# together with 'compress' this yields names such as vmcore.tar.1.gz
start 1
|
||||
rotate 1
|
||||
missingok
|
||||
notifempty
|
||||
compress
|
||||
postrotate
|
||||
# NOTE(review): $1 is presumably the path of the log that was just
# rotated - confirm against the logrotate postrotate documentation
rm -f $1
|
||||
endscript
|
||||
}
|
Loading…
Reference in New Issue
Block a user