34c2ef7865
The 'schedtop' monitoring tool is used to do engineering analysis of process scheduling, disk IO, and latency. This enhances the schedtop monitoring tool with: - additional fields "bmax" latency and "D" disk-sleep tasks - command-line options to watch specific tasks and a mechanism to trigger sysrq The following new fields are reported: - "bmax" milliseconds, corresponds to linux scheduler stats "block_max". This represents involuntary wait of scheduling and IO wait. - "D:<n>", the current number of disk-sleep "D" tasks. The following command line options are added to be able to watch specific processes, and optionally trigger a sysrq (i.e., force a crashdump) when the trigger delay threshold in milliseconds is reached. [--watch-cmd=tid1,cmd1,cmd2,...] [--watch-only] [--watch-quiet] [--trig-delay=time] The --watch-cmd option matches task ids, or patterns against the process name 'comm' field. The --watch-only option watches and displays only the subset of tasks discovered at tool startup. This dramatically reduces the tool cpu overhead. The --watch-quiet option displays no sample output after tool startup; the only output occurs when the --trig-delay is exceeded. The --trig-delay=time option will trigger a sysrq to force a crash dump when any watched process "bmax" delay exceeds the trigger delay time in milliseconds. 
Example: collect 1 minute of data, monitor all tasks, reset scheduler hiwatermark statistics schedtop \ --period=60 --reset-hwm Example: collect 1 minute of data, watch specific tasks schedtop \ --period=60 --reset-hwm \ --watch-cmd=jbd2,kube-apiserver,etcd,forward-journald,containerd \ --watch-only Example: watch specific tasks and trigger sysrq when any of the watched commands exceed 10000ms delay (10 seconds) schedtop \ --period=36000 --reset-hwm \ --watch-cmd=jbd2,kube-apiserver,etcd,forward-journald,containerd \ --watch-only \ --trig-delay=10000 Testcases: PASS: Collect standard tool output, verified new bmax and D fields PASS: Verify --watch-cmds will detect the specified commands or tids PASS: Verify --watch-only will only display watched commands PASS: Verify --trig-delay will generate a sysrq PASS: Verify comm field is limited to 15 characters wide Closes-Bug: 1927772 Signed-off-by: Jim Gauld <james.gauld@windriver.com> Change-Id: I5368aac66b24608f5eab366cd929be4c0d4a1f76
1477 lines
46 KiB
Perl
Executable File
1477 lines
46 KiB
Perl
Executable File
#!/usr/bin/perl
|
|
########################################################################
|
|
#
|
|
# Copyright (c) 2015-2021 Wind River Systems, Inc.
|
|
#
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
#
|
|
#
|
|
########################################################################
|
|
#
|
|
# Description:
|
|
# This displays occupancy and scheduling information per sample period.
|
|
# Output includes total occupancy, per-core occupancy, loadavg, per-task cpu,
|
|
# per-task scheduling, per-task io-wait.
|
|
#
|
|
# Usage: schedtop OPTIONS
|
|
# [--delay=<seconds>] [--repeat=<num>] [--period=<seconds>]
|
|
# [--reset-hwm] [--idle] [--sort=<cpu|io>]
|
|
# [--watch-cmd=tid1,cmd1,cmd2,...] [--watch-only] [--watch-quiet]
|
|
# [--trig-delay=time]
|
|
# [--help]
|
|
|
|
use strict;
|
|
use warnings;
|
|
use Data::Dumper;
|
|
use POSIX qw(uname strftime);
|
|
use Time::HiRes qw(clock_gettime usleep CLOCK_MONOTONIC CLOCK_REALTIME);
|
|
use Benchmark ':hireswallclock';
|
|
use Carp qw(croak carp);
|
|
use Math::BigInt;
|
|
|
|
# Define toolname
|
|
our $TOOLNAME = "schedtop";
|
|
our $VERSION = "0.1";
|
|
|
|
# Constants
|
|
use constant SI_k => 1.0E3;
|
|
use constant SI_M => 1.0E6;
|
|
use constant SI_G => 1.0E9;
|
|
use constant Ki => 1024.0;
|
|
use constant Mi => 1024.0*1024.0;
|
|
use constant Gi => 1024.0*1024.0*1024.0;
|
|
|
|
# Globals
|
|
our %opt_V = ();
|
|
our %opt_P = ();
|
|
our %percpu_0 = ();
|
|
our %percpu_1 = ();
|
|
our %task_0 = ();
|
|
our %task_1 = ();
|
|
our %tids_0 = ();
|
|
our %tids_1 = ();
|
|
our %tids_w = ();
|
|
our %D_task = ();
|
|
our %D_percpu = ();
|
|
our %loadavg = ();
|
|
our $tm_0 = ();
|
|
our $tm_1 = ();
|
|
our $tr_0 = ();
|
|
our $tr_1 = ();
|
|
our $tm_elapsed = ();
|
|
our $tr_elapsed = ();
|
|
our $tm_final = ();
|
|
our $uptime = ();
|
|
our $num_cpus = 1;
|
|
our $affinity_mask = Math::BigInt->new('0');
|
|
our $w_aff = 10;
|
|
our $num_tasks = 0;
|
|
our $num_blk = 0;
|
|
our $num_state_D = 0;
|
|
our $is_schedstat = 1;
|
|
our $USER_HZ = 100; # no easy way to get this
|
|
our $CLOCK_NS = SI_G / $USER_HZ;
|
|
our $print_host = 1;
|
|
|
|
# Print options
|
|
our ($P_none, $P_lite, $P_brief, $P_full) = (0, 1, 2, 3);
|
|
our ($P_ps, $P_cpu, $P_del, $P_io, $P_id, $P_cmd) = (0, 1, 2, 3, 4, 5);
|
|
our @P_list = ($::P_ps, $::P_cpu, $::P_del, $::P_io, $::P_id, $::P_cmd);
|
|
|
|
# Argument list parameters
|
|
our ($arg_debug,
|
|
$arg_delay,
|
|
$arg_repeat,
|
|
$arg_period,
|
|
$arg_reset_hwm,
|
|
$arg_idle,
|
|
$arg_sort,
|
|
$arg_print,
|
|
@arg_watch_cmd,
|
|
$arg_watch_only,
|
|
$arg_watch_quiet,
|
|
$arg_trig_delay,
|
|
) = ();
|
|
|
|
#-------------------------------------------------------------------------------
|
|
# MAIN Program
|
|
#-------------------------------------------------------------------------------
|
|
my $ONE_BILLION = 1.0E9;
|
|
my $MIN_DELAY = 0.001;
|
|
my $MAX_DELAY = 0.001;
|
|
|
|
# benchmark variables
|
|
my ($bd, $b0, $b1);
|
|
my @policies = ('OT', 'FF', 'RR', 'BA', 'ID', 'UN', 'UN');
|
|
my @delta_list = (
|
|
'nr_switches',
|
|
'nr_migrations',
|
|
'exec_runtime',
|
|
'wait_sum',
|
|
'wait_count',
|
|
'iowait_sum',
|
|
'iowait_count',
|
|
'syscr',
|
|
'syscw',
|
|
'read_bytes',
|
|
'write_bytes',
|
|
'cancelled_write_bytes',
|
|
);
|
|
|
|
my @state_list = (
|
|
'exec_max', 'wait_max', 'block_max',
|
|
'pid', 'ppid', 'state', 'comm', 'cmdline', 'wchan', 'affinity',
|
|
'VmSize', 'VmRSS', 'start_time',
|
|
'nice', 'policy', 'priority', 'rt_priority', 'task_cpu'
|
|
);
|
|
|
|
# Autoflush output
|
|
select(STDERR);
|
|
$| = 1;
|
|
select(STDOUT); # default
|
|
$| = 1;
|
|
|
|
# Parse input arguments and print tool usage if necessary
|
|
&parse_schedtop_args(
|
|
\$::arg_debug,
|
|
\$::arg_delay,
|
|
\$::arg_repeat,
|
|
\$::arg_period,
|
|
\$::arg_reset_hwm,
|
|
\$::arg_idle,
|
|
\$::arg_sort,
|
|
\$::arg_print,
|
|
\@::arg_watch_cmd,
|
|
\$::arg_watch_only,
|
|
\$::arg_watch_quiet,
|
|
\$::arg_trig_delay,
|
|
);
|
|
|
|
# Set default print options
#
# Every print area (ps, cpu, delay, io, id, cmd) starts at the verbosity
# selected by --print: 'full', 'brief', or anything else meaning none.
my $P_default = $::P_none;
if ($::arg_print eq 'full') {
    $P_default = $::P_full;
} elsif ($::arg_print eq 'brief') {
    $P_default = $::P_brief;
}
foreach my $P (@::P_list) {
    $::opt_P{$P} = $P_default;
}

# Disable some options if data not present
$::opt_V{'sched'} = &is_sched();
$::opt_V{'io'}    = &is_io();

# Without per-task scheduler stats there is nothing to show for the cpu,
# delay and io areas, and no hi-water marks to reset.
if ($::opt_V{'sched'} == 0) {
    @::opt_P{$::P_cpu, $::P_del, $::P_io} = ($::P_none, $::P_none, $::P_none);
    undef $::arg_reset_hwm;
}

# Without per-task io stats: fall back to cpu sorting when scheduler stats
# are missing too, otherwise downgrade any io output to the 'lite' form.
if ($::opt_V{'io'} == 0) {
    if ($::opt_V{'sched'} == 0) {
        $::opt_P{$::P_io} = $::P_none;
        $::arg_sort = 'cpu';
    } elsif ($::opt_P{$::P_io} != $::P_none) {
        $::opt_P{$::P_io} = $::P_lite;
    }
}
|
|
|
|
# Check for root user
|
|
if ($>) {
|
|
warn "$::TOOLNAME: requires root/sudo.\n";
|
|
exit 1;
|
|
}
|
|
|
|
# Print out some debugging information
|
|
if (defined $::arg_debug) {
|
|
$Data::Dumper::Indent = 1;
|
|
}
|
|
|
|
# Check for schedstat support; fallback to stats
|
|
$is_schedstat = -e '/proc/schedstat' ? 1 : 0;
|
|
|
|
# Print out selected options
|
|
printf "selected options: ".
|
|
"delay = %.3fs, repeat = %d, idle=%s, hwm=%s, sort=%s, print=%s\n",
|
|
$::arg_delay, $::arg_repeat,
|
|
(defined $::arg_idle ? 'idle_tasks' : 'no_idle_tasks'),
|
|
(defined $::arg_reset_hwm ? 'reset-hwm' : 'unchanged'),
|
|
$::arg_sort, $::arg_print;
|
|
if (@::arg_watch_cmd) {
|
|
printf "selected watch/trigger options: ".
|
|
"watch-cmd=%s, only=%s, quiet=%s, delay=%d ms\n",
|
|
join(',', @::arg_watch_cmd),
|
|
(defined $::arg_watch_only ? 'true' : 'false'),
|
|
(defined $::arg_watch_quiet ? 'true' : 'false'),
|
|
$::arg_trig_delay;
|
|
}
|
|
|
|
# Capture timestamp
|
|
$b0 = new Benchmark;
|
|
|
|
# Get number of logical cpus
&get_num_logical_cpus(\$::num_cpus);

# Affinity mask with one bit set per logical cpu, i.e. (1 << num_cpus) - 1.
$::affinity_mask = Math::BigInt->new('1')->blsft($::num_cpus)->bdec();

# Width of the affinity column: wide enough for the 'AFF' heading and for
# the hex rendering of the full mask.
$w_aff = &max(length('AFF'), length($::affinity_mask->as_hex()));
|
|
|
|
# Reset scheduling hi-water marks
#
# When --reset-hwm is in effect, enumerate every tid and write '0' to each
# task's sched file, then pause for 1 ms before the first sample is taken.
if (defined $::arg_reset_hwm) {
    &get_tids(\%::tids_1);
    &reset_sched_hwm(\%::tids_1);

    # The builtin sleep() only honours whole seconds, so sleep(0.001) did
    # not pause at all; use Time::HiRes::usleep (microseconds) instead.
    usleep(SI_M * 0.001);
}
|
|
|
|
# Get current hires epoc timestamp
|
|
$::tm_1 = clock_gettime(CLOCK_MONOTONIC);
|
|
$::tr_1 = clock_gettime(CLOCK_REALTIME);
|
|
$::tm_final = $::tm_1 + $::arg_delay*$::arg_repeat;
|
|
|
|
# Set initial delay
|
|
$::tm_elapsed = $::arg_delay;
|
|
$MAX_DELAY = $::arg_delay + $MIN_DELAY;
|
|
|
|
|
|
# Get overall per-cpu stats
|
|
if ($is_schedstat) {
|
|
&read_schedstat(\%::percpu_1);
|
|
} else {
|
|
&read_stat(\%::percpu_1);
|
|
}
|
|
# Get list of pids and tids
|
|
&get_tids(\%::tids_1);
|
|
# Get current scheduling and io info for all tids
|
|
&read_sched(\%::tids_1, \%::task_1);
|
|
# Track watched tids for monitoring
|
|
&track_watched_tids(\%::tids_1, \%::tids_w, \%::task_1, \@::arg_watch_cmd);
|
|
|
|
# determine column sort order
#
# Watched tasks always sort first ($s_keyw); the remaining three keys depend
# on --sort.  Any value other than 'cpu' or 'io' falls back to the cpu keys.
my $s_keyw = 'watched';
my %sort_keys_for = (
    'cpu' => ['exec_runtime', 'nr_switches', 'pid'],
    'io'  => ['io', 'ios', 'exec_runtime'],
);
my ($s_key1, $s_key2, $s_key3) = ();
if (exists $sort_keys_for{$::arg_sort}) {
    ($s_key1, $s_key2, $s_key3) = @{ $sort_keys_for{$::arg_sort} };
} else {
    ($s_key1, $s_key2, $s_key3) = @{ $sort_keys_for{'cpu'} };
}
|
|
|
|
# Main loop
|
|
REPEAT_LOOP: for (my $repeat=1; $repeat <= $::arg_repeat; $repeat++) {
|
|
|
|
# copy all state variables
|
|
$::tm_0 = (); $::tr_0 = (); %::percpu_0 = (); %::tids_0 = (); %::task_0 = ();
|
|
$::tm_0 = $::tm_1; $::tr_0 = $::tr_1;
|
|
foreach my $cpu (keys %::percpu_1) { $::percpu_0{$cpu} = $::percpu_1{$cpu}; }
|
|
foreach my $tid (keys %::tids_1) { $::tids_0{$tid} = $::tids_1{$tid}; }
|
|
foreach my $tid (keys %::task_1) {
|
|
foreach my $var (keys %{$::task_1{$tid}}) {
|
|
$::task_0{$tid}{$var} = $::task_1{$tid}{$var};
|
|
}
|
|
}
|
|
|
|
# estimate sleep delay to achieve desired interarrival by subtracting out
|
|
# the measured cpu runtime of the tool.
|
|
my $delay = $::arg_delay;
|
|
if (defined $::D_task{$$}{'exec_runtime'}) {
|
|
$delay -= ($::D_task{$$}{'exec_runtime'}/SI_k);
|
|
}
|
|
$delay = $MIN_DELAY if ($delay < $MIN_DELAY);
|
|
$delay = $MAX_DELAY if ($delay > $MAX_DELAY);
|
|
usleep( SI_M*$delay );
|
|
|
|
# Collect current state
|
|
$::tm_1 = (); $::tr_1 = (); %::percpu_1 = (); %::tids_1 = (); %::task_1 = ();
|
|
# Get current hires epoc timestamp
|
|
$::tm_1 = clock_gettime(CLOCK_MONOTONIC);
|
|
$::tr_1 = clock_gettime(CLOCK_REALTIME);
|
|
# Get overall per-cpu stats
|
|
if ($is_schedstat) {
|
|
&read_schedstat(\%::percpu_1);
|
|
} else {
|
|
&read_stat(\%::percpu_1);
|
|
}
|
|
if (defined $::arg_watch_only) {
|
|
# Get list of pids and tids from watched commands;
|
|
# this reduces cpu impact dramatically
|
|
foreach my $tid (keys %::tids_w) {
|
|
$::tids_1{$tid} = $::tids_w{$tid};
|
|
}
|
|
} else {
|
|
# Get list of pids and tids
|
|
&get_tids(\%::tids_1);
|
|
}
|
|
# Get current scheduling and io info for all tids
|
|
&read_sched(\%::tids_1, \%::task_1);
|
|
# Get current uptime
|
|
&get_uptime(\$::uptime);
|
|
# Get current loadavg
|
|
&get_loadavg(\%::loadavg, \$::runq, \$::num_tasks);
|
|
# Get current processes blocked
|
|
&get_blocked(\$::num_blk);
|
|
|
|
# Get current processes in uninterruptible disk sleep.
|
|
$num_state_D = 0;
|
|
foreach my $tid (keys %::task_1) {
|
|
$::num_state_D++ if ($::task_1{$tid}{'state'} eq 'D');
|
|
}
|
|
|
|
# Delta calculation
|
|
%::D_task = (); %::D_percpu = ();
|
|
$::tm_elapsed = $::tm_1 - $::tm_0;
|
|
$::tr_elapsed = $::tr_1 - $::tr_0;
|
|
foreach my $tid (keys %::task_1) {
|
|
next if ( !(exists $::task_0{$tid}) );
|
|
|
|
# simple delta
|
|
foreach my $var (@delta_list) {
|
|
$::D_task{$tid}{$var} = ($::task_1{$tid}{$var} - $::task_0{$tid}{$var});
|
|
}
|
|
# state information
|
|
foreach my $state (@state_list) {
|
|
$::D_task{$tid}{$state} = $::task_1{$tid}{$state};
|
|
}
|
|
if (exists $::tids_w{$tid}) {
|
|
$::D_task{$tid}{'watched'} = 1;
|
|
} else {
|
|
$::D_task{$tid}{'watched'} = 0;
|
|
}
|
|
|
|
# derived calculations
|
|
my $exec_runtime = $::D_task{$tid}{'exec_runtime'};
|
|
my $nr_switches = $::D_task{$tid}{'nr_switches'};
|
|
my $iowait_sum = $::D_task{$tid}{'iowait_sum'};
|
|
if ($nr_switches > 0.0) {
|
|
$::D_task{$tid}{'tlen'} = $exec_runtime / $nr_switches;
|
|
} else {
|
|
$::D_task{$tid}{'tlen'} = 0.0;
|
|
}
|
|
if ($::tm_elapsed > 0.0) {
|
|
$::D_task{$tid}{'occ'} = 100.0*$exec_runtime/1.0E3/$::tm_elapsed;
|
|
$::D_task{$tid}{'iowait'} = 100.0*$iowait_sum/1.0E3/$::tm_elapsed;
|
|
} else {
|
|
$::D_task{$tid}{'occ'} = 0.0;
|
|
$::D_task{$tid}{'iowait'} = 0.0;
|
|
}
|
|
$::D_task{$tid}{'io'} = $::D_task{$tid}{'read_bytes'}
|
|
+ $::D_task{$tid}{'write_bytes'}
|
|
+ $::D_task{$tid}{'cancelled_write_bytes'};
|
|
$::D_task{$tid}{'ios'} = $::D_task{$tid}{'syscw'}
|
|
+ $::D_task{$tid}{'iowait_count'};
|
|
}
|
|
|
|
foreach my $cpu (keys %::percpu_1) {
|
|
$::D_percpu{$cpu}{'runtime'} = ($::percpu_1{$cpu} - $::percpu_0{$cpu})/1.0E6;
|
|
if ($::tm_elapsed > 0.0) {
|
|
$::D_percpu{$cpu}{'occ'} = 100.0*$D_percpu{$cpu}{'runtime'}/1.0E3/$::tm_elapsed;
|
|
} else {
|
|
$::D_percpu{$cpu}{'occ'} = 0.0;
|
|
}
|
|
}
|
|
my $occ_total = 0.0;
|
|
for (my $cpu=0; $cpu < $::num_cpus; $cpu++) {
|
|
$occ_total += $::D_percpu{$cpu}{'occ'};
|
|
}
|
|
|
|
# Trigger sysrq and coredump if we exceed watch trigger threshold
#
# A watched task trips the trigger when either its 'wait_max' (reported as
# "delay") or its 'block_max' (reported as "block") value is greater than
# --trig-delay, which is expressed in milliseconds.
my $trigger = 0;
if ((defined $::arg_trig_delay) && ($::arg_trig_delay > 0)) {
    foreach my $tid (keys %::tids_w) {
        # A watched task without a delta entry (e.g. it has exited) must be
        # skipped: merely reading $::D_task{$tid}{...} would autovivify an
        # empty entry, and the per-task print loop later in this iteration
        # would then die calling ->as_hex() on the undefined 'affinity'.
        next if (!exists $::D_task{$tid});

        my $wait_max  = $::D_task{$tid}{'wait_max'};
        my $block_max = $::D_task{$tid}{'block_max'};

        if ($wait_max > $::arg_trig_delay) {
            $trigger = 1;
            if (!defined $::arg_watch_quiet) {
                printf "TRIGGER: delay: %.3f > %.3f milliseconds, tid: %d, comm: %s\n",
                    $wait_max, $::arg_trig_delay, $tid, $::task_1{$tid}{'comm'};
            }
        }
        if ($block_max > $::arg_trig_delay) {
            $trigger = 1;
            if (!defined $::arg_watch_quiet) {
                printf "TRIGGER: block: %.3f > %.3f milliseconds, tid: %d, comm: %s\n",
                    $block_max, $::arg_trig_delay, $tid, $::task_1{$tid}{'comm'};
            }
        }
    }
}

# At least one watched task exceeded the threshold: force the crash dump.
if ($trigger) {
    if (!defined $::arg_watch_quiet) {
        printf "TRIGGER sysrq.";
    }
    &sysrq_trigger_crash();
}
|
|
|
|
# Suppress all output
|
|
next if (defined $::arg_watch_quiet);
|
|
|
|
# Print summary
|
|
&schedtop_header(
|
|
\$::tr_1,
|
|
\$::tm_elapsed,
|
|
\$::tr_elapsed,
|
|
\$::uptime,
|
|
\$::loadavg,
|
|
\$::runq,
|
|
\$::num_blk,
|
|
\$::num_state_D,
|
|
\$::num_tasks,
|
|
\$::print_host
|
|
);
|
|
|
|
printf "%-5s %7s ", 'core:', 'total';
|
|
for (my $cpu=0; $cpu < $::num_cpus; $cpu++) {
|
|
printf "%5s ", $cpu;
|
|
}
|
|
print "\n";
|
|
printf "%-5s %7.1f ", 'occ:', $occ_total;
|
|
for (my $cpu=0; $cpu < $::num_cpus; $cpu++) {
|
|
printf "%5.1f ", $::D_percpu{$cpu}{'occ'};
|
|
}
|
|
print "\n";
|
|
print "\n";
|
|
|
|
# Build up output line by specific area
|
|
my $L = ();
|
|
$L = '';
|
|
$L .= sprintf "%7s %7s %7s ", "TID", "PID", "PPID";
|
|
if ($::opt_P{$::P_ps} != $::P_none) {
|
|
$L .= sprintf "%1s %2s %*s %2s %3s %4s ",
|
|
"S", "P", $w_aff, "AFF", "PO", "NI", "PR";
|
|
}
|
|
if ($::opt_P{$::P_cpu} == $::P_brief) {
|
|
$L .= sprintf "%6s %7s ", "ctxt", "occ";
|
|
} elsif ($::opt_P{$::P_cpu} == $::P_full) {
|
|
$L .= sprintf "%6s %6s %7s ", "ctxt", "migr", "occ";
|
|
}
|
|
if ($::opt_P{$::P_del} != $::P_none) {
|
|
$L .= sprintf "%7s %7s %7s %7s %7s ", "tlen", "tmax", "delay", "dmax", "bmax";
|
|
}
|
|
if ($::opt_P{$::P_io} == $::P_lite) {
|
|
$L .= sprintf "%7s %6s ", "iowt", "iocnt";
|
|
} elsif ($::opt_P{$::P_io} == $::P_brief) {
|
|
$L .= sprintf "%7s %8s %8s ", "iowt", "read", "write";
|
|
} elsif ($::opt_P{$::P_io} == $::P_full) {
|
|
$L .= sprintf "%7s %8s %8s %8s %8s %8s ",
|
|
"iowt", "read", "write", "wcncl", "rsysc", "wsysc";
|
|
}
|
|
if ($::opt_P{$::P_id} != $::P_none) {
|
|
$L .= sprintf "%-22s ", "wchan";
|
|
}
|
|
if ($::opt_P{$::P_cmd} == $::P_brief) {
|
|
$L .= sprintf "%s", "cmdline";
|
|
} elsif ($::opt_P{$::P_cmd} == $::P_full) {
|
|
$L .= sprintf "%-15s %s", "comm", "cmdline";
|
|
}
|
|
print $L, "\n";
|
|
|
|
foreach my $tid (sort {($D_task{$b}{$s_keyw} <=> $D_task{$a}{$s_keyw}) or
|
|
($D_task{$b}{$s_key1} <=> $D_task{$a}{$s_key1}) or
|
|
($D_task{$b}{$s_key2} <=> $D_task{$a}{$s_key2}) or
|
|
($D_task{$b}{$s_key3} <=> $D_task{$a}{$s_key3})} keys %D_task) {
|
|
my $exec_runtime = $::D_task{$tid}{'exec_runtime'};
|
|
my $nr_switches = $::D_task{$tid}{'nr_switches'};
|
|
my $aff = $::D_task{$tid}{'affinity'}->as_hex();
|
|
|
|
# skip printing if there is no actual delta
|
|
if ( !(defined $::arg_idle) ) {
|
|
next if (($exec_runtime == 0.0) && ($nr_switches == 0));
|
|
}
|
|
|
|
# Build up output line by specific area
|
|
$L = '';
|
|
$L .= sprintf "%7d %7d %7d ",
|
|
$tid, $::D_task{$tid}{'pid'}, $::D_task{$tid}{'ppid'};
|
|
if ($::opt_P{$::P_ps} != $::P_none) {
|
|
$L .= sprintf "%1s %2d %*s %2s %3d %4d ",
|
|
$::D_task{$tid}{'state'}, $::D_task{$tid}{'task_cpu'}, $w_aff, $aff,
|
|
$policies[$::D_task{$tid}{'policy'}], $::D_task{$tid}{'nice'},
|
|
$::D_task{$tid}{'priority'};
|
|
}
|
|
if ($::opt_P{$::P_cpu} == $::P_brief) {
|
|
$L .= sprintf "%6d %7.2f ",
|
|
$::D_task{$tid}{'nr_switches'}, $::D_task{$tid}{'occ'};
|
|
} elsif ($::opt_P{$::P_cpu} == $::P_full) {
|
|
$L .= sprintf "%6d %6d %7.2f ",
|
|
$::D_task{$tid}{'nr_switches'}, $::D_task{$tid}{'nr_migrations'},
|
|
$::D_task{$tid}{'occ'},
|
|
}
|
|
if ($::opt_P{$::P_del} != $::P_none) {
|
|
$L .= sprintf "%7.3f %7.1f %7.3f %7.1f %7.1f ",
|
|
$::D_task{$tid}{'tlen'}, $::D_task{$tid}{'exec_max'},
|
|
$::D_task{$tid}{'wait_sum'}, $::D_task{$tid}{'wait_max'},
|
|
$::D_task{$tid}{'block_max'};
|
|
}
|
|
if ($::opt_P{$::P_io} == $::P_lite) {
|
|
$L .= sprintf "%7.2f %6d ",
|
|
$::D_task{$tid}{'iowait'}, $::D_task{$tid}{'iowait_count'};
|
|
} elsif ($::opt_P{$::P_io} == $::P_brief) {
|
|
$L .= sprintf "%7.2f %8s %8s ",
|
|
$::D_task{$tid}{'iowait'},
|
|
&format_SI($::D_task{$tid}{'read_bytes'}),
|
|
&format_SI($::D_task{$tid}{'write_bytes'});
|
|
} elsif ($::opt_P{$::P_io} == $::P_full) {
|
|
$L .= sprintf "%7.2f %8s %8s %8s %8s %8s ",
|
|
$::D_task{$tid}{'iowait'},
|
|
&format_SI($::D_task{$tid}{'read_bytes'}),
|
|
&format_SI($::D_task{$tid}{'write_bytes'}),
|
|
&format_SI($::D_task{$tid}{'cancelled_write_bytes'}),
|
|
&format_SI($::D_task{$tid}{'syscr'}),
|
|
&format_SI($::D_task{$tid}{'syscw'});
|
|
}
|
|
if ($::opt_P{$::P_id} != $::P_none) {
|
|
$L .= sprintf "%-22s ", substr($::D_task{$tid}{'wchan'}, 0, 22);
|
|
}
|
|
if ($::opt_P{$::P_cmd} == $::P_brief) {
|
|
$L .= sprintf "%s", $::D_task{$tid}{'cmdline'};
|
|
} elsif ($::opt_P{$::P_cmd} == $::P_full) {
|
|
$L .= sprintf "%-15s %s",
|
|
substr($::D_task{$tid}{'comm'}, 0, 15),
|
|
$::D_task{$tid}{'cmdline'};
|
|
}
|
|
print $L, "\n";
|
|
}
|
|
print "\n";
|
|
|
|
# exit repeat loop if we have exceeded overall time
|
|
last if ($::tm_1 > $::tm_final);
|
|
|
|
} # REPEAT LOOP
|
|
|
|
# Print that tool has finished
|
|
print "done\n";
|
|
|
|
# Capture timestamp and report delta
|
|
$b1 = new Benchmark; $bd = Benchmark::timediff($b1, $b0);
|
|
printf "processing time: %s\n", timestr($bd);
|
|
exit 0;
|
|
|
|
|
|
#-------------------------------------------------------------------------------
|
|
# Convert a number to SI unit xxx.yyyG
#
# Scales by powers of 1000 and appends 'G', 'M' or 'k' with three decimals.
# Values below SI_k are printed as a rounded integer with no suffix.
sub format_SI
{
    my ($number) = @_;

    # Largest unit first; the first threshold the number reaches wins.
    my @scales = (
        [SI_G, "%.3fG"],
        [SI_M, "%.3fM"],
        [SI_k, "%.3fk"],
    );
    foreach my $scale (@scales) {
        my ($unit, $format) = @{$scale};
        return sprintf($format, $number/$unit) if ($number >= $unit);
    }
    return sprintf("%.0f", $number);
}
|
|
|
|
# Convert to IEC binary unit xxx.yyyGi
# Since underlying memory units are in pages, don't need decimals for Ki
#
# Scales by powers of 1024: 'Gi' and 'Mi' carry three decimals, 'Ki' none.
# Values below Ki are printed as a rounded integer with no suffix.
sub format_IEC
{
    my ($number) = @_;

    # Largest unit first; the first threshold the number reaches wins.
    my @scales = (
        [Gi, "%.3fGi"],
        [Mi, "%.3fMi"],
        [Ki, "%.0fKi"],
    );
    foreach my $scale (@scales) {
        my ($unit, $format) = @{$scale};
        return sprintf($format, $number/$unit) if ($number >= $unit);
    }
    return sprintf("%.0f", $number);
}
|
|
|
|
# Determine whether scheduler stats are available
#
# Probes for the per-task 'sched' file of task 1.
# Returns 1 when the file exists, 0 otherwise.
sub is_sched
{
    my $probe = '/proc/1/task/1/sched';
    return 1 if (-e $probe);
    return 0;
}
|
|
|
|
# Determine whether IO stats are available
#
# Probes for the per-task 'io' file of task 1.
# Returns 1 when the file exists, 0 otherwise.
sub is_io
{
    my $probe = '/proc/1/task/1/io';
    return 1 if (-e $probe);
    return 0;
}
|
|
|
|
# Determine max of array
#
# Returns the numerically largest argument; the first argument seeds the
# comparison, so an empty argument list yields undef.
sub max {
    my @values = @_;
    my $largest = shift @values;
    foreach my $candidate (@values) {
        if ($candidate > $largest) {
            $largest = $candidate;
        }
    }
    return $largest;
}
|
|
|
|
# Determine tids and pid mapping by walking /proc/<pid>/task/<tid>
#
# Argument: reference to a hash that receives one entry per task found,
#           keyed by tid with the owning pid as value.  Existing entries
#           are left in place (the hash is not cleared here).
# Croaks if /proc itself cannot be opened; a pid whose task directory
# cannot be opened (e.g. the process just exited) is silently skipped.
sub get_tids
{
    my ($tid_map) = @_;

    # get pid list: every all-digit directory directly under /proc
    my $proc_root = '/proc';
    opendir(my $proc_dh, $proc_root) || croak "Cannot open directory: $proc_root ($!)";
    my @pid_dirs = grep { /^\d+$/ && -d "$proc_root/$_" } readdir($proc_dh);
    closedir $proc_dh;

    # get tid list: every all-digit directory under /proc/<pid>/task
    for my $pid (@pid_dirs) {
        my $task_root = '/proc/' . $pid . '/task';
        opendir(my $task_dh, $task_root) || next;
        my @tid_dirs = grep { /^\d+$/ && -d "$task_root/$_" } readdir($task_dh);
        closedir $task_dh;

        for my $tid (@tid_dirs) {
            $tid_map->{$tid} = $pid;
        }
    }
}
|
|
|
|
# Reset scheduling hi-water-marks
#
# Argument: reference to a hash keyed by tid (values are not used here).
# Writes '0' to /proc/<tid>/sched for every tid.  This is best-effort:
# a task whose file cannot be opened (for instance because it has already
# exited) is skipped without error.
sub reset_sched_hwm
{
    my ($tids) = @_;

    # reset scheduling hi-water-marks by writing '0' to each task
    foreach my $tid (keys %{$tids}) {
        my $file = '/proc/' . $tid . '/sched';

        # Three-argument open keeps the mode separate from the path.
        open(my $fh, '>', $file) or next;
        print {$fh} "0\n";
        close($fh);
    }
}
|
|
|
|
# Trigger a crash dump via sysrq, result in /var/crash .
# The following requires root privilege:
# echo 1 > /proc/sys/kernel/sysrq
# echo c > /proc/sysrq-trigger
#
# Takes no arguments.  Croaks if either file cannot be opened or written;
# output is buffered, so a failed write may only surface at close().
sub sysrq_trigger_crash
{
    # Order matters: enable sysrq first, then request the crash.
    my @steps = (
        ['/proc/sys/kernel/sysrq', "1\n"],
        ['/proc/sysrq-trigger',    "c\n"],
    );

    foreach my $step (@steps) {
        my ($file, $value) = @{$step};
        open(my $fh, '>', $file) or croak "Cannot open file: $file ($!)";
        print {$fh} $value or croak "Cannot write file: $file ($!)";
        close($fh) or croak "Cannot write file: $file ($!)";
    }
}
|
|
|
|
# Track watched tids for monitoring
#
# Arguments (all references):
#   tids          - hash of tid => pid for every known task
#   tids_w        - hash that receives tid => pid for each watched task
#   task          - hash of tid => per-task info; only 'comm' is read
#   arg_watch_cmd - array of watch patterns
#
# A task is watched when a pattern is all digits and equals its tid, or
# when its 'comm' starts with the pattern (matched literally).  Each
# watched task is recorded and announced once, however many patterns match.
sub track_watched_tids
{
    my ($tids, $tids_w, $task, $watch_cmds) = @_;

    foreach my $tid (keys %{$tids}) {
        my $pid = $tids->{$tid};

        # Look up 'comm' without autovivifying an empty entry for tasks
        # that have no sampled info.
        my $comm = (exists $task->{$tid}) ? $task->{$tid}{'comm'} : undef;

        my $watched = 0;
        foreach my $cmd (@{$watch_cmds}) {
            # An empty pattern would match every command name; ignore it.
            next if (!defined $cmd || $cmd eq '');

            if (($cmd =~ /^\d+$/) && ($tid == $cmd)) {
                $watched = 1;
            } elsif ((defined $comm) && ($comm =~ /^\Q$cmd\E/)) {
                $watched = 1;
            }
            last if ($watched);
        }
        next if (!$watched);

        $tids_w->{$tid} = $pid;
        printf "watching: tid=%7d, comm=%s\n", $tid, (defined $comm ? $comm : '');
    }
}
|
|
|
|
# Parse cpu and scheduling info for each tid
|
|
# - ignore the specific tid if there is incomplete data,
|
|
# (i.e., cannot obtain info because task has died,
|
|
# eg. missing ./stat, ./status, ./cmdline, ./wchan)
|
|
#
|
|
sub read_sched
|
|
{
|
|
(local *::tids, local *::task) = @_;
|
|
|
|
%::task = ();
|
|
foreach my $tid (keys %::tids) {
|
|
my ($fh, $file, $pid, $comm, $cmdline, $wchan, $id) = ();
|
|
my ($tpid, $tcomm, $state, $ppid, $pgrp, $sid,
|
|
$tty_nr, $tty_pgrp, $flags,
|
|
$min_flt, $cmin_flt, $maj_flt, $cmaj_flt,
|
|
$utime, $stime, $cutime, $cstime,
|
|
$priority, $nice, $num_threads,
|
|
$it_real_value, $start_time,
|
|
$vsize, $rss, $rsslim,
|
|
$start_code, $end_code, $start_stack, $esp, $eip,
|
|
$pending, $blocked, $sigign, $sigcatch, $wchan_addr,
|
|
$dum1, $dum2, $exit_signal, $task_cpu,
|
|
$rt_priority, $policy, $blkio_ticks,
|
|
$gtime, $cgtime,
|
|
$start_data, $end_data, $start_brk, $arg_start, $arg_end,
|
|
$env_start, $env_end, $exit_code) = ();
|
|
|
|
my ($nr_switches, $nr_migrations) = (0,0);
|
|
my ($exec_runtime, $exec_max) = (0.0, 0.0);
|
|
my ($wait_max, $wait_sum, $wait_count) = (0.0, 0.0, 0);
|
|
my ($block_max) = (0.0);
|
|
my ($iowait_sum, $iowait_count) = (0.0, 0);
|
|
my ($VmSize, $VmRSS) = ();
|
|
my $Cpus_allowed = Math::BigInt->new('0');
|
|
my $affinity = Math::BigInt->new('0');
|
|
my ($rchar, $wchar, $syscr, $syscw, $read_bytes, $write_bytes,
|
|
$cancelled_write_bytes) = (0,0,0,0,0,0,0);
|
|
|
|
my ($sched_valid, $io_valid, $status_valid, $cmdline_valid,
|
|
$wchan_valid, $stat_valid) = ();
|
|
|
|
$pid = $::tids{$tid};
|
|
|
|
# NOTE: Format change over time: OLD: se.statistics.X, NEW: se.statistics->X
|
|
#cat /proc/1/sched
|
|
#systemd (1, #threads: 1)
|
|
#-------------------------------------------------------------------
|
|
#se.exec_start : 33792676.285222
|
|
#se.vruntime : 28019997.693224
|
|
#se.sum_exec_runtime : 21918.207287
|
|
#se.nr_migrations : 5413
|
|
#se.statistics->sum_sleep_runtime : 1166561.198533
|
|
#se.statistics->wait_start : 0.000000
|
|
#se.statistics->sleep_start : 33792676.285222
|
|
#se.statistics->block_start : 0.000000
|
|
#se.statistics->sleep_max : 18951.679990
|
|
#se.statistics->block_max : 0.000000
|
|
#se.statistics->exec_max : 0.909747
|
|
#se.statistics->slice_max : 1.790123
|
|
#se.statistics->wait_max : 4.026544
|
|
#se.statistics->wait_sum : 507.245963
|
|
#se.statistics->wait_count : 2540
|
|
#se.statistics->iowait_sum : 0.000000
|
|
#se.statistics->iowait_count : 0
|
|
#se.statistics->nr_migrations_cold : 0
|
|
#se.statistics->nr_failed_migrations_affine : 67
|
|
#se.statistics->nr_failed_migrations_running : 1
|
|
#se.statistics->nr_failed_migrations_hot : 1
|
|
#se.statistics->nr_forced_migrations : 0
|
|
#se.statistics->nr_wakeups : 2472
|
|
#se.statistics->nr_wakeups_sync : 34
|
|
#se.statistics->nr_wakeups_migrate : 176
|
|
#se.statistics->nr_wakeups_local : 1442
|
|
#se.statistics->nr_wakeups_remote : 1030
|
|
#se.statistics->nr_wakeups_affine : 155
|
|
#se.statistics->nr_wakeups_affine_attempts : 969
|
|
#se.statistics->nr_wakeups_passive : 0
|
|
#se.statistics->nr_wakeups_idle : 0
|
|
#avg_atom : 0.286970
|
|
#avg_per_cpu : 4.049179
|
|
#nr_switches : 76378
|
|
#nr_voluntary_switches : 72308
|
|
#nr_involuntary_switches : 4070
|
|
#se.load.weight : 1024
|
|
#policy : 0
|
|
#prio : 120
|
|
#clock-delta : 28
|
|
|
|
# parse /proc/<pid>/task/<tid>/sched
|
|
$file = '/proc/' . $pid . '/task/' . $tid . '/sched';
|
|
open($fh, $file) || goto SKIP_SCHED;
|
|
$_ = <$fh>;
|
|
if (/^(.*)\s+\((\d+),\s+#threads:/) {
|
|
$comm = $1; $id = $2;
|
|
}
|
|
my ($k, $v, $c0);
|
|
LOOP_SCHED: while (<$fh>) {
|
|
if (/^se\.statistics.{1,2}wait_max\s+:\s+(\S+)/) {
|
|
$wait_max = $1;
|
|
} elsif (/^se\.statistics.{1,2}block_max\s+:\s+(\S+)/) {
|
|
$block_max = $1;
|
|
} elsif (/^se\.statistics.{1,2}wait_sum\s+:\s+(\S+)/) {
|
|
$wait_sum = $1;
|
|
} elsif (/^se\.statistics.{1,2}wait_count\s+:\s+(\S+)/) {
|
|
$wait_count = $1;
|
|
} elsif (/^se\.statistics.{1,2}exec_max\s+:\s+(\S+)/) {
|
|
$exec_max = $1;
|
|
} elsif (/^se\.statistics.{1,2}iowait_sum\s+:\s+(\S+)/) {
|
|
$iowait_sum = $1;
|
|
} elsif (/^se\.statistics.{1,2}iowait_count\s+:\s+(\S+)/) {
|
|
$iowait_count = $1;
|
|
} elsif (/^se\.sum_exec_runtime\s+:\s+(\S+)/) {
|
|
$exec_runtime = $1;
|
|
} elsif (/^se\.nr_migrations\s+:\s+(\S+)/) {
|
|
$nr_migrations = $1;
|
|
} elsif (/^nr_switches\s+:\s+(\S+)/) {
|
|
$nr_switches = $1;
|
|
$sched_valid = 1;
|
|
last LOOP_SCHED;
|
|
}
|
|
}
|
|
close($fh);
|
|
SKIP_SCHED:;
|
|
|
|
#cat /proc/1/io
|
|
#rchar: 3432590242
|
|
#wchar: 438665986
|
|
#syscr: 316595
|
|
#syscw: 104722
|
|
#read_bytes: 1586438144
|
|
#write_bytes: 246829056
|
|
#cancelled_write_bytes: 7798784
|
|
|
|
# parse /proc/<pid>/task/<tid>/io
|
|
$file = '/proc/' . $pid . '/task/' . $tid . '/io';
|
|
open($fh, $file) || goto SKIP_IO;
|
|
LOOP_IO: while (<$fh>) {
|
|
if (/^rchar:\s+(\S+)/) {
|
|
$rchar = $1;
|
|
} elsif (/^wchar:\s+(\S+)/) {
|
|
$wchar = $1;
|
|
} elsif (/^syscr:\s+(\S+)/) {
|
|
$syscr = $1;
|
|
} elsif (/^syscw:\s+(\S+)/) {
|
|
$syscw = $1;
|
|
} elsif (/^read_bytes:\s+(\S+)/) {
|
|
$read_bytes = $1;
|
|
} elsif (/^write_bytes:\s+(\S+)/) {
|
|
$write_bytes = $1;
|
|
} elsif (/^cancelled_write_bytes:\s+(\S+)/) {
|
|
$cancelled_write_bytes = $1;
|
|
$io_valid = 1;
|
|
last LOOP_IO;
|
|
}
|
|
}
|
|
close($fh);
|
|
SKIP_IO:;
|
|
|
|
# parse /proc/<pid>/task/<tid>/status
|
|
$file = '/proc/' . $pid . '/task/' . $tid . '/status';
|
|
open($fh, $file) || next;
|
|
LOOP_STATUS: while (<$fh>) {
|
|
if (/^Name:\s+(.*)/) {
|
|
$comm = $1;
|
|
} elsif (/^State:\s+(\S+)/) {
|
|
$state = $1;
|
|
} elsif (/^PPid:\s+(\S+)/) {
|
|
$ppid = $1;
|
|
} elsif (/^VmSize:\s+(\S+)/) {
|
|
$VmSize = $1;
|
|
} elsif (/^VmRSS:\s+(\S+)/) {
|
|
$VmRSS = $1;
|
|
} elsif (/^Cpus_allowed:\s+([0]+,)*(\S+)/) {
|
|
my $h = $2; $h =~ tr/,/_/;
|
|
$Cpus_allowed = Math::BigInt->from_hex($h);
|
|
$affinity = $Cpus_allowed->band($::affinity_mask);
|
|
$status_valid = 1;
|
|
last LOOP_STATUS;
|
|
}
|
|
}
|
|
close($fh);
|
|
|
|
# parse /proc/<pid>/task/<tid>/cmdline
|
|
$file = '/proc/' . $pid . '/task/' . $tid . '/cmdline';
|
|
open($fh, $file) || next;
|
|
LOOP_CMDLINE: while (<$fh>) {
|
|
if (/^(.*)$/) {
|
|
$cmdline = $1;
|
|
$cmdline =~ s/\000/ /g;
|
|
$cmdline_valid = 1;
|
|
last LOOP_CMDLINE;
|
|
}
|
|
}
|
|
if (!$cmdline_valid) {
|
|
$cmdline_valid = 1;
|
|
$cmdline = $comm;
|
|
}
|
|
close($fh);
|
|
|
|
# parse /proc/<pid>/task/<tid>/wchan
|
|
$file = '/proc/' . $pid . '/task/' . $tid . '/wchan';
|
|
open($fh, $file) || next;
|
|
LOOP_WCHAN: while (<$fh>) {
|
|
if (/^(.*)$/) {
|
|
$wchan = $1;
|
|
$wchan_valid = 1;
|
|
last LOOP_WCHAN;
|
|
}
|
|
}
|
|
close($fh);
|
|
|
|
#Table 1-4: Contents of the stat files (as of 2.6.30-rc7)
|
|
#..............................................................................
|
|
# Field Content
|
|
# tpid process id (or tid, if /proc/<pid>/task/<tid>/stat)
|
|
# tcomm filename of the executable
|
|
# state state (R is running, S is sleeping, D is sleeping in an
|
|
# uninterruptible wait, Z is zombie, T is traced or stopped)
|
|
# ppid process id of the parent process
|
|
# pgrp pgrp of the process
|
|
# sid session id
|
|
# tty_nr tty the process uses
|
|
# tty_pgrp pgrp of the tty
|
|
# flags task flags
|
|
# min_flt number of minor faults
|
|
# cmin_flt number of minor faults with child's
|
|
# maj_flt number of major faults
|
|
# cmaj_flt number of major faults with child's
|
|
# utime user mode jiffies
|
|
# stime kernel mode jiffies
|
|
# cutime user mode jiffies with child's
|
|
# cstime kernel mode jiffies with child's
|
|
# priority priority level
|
|
# nice nice level
|
|
# num_threads number of threads
|
|
# it_real_value (obsolete, always 0)
|
|
# start_time time the process started after system boot
|
|
# vsize virtual memory size
|
|
# rss resident set memory size
|
|
# rsslim current limit in bytes on the rss
|
|
# start_code address above which program text can run
|
|
# end_code address below which program text can run
|
|
# start_stack address of the start of the main process stack
|
|
# esp current value of ESP
|
|
# eip current value of EIP
|
|
# pending bitmap of pending signals
|
|
# blocked bitmap of blocked signals
|
|
# sigign bitmap of ignored signals
|
|
# sigcatch bitmap of catched signals
|
|
# wchan address where process went to sleep
|
|
# 0 (place holder)
|
|
# 0 (place holder)
|
|
# exit_signal signal to send to parent thread on exit
|
|
# task_cpu which CPU the task is scheduled on
|
|
# rt_priority realtime priority
|
|
# policy scheduling policy (man sched_setscheduler)
|
|
# blkio_ticks time spent waiting for block IO
|
|
# gtime guest time of the task in jiffies
|
|
# cgtime guest time of the task children in jiffies
|
|
# start_data address above which program data+bss is placed
|
|
# end_data address below which program data+bss is placed
|
|
# start_brk address above which program heap can be expanded with brk()
|
|
# arg_start address above which program command line is placed
|
|
# arg_end address below which program command line is placed
|
|
# env_start address above which program environment is placed
|
|
# env_end address below which program environment is placed
|
|
# exit_code the thread's exit_code in the form reported by the waitpid system call
|
|
|
|
# parse /proc/<pid>/task/<tid>/stat
|
|
$file = '/proc/' . $pid . '/task/' . $tid . '/stat';
|
|
my $dummy;
|
|
open($fh, $file) || next;
|
|
$_ = <$fh>;
|
|
($tpid, $tcomm, $dummy) = /^(\d+)\s+\((.*)\)\s+(.*)/;
|
|
($state, $ppid, $pgrp, $sid,
|
|
$tty_nr, $tty_pgrp, $flags,
|
|
$min_flt, $cmin_flt, $maj_flt, $cmaj_flt,
|
|
$utime, $stime, $cutime, $cstime,
|
|
$priority, $nice, $num_threads,
|
|
$it_real_value, $start_time,
|
|
$vsize, $rss, $rsslim,
|
|
$start_code, $end_code, $start_stack, $esp, $eip,
|
|
$pending, $blocked, $sigign, $sigcatch, $wchan_addr,
|
|
$dum1, $dum2, $exit_signal, $task_cpu,
|
|
$rt_priority, $policy, $blkio_ticks, $gtime, $cgtime,
|
|
$start_data, $end_data, $start_brk, $arg_start, $arg_end,
|
|
$env_start, $env_end, $exit_code) = split(/\s+/, $dummy);
|
|
$stat_valid = 1;
|
|
close($fh);
|
|
|
|
# sched
|
|
if (defined $sched_valid) {
|
|
$::task{$tid}{'exec_runtime'} = $exec_runtime;
|
|
$::task{$tid}{'exec_max'} = $exec_max;
|
|
$::task{$tid}{'block_max'} = $block_max;
|
|
$::task{$tid}{'wait_max'} = $wait_max;
|
|
$::task{$tid}{'wait_sum'} = $wait_sum;
|
|
$::task{$tid}{'wait_count'} = $wait_count;
|
|
$::task{$tid}{'iowait_sum'} = $iowait_sum;
|
|
$::task{$tid}{'iowait_count'} = $iowait_count;
|
|
$::task{$tid}{'nr_migrations'} = $nr_migrations;
|
|
$::task{$tid}{'nr_switches'} = $nr_switches;
|
|
} else {
|
|
$::task{$tid}{'exec_runtime'} = 0;
|
|
$::task{$tid}{'exec_max'} = 0;
|
|
$::task{$tid}{'block_max'} = 0;
|
|
$::task{$tid}{'wait_max'} = 0;
|
|
$::task{$tid}{'wait_sum'} = 0;
|
|
$::task{$tid}{'wait_count'} = 0;
|
|
$::task{$tid}{'iowait_sum'} = 0;
|
|
$::task{$tid}{'iowait_count'} = 0;
|
|
$::task{$tid}{'nr_migrations'} = 0;
|
|
$::task{$tid}{'nr_switches'} = 0;
|
|
}
|
|
|
|
# io
|
|
if (defined $io_valid) {
|
|
$::task{$tid}{'rchar'} = $rchar;
|
|
$::task{$tid}{'wchar'} = $wchar;
|
|
$::task{$tid}{'syscr'} = $syscr;
|
|
$::task{$tid}{'syscw'} = $syscw;
|
|
$::task{$tid}{'read_bytes'} = $read_bytes;
|
|
$::task{$tid}{'write_bytes'} = $write_bytes;
|
|
$::task{$tid}{'cancelled_write_bytes'} = $cancelled_write_bytes;
|
|
} else {
|
|
$::task{$tid}{'rchar'} = 0;
|
|
$::task{$tid}{'wchar'} = 0;
|
|
$::task{$tid}{'syscr'} = 0;
|
|
$::task{$tid}{'syscw'} = 0;
|
|
$::task{$tid}{'read_bytes'} = 0;
|
|
$::task{$tid}{'write_bytes'} = 0;
|
|
$::task{$tid}{'cancelled_write_bytes'} = 0;
|
|
}
|
|
|
|
# status
|
|
if (defined $status_valid) {
|
|
$::task{$tid}{'pid'} = $pid;
|
|
$::task{$tid}{'comm'} = $comm;
|
|
$::task{$tid}{'state'} = $state;
|
|
$::task{$tid}{'ppid'} = $ppid;
|
|
$::task{$tid}{'VmSize'} = $VmSize;
|
|
$::task{$tid}{'VmRSS'} = $VmRSS;
|
|
$::task{$tid}{'affinity'} = $affinity;
|
|
} else {
|
|
$::task{$tid}{'pid'} = 0;
|
|
$::task{$tid}{'comm'} = '-';
|
|
$::task{$tid}{'state'} = '-';
|
|
$::task{$tid}{'ppid'} = 0;
|
|
$::task{$tid}{'VmSize'} = 0;
|
|
$::task{$tid}{'VmRSS'} = 0;
|
|
$::task{$tid}{'affinity'} = Math::BigInt->new('0');
|
|
}
|
|
|
|
# cmdline
|
|
if (defined $cmdline_valid) {
|
|
$::task{$tid}{'cmdline'} = $cmdline;
|
|
} else {
|
|
$::task{$tid}{'cmdline'} = $comm;
|
|
}
|
|
|
|
# wchan
|
|
if (defined $cmdline_valid) {
|
|
$::task{$tid}{'wchan'} = $wchan;
|
|
} else {
|
|
$::task{$tid}{'wchan'} = '-';
|
|
}
|
|
|
|
# stat
|
|
if (defined $stat_valid) {
|
|
$::task{$tid}{'nice'} = $nice;
|
|
$::task{$tid}{'policy'} = $policy;
|
|
$::task{$tid}{'priority'} = $priority;
|
|
$::task{$tid}{'rt_priority'} = $rt_priority;
|
|
$::task{$tid}{'start_time'} = $start_time;
|
|
$::task{$tid}{'task_cpu'} = $task_cpu;
|
|
} else {
|
|
$::task{$tid}{'nice'} = 0;
|
|
$::task{$tid}{'policy'} = '-';
|
|
$::task{$tid}{'priority'} = 0;
|
|
$::task{$tid}{'rt_priority'} = 0;
|
|
$::task{$tid}{'start_time'} = '';
|
|
$::task{$tid}{'task_cpu'} = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
# Parse per-cpu hi-resolution scheduling stats from /proc/schedstat.
#
# Argument:
#   a hash reference, aliased locally as %::percpu.  The hash is emptied
#   and then filled with one entry per "cpu<N>" line:
#       N => 7th numeric field of that line (the cputime).
#
# Croaks when the file cannot be opened, or when the "version" header is
# missing or reports anything other than 15 (the only layout implemented).
sub read_schedstat
{
    (local *::percpu) = @_;

    %::percpu = ();

    # parse /proc/schedstat
    my $file = '/proc/schedstat';
    open(my $fh, '<', $file) || croak "Cannot open file: $file ($!)";

    # First line is "version <n>".  Lines are read into lexicals so that
    # the caller's $_ is never overwritten.
    my $header = <$fh>;
    my ($version) = defined($header) ? ($header =~ /^version\s+(\d+)/) : ();

    if (!defined($version) || $version != 15) {
        close($fh);
        my $reported = defined($version) ? $version : 'unknown';
        croak "schedstat version: $reported method not implemented.";
    }

    # The "timestamp" line and every other non-cpu line simply fail the
    # match below, so no explicit skipping is needed.
    while (my $row = <$fh>) {
        # version 15: cputime is 7th field
        if ($row =~ /^cpu(\d+)\s+\d+\s+\d+\s+\d+\s+\d+\s+\d+\s+\d+\s+(\d+)\s+/) {
            $::percpu{$1} = $2;
        }
    }
    close($fh);
}
|
|
|
|
# Parse per-cpu jiffie stats from /proc/stat.
#
# Argument:
#   a hash reference, aliased locally as %::percpu.  The hash is emptied
#   and then filled with cpu number => cputime, where cputime is
#   $CLOCK_NS times the sum of the 1st, 2nd, 3rd, 5th, 6th and 7th
#   counters of each "cpu<N>" line.  Only the 4th counter is left out.
#
# NOTE(review): the previous header said "cputime excludes iowait", yet
# the sum has always included the 5th counter, which proc(5) documents as
# iowait; only idle (4th) is excluded.  The arithmetic is unchanged here
# -- confirm which was intended.
#
# Croaks when /proc/stat cannot be opened.
sub read_stat
{
    (local *::percpu) = @_;

    %::percpu = ();

    # parse /proc/stat
    my $file = '/proc/stat';
    open(my $fh, '<', $file) || croak "Cannot open file: $file ($!)";

    # Lines are read into a lexical so the caller's $_ is left untouched.
    while (my $row = <$fh>) {
        next unless $row =~ /^cpu(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+/;

        # Counter names follow the column order given in proc(5).
        my ($cpu, $user, $nice, $system, $idle, $iowait, $irq, $softirq)
            = ($1, $2, $3, $4, $5, $6, $7, $8);

        # $idle is captured only to keep the columns aligned; it is not summed.
        $::percpu{$cpu} = $CLOCK_NS * ($user + $nice + $system + $iowait + $irq + $softirq);
    }
    close($fh);
}
|
|
|
|
# Parse load-average from /proc/loadavg
#
# Arguments (references, aliased locally to the like-named globals):
#   loadavg   : hash; keys '1', '5', '15' receive the first three fields
#   runq      : scalar; receives the number before the '/' in field four
#   num_tasks : scalar; receives the number after the '/' in field four
#
# All five values are reset to zero first and stay zero when the line is
# missing or does not match.  Croaks when the file cannot be opened.
sub get_loadavg
{
    (local *::loadavg, local *::runq, local *::num_tasks) = @_;

    @::loadavg{'1', '5', '15'} = (0.0, 0.0, 0.0);
    $::runq      = 0;
    $::num_tasks = 0;

    my $file = '/proc/loadavg';
    open(my $fh, '<', $file) || croak "Cannot open file: $file ($!)";

    # Read into a lexical so the caller's $_ is left untouched; an empty
    # file yields undef, which is skipped rather than pattern-matched.
    my $line = <$fh>;
    if (defined($line)
        && $line =~ /^(\S+)\s+(\S+)\s+(\S+)\s+(\d+)\/(\d+)\s+\d+/) {
        @::loadavg{'1', '5', '15'} = ($1, $2, $3);
        ($::runq, $::num_tasks)   = ($4, $5);
    }
    close($fh);
}
|
|
|
|
# Parse blocked from /proc/stat
#
# Argument:
#   a scalar reference, aliased locally as $::num_blk.  It is reset to 0
#   and then set to the number on the "procs_blocked" line, if present.
#
# Croaks when the file cannot be opened.
sub get_blocked
{
    (local *::num_blk) = @_;

    $::num_blk = 0;

    my $file = '/proc/stat';
    open(my $fh, '<', $file) || croak "Cannot open file: $file ($!)";

    # Lines are read into a lexical so the caller's $_ is left untouched.
    while (my $line = <$fh>) {
        if ($line =~ /^procs_blocked\s+(\d+)/) {
            $::num_blk = $1;
            last;   # nothing further is needed from this file
        }
    }
    close($fh);
}
|
|
|
|
# Parse uptime from /proc/uptime
#
# Argument:
#   a scalar reference, aliased locally as $::uptime.  It is reset to 0.0
#   and then set to the first whitespace-separated field of the file's
#   first line, provided a second field follows it.
#
# Croaks when the file cannot be opened.
sub get_uptime
{
    (local *::uptime) = @_;
    $::uptime = 0.0;

    my $file = '/proc/uptime';
    open(my $fh, '<', $file) || croak "Cannot open file: $file ($!)";

    # Read into a lexical so the caller's $_ is left untouched; an empty
    # file yields undef, which is skipped rather than pattern-matched.
    my $line = <$fh>;
    if (defined($line) && $line =~ /^(\S+)\s+\S+/) {
        $::uptime = $1;
    }
    close($fh);
}
|
|
|
|
# Count the "processor : <n>" entries in /proc/cpuinfo
#
# Argument:
#   a scalar reference, aliased locally as $::num_cpus.  It is reset to 0
#   and incremented once per matching line.
#
# Croaks when the file cannot be opened.
sub get_num_logical_cpus {
    (local *::num_cpus) = @_;
    $::num_cpus = 0;

    my $file = "/proc/cpuinfo";
    open(my $fh, '<', $file) || croak "Cannot open file: $file ($!)";

    # Lines are read into a lexical so the caller's $_ is left untouched.
    # The pattern allows any amount of whitespace after the colon, the
    # same as the processor match used when printing the host header.
    while (my $line = <$fh>) {
        $::num_cpus++ if $line =~ /^[Pp]rocessor\s+:\s+\d+/;
    }
    close($fh);
}
|
|
|
|
# Print the per-sample summary header line; on the first call also print
# host, processor and build details.
#
# Arguments (all references, aliased locally to the like-named globals):
#   tr_1        : scalar, epoch seconds (fraction allowed) for the timestamp
#   tm_elapsed  : scalar, elapsed seconds; printed as "dt:" in milliseconds
#   tr_elapsed  : scalar, elapsed seconds compared with tm_elapsed for skew
#   uptime      : scalar, seconds; printed as up:<d>:<hh>:<mm>:<ss>
#   loadavg     : hash with keys '1', '5', '15'
#   runq, num_blk, num_state_D, num_tasks :
#                 scalars printed as runq:, blk:, D: and nproc:
#   print_host  : scalar flag; when true the host details are printed and
#                 the flag is cleared to 0 so they are printed only once
#
# Croaks when host details are requested and /proc/cpuinfo cannot be
# opened.  The platform and build files are optional: when they cannot be
# opened their fields are printed as '-'.
sub schedtop_header {
    (local *::tr_1,
     local *::tm_elapsed,
     local *::tr_elapsed,
     local *::uptime,
     local *::loadavg,
     local *::runq,
     local *::num_blk,
     local *::num_state_D,
     local *::num_tasks,
     local *::print_host,
    ) = @_;

    # process epoch to get current timestamp
    my ($sec, $min, $hour, $mday, $mon, $year) = localtime($::tr_1);
    my $msec = 1000.0*($::tr_1 - int($::tr_1));

    # convert uptime to elapsed <d>:<hh>:<mm>:<ss>
    my $mm_in_s = 60;
    my $hh_in_s = 60*60;
    my $dd_in_s = 24*60*60;
    my $up    = int($::uptime);
    my $up_dd = int($up/$dd_in_s);
    $up -= $dd_in_s*$up_dd;
    my $up_hh = int($up/$hh_in_s);
    $up -= $hh_in_s*$up_hh;
    my $up_mm = int($up/$mm_in_s);
    $up -= $mm_in_s*$up_mm;
    my $up_ss = $up;

    # Calculate skew of CLOCK_REALTIME vs CLOCK_MONOTONIC,
    # and display skew if > 5% relative difference.
    # A zero elapsed time has no meaningful relative skew, and dividing by
    # it would abort the tool, so that case prints no skew.
    my $skew_ms = ($::tr_elapsed - $::tm_elapsed)*1000.0;
    my $skew = "";
    if ($::tm_elapsed != 0 && abs($skew_ms)/$::tm_elapsed > 50.0) {
        $skew = sprintf " skew:%.3f ms", $skew_ms;
    }

    #schedtop <version> -- 2014-03-03 02:00:21.357 dt:2050.003 ms ldavg:0.07, 0.09, 0.08 runq:1 blk:0 D:0 nproc:440 up:6:13:00:56 skew:0.001 ms
    printf "%s %s -- ".
        "%4d-%02d-%02d %02d:%02d:%02d.%03d ".
        "dt:%.3f ms ".
        "ldavg:%.2f, %.2f, %.2f runq:%d blk:%d D:%d nproc:%d ".
        "up:%d:%02d:%02d:%02d %s\n",
        $::TOOLNAME, $::VERSION,
        1900+$year, 1+$mon, $mday, $hour, $min, $sec, $msec,
        $::tm_elapsed*1000.0,
        $::loadavg{'1'}, $::loadavg{'5'}, $::loadavg{'15'},
        $::runq, $::num_blk, $::num_state_D, $::num_tasks,
        $up_dd, $up_hh, $up_mm, $up_ss,
        $skew;

    return if (!($::print_host));

    # After first print, disable print host information
    $::print_host = 0;

    # Get host specific information (the 4th uname field is not printed)
    my ($OSTYPE, $NODENAME, $OSRELEASE, undef, $MACHINE) = POSIX::uname();
    my ($NODETYPE, $SUBFUNCTION) = ('-', '-');
    my ($SW_VERSION, $BUILD_ID)  = ('-', '-');

    # Get platform nodetype and subfunction (file is optional).
    # Lexical handles and line variables keep FILE and $_ untouched.
    if (open(my $platform_fh, '<', "/etc/platform/platform.conf")) {
        while (my $line = <$platform_fh>) {
            $line =~ s/[\0\e\f\r\a]//g; chomp($line); # strip control characters if any
            if ($line =~ /^nodetype=(\S+)/) {
                $NODETYPE = $1;
            }
            if ($line =~ /^subfunction=(\S+)/) {
                $SUBFUNCTION = $1;
            }
        }
        close($platform_fh);
    }

    # Get loadbuild info (file is optional)
    if (open(my $build_fh, '<', "/etc/build.info")) {
        while (my $line = <$build_fh>) {
            $line =~ s/[\0\e\f\r\a]//g; chomp($line); # strip control characters if any
            if ($line =~ /^SW_VERSION=\"([^"]+)\"/) {
                $SW_VERSION = $1;
            }
            if ($line =~ /^BUILD_ID=\"([^"]+)\"/) {
                $BUILD_ID = $1;
            }
        }
        close($build_fh);
    }
    my $BUILDINFO = join(' ', $SW_VERSION, $BUILD_ID);

    # Parse /proc/cpuinfo to get specific processor info
    my ($n_cpu, $model_name, $cpu_MHz) = (0, '-', 0);
    my $cpuinfo = "/proc/cpuinfo";
    open(my $cpu_fh, '<', $cpuinfo) || croak "Cannot open file: $cpuinfo ($!)";
    while (my $line = <$cpu_fh>) {
        $line =~ s/[\0\e\f\r\a]//g; chomp($line); # strip control characters if any
        if ($line =~ /^[Pp]rocessor\s+:\s+\d+/) {
            $n_cpu++;
        } elsif ($line =~ /^model name\s+:\s+(.*)$/) {
            # collapse runs of whitespace inside the model string
            ($model_name = $1) =~ s/\s+/ /g;
        } elsif ($line =~ /^cpu MHz\s+:\s+(\S+)/) {
            $cpu_MHz = $1;
        } elsif ($line =~ /^bogomips\s+:\s+(\S+)/) {
            # used only as a stand-in while no "cpu MHz" value has been seen
            $cpu_MHz = $1 if ($cpu_MHz == 0);
        }
    }
    close($cpu_fh);

    printf " host:%s nodetype:%s subfunction:%s\n",
        $NODENAME, $NODETYPE, $SUBFUNCTION;
    printf " arch:%s processor:%s speed:%.0f #CPUs:%d\n",
        $MACHINE, $model_name, $cpu_MHz, $n_cpu;
    printf " %s %s build:%s\n", $OSTYPE, $OSRELEASE, $BUILDINFO;
}
|
|
|
|
# Parse and validate command line arguments
#
# Arguments (all references, aliased locally to the like-named globals;
# every one is a scalar except arg_watch_cmd, which is an array):
#   arg_debug, arg_delay, arg_repeat, arg_period, arg_reset_hwm, arg_idle,
#   arg_sort, arg_print, arg_watch_cmd, arg_watch_only, arg_watch_quiet,
#   arg_trig_delay
#
# Behaviour:
#   - with an empty @ARGV: prints usage and exits 0
#   - when GetOptions rejects the input: GetOptionsMessage() is called
#   - with --help: ListHelp() is called
#   - every validation failure is reported with warn; after all checks
#     have run, any failure prints usage and exits 1
#   - comma-separated --watch-cmd values are flattened into one list
#   - defaults: delay 1.0, repeat 1, sort 'cpu', print 'full',
#     trig-delay 0; repeat is derived from period when period is given,
#     otherwise period is derived from delay and repeat
sub parse_schedtop_args {
    (local *::arg_debug,
     local *::arg_delay,
     local *::arg_repeat,
     local *::arg_period,
     local *::arg_reset_hwm,
     local *::arg_idle,
     local *::arg_sort,
     local *::arg_print,
     local *::arg_watch_cmd,
     local *::arg_watch_only,
     local *::arg_watch_quiet,
     local *::arg_trig_delay,
    ) = @_;

    # Local variables
    my $arg_help;
    my $fail = 0;

    # Report one input error and remember that validation failed.
    # warn() does not interpret printf directives, so callers hand over a
    # fully formatted message.
    my $input_error = sub {
        my ($msg) = @_;
        $fail = 1;
        warn "$::TOOLNAME: Input error: $msg\n";
    };

    # Use the Argument processing module
    use Getopt::Long;

    # Print usage if no arguments
    if (!@::ARGV) {
        Usage();
        exit 0;
    }

    # Process input arguments
    GetOptions(
        "debug:i",      \$::arg_debug,
        "delay=f",      \$::arg_delay,
        "period=i",     \$::arg_period,
        "repeat=i",     \$::arg_repeat,
        "reset-hwm",    \$::arg_reset_hwm,
        "idle",         \$::arg_idle,
        "sort=s",       \$::arg_sort,
        "print=s",      \$::arg_print,
        "watch-cmd=s@", \@::arg_watch_cmd,
        "watch-only",   \$::arg_watch_only,
        "watch-quiet",  \$::arg_watch_quiet,
        "trig-delay=i", \$::arg_trig_delay,
        "help|h",       \$arg_help
    ) || GetOptionsMessage();

    # Print help documentation if user has selected --help
    ListHelp() if (defined $arg_help);

    # Validate options
    if ((defined $::arg_repeat) && (defined $::arg_period)) {
        $input_error->("cannot specify both --repeat and --period options.");
    }
    if ((defined $::arg_delay) && ($::arg_delay < 0.01)) {
        $input_error->(sprintf("--delay %f is less than 0.01.", $::arg_delay));
    }
    if ((defined $::arg_sort) && !(($::arg_sort eq 'cpu') || ($::arg_sort eq 'io'))) {
        $input_error->("--sort=$::arg_sort invalid; valid options are: cpu, io.");
    }
    if ((defined $::arg_print) && !(($::arg_print eq 'brief') || ($::arg_print eq 'full'))) {
        $input_error->("--print=$::arg_print invalid; valid options are: brief, full");
    }
    if ((defined $::arg_watch_only) && !(@::arg_watch_cmd)) {
        $input_error->("--watch-only requires --watch-cmd option.");
    }
    if ((defined $::arg_watch_quiet) && !(@::arg_watch_cmd)) {
        $input_error->("--watch-quiet requires --watch-cmd option.");
    }
    if ((defined $::arg_trig_delay) && !(@::arg_watch_cmd)) {
        $input_error->("--trig-delay requires --watch-cmd option.");
    }
    if ((defined $::arg_trig_delay) && ($::arg_trig_delay < 1)) {
        $input_error->(sprintf("--trig-delay %d is less than 1.", $::arg_trig_delay));
    }

    # Each --watch-cmd occurrence may itself hold a comma-separated list;
    # flatten them all into a single list of names/tids.
    if (@::arg_watch_cmd) {
        @::arg_watch_cmd = map { split(',', $_) } @::arg_watch_cmd;
    }

    # Anything GetOptions left behind is an unexpected positional argument
    if (@::ARGV) {
        $input_error->("not expecting these options: '@::ARGV'.");
    }

    # Set reasonable defaults
    $::arg_delay  ||= 1.0;
    $::arg_repeat ||= 1;
    if ($::arg_period) {
        $::arg_repeat = $::arg_period / $::arg_delay;
    } else {
        $::arg_period = $::arg_delay * $::arg_repeat;
    }
    $::arg_sort       ||= 'cpu';
    $::arg_print      ||= 'full';
    $::arg_trig_delay ||= 0;

    # Upon missing or invalid options, print usage
    if ($fail == 1) {
        Usage();
        exit 1;
    }
}
|
|
|
|
# Report that the command line could not be processed, show the usage
# summary, and terminate with exit status 1.
sub GetOptionsMessage {
    my $complaint = "$::TOOLNAME: Error processing input arguments.\n";
    warn $complaint;
    Usage();
    exit(1);
}
|
|
|
|
# Print the one-screen usage summary: the tool name followed by every
# accepted option, then a blank line.
sub Usage {
    my @summary = (
        "Usage: $::TOOLNAME OPTIONS\n",
        " [--delay=<seconds>] [--repeat=<num>] [--period=<seconds>]\n",
        " [--reset-hwm] [--idle] [--sort=<cpu|io>] [--print=<brief|full>]\n",
        " [--watch-cmd=tid1,cmd1,cmd2,...] [--watch-only] [--watch-quiet]\n",
        " [--trig-delay=time]\n",
        " [--help]\n",
        "\n",
    );

    # Emitted through printf, as before, with the text acting as the format.
    printf(join('', @summary));
}
|
|
|
|
# Print the full help text -- a title line, the usage summary, then a
# description of every option -- and terminate with exit status 0.
sub ListHelp {
    printf("$::TOOLNAME -- display per-task scheduling occupancy\n");
    Usage();

    my @option_help = (
        "Options: miscellaneous\n",
        " --delay=<seconds> : output interval (seconds): default: 1.0\n",
        " --repeat=<num> : number of repeat samples: default: 1\n",
        " --period=<seconds> : overall tool duration (seconds): default: --\n",
        " --reset-hwm : reset scheduling delay hi-water marks\n",
        " --idle : specify printing of idle tasks\n",
        " --sort=<cpu|io> : sort order, select from 'cpu' or 'io'\n",
        " --print=<brief|full> : select 'brief' or 'full' fields to display\n",
        "Watch specific tasks or commands:\n",
        " --watch-cmd=tid1,cmd1,... : watch specific tids or 'comm' names\n",
        " (matches from beginning of comm with partial name, eg, --watch-cmd=sirq)\n",
        " --watch-only : display only watched tasks (reduces impact of tool)\n",
        " --watch-quiet : suppress output after watch starts\n",
        "Trigger crash dump via sysrq:\n",
        " --trig-delay=time : trigger delay threshold (ms)\n",
        " --help : this help\n",
    );

    # One printf per line, in the listed order.
    foreach my $help_line (@option_help) {
        printf($help_line);
    }

    exit(0);
}
|
|
|
|
1;
|