#!/usr/bin/perl

use Getopt::Std;

# Version of the /proc/schedstat format this tool understands; the main
# loop refuses input whose "version" line carries a different number.
$curr_version = 10;

#
# per-runqueue stats
#
# Index of each field on a "cpuN" line (index 0 is the "cpuN" tag itself).
#
$YLD_BOTH_EMPTY     = 1;
$YLD_ACT_EMPTY      = 2;
$YLD_EXP_EMPTY      = 3;
$YLD_CNT            = 4;
$SCHED_NOSWITCH     = 5;
$SCHED_SWITCH       = 6;
$SCHED_CNT          = 7;
$SCHED_GOIDLE       = 8;
$ALB_CNT            = 9;
$ALB_GAINED         = 10;
$ALB_LOST           = 11;
$ALB_FAILED         = 12;
$TTWU_CNT           = 13;
$TTWU_MOVED         = 14;
$TTWU_ATTEMPTS      = 15;
$WUFT_CNT           = 16;
$WUFT_MOVED         = 17;
$SMT_CNT            = 18;
$SBE_CNT            = 19;
$CPU_CPUTIME        = 20;
$CPU_RUNDELAY       = 21;
$CPU_TRIPCNT        = 22;
$PT_GAINED_IDLE     = 23;
$PT_LOST_IDLE       = 24;
$PT_GAINED_NOTIDLE  = 25;
$PT_LOST_NOTIDLE    = 26;
$PT_GAINED_NEWIDLE  = 27;
$PT_LOST_NEWIDLE    = 28;

#
# per-domain stats
#
# Index of each field on a "domainN" line (index 0 is the "domainN" tag
# and index 1 is the second token on the line, so counters start at 2).
#
$LB_CNT_IDLE          = 2;
$LB_FAILED_IDLE       = 3;
$LB_IMBALANCE_IDLE    = 4;
$LB_NOBUSYQ_IDLE      = 5;
$LB_NOBUSYG_IDLE      = 6;
$LB_CNT_NOIDLE        = 7;
$LB_FAILED_NOIDLE     = 8;
$LB_IMBALANCE_NOIDLE  = 9;
$LB_NOBUSYQ_NOIDLE    = 10;
$LB_NOBUSYG_NOIDLE    = 11;
$LB_CNT_NEWIDLE       = 12;
$LB_FAILED_NEWIDLE    = 13;
$LB_IMBALANCE_NEWIDLE = 14;
$LB_NOBUSYQ_NEWIDLE   = 15;
$LB_NOBUSYG_NEWIDLE   = 16;
$SBE_ATTEMPTS         = 17;
$SBE_PUSHED           = 18;
$TTWU_TRY_AFFINE      = 19;
$TTWU_TRY_BALANCE     = 20;

#
# Command line flags (each sets the matching $opt_X global):
#   -t  print a single report at end of input instead of one per snapshot
#   -c  break the pull_task() and latency figures down per cpu
#   -d  report load balancing separately for each scheduler domain
#
die "Usage: $0 [-t] [-c] [-d] [file]\n" unless getopts("tcd");

#
# @domain_diff_all is an array, for each field of domain data, of the sum
#	of that field across all cpus and all domains.
#
# @domain_diff_bycpu is an array of references to arrays. For each cpu, it
#	contains a reference to an array which sums each field in all its
#	domain stats.
#
# @diff is the array of runqueue data.
#
# @per_cpu_curr and @per_cpu_prev are arrays of runqueue data on a per cpu
#	basis for the current stats (just read) and previous stats.  These
#	are referenced to calculate @diff, above.  Fields beyond
#	$PT_LOST_NEWIDLE (i.e. from index 29 on) are references to arrays of
#	per-domain information for this cpu; as many references as there are
#	unique domains.
#
#
# Compute the change between the previous and the current snapshot.
#
# Reads the globals @per_cpu_curr, @per_cpu_prev, $max_cpu and $max_domain
# and (re)builds these globals:
#
#   @diff              [1..28] runqueue fields summed over all cpus;
#                      [29+N]  reference to the domainN fields (2..20)
#                              summed over all cpus
#   @per_cpu_diff      per cpu: the 28 runqueue diffs, followed (from
#                      index 29) by one array reference per domain
#   @domain_diff_bycpu per cpu: each domain field summed over that cpu's
#                      domains
#   @domain_diff_all   each domain field summed over all cpus and domains
#
# Takes no arguments and returns nothing useful.
#
sub summarize_data {
    # One scratch array is shared by the cpu and the domain passes so that
    # the structures stored in @per_cpu_diff keep their historical layout.
    my @arr_diff;

    @diff = ();
    @domain_diff_all = ();

    foreach my $cpu (0 .. $max_cpu) {
        # A cpu missing from either snapshot is treated as all zeroes.
        my $cpu_curr = $per_cpu_curr[$cpu] || [];
        my $cpu_prev = $per_cpu_prev[$cpu] || [];

        #
        # first the runqueue fields of this cpu
        #
        foreach my $i (1 .. 28) {
            $arr_diff[$i] = $cpu_curr->[$i] - $cpu_prev->[$i];
            $diff[$i] += $arr_diff[$i];
        }
        $per_cpu_diff[$cpu] = [ @arr_diff ];

        #
        # now stats from domains; domainN is stored at index N+29 of the
        # cpu's array
        #
        $domain_diff_bycpu[$cpu] = [ ];
        foreach my $domain (0 .. $max_domain) {
            my $dom_curr = $cpu_curr->[$domain+29] || [];
            my $dom_prev = $cpu_prev->[$domain+29] || [];

            foreach my $i (2 .. 20) {
                $arr_diff[$i] = $dom_curr->[$i] - $dom_prev->[$i];
                $diff[$domain+29][$i] += $arr_diff[$i];
                # Index by field number: a bareword here would numify to 0
                # and pile every field into slot 0.
                $domain_diff_bycpu[$cpu]->[$i] += $arr_diff[$i];
                $domain_diff_all[$i] += $arr_diff[$i];
            }
            push @{$per_cpu_diff[$cpu]}, [ @arr_diff ];
        }
    }
}
    
#
# Main loop: read schedstat snapshots from the files named on the command
# line (or stdin) one line at a time and dispatch on the first token.
#
# $first counts down the "version" lines that must go by before diffs are
# displayed.  The baseline (@per_cpu_prev) is captured when the count
# reaches zero, from whatever cpu/domain lines have been read by then
# (presumably the first complete snapshot, if every snapshot starts with
# its version line -- TODO confirm against the capture script).
#
$first = 2;
while (<>) {

    next if (/^$/);

    @curr = split;
    if ($curr[0] =~ /cpu(\d+)/) {
        # Start a fresh array for this cpu; the domain lines that follow
        # are appended to it below.
        $curr_cpu = $1;
        $per_cpu_curr[$curr_cpu] = [ @curr ];
        $max_cpu = $curr_cpu if ($curr_cpu > $max_cpu);
        next;
    }
    if ($curr[0] =~ /domain(\d+)/) {
        # Attach the domain's fields to the most recently seen cpu.
        $arr = $per_cpu_curr[$curr_cpu];
        push @{$arr}, [ @curr ];
        #print "@{$arr}\n";
        #print "($curr_cpu,$1)$arr->[0],$arr->[$#{@{$arr}}]->[0]\n";
        #print "$#{@{$arr}}\n";
        $max_domain = $1 if ($1 > $max_domain);
        next;
    }
    if ($curr[0] eq "version") {
        if ($curr[1] != $curr_version) {
            die "$0: Version mismatch: input is version $curr[1] but this" .
                " tool\nis for version $curr_version.\n";
        }
        if (!$first) {


            #
            # display diffs
            #
            if (!$opt_t) {
                summarize_data();
                $diff[0] = "diff";
                #print "@diff[1..29]\n";
                #print "  domain0 @{$diff[29]}[2..20]\n";
                #print "  domain1 @{$diff[29]}[2..20]\n";
                #print "  domain2 @{$diff[30]}[2..20]\n";
                print "\n";
                print_diffs();
                @per_cpu_prev = @per_cpu_curr;
            } else {
                # With -t the baseline is taken once and never advanced,
                # so the final report covers the whole run.
                # (defined(@array) is a fatal error since perl 5.22; test
                # for emptiness instead.)
                @per_cpu_prev = @per_cpu_curr if (!@per_cpu_prev);
            }
        } else {
            @per_cpu_prev = @per_cpu_curr if (!--$first && !@per_cpu_prev);
        }
        next;
    }
    if ($curr[0] eq "timestamp") {
        if ($curr[1] > $otimestamp) {
            $delta = $curr[1] - $otimestamp;
        } else {
            # timestamp rolled over
            $delta = $curr[1] + (~0 - $otimestamp);
            print "ROLLOVER! (delta=$delta)\n";
        }
        #print "delta is $delta ($curr[1])\n";
        $otimestamp = $curr[1];
        # $timestart remembers the first delta; $timestamp accumulates all
        # of them, so their difference is the time elapsed since the first
        # timestamp line.
        $timestart = $delta if (!$timestart);
        $timestamp += $delta;
        next;
    }

    #
    # NOTE(review): the description below appears to predate the format
    # this script parses: it says "version == 8" and numbers 24 cpu fields,
    # whereas the script requires version $curr_version (10) and indexes 28
    # cpu fields, and the numbering does not line up with the $YLD_* /
    # $LB_* index constants at the top of the file.  Confirm against the
    # kernel's schedstats documentation before relying on it.
    #
    # format of line in /proc/schedstat
    #
    # cpuN 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
    # domainN xxxxxxxx 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
    #
    # version == 8
    #
    # These are the fields from the cpuN field, and deal with the runqueue
    # that cpu is in.
    #
    # NOTE: the active queue is considered empty if it has only one process
    #   in it, since obviously the process calling sched_yield is that process.
    #
    # First four are sched_yield statistics:
    #     1) # of times both the active and the expired queue were empty
    #     2) # of times just the active queue was empty
    #     3) # of times just the expired queue was empty
    #     4) # of times sched_yield() was called
    #
    # Next three are schedule() statistics:
    #     5) # of times the active queue had at least one other process on it.
    #     6) # of times we switched to the expired queue and reused it
    #     7) # of times schedule() was called
    #
    # Next four are statistics dealing with active_load_balance():
    #     8) # of times active_load_balance() was called
    #     9) # of times active_load_balance() caused this cpu to gain a task
    #    10) # of times active_load_balance() caused this cpu to lose a task
    #    11) # of times active_load_balance() tried to move a task and failed
    #
    # Next two are statistics dealing with try_to_wake_up():
    #    12) # of times try_to_wake_up() was called
    #    13) # of times try_to_wake_up() moved the awakening task
    #
    # Next one is a statistic dealing with sched_migrate_task():
    #    14) # of times sched_migrate_task() was called
    #
    # Next one is a statistic dealing with sched_balance_exec():
    #    15) # of times sched_balance_exec() was called
    #
    # Next three are statistics dealing with scheduling latency:
    #    16) sum of all time spent running by tasks on this processor (in ms)
    #    17) sum of all time spent waiting to run by tasks on this processor
    #        (in ms)
    #    18) # of tasks (not necessarily unique) given to the processor
    #
    # Next six are statistics dealing with pull_task():
    #    19) # of tasks gained by this cpu as a result of a balance done when
    #        this cpu was idle (and had been previously)
    #    20) # of tasks lost by this cpu as a result of a balance done when
    #        another cpu was idle (and had been previously)
    #    21) # of tasks gained by this cpu as a result of a balance done when
    #        this cpu was busy
    #    22) # of tasks lost by this cpu as a result of a balance done when
    #        another cpu was busy
    #    23) # of tasks gained by this cpu as a result of a balance done when
    #        this cpu had just become idle
    #    24) # of tasks lost by this cpu as a result of a balance done when
    #        another cpu had just become idle
    #
    # These are the fields from the domainN field, and deal with each of the
    # domains the previously mentioned cpu is in. The first field is a bit
    # mask which indicates the span of the domain being described.
    #
    # Next fifteen fields are statistics dealing with load_balance():
    #     2) # of times in this domain load_balance was called when the cpu
    #        was idle
    #     3) # of times in this domain load_balance was called when the cpu
    #        was busy
    #     4) # of times in this domain load_balance was called when the cpu
    #        was just becoming idle
    #     5) # of times in this domain load_balance tried to move one or more
    #        tasks and failed, when the cpu was idle
    #     6) # of times in this domain load_balance tried to move one or more
    #        tasks and failed, when the cpu was busy
    #     7) # of times in this domain load_balance tried to move one or more
    #        tasks and failed, when the cpu was just becoming idle
    #     8) sum of imbalances discovered (if any) with each call to
    #        load_balance in this domain when the cpu was idle
    #     9) sum of imbalances discovered (if any) with each call to
    #        load_balance in this domain when the cpu was busy
    #    10) sum of imbalances discovered (if any) with each call to
    #        load_balance in this domain when the cpu was just becoming idle
    #    11) # of times in this domain load_balance was called but did not
    #        find a busier queue while the cpu was idle
    #    12) # of times in this domain load_balance was called but did not
    #        find a busier queue while the cpu was busy
    #    13) # of times in this domain load_balance was called but did not
    #        find a busier queue while the cpu was just becoming idle
    #    14) # of times in this domain a busier queue was found while the
    #        cpu was idle but no busier group was found
    #    15) # of times in this domain a busier queue was found while the
    #        cpu was busy but no busier group was found
    #    16) # of times in this domain a busier queue was found while the
    #        cpu was just becoming idle but no busier group was found
    #
    # Next two fields are dealing with sched_balance_exec():
    #    17) # of times in this domain sched_balance_exec() successfully pushed
    #        a task to a new cpu
    #    18) # of times in this domain sched_balance_exec() tried but failed
    #        to push a task to a new cpu
    #    19) # of times in this domain try_to_wake_up() tried to move a task
    #        based on affinity and cache warmth
    #    20) # of times in this domain try_to_wake_up() tried to move a task
    #        based on load balancing
    #

}

# End of input: summarize the last snapshot against the baseline.  Only
# the -t mode prints it here.
summarize_data();
print_diffs() if ($opt_t);

#
# Print the report for the interval most recently summarized by
# summarize_data().
#
# Reads the globals @diff, @per_cpu_diff, @domain_diff_all, $max_cpu,
# $max_domain, $timestamp, $timestart and the option flags:
#   $opt_c  break pull_task() and latency figures down per cpu
#   $opt_d  report load balancing per scheduler domain rather than summed
#           over all domains
# Writes to the currently selected output handle; takes no arguments.
#
sub print_diffs {
    my $t;

    # Elapsed time since the first timestamp.  ">=" so that an elapsed time
    # of exactly zero is not mistaken for a counter rollover.
    if ($timestamp >= $timestart) {
        $t = $timestamp - $timestart;
    } else {
        $t = $timestamp + (~0 - $timestart);
    }

    # NOTE(review): the divisors treat $t as milliseconds -- confirm.
    printf "%02d:%02d:%02d--------------------------------------------------------------\n",
        $t/3600000, ($t/60000)%60, ($t/1000)%60;

    #
    # sched_yield() stats
    #
    my $yld_cnt      = $diff[$YLD_CNT];
    my $yld_act_only = $diff[$YLD_ACT_EMPTY] - $diff[$YLD_BOTH_EMPTY];
    my $yld_neither  = $yld_cnt - ($diff[$YLD_EXP_EMPTY] + $diff[$YLD_ACT_EMPTY]);

    printf "    %7d          sys_sched_yield()\n", $yld_cnt;
    printf "    %7d(%6.2f%%) found (only) active queue empty on current cpu\n",
        $yld_act_only, _pct($yld_act_only, $yld_cnt);
    printf "    %7d(%6.2f%%) found (only) expired queue empty on current cpu\n",
        $diff[$YLD_EXP_EMPTY], _pct($diff[$YLD_EXP_EMPTY], $yld_cnt);
    printf "    %7d(%6.2f%%) found both queues empty on current cpu\n",
        $diff[$YLD_BOTH_EMPTY], _pct($diff[$YLD_BOTH_EMPTY], $yld_cnt);
    printf "    %7d(%6.2f%%) found neither queue empty on current cpu\n\n",
        $yld_neither, _pct($yld_neither, $yld_cnt);

    #
    # schedule() stats
    #
    print "\n";
    printf "    %7d          schedule()\n", $diff[$SCHED_CNT];
    printf "    %7d(%6.2f%%) switched active and expired queues\n",
        $diff[$SCHED_SWITCH], _pct($diff[$SCHED_SWITCH], $diff[$SCHED_CNT]);
    printf "    %7d(%6.2f%%) used existing active queue\n",
        $diff[$SCHED_NOSWITCH], _pct($diff[$SCHED_NOSWITCH], $diff[$SCHED_CNT]);
    #
    # this stat is incorrect
    #
    #printf "    %7d(%6.2f%%) processor went idle\n\n",
    #    $diff[$SCHED_CNT] - $diff[$SCHED_SWITCH] - $diff[$SCHED_NOSWITCH], ...;

    #
    # try_to_wake_up() stats
    #
    my $ttwu_cnt     = $diff[$TTWU_CNT];
    my $ttwu_skipped = $ttwu_cnt - $diff[$TTWU_ATTEMPTS];
    my $ttwu_stayed  = $diff[$TTWU_ATTEMPTS] - $diff[$TTWU_MOVED];

    print "\n\n";
    printf "    %7d          try_to_wake_up()\n", $ttwu_cnt;
    printf "    %7d(%6.2f%%) task already running, or killed\n",
        $ttwu_skipped, _pct($ttwu_skipped, $ttwu_cnt);
    printf "    %7d(%6.2f%%) successfully moved a task to waking cpu\n",
        $diff[$TTWU_MOVED], _pct($diff[$TTWU_MOVED], $ttwu_cnt);
    printf "    %7d(%6.2f%%) task started on previous cpu\n",
        $ttwu_stayed, _pct($ttwu_stayed, $ttwu_cnt);
    if (!$opt_d) {
        # With -d these two figures are printed per domain further down.
        printf "\n    %7d(%6.2f%%) tried to move a task because of possible affinity\n",
            $domain_diff_all[$TTWU_TRY_AFFINE],
            _pct($domain_diff_all[$TTWU_TRY_AFFINE], $diff[$TTWU_ATTEMPTS]);
        printf "    %7d(%6.2f%%) tried to move a task to improve load balancing\n",
            $domain_diff_all[$TTWU_TRY_BALANCE],
            _pct($domain_diff_all[$TTWU_TRY_BALANCE], $diff[$TTWU_ATTEMPTS]);
    }

    #
    # wake_up_forked_thread() stats
    #
    print "\n\n";
    printf "    %7d          wake_up_forked_thread()\n", $diff[$WUFT_CNT];
    printf "    %7d(%6.2f%%) successfully moved a task\n",
        $diff[$WUFT_MOVED], _pct($diff[$WUFT_MOVED], $diff[$WUFT_CNT]);

    #
    # pull_task() stats
    #
    print "\n\n";
    my ($total, $idle, $busy, $alb, $newidle) = (0, 0, 0, 0, 0);
    foreach my $cpu (0 .. $max_cpu) {
        my $cpu_diff = $per_cpu_diff[$cpu] || [];
        $newidle += $cpu_diff->[$PT_GAINED_NEWIDLE];
        $idle    += $cpu_diff->[$PT_GAINED_IDLE];
        $busy    += $cpu_diff->[$PT_GAINED_NOTIDLE];
        $alb     += $cpu_diff->[$ALB_GAINED];
    }
    $total = $newidle + $idle + $busy + $alb;

    printf "    %7d          pull_task()\n", $total;
    if ($opt_c) {
        # [ gained index, lost index, circumstance ] for each kind of move
        my @moves = (
            [ $PT_GAINED_NEWIDLE, $PT_LOST_NEWIDLE, "when newly idle" ],
            [ $PT_GAINED_IDLE,    $PT_LOST_IDLE,    "while idle" ],
            [ $PT_GAINED_NOTIDLE, $PT_LOST_NOTIDLE, "when busy" ],
            [ $ALB_GAINED,        $ALB_LOST,        "from active_load_balance()" ],
        );
        foreach my $cpu (0 .. $max_cpu) {
            my $cpu_diff = $per_cpu_diff[$cpu] || [];
            foreach my $move (@moves) {
                my ($gained_i, $lost_i, $when) = @{$move};
                my $gained = $cpu_diff->[$gained_i];
                my $lost   = $cpu_diff->[$lost_i];
                next unless ($gained || $lost);
                # The label reads "lost/gained", so the lost count goes
                # first.
                printf "    %7d/%-7d  cpu %2d lost/gained task to/from another cpu %s\n",
                    $lost, $gained, $cpu, $when;
            }
        }
    } else {
        printf "    %7d(%6.2f%%) moved when newly idle\n",
            $newidle, _pct($newidle, $total);
        printf "    %7d(%6.2f%%) moved while idle\n",
            $idle, _pct($idle, $total);
        printf "    %7d(%6.2f%%) moved while busy\n",
            $busy, _pct($busy, $total);
        printf "    %7d(%6.2f%%) moved from active_load_balance()\n",
            $alb, _pct($alb, $total);
    }
    print "\n";


    #
    # function call counts
    #
    printf "    %7d          active_load_balance()\n", $diff[$ALB_CNT];
    printf "    %7d          sched_balance_exec()\n", $diff[$SBE_CNT];
    printf "    %7d          sched_migrate_task()\n", $diff[$SMT_CNT];
    printf("\n");

    #
    # latency stats
    #
    my ($totalcpu, $totaltripcnt, $totalrundelay) = (0, 0, 0);
    foreach my $cpu (0 .. $max_cpu) {
        my $cpu_diff = $per_cpu_diff[$cpu] || [];
        my $trips    = $cpu_diff->[$CPU_TRIPCNT];
        my $cputime  = $cpu_diff->[$CPU_CPUTIME];
        my $rundelay = $cpu_diff->[$CPU_RUNDELAY];

        # Only cpus that actually ran something contribute.
        next unless ($trips && ($cputime || $rundelay));

        $totalcpu      += $cputime;
        $totaltripcnt  += $trips;
        $totalrundelay += $rundelay;
        if ($opt_c) {
            printf "    %6.2f/%-6.2f    avg runtime/latency on cpu %d (ms)\n",
                $cputime/$trips, $rundelay/$trips, $cpu;
        }
    }
    # No cpu may have qualified above; report zeroes rather than dying on a
    # division by zero.
    printf "    %6.2f/%-6.2f    avg runtime/latency over all cpus (ms)\n",
        $totaltripcnt ? $totalcpu/$totaltripcnt : 0,
        $totaltripcnt ? $totalrundelay/$totaltripcnt : 0;

    printf("\n");

    #
    # domain info
    #
    if ($opt_d) {
        foreach my $domain (0 .. $max_domain) {
            my $stats = $diff[29+$domain] || [];
            #print "  domain$domain @{$stats}\n";
            print "[scheduler domain #$domain]\n";
            _print_balance_stats($stats);

            #
            # try_to_wake_up() stats
            #
            my $affine  = $stats->[$TTWU_TRY_AFFINE];
            my $balance = $stats->[$TTWU_TRY_BALANCE];
            printf "\n" if ($affine || $balance);
            printf "    %7d          try_to_wake_up() tried to push a task because of cache warmth\n",
                $affine if ($affine);
            printf "    %7d          try_to_wake_up() tried to move a task to improve load balancing\n",
                $balance if ($balance);
            print "\n";
        }
    } else {
        _print_balance_stats(\@domain_diff_all);
        print "\n";
    }
}

#
# Percentage helper: 100*$part/$whole, or 0 when $whole is zero or
# undefined.
#
sub _pct {
    my ($part, $whole) = @_;
    return $whole ? 100*$part/$whole : 0;
}

#
# Print the load_balance() and sched_balance_exec() report for one set of
# domain counters.  $stats is a reference to an array indexed by the $LB_*
# and $SBE_* constants (one domain's diffs, or the all-domain totals).
#
sub _print_balance_stats {
    my ($stats) = @_;
    $stats ||= [];

    my $lb_cnt_total = $stats->[$LB_CNT_IDLE] + $stats->[$LB_CNT_NEWIDLE] +
        $stats->[$LB_CNT_NOIDLE];
    printf "    %7d          load_balance()\n", $lb_cnt_total;

    _print_lb_mode($stats, $lb_cnt_total, "while idle",
        $LB_CNT_IDLE, $LB_FAILED_IDLE, $LB_IMBALANCE_IDLE,
        $LB_NOBUSYQ_IDLE, $LB_NOBUSYG_IDLE);
    _print_lb_mode($stats, $lb_cnt_total, "while busy",
        $LB_CNT_NOIDLE, $LB_FAILED_NOIDLE, $LB_IMBALANCE_NOIDLE,
        $LB_NOBUSYQ_NOIDLE, $LB_NOBUSYG_NOIDLE);
    _print_lb_mode($stats, $lb_cnt_total, "when newly idle",
        $LB_CNT_NEWIDLE, $LB_FAILED_NEWIDLE, $LB_IMBALANCE_NEWIDLE,
        $LB_NOBUSYQ_NEWIDLE, $LB_NOBUSYG_NEWIDLE);

    #
    # sched_balance_exec() stats
    #
    # NOTE(review): "tried" prints $SBE_PUSHED and "succeeded" prints
    # $SBE_ATTEMPTS, which looks swapped going by the names; kept as it was
    # until confirmed against the kernel's field order.
    #
    printf "\n    %7d          sched_balance_exec() tried to push a task\n",
        $stats->[$SBE_PUSHED];
    printf "    %7d          sched_balance_exec() succeeded in pushing a task\n",
        $stats->[$SBE_ATTEMPTS] if ($stats->[$SBE_PUSHED]);
}

#
# Print the load_balance() lines for one cpu state.
#
#   $stats         reference to the array of domain counters
#   $lb_cnt_total  load_balance() calls in all three states (denominator
#                  of the "called ..." percentage)
#   $when          text completing "called ...", e.g. "while idle"
#   $cnt_i, $failed_i, $imbalance_i, $nobusyq_i, $nobusyg_i
#                  indexes into $stats of this state's counters
#
sub _print_lb_mode {
    my ($stats, $lb_cnt_total, $when,
        $cnt_i, $failed_i, $imbalance_i, $nobusyq_i, $nobusyg_i) = @_;

    my $cnt     = $stats->[$cnt_i];
    my $failed  = $stats->[$failed_i];
    my $nobusyq = $stats->[$nobusyq_i];
    my $nobusyg = $stats->[$nobusyg_i];

    printf "    %7d(%6.2f%%) called %s\n",
        $cnt, _pct($cnt, $lb_cnt_total), $when;
    printf "                     %7d(%6.2f%%) tried but failed to move any tasks\n",
        $failed, _pct($failed, $cnt)
        if ($failed);
    printf "                     %7d(%6.2f%%) found no busier queue\n",
        $nobusyq, _pct($nobusyq, $cnt)
        if ($nobusyq);
    printf "                     %7d(%6.2f%%) found no busier group\n",
        $nobusyg, _pct($nobusyg, $cnt)
        if ($nobusyg);

    # Whatever is left after the three failure modes moved a task.
    my $moved = $cnt - ($nobusyg + $nobusyq + $failed);
    return unless ($moved);

    # Guard on the denominators: inconsistent counters can leave $cnt or
    # $moved + $failed at zero while $moved is not.
    printf "                     %7d(%6.2f%%) succeeded in moving " .
        "at least one task\n",
        $moved, _pct($moved, $cnt);

    my $attempts  = $moved + $failed;
    my $imbalance = $attempts ? $stats->[$imbalance_i] / $attempts : 0;
    my $fmt;
    if ($imbalance < 10) {
        $fmt = "%7.3f";
    } elsif ($imbalance < 100) {
        $fmt = "%7.2f";
    } else {
        $fmt = "%7.1f";
    }
    printf "                                      (average imbalance: $fmt)\n",
        $imbalance;
}
