#!/usr/bin/perl

use strict;

#scheduler:queue:high          []                arch, part, med
#scheduler:queue:med           []                arch, part, med
#scheduler:queue:low           []                arch, part, med
#scheduler:looking             [ classes ]       arch, part, med
#scheduler:times               [ type ]          arch, part, med
#
#repserver:reqpermin           [ partition ]     med
#repserver:jobs:wait:code      [ classes ]       part, med
#repserver:jobs:build:code     [ classes ]       part, med
#repserver:jobsperrepo         [ classes ]       part, med
#
#srcserver:reqpermin                             med
#
#repserver:
#  reqpermin = medval
#  jobs      = {cact}[2]medval
#
#scheduler:
#  queue     = [2]medval
#  looking   = {ct}
#  times     = {type}
#
#srcserver:
#  reqpermin = medval
#  commits   = {c}
#
#publisher:
#  jobs      = {ccf}[2]medval
#
#dispatcher:
#  workers   = arch[2]
#  queue     = arch[3]
#
#getbinariesproxy:
#  req       = [3]medval
#  reqsize   = [3]medval
#

# Put the directory this script was started from at the front of @INC so
# that modules located next to it (BSConfig is require'd further down)
# can be loaded.
BEGIN {
  my ($wd) = $0 =~ m-(.*)/- ;
  # no '/' in $0: fall back to the current directory
  $wd ||= '.';
  unshift @INC,  "$wd";
}

use Storable ();
use Data::Dumper;
use Time::Local;


# Description of the known tables, keyed by "<type>:<table>".
#   hashkey     - name(s) of the components of the table's hash keys; the
#                 generators below join the components with '/', and the
#                 'project' component holds the class from proj2class()
#   arraykey    - name of the dimension selected by the array index
#   arrayvalues - names of the array slots, in index order
#   needfilter  - keys the plot command insists on: each one must either
#                 have a filter (-f) or be used as the plot key
#   unit        - unit of the stored values
# The matching entry is copied into each generated data file and is looked
# up again by the plot command.
my %tblinfo = (
  'srcserver:commits' => {
    'hashkey' => 'project',
    'unit' => 'count',
  },
  'srcserver:reqpermin' => {
    'unit' => 'count',
  },
  'scheduler:repos' => {
    'unit' => 'count',
  },
  'scheduler:looking' => {
    'hashkey' => [ 'project', 'type' ],
    'unit' => 'seconds',
  },
  'scheduler:times' => {
    'hashkey' => 'type',
    'unit' => 'seconds',
  },
  'scheduler:queue' => {
    'arraykey' => 'type',
    'arrayvalues' => [ 'high', 'med', 'low' ],
    'needfilter' => [ 'type' ],
    'unit' => 'count',
  },
  'scheduler:pkgperprj' => {
    'unit' => 'count',
  },
  'scheduler:repoperprj' => {
    'unit' => 'count',
  },
  'scheduler:ulrepoperprj' => {
    'unit' => 'count',
  },
  'repserver:jobs' => {
    'hashkey' => [ 'project', 'arch', 'code', 'buildtype' ],
    'arraykey' => 'type',
    'arrayvalues' => [ 'wait', 'build' ],
    'needfilter' => [ 'type' ],
    'unit' => 'seconds',
  },
  'repserver:jobsperrepo' => {
    'hashkey' => [ 'project', 'arch' ],
    'unit' => 'count',
  },
  'repserver:reqpermin' => {
    'unit' => 'count',
  },
  'publisher:jobs' => {
    'hashkey' => [ 'project', 'code', 'flavor' ],
    'arraykey' => 'type',
    'arrayvalues' => [ 'wait', 'publish' ],
    'needfilter' => [ 'type' ],
    'unit' => 'seconds',
  },
  'signer:jobs' => {
    'hashkey' => [ 'project', 'arch', 'code', 'flavor' ],
    'arraykey' => 'type',
    'arrayvalues' => [ 'wait', 'sign' ],
    'needfilter' => [ 'type' ],
    'unit' => 'seconds',
  },
  'dispatcher:workers' => {
    'hashkey' => 'arch',
    'arraykey' => 'type',
    'arrayvalues' => [ 'building', 'idle', 'available' ],
    'unit' => 'count',
  },
  'dispatcher:queue' => {
    'hashkey' => 'arch',
    'arraykey' => 'type',
    'arrayvalues' => [ 'wait', 'build', 'idle' ],
    'needfilter' => [ 'type' ],
    'unit' => 'count',
  },
  'servicedispatch:jobs' => {
    'hashkey' => [ 'project', 'package', 'code' ],
    'arraykey' => 'type',
    'arrayvalues' => [ 'wait', 'run' ],
    'needfilter' => [ 'type' ],
    'unit' => 'seconds',
  },
  'getbinariesproxy:req' => {
    'hashkey' => [ 'binarytype' ],
    'arraykey' => 'type',
    'arrayvalues' => [ 'hit', 'miss', 'all' ],
    'needfilter' => [ 'type' ],
    'unit' => 'count',
  },
  'getbinariesproxy:reqsize' => {
    'hashkey' => [ 'binarytype' ],
    'arraykey' => 'type',
    'arrayvalues' => [ 'hit', 'miss', 'all' ],
    'needfilter' => [ 'type' ],
    'unit' => 'kbyte',
  },
  'getbinariesproxy:reqpermin' => {
    'unit' => 'count',
  },
  'multi:projectbuild' => {
    'unit' => 'seconds',
    'needfilter' => [ 'partition', 'project' ],
  },
);

# Plot key used for a table ("<type>:<table>") when none is given with -k;
# tables not listed here fall back to 'percent'.
my %defaultkey = (
  'srcserver:commits' => 'project',
  'scheduler:repos' => 'partition',
  'scheduler:looking' => 'project',
  'scheduler:times' => 'type',
  'repserver:jobs' => 'project',
  'repserver:jobsperrepo' => 'project',
  'repserver:reqpermin' => 'partition',
);

# Project name prefixes that define the project classes, see proj2class().
# The list is searched in order and the first match wins, so a longer
# prefix (e.g. 'openSUSE:Maintenance') has to come before a shorter one
# that it starts with ('openSUSE').
my @prefixes = (
  'home',
  'openSUSE:Maintenance',
  'openSUSE',
  'SUSE:Maintenance',
  'SUSE',
  'devel',
  'PTF',
  'SLL',
);

# Conversion factors between units, keyed by "<bigger unit>:<smaller unit>":
# the value is the number of smaller units that make up one bigger unit.
# Byte units use powers of 1024, counts use powers of 1000.
# NOTE(review): the consumer is outside the part of the file reviewed here;
# presumably this is used together with the plot -u option -- confirm.
my %unitscale = (
  'kbyte:byte'		=> 1024,
  'mbyte:byte'		=> 1024 * 1024,
  'gbyte:byte'		=> 1024 * 1024 * 1024,
  'tbyte:byte'		=> 1024 * 1024 * 1024 * 1024,
  'mbyte:kbyte'		=> 1024,
  'gbyte:kbyte'		=> 1024 * 1024,
  'tbyte:kbyte'		=> 1024 * 1024 * 1024,
  'gbyte:mbyte'		=> 1024,
  'tbyte:mbyte'		=> 1024 * 1024,
  'tbyte:gbyte'		=> 1024,

  'K:count'		=> 1000,
  'M:count'		=> 1000 * 1000,
  'G:count'		=> 1000 * 1000 * 1000,
  'T:count'		=> 1000 * 1000 * 1000 * 1000,

  'minutes:seconds'	=> 60,
  'hours:seconds'	=> 60 * 60,
  'hours:minutes'	=> 60,
);

# default filenames
# %Y, %M and %D are replaced by year, month and day of the data, %F by a
# name built from type, partition and arch ('collection' for the combine
# command).  $filename is the default for "generate" and can be overridden
# with -o; $filename_col is the default for "combine".
my $filename     = '%Y/%M-%D/%F.stat';
my $filename_col = '%Y/%M-%D.stat';

# percentile points that the generators store for every series
my @percent = qw(0 0.1 0.2 0.5 1 2 3 4 5 10 15 20 25 30 40 50 60
                 70 75 80 85 90 95 96 97 98 99 99.5 99.8 99.9 100);

### helpers from BSUtil

# Return the names of all entries of a directory except "." and "..".
# An unreadable or missing directory yields an empty list.
sub ls {
  my ($dir) = @_;
  my $dh;
  return () unless opendir($dh, $dir);
  my @entries;
  for my $entry (readdir($dh)) {
    next if $entry eq '.' || $entry eq '..';
    push @entries, $entry;
  }
  closedir($dh);
  return @entries;
}

# Create a directory together with all missing parent directories.
# Returns 1 if the directory exists afterwards.  On failure a warning
# with the mkdir error is printed and undef is returned.
sub mkdir_p {
  my ($dir) = @_;

  return 1 if -d $dir;
  my $parent;
  if ($dir =~ /^(.+)\//) {
    $parent = $1;
    return undef unless mkdir_p($parent);
  }
  until (mkdir($dir, 0777)) {
    # remember the mkdir error, the file tests below may overwrite $!
    my $err = $!;
    # the directory exists after all, nothing left to do
    return 1 if -d $dir;
    if (defined($parent) && !-d $parent) {
      # the parent is gone again: recreate it and retry the mkdir
      return undef unless mkdir_p($parent);
      next;
    }
    $! = $err;
    warn("mkdir: $dir: $!\n");
    return undef;
  }
  return 1;
}

# Return a deep copy of the given data structure (reference).
sub clone {
  my ($data) = @_;
  return Storable::dclone($data);
}

# Write a data structure to file $fn in Storable network order.  If a
# second file name $fnf is given, $fn is renamed to $fnf afterwards.
# Dies if writing or renaming fails.
sub store {
  my ($fn, $fnf, $dd) = @_;
  Storable::nstore($dd, $fn) || die("nstore $fn: $!\n");
  if (!defined($fnf)) {
    return;
  }
  rename($fn, $fnf) || die("rename $fn $fnf: $!\n");
}

# Read a Storable data structure.  $fn is either a file name or, if it is
# a reference, a file handle to read from.  Dies if nothing could be read.
sub retrieve {
  my ($fn) = @_;
  my $dd;
  if (ref($fn)) {
    $dd = Storable::fd_retrieve($fn);
  } else {
    $dd = Storable::retrieve($fn);
  }
  die("retrieve $fn: $!\n") unless $dd;
  return $dd;
}

# Serialize a data structure into a string: the network order Storable
# image of the data, prefixed with 'pst0'.
# NOTE(review): 'pst0' presumably is the header Storable puts in front of
# stored files, so that the string can be written out as such -- confirm.
sub tostorable {
  my ($data) = @_;
  my $frozen = Storable::nfreeze($data);
  return 'pst0'.$frozen;
}

# Remove duplicates from a list: only the first occurrence of each value
# is kept, the order of the remaining elements is unchanged.
sub unify {
  my %seen;
  return grep {!$seen{$_}++} @_;
}

### other helpers

# Map a project name to its class: the first entry of @prefixes that is
# either the project name itself or is followed by a ':' in the project
# name.  Projects that match no prefix belong to class 'other'.
sub proj2class {
  my ($projid) = @_;
  my $name = "$projid:";
  for my $prefix (@prefixes) {
    return $prefix if index($name, "$prefix:") == 0;
  }
  return 'other';
}

# Condense a list of numbers into a statistics record
#   [ count, sum, sum of squares, percentile points, values ]
# $d is a reference to the (non-empty) list of numbers.  If $perc, a
# reference to a list of percentile points (0..100), is given, the values
# element holds the sorted data sampled at those points.  Without $perc
# the points element is undef and the values element is the complete
# sorted data.
sub toperc {
  my ($d, $perc) = @_;
  die("toperc: need data\n") unless @$d;
  my @sorted = sort {$a <=> $b} @$d;
  my $count = scalar(@sorted);
  my ($total, $total2) = (0, 0);
  for my $v (@$d) {
    $total += $v;
    $total2 += $v * $v;
  }
  return [ $count, $total, $total2, undef, \@sorted ] unless $perc;
  my @picked;
  for my $point (@$perc) {
    my $idx = int($count * $point / 100);
    # the 100% point would index one past the end
    $idx = $count - 1 if $idx >= $count;
    push @picked, $sorted[$idx];
  }
  return [ $count, $total, $total2, $perc, \@picked ];
}

# Reconstruct an (approximate) sorted list of data values from a
# statistics record as created by toperc().
#
# If the record carries no percentile points (element 3 is undef) the
# values element already is the complete sorted data and is returned as is.
# Otherwise every stored value is put back at the index it was sampled
# from and the indices in between are filled by linear interpolation
# (truncated to integers), so the result has as many elements as the
# record's count says.  A count of zero yields an empty list.
#
# Dies with "fromperc: bad data" if the record is malformed.
sub fromperc {
  my ($d) = @_;
  die("fromperc: bad data\n") unless @$d >= 5 && ref($d->[4]) eq 'ARRAY';
  return @{$d->[4]} unless defined($d->[3]);
  die("fromperc: bad data\n") unless ref($d->[3]) eq 'ARRAY' && @{$d->[3]} == @{$d->[4]};
  my $n = $d->[0];
  return () unless $n;
  my @m;
  my $i = 0;
  my $m = $d->[4];
  # same index calculation as in toperc()
  for my $p (@{$d->[3]}) {
    my $t = int($n * $p / 100);
    $t = $n - 1 if $t >= $n;
    $m[$t] = $m->[$i++];
  }
  # fill gaps
  my $lasti = 0;
  for ($i = 0; $i < $n; $i++) {
    next unless defined $m[$i];
    for (my $j = $lasti + 1; $j < $i; $j++) {
      # the closer $j is to $lasti the more weight $m[$lasti] gets
      $m[$j] = int(($m[$lasti] * ($i - $j) + $m[$i] * ($j - $lasti)) / ($i - $lasti));
    }
    $lasti = $i;
  }
  return @m;
}

# Return the value of a statistics record (see toperc()) at percentile
# point $p.  If the record stores exactly that point its value is returned
# directly.  Otherwise $p is limited to 0..100 and the value is looked up
# in the data reconstructed by fromperc().
# Dies with "selperc: bad data" if the record is malformed.
sub selperc {
  my ($d, $p) = @_;
  die("selperc: bad data\n") unless @$d >= 5 && ref($d->[4]) eq 'ARRAY';
  my ($points, $values) = ($d->[3], $d->[4]);
  if (defined($points)) {
    die("selperc: bad data\n") unless ref($points) eq 'ARRAY' && @$points == @$values;
    for my $idx (0 .. $#$values) {
      return $values->[$idx] if $points->[$idx] == $p;
    }
  }
  if ($p < 0) {
    $p = 0;
  } elsif ($p > 100) {
    $p = 100;
  }
  my @all = fromperc($d);
  my $count = scalar(@all);
  my $pos = int($count * $p / 100);
  $pos = $count - 1 if $pos >= $count;
  return $all[$pos];
}

# Merge several statistics records (see toperc()) into a single one.
# If none of the records carries percentile points the raw values are
# simply pooled.  Otherwise the data of every record is reconstructed with
# fromperc() and sampled again at the union of all percentile points;
# count, sum and sum of squares are added up from the input records.
sub combperc {
  my (@records) = @_;
  die("nothing to combine\n") unless @records;
  # combine percent arrays
  my %points;
  for my $rec (grep {defined($_->[3])} @records) {
    $points{$_} = 1 for @{$rec->[3]};
  }
  if (!%points) {
    my @pooled;
    push @pooled, @{$_->[4]} for @records;
    return toperc(\@pooled, undef);
  }
  my $perc = [ sort {$a <=> $b} keys %points ];
  my ($count, $total, $total2) = (0, 0, 0);
  my @values;
  for my $rec (@records) {
    $count += $rec->[0];
    $total += $rec->[1];
    $total2 += $rec->[2];
    push @values, fromperc($rec);
  }
  my $combined = toperc(\@values, $perc);
  die("combperc: internal error\n") unless $combined->[0] == $count;
  # use the exact sums of the inputs, not those of the reconstructed data
  $combined->[1] = $total;
  $combined->[2] = $total2;
  return $combined;
}

# Return a copy of a statistics record (see toperc()) with all values
# multiplied by $scale: the sum and the stored values are scaled by $scale,
# the sum of squares by its square.  The record passed in is not changed.
# Dies with "scaleperc: bad data" if the record is malformed.
sub scaleperc {
  my ($d, $scale) = @_;
  die("scaleperc: bad data\n") unless @$d >= 5 && ref($d->[4]) eq 'ARRAY';
  my $scaled = clone($d);
  $scaled->[1] *= $scale;
  $scaled->[2] *= $scale * $scale;
  for my $value (@{$scaled->[4]}) {
    $value *= $scale;
  }
  return $scaled;
}

# parse and strip date/time
# Matches the "YYYY-MM-DD HH:MM:SS: [<digits and dots>] " start of a log
# line.  Captures: $1 = day, $2 = hour, $3 = minute, $4 = second.
my $datetime_re = qr/^(\d{4}-\d\d-\d\d) (\d\d):(\d\d):(\d\d): \[[0-9.]+\] +/;

# the first argument is the command: gen/generate, combine, list or plot
my $cmd = shift @ARGV;
die("usage: bs_statistics <cmd>\n") unless $cmd;

# generator type; stays empty unless the command is gen/generate
my $gentype = '';
# day (YYYY-MM-DD) the generators work on: either given with -d or taken
# from the first usable log line
my $day;

if ($cmd eq 'gen' || $cmd eq 'generate') {
  # the type may be given as plain first argument or with -t
  $gentype = shift @ARGV if @ARGV && $ARGV[0] !~ /^-/;
  # options: -t <type>, -o <output file name>, -d <day>
  while (@ARGV) {
    if ($ARGV[0] eq '-t') {
      (undef, $gentype) = splice(@ARGV, 0, 2);
      next;
    }
    if ($ARGV[0] eq '-o') {
      (undef, $filename) = splice(@ARGV, 0, 2);
      next;
    }
    if ($ARGV[0] eq '-d') {
      (undef, $day) = splice(@ARGV, 0, 2);
      next;
    }
    last;
  }
  die("usage: bs_statistics generate -t <type>\n") unless $gentype;
  # the generators read the partition settings from BSConfig
  require BSConfig;
}

# time of the first and the last processed log line, in seconds since
# midnight
my $start;
my $end;

# the statistics data filled in by the generators / the combine command
my $data = {};

# Generator 'repserver': reads a log from STDIN and collects
#   - wait and build times of jobs per "class/arch/code[/buildtype]",
#   - the number of jobs per project/repository/arch, grouped by
#     "class/arch",
#   - the number of requests per minute.
if ($gentype eq 'repserver') {
  my %jwait;
  my %jbuild;
  my %jprp;
  my %reqpermin;
  while (<STDIN>) {
    chomp;
    if (s/$datetime_re//) {
      if (!defined($start)) {
	# looking for the first line of the day to process
	next if $day && $day ne $1;
	# do not start with a line whose hour is after 12
	next if $2 > 12;
	$start = $2 * 3600 + $3 * 60 + $4;
	$day = $1;
      }
      # stop as soon as the next day begins
      last if $1 ne $day;
      $end = $2 * 3600 + $3 * 60 + $4;
      s/ \[[-_\.a-zA-Z0-9]+\]$//;		# strip request id
      # only request lines are counted
      next unless /^(GET|PUT|POST|DELETE) \(.*?\)\s+\/([^?]*)(?:\?(.*))?$/;
      my $min = int($end / 60);
      $reqpermin{$min}++;
      next;
    }
    # lines without a timestamp
    # NOTE(review): a start time of exactly 00:00:00 is 0 and thus false
    # here (the output code further down rejects such input as well).
    next unless $start;
    next unless /^job statistics: /;
    # skip the first three words, the rest is
    # "<project>/<repository>/<arch> <code> <times> <worker> [<buildtype>]"
    my (undef, undef, undef, $in_1) = split(' ', $_, 4);
    my ($projid, $repoid, $in_2) = split('/', $in_1, 3);
    my ($arch, $code, $times, $worker, $buildtype) = split(' ', $in_2);
    # times is "<scheduled>-<started>-<ended>"
    my ($tsched, $tstart, $tend) = split('-', $times);
    my $twait = $tstart - $tsched;
    my $tbuild = $tend - $tstart;
    # negative durations are treated as zero
    $twait = 0 if $twait < 0;
    $tbuild = 0 if $tbuild < 0;
    my $c = proj2class($projid);
    my $cac = "$c/$arch/$code";
    $cac .= "/$buildtype" if $buildtype;
    push @{$jwait{$cac}}, $twait;
    push @{$jbuild{$cac}}, $tbuild;
    $jprp{"$projid/$repoid/$arch"}++;
  }
  die("bad input: no day found\n") unless $day;
  # jobs: key => [ wait time record, build time record ]
  my %jobdata;
  for my $cac (sort keys %jwait) {
    $jobdata{$cac} = [ toperc($jwait{$cac}, \@percent), toperc($jbuild{$cac}, \@percent) ];
  }
  # jobsperrepo: distribution of the per project/repo/arch job counts
  my %jobsperrepo;
  for my $prpa (sort keys %jprp) {
    my ($projid, $repoid, $arch) = split('/', $prpa, 3);
    my $c = proj2class($projid);
    push @{$jobsperrepo{"$c/$arch"}}, $jprp{$prpa};
  }
  for my $ca (sort keys %jobsperrepo) {
    $jobsperrepo{$ca} = toperc($jobsperrepo{$ca}, \@percent);
  }
  # one entry for every minute from start to end, 0 for minutes without
  # requests
  my @reqpermin;
  my $min = int($start / 60);
  while ($min * 60 < $end) {
    push @reqpermin, $reqpermin{$min++} || 0;
  }
  $data->{'version'} = 1;
  $data->{'type'} = 'repserver';
  $data->{'jobs'} = \%jobdata;
  $data->{'jobsperrepo'} = \%jobsperrepo;
  $data->{'reqpermin'} = toperc(\@reqpermin, \@percent) if @reqpermin;
  $data->{'partition'} = $BSConfig::partition if $BSConfig::partition;
}

# Generator 'srcserver': reads a log from STDIN and collects the number of
# commits per project class and the number of requests per minute.
if ($gentype eq 'srcserver') {
  my %commits;
  my %reqpermin;

  while (<STDIN>) {
    chomp;
    # only lines with a timestamp are of interest
    next unless s/$datetime_re//;
    if (!defined($start)) {
      # looking for the first line of the day to process
      next if $day && $day ne $1;
      # do not start with a line whose hour is after 12
      next if $2 > 12;
      $start = $end = $2 * 3600 + $3 * 60 + $4;
      $day = $1;
    }
    # stop as soon as the next day begins
    last if $1 ne $day;
    $end = $2 * 3600 + $3 * 60 + $4;
    s/ \[[-_\.a-zA-Z0-9]+\]$//;		# strip request id
    next unless /^(GET|PUT|POST|DELETE) \(.*?\)\s+\/([^?]*)(?:\?(.*))?$/;
    # method, path (without leading slash) and query string of the request
    my ($act, $path, $qu) = ($1, $2, $3);
    my @p = split ('/', $path);
    # requests with less than two path components are not counted
    next unless @p > 1;
    $qu = '' unless $qu;
    my $min = int($end / 60);
    $reqpermin{$min}++;
    # a commit is a /source/<project>/... request with cmd=commit
    if ($p[0] eq 'source' && $qu =~ /cmd=commit/) {
      next if $qu =~ /withvalidate=1/;    # hack
      my $c = proj2class($p[1]);
      $commits{$c}++;
    }
  }
  die("bad input: no day found\n") unless $day;
  # one entry for every minute from start to end, 0 for minutes without
  # requests
  my @reqpermin;
  my $min = int($start / 60);
  while ($min * 60 < $end) {
    push @reqpermin, $reqpermin{$min++} || 0;
  }
  $data->{'version'} = 1;
  $data->{'type'} = 'srcserver';
  $data->{'commits'} = \%commits;
  $data->{'reqpermin'} = toperc(\@reqpermin, \@percent) if @reqpermin;
}

# Generator 'getbinariesproxy': reads a log from STDIN and collects, per
# binary type, the number and the size of hits, misses and both together,
# plus the number of requests per minute.
if ($gentype eq 'getbinariesproxy') {
  my %reqpermin;
  my %req;
  my %reqsize;

  while (<STDIN>) {
    chomp;
    if (s/$datetime_re//) {
      if (!defined($start)) {
	# looking for the first line of the day to process
	next if $day && $day ne $1;
	# do not start with a line whose hour is after 12
	next if $2 > 12;
	$start = $2 * 3600 + $3 * 60 + $4;
	$day = $1;
      }
      # stop as soon as the next day begins
      last if $1 ne $day;
      $end = $2 * 3600 + $3 * 60 + $4;
      s/ \[[-_\.a-zA-Z0-9]+\]$//;		# strip request id
      # only request lines are counted
      next unless /^(GET|PUT|POST|DELETE) \(.*?\)\s+\/([^?]*)(?:\?(.*))?$/;
      my $min = int($end / 60);
      $reqpermin{$min}++;
      next;
    }
    # lines without a timestamp
    # NOTE(review): a start time of exactly 00:00:00 is 0 and thus false
    # here (the output code further down rejects such input as well).
    next unless $start;
    next unless /^getbinariesproxy statistics: /;
    # skip the first two words, then: hit count, hit size, miss count,
    # miss size, binary type and the rest of the line
    my (undef, undef, $nhit, $khit, $nmiss, $kmiss, $binarytype, $prpa) = split(' ', $_, 8);
    $binarytype ||= 'unknown';
    # array slots: 0 = hit, 1 = miss, 2 = all
    push @{$req{$binarytype}->[0]}, $nhit;
    push @{$req{$binarytype}->[1]}, $nmiss;
    push @{$req{$binarytype}->[2]}, $nhit + $nmiss;
    push @{$reqsize{$binarytype}->[0]}, $khit;
    push @{$reqsize{$binarytype}->[1]}, $kmiss;
    push @{$reqsize{$binarytype}->[2]}, $khit + $kmiss;
  }
  die("bad input: no day found\n") unless $day;
  # one entry for every minute from start to end, 0 for minutes without
  # requests
  my @reqpermin;
  my $min = int($start / 60);
  while ($min * 60 < $end) {
    push @reqpermin, $reqpermin{$min++} || 0;
  }
  $data->{'version'} = 1;
  $data->{'type'} = 'getbinariesproxy';
  # replace the collected value lists by statistics records
  for (values %req, values %reqsize) {
    $_->[0] = toperc($_->[0], \@percent);
    $_->[1] = toperc($_->[1], \@percent);
    $_->[2] = toperc($_->[2], \@percent);
  }
  $data->{'req'} = \%req;
  $data->{'reqsize'} = \%reqsize;
  $data->{'reqpermin'} = toperc(\@reqpermin, \@percent) if @reqpermin;
  $data->{'partition'} = $BSConfig::getbinariesproxyserver_partition if $BSConfig::getbinariesproxyserver_partition;
}

# Generator 'publisher': reads a log from STDIN and collects the wait and
# publish times of publish jobs per "class/code/flavor".
if ($gentype eq 'publisher') {
  my %jwait;
  my %jbuild;
  while (<STDIN>) {
    chomp;
    s/^(?:\S+\.)?\d+: //;	# strip prefix
    if (s/$datetime_re//) {
      if (!defined($start)) {
	# looking for the first line of the day to process
	next if $day && $day ne $1;
	# do not start with a line whose hour is after 12
	next if $2 > 12;
	$start = $2 * 3600 + $3 * 60 + $4;
	$day = $1;
      }
      # stop as soon as the next day begins
      last if $1 ne $day;
      $end = $2 * 3600 + $3 * 60 + $4;
      next;
    }
    # lines without a timestamp
    # NOTE(review): a start time of exactly 00:00:00 is 0 and thus false
    # here (the output code further down rejects such input as well).
    next unless $start;
    next unless /^publish statistics: /;
    # skip the first two words, the rest is
    # "<project>/<repository>/<arch> <code> <times> <flavor>"
    my (undef, undef, $in_1) = split(' ', $_, 3);
    my ($projid, $repoid, $in_2) = split('/', $in_1, 3);
    my ($arch, $code, $times, $flavor) = split(' ', $in_2);
    # times is "<scheduled>-<started>-<ended>"
    my ($tsched, $tstart, $tend) = split('-', $times);
    my $twait = $tstart - $tsched;
    my $tbuild = $tend - $tstart;
    # negative durations are treated as zero
    $twait = 0 if $twait < 0;
    $tbuild = 0 if $tbuild < 0;
    my $c = proj2class($projid);
    my $ccf = "$c/$code/$flavor";
    push @{$jwait{$ccf}}, $twait;
    push @{$jbuild{$ccf}}, $tbuild;
  }
  die("bad input: no day found\n") unless $day;
  # jobs: key => [ wait time record, publish time record ]
  my %jobdata;
  for my $ccf (sort keys %jwait) {
    $jobdata{$ccf} = [ toperc($jwait{$ccf}, \@percent), toperc($jbuild{$ccf}, \@percent) ];
  }
  $data->{'version'} = 1;
  $data->{'type'} = 'publisher';
  $data->{'jobs'} = \%jobdata;
  $data->{'partition'} = $BSConfig::partition if $BSConfig::partition;
}

# Generator 'servicedispatch': reads a log from STDIN and collects the wait
# and run times of service jobs per "class/code".
# NOTE(review): the 'servicedispatch:jobs' entry of %tblinfo lists three
# hash key components (project, package, code) while the keys built here
# have only two -- confirm which one is intended.
if ($gentype eq 'servicedispatch') {
  my %jwait;
  my %jbuild;
  while (<STDIN>) {
    chomp;
    if (s/$datetime_re//) {
      if (!defined($start)) {
	# looking for the first line of the day to process
	next if $day && $day ne $1;
	# do not start with a line whose hour is after 12
	next if $2 > 12;
	$start = $2 * 3600 + $3 * 60 + $4;
	$day = $1;
      }
      # stop as soon as the next day begins
      last if $1 ne $day;
      $end = $2 * 3600 + $3 * 60 + $4;
      next;
    }
    # lines without a timestamp
    # NOTE(review): a start time of exactly 00:00:00 is 0 and thus false
    # here (the output code further down rejects such input as well).
    next unless $start;
    next unless /^servicedispatch statistics: /;
    # skip the first three words, the rest is
    # "<project>/<package>/<ignored> <code> <times>"
    my (undef, undef, undef, $in_1) = split(' ', $_, 4);
    my ($projid, $packid, $in_2) = split('/', $in_1, 3);
    my (undef, $code, $times) = split(' ', $in_2);
    # times is "<scheduled>-<started>-<ended>"
    my ($tsched, $tstart, $tend) = split('-', $times);
    my $twait = $tstart - $tsched;
    my $tbuild = $tend - $tstart;
    # negative durations are treated as zero
    $twait = 0 if $twait < 0;
    $tbuild = 0 if $tbuild < 0;
    my $c = proj2class($projid);
    push @{$jwait{"$c/$code"}}, $twait;
    push @{$jbuild{"$c/$code"}}, $tbuild;
  }
  die("bad input: no day found\n") unless $day;
  # jobs: key => [ wait time record, run time record ]
  my %jobdata;
  for my $cc (sort keys %jwait) {
    $jobdata{$cc} = [ toperc($jwait{$cc}, \@percent), toperc($jbuild{$cc}, \@percent) ];
  }
  $data->{'version'} = 1;
  $data->{'type'} = 'servicedispatch';
  $data->{'jobs'} = \%jobdata;
}

# Generator 'dispatcher': reads a log from STDIN and collects, per
# architecture, worker counts (building, idle, available) and queue counts
# (wait, build, idle).  The counts are turned into one sample per second:
# a reported value is repeated for every second up to the time of the last
# timestamp seen.
if ($gentype eq 'dispatcher') {
  my %workerstats;
  my %queuestats;
  while (<STDIN>) {
    chomp;
    if (s/$datetime_re//) {
      if (!defined($start)) {
	# looking for the first line of the day to process
	next if $day && $day ne $1;
	# do not start with a line whose hour is after 12
	next if $2 > 12;
	$start = $2 * 3600 + $3 * 60 + $4;
	$day = $1;
      }
      # stop as soon as the next day begins
      last if $1 ne $day;
      $end = $2 * 3600 + $3 * 60 + $4;
      next;
    }
    # lines without a timestamp
    # NOTE(review): a start time of exactly 00:00:00 is 0 and thus false
    # here (the output code further down rejects such input as well).
    next unless $start;
    if (/^worker statistics: (.*)/) {
      # entries look like "<arch>:<building>:<idle>"
      for (split(' ', $1)) {
	next unless /^(\S+):(\d+):(\d+)/;
	# 't' is the second the next sample of this arch belongs to
	$workerstats{$1}->{'t'} ||= $start;
	while ($workerstats{$1}->{'t'} <= $end) {
	  push @{$workerstats{$1}->{'building'}}, $2;
	  push @{$workerstats{$1}->{'idle'}}, $3;
	  push @{$workerstats{$1}->{'available'}}, $2 + $3;
	  $workerstats{$1}->{'t'}++;
	}
      }
    }
    if (/^queue statistics: (.*)/) {
      # entries look like "<arch>:<build>:<wait>:<idle>"
      for (split(' ', $1)) {
	next unless /^(\S+):(\d+):(\d+):(\d+)/;
	$queuestats{$1}->{'t'} ||= $start;
	while ($queuestats{$1}->{'t'} <= $end) {
	  push @{$queuestats{$1}->{'wait'}}, $3;
	  push @{$queuestats{$1}->{'build'}}, $2;
	  push @{$queuestats{$1}->{'idle'}}, $4;
	  $queuestats{$1}->{'t'}++;
	}
      }
    }
  }
  die("bad input: no day found\n") unless $day;
  for my $arch (sort keys %workerstats) {
    # repeat the last sample up to the end of the log
    while ($workerstats{$arch}->{'t'} <= $end) {
      push @{$workerstats{$arch}->{'building'}}, $workerstats{$arch}->{'building'}->[-1];
      push @{$workerstats{$arch}->{'idle'}}, $workerstats{$arch}->{'idle'}->[-1];
      push @{$workerstats{$arch}->{'available'}}, $workerstats{$arch}->{'available'}->[-1];
      $workerstats{$arch}->{'t'}++;
    }
    # replace the samples by [ building, idle, available ] records
    $workerstats{$arch} = [ toperc($workerstats{$arch}->{'building'}, \@percent),
                            toperc($workerstats{$arch}->{'idle'}, \@percent),
                            toperc($workerstats{$arch}->{'available'}, \@percent) ];
  }
  for my $arch (sort keys %queuestats) {
    # repeat the last sample up to the end of the log
    while ($queuestats{$arch}->{'t'} <= $end) {
      push @{$queuestats{$arch}->{'wait'}}, $queuestats{$arch}->{'wait'}->[-1];
      push @{$queuestats{$arch}->{'build'}}, $queuestats{$arch}->{'build'}->[-1];
      push @{$queuestats{$arch}->{'idle'}}, $queuestats{$arch}->{'idle'}->[-1];
      $queuestats{$arch}->{'t'}++;
    }
    # replace the samples by [ wait, build, idle ] records
    $queuestats{$arch} = [ toperc($queuestats{$arch}->{'wait'}, \@percent),
                           toperc($queuestats{$arch}->{'build'}, \@percent),
                           toperc($queuestats{$arch}->{'idle'}, \@percent) ];
  }
  # with a master dispatcher configured, a log without any worker or queue
  # statistics is taken as not coming from the master
  if ($BSConfig::masterdispatcher) {
    die("not the master dispatcher\n") if !%workerstats && !%queuestats;
  }
  $data->{'version'} = 1;
  $data->{'type'} = 'dispatcher';
  $data->{'workers'} = \%workerstats if %workerstats;
  $data->{'queue'} = \%queuestats if %queuestats;
  $data->{'partition'} = $BSConfig::partition if $BSConfig::partition && !$BSConfig::masterdispatcher;
}

# Generator 'signer': reads a log from STDIN and collects the wait and
# sign times of sign jobs per "class/arch/code/flavor".
if ($gentype eq 'signer') {
  my %jwait;
  my %jbuild;
  while (<STDIN>) {
    chomp;
    if (s/$datetime_re//) {
      if (!defined($start)) {
	# looking for the first line of the day to process
	next if $day && $day ne $1;
	# do not start with a line whose hour is after 12
	next if $2 > 12;
	$start = $2 * 3600 + $3 * 60 + $4;
	$day = $1;
      }
      # stop as soon as the next day begins
      last if $1 ne $day;
      $end = $2 * 3600 + $3 * 60 + $4;
      next;
    }
    # lines without a timestamp
    # NOTE(review): a start time of exactly 00:00:00 is 0 and thus false
    # here (the output code further down rejects such input as well).
    next unless $start;
    next unless /^sign statistics: /;
    # skip the first three words, the rest is
    # "<project>/<repository>/<arch> <code> <times> <flavor>"
    my (undef, undef, undef, $in_1) = split(' ', $_, 4);
    my ($projid, $repoid, $in_2) = split('/', $in_1, 3);
    my ($arch, $code, $times, $flavor) = split(' ', $in_2);
    # times is "<scheduled>-<started>-<ended>"
    my ($tsched, $tstart, $tend) = split('-', $times);
    my $twait = $tstart - $tsched;
    my $tbuild = $tend - $tstart;
    # negative durations are treated as zero
    $twait = 0 if $twait < 0;
    $tbuild = 0 if $tbuild < 0;
    my $c = proj2class($projid);
    my $cacf = "$c/$arch/$code/$flavor";
    push @{$jwait{$cacf}}, $twait;
    push @{$jbuild{$cacf}}, $tbuild;
  }
  die("bad input: no day found\n") unless $day;
  # jobs: key => [ wait time record, sign time record ]
  my %jobdata;
  for my $cacf (sort keys %jwait) {
    $jobdata{$cacf} = [ toperc($jwait{$cacf}, \@percent), toperc($jbuild{$cacf}, \@percent) ];
  }
  $data->{'version'} = 1;
  $data->{'type'} = 'signer';
  $data->{'jobs'} = \%jobdata;
  $data->{'partition'} = $BSConfig::partition if $BSConfig::partition;
}

# Generator 'scheduler': reads the log of the scheduler for architecture
# <arch> from STDIN and collects
#   - the length of the high/med/low queues, one sample per second,
#   - the time spent looking at a project/repository, per "class/prio",
#   - the time spent per activity (looking, down, sleeping, event types),
#   - the largest repository count reported by the scheduler,
#   - the pkg/prp/ulprp statistics records printed by the scheduler.
# The duration of an activity is the distance between the timestamp of
# the line that announces it and the timestamp of the following line.
if ($gentype eq 'scheduler') {
  my $arch = shift @ARGV;
  die("usage: bs_statistics generate -t scheduler <arch>\n") unless $arch;
  my @high;
  my @med;
  my @low;
  # second the next queue sample belongs to
  my $qt;
  # timestamp of the previous line
  my $last;
  # activity announced by the previous line(s), if any
  my $oldlooking;
  my $oldlookingprio;
  my $oldevent;
  my %looking;
  my %times;
  my $numprps;
  my %stats;
  while (<STDIN>) {
    chomp;
    if (!s/$datetime_re//) {
      # line without a timestamp
      next unless defined $start;
      if (/^(pkg|prp|ulprp) statistics: (.*)/) {
	my $what = $1;
	# the rest of the line consists of "<key>:<value>" words
	my %k;
	for (split(' ', $2)) {
	  $k{$1} = $2 if /^(.*?):(.*)$/;
	}
	# keep the record already stored if the new one has a higher
	# 'missing' value
	next if $stats{$what} && $k{'missing'} && ($stats{$what}->{'missing'} || 0) < $k{'missing'};

	# NOTE(review): $1 presumably is the capture of the outer match
	# again at this point, i.e. the same as $what -- confirm
	$stats{$1} = \%k;
      }
      next;
    }
    if (!defined($start)) {
      # looking for the first line of the day to process
      next if $day && $day ne $1;
      # do not start with a line whose hour is after 12
      next if $2 > 12;
      $start = $end = $2 * 3600 + $3 * 60 + $4;
      $day = $1;
    }
    # stop as soon as the next day begins
    last if $1 ne $day;
    $last = $end;
    $end = $2 * 3600 + $3 * 60 + $4;

    if (/^starting build service scheduler/) {
      # the time since the previous line counts as down time, whatever
      # was going on before is dropped
      my $t = $end - $last;
      push @{$times{'down'}}, $t;
      undef $oldlooking;
      undef $oldlookingprio;
      undef $oldevent;
    }

    # finish old stuff
   if ($oldlooking) {
      my $t = $end - $last;
      push @{$times{'looking'}}, $t;
      my $c = proj2class($oldlooking);
      push @{$looking{"$c/$oldlookingprio"}}, $t;
      undef $oldlooking;
      undef $oldlookingprio;
    }
    if ($oldevent) {
      my $t = $end - $last;
      push @{$times{$oldevent}}, $t;
      undef $oldevent;
    }

    # "looking at <prio> prio <project>/<repository> (n/n/n/n/n..."
    if (/^looking at (low|med|high) prio (\S+) \((\d+)\/(\d+)\/(\d+)\/(\d+)\/(\d+)/) {
      my ($prio, $prp, $qhigh, $qmed, $qlow, $qnext, $nprp) = ($1, $2, $3, $4, $5, $6, $7);
      # remember the largest repository count seen
      $numprps = $7 if !defined($numprps) || $7 > $numprps;
      # the fourth number is added to the low queue length
      $qlow += $qnext;
      my ($projid, $repoid) = split('/', $prp, 2);
      $oldlooking = $projid;
      $oldlookingprio = $prio;
      # one queue length sample for every second that has passed
      $qt = $start unless defined $qt;
      while ($qt < $end) {
	push @high, $qhigh;
	push @med, $qmed;
	push @low, $qlow;
	$qt++;
      }
    }
    if (/^(?:remote )?event (\S+)/) {
      $oldevent = $1;
    }
    if (/^waiting for an event/) {
      $oldevent = 'sleeping';
    }
  }
  die("bad input: no day found\n") unless $day;
  # toperc() needs at least one value
  push @high, 0 unless @high;
  push @med, 0 unless @med;
  push @low, 0 unless @low;

  # replace the collected value lists by statistics records
  $_ = toperc($_, \@percent) for values %looking;
  $_ = toperc($_, \@percent) for values %times;

  # turn the statistics printed by the scheduler into records: 'cnt',
  # 'sum' and 'sum2' are count, sum and sum of squares, the numeric keys
  # are the percentile points
  for my $what (qw{pkg prp ulprp}) {
    my $s = $stats{$what};
    next unless $s && defined($s->{'cnt'});
    my @p = sort {$a <=> $b} grep {/^\d*(?:\.\d*)?$/} keys %$s;
    my $p;
    if ($s->{'cnt'}) {
      next unless defined($s->{'sum'}) && defined($s->{'sum2'});
      $p = [ $s->{'cnt'}, $s->{'sum'}, $s->{'sum2'}, \@p, [ map {$s->{$_}} @p ] ];
    } else {
      $p = [ 0, 0, 0, [], [] ];
    }
    # table names: pkgperprj, repoperprj, ulrepoperprj
    my $w = "${what}perprj";
    $w =~ s/prp/repo/;
    $data->{$w} = $p;
  }
  $data->{'version'} = 1;
  $data->{'type'} = 'scheduler';
  $data->{'queue'} = [ toperc(\@high, \@percent), toperc(\@med, \@percent), toperc(\@low, \@percent) ];
  $data->{'repos'} = $numprps if defined $numprps;
  $data->{'looking'} = \%looking;
  $data->{'times'} = \%times;
  $data->{'arch'} = $arch;
  $data->{'partition'} = $BSConfig::partition if $BSConfig::partition;
}

# Common end of all generators: complete the data and write it to a file.
if ($gentype) {
  # none of the generators above felt responsible for this type
  die("unknown generator type $gentype\n") unless %$data;
  # the day is only accepted if the log has a start time and its last line
  # is later than 04:00:00
  # NOTE(review): a start time of exactly 00:00:00 is 0 and therefore also
  # rejected here -- confirm that this is intended.
  undef $day unless $start;
  undef $day unless $start && $end > 4 * 3600;
  die("bad input: day not found\n") unless $day;
  $data->{'day'} = $day;
  $data->{'start'} = $start;
  $data->{'end'} = $end;
  $data->{'percents'} = \@percent;
  $data->{'prefixes'} = \@prefixes;
  # attach the table descriptions for all tables that are in the data
  for my $tbl (keys %$data) {
    $data->{'tblinfo'}->{$tbl} = $tblinfo{"$data->{'type'}:$tbl"} if $tblinfo{"$data->{'type'}:$tbl"};
  }
  my ($yy, $mm, $dd) = split('-', $day, 3);
  # %F is "<type>[-<partition>][-<arch>]"
  my $ff = "$data->{'type'}";
  $ff .= "-$data->{'partition'}" if $data->{'partition'};
  $ff .= "-$data->{'arch'}" if $data->{'arch'};

  $filename =~ s/%Y/$yy/g;
  $filename =~ s/%M/$mm/g;
  $filename =~ s/%D/$dd/g;
  $filename =~ s/%F/$ff/g;
  # create the directory of the output file if needed
  mkdir_p($1) if $filename =~ /(.*)\//;
  store($filename, undef, $data);
  exit(0);
}

# Return the day that is $dayoff days after (before, if negative) the day
# $ref.  Days are strings of the form YYYY-MM-DD.
# Dies with "bad ref ..." if $ref does not have that form.
sub relday {
  my ($ref, $dayoff) = @_;
  my ($year, $mon, $mday) = $ref =~ /^(\d\d\d\d)-(\d\d)-(\d\d)$/;
  die("bad ref $ref\n") unless defined $year;
  # calculate from noon of the reference day
  my $noon = timegm(0, 0, 12, $mday, $mon - 1, $year);
  my ($d, $m, $y) = (gmtime($noon + $dayoff * 24 * 3600))[3, 4, 5];
  return sprintf("%04d-%02d-%02d", $y + 1900, $m + 1, $d);
}

# Apply a relative date offset to a reference date.
#
#   $ref     - reference date: YYYY, YYYY-MM or YYYY-MM-DD
#   $off     - offset: a sign ('+' or '-') followed by Y, Y-M or Y-M-D
#              (years, months, days); empty components count as 0
#   $refsign - how to expand a reference that is just a year or a month:
#              '-' expands to the first day of that period, anything else
#              to its last day
#
# The precision of the result follows the offset: a year for a Y offset,
# YYYY-MM for Y-M and a complete day for Y-M-D.  A resulting day that does
# not exist in the resulting month is moved back to the last day of that
# month.
#
# Dies if the offset has no sign or if the reference has a bad length.
sub reldate {
  my ($ref, $off, $refsign) = @_;
  die unless $off =~ s/^([-+])//;
  my $offsign = $1;
  my ($offy, $offm, $offd) = split('-', $off);
  $offy = 0 if defined($offy) && $offy eq '';
  $offm = 0 if defined($offm) && $offm eq '';
  $offd = 0 if defined($offd) && $offd eq '';
  $offy = -$offy if defined($offy) && $offsign eq '-';
  $offm = -$offm if defined($offm) && $offsign eq '-';
  $offd = -$offd if defined($offd) && $offsign eq '-';
  die("bad date '$ref'\n") if length($ref) != 4 && length($ref) != 7 && length($ref) != 10;
  if ($refsign && $refsign eq '-') {
    # start of the year/month
    $ref .= '-01' while length($ref) < 10;
  } elsif (length($ref) == 4) {
    # end of the year
    $ref .= '-12-31';
  } elsif (length($ref) == 7) {
    # end of the month: the day before the first day of the next month
    my ($y, $m) = split('-', $ref);
    my $nextm = sprintf("%04d-%02d-01", $m == 12 ? $y + 1 : $y, $m == 12 ? 1 : $m + 1);
    $ref = relday($nextm, -1);
  }
  # days first, then years and months
  $ref = relday($ref, $offd) if $offd;
  my ($refy, $refm, $refd) = split('-', $ref);
  $refy += $offy if $offy;
  $refm += $offm if $offm;
  # bring the month back into the range 1..12
  while ($refm > 12) {
    $refm -= 12;
    $refy++;
  }
  while ($refm < 1) {
    $refm += 12;
    $refy--;
  }
  return sprintf("%04d", $refy) if !defined $offm;
  return sprintf("%04d-%02d", $refy, $refm) if !defined $offd;
  $ref = sprintf("%04d-%02d-%02d", $refy, $refm, $refd);
  # limit the day to the last day of the resulting month
  my $nextm = sprintf("%04d-%02d-01", $refm == 12 ? $refy + 1 : $refy, $refm == 12 ? 1 : $refm + 1);
  my $lastm = relday($nextm, -1);
  $ref = $lastm if $ref gt $lastm;
  return $ref;
}

# Find the latest day for which data exists in the given files and
# directories.  Directories are searched recursively in descending name
# order (entries starting with a dot are ignored); directories named after
# a year, year/month or year/month/day that can only hold data older than
# the latest day found so far are not descended into.
# Returns the day as YYYY-MM-DD, or undef if no data with a day was found.
sub readtables_findmaxdate {
  my (@files) = @_;
  my $latest = '0000-00-00';
  my @todo = sort {$b cmp $a} @files;
  while (@todo) {
    my $path = shift @todo;
    if (!-d $path) {
      # a data file: either a single table or a collection of tables
      my $content = retrieve($path);
      my $tables = $content->{'type'} eq 'collection' ? $content->{'collection'} : [ $content ];
      for my $table (@$tables) {
        next unless $table->{'day'};
        $latest = $table->{'day'} if $table->{'day'} gt $latest;
      }
      next;
    }
    # skip date directories that cannot contain anything newer
    next if $path =~ /(?:^|\/)(\d\d\d\d)$/s && $latest gt "$1-99-99";
    next if $path =~ /(?:^|\/)(\d\d\d\d)\/(\d\d)$/s && $latest gt "$1-$2-99";
    next if $path =~ /(?:^|\/)(\d\d\d\d)\/(\d\d)\/(\d\d)$/s && $latest gt "$1-$2-$3";
    my @visible = grep {!/^\./} ls($path);
    unshift @todo, map {"$path/$_"} sort {$b cmp $a} @visible;
  }
  return undef if $latest eq '0000-00-00';
  return $latest;
}

# Convert a day range specification "<min>[:<max>]" into a pair of
# complete day strings (min, max) suitable for string comparison.
# Both parts may be absolute (YYYY, YYYY-MM or YYYY-MM-DD) or relative (a
# sign followed by an offset, see reldate()).  A relative part refers to
# the other part if that one is absolute; otherwise the minimum is taken
# relative to the latest day found in @files.
# A missing maximum defaults to the minimum, empty parts to the widest
# possible range.  Incomplete dates are padded with -00 (min) or -99 (max).
sub dayrange_to_minmax {
  my ($dayrange, @files) = @_;

  my ($min, $max) = split(':', ($dayrange || ''), 2);
  my $min_is_rel = defined($min) && $min =~ /^[+-]/;
  my $max_is_rel = defined($max) && $max =~ /^[+-]/;
  # deal with relative dates
  if ($min_is_rel && (!$max || $max_is_rel)) {
    # no absolute date at all: use the latest data as reference
    die("+ not allowed for min date range\n") if $min =~ /^\+/;
    my $last = readtables_findmaxdate(@files);
    die("could not determine last day\n") unless $last;
    $min = reldate($last, $min);
    if ($max) {
      my $maxref = $max =~ /^-/ ? $last : $min;
      $max = reldate($maxref, $max, '-');
    }
  } elsif ($min_is_rel) {
    $min = reldate($max, $min, '+');
  } elsif ($max_is_rel) {
    $max = reldate($min, $max, '-');
  }
  # extend
  $max = $min unless defined $max;
  $min = '0000-00-00' unless $min;
  $max = '9999-99-99' unless $max;
  while (length($min) < 10) {
    $min .= '-00';
  }
  while (length($max) < 10) {
    $max .= '-99';
  }
  return ($min, $max);
}

# Read all statistics tables from the given files and directories.
#   $dayrange - day range specification, see dayrange_to_minmax()
#   $type     - only return tables of this type; undef or 'multi' selects
#               all types
#   @files    - files and directories; directories are searched
#               recursively in name order, entries starting with a dot
#               are ignored and date directories (year, year/month,
#               year/month/day) outside of the day range are skipped
# Returns the list of matching tables.  Dies if there is nothing to read,
# if a table has no day, or if the same type/day/partition/arch
# combination is found twice.
sub readtables {
  my ($dayrange, $type, @files) = @_;

  die("nothing to read\n") unless @files;
  my ($min, $max) = dayrange_to_minmax($dayrange, @files);
  my @tables;
  my %seenin;
  while (@files) {
    my $path = shift @files;
    if (-d $path) {
      next if $path =~ /(?:^|\/)(\d\d\d\d)$/s && ("$1-99-99" lt $min || "$1-00-00" gt $max);
      next if $path =~ /(?:^|\/)(\d\d\d\d)\/(\d\d)$/s && ("$1-$2-99" lt $min || "$1-$2-00" gt $max);
      next if $path =~ /(?:^|\/)(\d\d\d\d)\/(\d\d)\/(\d\d)$/s && ("$1-$2-$3" lt $min || "$1-$2-$3" gt $max);
      my @visible = grep {!/^\./} ls($path);
      unshift @files, map {"$path/$_"} sort(@visible);
      next;
    }
    # a data file: either a single table or a collection of tables
    my $content = retrieve($path);
    my $tables = $content->{'type'} eq 'collection' ? $content->{'collection'} : [ $content ];
    for my $table (@$tables) {
      # FIXME
      next if $table->{'type'} eq 'dispatcher' && !$table->{'partition'} && !$table->{'workers'} && !$table->{'queue'};
      next if defined($type) && $type ne 'multi' && $table->{'type'} ne $type;
      die("data in $path has no day defined\n") unless $table->{'day'};
      next if $table->{'day'} lt $min || $table->{'day'} gt $max;
      # every type/day[/partition][/arch] combination may exist only once
      my $id = join('/', $table->{'type'}, $table->{'day'});
      $id .= "/$table->{'partition'}" if $table->{'partition'};
      $id .= "/$table->{'arch'}" if $table->{'arch'};
      die("$path: already have $id from $seenin{$id}\n") if $seenin{$id};
      $seenin{$id} = $path;
      push @tables, $table;
    }
  }
  return @tables;
}

# Command 'combine': read all tables from the files/directories given on
# the command line and write them into a single collection file.
#   -t <type>      only combine tables of this type
#   -o <filename>  output file name, '-' writes to stdout
#   -d <dayrange>  only combine tables within this day range
if ($cmd eq 'combine') {
  $filename = $filename_col;
  my $datatype;
  my $dayrange;
  while (@ARGV) {
    if ($ARGV[0] eq '-t') {
      (undef, $datatype) = splice(@ARGV, 0, 2);
      next;
    }
    if ($ARGV[0] eq '-o') {
      (undef, $filename) = splice(@ARGV, 0, 2);
      next;
    }
    if ($ARGV[0] eq '-d') {
      (undef, $dayrange) = splice(@ARGV, 0, 2);
      next;
    }
    last;
  }
  die("nothing to combine\n") unless @ARGV;
  my @collection = readtables($dayrange, $datatype, @ARGV);
  # the different years, months and days that occur in the data
  my (%yy, %mm, %dd);
  for my $d (@collection) {
    my ($yy, $mm, $dd) = split('-', $d->{'day'}, 3);
    $yy{$yy} = 1;
    $mm{$mm} = 1;
    $dd{$dd} = 1;
  }
  if (@collection == 1) {
    # a single table is written as it is
    $data = $collection[0];
  } else {
    # the collection only gets a day if all tables are from the same day
    $day = $collection[0]->{'day'} if keys(%yy) == 1 && keys(%mm) == 1 && keys(%dd) == 1;
    $data->{'version'} = 1;
    $data->{'type'} = 'collection';
    $data->{'day'} = $day if $day;
    $data->{'collection'} = \@collection;
  }
  # a date placeholder in the file name needs a unique value
  die("filename contains %Y but data has multiple years\n") if keys(%yy) > 1 && $filename =~ /%Y/;
  die("filename contains %M but data has multiple months\n") if keys(%mm) > 1 && $filename =~ /%M/;
  die("filename contains %D but data has multiple days\n") if keys(%dd) > 1 && $filename =~ /%D/;
  my ($yy, $mm, $dd, $ff);
  $yy = keys(%yy) == 1 ? (keys(%yy))[0] : '**';
  $mm = keys(%mm) == 1 ? (keys(%mm))[0] : '**';
  $dd = keys(%dd) == 1 ? (keys(%dd))[0] : '**';
  $ff = 'collection';
  if ($filename eq '-') {
    print tostorable($data);
    exit(0);
  }
  $filename =~ s/%Y/$yy/g;
  $filename =~ s/%M/$mm/g;
  $filename =~ s/%D/$dd/g;
  $filename =~ s/%F/$ff/g;
  # create the directory of the output file if needed
  mkdir_p($1) if $filename =~ /(.*)\//;
  store($filename, undef, $data);
  my $ncollection = @collection;
  print "combined $ncollection tables into $filename\n";
  exit(0);
}

# Command 'list': print an overview of the available data.  First a
# numbered list of all "[partition/][arch/]type" combinations is printed,
# then a matrix with one row per day and one column per combination in
# which an 'x' marks existing data.
#   -t <type>      only list tables of this type
#   -d <dayrange>  only list tables within this day range
if ($cmd eq 'list') {
  my $datatype;
  my $dayrange;
  while (@ARGV) {
    if ($ARGV[0] eq '-t') {
      (undef, $datatype) = splice(@ARGV, 0, 2);
      next;
    }
    if ($ARGV[0] eq '-d') {
      (undef, $dayrange) = splice(@ARGV, 0, 2);
      next;
    }
    last;
  }
  # %have: day => combination => 1, %haveall: all combinations seen
  my %have;
  my %haveall;
  for my $d (readtables($dayrange, $datatype, @ARGV)) {
    my $uni = "";
    $uni .= "/$d->{'partition'}" if $d->{'partition'};
    $uni .= "/$d->{'arch'}" if $d->{'arch'};
    $uni .= "/$d->{'type'}";
    $uni =~ s/^\///;
    $have{$d->{'day'}}->{$uni} = 1;
    $haveall{$uni} = 1;
  }
  # number the combinations starting with 1 and print the legend
  my @all = sort keys %haveall;
  my %alln;
  my $n = 1;
  $alln{$_} = $n++ for @all;
  printf "%3d %s\n", $alln{$_}, $_ for @all;
  print "\n";

  # print the matrix header: the column numbers written vertically (one
  # line per decimal digit) between two separator lines
  sub printhead {
    print "           ";
    print "-" for @all;
    print "\n";
    for my $d (100, 10, 1) {
      # leave out digit lines that are not needed
      next if $d == 100 && $n < 100;
      next if $d == 10 && $n < 10;
      print "           ";
      for (@all) {
	my $nn = int($alln{$_} / $d) % 10;
	# blank instead of leading zeros
	print $alln{$_} < $d ? ' ' : $nn;
      }
      print "\n";
    }
    print "           ";
    print "-" for @all;
    print "\n";
  }
  my $first = 1;
  for my $day (sort keys %have) {
    # repeat the header at the beginning and on every first day of a month
    printhead() if $first || $day =~ /01$/;
    $first = 0;
    my $h = $have{$day};
    print "$day ";
    for (@all) {
      print $h->{$_} ? "x" : " ";
    }
    print "\n";
  }
  printhead();
  exit(0);
}

# Everything from here on belongs to the 'plot' command.
die("unknown command $cmd\n") unless $cmd eq 'plot';

die("usage: bs_statistics plot -t <tbl>\n") unless @ARGV;
# settings filled in from the command line options below
my $datatype;
my $tbl;
my $dayrange;
my $key;
my %filters;
my $sum = '';
my $top;
my $cut;

my $unit;
my $style;
my $stacked;
my $keyaxis;
my @keyvalues;
my $terminal = '';
my $combine;

# Options:
#   -t <type>:<table>        table to plot; without a type 'multi' is used
#   -d <dayrange>            day range, see dayrange_to_minmax()
#   -f <key>:<v1>[,<v2>...]  filter; may be given several times, a missing
#                            or empty value list is stored as ''
#   -k <key>                 plot key; a trailing 'axis' sets $keyaxis
#   -s <sum>                 stored in $sum ('perday' and 'le' are checked
#                            below)
#   -T, -C, -Y, -u, -O <v>   stored in $top, $cut, $style, $unit, $terminal
#   -S                       stacked plot
#   -v <value>               key value; may be given several times
#   -c <range>               combine days: week, month or year
while (@ARGV) {
  if ($ARGV[0] eq '-t') {
    (undef, $datatype) = splice(@ARGV, 0, 2);
    next;
  }
  if ($ARGV[0] eq '-d') {
    (undef, $dayrange) = splice(@ARGV, 0, 2);
    next;
  }
  if ($ARGV[0] eq '-f') {
    my $filter;
    (undef, $filter) = splice(@ARGV, 0, 2);
    my ($filterkey, $filtervalue) = split(':', $filter, 2);
    if (!defined($filtervalue) || $filtervalue eq '') {
      $filters{$filterkey}->{''} = 1;
      next;
    }
    $filters{$filterkey}->{$_} = 1 for split(',', $filtervalue, -1);
    next;
  }
  if ($ARGV[0] eq '-k') {
    (undef, $key) = splice(@ARGV, 0, 2);
    next;
  }
  if ($ARGV[0] eq '-s') {
    (undef, $sum) = splice(@ARGV, 0, 2);
    next;
  }

  if ($ARGV[0] eq '-T') {
    (undef, $top) = splice(@ARGV, 0, 2);
    next;
  }
  if ($ARGV[0] eq '-C') {
    (undef, $cut) = splice(@ARGV, 0, 2);
    next;
  }
  if ($ARGV[0] eq '-Y') {
    (undef, $style) = splice(@ARGV, 0, 2);
    next;
  }
  if ($ARGV[0] eq '-S') {
    $stacked = 1;
    shift @ARGV;
    next;
  }
  if ($ARGV[0] eq '-v') {
    push @keyvalues, $ARGV[1];
    splice(@ARGV, 0, 2);
    next;
  }
  if ($ARGV[0] eq '-u') {
    (undef, $unit) = splice(@ARGV, 0, 2);
    next;
  }
  if ($ARGV[0] eq '-O') {
    (undef, $terminal) = splice(@ARGV, 0, 2);
    next;
  }
  if ($ARGV[0] eq '-c') {
    (undef, $combine) = splice(@ARGV, 0, 2);
    next;
  }
  last;
}

die("bad combine range\n") if $combine && ($combine ne 'week' && $combine ne 'month' && $combine ne 'year');

# split "<type>:<table>"; a plain "<table>" belongs to type 'multi'
($datatype, $tbl) = split(':', $datatype, 2);
($datatype, $tbl) = ('multi', $datatype) unless defined $tbl;

# a key like "projectaxis" means key "project" with $keyaxis set
$keyaxis = 1 if $key && $key =~ s/axis$//;

my $tblinfo = $tblinfo{"$datatype:$tbl"};
die("unknown table $datatype:$tbl\n") unless $tblinfo;

# set key
$key ||= 'percent' if @keyvalues > 1;
$key ||= $defaultkey{"$datatype:$tbl"} || 'percent';

# check if this table needs a filter
for my $needfilter (@{$tblinfo->{'needfilter'} || []}) {
  next if $filters{$needfilter} || ($key && $needfilter eq $key);
  my $msg = $needfilter;
  # list the possible values if the filter is on the array key
  $msg .= ' ('.join(',', @{$tblinfo->{'arrayvalues'}}).")" if ($tblinfo->{'arraykey'} || '') eq $needfilter;
  die("please specify a $msg\n");
}

# sort the key values numerically and drop duplicates; for the percent key
# the default is all stored percentile points in keyaxis mode and 10, 50
# and 90 otherwise
@keyvalues = unify(sort {$a <=> $b} @keyvalues) if @keyvalues;
@keyvalues = @percent if $key eq 'percent' && $keyaxis && !@keyvalues;
@keyvalues = (10, 50, 90) if $key eq 'percent' && !@keyvalues;

die("bad table for perday sum\n") if $sum eq 'perday' && $tblinfo->{'unit'} ne 'seconds';
die("need a value for le sum\n") if $sum eq 'le' && !@keyvalues;
$key = 'percent' if $sum eq 'le' && @keyvalues > 1;

die("stacking days is not meaningful\n") if $keyaxis && $stacked;
die("stacking percentages is not meaningful\n") if ($key eq 'percent' || $sum eq 'le') && $stacked;

# which percentile to show
my $percselect = @keyvalues ? $keyvalues[-1] : 50;

# Map a day (YYYY-MM-DD) to the first day of the range selected by the
# global $combine mode: the first of the month, the first of the year, or
# the monday of the week. Dies on a malformed day or an unknown mode.
sub mapday {
  my ($d) = @_;
  my ($year, $month, $mday) = $d =~ /^(\d\d\d\d)-(\d\d)-(\d\d)$/
    or die("bad day $d\n");
  if ($combine eq 'month') {
    return "$year-$month-01";
  }
  if ($combine eq 'year') {
    return "$year-01-01";
  }
  if ($combine ne 'week') {
    die("unknown combine mode $combine\n");
  }
  # weekday at noon UTC, with sunday counted as day 7 so weeks start on monday
  my $wday = (gmtime(timegm(0, 0, 12, $mday, $month - 1, $year)))[6];
  $wday ||= 7;
  return relday($d, 1 - $wday);
}

# read data; entries are grouped by day, or by the combined range the
# day belongs to. %ndays counts the distinct days seen per combined range.
my %dataperday;
my %ndays;
my %ndays_seen;
for my $entry (readtables($dayrange, $datatype, @ARGV)) {
  my $origday = $entry->{'day'};
  my $bucket = $combine ? mapday($origday) : $origday;
  push @{$dataperday{$bucket}}, $entry;
  next unless $combine;
  $ndays{$bucket}++ unless $ndays_seen{$origday}++;
}

# make sure that there is an entry for all days
if (!$keyaxis) {
  my ($minday, $maxday) = ('9999-99-99', '0000-00-00');
  for my $entries (values %dataperday) {
    for my $entry (@$entries) {
      my $eday = $entry->{'day'};
      $minday = $eday if $eday lt $minday;
      $maxday = $eday if $eday gt $maxday;
    }
  }
  # walk from the first to the last day and create the missing slots
  for ($day = $minday; $day le $maxday; $day = relday($day, 1)) {
    my $bucket = $combine ? mapday($day) : $day;
    $dataperday{$bucket} ||= [];
  }
}

# state shared with adddata: the array key name and its values for the
# current table, and an optional scale factor
my $adddata_arraykey = $tblinfo->{'arraykey'};
my $adddata_arrayvalues = $tblinfo->{'arrayvalues'};
my $adddata_scale;

# Add one value to the collection of samples.
#
#   $comb - hash ref collecting the samples per key
#   $data - hash ref with the properties of the value (used for filtering
#           and for selecting the key)
#   $v    - a plain number, a sample array with five elements, or a short
#           array holding one entry per array value of the table
sub adddata {
  my ($comb, $data, $v) = @_;

  # short arrays are split up, the array key is set for each element
  if (ref($v) eq 'ARRAY' && @$v < 5) {
    return unless $adddata_arrayvalues && @$v;
    my $last = $#{$v} < $#{$adddata_arrayvalues} ? $#{$v} : $#{$adddata_arrayvalues};
    for my $idx (0 .. $last) {
      $data->{$adddata_arraykey} = $adddata_arrayvalues->[$idx];
      adddata($comb, $data, $v->[$idx]);
    }
    return;
  }

  # a real key needs a property to group by
  if ($key ne 'percent' && $key ne '-') {
    return unless $data->{$key};
  }

  # apply the filters; properties that are not set always pass
  for my $fkey (keys %filters) {
    my $have = $data->{$fkey};
    next unless $have;
    return unless $filters{$fkey}->{$have};
  }

  # turn plain numbers into a sample: count, sum, sum of squares, values
  if (!ref($v)) {
    $v = [ 1, $v, $v * $v, undef, [ $v ] ];
  }
  if ($adddata_scale) {
    $v = scaleperc($v, $adddata_scale);
  }

  my @slots;
  if ($key eq 'percent') {
    @slots = @keyvalues;
  } elsif ($key eq '-') {
    @slots = ('-');
  } else {
    @slots = ($data->{$key});
  }
  push @{$comb->{$_}}, $v for @slots;
}

# set by calcday when a percentile was selected from merged data, i.e. more
# than one sample (or a sample with a count above one) went into one value
my $didcombine;
# set by calcday when a unit conversion factor was applied to the data
my $didscale;

# Calculate the values of one day (or combined range of days).
#
#   $day  - key into %dataperday
#   $type - data type, compared with the 'type' field of each data entry
#   $tbl  - name of the table to evaluate
#
# Returns a hash reference mapping every key to its combined value. A value
# is undef if it could not be calculated (e.g. average without samples).
# The 'multi' table 'projectbuild' is delegated to calcday_projectbuild.
#
# externals: sum filters key keyvalues didcombine percselect
# (also reads unit, unitscale, combine, ndays, dataperday and tblinfo and
# sets didscale and the adddata_* variables used by adddata)
sub calcday {
  my ($day, $type, $tbl) = @_;

  return calcday_projectbuild(@_) if $type eq 'multi' && $tbl eq 'projectbuild';
  my %comb;
  # collect data from the tables
  for my $d (@{$dataperday{$day}}) {
    next unless $d->{'type'} eq $type;
    my $dd = $d->{$tbl};
    next unless $dd;
    my %data;
    $data{'arch'} = $d->{'arch'} if $d->{'arch'};
    $data{'partition'} = $d->{'partition'} if $d->{'partition'};
    # entries without an arch always pass the arch filter, entries without
    # a partition only pass if the empty partition '' was selected
    next if $filters{'arch'} && $data{'arch'} && !$filters{'arch'}->{$data{'arch'}};
    next if $filters{'partition'} && !$filters{'partition'}->{$data{'partition'} || ''};
    # table info stored with the data takes precedence over the built-in one
    my $ti = ($d->{'tblinfo'} || {})->{$tbl} || $tblinfo{"$type:$tbl"};
    die("unknown table $type:$tbl in day $day\n") unless $ti;
    # tell adddata about the layout of this table
    $adddata_arraykey = $ti->{'arraykey'};
    $adddata_arrayvalues = $ti->{'arrayvalues'};
    $adddata_scale = undef;
    my $tiunit = $ti->{'unit'};
    $tiunit ||= $tblinfo{"$type:$tbl"}->{'unit'};
    # convert to the requested unit if %unitscale knows a factor for
    # either direction
    if ($unit && $tiunit && $unit ne $tiunit) {
      $adddata_scale = $unitscale{"$tiunit:$unit"} if $unitscale{"$tiunit:$unit"};
      $adddata_scale = 1 / $unitscale{"$unit:$tiunit"} if $unitscale{"$unit:$tiunit"};
      $didscale = 1 if $adddata_scale;
    }
    # perday: scale by 100 / length of the start..end range of the entry,
    # this replaces any unit conversion factor
    # (presumably start and end are timestamps in seconds -- TODO confirm)
    $adddata_scale = 100 / ($d->{'end'} - $d->{'start'} + 1) if $sum eq 'perday';
    if (ref($dd) eq 'HASH') {
      my $hashkey = $ti->{'hashkey'};
      die("missing hashkey\n") unless $hashkey;
      if (ref($hashkey)) {
        # compound key: the '/' separated components are mapped to the
        # hashkey names, surplus components are ignored
        for my $x (sort keys %$dd) {
	  my @x = split('/', $x);
	  my $n = @x > @$hashkey ? $#{$hashkey} : $#x;
          adddata(\%comb, { %data, map {$hashkey->[$_], $x[$_]} 0..$n }, $dd->{$x});
	}
      } else {
	adddata(\%comb, { %data, $hashkey => $_ }, $dd->{$_}) for sort keys %$dd;
      }
    } else {
      adddata(\%comb, \%data, $dd);
    }
  }
  # combine collected data into a single value
  # (adddata builds the samples as [ count, sum, sum of squares, ... ])
  for my $kk (sort keys %comb) {
    my $cv = $comb{$kk};
    my $v = 0;
    if ($sum eq 'count') {
      # number of samples, averaged over the days of a combined range
      $v += $_->[0] for @$cv;
      $v /= $ndays{$day} if $combine;
      $v *= $unitscale{"count:$unit"} if $unit && $unitscale{"count:$unit"};
      $v /= $unitscale{"$unit:count"} if $unit && $unitscale{"$unit:count"};
    } elsif ($sum eq 'sum' || $sum eq 'perday') {
      # sum of all values, averaged over the days of a combined range
      $v += $_->[1] for @$cv;
      $v /= $ndays{$day} if $combine;
    } elsif ($sum eq 'avg') {
      # average: sum of values / number of samples
      my $nv = 0;
      $nv += $_->[0] for @$cv;
      $v += $_->[1] for @$cv;
      $v = $nv ? ($v / $nv) : undef;
    } elsif ($sum eq 'sd') {
      # standard deviation: sqrt((sum of squares - sum^2 / n) / n)
      my $nv = 0;
      $nv += $_->[0] for @$cv;
      if ($nv) {
        $v += $_->[1] for @$cv;
        $v = -($v * $v) / $nv;
        $v += $_->[2] for @$cv;
        $v = $v >= 0 ? sqrt($v / $nv) : undef;
      } else {
	$v = undef;
      }
    } elsif ($sum eq 'le') {
      # percentage of the values that are less than or equal to the limit
      # (fromperc presumably returns the values of the combined data)
      my $d = combperc(@$cv);
      my $cut = $key eq 'percent' ? $kk : $percselect;
      my @m = fromperc($d);
      $v = !@m ? undef : scalar(grep {$_ <= $cut} @m) * 100 / scalar(@m);
    } else {
      # default: select a percentile from the combined data, either the
      # key itself (percent keys) or the globally selected percentile
      my $d = combperc(@$cv);
      $v = selperc($d, $key eq 'percent' ? $kk : $percselect);
      $didcombine = 1 if @$cv > 1 || (@$cv == 1 && $cv->[0]->[0] > 1);
    }
    $comb{$kk} = $v;
  }
  return \%comb;
}

# Calculate the 'multi:projectbuild' values of one day from the scheduler
# queue and the repserver job tables.
#
#   $day  - key into %dataperday
#   $type - unused, always 'multi'
#   $tbl  - unused, always 'projectbuild'
#
# Returns a hash reference mapping every key to
#   high queue + (med queue + waiting jobs + building jobs) * $buildsperproject
# Keys for which one of the four inputs is missing are left out.
sub calcday_projectbuild {
  my ($day, $type, $tbl) = @_;
  my $buildsperproject = 3;

  # calcday selects the sub-table through the global 'type' filter. Localize
  # the entry so that the caller's filter (or its absence) is restored when
  # we return; otherwise a bogus "type:build" ends up in the plot title.
  local $filters{'type'};

  $filters{'type'} = { 'high' => 1 };
  my $sched_high = calcday($day, 'scheduler', 'queue');
  $filters{'type'} = { 'med' => 1 };
  my $sched_med = calcday($day, 'scheduler', 'queue');
  $filters{'type'} = { 'wait' => 1 };
  my $jobs_wait = calcday($day, 'repserver', 'jobs');
  $filters{'type'} = { 'build' => 1 };
  my $jobs_build = calcday($day, 'repserver', 'jobs');

  my $val = {};
  for my $k (sort keys %$jobs_build) {
    my $sched_high_v = $sched_high->{$k};
    my $sched_med_v = $sched_med->{$k};
    my $jobs_wait_v = $jobs_wait->{$k};
    my $jobs_build_v = $jobs_build->{$k};
    next unless defined($sched_high_v) && defined($sched_med_v) && defined($jobs_wait_v) && defined($jobs_build_v);
    # diagnostics go to stderr, stdout may carry the plot output
    print STDERR "$day $k: $sched_high_v $sched_med_v $jobs_wait_v $jobs_build_v\n";
    $val->{$k} = $sched_high_v + ($sched_med_v + $jobs_wait_v + 1 * $jobs_build_v) * $buildsperproject;
  }
  return $val;
}

# calculate the values of every day, showing the progress on stderr
my @days = sort(keys(%dataperday));
if (!@days) {
  die("no days found\n");
}

my %vals;
foreach my $curday (@days) {
  print STDERR "\rday $curday";
  my $dayvals = calcday($curday, $datatype, $tbl);
  $vals{$curday} = $dayvals;
}
print STDERR "\rall days processed, plotting...\n";

# postprocess: find top entries for each key
# The keys are ranked by the biggest value they reach on any day. Everything
# after the first $top keys is summed up into an 'other' entry per day.
if ($top) {
  my %peak;
  for my $day (@days) {
    my $dayvals = $vals{$day} || {};
    for my $name (keys %$dayvals) {
      my $val = $dayvals->{$name};
      next unless defined $val;
      $peak{$name} = $val if !defined($peak{$name}) || $val > $peak{$name};
    }
  }
  my @ranked = sort {$peak{$b} <=> $peak{$a} or $a cmp $b} keys %peak;
  my @dropped = splice(@ranked, $top);
  for my $day (@days) {
    my $dayvals = $vals{$day} || {};
    my @present = grep {exists $dayvals->{$_}} @dropped;
    next unless @present;
    my $total = 0;
    for my $name (@present) {
      my $val = delete $dayvals->{$name};
      $total += $val;
    }
    $dayvals->{'other'} = $total;
  }
}

# postprocess: cut values if requested
if ($cut) {
  for my $dayvals (map {$vals{$_} || ()} @days) {
    for my $name (keys %$dayvals) {
      my $val = $dayvals->{$name};
      next unless defined $val;
      $dayvals->{$name} = $cut if $val > $cut;
    }
  }
}

# postprocess: find keys
my %keyknown;
for my $day (@days) {
  my $dayvals = $vals{$day} || {};
  $keyknown{$_} = 1 for keys %$dayvals;
}
# percent keys are ordered numerically, biggest first
my @keys;
if ($key eq 'percent') {
  @keys = sort {$b <=> $a} keys %keyknown;
} else {
  @keys = sort keys %keyknown;
}
@keys or die("No data found\n");

# postprocess: switch keys and days
if ($keyaxis) {
  # the days become the columns; shorten their names for month/year ranges
  my %colname;
  for my $d (@days) {
    my $short = $d;
    if ($combine && $combine eq 'month') {
      $short =~ s/-\d\d$//;
    }
    if ($combine && $combine eq 'year') {
      $short =~ s/-\d\d-\d\d$//;
    }
    $colname{$d} = $short;
  }
  my %nvals;
  for my $k (@keys) {
    for my $d (@days) {
      my $val = $vals{$d}->{$k};
      $nvals{$k}->{$colname{$d}} = $val if defined $val;
    }
  }
  %vals = %nvals;
  # the old keys are the new rows, percent keys in ascending order
  if ($key eq 'percent') {
    @days = sort {$a <=> $b} @keys;
  } else {
    @days = @keys;
  }
  # recalculate the known keys from the switched table
  %keyknown = ();
  for my $day (@days) {
    my $row = $vals{$day} || {};
    $keyknown{$_} = 1 for keys %$row;
  }
  @keys = sort keys %keyknown;
}

# generate gnuplot input

# default to svg output if stdout is a plain file
if (!$terminal && -f STDOUT) {
  $terminal = 'svg';
}
# a "raw" prefix writes the gnuplot commands to stdout instead of
# running gnuplot
if ($terminal =~ s/^raw//) {
  open(GP, ">&STDOUT") or die("stdout: $!\n");
} else {
  open(GP, "|-", 'gnuplot', '-p', '-') or die("gnuplot: $!\n");
}
$terminal = "set terminal $terminal" if $terminal;
$terminal = '' unless $terminal;

# default plotting style
$style ||= $stacked ? 'histogram' : 'lines';
#$style = 'boxes' if $key eq 'percent' && !$keyaxis;

# dump data table
my $autotitle = 'set key off';
print GP "\$Data  << EOD\n";
# write a column header unless there is just the single anonymous key
unless (@keys == 1 && $keys[0] eq '-') {
  my $is_percent = $key eq 'percent';
  my $keysuffix = $is_percent && !$keyaxis ? '%' : '';
  my @header = ('day');
  for my $name (@keys) {
    my $col = $is_percent && $sum eq 'le' ? "<= $name" : "$name$keysuffix";
    $col =~ s/([\\\@\'\"_])/\\\\$1/g;
    push @header, "\"$col\"";
  }
  print GP join(' ', @header), "\n";
  $autotitle = 'set key autotitle columnheader';
}
# one row per day, missing values are written as NaN
for my $day (@days) {
  my @cols = @{$vals{$day} || {}}{@keys};
  if ($stacked && $style ne 'histogram') {
    # stack by hand: every column gets the sum of all columns before it
    my $running = 0;
    for my $col (@cols) {
      $running += defined($col) ? $col : 0;
      $col = $running;
    }
  }
  print GP join(' ', $day, map {defined($_) ? $_ : 'NaN'} @cols), "\n";
}
print GP "EOD\n";

# calculate tics
# estimate the number of days covered to select the tic density
my $ndays = @days;
if ($combine) {
  my %daysper = ('week' => 7, 'month' => 30, 'year' => 365);
  $ndays *= $daysper{$combine} if $daysper{$combine};
}
my @ticlist;
my $nday = 0;
for my $day (@days) {
  my $pos = $nday++;
  if ($keyaxis) {
    # the rows are keys, label every one of them
    (my $label = $day) =~ s/([\\\@\'\"_])/\\\\$1/g;
    push @ticlist, "\"$label\" $pos";
    next;
  }
  my ($year, $mon, $mday) = $day =~ /^(\d\d\d\d)-(\d\d)-(\d\d)$/ or die;
  my ($dom, $month0, $wday) = (gmtime(timegm(0, 0, 12, $mday, $mon - 1, $year)))[3, 4, 6];
  # a week starting in the first seven days counts as start of the month
  $dom = 1 if $combine && $combine eq 'week' && $dom < 8;
  # decide if this day gets a labeled tic, a minor tic, or nothing
  my ($labeled, $minor);
  if ($ndays < 30) {
    $labeled = 1;
  } elsif ($ndays < 120) {
    $labeled = $wday == 1;
    $minor = 1;
  } elsif ($ndays < 1000) {
    $labeled = $dom == 1;
    $minor = $wday == 1;
  } else {
    $labeled = $dom == 1 && $month0 == 0;
    $minor = $dom == 1;
  }
  if ($labeled) {
    my $dname = $day;
    if ($combine && $combine eq 'month') {
      $dname =~ s/-\d\d$//;
    }
    if ($combine && $combine eq 'year') {
      $dname =~ s/-\d\d-\d\d$//;
    }
    push @ticlist, "\"$dname\" $pos";
  } elsif ($minor) {
    push @ticlist, "\"\" $pos 1";
  }
}
my $tics = join(', ', @ticlist);
$tics = "($tics)" if $tics;

# percent keys have a numeric order
my $ordered = $key eq 'percent' ? 1 : 0;

my $histostyle = $stacked ? 'set style histogram rowstacked' : '';

# use solid fill for histograms, ordered boxes and single-key plots
my $fill = '';
$fill = "set style fill solid 1.0 border -1" if $style eq 'histogram' || ($style eq 'boxes' && $ordered) || @keys == 1;
my $invertkey = $style eq 'histogram' ? 'invert' : '';

# calculate titles
my @ftitle;
for my $f (keys %filters) {
  push @ftitle, "$f:".join(',', sort keys %{$filters{$f}});
}
my $title = "$datatype $tbl";
$title .= " ".join(' ', sort @ftitle) if @ftitle;
$title .= " <=@keyvalues" if $sum eq 'le' && $key ne 'percent';
$title .= " ($percselect% perc.)" if !$sum && $didcombine && $key ne 'percent';
# The title is put into a single-quoted gnuplot string: protect the
# characters that are special in enhanced text with a backslash. A single
# quote cannot be escaped with a backslash in such a string (it would end
# the string), it has to be doubled instead.
$title =~ s/([\\\@_])/\\$1/g;
$title =~ s/'/''/g;

# there is currently no y axis label, so no "set ylabel" is generated
my $ylabel = '';
$ylabel =~ s/([\\\@\'_])/\\$1/g;
$ylabel = $ylabel ? "set ylabel \"$ylabel\"" : '';

# find out the unit of the plotted values
my $yunit = $tblinfo->{'unit'};
$yunit = $unit if $unit && $didscale;
$yunit = 'percent' if $sum eq 'perday' || $sum eq 'le';
$yunit = 'count' if $sum eq 'count';
$yunit = $unit if $sum eq 'count' && ($unitscale{"count:$unit"} || $unitscale{"$unit:count"});

# suffix for the y axis format; the percent sign is doubled because the
# suffix is used in a gnuplot format string
my $yaxissuffix = {
  'percent' => '%%',
  'seconds' => ' s',
  'minutes' => ' m',
  'hours'   => ' h',
  'kbyte'   => ' KB',
  'mbyte'   => ' MB',
  'gbyte'   => ' GB',
  'tbyte'   => ' TB',
  'K'       => ' K',
  'M'       => ' M',
  'G'       => ' G',
  'T'       => ' T',
}->{defined($yunit) ? $yunit : ''} || '';

# the first key is plotted from column 2, add one more plot for every
# further key
my $plots = join('', map {my $col = $_ + 2; ", '' using $col"} 1 .. $#keys);

# the gnuplot commands, one per line
my @gpcommands = (
  $terminal,
  "set key outside $invertkey",
  $autotitle,
  'set border 3',
  qq{set format y "%g$yaxissuffix"},
  'set tics nomirror out scale 0.75',
  'set grid',
  "set style data $style",
  $histostyle,
  $ylabel,
  $fill,
  'set boxwidth 1 absolute',
  "set xtics rotate by -90 $tics",
  "set title '${title}'",
  "plot \$Data using 2$plots",
);
print GP join("\n", @gpcommands), "\n";

# closing the pipe waits for gnuplot and reports its exit status
close(GP) or die("gnuplot: $?\n");

