#!/usr/bin/perl -w
#
# Copyright (c) 2006, 2007 Michael Schroeder, Novell Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program (see the file COPYING); if not, write to the
# Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
#
################################################################
#
# The Job Dispatcher
#

BEGIN {
  # Change into the directory the script was started from (the part of
  # $0 before the last '/', or '.' if there is none) so that the relative
  # paths used later on resolve: the 'build' entry added to @INC below
  # and the 'worker'/'build' directories hashed by getcodemd5().
  my ($wd) = $0 =~ m-(.*)/- ;
  $wd ||= '.';
  # FIXME: currently the bs_dispatcher makes assumptions on being in a
  # properly set up working dir, e.g. with subdirs 'worker' and
  # 'build'.  Either that is cleaned up or this stays in, for the sake
  # of startproc and others being able to start the dispatcher without
  # knowing that it has to be started in the right directory....

  # Fail right away with a clear message instead of running into
  # module load errors from the wrong directory later on.
  chdir($wd) || die("chdir $wd: $!\n");
  unshift @INC,  "build";
}

use POSIX;
use Data::Dumper;
use Digest::MD5 ();
use List::Util;
use Fcntl qw(:DEFAULT :flock);
use XML::Structured ':bytes';
use Storable;

use BSConfig;
use BSRPC;
use BSUtil;
use BSXML;

use strict;

# Drop privileges to the user/group configured in BSConfig, if any.
# The names are first translated to numeric ids; an unknown name is fatal.
my $user = $BSConfig::bsuser;
my $group = $BSConfig::bsgroup;

if (defined $user) {
  $user = (getpwnam($user))[2];
  die("unknown user\n") unless defined $user;
}
if (defined $group) {
  $group = (getgrnam($group))[2];
  die("unknown group\n") unless defined $group;
  # The group is switched before the user.  Real and effective id are
  # set with one list assignment (see perlvar), do not split it up.
  ($(, $)) = ($group, $group);
  die "setgid: $!\n" if ($) != $group);
}
if (defined $user) {
  # same single list assignment for the real/effective user id
  ($<, $>) = ($user, $user);
  die "setuid: $!\n" if ($> != $user);
}

# Port that is handed to the workers as "port" parameter when a job is
# assigned: taken from a trailing ":<digits>" of $BSConfig::reposerver,
# with 5252 ('RR') as fallback.
my $port = $BSConfig::reposerver =~ /:(\d+)$/ ? $1 : 5252;

# host architecture of a worker => architectures it is offered jobs for
my %cando = (
  'i586'    => [qw(i586 armv4l armv5el armv7el sh4)],
  'i686'    => [qw(i586 i686 armv4l armv5el armv7el sh4)],
  'x86_64'  => [qw(x86_64 i586 i686 armv4l armv5el armv7el sh4)],
  'ppc'     => [qw(ppc)],
  'ppc64'   => [qw(ppc ppc64)],
  'armv4l'  => [qw(armv4l)],
  'armv5el' => [qw(armv4l armv5el)],
  'armv7el' => [qw(armv4l armv5el armv7el)],
  'sh4'     => [qw(sh4)],
  'ia64'    => [qw(ia64)],
  's390'    => [qw(s390)],
  's390x'   => [qw(s390x s390)],
  'sparc'   => [qw(sparc)],
  'sparc64' => [qw(sparc64 sparcv9:sparc32 sparcv8:sparc32 sparc:sparc32)],
  'mips'    => [qw(mips)],
  'mips64'  => [qw(mips64 mips)],
  'local'   => [qw(local)],
);

# Exponential decay rate (per second) of the load statistics:
#   a 4h build adds .5 to the load,
#   4h without builds halve the load.
my $decay = log(0.5) / (4 * 3600);

# directories below the build service root
my $rundir = $BSConfig::rundir || ($BSConfig::bsdir . '/run');
my $workersdir = $BSConfig::bsdir . '/workers';
my $jobsdir = $BSConfig::bsdir . '/jobs';
my $eventdir = $BSConfig::bsdir . '/events';

my $reporoot = $BSConfig::bsdir . '/build';

# Compute a digest over the code in a directory.
#
#   $dir   - directory to hash; the non-dot entries of $dir and of
#            "$dir/Build" are taken into account (plain files only)
#   $cache - optional hash ref mapping "mtime/size/inode" to the md5 of
#            the file content, used to avoid re-reading unchanged files.
#            It is updated in place; whenever a file had to be read,
#            entries not seen in this run are dropped.
#
# Returns the hex md5 of the sorted "<filemd5>  <name>" listing.
sub getcodemd5 {
  my ($dir, $cache) = @_;
  $cache ||= {};

  my @top = grep {!/^\./} ls($dir);
  my @sub = grep {!/^\./} ls("$dir/Build");
  # a name that is present in Build/ is always looked up there
  my %inbuild = map {$_ => 1} @sub;

  my @listing;
  my %seen;
  my $fresh = 0;
  for my $name (sort(@top, @sub)) {
    my $path = $inbuild{$name} ? "$dir/Build/$name" : "$dir/$name";
    next unless -f $path;
    # reuse the stat data of the -f test above
    my ($ino, $size, $mtime) = (stat _)[1, 7, 9];
    my $id = "$mtime/$size/$ino";
    $seen{$id} = 1;
    unless ($cache->{$id}) {
      $cache->{$id} = Digest::MD5::md5_hex(readstr($path));
      $fresh = 1;
    }
    push @listing, "$cache->{$id}  $name\n";
  }

  # something changed, so expire the cache entries of files that are gone
  if ($fresh) {
    delete $cache->{$_} for grep {!$seen{$_}} keys %$cache;
  }
  return Digest::MD5::md5_hex(join('', @listing));
}

# digest caches for getcodemd5(), one per hashed directory
my $workerdircache = {};
my $builddircache = {};

# "project/package/arch/<worker>" => time at which the entry was made.
# Used by the dispatch loop to keep a package away from a worker.
my %badhost;

# Try to hand a job over to an idle worker.
#
#   $job      - name of the job file in "$jobsdir/$arch"
#   $idlename - name of the worker file in "$workersdir/idle"
#   $arch     - architecture directory the job lives in
#
# Returns
#   'assigned' - the worker accepted the job; the worker has been moved
#                from the idle to the building list and the job status
#                is now 'building'
#   'badhost'  - the worker answered "cannot build this package"; a
#                %badhost entry was made and the worker stays idle
#   undef      - the job could not be locked or is gone, the worker is
#                gone, or the rpc failed (the idle file of the worker is
#                removed in the last case)
sub assignjob {
  my ($job, $idlename, $arch) = @_;
  local *F;

  my $jobfile = "$jobsdir/$arch/$job";
  my $statusfile = $jobfile . ':status';
  my $idlefile = "$workersdir/idle/$idlename";

  print "assignjob $arch/$job -> $idlename\n";
  my $status = { 'code' => 'dispatching' };
  unless (BSUtil::lockcreatexml(\*F, "$jobsdir/$arch/.dispatch.$$", $statusfile, $status, $BSXML::jobstatus)) {
    print "job lock failed!\n";
    return undef;
  }

  # back out of the dispatch attempt: remove our status file again and
  # close the handle that holds the lock
  my $giveup = sub {
    unlink($statusfile);
    close F;
  };

  # got the lock, re-check if job is still there
  unless (-e $jobfile) {
    $giveup->();
    print "job disappered!\n";
    return undef;
  }

  # prepare job data, the job id is the md5 of the job description
  my $jobxml = readstr($jobfile);
  my $jobid = Digest::MD5::md5_hex($jobxml);
  my $info = XMLin($BSXML::buildinfo, $jobxml);

  # digests of our worker/build code, sent along with the job
  my $workercode = getcodemd5('worker', $workerdircache);
  my $buildcode = getcodemd5('build', $builddircache);

  # get the worker data
  my $worker = readxml($idlefile, $BSXML::worker, 1);
  unless ($worker) {
    $giveup->();
    print "worker is gone!\n";
    return undef;
  }
  my $workeruri = "http://$worker->{'ip'}:$worker->{'port'}";

  # send the job to the worker
  eval {
    BSRPC::rpc({
      'uri'     => "$workeruri/build",
      'timeout' => 10,
      'request' => "PUT",
      'headers' => [ "Content-Type: text/xml" ],
      'data'    => $jobxml,
    }, undef, "port=$port", "workercode=$workercode", "buildcode=$buildcode");
  };
  if ($@) {
    my $err = $@;
    print "rpc error: $err";
    $giveup->();
    if ($err =~ /cannot build this package/) {
      # remember that this worker refused the package, keep it idle
      $badhost{"$info->{'project'}/$info->{'package'}/$info->{'arch'}/@{[(split(':', $idlename, 2))[1]]}"} = time();
      return 'badhost';
    }
    unlink($idlefile);	# broken client
    return undef;
  }
  unlink($idlefile);	# no longer idle

  # put worker into building list
  $worker->{'job'} = $job;
  $worker->{'arch'} = $arch;
  mkdir_p("$workersdir/building");
  writexml("$workersdir/building/.$idlename", "$workersdir/building/$idlename", $worker, $BSXML::worker);

  # write new status and release lock
  @$status{'code', 'uri', 'starttime', 'hostarch', 'jobid'} =
    ('building', $workeruri, time(), $worker->{'hostarch'}, $jobid);
  $status->{'workerid'} = $worker->{'workerid'} if defined $worker->{'workerid'};
  writexml("$jobsdir/$arch/.$job:status", $statusfile, $status, $BSXML::jobstatus);
  close F;
  return 'assigned';
}

# Forward an event to the "/event" resource of the source server.
#
#   $ev - event hash ref; the defined ones of the fields type, project,
#         package, repository, arch and job are sent as "key=value"
#         arguments (in that order), everything else is ignored
#
# Returns whatever BSRPC::rpc returns; rpc errors are not caught here.
sub sendeventtosrcserver {
  my ($ev) = @_;
  my @args = map {"$_=$ev->{$_}"}
             grep {defined $ev->{$_}}
             qw(type project package repository arch job);
  return BSRPC::rpc({
    'uri' => "$BSConfig::srcserver/event",
    'timeout' => 10,
  }, undef, @args);
}

# Fold the build times recorded in "$jobsdir/finished" into the load
# statistics kept in "$jobsdir/load".
#
# Each complete line of the finished file is a '|' separated, %XX escaped
# record: project, repository, arch, package, start time, end time,
# result, worker id, host arch.  The load file is a Storable hash mapping
# "project/repository/arch" to [ time of last update, load ], where the
# load decays exponentially with rate $decay.
#
# Does nothing if the finished file is empty, missing or unreadable.
# Dies if the new load file cannot be written or renamed into place.
sub updateload {
  return unless -s "$jobsdir/finished";
  local *F;
  return unless open(F, '<', "$jobsdir/finished");
  # we keep reading through the open handle, later records go to a new file
  unlink("$jobsdir/finished");
  my $load;
  eval {$load = Storable::retrieve("$jobsdir/load");};
  $load ||= {};
  while (<F>) {
    next unless /\n$/s;		# ignore a last line without newline
    my @s = split('\|', $_);
    s/%([a-fA-F0-9]{2})/chr(hex($1))/ge for @s;
    # package, result, worker id and host arch are not needed here
    my ($projid, $repoid, $arch, undef, $start, $end) = @s;
    next unless defined $end;	# too few fields, not a valid record
    next unless $start =~ /^[0-9]+$/s;
    next unless $end =~ /^[0-9]+$/s;
    next if $end <= $start;
    my $entry = $load->{"$projid/$repoid/$arch"} ||= [0, 0];
    # contribution of this build at the time it ended
    my $busy = 1 - exp($decay * ($end - $start));
    if ($entry->[0] < $end) {
      # newer than the stored state: age the old load, then add
      $entry->[1] *= exp($decay * ($end - $entry->[0]));
      $entry->[1] += $busy;
      $entry->[0] = $end;
    } else {
      # older than the stored state: age the contribution instead
      $entry->[1] += $busy * exp($decay * ($entry->[0] - $end));
    }
  }
  close F;
  Storable::nstore($load, "$jobsdir/load.new");
  rename("$jobsdir/load.new", "$jobsdir/load") || die("rename $jobsdir/load.new $jobsdir/load: $!\n");
}

# Return the names of the jobs in "$jobsdir/$arch" that can be dispatched:
# dot files, ":dir", ":status" and ":new" entries are left out, as are all
# jobs that already have a ":status" file.
sub listopenjobs {
  my ($arch) = @_;
  my @entries = grep {!/^\./} ls("$jobsdir/$arch");
  my %locked = map {$_ => 1} grep {/:status$/} @entries;
  return grep {!/:(?:dir|status|new)$/ && !$locked{"$_:status"}} @entries;
}

# Take a worker out of the idle lists of all architectures it can build.
#
#   $idlearch - hash ref: architecture => [ idle worker names ]
#   $idle     - worker name, the part before the first ':' is its host arch
sub removeidle {
  my ($idlearch, $idle) = @_;
  my $harch = (split(':', $idle, 2))[0];
  for my $carch (@{$cando{$harch} || []}) {
    $idlearch->{$carch} = [ grep {$_ ne $idle} @{$idlearch->{$carch} || []} ];
  }
}

$| = 1;
$SIG{'PIPE'} = 'IGNORE';
print "starting build service dispatcher\n";

# get lock, RUNLOCK stays open for the lifetime of the process
mkdir_p($rundir);
open(RUNLOCK, '>>', "$rundir/bs_dispatch.lock") || die("$rundir/bs_dispatch.lock: $!\n");
flock(RUNLOCK, LOCK_EX | LOCK_NB) || die("dispatcher is already running!\n");
utime undef, undef, "$rundir/bs_dispatch.lock";

# Main loop.  Every round:
#  - updates the load statistics from the finished jobs
#  - assigns open jobs to idle workers
#  - forwards repository and dispatch events
#  - expires %badhost entries that are older than a day
# If no job could be assigned the loop sleeps for a second.
while (1) {

  updateload();

  # sort the idle workers into the architectures they can build
  my $assigned = 0;
  my @idle = grep {!/^\./} ls("$workersdir/idle");
  my %idlearch;
  for my $idle (@idle) {
    my $harch = (split(':', $idle, 2))[0];
    for (@{$cando{$harch} || []}) {
      push @{$idlearch{$_}}, $idle;
    }
  }
  my @archs = List::Util::shuffle(keys %idlearch);
  for my $arch (@archs) {

    if ($BSConfig::specialdispatch && grep {$arch eq $_} values %$BSConfig::specialdispatch) {
      # $BSConfig::specialdispatch maps "project/repository/arch" to the
      # architecture whose idle workers get the jobs of that repository.
      # Workers of such an architecture are used for nothing else.
      print "specials for $arch\n";
      my @specials = grep {$BSConfig::specialdispatch->{$_} eq $arch} keys %$BSConfig::specialdispatch;
      for my $special (@specials) {
        my ($sproj, $srepo, $sarch) = split('/', $special, 3);
        for my $job (List::Util::shuffle(listopenjobs($sarch))) {
          my $info = readxml("$jobsdir/$sarch/$job", $BSXML::buildinfo, 1);
          next unless $info;	# job vanished or is unreadable
          next unless $info->{'project'} eq $sproj && $info->{'repository'} eq $srepo;
          for my $idle (List::Util::shuffle(@{$idlearch{$arch} || []})) {
            my $res = assignjob($job, $idle, $sarch);
            # worker refused the package but stays idle, try the next one
            next if $res && $res eq 'badhost';
            # the worker is either building now or was not usable
            removeidle(\%idlearch, $idle);
            next unless $res;
            $assigned++;
            last;
          }
        }
      }
      next;
    }

    my @b = listopenjobs($arch);

    # poor man's priority, please replace with something sane...
    # Each pattern narrows the job list down to the matching jobs (or the
    # non-matching ones for a "!pattern"), unless that would empty it.
    if ($BSConfig::dispatch_prio) {
      for my $pr (@{$BSConfig::dispatch_prio}) {
        my @bn;
        if ($pr =~ /^!/) {
          my $npr = substr($pr, 1);
          @bn = grep {!/$npr/} @b;
        } else {
          @bn = grep {/$pr/} @b;
        }
        @b = @bn if @bn;
      }
    }

    # after the first assignment of a round at most 5 attempts are made
    # per architecture
    my $tries = 0;
    for my $job (List::Util::shuffle(@b)) {
      last if $assigned && $tries >= 5;
      my @idle = List::Util::shuffle(@{$idlearch{$arch} || []});
      last unless @idle;
      if (%badhost || $BSConfig::specialdispatch || %BSConfig::specialbuildhost) {
        my $info = readxml("$jobsdir/$arch/$job", $BSXML::buildinfo, 1);
        next unless $info;
        # jobs with a special dispatch entry are handled in the branch above
        next if $BSConfig::specialdispatch && $BSConfig::specialdispatch->{"$info->{'project'}/$info->{'repository'}/$arch"};
        @idle = grep {!$badhost{"$info->{'project'}/$info->{'package'}/$info->{'arch'}/@{[(split(':', $_, 2))[1]]}"}} @idle;
        # plain lookup, so that no config entries get autovivified
        my $special = $BSConfig::specialbuildhost{"$info->{'project'}/$info->{'package'}"};
        if ($special && defined $special->{'good'}) {
          # positive list given for this package, the patterns are
          # unanchored and tried in the configured order
          my @newidle;
          for my $i (@{$special->{'good'}}) {
            push @newidle, grep {/$i/} @idle;
          }
          @idle = @newidle;
        }
        if ($special && defined $special->{'bad'}) {
          # negative list given for this package, the patterns have to
          # match the complete worker name
          for my $i (@{$special->{'bad'}}) {
            @idle = grep {!/^$i$/} @idle;
          }
        }
      }
      for my $idle (@idle) {
        next unless -e "$jobsdir/$arch/$job";
        last if $assigned && $tries >= 5;
        $tries++;
        my $res = assignjob($job, $idle, $arch);
        # worker refused the package but stays idle, try the next one
        next if $res && $res eq 'badhost';
        # the worker is either building now or was not usable
        removeidle(\%idlearch, $idle);
        next unless $res;
        $assigned++;
        last;
      }
    }
  }

  # forward repository events to the source server, events that could
  # not be sent are kept and retried in the next round
  for my $evname (ls("$eventdir/repository")) {
    next if $evname =~ /^\./;
    my $ev = readxml("$eventdir/repository/$evname", $BSXML::event, 1);
    next unless $ev;
    eval {
      sendeventtosrcserver($ev);
    };
    if ($@) {
      warn($@);
    } else {
      unlink("$eventdir/repository/$evname");
    }
  }

  # handle dispatch events that are due
  for my $evname (ls("$eventdir/dispatch")) {
    next if $evname =~ /^\./;
    my $ev = readxml("$eventdir/dispatch/$evname", $BSXML::event, 1);
    next unless $ev;
    next if $ev->{'due'} && time() < $ev->{'due'};
    delete $ev->{'due'};
    eval {
      if ($ev->{'type'} eq 'built') {
        # resend to rep server
        # TODO: not implemented, the event is just dropped
      } elsif ($ev->{'type'} eq 'badhost') {
        # NOTE(review): the 'job' field is used as last component of the
        # %badhost key, so it presumably carries the worker name - confirm
        print "badhost event: $ev->{'project'}/$ev->{'package'}/$ev->{'arch'}/$ev->{'job'}\n";
        $badhost{"$ev->{'project'}/$ev->{'package'}/$ev->{'arch'}/$ev->{'job'}"} = time();
      } else {
        sendeventtosrcserver($ev);
      }
    };
    if ($@) {
      warn($@);
    } else {
      unlink("$eventdir/dispatch/$evname");
    }
  }

  sleep(1) unless $assigned;
  print "assigned $assigned jobs\n" if $assigned;

  # forget bad hosts after 24 hours
  if (%badhost) {
    my $now = time();
    for (keys %badhost) {
      if ($badhost{$_} + 24*3600 < $now) {
        print "deleting badhost $_\n";
        delete $badhost{$_};
      }
    }
  }
}
