#!/usr/bin/perl -w

$| = 1;
use strict;

my %dirs;
my @dirs;
my %modes;
my @modes;
my @modes_type;
my @modes_ghost;
my %files;
my %filesc;

$dirs{'/'} = 0;
push @dirs, '/';

$modes{'40755 0 root:root'} = 0;
push @modes, '40755 0 root:root';
push @modes_type, 040000;
push @modes_ghost, 0;

my $pkg = '';
my $fls = 0;
my $prv = 0;
my $con = 0;
my $obs = 0;

my %con;
my %obs;
my %whatprovides;

die("Usage: findfileconflicts2 packages[.gz]\n") unless @ARGV == 1;

my @ftypes;
$ftypes[001] = 'p';
$ftypes[002] = 'c';
$ftypes[004] = 'd';
$ftypes[006] = 'b';
$ftypes[010] = '-';
$ftypes[012] = 'l';
$ftypes[014] = 's';

sub beautify_mode {
  my @m = split(' ', $modes[$_[0]], 3);
  my $fm = oct($m[0]);
  my $ft = $fm & 0770000;
  $fm &= ~0770000;
  $ft = $ftypes[$ft >> 12 & 077] || '?';

  my $rts = '';
  my $rt = oct($m[1]);
  $rts .= 'd' if $rt  & 02;
  $rts .= 'c' if $rt  & 01;
  $rts .= 'm' if $rt  & 010;
  $rts .= 'n' if $rt  & 020;
  $rts .= 'g' if $rt  & 0100;
  $rts .= 'l' if $rt  & 0200;
  $rts .= 'r' if $rt  & 0400;
  $rt &= ~0733;
  $rts .= sprintf("%o", $rt) if $rt;
  $rts .= ' ' if $rts;
  return "$rts$ft".sprintf("%03o", $fm)." $m[2]";
}

print "scanning file list\n";
if ($ARGV[0] =~ /\.gz$/) {
  open(FL, "-|", 'gunzip', '-dc', $ARGV[0]) || die("open $ARGV[0]: $!\n");
} else {
  open(FL, '<', $ARGV[0]) || die("open $ARGV[0]: $!\n");
}

while(<FL>) {
  chomp;
  if ($fls) {
    if ($_ eq '-Flx:') {
      $fls = 0;
      next;
    }
    my $lnk = '';
    if (/^(12.*)( -> .*?)$/) {
      $_ = $1;
      $lnk = $2;
    }
    next unless /^(\d+ (\d+) \S+) (.*\/)(.*?)$/;
    my $perms = $1;
    my $n = $dirs{$3};
    if (!defined($n)) {
      $n = @dirs;
      $dirs{$3} = $n;
      $dirs[$n] = $3;
    }
    # ignore link targets and permissions of ghosts
    if ((oct($2) & 0100) != 0) {
      $lnk = '' if $lnk;
      $perms =~ s/^100000/100644/;
    }
    my $m = $modes{"$perms$lnk"};
    if (!defined($m)) {
      $m = @modes;
      $modes{"$perms$lnk"} = $m;
      $modes[$m] = "$perms$lnk";
      $modes_type[$m] = oct($perms) & 07770000;
      $modes_ghost[$m] = oct($2) & 0100;
    }
    my $f = "$n/$4";
    if (exists $files{$f}) {
      $filesc{$f} ||= [ $files{$f} ];
      push @{$filesc{$f}}, "$pkg/$m";
    } else {
      $files{$f} = "$pkg/$m";
    }
    next;
  }
  if ($prv) {
    if ($_ eq '-Prv:') {
      $prv = 0;
      next;
    }
    s/ .*//;	# no version stuff;
    push @{$whatprovides{$_}}, $pkg;
    next;
  }
  if ($con) {
    if ($_ eq '-Con:') {
      $con = 0;
      next;
    }
    s/ .*//;	# no version stuff;
    s/^otherproviders\((.*)\)$/$1/;
    push @{$con{$pkg}}, $_;
    next;
  }
  if ($obs) {
    if ($_ eq '-Obs:') {
      $obs= 0;
      next;
    }
    s/ .*//;	# no version stuff;
    push @{$obs{$pkg}}, $_;
    next;
  }
  if (/^=Pkg: (.*)/) {
    $pkg = $1;
    my $n = $pkg;
    $n =~ s/ .*//;
    push @{$obs{$pkg}}, $n;
    next;
  }
  if ($_ eq '+Con:') {
    $con = 1 if $pkg;
    next;
  }
  if ($_ eq '+Obs:') {
    $obs = 1 if $pkg;
    next;
  }
  if ($_ eq '+Prv:') {
    $prv = 1 if $pkg;
    next;
  }
  if ($_ eq '+Flx:') {
    $fls = 1;
    next;
  }
}
close(FL) || die("close failed\n");

print "currently have ".@dirs." dirs and ".@modes." modes\n";

# connect dirs and add all dirs as files
print "connecting ".@dirs." directories\n";
my @implicit_conflicts;
for (@dirs) {
  next unless /^(.*\/)(.*?)\/$/;
  my $n = $dirs{$1};
  if (!defined $n) {
    $n = @dirs;
    $dirs{$1} = $n;
    $dirs[$n] = $1;
    next;
  }
  my $f = "$n/$2";
  next unless $files{$f};
  my (undef, $m) = split('/', $files{$f}, 2);
  next if $modes_type[$m] == 040000;
  # whoa, have a conflict. search for other dirs
  my $have_dir;
  for my $pkg (@{$filesc{$f} || []}) {
    (undef, $m) = split('/', $pkg, 2);
    $have_dir = 1 if $modes_type[$m] == 040000;
  }
  next if $have_dir;
  push @implicit_conflicts, $f;
}
print "now ".@dirs." directories\n";

# the old and fast way
#
#for my $f (@implicit_conflicts) {
#  $filesc{$f} ||= [ $files{$f} ];
#  push @{$filesc{$f}}, "implicit_directory 0 0 noarch pkg/0";
#}

if (@implicit_conflicts) {
  print "have implicit conflicts, calculating dir owners\n";
  my @pdirs;	# parent dirs
  for (@dirs) {
    next unless /^(.*\/)(.*?)\/$/;
    $pdirs[$dirs{$_}] = $dirs{$1};
  }
  my %baddir;
  for (@implicit_conflicts) {
    my ($n, $x) = split('/', $_, 2);
    $baddir{$dirs{"$dirs[$n]$x/"}} = $_;
  }
  my $done;
  while (!$done) {
    $done = 1;
    my $i = -1;
    for (@pdirs) {
      $i++;
      next unless defined $_;
      next unless $baddir{$_} && !$baddir{$i};
      $baddir{$i} ||= $baddir{$_};
      undef $done;
    }
  }
  undef @pdirs;
  # this is not cheap, sorry
  my %baddir_pkgs;
  for my $ff (keys %files) {
    my ($n, undef) = split('/', $ff, 2);
    next unless $baddir{$n};
    for (@{$filesc{$ff} || [ $files{$ff} ]}) {
      my ($pkg, undef) = split('/', $_, 2);
      $baddir_pkgs{$baddir{$n}}->{"$pkg/0"} = 1;
    }
  }
  for my $f (@implicit_conflicts) {
    $filesc{$f} ||= [ $files{$f} ];
    $baddir_pkgs{$f} ||= { "implicit_directory 0 0 noarch pkg/0" => 1 };
    push @{$filesc{$f}}, sort keys %{$baddir_pkgs{$f}};
  }
}

%files = ();	# free mem

# reduce all-dir conflicts and trivial multiarch conflicts
print "reducing trivial conflicts\n";
for my $f (sort keys %filesc) {
  my $allm;
  my $allc = 1;
  my $pkgn;
  my $pl;
  for my $pkg (@{$filesc{$f}}) {
    my ($p, $m) = split('/', $pkg, 2);
    die unless $p =~ /^([^ ]+) /;
    $allm = $m unless defined $allm;
    $allm = -1 if $allm != $m;
    $pkgn = $1 unless defined $pkgn;
    $allc = 0 if $pkgn ne $1;
    $allc = 0 if $pl && $p eq $pl;
    $pl = $p;
  }
  if ($allc) {
    delete $filesc{$f};
    next;
  }
  if (defined($allm) && $allm >= 0 && $modes_type[$allm] == 040000) {
    delete $filesc{$f};
    next;
  }
}

print "checking conflicts\n";
my %pkgneeded;
my %tocheck;
my %tocheck_files;
for my $f (sort keys %filesc) {
  my @p = sort(@{$filesc{$f}});	# normalize
  $filesc{$f} = [ @p ];
  s/\/.*// for @p;
  $pkgneeded{$_} = 1 for @p;
  my $pn = join("\n", @p);
  $tocheck{$pn} ||= [ @p ];
  push @{$tocheck_files{$pn}}, $f;
}

my %conflicts;
for my $pkg (sort keys %con) {
  next unless $pkgneeded{$pkg};
  for my $c (@{$con{$pkg}}) {
    for my $p (@{$whatprovides{$c} || []}) {
      next if $p eq $pkg;
      $conflicts{"$pkg\n$p"} = 1;
      $conflicts{"$p\n$pkg"} = 1;
    }
  }
}

for my $pkg (sort keys %obs) {
  next unless $pkgneeded{$pkg};
  for my $c (@{$obs{$pkg}}) {
    for my $p (@{$whatprovides{$c} || []}) {
      next if $p eq $pkg;
      next unless $p =~ /^\Q$c\E /;
      $conflicts{"$pkg\n$p"} = 1;
      $conflicts{"$p\n$pkg"} = 1;
    }
  }
}

# let 32bit packages conflict with the i586 version
for my $pkg (sort keys %pkgneeded) {
  next unless $pkg =~ /^([^ ]+)-32bit /;
  my $n = $1;
  for my $p (@{$whatprovides{$n} || []}) {
    next unless $p =~ /^\Q$n\E .* i[56]86$/;
    next if $p eq $pkg;
    $conflicts{"$pkg\n$p"} = 1;
    $conflicts{"$p\n$pkg"} = 1;
  }
}

print "found ".(keys %tocheck)." conflict candidates\n";
print "checking...\n";
# now check each package combination for all candidates
for my $tc (sort keys %tocheck) {
  my @p = @{$tocheck{$tc}};
  while (@p) {
    my $p1 = shift @p;
    for my $p2 (@p) {
      next if $conflicts{"$p1\n$p2"};
      my @con;
      for my $f (@{$tocheck_files{$tc}}) {
	my @pp = grep {s/^(?:\Q$p1\E|\Q$p2\E)\///} map {$_} @{$filesc{$f}};
	next unless @pp;
	# ignore if (all directories or all ghosts or all links) and all same mode;
	my %allm = map {$_ => 1} @pp;
	my $info = '';
	if (keys(%allm) == 1) {
	  my $m = (keys(%allm))[0];
	  # all modes/flags are the same
	  # no conflict if all dirs or all ghosts or all links
	  next if $modes_type[$m] == 040000 || $modes_type[$m] == 0120000 || $modes_ghost[$m] == 0100;
	} else {
	  # don't report mode mismatches for files/symlinks that are not ghosts
	  for my $m (keys %allm) {
	    if (($modes_type[$m] != 0100000 && $modes_type[$m] != 0120000) || $modes_ghost[$m] == 0100) {
	      $info = ' [mode mismatch: '.join(', ', map {beautify_mode($_)} @pp).']';
	      last;
	    }
	  }
	}
	# got one!
	$f =~ /^(\d+)\/(.*)/;
	push @con, "$dirs[$1]$2$info";
      }
      next unless @con;
      my @sp1 = split(' ', $p1);
      my @sp2 = split(' ', $p2);
      print "found conflict of $sp1[0]-$sp1[1]-$sp1[2].$sp1[3] with $sp2[0]-$sp2[1]-$sp2[2].$sp2[3]:\n";
      print "  - $_\n" for @con;
    }
  }
}

