#!/usr/bin/perl -w
# -*- cperl-indent-level: 8; indent-tabs-mode: t -*-
# Checks all .po files under the current directory (and subdirectories)
#
# Written by Francois-Xavier Duranceau <duranceau@free.fr>
# and Stephan Kulow <coolo@kde.org>
# and David Faure <faure@kde.org>

use strict;
use Cwd;
use File::Basename;

my $line_number;
my $look_ahead = "";
my $fuzzy;
my $no_c_format;
my $get_msg_current_file;
my $topdir;

sub read_line {
	$line_number++;
	$look_ahead = <INPUT>;
	return $look_ahead;
}

# Read a msgid and return three values:
#  @(comment, msgid (reformatted in one line), raw msgid (as read))
# Also sets a few vars: $fuzzy, $no_c_format
sub get_msgid() {
	my $line = '';
	$fuzzy = 0;
	$no_c_format = 0;
	my $comment = "";

	while ( $line = read_line() ) {
		die "msgstr found when a msgid was expected ($get_msg_current_file:$line_number)"
		if ( $line =~ /^\s*msgstr/ );
		
		if ( $line =~ /^\#,.*fuzzy/) {
			$fuzzy = 1;
		}

		if ( $line =~ /^\#,.*no-c-format/) {
			$no_c_format = 1;
		}

		if ( $line !~ /^\s*msgid\s+"(.*)"\s*\n/ ) {
			$comment .= $line;
			next;
		}

		my $rawstr = 'msgid "' . $1 . "\"\n";
		#               print "found a msgid (`$1')\n";
		my $str = $1;
		while ( $line = read_line() ) {
			last if($line !~ /^\s*"(.*)"\s*\n/ );
			$str .= $1;
			$rawstr .= $line;
		}

		return ($comment, $str, $rawstr);
	}
	return ($comment,undef,undef);
}


# Read a complete msgstr and return @(msgstr, raw msgstr)
sub get_msgstr() {

	my $rawstr = $look_ahead;
	die "expected msgstr not found at $get_msg_current_file:$line_number"
		if( $look_ahead !~ /^\s*msgstr\s+"(.*)"/ );
		
#	print "found a msgstr (`$1')\n";

	my $str = $1;
	my $line = '';
	while ( $line = read_line() ) {
		last if($line !~ /^\s*"(.*)"/ );
		$str .= $1;
		$rawstr .= $line;
	}
	return ($str, $rawstr);
}

# Read msgid from INPUT, and write it into OUTPUT
my %plurals =
(
 'af' => 2,
 'ar' => 4,
 'az' => 1,
 'be' => 3,
 'bg' => 2,
 'bn' => 2,
 'br' => 1,
 'bs' => 3,
 'ca' => 2,
 'cs' => 3,
 'csb' => 3,
 'cy' => 2,
 'da' => 2,
 'de' => 2,
 'el' => 2,
 'en_GB' => 2,
 'eo' => 2,
 'es' => 2,
 'et' => 2,
 'eu' => 2,
 'fa' => 1,
 'fi' => 2,
 'fo' => 2,
 'fr' => 2,
 'fy' => 2,
 'ga' => 5,
 'gl' => 2,
 'he' => 2,
 'hi' => 2,
 'hu' => 1,
 'hr' => 3,
 'hsb' => 4,
 'id' => 1,
 'is' => 2,
 'it' => 2,
 'ja' => 1,
 'ka' => 1,
 'kk' => 1,
 'km' => 1,
 'ko' => 1,
 'ku' => 2,
 'ky' => 2,
 'lb' => 2,
 'lo' => 1,
 'lt' => 3,
 'lv' => 3,
 'mk' => 3,
 'mn' => 2,
 'ms' => 2,
 'mt' => 4,
 'nl' => 2,
 'nb' => 2,
 'nds' => 2,
 'ne' => 2,
 'nn' => 2,
 'nso' => 2,
 'pa' => 2,
 'pl' => 3,
 'pt' => 2,
 'pt_BR' => 2,
 'ro' => 3,
 'ru' => 3,
 'rw' => 2,
 'se' => 2,
 'sk' => 3,
 'sl' => 4,
 'sr' => 3,
 'sr@Latn' => 3,
 'ss' => 2,
 'sv' => 2,
 'ta' => 2,
 'te' => 2,
 'tg' => 2,
 'th' => 1,
 'tr' => 1,
 'tt' => 1,
 'uk' => 3,
 'uz' => 2,
 'ven' => 2,
 'vi' => 1,
 'wa' => 2,
 'xh' => 2,
 'xx' => 1,
 'zh_CN' => 1,
 'zh_HK' => 1,
 'zh_TW' => 1,
 'zu' => 2
);

# Check that the two strings have matching %1, %2 etc.
# (only digits, not other words)
# Returns 0 (false) on error and 1 (true) on success
sub check_for_percent_digit($$$$) {
	my ( $first, $second, $second_descr, $current_file ) = @_;
	#print STDERR "Checking for percent: \n   $first\n    $second\n";
	while ( $first =~ m/(%[0-9]+)/g ) {  # for each %[0-9] in $first
		my $tok = $1;
		my $rtok = $tok;
		while ( $second !~ s/$tok// ) { # try finding+removing
			chop $tok; # didn't work, remove last char
			if ( $tok eq '%' || $tok eq '%0' ) {
				print STDERR "$second_descr doesn't contain $rtok at $current_file:$line_number\n";
				return 0;
			}
		}
	}
	if ( $second =~ m/(%[0-9]+)/ ) {
		print STDERR "remaining $1 in $second_descr $second at $current_file:$line_number\n";
		return 0;
	}
	return 1;
}

# Check a msgid/msgstr pair for consistency
# Returns 0 on error (i.e. fuzzify this msgstr) and 1 on success (think false/true)
sub check_msgstr($$$$$){
	my ($msgid, $msgstr,$desktop_file,$plural_forms,$current_file) = @_;
	if ($desktop_file) {
		my $tag = $msgid;
		$tag =~ s/=.*//;
		if (substr($msgstr,0,length($tag)+1) ne "$tag=") { # means $msgstr.startsWith("$tag=")
			print STDERR "Tag doesn't match in $current_file:$line_number\n";
			return 0;
		}
		if ($msgstr =~ m/[^\\]\\n/) {
		        print STDERR "translation contains newline in $current_file:$line_number\n";
			return 0;
		}
	} else {
		if (substr($msgstr,0,2) eq '_:') {
			print STDERR "Severe error '_:' encountered in msgstr at $current_file:$line_number\n";
			return 0;
		}

		if (substr($msgstr,0,3) eq '_n:') {
			print STDERR "Severe error '_n:' encountered in msgstr at $current_file:$line_number\n";
			return 0;
		}

		if (substr($msgid,0,3) eq '_n:') {
			if (!$plural_forms) {
				# An error message was already given when trying to find the plural form
				# print STDERR "can't find number of plural forms for $f\n";
				# Return 1 to avoid fuzzing message where the problem is totally elsewhere
				return 1;
			}
			# count the number of \\n, code from http://perl.active-venture.com/pod/perlfaq4-datastrings.html
			# not used anymore though, since we also need to split the lines
			#my $current_plurals = () = $msgstr =~ /\\n/g;
			#if ($current_plurals + 1 != $plural_forms) {
			#	print STDERR "Found wrong number of plural in $current_file:$line_number : ".($current_plurals+1)."\n";
			#	return 0;
			#}
			my @msgid_lines = split(/\\n/,$msgid);
			if ( $#msgid_lines != 1 ) {
				print STDERR "Huh? $#msgid_lines lines in msgid $current_file:$line_number\n";
				return 0;
			}
			my @msgstr_lines = split(/\\n/,$msgstr);
			if ( $#msgstr_lines + 1 != $plural_forms ) {
				print STDERR "Found wrong number of plural in $current_file:$line_number : ".($#msgstr_lines+1)."\n";
				return 0;
			}
			# Checks for consistency in plural forms:
			# If msgid has %n, msgstr should have it on its last line
			my $msgid_lastline = $msgid_lines[1];
			if ( $msgid_lastline =~ m/%n/ ) {
				my $msgstr_lastline = $msgstr_lines[$#msgstr_lines];
				if ( $msgstr_lastline !~ m/%n/ ) {
					print STDERR "msgstr doesn't contain %n at $current_file:$line_number\n";
					return 0;
				}
			}
			# Check that msgid is consistency itself for %0-%9
			#### TODO: support for checking .pot files, and doing this only then?
			my $msgid_firstline = $msgid_lines[0];
			#print STDERR "msgid_firstline = $msgid_firstline\n";
			#print STDERR "msgid_lastline = $msgid_lastline\n";
			return 0 if ( !check_for_percent_digit( $msgid_firstline, $msgid_lastline, "msgid last line", $current_file ) );

			# Now check each line in msgstr for %0-%9
			foreach $_ (@msgstr_lines) {
				#print STDERR "msgstr_line = $_\n";
				return 0 if ( !check_for_percent_digit( $msgid_firstline, $_, "msgstr line", $current_file ) );
			}

			return 1;
		}

		#print STDERR "testing $msgid vs $msgstr\n";


		if ( $msgid =~ m/\\n$/ && $msgstr !~ m/\\n$/ ) {
			print "line feed mismatch: $msgid at $current_file:$line_number\n";
			return 1;
		}
		if ( $msgid !~ m/\\n$/ && $msgstr =~ m/\\n$/ ) {
			print "line feed mismatch: $msgid at $current_file:$line_number\n";
			return 1;
		}

		if ($msgid eq '%l:%M%P' || $no_c_format ) { # exceptions to the %foo check
			return 1;
		}

		if ( substr($msgid,0,3) eq '_: ' ) {
			# Remove comment from msgid
			$msgid =~ s/^_: .*?\\n//;
		}
		while ( $msgid =~ m/(%\w+)/g ) { # for each %foo in msgid
			my $tok = $1;
			my $rtok = $tok;
			while ( $msgstr !~ s/$tok// ) {
				chop $tok; # remove last char
				if ($tok eq '%' || $tok eq '%0') {
					print STDERR "msgstr doesn't contain $rtok at $current_file:$line_number\n";
					return 0;
				}
			}
		}
		if ( $msgstr =~ m/(%\w+)/ ) {
			print STDERR "remaining $1 in $msgstr at $current_file:$line_number\n";
			return 0;
		}
	}

	return 1;
}

sub check_template($)
{
	my $fullrelpath = $_[0]; # kept around for the print
	my $relpath = $fullrelpath;
	# For ./lang/messages/kdebase/foo.po, check if templates/messages/kdebase/foo.pot exists
	$relpath =~ s,^[^/]*/,,; # remove language
	my $template = "templates/${relpath}t";
	#print "template=$template\n";
	if ( ! -f "$topdir/$template" ) {
		# Try to find if it moved
		my $filename = basename($template);
		my $prefix = "";
		$prefix = $1 if ( $relpath =~ /^([^\/]+)/ ); # keep messages or docmessages
		my $pattern = "$topdir/templates/$prefix/*/$filename";
		#print "looking in $pattern\n";
		my $newloc = `ls -1 $pattern 2> /dev/null`;
		if ( $newloc ) {
			chomp($newloc);
			$newloc =~ s,^$topdir/templates/$prefix/,,;
			$newloc =~ s,t$,,;
			$relpath =~ s,$prefix/,,;
			print STDERR "Warning: $fullrelpath looks like the template moved to $newloc. Consider using scripts/rename_or_move $relpath $newloc\n";
		} else {
			#print STDERR "Warning: $fullrelpath looks like an orphan - there is no more $template (check for renamings with svnlastlog)\n";
			#system("cvs rm -f $fullrelpath");
			#next;
		}
	}
}

sub find_plural_form($)
{
	my $current_file = $_[0];
	# print "Processing " . dirname($current_file) . "...\n";
	my $plural_forms = -1;
	my $kdelibs_dir = dirname($current_file);
	while (!($kdelibs_dir eq '/')) {
		if (-f "$kdelibs_dir/messages/kdelibs/kdelibs.po") {
			$kdelibs_dir = basename($kdelibs_dir);
			if (defined $plurals{$kdelibs_dir}) {
				$plural_forms = $plurals{$kdelibs_dir};
			} else {
				print STDERR "no plural forms defined for $kdelibs_dir (kdelibs.po)\n";
				$plural_forms = 0;
			}
			last;
		} elsif (-f "$kdelibs_dir/messages/entry.desktop") { # If we have no kdelibs (KOffice stable branches), may be we have an entry.desktop instead
			$kdelibs_dir = basename($kdelibs_dir);
			if (defined $plurals{$kdelibs_dir}) {
				$plural_forms = $plurals{$kdelibs_dir};
			} else {
				print STDERR "no plural forms defined for $kdelibs_dir (entry.desktop)\n";
				$plural_forms = 0;
			}
			last;
		}
		$kdelibs_dir = dirname($kdelibs_dir);
	}
	if ($plural_forms eq -1 ) {
		print STDERR "no plural forms defined for $current_file (kdelibs.po not found!)\n";
		$plural_forms = 0;
	}
	return $plural_forms;
}

# We must find the topdir (l10n) since the templates are under there
$topdir = Cwd::realpath(cwd());
while ( ! -d $topdir."/scripts" ) {
  $topdir = dirname($topdir);
  if ($topdir eq '/') {
    $topdir = Cwd::realpath(cwd());
    die "scripts not found when going up from $topdir";
  }
}
#print STDERR "topdir=$topdir\n";

my $plural_forms = "";
my $last_language = "";

sub checkfile($) {
	my ($current_file) = @_;
	$get_msg_current_file = $current_file; # global var used by get_msgstr/get_msgid
	#print "Processing $current_file\n";
	my %tofuzzy = ();
	my %toremove = ();
	my $desktop_file;
	if (basename($current_file) =~ "^desktop_.*\.po") {
		$desktop_file = 1;
	} else {
		$desktop_file = 0;
	}

	my $relpath = substr($current_file, length($topdir) + 1);
	check_template( $relpath );

	if ( $relpath =~ m/^([^\/]+)/ ) {
		my $lang = $1;
		if ( $lang ne $last_language ) {
			$plural_forms = find_plural_form($current_file);
			$last_language = $lang;
		}
	} else { die; }

	# Now open the po file
	# my %msgstrs = ();
	my %seen_msgids = ();
	open( INPUT, $current_file ) or die "Can't open $current_file!";

	my $msgid;
	my $msgstr;
	$line_number = 0;
	
	# Retrieve $msgid and $msgstr
	(undef,$msgid,undef) = get_msgid();
	($msgstr,undef) = get_msgstr();
	print STDERR "Warning: No header in $current_file\n" if( length($msgid));

	while ( 1 ) {
		(undef,$msgid,undef) = get_msgid();
		last unless defined $msgid;
		($msgstr,undef) = get_msgstr();

		next if (!length($msgstr));
		if ( $seen_msgids{$msgid}++ ) {
			print STDERR "msgid \'$msgid\' seen twice at $current_file:$line_number\n";
			$tofuzzy{$msgid} = 1;
			$toremove{$msgid}++;
		} else {
			if (!$fuzzy) {
				if (!check_msgstr($msgid, $msgstr, $desktop_file, $plural_forms, $current_file)) {
					$tofuzzy{$msgid} = 1;
				}
			}
                        elsif (!length($msgid)) {
                                print STDERR "ERROR: empty msgid is marked as fuzzy!\n";
                        }
			# $msgstrs{$msgid} = $msgstr;
		}
	}
	close( INPUT );

	if (%tofuzzy || %toremove) {
		# Write out modified file
		open( OUTPUT, ">$current_file.NEW" ) || die;
		open( INPUT, $current_file ) || die;
		my $comment;
		my $rawmsgid;
		my $rawmsgstr;
		my $msgid;

		while( 1 ) {
			($comment,$msgid,$rawmsgid) = get_msgid();
			last unless defined $msgid;
			(undef,$rawmsgstr) = get_msgstr();
			print OUTPUT $comment;

			if ( exists $toremove{$msgid} && $toremove{$msgid} > 0 ) {
				$toremove{$msgid}--; # keep the last one
				next;
			}

			if ( exists $tofuzzy{$msgid} ) {
				print OUTPUT "#, fuzzy\n";
			}
			print OUTPUT $rawmsgid;
			print OUTPUT $rawmsgstr;
			print OUTPUT "\n";
		}
		print OUTPUT $comment;
		close(OUTPUT);
		close(INPUT);
		system("if cmp -s $current_file $current_file.NEW; then rm $current_file.NEW; else echo 'Fixed $current_file'; mv -f $current_file.NEW $current_file; fi");
		#system("cat $current_file.NEW; rm $current_file.NEW");
	}
	#	print "-----\n";
}

sub recursive_check($)
{
	my $dir = shift;
	opendir (DIR, $dir) or die "Can't open $dir: $!";
	my @filenames = grep { /^[^\.]/ } readdir(DIR);
	#print "Entering $dir...\n" if ($#filenames > 0);
	FILENAMELOOP: for my $f (@filenames)
	{
		my $filename = "$dir/$f";
		#print "$filename\n";
		if (-d $filename) {
                        # skip directories with the name "internal"
                        # (PO files being there might be non-KDE and should not be checked with KDE criteria.)
                        next FILENAMELOOP if ( $f =~ /^internal$/ );
                        # do not try to mess with the templates
                        next FILENAMELOOP if ( $f =~ /^templates$/ );
			&recursive_check($filename);
		}
                elsif (-z $filename) {
                        print STDERR "File $filename is empty! Skipping!\n";
                }
		elsif (-f $filename && $filename =~ /\.po$/) {
			if (-x $filename) {
				system("svn -q propdel svn:executable '$filename'");
				system("chmod a-x '$filename'") if -x $filename;
				print STDERR "File $filename had executable bit set, removed.\n";
			}
			&checkfile($filename);
		}
		elsif (-f $filename && $filename =~ /\.pot$/) {
			print STDERR "File $filename has .pot extension, perhaps it should be renamed?\n";
		}
	}
	closedir(DIR);
}

my $startdir;
if ($ARGV[0]) {
	$startdir = $ARGV[0];
	$startdir = cwd() . "/" . $startdir unless ($startdir =~ m,^/,);
} else {
 $startdir = cwd();
}
#print "startdir=$startdir\n"

&recursive_check($startdir);
