#!/usr/local/bin/perl
# Richard Beaufort 06/99
# Used to compute word accuracy over Timbl output
# command : compar2vec.pl aligned_dictionary output_Timbl
#
# aligned dictionary:	term [TAG] p h o n e s
# Timbl's output structure : 	l e t t e r s x y
#	with 	x = class
# 		   y = Timbl's prediction

# Both input files are mandatory. Print the usage banner and abort when
# either is missing: a lone argument used to slip through this check and
# the script then ran with an undefined Timbl file name.
if (@ARGV < 2)
{
    print "\n------------------------------------------------------\n";
    print "command : compar2vec.pl aligned_library ";
    print "output_Timbl\n\n";
    print "aligned_library's structure : \tterm [TAG] ";
    print "p h o n e s\n";
    print "output_Timbl's structure : \tl e t t e r s x y\n";
    print "	\twith \tx = class\n";
    print " \t\t\ty = Timbl's prediction\n";
    print "------------------------------------------------------\n\n";
    # Trailing newline keeps Perl from appending "at FILE line N".
    die "compar2vec.pl: two arguments expected (aligned_library output_Timbl)\n";
}
# Command-line arguments: the aligned dictionary first, then Timbl's output
my $dico= shift @ARGV;
my $vector= shift @ARGV;

# A three-argument open with an undefined path opens an anonymous
# temporary file instead of failing, so reject missing names explicitly.
defined $dico && defined $vector
    or die "compar2vec.pl: two file names expected (aligned_library output_Timbl)\n";

# Open both inputs read-only. The explicit '<' mode stops special characters
# in a file name from being interpreted as an open mode or a pipe, and a
# failed open is reported immediately instead of producing empty reads later.
open(DICO, '<', $dico)
    or die "Cannot open aligned dictionary '$dico': $!\n";
open(VECTOR, '<', $vector)
    or die "Cannot open Timbl output '$vector': $!\n";

# Running totals. All counters start at 0 so that the final summary prints
# "0" rather than an empty string when nothing matched.
my $phoncorr=0;   # letters whose prediction equals the expected class
my $phontot=0;    # letters examined
my $totmot=0;     # dictionary words examined
my $totgood=0;    # words whose whole transcription was predicted correctly

# Process the dictionary one word (one line) at a time
while (<DICO>) {
    chomp;
    # A blank line carries no word; counting it would add a spurious
    # "correct" word to the totals.
    next unless /\S/;

    # Only the word and its tag are used from the dictionary line; the
    # expected phones are taken from the class column of Timbl's output.
    my ($mot, $tag)=split(/\s+/, $_, 3);
    my @mot=split(//,$mot);

    my @predic;   # Timbl's prediction for each letter of the word
    my @phon;     # expected class for each letter of the word

    # Timbl's output is read one line per letter of the word
    for my $i (0 .. $#mot) {
        my $vectorX =<VECTOR>;
        # Running out of Timbl lines means the two files are not aligned.
        # Carrying on would compare undef with undef and count the letter
        # as correct.
        defined $vectorX
            or die "Timbl output ended before letter ".($i+1)." of word '$mot'\n";
        chomp($vectorX);
        # fields are separated by single spaces
        my @vectorX=split(/ /, $vectorX);
        # Both the class and the prediction are required: with a single
        # field the two indices below name the same element, and with none
        # both are undef, so either way the comparison would always succeed.
        @vectorX >= 2
            or die "Malformed Timbl output at line $. (word '$mot'): '$vectorX'\n";

        # last field = Timbl's prediction, the one before it = expected class
        my $predicted=  $vectorX[-1];
        my $wanted= $vectorX[-2];
        $phon[$i] = $wanted;
        $predic[$i] = $predicted;

        # class and prediction comparison
        if ($predicted eq $wanted)
        {
            # this letter is good
            $phoncorr++;
        }
        $phontot++;
    }

    # number of words
    $totmot++;

    # Drop the "_" symbols before comparing whole words. A trailing space
    # is appended first so that a "_" in last position is also followed by
    # a space and gets removed by the same substitution.
    my $orig= join(" ", @phon);
    my $predic= join(" ",@predic);

    $orig= $orig." ";
    $predic= $predic." ";
    $orig =~ s/_ //g;
    $predic =~ s/_ //g;

    # total transcription comparison (class - prediction)
    if ($orig eq $predic)
    { $totgood++; }
    else
    { print "Wrong $mot\n$tag\n$predic\n$orig\n"; }
}

# Lines left over in Timbl's output mean the two files were not aligned,
# so the figures below are suspect; say so on STDERR without touching the
# report printed on STDOUT.
warn "Warning: Timbl output has more lines than the dictionary has letters\n"
    unless eof(VECTOR);

close(DICO);
close(VECTOR);

# Totals and percentages. Counters that were never incremented are shown
# as 0, and an empty run reports 0 % instead of dying on a division by zero.
my $nbgood= $totgood || 0;
my $nbmot= $totmot || 0;
my $nbphoncorr= $phoncorr || 0;
my $nbphon= $phontot || 0;
my $pourcentgood= $nbmot ? $nbgood/$nbmot*100 : 0;
my $pourcentgoodphon= $nbphon ? $nbphoncorr/$nbphon*100 : 0;
print "\nWords corrects: $nbgood / $nbmot ($pourcentgood %)\n";
print "Letters corrects: $nbphoncorr / $nbphon ($pourcentgoodphon %)\n";

