#!/usr/local/bin/perl
# File: t2p_affinities_aling.pl
# Authors: Kevin Lenzo and Vincent Pagel
# 
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation version 1
#
# This program is distributed in the hope that it will be useful, 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# HISTORY:
# created by kevin lenzo   11/97
#
# vincent pagel 04/98 -> introduce function dtw to replace the
#      allNChooseK in the final alignment (faster, and can have a
#      clear decision procedure for the transcription of double
#      consonants  
#      e.g. previously kill -> k I _ l  and hill -> h I l _
#      now kill -> k I _ l and hill -> h I _ l (emit _ as soon
#      as possible)
#      Still assume that one letter emits 0 or 1 phoneme
# 
# vincent pagel 28/04/98: include kevin's extern affinity matrix
#
# vincent pagel 02/02/99: keep track of weakest probability => allows
# you to remove some words and avoid numerical instabilities

$verbosity = 2;

unless (@ARGV) {
    print STDERR "
    usage: $0 <affinity_file> <dictionary> <nb_skip> <THRESHOLD>

           finds an alignment between letters and the phones
           for the given dictionaries.

           No dictionary given -- please specify a dictionary; 
           for example,

              $0 cmu_affinity cmu58k94.dic > cmu58k94.t2p_dict

           Input is a dictionary with orthography on the left
           and phones on the right, white space seperated. 

              HOWLS HH AW L Z

           one per line.  The units may be any set.
           
           Nb_skip indicate the number of TAGS in the line
";
    exit(1);
}

$t2p_affinities = shift @ARGV;
$dict= shift @ARGV;
$nb_skip= shift @ARGV;
$threshold= shift @ARGV;

do $t2p_affinities || die "can't open $t2p_affinities: $!\n";
print STDERR "# loaded affinities.\n";

# Load each dictionnary
my $grandCount = 0;
print STDERR "getting dictionary $dict...\n" if ($verbosity);
my $count = 0 ;
	 
if (!open(DICTIONARY, $dict)) 
{
	 warn "\tNo dictionary file $dict\n";
	 die();
} 
	 
while (<DICTIONARY>) 
{
	 my ($word,$phon) = split(/\s+/, $_,2);
	 
	 $word =~ s/(\S)\(.*/$1/; # strip alternatives 
	 $word =~ tr/A-Z/a-z/;    # downcase
	 
	 &dtw($word,$phon);
	 $count++;
}
$grandCount += $count;
if ($verbosity) {
	 print STDERR "\t$count ($grandCount) dictionary words loaded\n";
}

# Dynamic time warping of letter onto phonemes
sub dtw {
	 my ($w,$define)=@_;
	 
	 @letters = split(//, $w);
	 @phones = split(/\s+/, $define);

	 @tag= @phones[0..$nb_skip-1];
	 @phones= @phones[$nb_skip..scalar(@phones)-1];

	 # Add a common starting point in the matrix for DTW
	 unshift @letters,'_';
	 unshift @phones,'_';
	 
	 $diff= $#letters - $#phones;
	 
	 if ($diff<0)
	 {
		  # assume that one letter emits 0 or 1 phoneme
		  warn "Won't align @letters and @phones\n";
		  return;
	 }
	 
	 # DTW iteration, allowed move:  
	 #                   from (l,p) to (l+1,p)
	 #                   from (l,p) to (l+1,p+1)
	 # 
	 # Consequence of possible moves the matrix is sparse ( compute
	 # values around diagonal )
	 
	 # Cumulated weight matrix
	 $cumul->[0][0]=1.0;   
	
    # Best letter2phone alignement so far
	 $path->[0][0]='';
	 
	 # start at letter 1, as [0][0] is given
	 foreach $l (1..$#letters)
	 {
		  # constraint on accessible paths due to sparseness
		  foreach $p ($l-$diff..$l)  
		  {
				if ( ($p>=0) && ($p<=$#phones)) # in bounds
				{
					 # best previous score to reach that position
					 #my $max= MinFloat;
					 my $local_max= -1e99;
					 my $transcript;
					 
					 if ( ($p-1) >= 0) # in bounds
					 {   # diagonal move
						  
						  $transcript= $path->[$l-1][$p-1] . @phones[$p]. " ";
						  $local_max= $cumul->[$l-1][$p-1] * &proba(@letters[$l],@phones[$p]);
					 }
					 
					 if ( ($l-1)>= $p ) # in bounds
					 {   # horizontal move
						  my $new_max;
						  $new_max= $cumul->[$l-1][$p] * &proba( @letters[$l],'_');

						  # Fight against numerical instability -> we want to issue 
					     # epsilon as late as possible (French gets a lot of mute in
						  # the final syllable
						  if ($new_max * 1.000001  >= $local_max)
						  {
								$local_max=$new_max;
								$transcript= $path->[$l-1][$p] . '_ ';
						  }
					 }
					 
					 # Best path to arrive here
					 $cumul->[$l][$p]= $local_max;
					 $path->[$l][$p]= $transcript;

                print " $local_max $transcript\n";
					 # print " Cumul is $local_max*$local_weakest path is $transcript\n";
				}
		  }
	 }

	 # Should we eliminate weak alignments !?
	 if ( ( $threshold ) &&
			(log($cumul->[$#letters][$#phones])/$#letters < log(0.1)))
	 {
		  warn "Average proba won't align @letters and @phones for $cumul->[$#letters][$#phones]/$#letters\n";
		  return;
	 }
	 # Upper corner contains the best pasth to go from [0,0] to [l,p]
	 print "$w @tag $path->[$#letters][$#phones]\n";
}

# All letters should have a default probability
sub proba
{
	 my ($let,$phon)=@_;

	 if (exists($assocLP{ $let }{ $phon }))
	 {
		  $assocLP{ $let }{ $phon }; 
	 }
	 else
	 {
		  1e-7; # Well, allow an association with the devil ....
	 }
}
