// Banner text: file identification, GPL notice and change history.
// main() writes it to cerr, ahead of the usage line, when the number of
// command-line arguments is wrong.  It is runtime data, not a comment.
char ProgInfo[]=
"/*                                                \n"
"* File: t2p_affinities_from_align.cpp             \n"
"* Purpose: DTW alignement letter/phoneme          \n"
"* Author: Vincent Pagel ( pagel@tcts.fpms.ac.be ) \n"
"* Version : 0.4                                   \n"
"* Time-stamp: <00/03/09 14:00:22 pagel>         \n"
"*                                                 \n"
"* Copyright (c) 2000 Faculte Polytechnique de Mons (TCTS lab) \n"
"*                                                             \n"
"* This program is free software; you can redistribute it and/or modify \n"
"* it under the terms of the GNU General Public License as published by \n"
"* the Free Software Foundation version 1 \n"
"* \n"
"* This program is distributed in the hope that it will be useful, \n"
"* but WITHOUT ANY WARRANTY; without even the implied warranty of \n"
"* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the \n"
"* GNU General Public License for more details. \n"
"* \n"
"* You should have received a copy of the GNU General Public License \n"
"* along with this program; if not, write to the Free Software \n"
"* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. \n"
"* \n"
"* History: \n"
"* \n"
"*  19/02/99 : Created from Kevin Lenzo perl affinity module \n"
"*             Novelty -> evaluate probability of multi-letters/phoneme associations\n"
"*   \n"
"*  01/03/99 : Novelty -> p(multi-letters| phoneme AND next_letter)\n"
"*  06/03/00 : bug, extra white spaces mismatch alignment\n"
"*/\n";

#include <fstream>
#include <vector>
#include "Affinity.hpp"
#include <iomanip.h>

// Occurrence table: associates a string with a (long) count.
// Reading an absent key through operator[] creates it with a count of 0,
// so callers can simply write  counter[key]++ .
class StringCounter: public map<string, long, less<string> >
{
public:
  // Build a table that holds no entry yet
  StringCounter(): map<string, long, less<string> >() {}
};

// String counter
class PhoneLetterCounter: public map<string,StringCounter, less<string> > 
{
public:
  // Construct and empty assoc table
  PhoneLetterCounter() {};
};

// Raw occurrence counts, indexed first by phoneme and then by letter group:
// plc[phoneme][letters] is incremented by add_assoc().  The values are
// counts; they are only divided into ratios when display_assoc() prints them.
static PhoneLetterCounter plc;
// phone_counter[phoneme]: total number of associations recorded for that
// phoneme by add_assoc() (the denominator used by display_assoc()).
static StringCounter phone_counter;

// If true, also associate groups of several letters (pairs, triplets and so
// on) with a phoneme.  Set by main() when a third argument is given.
static bool multi;
// Alignment symbol that accumulate_assoc() does not count as a phoneme.
// Kept as a global to avoid constructing it again and again.
static const string epsilon="_";

// Normalize number of occurences to a priori probabilities
void
display_assoc()
{
  cout << setprecision(16);
  for(PhoneLetterCounter::const_iterator i=plc.begin(); i!= plc.end(); i++)
	 {
		long total_phone= phone_counter[ (*i).first ];
		const StringCounter *sc= &((*i).second); // table at the second level
		
		for(StringCounter::const_iterator j= sc->begin(); j!= sc->end(); j++)
		  {
			 cout << (*j).first << "|" << (*i).first << char(0) 
					<< (double) ((double) (*j).second/ (double) total_phone) << endl;
		  }
	 }
}

// Add a new association in the affinity matrix
void add_assoc(const string& key, const string& phoneme)
{
  // is the phoneme allready present? if not add empty map
  StringCounter *local_counter= &plc[ phoneme ];
		
  (*local_counter)[ key ]++;  // if the letters were not present, added
  phone_counter[ phoneme ]++; // same for the phoneme
}


// Record the letter/phoneme associations of one aligned word.
//
//  letters: the spelling followed by one extra end marker; parse_assoc()
//           appends '$', so letters holds one more char than phones has items
//  phones:  one symbol per letter, either a phoneme or epsilon
//
// Always recorded, for every position p:
//   - letters[p]             with phones[p]
//   - letters[p..p+1]        with phones[p]   (1 letter of right context)
// When `multi` is set, every window of w >= 2 consecutive positions that
// contains exactly one non-epsilon symbol is also recorded as
//   - letters[begin..begin+w] (the w letters + 1 context letter) with that symbol
void
accumulate_assoc(const string& letters, const vector<string>& phones)
{
  const size_t nb_slots= phones.size();

  // Plain association: one letter with its phoneme (or epsilon), for back-off
  for (size_t pos= 0; pos != nb_slots; ++pos)
    add_assoc( letters.substr(pos, 1), phones[pos] );

  // Same thing with the following letter appended as right context
  for (size_t pos= 0; pos != nb_slots; ++pos)
    add_assoc( letters.substr(pos, 2), phones[pos] );

  if (multi)
    {
      const int nb_phones= static_cast<int>( nb_slots );

      // Window widths grow from pairs up to the whole word
      for (int width= 2; width <= nb_phones; ++width)
        {
          int real_phones= 0;    // non-epsilon symbols currently inside the window
          int emitter= 0;        // position of the latest non-epsilon symbol that entered
          bool found_one= false; // did this width produce at least one association?

          // Pre-load the first width-1 positions; the last one enters below
          int last= 0;
          while (last < width-1)
            {
              if (phones[last] != epsilon)
                {
                  ++real_phones;
                  emitter= last;
                }
              ++last;
            }

          // Slide the window [first, last] one position at a time
          int first= 0;
          while (last < nb_phones)
            {
              // Symbol entering on the right
              if (phones[last] != epsilon)
                {
                  ++real_phones;
                  emitter= last;
                }

              // Exactly one phoneme, everything else epsilon: record it.
              // width+1 letters are taken to include the right-context letter.
              // Windows holding only epsilon are skipped: nothing is recorded.
              if (real_phones == 1)
                {
                  add_assoc( letters.substr(first, width+1), phones[emitter] );
                  found_one= true;
                }

              // Symbol leaving on the left before the next step
              if (phones[first] != epsilon)
                --real_phones;

              ++first;
              ++last;
            }

          // Nothing at this width, so wider windows cannot match either
          if (!found_one)
            break;
        }
    }
}


// Split one aligned dictionary entry and hand it to accumulate_assoc().
//
//  head_word: NUL-terminated spelling; every char is one alignment slot
//  phonet:    NUL-terminated list of symbols separated by one or more space
//             characters (' ' is the only separator; leading and trailing
//             spaces are ignored)
//
// The entry is used only when there is exactly one symbol per letter.
// Otherwise a "NOT ALIGNED" message is written to cerr and the entry is
// dropped.
void
parse_assoc(char* head_word, char* phonet)
{
  string letters(head_word);
  const string phonet_line(phonet);
  vector<string> phones;

  // Tokenize on runs of spaces using std::string only.  Unlike the
  // strdup/strtok/free sequence there is no manual buffer that could leak if
  // push_back throws, no unchecked allocation result, and no reliance on
  // strtok's hidden static state.  The resulting tokens are the same.
  string::size_type from= phonet_line.find_first_not_of(' ');
  while (from != string::npos)
    {
      const string::size_type to= phonet_line.find(' ', from);
      if (to == string::npos)
        {
          // Last symbol runs up to the end of the line
          phones.push_back( phonet_line.substr(from) );
          break;
        }
      phones.push_back( phonet_line.substr(from, to-from) );
      from= phonet_line.find_first_not_of(' ', to);
    }

  // THE WORDS MUST BE ALIGNED: one symbol per letter
  if ( letters.size() != phones.size() )
    {
      cerr << "NOT ALIGNED: " << head_word << " and " << phonet << endl;
      return;
    }

  // The one-letter right context needs something after the last letter:
  // mark the end of the word with a dummy '$'
  letters+="$";
  accumulate_assoc(letters,phones);
}


// Entry point.
//
// Usage: prog aligned_dictionary nb_tags [multi]
//   aligned_dictionary: text file, one entry per line:
//                       word, nb_tags tag fields, then the space separated
//                       phonetization up to the end of the line
//   nb_tags:            number of tag fields to skip after the word (atoi)
//   multi:              any third argument enables multi-letter associations
//
// Exit status: 0 on success, 1 on bad usage, 2 if the dictionary cannot be
// opened, 3 if reading stopped early because a field did not fit its buffer
// or the stream failed (the table accumulated so far is still printed).
int main(int argc, char **argv)
{
  if ( (argc!=3)  &&
       (argc!=4))
    {
      cerr << ProgInfo;
      cerr << "Usage: " << argv[0] << " aligned_dictionary nb_tags [multi]" << endl;
      return 1;
    }

  char* dictionary_in_name= argv[1];
  int nb_tags=atoi(argv[2]);
  multi= (argc>=4);

  // *********************************
  // Get each word alignment
  // *********************************
  ifstream dictionary_in(dictionary_in_name);
  if (!dictionary_in.is_open())
    {
      cerr << "Dictionary fails\n";
      return 2;
    }

  // Warning: don't use notation "dictionary_in >> head_word" because
  // it requires you set "locale" properly when compiling foreign languages
  // unless you wish unwanted behavior upon accented chars
  int exit_status=0;
  int counter=0;
  char head_word[1024];
  while(dictionary_in.getline(head_word, sizeof(head_word),' '))
    {
      dictionary_in >> ws; // eat white spaces

      // Skip the tag fields, if any
      for(int i=0;i<nb_tags;i++)
        {
          char tag[255];
          dictionary_in.getline(tag, sizeof(tag),' ');
          dictionary_in >> ws; // eat white spaces
        }

      char phonet[1024];
      phonet[0]='\0'; // defined contents even if the read below stores nothing
      dictionary_in.getline(phonet, sizeof(phonet));

      // failbit without eofbit: a tag or the phonetization overflowed its
      // buffer (or the stream broke).  The data is truncated and every
      // later read would fail, so report it instead of stopping silently.
      // A failure caused by end of file still goes to parse_assoc(), which
      // reports the incomplete entry as not aligned.
      if (dictionary_in.fail() && !dictionary_in.eof())
        {
          cerr << "Entry " << (counter+1) << " (" << head_word
               << "): field too long or read error, stopping" << endl;
          exit_status=3;
          break;
        }

      parse_assoc(head_word,phonet);

      counter++;
      if ((counter%1000) == 0 )
        cerr << counter << endl;
    }

  // A normal end of input always raises eofbit; if the loop ended without it
  // the head word itself overflowed its buffer (or the stream broke)
  if (exit_status==0 && !dictionary_in.eof())
    {
      cerr << "Entry " << (counter+1)
           << ": word too long or read error, stopping" << endl;
      exit_status=3;
    }

  display_assoc();
  return exit_status;
}
