/*
 * Banner text (file header, licence terms and change history) kept as a
 * string: main() writes it to stderr, followed by the usage line, when the
 * program is started without any argument.
 */
char ProgInfo[]=
"/* \n"
"* File: id3.c\n"
"* Purpose: ID3 learning algorithm\n"
"* Author: Vincent Pagel ( pagel@tcts.fpms.ac.be )\n"
"* Version : 0.2\n"
"* Time-stamp: <1999-04-23 13:45:32 pagel>\n"
"*\n"
"* Copyright (c) 1998 Faculte Polytechnique de Mons (TCTS lab)\n"
"* \n"
"* This program is free software; you can redistribute it and/or modify\n"
"* it under the terms of the GNU General Public License as published by\n"
"* the Free Software Foundation version 1\n"
"*\n"
"* Please quote LETTER TO SOUND RULES FOR ACCENTED LEXICON COMPRESSION\n"
"*              Vincent Pagel, Kevin Lenzo and Alan W. Black\n"
"*              Proc ICSLP98 Sydney\n"
"* in scientific publication using this work\n"
"*\n"
"* This program is distributed in the hope that it will be useful,\n"
"* but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
"* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n"
"* GNU General Public License for more details.\n"
"*\n"
"* You should have received a copy of the GNU General Public License\n"
"* along with this program; if not, write to the Free Software\n"
"* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.\n"
"*\n"
"* History:\n"
"*\n"
"*  11/08/98 : Created. Tiny and fast reimplementation of the id3\n"
"* algorithm released in PERL by Kevin Lenzo ( t2p_id3.pl )\n"
"*\n"
"* Uses qsort at the moment ....\n"
"*\n"
"*  13/08/98 : Well as planned qsort is a pain ( more than 1hour for CMU with\n"
"*     seven letter context )... as memory allocation when splitting against\n"
"*     the first feature is not that huge, I build subtables .... \n"
"*     (qsort of Unix is definetly not n*log(n) and don't know why)\n"
"*\n"
"*  22/04/99 : features on 1byte instead of 4 ... somehow slower but\n"
"*             it spares so much memory\n"
"*/\n";

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <unistd.h>
#include <stdarg.h>

/* Boolean values, used for the int result of id3() */
#define True 1
#define False 0

/* Values accepted by the global `mode`: which output format is produced */
#define MODE_PERL 0 
#define MODE_BIN  1
#define MODE_C    2

/* Output mode (one of the MODE_* constants): the tree is written either as
 * a program (MODE_PERL, MODE_C) or as a data structure (MODE_BIN).
 * Both forms are equivalent, but compilers are not always able to handle
 * huge nested SWITCH or IF structures.
 */
int mode;

/* Minimum entropy gain a split must bring to justify further expansion
 * (passed to bestgain_PartitionPerAttrib by id3) */
double mingain = 0.001;

/* Small integer types.
 * int8 is explicitly signed: the signedness of plain `char` is
 * implementation-defined, and the program stores and tests the value -1
 * in variables of this type ("no attribute found", "no muted phoneme").
 */
typedef signed char int8;
typedef short int16;

/* Counting the number of phoneme for a given partition */
typedef int Counter;

/* A feature code (index into feature_list or phoneme_list).
 * It used to be wider; it is an int8 to spare memory
 * (note: on SUN it costs time, 8-bit data being less efficient to access)
 */
typedef int8 Feature;

/* A learning vector is a fixed size vector of features */
typedef Feature* LearningVector;

/* A table of learning data.
 *   nb_vector   : number of vectors currently stored in tab
 *   available   : number of slots allocated for tab (grown by append_Data);
 *                 clone_Data leaves it at 0 to mark a table it does not own
 *   vector_size : number of codes in each vector, target phoneme included
 *   tab         : the vectors themselves
 */
typedef struct 
{
  int nb_vector;
  int available;
  int vector_size;
  LearningVector* tab;
} Data;

/* Field accessors (usable as lvalues). The argument is parenthesised so
 * that any pointer expression, e.g. `p + 1`, can be passed safely. */
#define nb_vector(D)  ((D)->nb_vector)
#define available(D)  ((D)->available)
#define vector_size(D)  ((D)->vector_size)
#define tab(D) ((D)->tab)

/* An attribute is the index of a column in a learning vector */
typedef int8 Attribute;

/* A list of attributes among which the most discriminant one is searched.
 *   nb_attrib : number of entries in attribs
 *   attribs   : column indices still available for a decision node
 */
typedef struct
{
  int nb_attrib;
  Attribute* attribs;
} Attribs;     /* attribute list */

/* Field accessors (usable as lvalues); the argument is parenthesised so
 * that any pointer expression can be passed safely. */
#define attribs(AT)  ((AT)->attribs)
#define nb_attrib(AT) ((AT)->nb_attrib)

/*
 * Counters for the partition of a set into phonemic classes
 */
typedef struct
{
  int nb_class;        /* number of entries in perclass */
  Counter total;       /* total number of items         */
  Counter* perclass;   /* subtotal for each class       */
} Partition;

/* Field accessors (usable as lvalues); the argument is parenthesised so
 * that any pointer expression can be passed safely. */
#define nb_class(P) ((P)->nb_class)
#define total(P) ((P)->total)
#define perclass(P) ((P)->perclass)

/*
 * One Partition per feature value: how the phonemes are distributed among
 * the vectors showing each feature value in a given column
 */
typedef struct
{
  Partition** feature_tab;   /* one Partition per feature code  */
  Feature nb_feature;        /* number of entries in feature_tab */
} PartitionPerFeature;

/* Field accessors (usable as lvalues); the argument is parenthesised so
 * that any pointer expression can be passed safely. */
#define nb_feature(P) ((P)->nb_feature)
#define feature_tab(P) ((P)->feature_tab)

/*
 * Co-occurrence statistics of a data table: one PartitionPerFeature per
 * candidate column plus the overall phoneme distribution
 */
typedef struct
{
  PartitionPerFeature** part_tab; /* Table of Partitions for each column */
  Feature nb_att;                 /* Number of column */
  Feature best_phone;           /* Most probable phone (most frequent) */
  Partition* global_part;         /* Partition against phonemes */
} PartitionPerAttrib;

/* Field accessors (usable as lvalues); the argument is parenthesised so
 * that any pointer expression can be passed safely. */
#define nb_att(P) ((P)->nb_att)
#define part_tab(P) ((P)->part_tab)
#define global_part(P) ((P)->global_part)
#define best_phone(P) ((P)->best_phone)

/*
 * Global variables 
 */

/* !!! Column index read by compare_Vector (quicksort comparison function) */
Attribute current_attrib=0;

/* Reference table encoding the phoneme set: phoneme_list[code] is the name
 * of the phoneme, nb_phoneme the number of registered entries
 * (filled by encode_phoneme) */
Feature nb_phoneme=0;
char* phoneme_list[255];

/* Reference table encoding the feature alphabet: feature_list[code] is the
 * name of the feature, nb_feature the number of registered entries
 * (filled by encode_feature) */
Feature nb_feature=0;
char* feature_list[255];

/* Buffering for text output: print_out keeps the latest frame in `buffer`
 * and its indentation in `tabul` until the next call, so that unprint_out
 * can still drop it */
char buffer[2048]="";
char tabul[256]="";


/*
 * HERE WE GO
 */

void tabulate(int depth)
	  /*
		* Prepare in `tabul` the indentation used to justify the output:
		* two spaces per level of depth.
		*
		* A negative depth gives an empty indentation. The depth is clamped to
		* what fits in `tabul`, where the former strcat loop overran the array
		* for depths above 127.
		*/
{
  size_t max_levels = (sizeof(tabul) - 1) / 2;
  size_t levels = (depth > 0) ? (size_t) depth : 0;

  if (levels > max_levels)
    levels = max_levels;

  memset(tabul, ' ', 2 * levels);
  tabul[2 * levels] = '\0';
}

void unprint_out()
	  /*
		* Drop the frame that print_out is still holding back, together with
		* its indentation, so that it never reaches the output.
		*
		* This avoids producing
		*  switch(val) {  case a: return A; case b: return B; default: return A;}
		* where the test against 'a' is useless since the default case already
		* returns A.
		*/
{
  /* an empty string is just a terminator in the first cell */
  tabul[0] = '\0';
  buffer[0] = '\0';
}

void print_out(int tab, const char *format, /* args */ ...)
	  /*
		* Delayed printf.
		*
		* tab    = indentation depth of the new frame
		* format = printf-like format, followed by its arguments
		*
		* The frame stored by the previous call is written to stdout (preceded
		* by its indentation unless mode is MODE_BIN), then the new frame is
		* formatted into `buffer` and kept there: the recursion may still decide
		* to drop it with unprint_out().
		*
		* The text is formatted with vsnprintf so that a frame longer than
		* `buffer` is truncated instead of overflowing the array.
		*/
{
  va_list ap;

  /* flush previous string */
  if (mode==MODE_BIN)
    printf("%s",buffer);
  else
    printf("%s%s",tabul,buffer);

  tabulate(tab);

  va_start(ap,format);
  vsnprintf(buffer, sizeof(buffer), format, ap);
  va_end(ap);
}


Feature encode_phoneme(char* phoneme)
	  /*
		* Register a new phoneme, or give its code
		*/
{
  Feature i=0;
  while ( (i<nb_phoneme) &&
			 ( strcmp(phoneme, phoneme_list[i])!=0))
	 {
		i++;
	 }

  if (i==nb_phoneme)
	 { /* New unknown phoneme, add it */
		fprintf(stderr,"New phoneme %s code %i\n",phoneme,nb_phoneme);
		phoneme_list[nb_phoneme]= strdup(phoneme);
		nb_phoneme++;
	 }

  return i;	/* code  */
}

Feature encode_feature(char* feature)
	  /*
		* Register a new feature, or give its code
		*/
{
  Feature i=0;
  while ( (i<nb_feature) &&
			 ( strcmp(feature, feature_list[i])!=0))
	 {
		i++;
	 }
  
  if (i==nb_feature)
	 { /* New unknown feature, add it */
		fprintf(stderr,"New feature %s code %i\n",feature,nb_feature);
		feature_list[nb_feature]= strdup(feature);
		nb_feature++;
	 }
  return i;	/* code  */
}

/*
 * PARTITIONS
 */

Partition* init_Partition(int nb_class)
	  /*
		* Allocate a partition with nb_class counters, all set to zero.
		* Stops the program with "Out of memory" when allocation fails, like
		* the other constructors of this file.
		*/
{
  Partition* my_part= (Partition*) malloc(sizeof(Partition));

  if (!my_part)
    {
      fprintf(stderr,"Out of memory\n");
      exit(1);
    }

  total(my_part)=0;
  nb_class(my_part)=nb_class;
  perclass(my_part)= (Counter*) calloc(nb_class,sizeof(Counter));

  /* calloc may legitimately return NULL for a zero-sized request */
  if (!perclass(my_part) && nb_class > 0)
    {
      fprintf(stderr,"Out of memory\n");
      exit(1);
    }
  return(my_part);
}

void close_Partition(Partition* pt)
	  /* Destructor: give back the counter table, then the partition itself */
{
  Counter* counters = perclass(pt);

  free(counters);
  free(pt);
}

void print_Partition(Partition* pt)
	  /* Dump on stdout the total, then one line per class: name and count */
{
  int cls = 0;

  printf("Total: %i\n",total(pt));

  while (cls < nb_class(pt))
    {
      printf(" Phoneme: %s %i\n",phoneme_list[cls],perclass(pt)[cls]);
      cls++;
    }
}

double entropy_Partition(Partition* partition)
	  /*
		* Entropy, in bits, of the distribution held by the partition:
		*   - sum over the classes of p*log2(p), p = perclass/total
		* Empty classes contribute nothing; an empty partition has entropy 0.
		*
		* The scan is bounded by the partition's own nb_class (as in
		* print_Partition) rather than by the global nb_phoneme, so that the
		* counter table is never read past its allocated size.
		*/
{
  int i;
  double h=0.0;					  /* entropy */

  if (total(partition)==0)
    return h;

  for (i=0; i< nb_class(partition); i++)
    {
      if ( perclass(partition)[i] != 0)
        {
          double prob= (double) perclass(partition)[i] / (double) total(partition);
          h-= prob*log(prob)/log(2);
        }
    }
  return h;
}

PartitionPerFeature* init_PartitionPerFeature(Feature nb_feature,Feature nb_class)
	  /*
		* Allocate one empty Partition of nb_class counters for each of the
		* nb_feature feature codes.
		* Stops the program with "Out of memory" when allocation fails, like
		* the other constructors of this file.
		*/
{
  int i;
  PartitionPerFeature* my_part= (PartitionPerFeature*) malloc(sizeof(PartitionPerFeature));

  if (!my_part)
    {
      fprintf(stderr,"Out of memory\n");
      exit(1);
    }

  nb_feature(my_part)=nb_feature;
  feature_tab(my_part)= (Partition**) malloc(nb_feature* sizeof(Partition*));

  /* malloc may legitimately return NULL for a zero-sized request */
  if (!feature_tab(my_part) && nb_feature > 0)
    {
      fprintf(stderr,"Out of memory\n");
      exit(1);
    }

  /* may be violent, but don't consume that much memory */
  for(i=0; i< nb_feature; i++)
    feature_tab(my_part)[i]= init_Partition(nb_class);

  return(my_part);
}

void close_PartitionPerFeature(PartitionPerFeature* pt)
	  /* Destructor: every per-feature partition, the table, then the object */
{
  int remaining = nb_feature(pt);
  Partition** cell = feature_tab(pt);

  /* walk the table front to back */
  while (remaining-- > 0)
    close_Partition( *cell++ );

  free(feature_tab(pt));
  free(pt);
}

void print_PartitionPerFeature( PartitionPerFeature* pt)
	  /* Dump on stdout, for each feature code, its name and its partition */
{
  int code = 0;

  while (code < nb_feature(pt))
    {
      Partition* cell = feature_tab(pt)[code];

      printf("FEATURE %s :\n", feature_list[code]);
      print_Partition( cell );
      code++;
    }
}

double entropy_PartitionPerFeature(PartitionPerFeature* pt)
	  /*
		* Entropy left after splitting on this column: the entropy of each
		* per-feature partition weighted by its number of items, summed over
		* all the feature codes (not divided by the grand total).
		*/
{
  double weighted_sum=0.0;
  int code;

  for(code=0; code< nb_feature(pt); code++)
    {
      Partition* cell= feature_tab(pt)[code];

      weighted_sum+= entropy_Partition(cell) * total(cell);
    }
  return weighted_sum;
}

PartitionPerAttrib* init_PartitionPerAttrib( Data* dba, Attribs* attribs )
	  /*
		* Scan the table along the available attributes and sum the
		* co-occurrences Feature-Phoneme.
		*
		* dba     = table of learning vectors (column 0 is the phoneme)
		* attribs = columns to collect statistics for
		*
		* The result holds one PartitionPerFeature per attribute, the global
		* phoneme partition, and the most frequent phoneme in best_phone (the
		* first one to reach the highest count; 0 for an empty table, where it
		* was previously left uninitialised).
		* Stops the program with "Out of memory" when allocation fails.
		*/
{
  Counter best_count=0;   
  int i,j;
  PartitionPerAttrib* my_part= (PartitionPerAttrib*) malloc(sizeof(PartitionPerAttrib));

  if (!my_part)
    {
      fprintf(stderr,"Out of memory\n");
      exit(1);
    }

  nb_att(my_part)= nb_attrib(attribs);
  best_phone(my_part)= 0;

  part_tab(my_part)= (PartitionPerFeature**) malloc( nb_att(my_part)*sizeof(PartitionPerFeature*));

  /* the attribute list can be empty, and malloc(0) may return NULL */
  if (!part_tab(my_part) && nb_att(my_part) > 0)
    {
      fprintf(stderr,"Out of memory\n");
      exit(1);
    }

  for(i=0; i< nb_att(my_part); i++)
    part_tab(my_part)[i]= init_PartitionPerFeature(nb_feature,nb_phoneme);

  global_part(my_part)= init_Partition(nb_phoneme);

  /* scan the database */
  for(i=0; i< nb_vector(dba); i++)
    {
      Feature phoneme= tab(dba)[i][0];
      perclass(global_part(my_part))[phoneme]++;
      total(global_part(my_part))++;

      /* collect the predominant phonemic output */
      if (perclass(global_part(my_part))[phoneme] > best_count)
        {
          best_count= perclass(global_part(my_part))[phoneme];
          best_phone(my_part)= phoneme;
        }

      /* scan the attribs */
      for(j=0; j<nb_attrib(attribs); j++)
        {
          Feature feat=    tab(dba)[i][ attribs(attribs)[j] ];

          perclass( feature_tab( part_tab(my_part)[j] )[feat] )[ phoneme ] ++;
          total( feature_tab( part_tab(my_part)[j] )[feat] ) ++;
        }
    }

  return(my_part);
}

void close_PartitionPerAttrib( PartitionPerAttrib* my_part)
	  /* Destructor: per-column statistics, global partition, table, object */
{
  int remaining = nb_att(my_part);
  PartitionPerFeature** column = part_tab(my_part);

  /* walk the table front to back */
  while (remaining-- > 0)
    close_PartitionPerFeature( *column++ );

  close_Partition( global_part(my_part));
  free(part_tab(my_part));
  free(my_part);
}

Feature bestgain_PartitionPerAttrib(PartitionPerAttrib* my_part, double min_gain)
	  /*
		* Pick the column whose split leaves the lowest weighted entropy.
		*
		* Returns its index in part_tab (the first one in case of a tie), or
		* -1 when the gain (global entropy minus remaining entropy per item)
		* does not exceed min_gain, or when there is no column at all.
		*/
{
  Feature winner=-1;
  double lowest=1e30;		  /* lowest remaining entropy seen so far */
  double gain;
  int col;

  for(col=0; col< nb_att(my_part); col++)
    {
      double remaining= entropy_PartitionPerFeature( part_tab(my_part)[col] );

      if (remaining < lowest)
        {
          lowest= remaining;
          winner=col;
        }
    }

  gain= entropy_Partition( global_part(my_part) )
        - lowest /  total(global_part(my_part));

  return (gain <= min_gain) ? -1 : winner;
}


/* 
 * Attribute list still to process
 */

Attribs* init_Attribs(int size)
	  /*
		* First attribute list: columns 1 .. size-1, i.e. every column of a
		* vector of `size` codes but the first one, which is the target.
		* Stops the program with "Out of memory" when allocation fails, like
		* the other constructors of this file.
		*/
{
  int i;

  Attribs* my_att= (Attribs*) malloc(sizeof(Attribs));

  if (!my_att)
    {
      fprintf(stderr,"Out of memory\n");
      exit(1);
    }

  attribs(my_att)= (Attribute*) malloc(size*sizeof(Attribute));

  /* malloc may legitimately return NULL for a zero-sized request */
  if (!attribs(my_att) && size > 0)
    {
      fprintf(stderr,"Out of memory\n");
      exit(1);
    }

  nb_attrib(my_att)=size-1;

  for(i=0;i<size-1;i++)
    attribs(my_att)[i]=i+1;

  return(my_att);
}

void close_Attribs(Attribs* att)
	  /* Destructor: give back the column table, then the list itself */
{
  Attribute* columns = attribs(att);

  free( columns );
  free( att );
}

void print_Attribs(Attribs* att)
	  /* Dump on stdout, on a single line, the column indices of the list */
{
  int rank = 0;

  printf("ATTRIB: ");
  while (rank < nb_attrib(att))
    {
      printf("%i ",attribs(att)[rank]);
      rank++;
    }
  printf("\n");
}

Attribs* copy_Attribs(Attribs* att, Feature remove_index)
	  /*
		* Copy constructor leaving one entry out.
		*
		* att          = list to copy
		* remove_index = position, in att, of the entry to drop
		*
		* Returns a new list of nb_attrib(att)-1 entries, to be released with
		* close_Attribs. Stops the program with "Out of memory" when allocation
		* fails, like the other constructors of this file.
		*/
{
  int i;
  int offset=0;

  Attribs* my_att= (Attribs*) malloc(sizeof(Attribs));

  if (!my_att)
    {
      fprintf(stderr,"Out of memory\n");
      exit(1);
    }

  attribs(my_att)= (Attribute*) malloc( nb_attrib(att) * sizeof(Attribute));

  /* malloc may legitimately return NULL for a zero-sized request */
  if (!attribs(my_att) && nb_attrib(att) > 0)
    {
      fprintf(stderr,"Out of memory\n");
      exit(1);
    }

  nb_attrib(my_att)=  nb_attrib(att)-1;

  /* Leave apart attribute remove_index: from there on, read one cell ahead */
  for(i=0; i<nb_attrib(my_att); i++)
    {
      if (i==remove_index)
        offset++;

      attribs(my_att)[i]= attribs(att)[i+offset];
    }
  return(my_att);
}

/********************
 * Learning vectors *
 ********************/
LearningVector init_LearningVector(int size)
	  /*
		* Allocate an uninitialised vector of `size` codes.
		* Stops the program with "Out of memory" when allocation fails.
		*/
{
  LearningVector fresh=  (LearningVector) malloc(size*sizeof(Feature));

  if (fresh)
    return(fresh);

  fprintf(stderr,"Out of memory\n");
  exit(1);
}

void close_LearningVector(LearningVector vector)
	  /* Destructor: give back the storage of a vector */
{
  free(vector);
}

int parse_LearningVector(LearningVector vector, char* line)
	  /*
		* Read the codes of one vector out of a text line.
		*
		* vector = where the codes are stored; the caller must provide room for
		*          every token of the line
		* line   = tokens separated by spaces, modified in place by strtok
		*
		* The first token is the output phoneme, the others are features.
		* Returns the number of codes stored, 0 for a line without any token
		* (such a line used to hand a NULL pointer to encode_phoneme).
		* The count is kept in an int: a Feature-sized counter overflows on
		* long lines.
		*/
{
  static const char separators[]= " \n";
  int count=0;
  char* token= strtok(line,separators);

  /* Blank line: nothing to encode */
  if (token == NULL)
    return 0;

  /* The first token is the output phoneme */
  vector[count++]= encode_phoneme(token);

  /* The rest is the feature list */
  while ((token=strtok(NULL,separators)) != NULL)
    vector[count++]= encode_feature(token);

  return(count);
}

void print_LearningVector(LearningVector vector, int size)
	  /* Dump on stdout the phoneme name, then the names of the size-1 features */
{
  int column = 1;

  printf("%s  ", phoneme_list[ vector[0] ]);

  while (column < size)
    {
      printf(" %s|",feature_list[ vector[column] ] );
      column++;
    }
  printf("\n");
}

int compare_Vector(const void* t1,const void* t2)
	  /*
		* Comparison function for quicksort: t1 and t2 point to two cells of a
		* table of LearningVector, ordered on the column current_attrib.
		* WARNING: current_attrib is a global variable
		*/
{
  LearningVector left= *(const LearningVector *) t1;
  LearningVector right= *(const LearningVector *) t2;

  return left[ current_attrib ] - right[ current_attrib ];
}

/*******************
 * VECTOR DATABASE *
 *******************/

Data* init_Data()
	  /*
		* Constructor of an empty vector database: no vector, no slot
		* allocated, and a vector size of 0 until the caller sets it (the field
		* was previously left uninitialised).
		* Stops the program with "Out of memory" when allocation fails.
		*/
{
  Data* my_data= (Data*) malloc(sizeof(Data));

  if (!my_data)
    {
      fprintf(stderr,"Out of memory\n");
      exit(1);
    }

  nb_vector(my_data)=0;
  available(my_data)=0;
  vector_size(my_data)=0;
  tab(my_data)= NULL;

  return(my_data);
}

void close_Data(Data* dba)
	  /*
		* Destructor of a vector database which owns its vectors: releases
		* every vector, the table and the database.
		* When no slot was ever allocated (available == 0, which is also the
		* mark of a clone) only the database itself is released.
		*/
{
  if (available(dba)!=0)
    {
      int remaining = nb_vector(dba);
      LearningVector* slot = tab(dba);

      /* walk the table front to back */
      while (remaining-- > 0)
        close_LearningVector( *slot++ );

      free( tab(dba) );
    }
  free(dba);
}

void close_clone_Data(Data* dba)
	  /*
		* Destructor of a vector database which does not own its vectors:
		* the table of pointers is released when slots were allocated
		* (available != 0), the vectors themselves are left untouched.
		*/
{
  if (available(dba)==0)
    {
      /* no table of its own */
      free(dba);
      return;
    }

  free( tab(dba) );
  free(dba);
}

Data* clone_Data(Data* dba, Counter low, Counter up)
	  /*
		* Copy constructor -> high-level clone (used with quicksort).
		*
		* The clone is a window of up-low vectors starting at row `low` of dba:
		* it points inside the table of dba instead of copying it, and is
		* marked by available == 0.
		* Stops the program with "Out of memory" when allocation fails.
		*/
{
  Data* window= (Data*) malloc(sizeof(Data));

  if (window == NULL)
    {
      fprintf(stderr,"Out of memory\n");
      exit(1);
    }

  tab(window)= tab(dba) + low;
  available(window)=0;          /* The mark of a clone */
  vector_size(window)= vector_size(dba);
  nb_vector(window)= up - low ;

  return(window);
}

void append_Data(Data* data,LearningVector vector)
	  /*
		* Append a vector at the end of the data table. The vector is not
		* copied: the table stores the pointer.
		* The table grows by 100 slots each time it is full; the program stops
		* with "Out of memory" when it cannot grow.
		*/
{
  if (nb_vector(data)==available(data))
    {
      int grown= available(data) + 100;

      tab(data)= (LearningVector*) realloc( tab(data), grown*sizeof(LearningVector));
      if (tab(data) == NULL)
        {
          fprintf(stderr,"Out of memory\n");
          exit(1);
        }
      available(data)= grown;
    }

  tab(data)[ nb_vector(data)++ ] = vector;
}

static int is_blank_line(const char* line)
	  /* True when the line holds nothing but separators */
{
  return line[ strspn(line," \n") ] == '\0';
}

static void read_required_line(char* line, int size, FILE* input_file)
	  /* Read a line that must be there: stop the program at end of file */
{
  if (!fgets(line,size,input_file))
    {
      fprintf(stderr,"Unexpected end of database\n");
      exit(1);
    }
}

int parse_Data(Data* database, FILE* input_file)
	  /*
		* Load the database of learning vectors.
		*
		* Expected input: a first line describing the feature list (echoed on
		* stdout in the comment syntax of the output mode), an optional
		* "PHONEME_SET ..." line, an optional "FEATURE_SET ..." line (both fix
		* the order of the codes, to be compatible with the PERL output), then
		* one vector per line.
		*
		* Returns the size of each learning vector, also stored in the database.
		* The program stops on a truncated file and on a vector whose size
		* differs from the size of the first one.
		*
		* Each line is parsed into a scratch area large enough for any line
		* that fits in `line`, then copied to an exact-size vector: nothing is
		* written past a vector before its size has been checked. Blank lines
		* are skipped, every fgets is checked, and the reading loop ends on the
		* first failed read instead of testing feof only.
		*/
{
  char line[1024];
  /* tokens need a separator between them: at most sizeof(line)/2 per line */
  Feature scratch[sizeof(line)/2 + 1];
  int position=0;
  int vector_size;
  int size;
  LearningVector vector;

  /* The first line describes the feature list */
  read_required_line(line,sizeof(line),input_file);
  switch (mode)
    {
    case MODE_PERL:
      printf("# %s",line);
      break;
    case MODE_C:
      printf("/* %s */",line);
      break;
    case MODE_BIN:
      printf("%s",line);
      break;
    default:
      break;
    }

  /* To be compatible with PERL output, try to read the sorted list of phonemes */
  read_required_line(line,sizeof(line),input_file);
  sscanf(line," PHONEME_SET %n", &position);   /* position stays 0 if absent */
  if (position)
    {
      char phoneme[1024];
      int new_pos;

      while (sscanf(&line[position]," %s %n",phoneme,&new_pos)==1)
        {
          encode_phoneme(phoneme);
          position+=new_pos;
        }
      read_required_line(line,sizeof(line),input_file);
      position=0;
    }

  /* To be compatible with PERL output, read the sorted list of features */
  sscanf(line," FEATURE_SET %n", &position);
  if (position)
    {
      char feature[1024];
      int new_pos;

      while (sscanf(&line[position]," %s %n",feature,&new_pos)==1)
        {
          encode_feature(feature);
          position+=new_pos;
        }
      read_required_line(line,sizeof(line),input_file);
    }

  /* The first vector gives the vector size */
  while (is_blank_line(line))
    read_required_line(line,sizeof(line),input_file);

  vector_size= parse_LearningVector(scratch,line);
  vector= init_LearningVector(vector_size);
  memcpy(vector, scratch, vector_size * sizeof(Feature));
  append_Data(database,vector);

  fprintf(stderr,"# OPEN DATABASE Vector size=%i\n",vector_size);

  /* Proceed other vectors */
  while ( fgets(line,sizeof(line),input_file) )
    {
      if (is_blank_line(line))
        continue;

      size=parse_LearningVector(scratch,line);

      if (size != vector_size)
        {
          fprintf(stderr,"VECTOR SIZE: %s\n",line);
          exit(1);
        }

      /* Checked: store it with the right size */
      vector= init_LearningVector(vector_size);
      memcpy(vector, scratch, vector_size * sizeof(Feature));
      append_Data(database,vector);
    }

  vector_size(database)=vector_size;
  return(vector_size);
}

void print_Data(Data* dba)
	  /* Dump on stdout the three counters of the table, then every vector */
{
  int row = 0;

  printf("NB_VECTOR=%i AVAIL=%i VECTOR_SIZE=%i\n", 
         nb_vector(dba), available(dba), vector_size(dba));

  while (row < nb_vector(dba))
    {
      print_LearningVector( tab(dba)[row], vector_size(dba) );
      row++;
    }
}

int id3(int depth, int max_depth, Data* dba, Attribs* attribs, Feature mute_pho)
	  /* Recursive ID3 sorting 
		  
		  depth = current depth
		  max_depth = limit
		  dba= excerpt of the database we're sorting
		  attribs= columns that we can put at a decision node
		  mute_pho= phoneme that don't generate an output because treated in default case 

		  returns true if the output was no muted
	  */
{
  int result=True;				  /* True if the output was not muted */
  PartitionPerAttrib* partitions;
  Feature best_gainer_index; /* index in Attribs of the best gainer */
  Feature best_phone; /* predominent phoneme in the database */

  /* Big co-occurence matrix */
  partitions=init_PartitionPerAttrib(dba, attribs);
  best_phone = best_phone(partitions);

  if (depth==0) 
	 fprintf(stderr,"Done initPartition\n");

  /* Test if the best_phone is the last and unique class */
  if ( perclass(  global_part(partitions) )[best_phone] == total( global_part(partitions) ))
	 {
		if (mute_pho==best_phone)
		  {
			 unprint_out();
			 result=False;
		  }
		else
		  {
			 switch (mode)
				{
				case MODE_PERL:
				  print_out(depth,
								"return '%s'; # Unique at depth %i\n",
								phoneme_list[best_phone],
								depth);
				  break;

				case MODE_C:
				   print_out(depth,
								"return %i; /* Unique at depth %i */\n",
								best_phone,
								depth);
					break;

				case MODE_BIN:
				  print_out(depth,"%s ", phoneme_list[best_phone]);
				  break;
				default:
				  break;
				}
		  }
		close_PartitionPerAttrib(partitions); /* now useless */
		return result;
	 }

  /* Don't want to go deeper ? */
  else if (depth>= max_depth)
	 {
		if (mute_pho==best_phone)
		  {
			 unprint_out();
			 result=False;
		  }
		else
		  {
			 switch (mode)
				{
				case MODE_PERL:
				  print_out(depth, "return '%s'; # Depth limit, %i cases out of %i occurences\n", 
								phoneme_list[best_phone],
								perclass(  global_part(partitions) )[best_phone], 
								total( global_part(partitions) ));
				  break;
				 
				case MODE_C:
				  print_out(depth, "return %i; /* Depth limit, %i cases out of %i occurences*/\n", 
								best_phone,
								perclass(  global_part(partitions) )[best_phone], 
								total( global_part(partitions) ));
				  break;

				case MODE_BIN:
				  print_out(depth,"%s ", phoneme_list[best_phone]);
				  break;
				default:
				  break;
				}
		  }
		close_PartitionPerAttrib(partitions); /* now useless */
		return result;
	 }

  /* exploited every attributes */
  else if ( nb_attrib(attribs)==0 )
	 {
		if (mute_pho==best_phone)
		  {
			 unprint_out();
			 result=False;
		  }
		else
		  {
			 switch (mode)
				{
				case MODE_PERL:
				  print_out(depth, "return '%s'; # Exhausted, %i cases out of %i occurences\n", 
								phoneme_list[best_phone],
								perclass(  global_part(partitions) )[best_phone], 
								total( global_part(partitions) ));
				  break;
				  
				case MODE_C:
				  print_out(depth, "return %i; /* Exhausted, %i cases out of %i occurences*/\n", 
								best_phone,
								perclass(  global_part(partitions) )[best_phone], 
								total( global_part(partitions) ));
				  break;

				case MODE_BIN:
				  print_out(depth,"%s ", phoneme_list[best_phone]);
				  break;
				default:
				  break;
				}
		  }
		close_PartitionPerAttrib(partitions); /* now useless */
		return result;
	 }

  /* Interesting case .... */
  else
	 {
		Attribs* new_att;
		Feature best_gainer,f;

		/* Find a column to split */
		best_gainer_index= bestgain_PartitionPerAttrib(partitions, mingain);
		close_PartitionPerAttrib(partitions); /* avoid memory accumulation with recursivity */

		if (best_gainer_index<0)
		  {
			 /* The gain of the split is too low */
			 if (mute_pho==best_phone)
				{
				  unprint_out();
				  result=False;
				}
			 else
				{
				  switch (mode)
					 {
					 case MODE_PERL:
						print_out(depth, "return '%s'; # Gain too low, %i cases out of %i occurences\n", 
									 phoneme_list[best_phone],
									 perclass(  global_part(partitions) )[best_phone], 
									 total( global_part(partitions) ));
						break;
						
					 case MODE_C:
						print_out(depth, "return %i; /* Gain too low, %i cases out of %i occurences*/\n", 
									 best_phone,
									 perclass(  global_part(partitions) )[best_phone], 
									 total( global_part(partitions) ));
						break;

					 case MODE_BIN:
						print_out(depth,"%s ", phoneme_list[best_phone]);
						break;
					 default:
						break;
					 }
				}
			 return result;
		  }
		
		best_gainer= attribs(attribs)[best_gainer_index];

		/* Remove the winner from the attribute list */
		new_att=copy_Attribs(attribs,best_gainer_index);

		switch (mode)
		  {
		  case MODE_BIN:
			 print_out(depth,"[ %i %s ",best_gainer,phoneme_list[best_phone]);
			 break;
			 
		  case MODE_C:
			 print_out(depth, "switch (Attrib%i) {\n", best_gainer);
			 break;

		  default:
			 break;
		  }
		
		/* Iterate on the feature and build sublists .... yeah, it's not great */
		for(f=0;f<nb_feature;f++)
		  {
			 int i;
			 
			 Data* new_dba=init_Data();

			 vector_size(new_dba)=vector_size(dba);

			 /* Built mamoth sublist :-( */
			 for(i=0; i< nb_vector(dba); i++ )
				{
				  if ( tab(dba)[i][best_gainer] == f )
					 append_Data( new_dba, tab(dba)[i]);
				}

			 if (nb_vector(new_dba)!=0)
				{ 
				  switch (mode) 
					 {
					 case MODE_PERL: 
						print_out(depth,
									 "if ($L[%i-1] eq '%s') {\n", 
									 best_gainer, 
									 feature_list[f]);
						break;

					 case MODE_BIN:
						print_out(depth,"%s ",feature_list[f]);
						break;

					 case MODE_C:
						print_out(depth,"case '%s':",feature_list[f]);
						break;
					 default:
						break;
					 }
				  
				  if ( id3(depth+1,max_depth, new_dba, new_att, best_phone) )  /* Recursivity */
					 {
						if (mode==MODE_PERL)
						  print_out(depth,"}\n");
					 }
				}
			 close_clone_Data(new_dba);
		  }

		close_Attribs(new_att);

		/* Print the default case */
		switch (mode) 
		  {
		  case MODE_PERL:
			 print_out(depth,
						  "return '%s'; # bestphone\n",
						  phoneme_list[best_phone]);  
			 break;

		  case MODE_BIN:
			 print_out(depth,"] ");
			 break;

		  case MODE_C:
			 print_out(depth, " default: return '%i'; /* bestphone */ }; /* end switch */\n",
						  best_phone);  
		  default:
			 break;
		  }
	 }
  return True;
}


int main(int argc, char **argv)
	  /*
		* Command line: vector_file max_depth [MODE_BIN | MODE_PERL | MODE_C]
		*
		* vector_file = database of learning vectors, "-" for stdin
		* max_depth   = depth limit of the tree (no limit when omitted)
		* mode        = output format, MODE_BIN by default
		*
		* The tree is written on stdout, progress messages on stderr.
		* Returns 0 on success, 1 when called without argument; the program
		* exits with status 1 on an unknown mode or an unreadable file.
		*/
{
  int max_depth;
  Data* dba;
  Attribs* first_attrib;
  char* file_name;
  FILE* input_file;

  if (argc<2)
    {
      /* the banner is data, not a format: never hand it to printf as such */
      fputs(ProgInfo,stderr);
      fprintf(stderr,"USAGE: %s vector_file max_depth [MODE_BIN | MODE_PERL | MODE_C]\n",argv[0]);
      return(1);
    }

  mode=MODE_BIN;				  /* Default output mode */
  if (argc==4) 
    {
      if (strcmp(argv[3],"MODE_BIN")==0)
        {
          mode=MODE_BIN;
        }
      else if (strcmp(argv[3],"MODE_C")==0)
        {
          mode=MODE_C;
        }
      else if (strcmp(argv[3],"MODE_PERL")==0)
        {
          mode=MODE_PERL;
        }
      else 
        {
          fprintf(stderr,"unknown output mode\n");
          exit(1);
        }
    }

  if (argc>2)
    max_depth=atoi(argv[2]);
  else
    max_depth=32000;				  /* No limit */

  file_name=argv[1];
  if (strcmp(file_name,"-")==0)
    {	input_file= stdin; }
  else
    {
      input_file= fopen(file_name,"rt");
      if (!input_file)
        {	
          fprintf(stderr,"Can't open %s\n",file_name);
          exit(1);
        }
    }

  dba=init_Data();
  parse_Data(dba,input_file); /* load the dba */

  /* Everything is in memory: the file is not needed any longer */
  if (input_file != stdin)
    fclose(input_file);

  fprintf(stderr,"# READ FROM DATABASE NB_VECTOR=%i AVAIL=%i\n", 
          nb_vector(dba), available(dba));
  fprintf(stderr,"# Nb_Phoneme: %i Nb_features: %i\n", nb_phoneme, nb_feature);

  if (mode==MODE_C)
    {
      int i;
      /* print the correspondence table */

      printf("char* phoneme_list[]={ ");
      for(i=0; i< nb_phoneme; i++)
        printf(" \"%s\", ", phoneme_list[i]);
      printf("}\n");

      printf("char* feature_list[]={ ");
      for(i=0; i< nb_feature; i++)
        printf(" \"%s\", ", feature_list[i]);
      printf("}\n");
    }

  if (mode==MODE_PERL)
    printf("sub context2phone {\n  my @L = @_;\n\n");

  /* Start with depth 0, full dba, and full attrib list; -1 mutes nothing */
  first_attrib= init_Attribs(vector_size(dba));
  id3(0, max_depth, dba, first_attrib,-1);

  /* Flushes the last pending frame; this text itself is never written */
  print_out(0," That's all folks ");

  if (mode==MODE_PERL)
    printf("}\n1;\n");

  close_Attribs(first_attrib);
  close_Data(dba);
  return(0);
}
