#!/usr/local/bin/perl
# File: tenfold_crossvalid_split-same.pl
# Purpose: Split a set into training and testing subsets for ten-fold cross validation,
#          following the same splitting as in the example dictionary.
#          Useful when you want to compare 2 letter|phoneme alignment strategies
#
# Author: Vincent Pagel ( pagel@tcts.fpms.ac.be ) 
# Time-stamp: <1999-05-05 12:21:43 pagel> 
#
# Copyright (c) 1998 Faculte Polytechnique de Mons (TCTS lab)
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation version 1
#
# This program is distributed in the hope that it will be useful, 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# History:
# 
# 29/04/98: Created

# Usage: tenfold_crossvalid_split-same.pl SELECT_FILE DATA_FILE
#
# For every line of SELECT_FILE, print (to STDOUT) the next line of DATA_FILE
# whose first whitespace-delimited field equals the first field of the
# SELECT_FILE line.
#
# DATA_FILE is read strictly forward and never rewound: data lines skipped
# while searching for a word are discarded, so the selected words must appear
# in DATA_FILE in the same relative order as in SELECT_FILE.
#
# Exits with an error message on STDERR (non-zero status) when an argument is
# missing, a file cannot be opened, or a selected word cannot be found in the
# remaining part of DATA_FILE.
use strict;
use warnings;

die "Usage: $0 SELECT_FILE DATA_FILE\n" unless @ARGV >= 2;

my $file_select = shift;
my $file_data   = shift;

# Three-argument open: the file names are taken literally, and failures are
# reported instead of silently producing empty output.
open(my $select_fh, '<', $file_select)
    or die "$0: cannot open '$file_select' for reading: $!\n";
open(my $data_fh, '<', $file_data)
    or die "$0: cannot open '$file_data' for reading: $!\n";

my $select_lineno = 0;

while (my $select_line = <$select_fh>) {
    $select_lineno++;

    # Only the first field matters; a line starting with whitespace (or a
    # blank line) yields an empty first field.
    my ($word_select) = split(/\s+/, $select_line, 2);

    # Scan forward in the data file until the word is found or the data
    # runs out.
    my $found = 0;
    while (defined(my $line = <$data_fh>)) {
        my ($word_data) = split(/\s+/, $line, 2);
        next unless $word_data eq $word_select;

        print $line;
        $found = 1;
        last;
    }
    next if $found;

    # Data file exhausted. An empty selection key (e.g. a trailing blank
    # line in SELECT_FILE) simply selects nothing; anything else is a real
    # mismatch between the two files, which previously caused an endless loop.
    next if $word_select eq '';

    die "$0: word '$word_select' ($file_select line $select_lineno) "
      . "not found in the remaining part of '$file_data'\n";
}

close($select_fh);
close($data_fh);

