#!/usr/bin/perl5.8.5

# input:  prefix for each flattened output file,
#         directories with LDC LTF-formatted files;
# output: each segment on a separate line in file <prefix><filename>.flat.

use strict;
use warnings;

# Flatten LDC LTF files.
#
# The command line is a list of <prefix> <dir> pairs.  For every plain file
# found directly inside <dir>, the file "<prefix><filename>.flat" is written,
# containing one line per <SEG> element; each line holds the text of the
# segment's <TOKEN> elements, every token followed by a single space.
#
# Progress is reported on STDOUT (directory and file names) and every
# directory entry is echoed on STDERR.  Files are read and written as raw
# bytes; no encoding layer is applied.

# usage: arguments must come in complete <prefix> <dir> pairs
if (@ARGV < 2 || @ARGV % 2) {
    die "usage: flatten-ltf.pl <prefix_1> <dir_1> .. <prefix_i> <dir_i>\n";
}

# for each <prefix> <dir> pair given ..
while (@ARGV) {
    my $prefix = shift @ARGV;
    my $dir    = shift @ARGV;
    print "$dir ..\n";

    # Fail loudly on an unreadable directory, like the file opens below do,
    # instead of silently producing no output for it.
    opendir(my $dh, $dir) or die "can't open directory \"$dir\": $!";
    my @entries = readdir($dh);
    closedir($dh);

    foreach my $filename (@entries) {
        print STDERR "$filename\n";

        # Skip ".", ".." and anything else that is not a plain file, so a
        # subdirectory does not produce a bogus empty .flat file.
        my $path = "$dir/$filename";
        next unless -f $path;

        print "  $filename\n";
        flatten_file($path, "$prefix$filename.flat", $filename);
    }
}

# Read one LTF file and write its flattened segments to $out_path.
#   $in_path  - path of the LTF file to read
#   $out_path - path of the flat file to create (truncated if it exists)
#   $label    - name used in diagnostics for this file
# Dies if either file cannot be opened or the output cannot be closed.
sub flatten_file {
    my ($in_path, $out_path, $label) = @_;

    # Read the input before touching the output, so a failed read does not
    # leave an empty output file behind.
    open(my $in, '<', $in_path) or die "can't open \"$in_path\": $!";
    my $text = do { local $/; <$in> };
    close($in);
    $text = '' unless defined $text;

    open(my $out, '>', $out_path)
        or die "can't open output file ($out_path): $!";
    foreach my $line (flatten_segments($text, $label)) {
        print {$out} "$line\n";
    }
    # Buffered write errors only surface at close time.
    close($out) or die "can't close output file ($out_path): $!";
    return;
}

# Extract the flattened segments from LTF text.
#   $text  - full contents of an LTF file
#   $label - name used in diagnostics
# Returns one string per <SEG> element, in document order; each string is
# the concatenation of the segment's token texts, each followed by a space.
sub flatten_segments {
    my ($text, $label) = @_;
    my @lines;

    # while there are segments
    while ($text =~ m{<SEG.*?>(.*?)</SEG>}sg) {
        my $seg  = $1;
        my $line = "";

        # while there are tokens
        while ($seg =~ m{<TOKEN.*?>(.*?)</TOKEN>}sg) {
            # Copy the capture right away: the check below is itself a
            # successful match and would clobber $1.
            my $token = $1;
            if ($token =~ /^< \/HL/) {
                print STDERR "ERROR: $label - $token \n";
            }
            $line .= "$token ";
        }
        push @lines, $line;
    }
    return @lines;
}
