#!/usr/local/bin/perl5

# /afs/cs.cmu.edu/project/nnspeech-6/Nespole/Data/cstar2/get-input-line.pl
# November 2001 by Dorcas Wallace  <dorcas@cs.cmu.edu>


#---------------------------------------------------------------------------
# SAMPLE CALL
#
# > ./get-input-line.pl /afs/cs.cmu.edu/project/nnspeech-4/Nespole/Data/cstar2/payment.if /afs/cs.cmu.edu/project/nnspeech-4/Nespole/Data/cstar2/cstar-examples.db.dmg 
#
# .if file is grepped from .db file according to desired concept (e.g. +payment)
#
# PRODUCES <filestem>.all.unsorted, <filestem>.phx.unsorted and
# <filestem>.spk.unsorted files
#
# SORT
# > sort -n +0 +1 +2 <filestem>.phx.unsorted > ! <filestem>.phx
#
#

#------------------------------------------------------------------------------
# USAGE
#
#   get-input-line.pl <if-file> <db-file> [<output-dir>]
#
# For every IF line in <if-file>, looks up the English utterance with the
# same ID in <db-file> and appends
#   - the ID, the utterance and the IF line to <stem>.all.unsorted
#   - the utterance                         to <stem>.phx.unsorted
#   - the speaker letter (a or c)           to <stem>.spk.unsorted
# <stem> is built from the two input file names (see output_stem).
# <output-dir> is optional and defaults to the cstar2 data directory.
#------------------------------------------------------------------------------

use strict;
use warnings;

# Run only when executed directly, so the subs below can also be loaded
# (e.g. via require) without touching any files.
main(@ARGV) unless caller;

#------------------------------------------------------------------------------
# MAIN
#------------------------------------------------------------------------------

# main($if_path, $db_path [, $out_dir])
# Validates the arguments, reads both inputs and writes the three output
# files.  Dies with a message on missing arguments or on any file that
# cannot be opened, read or written.
sub main {
    my ($if_path, $db_path, $out_dir) = @_;

    die("You must specify an IF file containing IFs only.\n")
        unless defined $if_path && length $if_path;
    die("You must specify a database file containing both input utterances and IFs.\n")
        unless defined $db_path && length $db_path;

    $out_dir = '/afs/cs.cmu.edu/project/nnspeech-4/Nespole/Data/cstar2'
        unless defined $out_dir && length $out_dir;

    my $stem     = output_stem($if_path, $db_path);
    my $all_path = "$out_dir/$stem.all.unsorted";
    my $phx_path = "$out_dir/$stem.phx.unsorted";
    my $spk_path = "$out_dir/$stem.spk.unsorted";

    print "\nnew .all file: $all_path\n";
    print "\nnew .phx file: $phx_path\n";
    print "\nnew .spk file: $spk_path\n";

    my @if_lines = read_lines($if_path);
    my $utts_for = index_utterances($db_path);

    my $all_fh = open_output($all_path);
    my $phx_fh = open_output($phx_path);
    my $spk_fh = open_output($spk_path);

  IF_LINE: for my $if_line (@if_lines) {

        next IF_LINE if $if_line =~ m/^\s*$/;    # skip line if blank

        # Lines that do not look like an IF carry no ID to look up.  Without
        # this guard an empty ID would compare numerically equal to an
        # utterance numbered 0.0.0 and produce a bogus record.
        next IF_LINE
            unless $if_line =~ /^(\d+\.\d+\.\d+).*\s+IF\s+Prv\s+.*\s+([ac])\:/;
        my ($if_num, $spk) = ($1, $2);
        $if_num =~ s/\./0/g;

        my $pending = $utts_for->{ match_key($if_num) };
        next IF_LINE unless $pending && @$pending;

        # NOTE(review): an utterance is consumed by the first IF line that
        # claims it, so a repeated IF ID is only paired with a further DB
        # utterance of the same ID, if there is one.  Confirm this is wanted.
        my ($utt_num, $utt) = @{ shift @$pending };

        # $if_line still carries its own line terminator.
        print {$all_fh} "\n$if_num\n$utt_num $utt\n$if_line\n";
        print {$phx_fh} "$utt\n";
        print {$spk_fh} "$spk\n";
    }

    # Buffered write errors only surface at close, so check it.
    close($all_fh) or die("Error writing $all_path: $!\n");
    close($phx_fh) or die("Error writing $phx_path: $!\n");
    close($spk_fh) or die("Error writing $spk_path: $!\n");

    return;
}

#----------------------------------------------------------
# END MAIN
#----------------------------------------------------------

# output_stem($if_path, $db_path)
# Returns the common stem of the output file names: the IF file's base name
# without a trailing ".if", followed by the DB file's base name from its last
# ".db" onwards (the whole base name when it contains no ".db").
# Example: payment.if + cstar-examples.db.dmg -> "payment.db.dmg"
sub output_stem {
    my ($if_path, $db_path) = @_;

    (my $if_stem = $if_path) =~ s{^.*/}{};
    $if_stem =~ s/\.if$//;

    (my $db_suffix = $db_path) =~ s{^.*/}{};
    $db_suffix =~ s/^.*\.db(.*)/.db$1/;

    return $if_stem . $db_suffix;
}

# read_lines($path)
# Returns all lines of the file, line terminators included.  Dies if the
# file cannot be opened.
sub read_lines {
    my ($path) = @_;

    open(my $fh, '<', $path) or die("Cannot read $path: $!\n");
    my @lines = <$fh>;
    close($fh);

    return @lines;
}

# open_output($path)
# Opens $path for writing (truncating it) and returns the handle.  Dies if
# the file cannot be opened.
sub open_output {
    my ($path) = @_;

    open(my $fh, '>', $path) or die("Cannot write $path: $!\n");

    return $fh;
}

# index_utterances($db_path)
# Scans the database file once and returns a hash reference that maps each
# utterance ID key (see match_key) to the list of [ $utt_num, $utt ] pairs
# found for it, in file order.  Only lines carrying the
# "olang E lang E Prv" markers and a double-quoted utterance are indexed.
sub index_utterances {
    my ($db_path) = @_;

    open(my $db_fh, '<', $db_path) or die("Cannot read $db_path: $!\n");

    my %utts_for;
    while (my $db_line = <$db_fh>) {
        next
            unless $db_line =~ m/(\d+\.\d+\.\d+).*\s+olang\s+E\s+lang\s+E\s+Prv\s+.*\s+\"(.*)\"\s*$/;
        my ($utt_num, $utt) = ($1, $2);
        $utt_num =~ s/\./0/g;

        push @{ $utts_for{ match_key($utt_num) } }, [ $utt_num, $utt ];
    }
    close($db_fh);

    return \%utts_for;
}

# match_key($num)
# Returns the form of a dots-replaced-by-0 ID under which IF lines and
# utterances are matched: the digit string without leading zeros, i.e. the
# IDs are matched as numbers.
# NOTE(review): distinct dotted IDs can share a key (1.0.203 and 100.2.3
# both become 1000203) -- confirm such IDs cannot occur in the data.
sub match_key {
    my ($num) = @_;

    $num =~ s/^0+(?=\d)//;

    return $num;
}
