#!/usr/local/bin/perl5 

# Ariadna Font Llitjos
# December 9, 2005


## Currently this only processes log files with one sentence correction
## for multiple corrections, need to modify this script so that every time
## it finds counter = num, it starts a new instantiation...


#./processTCToolLogFiles.pl <TCToolLogFile>

# output stored in TCToolLogFile-processed

# Plan to call this script from within C++ (RR.cpp)

# Debug level used by the trace prints in this script.
# NOTE: most trace prints are guarded by `$debug == 0` and the add/delete
# traces by `$debug == 1`, so in practice:
#   0 = trace everything except add/delete details (most verbose),
#   1 = trace add/delete actions only,
#   2 = (or any other value) no tracing at all.
$debug = 1;

# A log file name is mandatory: without it there is nothing to read and
# nowhere to write, so stop here with a non-zero status instead of running
# the rest of the script against unopened file handles.
if (!defined($ARGV[0]) || $ARGV[0] eq "") {
    print STDERR "You need to give it a TCTool Log File.\n";
    exit 1;
}

# Input is the log file itself; output goes next to it as <log>-processed.
# Three-argument open keeps characters such as '<', '>', '|' or surrounding
# whitespace in the file name from being interpreted as an open mode.
$INfilename = $ARGV[0];
open(IN, "<", $INfilename) || die "cannot open $INfilename for reading: $!\n";
$OUTfilename = $INfilename . "-processed";
open(OUT, ">", $OUTfilename) || die "cannot open $OUTfilename for writting: $!\n";


# Containers filled in while the log is parsed; all start out empty.
%INFO     = ();   # hash: "sl", "tl", "al", "ctl", "cal", "AlignmentInfo",
                  #       plus "ActionN", "ActionN-Wi", "ActionN-WiC" entries
@SLWords  = ();   # words of the source-language sentence ("sl")
@CTLWords = ();   # words of the corrected target-language sentence ("ctl")


# counting separators... not so useful, since there might be separators
# with other kinds of information (confidence, necessity, etc.)

#$i = 0;
#while(<IN>) {
#    if ( /----------.*/ ) { $i++; }
#}
#print "there are $i separators in this log file\n";
#close(IN);
#open(IN, "< $INfilename") || die "cannot open $INfilename for reading: $!\n";

# Main parsing pass over the log file.
#
# Each input line is tested against EVERY pattern below (the ifs are
# independent, not an if/elsif chain), and whatever matches is recorded in
# %INFO.  Actions are numbered with $i and stored under the keys
# "Action<i>", "Action<i>-Wi" (the erroneous word) and "Action<i>-WiC"
# (the corrected word).
#
# $i is never explicitly initialised in the live code; the first $i++
# relies on Perl treating an undefined value as 0.
$AlignmentInfo = "";
# NOTE(review): $ActionCounter is set here but never read or updated
# anywhere in this file; the action count actually lives in $i.
$ActionCounter = 0;
while (<IN>) {
    chomp;

    # Source-language sentence: text after "sl = ", which must start with a
    # word character.  Also split into @SLWords for the alignment step.
    if ( /sl[\s\t]+=[\s\t]+(\w+.*)/ ){
	$INFO{"sl"} = $1;
	if ($debug == 0){ print "sl should now be: $1\n"; }
	@SLWords = split(" ", $1);
	if ($debug == 0){ 
	    foreach $word (@SLWords) {
		print "[$word] "; 
	    } print "\n";
	}
    }
    # Target-language sentence: "tl", optionally followed by digits, then
    # " = ".  A later match overwrites an earlier one.
    # NOTE(review): the pattern is unanchored, so it would also match a line
    # containing e.g. "ctl = ..." -- confirm such lines cannot occur in the log.
    if ( /tl[\d]*[\s\t]+=[\s\t](\w+.*)/ ){
	$INFO{"tl"} = $1;
	if ($debug == 0){ print "tl should now be: $1\n";}
#	@TLWords = split(" ", $1);
#	if ($debug == 0){ 
#	    foreach $word (@TLWords) {
#		print "[$word] "; 
#	    } print "\n";
#	}
    }
    # Original alignment string: text after "al = ".  The leading [.]*
    # matches zero or more literal dots, so it does not constrain the match;
    # like the patterns above this one is unanchored.
    if ( /[.]*al[\s\t]+=[\s\t](.+)/ ){
	$INFO{"al"} = $1;
	if ($debug == 0){ print "al should now be: $1\n";}
    }
    
    ### When user EDITED a word ##################################
    # Header line of an edit: open a new action and mark it "edit".
    if ( /Current Word Edited:/ ) {
	$i++;
	$key = "Action" . $i;
 	$INFO{$key} = "edit";
	$tmp = $INFO{$key};
	if ($debug == 0){ print "$key is set to $tmp\n";}
	# go to next line and keep processing
    }
    # storing the error and the correction
    # Both captures are greedy and must start with optional whitespace
    # followed by a word character.
    if ( /\* Word changed from \"(\s*\w+.*)\" to \"(\s*\w+.*)\"/ ) {
	# key here is the last thing that was instantiated with key
	# (i.e. the most recent action header seen, of whatever type)
	$keyWi = $key . "-Wi";
	$keyWiC = $key . "-WiC";
	$error = $1;
	$correction = $2;
	# careful if user adds a multiword entry
#	if ($correction =~ /\w+ \w+.*/){
#	    $correction = "2++" .  $correction;
#	}
	$INFO{$keyWi} = $error;	
	$INFO{$keyWiC} = $correction;
	$Wi = $INFO{$keyWi};
	$WiC =  $INFO{$keyWiC};
	if ($debug == 0){ print "Wi is \"$Wi\" and WiC is \"$WiC\"\n";}
    }

    ### When user MOVED a word ##################################
    # Header line of a move: open a new action and mark it "cwo".
    # The pattern requires at least one space after the colon's text; the
    # captured text itself is not used.
    if ( /Word has moved:(.*) /) {
	$i++;
	$key = "Action" . $i;
	$INFO{$key} = "cwo";
	$tmp = $INFO{$key};
	if ($debug == 0){ print "$key is set to $tmp\n";}
	# go to next line and keep processing
    }
    ########## PENDING ##################
    # need to process the strings so that I can figure out what word
    # was moved where...
    # need to check if the tctool log gives any further info
    # maybe moved and modified. 

    ### When user ADDED a word ##################################
    # Header line of an addition: open a new action and mark it "add".
    # Note the trace prints for add/delete are guarded by $debug == 1,
    # unlike the rest of the loop which uses $debug == 0.
    if ( /New word created:(.*) /) {
	$i++;
	$key = "Action" . $i;
	$INFO{$key} = "add";
	$tmp = $INFO{$key};
	if ($debug == 1){ print "$key is set to $tmp\n";}
	# go to next line and keep processing
    }
    # storing the correction
    if ( /\* The word created is \"(\s*\w+.*)\"/ ){
	$keyWiC = $key . "-WiC";
	$correction = $1;
	# careful if user adds a multiword entry
#	if ($correction =~ /\w+ \w+.*/){
#	    $correction = "2++" .  $correction;
#	}
	$INFO{$keyWiC} = $correction;
	$WiC =  $INFO{$keyWiC};
	if ($debug == 1){ print "WiC is \"$WiC\"\n";}
    }	

    ### When user DELETED a word ##################################
    # Header line of a deletion: open a new action and mark it "del".
    if ( /Move to trash:(.*) /) {
	$i++;
	$key = "Action" . $i;
	$INFO{$key} = "del";
	$tmp = $INFO{$key};
	if ($debug == 1){ print "$key is set to $tmp\n";}
    }
    # storing the error
    if ( /\* The target word \"(\s*\w+.*)\" has been moved to the/ ){
	$keyWi = $key . "-Wi";
	$INFO{$keyWi} = $1;	
	$Wi = $INFO{$keyWi};
	if ($debug == 1){ print "Wi is \"$Wi\"\n";}
    }

   # The reason text is only traced; it is not stored in %INFO.
   if ( /\* Reason: (.*)/ ) {
       if ($debug == 0){ print "Reason is $1\n";}
        # instantiate Wc if it's there...
    }

    # Extracting CTLS...
    ## Very hacky, tried reading in multiple lines at the same time, but then
    ## I couldn't instantiate $1 with the right part of the RE...
    # $final marks that the "Final Sentences:" section has started; it is
    # never cleared afterwards.
    if ( /Final Sentences:/ ) {
	$final = 1;
    }
    # First part of the corrected target sentence: everything after the
    # opening quote up to the end of this line (a closing quote on the same
    # line would be included in the capture).
    if ( (m/\* Target Language Sentence: \"(\w+.*)/) && ($final == 1) ){
# and flag eq 1
	$ctl = $1;
	$rest = 1;
	if ($debug == 0){ print "1: $ctl\n";}
    }    
    # to get the rest of a running sentence...
    # No /g match is ever run on $_, so \G anchors at the start of the line
    # here: this matches a line that begins with a word character and
    # contains a double quote.  The captured text is appended to $ctl with
    # no separator added.
    # NOTE(review): $rest is never reset, so any later line of that shape
    # would overwrite $INFO{"ctl"}; and a sentence whose closing quote is on
    # the "Target Language Sentence" line itself does not appear to reach
    # this branch -- confirm against real log files.
    if ( (m/\G(\w+.*)\"/) && ($rest == 1)  ) {
	if ($debug == 0){print "2: $1\n";}
	$tmp = $ctl . $1;
	if ($debug == 0){print "ctl is now $tmp\n";}
	$INFO{"ctl"} = $tmp;
	@CTLWords = split(" ", $tmp);
	if ($debug == 0){ 
	    foreach $word (@CTLWords) {
		print "[$word] "; 
	    } print "\n";
	}
    }


    # in order to process multiple continuous lines at the same time
    # do: if ( /beginningRE/ .. /endRE/ ) { ... }

    # what's the symbol for blank line in perl RE?
    # some say: ^\d*$  should work, but it doesn't...
    # (^\d*$ only matches empty or all-digit lines; /^\s*$/ is the usual
    # test for an empty or whitespace-only line.)
#    if ( /\* Alignments:/ .. /^\d*$/ ) {
    # Range (flip-flop) operator: true from the "* Alignments:" line through
    # the next line containing the run of dashes, both inclusive, so the
    # header and the separator line are accumulated too.  The accumulated
    # text is split back into lines after the loop.
    if ( /\* Alignments:/ .. /\-------------.*/ ) {
	if ($debug == 0){ print "Alignment info: $_\n";}
	$AlignmentInfo .= $_;
	$AlignmentInfo .= "\n";
	$INFO{"AlignmentInfo"} = $AlignmentInfo;
    }

}


# Processing the string of AlignmentInfo into ((1,1),...)

# _build_alignments($align_text, \@sl_words, \@ctl_words)
#
# Turns the accumulated alignment lines into a list of 1-based position
# pairs "((s,t),(s,t),...)".
#
#   $align_text : newline-separated log lines; only lines of the form
#                 * "<SL word>" to "<TL word(s)>" are used, others are skipped
#   $sl_words   : array ref, words of the source sentence
#   $ctl_words  : array ref, words of the corrected target sentence
#
# For each alignment line the FIRST matching source word is used.  Every
# matching target position yields one complete "(s,t)" pair; for a
# multi-word target entry only its first word is compared and the scan then
# skips over the remaining words of the entry.
#
# Returns "()" when nothing could be aligned.  Pairs are collected in a list
# and joined, so the result is always well-formed: no dangling "(s," when a
# source word has no target match, and no "(s,t),u)" when it has several.
sub _build_alignments {
    my ($align_text, $sl_words, $ctl_words) = @_;
    my $trace = ($debug == 0);
    my @pairs;

    $align_text = "" unless defined $align_text;

    foreach my $line (split("\n", $align_text)) {
        next unless $line =~ /\* \"(\w+.*)\" to \"(\w+.*)\"/;
        # Copy the captures right away: the multi-word test below runs
        # another regex and would clobber $1/$2.
        my ($sl_word, $tl_word) = ($1, $2);
        if ($trace) { print "SL word is $sl_word and TL word is $tl_word\n"; }

        my $is_multiword = ($tl_word =~ /\w+\s\w+.*/) ? 1 : 0;
        my @tl_parts     = split(" ", $tl_word);
        my $tl_head      = $is_multiword ? $tl_parts[0] : $tl_word;

        for (my $n = 0; $n < @$sl_words; $n++) {
            my $slword = $sl_words->[$n];
            next unless $sl_word eq $slword;

            if ($trace) { print "$sl_word = $slword\n"; }
            my $sal = $n + 1;
            if ($trace) { print "SL position is now $n and SL alignment is $sal\n"; }

            for (my $m = 0; $m < @$ctl_words; $m++) {
                my $tlword = $ctl_words->[$m];

                if ($is_multiword && $trace) {
                    print "looking at a multiple word \"$tl_word\"\n";
                    foreach my $w (@tl_parts) {
                        print "[$w] ";
                    }
                    print "\n";
                }

                if ($tl_head eq $tlword) {
                    if ($trace) { print "$tl_word = $tlword\n"; }
                    my $tal = $m + 1;
                    if ($trace) { print "TL position is now $m and TL alignment is $tal\n"; }
                    push @pairs, "($sal,$tal)";

                    ### NEED TO DEBUG WITH OTHER MULTIPLE WORDS WHICH
                    ## ARE NOT AT THE END OF THE SENTENCE
                    # Skip the remaining words of a multi-word entry; the
                    # loop increment accounts for the first one.
                    $m += scalar(@tl_parts) - 1 if $is_multiword;
                }
            }
            # Only the first occurrence of the source word is aligned.
            last;
        }
    }

    return "(" . join(",", @pairs) . ")";
}

$alignments = _build_alignments($INFO{"AlignmentInfo"}, \@SLWords, \@CTLWords);
$INFO{"cal"} = $alignments;


############################################################
### PRINTING OUT INFORMATION AS NEEDED BY THE BACKEND ######

# temporary hack (need to add a hidden parameter to the tctool cgi scripts
# and java script so that I get the corresponding parse tree for each sentence
#$parse = "<((S,0 (VP,1 (V,1:2 \"VEO\") ) ) )> <((NP,8 (DET,1:3 \"EL\") (N,1:5 \"AUTO\") (ADJ,1:4 \"ROJA\") ) )>";

# _format_actions(\%info, $count)
#
# Renders the per-action part of the output for actions 1..$count and
# returns it as one string.
#
#   \%info : hash ref with "ActionN" => "edit" | "add" | "del" | "cwo" and
#            the matching "ActionN-Wi" / "ActionN-WiC" entries
#   $count : number of actions recorded (undef is treated as none)
#
# Output per action:
#   edit -> "action = edit", "Wi = ...", "WiC = ..."
#   add  -> "action = add",  "WiC = ..."
#   del  -> "action = del",  "Wi = ..."
#   cwo  -> "action = cwo", unless the previous action was an edit or a
#           deletion, in which case nothing is printed for it
sub _format_actions {
    my ($info, $count) = @_;
    my $text = "";

    for (my $j = 1; $j <= $count; $j++) {
        my $key      = "Action" . $j;
        my $val      = $info->{$key};
        my $prev_idx = $j - 1;
        my $prev_val = $info->{"Action" . $prev_idx};

        # !!! NEED TO TEST WITH ALL THE DIFFERENT COMBINATIONS OF CWO
        # not printing out cwo that happened after edit or delete
        if ($val eq "cwo") {
            if ($debug == 0) { print $prev_idx . "\n"; }
            # Deletions are recorded as "del" by the parser, so that is the
            # value to compare against here.
            unless (($prev_val eq "edit") || ($prev_val eq "del")) {
                $text .= "action = $val\n";
            }
        } else {
            $text .= "action = $val\n";
        }

        if ($val eq "edit") {
            my $Wi  = $info->{$key . "-Wi"};
            my $WiC = $info->{$key . "-WiC"};
            $text .= "Wi = $Wi\n";
            $text .= "WiC = $WiC\n";
        } elsif ($val eq "add") {
            my $WiC = $info->{$key . "-WiC"};
            $text .= "WiC = $WiC\n";
        } elsif ($val eq "del") {
            my $Wi = $info->{$key . "-Wi"};
            $text .= "Wi = $Wi\n";
        } elsif ($val eq "cwo") {
            if ($debug == 0) { print $prev_idx . "\n"; }
            # TODO: if cwo is preceded by add, need to store the position
            # where WiC is now stored.
        }
        # TODO: figure out what else needs to be output for cwo
    }

    return $text;
}

$sl = $INFO{"sl"};
$tl = $INFO{"tl"};
$al = $INFO{"al"};
$ctl = $INFO{"ctl"};
$cal = $INFO{"cal"};

## OUT:
# this number changes due to cwo after delete and edit
# print "Actions = $i\n"; #Number of Actions
print OUT "sl = $sl\ntl = $tl\nal = $al\nctl = $ctl\ncal = $cal\n";
print OUT _format_actions(\%INFO, $i);

# Close explicitly so that buffered write errors are reported instead of
# being lost at interpreter exit.  Handles that were never opened are skipped.
close(IN) if defined fileno(IN);
if (defined fileno(OUT)) {
    close(OUT) || die "cannot close $OUTfilename: $!\n";
}


 








