#!/usr/local/bin/perl58

# ARGS
# Both values may be supplied on the command line, in this order:
#     <script> LATTICE GRAMMAR
# When an argument is absent the hard-coded default below is used instead,
# so the values can still be set by editing this file.
# (Written with defined/?: rather than // to stay Perl 5.8 compatible.)

# Name of training lattice
$lattice = defined $ARGV[0] ? $ARGV[0] : "";
# Name of source grammar file
$grammar = defined $ARGV[1] ? $ARGV[1] : "";


# Read in Grammar
#
# Scans the file named by $grammar line by line and fills %ruleids:
#   * a line containing "{ID}" registers ID with the placeholder value 1 and
#     makes it the current rule;
#   * a line of the form "LHS::..." stores LHS (everything before the first
#     "::", minus leading whitespace) as the value for the current rule.
# Lines whose first non-blank character is ';' are skipped, as is everything
# from a line containing "#|" through the next line containing "|#".
# Dies if the grammar file cannot be opened.

# Three-argument open with a lexical handle: the file name is never
# interpreted as an open mode or a command.
open(my $gra_fh, '<', $grammar)
    or die "Cannot open grammar file '$grammar': $!";

while (my $gra_line = <$gra_fh>) {
    next if $gra_line =~ m/^\s*;/; # Skip single line comments

    if ($gra_line =~ m/\#\|/) { # Skip multiple line comments
        # Consume lines until the closing marker; the eof() test stops the
        # loop on an unterminated comment instead of reading undef.
        while ($gra_line !~ m/\|\#/ and !eof($gra_fh)) {
            $gra_line = <$gra_fh>;
        }
        next;
    }

    # Get list of rule IDs and associated source non-terminals
    if ($gra_line =~ m/\{([^\}]+)\}/) {
        $ruleid = $1;
        $ruleids{$ruleid} = 1;
    } elsif (defined $ruleid and $gra_line =~ m/^\s*([^:]+)::/) {
        # Only record a non-terminal once a rule ID has been seen; without
        # this guard an early "X::" line would create a bogus "" key.
        # NOTE(review): every later "X::..." line belonging to the same rule
        # overwrites this value, so the last such line wins -- confirm that
        # rule bodies never contain lines of that shape.
        $srcnt = $1;
        $ruleids{$ruleid} = $srcnt;
    }
}
close($gra_fh);


# Read in Lattice
#
# Reads the file named by $lattice (through "gunzip -c" when the name
# contains ".gz") and counts the arc lines it contains.  An arc line has the
# shape
#     (START END "TGT" OLDSCORE "SRC" "TRACE") : SCORE
# and its fields are stored in $start, $end, $tgt, $oldscore, $src, $trace
# and $score.  A single leading '@' inside the TRACE quotes is dropped.
# $totalarcs counts every arc line in the file; $sentarcs counts arc lines
# since the most recent non-arc line beginning with '('.
# Prints "Arc count N" when done.  Dies if the lattice cannot be opened.

my $ltc_fh;
if ($lattice =~ m/\.gz/) {
    # List-form pipe open: the file name reaches gunzip as one argument and
    # is never parsed by a shell, so spaces or metacharacters are harmless.
    open($ltc_fh, '-|', 'gunzip', '-c', $lattice)
        or die "Cannot run gunzip on lattice '$lattice': $!";
} else {
    open($ltc_fh, '<', $lattice)
        or die "Cannot open lattice '$lattice': $!";
}

$totalarcs = 0;
$sentarcs = 0;

while (my $ltc_line = <$ltc_fh>) {
    $ltc_line =~ s/[\r\n]*$//;

    # One match captures all seven fields, including the trailing score.
    if ($ltc_line =~ m/^\((\d+)\s+(\d+)\s+\"([^\"]*)\"\s+(\S+)\s+\"([^\"]*)\"\s+\"\@?(.*?)\"\)\s+:\s+(\S+)$/) {
        ($start, $end, $tgt, $oldscore, $src, $trace, $score) =
            ($1, $2, $3, $4, $5, $6, $7);

        # Count the arc before applying the score threshold, so the totals
        # cover every arc in the lattice, not only the high-scoring ones.
        $totalarcs++;
        $sentarcs++;

        next if $score < 0.90;  # Have a high threshold for using arc in scoring

        # TODO: process $trace for arcs that pass the threshold.  The
        # original statement here was an empty-pattern substitution, which
        # Perl treats as "reuse the last successful pattern", not as a no-op.

    } elsif ($ltc_line =~ m/^\(/) {
        # A '(' line that is not an arc: reset the per-group arc counter.
        # NOTE(review): presumably this marks the start of a sentence -- confirm.
        $sentarcs = 0;
    } elsif ($ltc_line =~ m/^\)/) {
        # A ')' line: nothing to do yet.
    }
}

# For the gunzip pipe, close() also reports a non-zero exit status via $?.
close($ltc_fh)
    or warn "Problem closing lattice '$lattice': $! (status $?)\n";

print "Arc count $totalarcs\n";


