#!/usr/local/bin/perl

use strict;
use warnings;

# Cross-checks a lattice file against a source-sentence file.
#
# Every line of $sent_file is loaded as one sentence.  $ltc_file is then read
# line by line: a line consisting solely of ")" advances the current sentence
# index, and a line of the form "( ... )" is parsed as an entry whose second
# double-quoted field is checked, character by character, against the current
# sentence.  Each character that does not occur in the sentence is reported on
# STDOUT.  Both inputs are decoded as GBK; output is written as UTF-8.
my $sent_file = "mt03_chinese_evlset_v0.src";
my $ltc_file  = "mt03_060127.ltc";

open(my $sent_fh, "<:encoding(gbk)", $sent_file)
    or die "Cannot open $sent_file: $!";
# Lines keep their terminators; they are only searched and printed below.
my @sents = <$sent_fh>;
close($sent_fh);

binmode(STDOUT, ":utf8");

my $sentcount = 0;
print "Now checking $sentcount\n";

open(my $ltc_fh, "<:encoding(gbk)", $ltc_file)
    or die "Cannot open $ltc_file: $!";
while (my $line = <$ltc_fh>) {
    $line =~ s/[\r\n]*$//;

    # A lone ")" moves on to the next sentence.
    if ($line =~ m/^\)$/) {
        $sentcount++;
        print "Now checking $sentcount\n";
        next;
    }

    # Only parenthesised entry lines are examined; everything else is ignored.
    next unless $line =~ m/^\(.*\)$/;

    # Entry layout: two integers, a quoted string, a non-blank token, a second
    # quoted string, whitespace, then the remainder of the line.  Only the
    # second quoted string is needed here, so it is the only capture.
    my ($src) = $line =~ m{
        ^\(
        \d+ \s+          # first integer
        \d+ \s+          # second integer
        "[^"]*" \s+      # first quoted field, not used by this check
        \S+ \s+          # non-blank token, not used by this check
        "([^"]*)"        # second quoted field, the text to verify
        \s+              # at least one blank must follow it
    }x;
    next unless defined $src;    # entry did not have the expected layout

    # Drop spacing and underscores before comparing.
    $src =~ s/\s+//g;
    $src =~ s/_//g;

    # Entries containing ASCII letters, digits, "?", "." or "*" are skipped.
    next if $src =~ m/[a-zA-Z0-9\?\.\*]/;

    # A sentence index past the end of the sentence file is treated as an
    # empty sentence, so every character of the entry is reported.
    my $sentence = defined $sents[$sentcount] ? $sents[$sentcount] : '';

    for my $char (split //, $src) {
        # Literal substring search: building a regex from the character would
        # misbehave or die on metacharacters such as "(", "[", "+" or "^".
        next if index($sentence, $char) >= 0;
        print "SENT: $sentence\nSRC: $src\n";
        print "No match at $sentcount\n";
    }
}
close($ltc_fh);


