#!/usr/bin/perl 

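# Selects the source sentences shorter than 26 characters (the input is
# decoded from GBK before measuring, so the limit counts characters, not bytes).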

# Load the reference translations into @refs.
#
# The file is read in groups of five lines: four reference lines followed by
# one line that is read and discarded.  For group number n the four references
# are stored, with trailing CR/LF/space characters removed, in
# $refs[n][0] .. $refs[n][3].
#
# @refs and $refcount are deliberately left as package variables (no "my")
# because later parts of this script index into @refs.
my $ref_file = "mt03_refs.txt";
open(my $ref_fh, "<", $ref_file) or die "Cannot open $ref_file: $!";
$refcount = 0;
while (!eof($ref_fh)) {
    for my $k (0 .. 3) {
        my $ref = <$ref_fh>;
        # A truncated final group yields an empty reference rather than undef.
        $ref = "" unless defined $ref;
        $ref =~ s/[\r\n ]*$//;
        $refs[$refcount][$k] = $ref;
    }
    # Fifth line of every group is consumed but not used.
    my $discarded = <$ref_fh>;
    $refcount++;
}
close($ref_fh);

# Copy every source sentence shorter than 26 characters to the output file
# and remember its zero-based line number in @devs.
#
# Both files use the GBK encoding layer, so length() counts decoded
# characters.  $linecount, $devlines and @devs stay package variables because
# later parts of this script read @devs and $devlines.
$linecount = 0;
# Start the counter at 0 so the final report prints "0" rather than an empty
# line when no sentence qualifies.
$devlines = 0;

my $src_file = "mt03_chinese_evlset_v0.src";
my $out_file = "mt03_chinese_top200.src";
open(my $src_fh, "<:encoding(gbk)", $src_file)
    or die "Cannot open $src_file: $!";
open(my $out_fh, ">:encoding(gbk)", $out_file)
    or die "Cannot open $out_file for writing: $!";

while (my $line = <$src_fh>) {
    $line =~ s/[\r\n]*$//;
    if (length($line) < 26) {
        print {$out_fh} "$line\n";
        $devlines++;
        push @devs, $linecount;
    }
    $linecount++;
}

# Buffered write errors only surface at close time, so check it.
close($out_fh) or die "Cannot close $out_file: $!";
close($src_fh);

# Write the references of the selected sentences as an SGML <refset>.
#
# One <DOC> element is emitted per reference column (0..3); inside it there
# is one <seg> per entry of @devs, in @devs order.  The reference text is
# written exactly as stored in @refs.
my $sgm_file = "mt03_chinese_top200-ref.sgm";
open(my $sgm_fh, ">", $sgm_file)
    or die "Cannot open $sgm_file for writing: $!";

print {$sgm_fh} '<refset setid="mt03_chinese_top200-ref" srclang="Chinese" trglang="English">' . "\n";
for my $ref_idx (0 .. 3) {
    print {$sgm_fh} "<DOC docid=\"TOP200\" sysid=\"SYS0$ref_idx\">\n";
    for my $dev (@devs) {
        print {$sgm_fh} "<seg> $refs[$dev][$ref_idx] </seg>\n";
    }
    print {$sgm_fh} "</DOC>\n";
}
print {$sgm_fh} "</refset>\n";

# Buffered write errors only surface at close time, so check it.
close($sgm_fh) or die "Cannot close $sgm_file: $!";

# Write the lower-cased references of the selected sentences as plain text:
# for each entry of @devs, its four references on four consecutive lines.
my $tmp_file = "mt03_chinese_top200-ref.tmp";
open(my $tmp_fh, ">", $tmp_file)
    or die "Cannot open $tmp_file for writing: $!";

for my $dev (@devs) {
    for my $ref_idx (0 .. 3) {
        print {$tmp_fh} lc($refs[$dev][$ref_idx]), "\n";
    }
}

# The file is read by an external command right after this, so make sure
# everything was flushed successfully.
close($tmp_fh) or die "Cannot close $tmp_file: $!";

# Run the external normalisation script over the temporary reference file.
# The command string contains shell redirections, so it is handed to the
# shell as a single string.  system() is used instead of backticks so the
# exit status can be inspected; a failure is reported but does not abort the
# script, so the count below is still printed.
my $normalize_cmd = 'perl /afs/cs/user/vogel/bin/Optimize/PrePunctNormalize_Bleu_with_deTokenization.pl < mt03_chinese_top200-ref.tmp > mt03_chinese_top200-ref.txt';
system($normalize_cmd) == 0
    or warn "Normalization command failed (status $?): $normalize_cmd\n";

# Report how many source sentences were selected (0 if the counter was
# never set).
print((defined $devlines ? $devlines : 0), "\n");
