#!/usr/local/bin/perl

# Read the rule list (one rule per line) and tally two frequency tables:
#   %rulesfull - counts keyed by the rule exactly as written in the file
#   %rules     - counts keyed by the rule after its "-..." suffixes are removed
# Both hashes are package globals because the sort comparators and the output
# sections later in this script read them.
my $rules_path = "d:/cmu/chinese_treebank.old/data/ctbrules.txt";
open(my $rules_in, "<", $rules_path)
    or die "Cannot open $rules_path for reading: $!";
while (my $line = <$rules_in>) {
    # Drop any trailing CR/LF so DOS and Unix line endings give the same key.
    $line =~ s/[\r\n]*$//;

    # A blank line is not a rule; counting it would emit a bare "\tN" line.
    next unless $line =~ /\S/;

    $rulesfull{$line}++;

    # Remove a hyphen that follows a non-space character, together with the
    # rest of that token (e.g. "NP-OBJ" becomes "NP"). A hyphen at the very
    # start of a token is left alone because nothing non-space precedes it.
    $line =~ s/(\S)\-\S+/$1/g;
    $rules{$line}++;
}
close($rules_in);

# Sort comparator for the full-feature rules: descending by the count stored
# in %rulesfull. Rules with equal counts are ordered by their text so that the
# result does not depend on hash iteration order, which varies between runs.
# Reads the sort globals $a and $b; returns a negative, zero or positive value.
sub bycount {
    return $rulesfull{$b} <=> $rulesfull{$a} || $a cmp $b;
}

# Write every full-feature rule to ctbrules-feat.txt, ordered by the bycount
# comparator. Each output line has the form "LHS -> RHS...<TAB>count".
my $feat_path = "ctbrules-feat.txt";
open(my $feat_out, ">", $feat_path)
    or die "Cannot open $feat_path for writing: $!";
foreach my $rule (sort bycount keys %rulesfull) {
    my $nicerule = $rule;

    # Replace the first whitespace character after the left-hand side
    # with " -> " so the rule reads as a production.
    $nicerule =~ s/^(\S+)\s/$1 \-\> /;

    print {$feat_out} "$nicerule\t" . $rulesfull{$rule} . "\n"
        or die "Cannot write to $feat_path: $!";
}
# Buffered write errors (e.g. a full disk) only surface when the handle closes.
close($feat_out) or die "Cannot close $feat_path: $!";

# Print out rules without any extra features (e.g. "-OBJ")
# Sort comparator for the feature-stripped rules: descending by the count
# stored in %rules. Rules with equal counts are ordered by their text so that
# the result does not depend on hash iteration order, which varies between runs.
# Reads the sort globals $a and $b; returns a negative, zero or positive value.
sub bycount2 {
    return $rules{$b} <=> $rules{$a} || $a cmp $b;
}

# Write every feature-stripped rule to ctbrules.txt, ordered by the bycount2
# comparator. Each output line has the form "LHS -> RHS...<TAB>count".
my $plain_path = "ctbrules.txt";
open(my $plain_out, ">", $plain_path)
    or die "Cannot open $plain_path for writing: $!";
foreach my $rule (sort bycount2 keys %rules) {
    my $nicerule = $rule;

    # Replace the first whitespace character after the left-hand side
    # with " -> " so the rule reads as a production.
    $nicerule =~ s/^(\S+)\s/$1 \-\> /;

    print {$plain_out} "$nicerule\t" . $rules{$rule} . "\n"
        or die "Cannot write to $plain_path: $!";
}
# Buffered write errors (e.g. a full disk) only surface when the handle closes.
close($plain_out) or die "Cannot close $plain_path: $!";
