#!/usr/local/bin/perl

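## Collapse closed-class POS tags into the single tag CL.
##
## Arguments: <top-words file> <tagged text file>
##   top-words file   : one entry per line; the part before the first '-' is the word.
##   tagged text file : whitespace-separated tokens of the form word_TAG.
## The re-tagged text is printed to standard output.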

use strict;
use warnings;

# Tags that may be rewritten to the closed-class tag 'CL'.
#   'CL'  => the tag is always rewritten to 'CL'.
#   '200' => the tag is rewritten to 'CL' only when the word's root is
#            present in the top-words list (%top).
my %collapse = (OTHER => 'CL', ADV => '200', VERB => '200', FW => 'CL',  WHQ => 'CL', POST => 'CL', DET => 'CL');

# Command line: first the top-words file, then the tagged text file.
my $top200 = shift;
my $fileName = shift;
die("Usage: $0 <top-words-file> <tagged-file>\n")
	unless(defined($top200) && defined($fileName));

# Lookup set of the words found in the top-words file (every value is 1).
my %top = ();

# Three-argument open with a lexical handle: the file name is never parsed
# for a mode character.  The error message names the file that actually
# failed to open ($top200) and includes the system error.
open(my $topFh, '<', $top200) or die("Couldn't open the file $top200: $!\n");
while(my $line = <$topFh>){
	chomp $line;
	# Each line is split on '-'; only the first field (the word) is used.
	my ($word) = split /\-/, $line;
	# A blank line yields no fields at all; skip it instead of storing an
	# undefined key.
	next unless(defined $word);
	$top{$word} = 1;
}
close $topFh;

# Re-tag the input text and print it to standard output.
#
# Every input line is a sequence of whitespace-separated word_TAG tokens.
# For NOUN and VERB tokens a root form is derived by stripping suffixes;
# the root is only used for the lookup in %top, the word itself is printed
# unchanged.  Each output token is followed by a single space and each
# input line produces exactly one output line.
#
# NOTE(review): this file, as seen, neither enables `use utf8` nor decodes
# its input, so the Urdu patterns below appear to be matched byte-wise
# against undecoded input -- keep both sides consistent if either changes.
die("Couldn't open the file: no input file name given\n") unless(defined $fileName);
my $inFh;
if($fileName eq '-'){
	# The former two-argument open treated '-' as standard input; keep that.
	$inFh = \*STDIN;
}
else{
	# Three-argument open: the file name is never parsed for a mode character.
	open($inFh, '<', $fileName) or die("Couldn't open the file $fileName: $!\n");
}
while(my $line = <$inFh>){
	chomp $line;
	# split ' ' ignores leading whitespace, so an indented line no longer
	# produces an empty first token (which used to be printed as "_").
	my @tokens = split ' ', $line;
	for my $token (@tokens){
		my ($word, $tag) = split /_/, $token;
		# A token without a word or without a tag leaves these undefined;
		# use empty strings so the output is the same but warning-free.
		$word = '' unless(defined $word);
		$tag = '' unless(defined $tag);
		my $root = $word;
		if($tag eq 'NOUN'){
			$root = $1 if($word =~ /(.+)(و|ے|ی)ں$/);
		}
		elsif($tag eq 'VERB'){
			# The rules are tried in order and a later match overrides an
			# earlier one, so their order must not be changed.
			$root = 'گا' if($word =~ /^گ(ے|ی)$/);
			$root = $1 if($word =~ /(.+)(ت|ن)(ے|ی|ا)$/);  # ta te ti na ne ni
			$root = $1 if($word =~ /(.+)ئ(ے|ی)$/); # ye yi
			$root = $1 if($word =~ /(.+)یے$/); # ye
			$root = $1 if($word =~ /(.+)یا$/); # ya
			$root = $1 if($word =~ /(.+)ئیں$/); # yiN
			$root = $1 if($word =~ /(.+)ئیے$/);#yie
			$root = $1 if($word =~ /(.+)سک(ت)(ے|ی|ا)$/); # skte skti skta
			$root = 'تھا' if($word =~ /^تھی$/);
			$root = 'تھے' if($word =~ /^تھیں$/);
		}
		if(defined($collapse{$tag})){
			if($collapse{$tag} eq '200'){
				# Conditional collapse: only when the root is a top word.
				$tag = 'CL' if(defined $top{$root});
			}
			else{
				$tag = 'CL';
			}
		}
		print $word."_".$tag." ";
	}
	print "\n";
}
close $inFh;
