#!/usr/local/bin/perl

## Sentence-end POS n-grams: counts the word and POS-tag trigrams that end each input line.

use strict;
use warnings;

# Collects trigram statistics from the file named by the first command-line
# argument. Each input line is treated as one sentence of whitespace-separated
# tokens (presumably of the form word_TAG -- confirm against the corpus).
#
# Results are left in three hashes:
#   %wngCount   - word trigram that ends the line       => occurrences
#   %pngCount   - POS-tag trigram that ends the line    => occurrences
#   %totalCount - POS-tag trigram anywhere in the line  => occurrences

# Returns $text with every "_TAG" suffix (an underscore followed by uppercase
# letters) removed, leaving only the word forms.
sub stripPosTags {
	my ($text) = @_;
	$text =~ s/_[A-Z]+//g;
	return $text;
}

# Returns the list of tags found in the given tokens: each token is reduced to
# its uppercase letters, and tokens with no uppercase letters (e.g. pure
# punctuation) are dropped.
# NOTE(review): uppercase letters in the word form survive as well
# ("John_NNP" yields "JNNP"); this mirrors the original stripping rule.
sub posTagsOf {
	my @tags;
	for my $token (@_) {
		(my $tag = $token) =~ s/[^A-Z]+//g;
		push @tags, $tag if length $tag;
	}
	return @tags;
}

# Returns every run of three consecutive tags as a space-joined string;
# returns the empty list when fewer than three tags are given.
sub tagTrigrams {
	my @tags = @_;
	return map { join ' ', @tags[$_ .. $_ + 2] } 0 .. $#tags - 2;
}

my $fileName = shift;
die("Usage: $0 <tagged-sentence-file>\n") unless defined $fileName;

my %wngCount = ();
my %pngCount = ();
my %totalCount = ();

# Only lines with at most this many tokens contribute to %totalCount.
my $maxTokensForTotals = 60;

open(my $in, '<', $fileName) or die("Couldn't open the file $fileName: $!\n");
LINE: while (my $line = <$in>) {
	chomp $line;

	# split ' ' ignores leading whitespace, so no empty first token appears.
	my @tokens = split ' ', $line;
	next LINE unless @tokens;

	# When the last token contains sentence-ending punctuation, the trigram
	# of interest is the three tokens *before* it; otherwise it is the last
	# three tokens of the line.
	my $endsSentence = $tokens[-1] =~ /[.?!]/;
	my $lastIdx = $endsSentence ? $#tokens - 1 : $#tokens;

	# Lines too short to hold a full window are skipped entirely, so they
	# contribute nothing to %totalCount either.
	next LINE if $lastIdx < 2;

	my @window = @tokens[$lastIdx - 2 .. $lastIdx];
	$wngCount{ stripPosTags(join ' ', @window) }++;

	# Keys are built exactly like the %totalCount keys below (single spaces,
	# no leading or trailing blank) so the two hashes can be joined on key.
	$pngCount{ join ' ', posTagsOf(@window) }++;

	next LINE if @tokens > $maxTokensForTotals;
	$totalCount{$_}++ for tagTrigrams(posTagsOf(@tokens));
}
close $in;

# foreach (sort {$wngCount{$b} <=> $wngCount{$a}} keys %wngCount){
# 	print "$_ $wngCount{$_}\n";
# }

# Print one line per sentence-end tag trigram, most frequent first:
#   <trigram> <sentence-end count> <total count>
# Ties are broken alphabetically so the output order is reproducible. A
# trigram that has no entry in %totalCount is reported with a total of 0
# rather than triggering an "uninitialized value" warning and an empty column.
foreach my $png (sort { $pngCount{$b} <=> $pngCount{$a} or $a cmp $b } keys %pngCount){
	my $total = exists $totalCount{$png} ? $totalCount{$png} : 0;
	print "$png $pngCount{$png} $total\n";
}
