#!/usr/local/bin/perl5
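# Compute per-speaker statistics of the IF codings found in a tagged file.
#
# Usage: pass the tagged file as the only argument; results are written to
# "<tagged file>.stats" (an existing .stats file is first moved to .stats.old).
# NOTE(review): the previous header mentioned a WordNet lookup in wn_s.pl;
# nothing in this file performs one, so that text looked stale.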
# ---------------------------------------------------------------------------
# Main script.
#
# Reads the tagged file named on the command line, keeps every line that
# carries a speaker prefix ("a", "c", optionally followed by digits, then
# ":") and is not a comment or an IFO/IFBp/IFB coding, and writes tag
# statistics to "<file>.stats".  An existing stats file is preserved as
# "<file>.stats.old".
#
# The variables below are package globals on purpose: the subroutines in
# this file read and write @AgentSDUs, @ClientSDUs, @UnkSpkSDUs, $tagcount
# and the OUTFILE handle directly.
# ---------------------------------------------------------------------------
$TaggedFile = shift(@ARGV);
die "Usage: $0 tagged-file\n"
    unless (defined($TaggedFile) && $TaggedFile ne "");

@IFtags     = ();
@AgentSDUs  = ();
@ClientSDUs = ();
@UnkSpkSDUs = ();
$TaggedFileStats    = ($TaggedFile . ".stats");
$OldTaggedFileStats = ($TaggedFileStats . ".old");
$line     = "";
$tagcount = 0;

# Keep the previous statistics.  rename() is used instead of shelling out to
# "mv" so that file names containing spaces or shell metacharacters are
# handled literally.
if (-e $TaggedFileStats) {
    rename($TaggedFileStats, $OldTaggedFileStats)
        or warn "Cannot rename $TaggedFileStats to $OldTaggedFileStats: $!\n";
}

# Collect the tag lines.  An unreadable input is reported but is not fatal:
# a stats file (with zero counts) is still produced, as before.
if (open(SOURCE, "<", $TaggedFile)) {
    while (defined($line = <SOURCE>)) {
        # chomp, not chop: the last line may lack a trailing newline.
        chomp($line);

        next unless ($line =~ m/[ac]\d*\:/);
        next if     ($line =~ m/comments\:/);
        next if     ($line =~ m/IFO\s*Prv/);
        next if     ($line =~ m/IFBp\s*Prv/);
        next if     ($line =~ m/IFB\s*Prv/);

        # A speaker prefix with nothing after it has not been tagged yet.
        $line =~ s/([ac]\d*\:)\s*$/${1}nottaggedyet/g;
        # Glue the tag directly onto the speaker prefix ("c:  foo" -> "c:foo").
        $line =~ s/([ac]\d*\:)\s+([a-z])/$1$2/g;

        push(@IFtags, $line);
        $tagcount = @IFtags;
    }
    close(SOURCE);
} else {
    warn "Cannot read $TaggedFile: $!\n";
}

#Get totals
sort_spk_tags (@IFtags);

open(OUTFILE, ">", $TaggedFileStats)
    or die "Cannot write $TaggedFileStats: $!\n";

counttags ("a", @AgentSDUs);
counttags ("c", @ClientSDUs);
#counttags ("u", @UnkSpkSDUs);

counttags ("T", @IFtags);

# Buffered write errors only surface at close time.
close(OUTFILE)
    or die "Error while writing $TaggedFileStats: $!\n";




# sort_spk_tags TAGLINES...
#
# Distributes the given tag lines over three package-global arrays according
# to the speaker prefix found in each line:
#   @ClientSDUs  - lines matching  c<digits>:   (checked first)
#   @AgentSDUs   - lines matching  a<digits>:
#   @UnkSpkSDUs  - everything else; each such line is also reported on STDERR
#
# The three arrays are emptied first, so the result depends only on the
# arguments (not on the global $tagcount or on a previous call).
sub sort_spk_tags {
    my (@tags) = @_;

    @ClientSDUs = ();
    @AgentSDUs  = ();
    @UnkSpkSDUs = ();

    foreach my $tag (@tags) {
        if ($tag =~ m/c\d*\:/) {
            push(@ClientSDUs, $tag);
        } elsif ($tag =~ m/a\d*\:/) {
            push(@AgentSDUs, $tag);
        } else {
            push(@UnkSpkSDUs, $tag);
            # Tag lines carry no trailing newline, so add one here.
            print STDERR ("Unknown speaker in IF tag: $tag\n");
        }
    }
}


# counttags SPEAKER, TAGLINES...
#
# Groups the tag lines by the IF coding they contain and prints one
# statistics section for SPEAKER to OUTFILE (via counttypes):
#   "IF  Prv"            -> code IF
#   "IFp Prv"            -> code IFp
#   "IF? Prv"            -> code IF?
#   "IF2 Prv"            -> code IF2
#   "IF2?Prv" / "IF?2Prv" -> code IF2?
#   code IFT             -> the IF, IFp and IF? groups together
# Codings with any other digit (IF<d>, IF<d>?, IF?<d>) are skipped silently;
# lines with no recognised coding are reported on stderr.
# The order of the tests matters: the first matching pattern wins.
sub counttags {
    my ($spkr, @tags) = @_;

    my @if_plain = ();
    my @if_p     = ();
    my @if_q     = ();
    my @if_2     = ();
    my @if_2q    = ();

    foreach my $coding (@tags) {
        if ($coding =~ m/IF\s+Prv/) {
            push(@if_plain, $coding);
        }
        elsif ($coding =~ m/IF2\s+Prv/) {
            push(@if_2, $coding);
        }
        elsif ($coding =~ m/IF\d\s+Prv/) {
            # other numbered codings are not counted
        }
        elsif ($coding =~ m/IF2\?\s*Prv/ or $coding =~ m/IF\?2\s*Prv/) {
            push(@if_2q, $coding);
        }
        elsif ($coding =~ m/IF\d\?\s*Prv/ or $coding =~ m/IF\?\d\s*Prv/) {
            # other numbered uncertain codings are not counted
        }
        elsif ($coding =~ m/IF\?\s+Prv/) {
            push(@if_q, $coding);
        }
        elsif ($coding =~ m/IFp\s+Prv/) {
            push(@if_p, $coding);
        }
        else {
            print stderr ("Unknown type of IF coding: $coding\n");
        }
    }

    print OUTFILE ("Stats for Speaker: $spkr \n");

    counttypes ($spkr, "IF",   @if_plain);
    counttypes ($spkr, "IFp",  @if_p);
    counttypes ($spkr, "IF?",  @if_q);
    counttypes ($spkr, "IF2",  @if_2);
    counttypes ($spkr, "IF2?", @if_2q);
    counttypes ($spkr, "IFT",  @if_plain, @if_p, @if_q);
}


# counttypes SPEAKER, CODE, TAGLINES...
#
# Classifies each tag line by what follows the ":" and prints the counts
# for CODE to OUTFILE (via printtypestats).  Categories, tested in order:
#   :descriptive       -> Descriptive
#   :empty / :noise    -> No Lang Content
#   :no-tag            -> No-Tag
#   :nottagged         -> Not tagged yet
#   : + any letter     -> Fully tagged
# Anything else is reported on stderr and not counted.
# "Total Lang Cont" is every category except "No Lang Content".
#
# Matching is case-insensitive: the tests run on a lowercased copy of the
# line (the original `lc($tag);` discarded its result and so did nothing).
# SPEAKER is accepted for call compatibility but is not used here.
sub counttypes {
    my ($spk, $code, @tags) = @_;

    my (@full, @desc, @notag, @notext, @nottagged);

    # Walk from last to first, as before, so diagnostics keep their order.
    foreach my $tag (reverse(@tags)) {
        my $lowered = lc($tag);

        if ($lowered =~ m/\:descriptive/) {
            push(@desc, $tag);
        } elsif (($lowered =~ m/\:empty/) || ($lowered =~ m/\:noise/)) {
            push(@notext, $tag);
        } elsif ($lowered =~ m/\:no\-tag/) {
            push(@notag, $tag);
        } elsif ($lowered =~ m/\:nottagged/) {
            push(@nottagged, $tag);
        } elsif ($lowered =~ m/\:[a-z]/) {
            push(@full, $tag);
        } else {
            print stderr ("IF Tag unknown: $tag \n");
        }
    }

    # Lines with language content: everything except empty/noise.
    my @total = (@full, @desc, @notag, @nottagged);

    print OUTFILE ("      Code: $code \n");

    printtypestats ("Fully tagged   ", @full);
    printtypestats ("Descriptive    ", @desc);
    printtypestats ("No-Tag         ", @notag);
    printtypestats ("Not tagged yet ", @nottagged);
    printtypestats ("No Lang Content", @notext);
    printtypestats ("Total Lang Cont", @total);
}


# printtypestats LABEL, TAGLINES...
#
# Writes one indented "LABEL: N" line to OUTFILE, where N is the number of
# tag lines passed after the label.
sub printtypestats {
    my $type = shift(@_);
    my $how_many = scalar(@_);

    printf OUTFILE ("          %s: %d \n", $type, $how_many);
}



