#!/usr/local/bin/perl5
#
# Speech-act disambiguation by mutual information (MI).
#
# The input file is piped through the SOUP parser.  For every utterance SOUP
# reports, the candidate interpretations are copied to the output file.  When
# there is exactly one interpretation its speech act is used directly;
# otherwise the speech act with the highest summed word/act MI is chosen.
#
#   MI(word, speech_act) = log_2( P(word & speech_act) / (P(speech_act) * P(word)) )
#                        = log_2( P(speech_act | word) / P(speech_act) )
#
# NOTE(review): this file was received with its line breaks collapsed and
# with every <HANDLE> read missing.  The reads below were restored from the
# matching open() calls -- confirm against a pristine copy if one exists.

use strict;
use warnings;

# Package globals are kept (rather than lexicals) because the subs below
# share them and the original script exposed them the same way.
our $binDir  = "/afs/cs.cmu.edu/project/cmt-8/enthusiast/mlb/CMU-Cam_Toolkit_v2/bin";
our $homeDir = "/afs/cs.cmu.edu/project/cmt-8/enthusiast/mlb/MI";

# Not referenced anywhere in this file; kept in case other code relies on them.
our $WORD  = 1;
our $ACT   = 2;
our $UNION = 3;

our %speech_acts;    # speech act name => 1 (filled by loadActInventory)
our %MI;             # $MI{word}{act} = MI score (filled by loadMI)
our $max;            # best summed MI found by the last getPrediction call
our $position;       # number of utterances processed so far
our @sequence;       # speech act chosen for each processed utterance

# Discard the next three lines from the given filehandle.
#   $fh - readable filehandle (Main passes the SOUP pipe)
sub eat_parse_comments {
    my ($fh) = @_;
    for (1 .. 3) {
        my $discarded = <$fh>;
    }
    return;
}

# Extract the speech act from one interpretation string.
# Keeps what stands between "[" and "]:" and then drops everything from the
# first "+" onwards, e.g. "[a+b]: ( x )" yields "a".
#   $_[0]   - interpretation string
#   returns - the speech act name
sub getSpeechAct {
    my ($str) = @_;
    $str =~ s/\[(.*)\]:.*/$1/;
    $str =~ s/(.*?)\+.*/$1/;
    return $str;
}

# Load the speech act inventory (one act per line) into %speech_acts.
# Dies if the inventory file cannot be opened.
sub loadActInventory {
    my $inventory = "/afs/cs.cmu.edu/project/cmt-8/enthusiast/mlb/acts.txt";
    open(my $inv, '<', $inventory)
        or die "Unable to open $inventory for input\n";
    while (my $act = <$inv>) {
        chomp($act);
        $speech_acts{$act} = 1;
    }
    close($inv);
    return;
}

# Load the MI table from "$homeDir/mi.data" into %MI.
# As read by this code, the file is a sequence of records: a marker line
# matching /.W/, then a line holding the word, then "act score" lines
# separated by a single space.  Dies if the file cannot be opened.
sub loadMI {
    my $data = "$homeDir/mi.data";
    open(my $in, '<', $data)
        or die "Unable to open $data for input\n";
    my $word;
    while (my $line = <$in>) {
        # NOTE(review): the dot is unescaped, so this matches any character
        # followed by "W"; presumably a literal ".W" marker is meant --
        # confirm against mi.data before tightening the pattern.
        if ($line =~ /.W/) {
            $word = <$in>;
            last unless defined $word;    # marker was the last line of the file
            chomp($word);
            next;
        }
        chomp($line);
        # Explicit list assignment: split no longer fills @_ implicitly.
        my ($act, $mi) = split(/ /, $line);
        next unless defined $word && defined $mi;    # skip malformed/orphan lines
        $MI{$word}{$act} = $mi;
    }
    close($in);
    return;
}

# Pick the speech act whose summed MI over the words of an utterance is
# largest.  The winning total is left in the package global $max.
#   $_[0]   - utterance text (split on whitespace)
#   returns - the best speech act, or undef when no act scores above 0
sub getPrediction {
    my ($utt) = @_;
    my @words = split(/\s+/, $utt);
    my $maxAct;
    $max = 0;
    # Sorted so that ties are resolved the same way on every run.
    for my $act (sort keys %speech_acts) {
        my $totalMI = 0;
        for my $word (@words) {
            # Look the word up first so unknown words are not autovivified
            # into %MI.
            my $scores = $MI{$word} or next;
            my $mi = $scores->{$act};
            $totalMI += $mi if defined $mi;
        }
        if ($totalMI > $max) {
            $max    = $totalMI;
            $maxAct = $act;
        }
    }
    return $maxAct;
}

# Entry point: MI.pl <infile> <outfile>
# Runs SOUP over <infile> and writes, per utterance, the quoted utterance,
# its interpretations and the chosen speech act (plus the MI total when the
# MI prediction was used) to <outfile>.
sub Main {
    @ARGV == 2 or die "Usage: MI.pl <infile> <outfile>";
    my ($infile, $outfile) = @ARGV;
    open(my $out, '>', $outfile)
        or die "Unable to open file: $outfile for output\n";
    loadActInventory();
    loadMI();
    my $soupCall = "/afs/cs/project/cmt-46/trans/Soup/soup_s -grammar /afs/cs.cmu.edu/project/cmt-46/trans/Phoenix/Grammars/Travel/stable/all.1.gra";
    $position = 0;
    @sequence = ();
    # The command still goes through the shell because of the "<" redirect.
    open(my $soup, '-|', "$soupCall < $infile")
        or die "Could not run SOUP.";
    while (my $line = <$soup>) {
        chomp($line);
        # at beginning of parse
        next unless $line =~ /\A\; [0-9]+:>/;
        my ($utt) = $line =~ /\`{2}(.*)\'{2}/;
        next unless $utt;
        print {$out} "\"$utt\"\n";
        eat_parse_comments($soup);
        # Collect interpretations up to the next blank line.  Also stop at
        # end of input so a truncated SOUP run cannot loop forever.
        my @generated = ();
        while (defined(my $interp = <$soup>)) {
            last if $interp =~ /\A\n\Z/;
            chomp($interp);
            $interp =~ s/\A\s+//;
            push(@generated, $interp);
            print {$out} "$interp\n";
        }
        my $Sn;
        if (@generated == 1) {
            # no ambiguity
            $Sn = getSpeechAct($generated[0]);
            print {$out} "$Sn\n\n";
        }
        else {
            # MI prediction
            $Sn = getPrediction($utt);
            my $shown = defined $Sn ? $Sn : '';
            print {$out} "$shown\n$max\n\n";
        }
        # Recorded but not read anywhere in this file.
        push(@sequence, $Sn);
        $position++;
    }
    close($soup);
    # Buffered write errors only surface when the handle is closed.
    close($out) or die "Unable to close file: $outfile\n";
    return;
}

# Run only when executed as a script, so the subs can be loaded on their own.
Main() unless caller;

1;