package edu.cmu.minorthird.text.model;

import cern.colt.matrix.impl.AbstractFormatter;
import edu.cmu.minorthird.text.BasicTextBase;
import edu.cmu.minorthird.text.FancyLoader;
import edu.cmu.minorthird.text.Span;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.LineNumberReader;
import java.io.PrintStream;
import java.util.HashMap;
import java.util.Map;

/* loaded from: input_file:edu/cmu/minorthird/text/model/UnigramModel.class */
/**
 * A unigram language model: a table of word frequencies plus the total
 * number of word occurrences, used to score spans by summed log-probability.
 *
 * <p>Words are lower-cased (default locale) by {@link #getFrequency(String)},
 * {@link #incrementFrequency(String)} and {@link #score(Span)}. Words read by
 * {@link #load(File)} are stored exactly as they appear in the file, so only
 * entries that are already lower case can be found by later lookups.
 *
 * <p>This class is not synchronized.
 */
public class UnigramModel {
    /** Pre-boxed values for the smallest counts, indexed by the count itself. */
    private static final Double[] CACHED_DOUBLES = new Double[10];

    static {
        for (int i = 0; i < CACHED_DOUBLES.length; i++) {
            CACHED_DOUBLES[i] = Double.valueOf(i);
        }
    }

    /** Maps each word to its occurrence count (a whole number boxed as a Double). */
    private Map<String, Double> freq = new HashMap<String, Double>();

    /** Sum of all counts added through {@link #load} and {@link #incrementFrequency}. */
    private double total = 0.0d;

    /**
     * Adds the contents of a model file to this model. Every line must hold
     * exactly two whitespace-separated fields: a non-negative integer count
     * followed by the word.
     *
     * <p>If a word occurs on several lines the last count replaces the earlier
     * ones in the table, while the total accumulates all of them.
     *
     * @param file the model file to read
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException if reading fails
     * @throws IllegalStateException if a line is malformed; lines read before
     *         the bad one have already been added to the model
     */
    public void load(File file) throws IOException, FileNotFoundException {
        LineNumberReader reader = new LineNumberReader(new FileReader(file));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.trim().split("\\s+");
                if (fields.length != 2) {
                    throw badLine(line, reader, null);
                }
                int count;
                try {
                    count = Integer.parseInt(fields[0]);
                } catch (NumberFormatException e) {
                    throw badLine(line, reader, e);
                }
                if (count < 0) {
                    // A negative frequency is meaningless and would corrupt the total.
                    throw badLine(line, reader, null);
                }
                this.total += count;
                this.freq.put(fields[1], getDouble(count));
            }
        } finally {
            // Close the reader even when a malformed line aborts the load.
            reader.close();
        }
    }

    /**
     * Builds the exception that reports a malformed input line.
     *
     * @param line the offending line
     * @param reader the reader the line came from; supplies the line number
     * @param cause the underlying parse failure, or null if there is none
     * @return the exception for the caller to throw
     */
    private IllegalStateException badLine(String line, LineNumberReader reader, Throwable cause) {
        return new IllegalStateException("bad input at line " + reader.getLineNumber() + ": " + line, cause);
    }

    /**
     * Writes the frequency table to a file, one "count, separator, word" line
     * per entry, in the iteration order of the underlying hash map. The
     * separator is {@code AbstractFormatter.DEFAULT_COLUMN_SEPARATOR}
     * (presumably whitespace, since {@link #load(File)} splits on whitespace
     * -- TODO confirm).
     *
     * @param file the file to write; existing content is replaced
     * @throws IOException if the file cannot be opened or a write fails
     */
    public void save(File file) throws IOException {
        PrintStream out = new PrintStream(new BufferedOutputStream(new FileOutputStream(file)));
        boolean failed;
        try {
            for (Map.Entry<String, Double> entry : this.freq.entrySet()) {
                out.println(entry.getValue().intValue() + AbstractFormatter.DEFAULT_COLUMN_SEPARATOR + entry.getKey());
            }
            // PrintStream swallows I/O errors; checkError() flushes and reports them.
            failed = out.checkError();
        } finally {
            out.close();
        }
        if (failed) {
            throw new IOException("error writing unigram model to " + file);
        }
    }

    /**
     * Boxes a count, reusing a cached instance for small non-negative values.
     *
     * @param i the count to box
     * @return a Double whose value equals {@code i}
     */
    private Double getDouble(int i) {
        if (i >= 0 && i < CACHED_DOUBLES.length) {
            return CACHED_DOUBLES[i];
        }
        return Double.valueOf(i);
    }

    /**
     * Scores a span as the sum, over its tokens, of the smoothed log-probability
     * of each lower-cased token value.
     *
     * <p>The smoothing prior is {@code 0.1 / total}; on a model with no counts
     * (total of zero) that prior is infinite and the score is not meaningful.
     *
     * @param span the span whose tokens are scored
     * @return the summed log-probability; 0 for a span with no tokens
     */
    public double score(Span span) {
        double logProb = 0.0d;
        double prior = 0.1d / this.total;
        for (int i = 0; i < span.size(); i++) {
            // getFrequency lower-cases the token value itself.
            int count = getFrequency(span.getToken(i).getValue());
            logProb += estimatedLogProb(count, this.total, prior, 1.0d);
        }
        return logProb;
    }

    /**
     * Returns the total number of word occurrences recorded in this model.
     *
     * @return the sum of all counts added by loading or incrementing
     */
    public double getTotalWordCount() {
        return this.total;
    }

    /**
     * Looks up the count recorded for a word, ignoring case.
     *
     * @param str the word to look up
     * @return the recorded count, or 0 if the word is unknown
     */
    public int getFrequency(String str) {
        Double count = this.freq.get(str.toLowerCase());
        return count == null ? 0 : count.intValue();
    }

    /**
     * Records one more occurrence of a word (lower-cased) and adds it to the
     * total word count.
     *
     * @param str the word that was observed
     */
    public void incrementFrequency(String str) {
        String word = str.toLowerCase();
        this.freq.put(word, getDouble(getFrequency(word) + 1));
        // Keep the total in step with the table so score() and
        // getTotalWordCount() work on a model built in memory.
        this.total += 1.0d;
    }

    /**
     * Computes {@code log((count + prior * pseudoCount) / (totalCount + pseudoCount))}.
     *
     * @param count occurrences of the word
     * @param totalCount occurrences of all words
     * @param prior prior probability assigned to the word
     * @param pseudoCount weight given to the prior
     * @return the natural log of the smoothed estimate
     */
    private double estimatedLogProb(double count, double totalCount, double prior, double pseudoCount) {
        return Math.log((count + (prior * pseudoCount)) / (totalCount + pseudoCount));
    }

    /**
     * Command-line entry point.
     *
     * <ul>
     * <li>No arguments: prints the usage text and exits.</li>
     * <li>Exactly two arguments ({@code textbase modelfile}): counts every token
     *     of every document in the text base and saves the model to the file.
     *     A two-argument call is always interpreted this way.</li>
     * <li>Otherwise ({@code modelfile span1 span2...}): loads the model, then
     *     prints the score and the per-token frequencies of each remaining
     *     argument.</li>
     * </ul>
     *
     * @param strArr the command-line arguments
     * @throws IOException if the model file cannot be read or written
     */
    public static void main(String[] strArr) throws IOException {
        if (strArr.length == 0) {
            System.out.println("usage 1: modelfile span1 span2...");
            System.out.println("usage 2: textbase modelfile");
            // Nothing to do without arguments; falling through would index strArr[0].
            return;
        }
        if (strArr.length == 2) {
            UnigramModel trained = new UnigramModel();
            Span.Looper docs = FancyLoader.loadTextLabels(strArr[0]).getTextBase().documentSpanIterator();
            while (docs.hasNext()) {
                Span doc = docs.nextSpan();
                for (int i = 0; i < doc.size(); i++) {
                    trained.incrementFrequency(doc.getToken(i).getValue());
                }
            }
            trained.save(new File(strArr[1]));
            return;
        }
        UnigramModel model = new UnigramModel();
        model.load(new File(strArr[0]));
        BasicTextBase textBase = new BasicTextBase();
        for (int i = 1; i < strArr.length; i++) {
            textBase.loadDocument("argv." + i, strArr[i]);
        }
        Span.Looper spans = textBase.documentSpanIterator();
        while (spans.hasNext()) {
            Span span = spans.nextSpan();
            System.out.println(span.asString() + " => " + model.score(span));
            for (int i = 0; i < span.size(); i++) {
                String value = span.getToken(i).getValue();
                System.out.print(AbstractFormatter.DEFAULT_COLUMN_SEPARATOR + value + ":" + model.getFrequency(value));
            }
            System.out.println();
        }
    }
}
