package edu.cmu.minorthird.text.learn;

import edu.cmu.minorthird.text.BasicTextBase;
import edu.cmu.minorthird.text.BasicTextLabels;
import edu.cmu.minorthird.text.NestedTextLabels;
import edu.cmu.minorthird.text.Span;
import edu.cmu.minorthird.text.TextLabels;
import edu.cmu.minorthird.text.TextLabelsLoader;
import edu.cmu.minorthird.text.mixup.Mixup;
import edu.cmu.minorthird.text.mixup.MixupProgram;
import org.apache.log4j.Logger;

/* loaded from: input_file:edu/cmu/minorthird/text/learn/SampleExtractionProblem.class */
/**
 * A small, self-contained name-extraction problem built from hard-coded sentences.
 *
 * <p>The training and test documents are loaded into {@link BasicTextBase} instances and
 * labeled by running a fixed Mixup program ({@code labelingProgram}) over them. The span type
 * that marks the names to extract is {@link #LABEL}.
 */
public class SampleExtractionProblem {
    private static final Logger log = Logger.getLogger(SampleExtractionProblem.class);

    /** Documents loaded by {@link #trainBase()}, in this order. */
    private static final String[] trainStrings = {
        "Hello there, William Cohen, and welcome to CMU.",
        "William Clinton is a former US president.",
        "There's a new book by Hillary Clinton.",
        "George Washington was father of our country.",
        "Where in the world is Carmen Sandiego?",
        "Which George Bush was most damaging to the economy?",
        "I love books about Curious George the monkey."
    };

    /** Documents loaded by {@link #testBase()}, in this order. */
    private static final String[] testStrings = {
        "Does William Cohen rock or what?",
        "Say what you like, William Clinton definitely had bad hair",
        "Who was George Mason anyway? did he invent jars?",
        "Don't blame me, I never voted for anyone named George Bush"
    };

    /** Name of the span type, defined by the labeling program, that marks the names. */
    public static final String LABEL = "trueName";

    /** Mixup statements evaluated over both the training and the test documents. */
    private static final String[] labelingProgram = {
        "defDict fn = William, Carmen, George, Curious, Hillary",
        "defTokenProp cap:t =: ... [re('^[A-Z][a-z]+')] ...",
        "defTokenProp name:first =: ... [a(fn)] ... ",
        "defTokenProp name:last =: ... a(fn) [any] ... ",
        "defSpanType trueName =~ trie William Cohen,William Clinton,Hillary Clinton,George Washington,Carmen Sandiego,George Bush,Curious George,George Mason",
        "defSpanType bigram =: ... [any any] ... ",
        "defSpanProp inCapsBecause:name =: ... [@trueName] ...",
        "defSpanProp inCapsBecause:start =: [any]...@trueName...",
        "defSpanType inCapsBecauseStart =: [any]...@trueName...",
        "defSpanType political =: [ ... 'Clinton' ... ] || [... 'George' 'Bush' ...]",
        "defSpanProp subject:politics =: [@political]",
        "defSpanProp subject:me =: [...'William' 'Cohen'...]",
        "defSpanProp subject:other =top- [@political] || [...'William' 'Cohen'...]"
    };

    /**
     * Builds the labels for the training documents.
     *
     * <p>Runs the labeling program over a fresh {@link #trainBase()}, then closes the
     * {@link #LABEL} type inside every document span, and finally calls
     * {@code TextLabelsLoader.closeLabels} on the result.
     *
     * @return the labels for the training documents
     * @throws IllegalStateException if the labeling program fails with a
     *         {@code Mixup.ParseException}; that exception is attached as the cause
     */
    public static TextLabels trainLabels() {
        try {
            BasicTextBase base = trainBase();
            BasicTextLabels labels = new BasicTextLabels(base);
            new MixupProgram(labelingProgram).eval(labels, base);
            for (Span.Looper docs = base.documentSpanIterator(); docs.hasNext();) {
                labels.closeTypeInside(LABEL, docs.nextSpan());
            }
            // NOTE(review): 1 looks like an inlined TextLabelsLoader closure-policy constant;
            // confirm which one and reference it by name.
            new TextLabelsLoader().closeLabels(labels, 1);
            return labels;
        } catch (Mixup.ParseException e) {
            throw parseFailure(e);
        }
    }

    /**
     * Returns {@link #trainLabels()} with the additional {@code partOfName} token property.
     *
     * @return the training labels wrapped by {@code tagNames}
     */
    public static TextLabels taggerTrainLabels() {
        return tagNames(trainLabels());
    }

    /**
     * Returns {@link #testLabels()} with the additional {@code partOfName} token property.
     *
     * @return the test labels wrapped by {@code tagNames}
     */
    public static TextLabels taggerTestLabels() {
        return tagNames(testLabels());
    }

    /**
     * Wraps the given labels in a {@link NestedTextLabels} and evaluates a one-statement Mixup
     * program on the wrapper that defines the token property {@code partOfName:true} from
     * {@code trueName} spans.
     *
     * @param textLabels the labels to wrap
     * @return the wrapping {@link NestedTextLabels}
     * @throws IllegalStateException if the Mixup statement fails with a
     *         {@code Mixup.ParseException}; that exception is attached as the cause
     */
    private static TextLabels tagNames(TextLabels textLabels) {
        try {
            NestedTextLabels tagged = new NestedTextLabels(textLabels);
            String[] taggingProgram = {"defTokenProp partOfName:true =: ... [@trueName] ... "};
            new MixupProgram(taggingProgram).eval(tagged, tagged.getTextBase());
            return tagged;
        } catch (Mixup.ParseException e) {
            throw parseFailure(e);
        }
    }

    /**
     * Creates a new text base holding the training sentences.
     *
     * @return a text base whose documents have ids {@code trainStrings[0]},
     *         {@code trainStrings[1]}, ...
     */
    public static BasicTextBase trainBase() {
        return loadBase("trainStrings", trainStrings);
    }

    /**
     * Builds the labels for the test documents.
     *
     * <p>Runs the labeling program over a fresh {@link #testBase()} and then calls
     * {@code TextLabelsLoader.closeLabels} on the result. Unlike {@link #trainLabels()}, this
     * method does not call {@code closeTypeInside} per document.
     *
     * @return the labels for the test documents
     * @throws IllegalStateException if the labeling program fails with a
     *         {@code Mixup.ParseException}; that exception is attached as the cause
     */
    public static TextLabels testLabels() {
        try {
            BasicTextBase base = testBase();
            BasicTextLabels labels = new BasicTextLabels(base);
            new MixupProgram(labelingProgram).eval(labels, base);
            // NOTE(review): 1 looks like an inlined TextLabelsLoader closure-policy constant;
            // confirm which one and reference it by name.
            new TextLabelsLoader().closeLabels(labels, 1);
            return labels;
        } catch (Mixup.ParseException e) {
            throw parseFailure(e);
        }
    }

    /**
     * Creates a new text base holding the test sentences.
     *
     * @return a text base whose documents have ids {@code testStrings[0]},
     *         {@code testStrings[1]}, ...
     */
    public static BasicTextBase testBase() {
        return loadBase("testStrings", testStrings);
    }

    /** Loads each string as one document whose id is {@code idPrefix[index]}. */
    private static BasicTextBase loadBase(String idPrefix, String[] documents) {
        BasicTextBase base = new BasicTextBase();
        for (int i = 0; i < documents.length; i++) {
            base.loadDocument(idPrefix + "[" + i + "]", documents[i]);
        }
        return base;
    }

    /** Wraps a Mixup parse failure in an unchecked exception, keeping it as the cause. */
    private static IllegalStateException parseFailure(Mixup.ParseException e) {
        IllegalStateException wrapped = new IllegalStateException("error: " + e);
        // initCause (not the two-argument constructor) so this still compiles at
        // pre-Java-5 source levels.
        wrapped.initCause(e);
        return wrapped;
    }
}
