package edu.cmu.minorthird.text;

import edu.cmu.minorthird.classify.BasicDataset;
import edu.cmu.minorthird.classify.ClassLabel;
import edu.cmu.minorthird.classify.Dataset;
import edu.cmu.minorthird.classify.Example;
import edu.cmu.minorthird.text.Span;
import edu.cmu.minorthird.text.learn.SampleFE;
import edu.cmu.minorthird.text.learn.SpanFeatureExtractor;
import java.io.File;
import java.util.Iterator;
import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;
import junit.textui.TestRunner;
import org.apache.log4j.BasicConfigurator;
import org.apache.log4j.Logger;

/* loaded from: input_file:edu/cmu/minorthird/text/TextBaseLoaderTest.class */
/**
 * JUnit 3 tests for {@link TextBaseLoader}.
 *
 * <p>Each test loads fixture data from the relative {@code testData/} directory, so the
 * tests must be run with a working directory that contains it. The integer passed as the
 * first {@code TextBaseLoader} constructor argument is 0 for the single-file {@code .base}
 * fixtures and 1 for the directory fixtures in this class.
 * NOTE(review): presumably these select document-per-line vs. document-per-file loading;
 * confirm against the constants declared on {@code TextBaseLoader}.
 */
public class TextBaseLoaderTest extends TestCase {
    /** Expected text of the first document in the webmaster fixtures. */
    private static final String WEBMASTER_LINE_1 = "Please add the attached publication to the web site in the ``Publications\" folder. The authors are Anthony Tomasic, Louiqa Raschid and Patrick Valduriez. The title is ``Scaling Access to Heterogeneous Databases with DISCO\" and it appeared in the IEEE Transactions on Knowledge and Data Engineering, 1998.";

    /** Expected text of the second document in the webmaster fixtures. */
    private static final String WEBMASTER_LINE_2 = "Please add the folder ``Publications\" to the web site.";

    /** Expected text of the third document in the webmaster fixtures. */
    private static final String WEBMASTER_LINE_3 = "Please change the string ``VLDB\" to ``International Conference on Very Large Databases\" on the ``Publications\" page.";

    /** Per-instance logger, named after the runtime class of the test. */
    Logger log;

    /**
     * Compiler-generated cache slot for the old class-literal idiom. No longer used by
     * {@link #suite()}; kept only so the set of package-visible members is unchanged.
     */
    static Class class$edu$cmu$minorthird$text$TextBaseLoaderTest;

    /**
     * Creates a test case that runs the test method with the given name.
     *
     * @param str name of the test method to run
     */
    public TextBaseLoaderTest(String str) {
        super(str);
        this.log = Logger.getLogger(getClass());
    }

    /** Creates a test case named {@code "TextBaseLoaderTest"}. */
    public TextBaseLoaderTest() {
        this("TextBaseLoaderTest");
    }

    /** Resets log4j to a single default appender so repeated runs do not duplicate output. */
    @Override // junit.framework.TestCase
    protected void setUp() {
        Logger.getRootLogger().removeAllAppenders();
        BasicConfigurator.configure();
    }

    /** Nothing to clean up. */
    @Override // junit.framework.TestCase
    protected void tearDown() {
    }

    /**
     * Loads {@code testData/seminar-subset} and then {@code testData/tblTest} with the same
     * loader, and checks the resulting text base holds 4 documents with the expected labels.
     */
    public void testSeminarSet() {
        try {
            this.log.info("----------------- SeminarSet -----------------");
            TextBaseLoader textBaseLoader = new TextBaseLoader(1, true);
            textBaseLoader.load(new File("testData/seminar-subset"));
            // Result intentionally discarded, as in the original test.
            // NOTE(review): unclear whether this call is needed for a side effect — confirm.
            textBaseLoader.getLabels();
            textBaseLoader.load(new File("testData/tblTest"));
            MutableTextLabels labels = textBaseLoader.getLabels();
            this.log.info("labels: " + labels.toString());
            this.log.debug("types::: " + labels.getTypes());
            TextBase textBase = labels.getTextBase();
            assertEquals(4, textBase.size());
            assertNotNull(textBase.documentSpan("cil-2.txt"));
            checkSeminarSample(labels);
        } catch (Exception e) {
            this.log.fatal(e, e);
            // Surface the cause in the JUnit report instead of a bare failure.
            fail("testSeminarSet threw an unexpected exception: " + e);
        }
        this.log.info("----------------- SeminarSet -----------------");
    }

    /**
     * Asserts the expected {@code stime}, {@code location} and {@code speaker} spans and the
     * expected number of {@code sentence} spans for the four seminar documents.
     *
     * @param textLabels labels to inspect
     */
    protected void checkSeminarSample(TextLabels textLabels) {
        checkType(textLabels, "stime", "cil-2.txt", "4:00", 1);
        checkType(textLabels, "stime", "cil-5.txt", "12:00pm", 1);
        checkType(textLabels, "stime", "cil-22.txt", "4:30 pm", 1);
        checkType(textLabels, "stime", "cil-28.txt", "12:00pm", 1);

        checkType(textLabels, "location", "cil-2.txt", "Adamson Wing, Baker Hall", 1);
        checkType(textLabels, "location", "cil-5.txt", "3005 Hamburg Hall", 1);
        checkType(textLabels, "location", "cil-22.txt", "Wean 7500", 1);
        checkType(textLabels, "location", "cil-28.txt", "Student Center, Room 207", 1);

        checkType(textLabels, "speaker", "cil-2.txt", "George W. Cobb", 1);
        checkType(textLabels, "speaker", "cil-5.txt", "Karen Schriver", 1);
        checkType(textLabels, "speaker", "cil-22.txt", "Bruce Sherwood", 1);
        checkType(textLabels, "speaker", "cil-28.txt", "David Banks", 1);

        assertEquals(2, getNumLables(textLabels, "sentence", "cil-2.txt"));
        assertEquals(2, getNumLables(textLabels, "sentence", "cil-5.txt"));
        assertEquals(3, getNumLables(textLabels, "sentence", "cil-22.txt"));
        assertEquals(4, getNumLables(textLabels, "sentence", "cil-28.txt"));
    }

    /**
     * Counts the spans returned by {@code textLabels.instanceIterator(str, str2)}, logging
     * each span's text at debug level. (The misspelled name is kept for compatibility.)
     *
     * @param textLabels labels to inspect
     * @param str        span type to look up
     * @param str2       id of the document to look in
     * @return number of spans the iterator produced
     */
    protected int getNumLables(TextLabels textLabels, String str, String str2) {
        int count = 0;
        for (Span.Looper spans = textLabels.instanceIterator(str, str2); spans.hasNext(); count++) {
            this.log.debug(spans.nextSpan().asString());
        }
        return count;
    }

    /**
     * Asserts that every span of the given type in the given document has the expected
     * text, and that the number of such spans equals the expected count.
     *
     * @param textLabels labels to inspect
     * @param str        span type to look up
     * @param str2       id of the document to look in
     * @param str3       text every matching span must have
     * @param i          number of matching spans expected
     */
    protected void checkType(TextLabels textLabels, String str, String str2, String str3, int i) {
        int found = 0;
        Span.Looper spans = textLabels.instanceIterator(str, str2);
        while (spans.hasNext()) {
            Span span = spans.nextSpan();
            this.log.debug("span type: " + str + " : " + span.asString());
            assertEquals(str3, span.asString());
            found++;
        }
        assertEquals(i, found);
    }

    /**
     * Loads {@code testData/xmlLines.base} and checks that it yields 7 documents, that
     * {@code doc1}..{@code doc6} carry the expected labelled spans, and that {@code doc7}
     * has no span of any known type.
     */
    public void testLoadLabeledLines() {
        try {
            TextBaseLoader textBaseLoader = new TextBaseLoader(0, true);
            TextBase load = textBaseLoader.load(new File("testData/xmlLines.base"));
            MutableTextLabels labels = textBaseLoader.getLabels();
            assertEquals(7, load.size());
            assertNotNull(load.documentSpan("doc1"));
            checkType(labels, "stime", "doc1", "4:00", 1);
            checkType(labels, "location", "doc1", "Adamson Wing, Baker Hall", 1);
            checkType(labels, "speaker", "doc2", "George W. Cobb", 1);
            checkType(labels, "title", "doc3", "Title: Three Ways to Gum up a Statistics Course", 1);
            checkType(labels, "sentence", "doc4", "My talk will be in two parts", 1);
            checkType(labels, "comment", "doc5", "comments and observations", 1);
            checkType(labels, "country", "doc6", "US", 1);
            // doc7 must be unlabelled for every type the loader discovered.
            Iterator it = labels.getTypes().iterator();
            while (it.hasNext()) {
                assertEquals(0, getNumLables(labels, it.next().toString(), "doc7"));
            }
        } catch (Exception e) {
            this.log.fatal(e, e);
            fail("testLoadLabeledLines threw an unexpected exception: " + e);
        }
    }

    /**
     * Loads {@code testData/webmaster-noid.base} and checks that the three documents are
     * reachable under the ids {@code webmaster-noid.base@line:1}..{@code 3} with the
     * expected text.
     */
    public void testLinesNoId() {
        try {
            TextBase load = new TextBaseLoader(0).load(new File("testData/webmaster-noid.base"));
            Span.Looper documentSpanIterator = load.documentSpanIterator();
            while (documentSpanIterator.hasNext()) {
                this.log.info("*" + documentSpanIterator.nextSpan().getDocumentId() + "*");
            }
            assertEquals(WEBMASTER_LINE_1, load.documentSpan("webmaster-noid.base@line:1").asString());
            assertEquals(WEBMASTER_LINE_2, load.documentSpan("webmaster-noid.base@line:2").asString());
            assertEquals(WEBMASTER_LINE_3, load.documentSpan("webmaster-noid.base@line:3").asString());
        } catch (Exception e) {
            this.log.fatal(e, e);
            fail("testLinesNoId threw an unexpected exception: " + e);
        }
    }

    /**
     * Loads {@code testData/webmasterCommands.base} and verifies its documents via
     * {@link #checkWebMasterLines(TextBase)}.
     */
    public void testLines() {
        try {
            checkWebMasterLines(new TextBaseLoader(0).load(new File("testData/webmasterCommands.base")));
        } catch (Exception e) {
            this.log.fatal(e, e);
            fail("testLines threw an unexpected exception: " + e);
        }
    }

    /**
     * Asserts that documents {@code msg01}, {@code msg02} and {@code msg03} of the given
     * text base contain the expected webmaster request texts.
     *
     * @param textBase text base to inspect
     */
    protected void checkWebMasterLines(TextBase textBase) {
        assertEquals(WEBMASTER_LINE_1, textBase.documentSpan("msg01").asString());
        assertEquals(WEBMASTER_LINE_2, textBase.documentSpan("msg02").asString());
        assertEquals(WEBMASTER_LINE_3, textBase.documentSpan("msg03").asString());
    }

    /**
     * Loads {@code testData/blankLines.base} and checks that exactly one document results
     * and that every loaded document has a token at index 0.
     */
    public void testBlankLines() {
        try {
            TextBase load = new TextBaseLoader(0).load(new File("testData/blankLines.base"));
            Span.Looper documentSpanIterator = load.documentSpanIterator();
            while (documentSpanIterator.hasNext()) {
                assertNotNull(documentSpanIterator.nextSpan().getTextToken(0));
            }
            assertEquals(1, load.size());
        } catch (Exception e) {
            this.log.error(e, e);
            fail("testBlankLines threw an unexpected exception: " + e);
        }
    }

    /**
     * Loads the {@code testData/20newgroups/20news-bydate-train} directory tree and builds
     * a dataset from it. This is a smoke test: it makes no assertions about the loaded
     * content and fails only if loading or feature extraction throws.
     */
    public void testDirectories() {
        try {
            TextBaseLoader textBaseLoader = new TextBaseLoader(1, false, true);
            TextBase load = textBaseLoader.load(new File("testData/20newgroups/20news-bydate-train"));
            this.log.debug("loaded training set");
            MutableTextLabels labels = textBaseLoader.getLabels();
            this.log.debug("passed first assertion");
            this.log.debug("base size = " + load.size());
            Dataset extractDataset = extractDataset(load, labels, SampleFE.BAG_OF_LC_WORDS);
            this.log.debug("extracted dataset");
            this.log.debug("data size = " + extractDataset.size());
            this.log.debug("got looper: " + extractDataset.iterator());
        } catch (Exception e) {
            this.log.error(e, e);
            fail("testDirectories threw an unexpected exception: " + e);
        }
    }

    /**
     * Builds a binary-labelled dataset with one example per document span: the label is
     * +1 when the labels mark the document span with type {@code "delete"}, otherwise -1.
     *
     * @param textBase             documents to convert
     * @param mutableTextLabels    labels consulted for the {@code "delete"} type
     * @param spanFeatureExtractor extractor that turns each document span into an instance
     * @return the populated dataset
     */
    private Dataset extractDataset(TextBase textBase, MutableTextLabels mutableTextLabels, SpanFeatureExtractor spanFeatureExtractor) {
        BasicDataset basicDataset = new BasicDataset();
        int spanIndex = 0;
        Span.Looper documentSpanIterator = textBase.documentSpanIterator();
        while (documentSpanIterator.hasNext()) {
            this.log.debug("span: " + spanIndex);
            spanIndex++;
            Span nextSpan = documentSpanIterator.nextSpan();
            double labelValue = mutableTextLabels.hasType(nextSpan, "delete") ? 1.0d : -1.0d;
            basicDataset.add(new Example(spanFeatureExtractor.extractInstance(nextSpan), ClassLabel.binaryLabel(labelValue)));
        }
        return basicDataset;
    }

    /**
     * Builds a suite containing every test method of this class.
     *
     * @return the suite for this class
     */
    public static Test suite() {
        return new TestSuite(TextBaseLoaderTest.class);
    }

    /**
     * Runs the suite with the JUnit text runner.
     *
     * @param strArr command-line arguments (ignored)
     */
    public static void main(String[] strArr) {
        TestRunner.run(suite());
    }

    /**
     * Compiler-generated helper for the old class-literal idiom: resolves a class by name.
     * No longer used inside this class; kept so package-visible members are unchanged.
     *
     * @param str fully qualified class name
     * @return the resolved class
     * @throws NoClassDefFoundError if the class cannot be found; the original
     *         {@link ClassNotFoundException} is attached as the cause
     */
    static Class class$(String str) {
        try {
            return Class.forName(str);
        } catch (ClassNotFoundException e) {
            NoClassDefFoundError error = new NoClassDefFoundError(e.getMessage());
            error.initCause(e);
            throw error;
        }
    }
}
