package edu.cmu.minorthird.text.mixup;

import cern.colt.matrix.impl.AbstractFormatter;
import edu.cmu.minorthird.text.BasicSpanLooper;
import edu.cmu.minorthird.text.BasicTextBase;
import edu.cmu.minorthird.text.FancyLoader;
import edu.cmu.minorthird.text.MonotonicTextLabels;
import edu.cmu.minorthird.text.Span;
import edu.cmu.minorthird.text.TextBase;
import edu.cmu.minorthird.text.TextLabels;
import edu.cmu.minorthird.text.TextLabelsLoader;
import edu.cmu.minorthird.text.TextToken;
import edu.cmu.minorthird.text.Trie;
import edu.cmu.minorthird.text.mixup.Mixup;
import edu.cmu.minorthird.util.ProgressCounter;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;

/* loaded from: input_file:edu/cmu/minorthird/text/mixup/MixupProgram.class */
public class MixupProgram {
    /** Logger shared by the program and its statements; assigned in the static initializer. */
    private static Logger log;
    /** Parsed statements in the order they were added; every element is a {@link Statement}. */
    private ArrayList statementList = new ArrayList();
    /** NOTE(review): not read or written anywhere else in the visible source. */
    private HashMap dictionaryMap = new HashMap();
    /** Tokens the tokenizer may return at the start of a statement; filled in the static initializer. */
    public static Set legalKeywords;
    /**
     * Cached {@code Class} object for this class, set by the static initializer.
     * NOTE(review): looks like compiler-generated class-literal support from an old javac target.
     */
    static Class class$edu$cmu$minorthird$text$mixup$MixupProgram;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:edu/cmu/minorthird/text/mixup/MixupProgram$Statement.class */
    public static class Statement {
        /** Keyword that introduced the statement (e.g. "defSpanType", "provide"). */
        private String keyword;
        /** Property name for defSpanProp/defTokenProp statements; null otherwise. */
        private String property;
        /** Span type or dictionary name being defined; null for property statements. */
        private String type;
        /** Type whose spans are iterated by a generator, or "top" for whole documents. */
        private String startType;
        /** Property value assigned by defSpanProp/defTokenProp statements. */
        private String value;
        /** Words of a defDict statement; null for every other statement kind. */
        private Set wordSet;
        /** One of the integer statement kinds declared below (REGEX, MIXUP, ...). */
        private int statementType;
        /** Parsed Mixup expression for MIXUP and FILTER statements. */
        private Mixup mixupExpr;
        /** Regular expression source for REGEX statements. */
        private String regex;
        /** Capture group of {@link #regex} whose match becomes the labelled span. */
        private int regexGroup;
        /** Phrase trie for TRIE statements. */
        private Trie trie;
        /** Annotation type named by provide/require statements. */
        private String annotationType;
        /** Optional file named by a require statement; stays null when none is given. */
        private String fileToLoad;
        /** NOTE(review): never assigned or read in the visible source. */
        private Matcher matcher;
        /** Read only by parseError; never assigned in the visible source, so it stays 0. */
        private int lastTokenStart;
        /** Read only by parseError; never assigned in the visible source, so it stays null. */
        private String input;
        // Statement kinds stored in statementType.
        private static int REGEX = 1;
        private static int MIXUP = 2;
        private static int FILTER = 3;
        private static int PROVIDE = 4;
        private static int REQUIRE = 5;
        private static int DECLARE = 6;
        private static int TRIE = 7;
        /** Tokens that introduce a generator: ":", "~" and "-". */
        private static Set generatorStart = new HashSet();
        /**
         * Statement keywords. NOTE(review): populated in the static initializer but not read
         * inside this nested class in the visible source.
         */
        private static Set legalKeywords = new HashSet();
        /** Tokens allowed after the first name of a definition: ":", "=" and "case". */
        private static Set colonEqualsOrCase = new HashSet();

        /**
         * Parses one statement from the tokenizer. The statement keyword has already been
         * consumed by the caller and is passed in as {@code str}.
         *
         * <p>Handles, in order: {@code declareSpanType}, {@code provide}, {@code require}, and
         * then the definition forms ({@code defSpanType}, {@code defSpanProp},
         * {@code defTokenProp}, {@code defDict}). Non-dictionary definitions are followed by a
         * generator introduced by ":" (Mixup expression), "-" (filter) or "~" (regex or trie).
         *
         * @param mixupTokenizer tokenizer positioned just after the statement keyword
         * @param str the statement keyword
         * @throws Mixup.ParseException if the statement is malformed or a word-list file named
         *     by the statement cannot be read
         */
        Statement(Mixup.MixupTokenizer mixupTokenizer, String str) throws Mixup.ParseException {
            this.wordSet = null;
            this.mixupExpr = null;
            this.regex = null;
            this.trie = null;
            this.keyword = str;

            if (this.keyword.equals("declareSpanType")) {
                this.statementType = DECLARE;
                this.type = mixupTokenizer.advance(null);
                return;
            }
            if (this.keyword.equals("provide")) {
                this.statementType = PROVIDE;
                this.annotationType = unquote(mixupTokenizer.advance(null));
                // Consume the token that follows the annotation type.
                mixupTokenizer.advance(null);
                return;
            }
            if (this.keyword.equals("require")) {
                this.statementType = REQUIRE;
                this.annotationType = unquote(mixupTokenizer.advance(null));
                String marker = mixupTokenizer.advance(null);
                MixupProgram.log.debug("marker: " + marker);
                // A non-null marker means a file name follows the annotation type.
                if (marker != null) {
                    this.fileToLoad = unquote(mixupTokenizer.advance(null));
                    mixupTokenizer.advance(null);
                }
                return;
            }

            String name = mixupTokenizer.advance(null);
            String separator = mixupTokenizer.advance(colonEqualsOrCase);
            boolean isDictionary = "defDict".equals(this.keyword);
            if (":".equals(separator)) {
                // "name : value =" form, only valid for property definitions.
                if (!"defSpanProp".equals(this.keyword) && !"defTokenProp".equals(this.keyword)) {
                    parseError("can't define properties here");
                }
                this.property = name;
                this.type = null;
                this.value = mixupTokenizer.advance(null);
                mixupTokenizer.advance(Collections.singleton("="));
            } else if ("case".equals(separator)) {
                // "+case" is only meaningful for dictionaries.
                if (!isDictionary) {
                    parseError("illegal keyword usage");
                }
            } else {
                if (!"defSpanType".equals(this.keyword) && !isDictionary) {
                    parseError("illegal keyword usage");
                }
                if (!"=".equals(separator)) {
                    parseError("expected '='");
                }
                this.type = name;
                this.property = null;
            }

            if (isDictionary) {
                parseDictionary(mixupTokenizer, name, separator);
            } else {
                parseGenerator(mixupTokenizer);
            }
        }

        /** Removes the first and last character of a token that starts with a single quote. */
        private static String unquote(String token) {
            if (token.charAt(0) == '\'') {
                return token.substring(1, token.length() - 1);
            }
            return token;
        }

        /** Parses the optional start type and the generator (":", "-" or "~") of a definition. */
        private void parseGenerator(Mixup.MixupTokenizer tokenizer) throws Mixup.ParseException {
            String generator = tokenizer.advance(null);
            if (generatorStart.contains(generator)) {
                // No explicit start type: iterate over whole documents.
                this.startType = "top";
            } else {
                this.startType = generator;
                generator = tokenizer.advance(generatorStart);
            }
            if (generator.equals(":")) {
                this.statementType = MIXUP;
                if (tokenizer.advance()) {
                    this.mixupExpr = new Mixup(tokenizer);
                }
                return;
            }
            if (generator.equals("-")) {
                this.statementType = FILTER;
                if (tokenizer.advance()) {
                    this.mixupExpr = new Mixup(tokenizer);
                }
                return;
            }
            if (!generator.equals("~")) {
                throw new IllegalStateException("unexpected generatorStart '" + generator + "'");
            }
            String kind = tokenizer.advance(null);
            if ("re".equals(kind)) {
                parseRegex(tokenizer);
            } else if ("trie".equals(kind)) {
                parseTrie(tokenizer);
            } else {
                parseError("expected 're' or 'trie'");
            }
        }

        /** Parses the "'regex' , group" tail of a "~ re" generator. */
        private void parseRegex(Mixup.MixupTokenizer tokenizer) throws Mixup.ParseException {
            this.statementType = REGEX;
            this.regex = tokenizer.advance(null);
            // Diagnostics go to the logger instead of standard output.
            MixupProgram.log.debug("THIS IS THE REGEX: " + this.regex);
            if (this.regex.startsWith("'")) {
                this.regex = this.regex.substring(1, this.regex.length() - 1);
                // Turn escaped quotes (\') back into plain quotes.
                this.regex = this.regex.replaceAll("\\\\'", "'");
            }
            tokenizer.advance(Collections.singleton(","));
            String groupToken = tokenizer.advance(null);
            MixupProgram.log.debug("THIS IS THE EXPECTED GROUP NUMBER: " + groupToken);
            try {
                this.regexGroup = Integer.parseInt(groupToken);
            } catch (NumberFormatException e) {
                parseError("expected a regex group number and saw " + groupToken);
            }
            // Consume the token that follows the group number.
            tokenizer.advance(null);
        }

        /** Parses the comma-separated phrases / quoted file names of a "~ trie" generator. */
        private void parseTrie(Mixup.MixupTokenizer tokenizer) throws Mixup.ParseException {
            this.statementType = TRIE;
            String token = tokenizer.advance(null);
            if (token == null) {
                // Previously a NullPointerException; report it as a parse error instead.
                parseError("expected a phrase or file name after 'trie'");
            }

            // Re-assemble the token stream into phrases; each token is followed by a separator.
            ArrayList phrases = new ArrayList();
            StringBuffer phrase = new StringBuffer();
            while (token != null) {
                if (token.equals(",")) {
                    phrases.add(phrase.toString());
                    phrase.setLength(0);
                } else {
                    phrase.append(token).append(AbstractFormatter.DEFAULT_COLUMN_SEPARATOR);
                }
                token = tokenizer.advance(null);
            }
            phrases.add(phrase.toString());

            this.trie = new Trie();
            BasicTextBase tokenSource = new BasicTextBase();
            for (int i = 0; i < phrases.size(); i++) {
                String[] words = tokenSource.splitIntoTokens((String) phrases.get(i));
                boolean quoted = words.length > 2
                        && "\"".equals(words[0])
                        && "\"".equals(words[words.length - 1]);
                if (!quoted) {
                    this.trie.addWords("phrase#" + i, words);
                    continue;
                }
                // A double-quoted entry names a file with one phrase per line.
                StringBuffer fileName = new StringBuffer();
                for (int j = 1; j < words.length - 1; j++) {
                    fileName.append(words[j]);
                }
                addTrieFile(fileName.toString(), tokenSource);
            }
        }

        /** Adds every line of the named file to the trie as a phrase. */
        private void addTrieFile(String fileName, BasicTextBase tokenSource) throws Mixup.ParseException {
            try {
                LineNumberReader reader = MixupProgram.mixupReader(fileName);
                try {
                    int lineNumber = 0;
                    String line;
                    while ((line = reader.readLine()) != null) {
                        lineNumber++;
                        this.trie.addWords(fileName + ".line." + lineNumber, tokenSource.splitIntoTokens(line));
                    }
                } finally {
                    // Close the reader even when a read fails.
                    reader.close();
                }
            } catch (IOException e) {
                parseError("Error when reading " + fileName + ": " + e);
            }
        }

        /**
         * Parses the body of a defDict statement: an optional "+case name =" header followed by
         * comma-separated words and/or double-quoted word-list file names.
         */
        private void parseDictionary(Mixup.MixupTokenizer tokenizer, String name, String separator)
                throws Mixup.ParseException {
            // Words are lower-cased unless the "+case" form is used.
            boolean foldCase = true;
            if ("case".equals(separator)) {
                foldCase = false;
                if (!"+".equals(name)) {
                    parseError("illegal defDict");
                }
                this.type = tokenizer.advance(null);
                tokenizer.advance(Collections.singleton("="));
            } else {
                this.type = name;
            }

            this.wordSet = new HashSet();
            while (true) {
                String entry = tokenizer.advance(null);
                if (entry.equals("\"")) {
                    // Collect everything up to the closing quote as the file name.
                    StringBuffer fileName = new StringBuffer();
                    String part = tokenizer.advance(null);
                    while (!part.equals("\"")) {
                        fileName.append(part);
                        part = tokenizer.advance(null);
                    }
                    addDictionaryFile(fileName.toString(), foldCase);
                } else {
                    this.wordSet.add(foldCase ? entry.toLowerCase() : entry);
                }
                String next = tokenizer.advance(null);
                if (next == null) {
                    return;
                }
                if (!",".equals(next)) {
                    parseError("expected comma");
                }
            }
        }

        /** Adds each trimmed line of the named file to the dictionary word set. */
        private void addDictionaryFile(String fileName, boolean foldCase) throws Mixup.ParseException {
            try {
                LineNumberReader reader = MixupProgram.mixupReader(fileName);
                try {
                    String line;
                    while ((line = reader.readLine()) != null) {
                        String word = line.trim();
                        if (foldCase) {
                            word = word.toLowerCase();
                        }
                        this.wordSet.add(word);
                    }
                } finally {
                    // Close the reader even when a read fails.
                    reader.close();
                }
            } catch (IOException e) {
                parseError("Error when reading " + fileName + ": " + e);
            }
        }

        /**
         * Applies this statement to the given labels and logs how long it took.
         *
         * <p>Dictionary, declaration, provide and require statements update the labels directly.
         * All other statements iterate over document spans ("top") or over the instances of the
         * start type and label whatever the generator (Mixup expression, filter, trie or regex)
         * produces.
         *
         * @param monotonicTextLabels labels to extend
         * @param textBase source of document spans when the start type is "top"
         * @throws IllegalStateException if the start type is not defined in the labels or the
         *     statement kind is unknown
         */
        public void eval(MonotonicTextLabels monotonicTextLabels, TextBase textBase) {
            MixupProgram.log.info("Evaluating: " + this);
            final long startedAt = System.currentTimeMillis();

            if ("defDict".equals(this.keyword)) {
                MixupProgram.log.debug("defining dictionary of: " + this.wordSet);
                monotonicTextLabels.defineDictionary(this.type, this.wordSet);
            } else if ("declareSpanType".equals(this.keyword)) {
                monotonicTextLabels.declareType(this.type);
            } else if (this.statementType == PROVIDE) {
                monotonicTextLabels.setAnnotatedBy(this.annotationType);
            } else if (this.statementType == REQUIRE) {
                monotonicTextLabels.require(this.annotationType, this.fileToLoad);
            } else {
                // Pick the spans the generator runs over.
                Span.Looper candidates;
                if ("top".equals(this.startType)) {
                    candidates = textBase.documentSpanIterator();
                } else if (monotonicTextLabels.isType(this.startType)) {
                    candidates = monotonicTextLabels.instanceIterator(this.startType);
                } else {
                    throw new IllegalStateException("no type '" + this.startType + "' defined");
                }

                if (this.statementType == MIXUP) {
                    Span.Looper extracted = this.mixupExpr.extract(monotonicTextLabels, candidates);
                    while (extracted.hasNext()) {
                        extendLabels(monotonicTextLabels, extracted.nextSpan());
                    }
                    if ("defSpanType".equals(this.keyword)) {
                        monotonicTextLabels.declareType(this.type);
                    }
                } else if (this.statementType == FILTER) {
                    // Collect first, label afterwards, so labelling cannot affect the filter.
                    TreeSet kept = new TreeSet();
                    while (candidates.hasNext()) {
                        Span candidate = candidates.nextSpan();
                        if (!hasExtraction(this.mixupExpr, monotonicTextLabels, candidate)) {
                            kept.add(candidate);
                        }
                    }
                    for (Iterator it = kept.iterator(); it.hasNext();) {
                        extendLabels(monotonicTextLabels, (Span) it.next());
                    }
                } else if (this.statementType == TRIE) {
                    while (candidates.hasNext()) {
                        Trie.ResultLooper hits = this.trie.lookup(candidates.nextSpan());
                        while (hits.hasNext()) {
                            extendLabels(monotonicTextLabels, hits.nextSpan());
                        }
                    }
                } else if (this.statementType == REGEX) {
                    Pattern pattern = Pattern.compile(this.regex);
                    while (candidates.hasNext()) {
                        Span candidate = candidates.nextSpan();
                        Matcher found = pattern.matcher(candidate.asString());
                        while (found.find()) {
                            try {
                                Span matched = candidate.charIndexProperSubSpan(
                                        found.start(this.regexGroup), found.end(this.regexGroup));
                                extendLabels(monotonicTextLabels, matched);
                            } catch (IllegalArgumentException ignored) {
                                // Best effort: matches that cannot be turned into a span are skipped.
                            }
                        }
                    }
                } else {
                    throw new IllegalStateException("illegal statement type " + this.statementType);
                }
            }

            double seconds = (System.currentTimeMillis() - startedAt) / 1000.0d;
            MixupProgram.log.info("time: " + seconds + " sec");
        }

        /**
         * Reports whether the Mixup expression extracts at least one span from {@code span}.
         *
         * @param mixup expression to run
         * @param textLabels labels the expression is evaluated against
         * @param span the single span offered to the expression
         * @return true if the extraction yields any result
         */
        private boolean hasExtraction(Mixup mixup, TextLabels textLabels, Span span) {
            Set onlyThisSpan = Collections.singleton(span);
            Span.Looper extracted = mixup.extract(textLabels, new BasicSpanLooper(onlyThisSpan));
            return extracted.hasNext();
        }

        /**
         * Records one generated span in the labels according to the statement keyword:
         * defSpanType adds the span to the type, defSpanProp sets the property on the span, and
         * defTokenProp sets the property on each token of the span. Other keywords do nothing.
         *
         * @param monotonicTextLabels labels to extend
         * @param span span produced by the generator
         * @throws IllegalStateException if a defTokenProp statement has no property name and the
         *     span contains at least one token
         */
        private void extendLabels(MonotonicTextLabels monotonicTextLabels, Span span) {
            if ("defSpanType".equals(this.keyword)) {
                monotonicTextLabels.addToType(span, this.type);
            } else if ("defSpanProp".equals(this.keyword)) {
                monotonicTextLabels.setProperty(span, this.property, this.value);
            } else if ("defTokenProp".equals(this.keyword)) {
                int tokenCount = span.size();
                for (int index = 0; index < tokenCount; index++) {
                    TextToken token = span.getTextToken(index);
                    if (this.property == null) {
                        throw new IllegalStateException("null property");
                    }
                    monotonicTextLabels.setProperty(token, this.property, this.value);
                }
            }
        }

        /**
         * Renders the elements of a set as single-quoted strings joined by the column separator.
         *
         * @param set elements to render; each element's {@code toString()} is used
         * @return the joined text, or an empty string for an empty set
         */
        private String setContents(Set set) {
            StringBuffer joined = new StringBuffer("");
            for (Iterator elements = set.iterator(); elements.hasNext();) {
                // Anything already in the buffer means this is not the first element.
                if (joined.length() > 0) {
                    joined.append(AbstractFormatter.DEFAULT_COLUMN_SEPARATOR);
                }
                String text = elements.next().toString();
                joined.append("'" + text + "'");
            }
            return joined.toString();
        }

        /**
         * Always throws a parse exception describing the problem; it never returns normally.
         *
         * @param str description of what went wrong
         * @return never returns; the String return type is unused
         * @throws Mixup.ParseException always
         */
        private String parseError(String str) throws Mixup.ParseException {
            String message = "statement error at char " + this.lastTokenStart + ": " + str
                    + "\nin '" + this.input + "'";
            throw new Mixup.ParseException(message);
        }

        /**
         * Returns a one-line rendering of the statement. Dictionary and trie contents are
         * abbreviated with "...".
         */
        public String toString() {
            final String head = this.keyword + AbstractFormatter.DEFAULT_COLUMN_SEPARATOR;
            if ("defDict".equals(this.keyword)) {
                return head + this.type + " = ... ";
            }
            if (this.statementType == DECLARE) {
                return head + this.type;
            }
            if (this.statementType == PROVIDE) {
                return head + this.annotationType;
            }
            if (this.statementType == REQUIRE) {
                return head + this.annotationType + "," + this.fileToLoad;
            }

            // Everything else is a definition followed by a generator.
            String generator;
            if (this.statementType == MIXUP) {
                generator = ": " + this.mixupExpr.toString();
            } else if (this.statementType == FILTER) {
                generator = "- " + this.mixupExpr.toString();
            } else if (this.statementType == REGEX) {
                generator = "~ re '" + this.regex + "' ," + this.regexGroup;
            } else if (this.statementType == TRIE) {
                generator = "~ trie ...";
            } else {
                generator = "???";
            }

            if (this.type != null) {
                return head + this.type + " =" + this.startType + generator;
            }
            return head + this.property + ":" + this.value + " =" + this.startType + generator;
        }

        // Fills the three token sets used while parsing statements.
        static {
            String[] keywords = {
                "defTokenProp", "defSpanProp", "defSpanType", "defDict",
                "declareSpanType", "provide", "require"
            };
            for (int k = 0; k < keywords.length; k++) {
                legalKeywords.add(keywords[k]);
            }

            String[] afterName = {":", "=", "case"};
            for (int k = 0; k < afterName.length; k++) {
                colonEqualsOrCase.add(afterName[k]);
            }

            String[] generators = {":", "~", "-"};
            for (int k = 0; k < generators.length; k++) {
                generatorStart.add(generators[k]);
            }
        }
    }

    /** Creates an empty program; statements can be added later with {@code addStatement}. */
    public MixupProgram() {
    }

    /**
     * Builds a program from an array of statements. Each element is terminated with ";\n" and
     * the concatenation is parsed as one program.
     *
     * @param strArr the statements, without trailing semicolons
     * @throws Mixup.ParseException if the assembled program cannot be parsed
     */
    public MixupProgram(String[] strArr) throws Mixup.ParseException {
        // One buffer for the whole program instead of rebuilding the string per statement.
        StringBuffer program = new StringBuffer();
        for (int i = 0; i < strArr.length; i++) {
            program.append(strArr[i]).append(";\n");
        }
        startProgram(program.toString());
    }

    /**
     * Builds a program from program text. On every line, everything from the first "//" to the
     * end of the line is dropped before parsing.
     *
     * @param str the program text
     * @throws Mixup.ParseException if the program cannot be parsed
     */
    public MixupProgram(String str) throws Mixup.ParseException {
        StringBuffer withoutComments = new StringBuffer();
        for (String line : str.split(AbstractFormatter.DEFAULT_ROW_SEPARATOR)) {
            int commentStart = line.indexOf("//");
            if (commentStart >= 0) {
                withoutComments.append(line.substring(0, commentStart));
            } else {
                withoutComments.append(line);
            }
            withoutComments.append(AbstractFormatter.DEFAULT_ROW_SEPARATOR);
        }
        startProgram(withoutComments.toString());
    }

    /**
     * Builds a program from a file. If the file does not exist, its bare name is handed to the
     * name-based reader lookup instead. On every line, everything from the first "//" to the end
     * of the line is dropped before parsing.
     *
     * @param file the program file
     * @throws Mixup.ParseException if the program cannot be parsed
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException if reading or closing the file fails
     */
    public MixupProgram(File file) throws Mixup.ParseException, FileNotFoundException, IOException {
        LineNumberReader reader = file.exists() ? mixupReader(file) : mixupReader(file.getName());
        StringBuffer program = new StringBuffer();
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                int commentStart = line.indexOf("//");
                if (commentStart >= 0) {
                    line = line.substring(0, commentStart);
                }
                program.append(line);
                program.append(AbstractFormatter.DEFAULT_ROW_SEPARATOR);
            }
        } finally {
            // Release the file handle even when a read fails part-way through.
            reader.close();
        }
        startProgram(program.toString());
    }

    /**
     * Parses program text and appends each statement found to this program.
     *
     * <p>A sentinel word is appended to the text so the end of input can be recognised; parsing
     * stops when the tokenizer returns the sentinel or null. Tokens that start with the row
     * separator are skipped.
     *
     * @param str the program text; must not be null
     * @throws Mixup.ParseException if a statement cannot be parsed
     * @throws NullPointerException if {@code str} is null
     */
    public void startProgram(String str) throws Mixup.ParseException {
        // The original called str.trim() and discarded the result; that no-op is removed and
        // its only observable effect (failing fast on null) is kept explicitly.
        if (str == null) {
            throw new NullPointerException("program text must not be null");
        }
        final String endMarker = "asdfghjkl";
        Mixup.MixupTokenizer tokenizer = new Mixup.MixupTokenizer(str + " " + endMarker);
        String keyword = tokenizer.advance(legalKeywords);
        // Check the sentinel before the first statement too, so an empty program yields no
        // statements instead of handing the sentinel to the statement parser.
        while (keyword != null && !keyword.equals(endMarker)) {
            if (!keyword.startsWith(AbstractFormatter.DEFAULT_ROW_SEPARATOR)) {
                addStatement(tokenizer, keyword);
            }
            keyword = tokenizer.advance(legalKeywords);
        }
    }

    /**
     * Evaluates every statement in order against the given labels, reporting progress after
     * each statement.
     *
     * @param monotonicTextLabels labels to extend
     * @param textBase text base passed through to each statement
     */
    public void eval(MonotonicTextLabels monotonicTextLabels, TextBase textBase) {
        int total = this.statementList.size();
        ProgressCounter progress = new ProgressCounter("mixup program", "statement", total);
        for (Iterator it = this.statementList.iterator(); it.hasNext();) {
            Statement statement = (Statement) it.next();
            statement.eval(monotonicTextLabels, textBase);
            progress.progress();
        }
        progress.finished();
    }

    /**
     * Parses one statement from the tokenizer and appends it to the program.
     *
     * @param mixupTokenizer tokenizer positioned just after the statement keyword
     * @param str the statement keyword already read from the tokenizer
     * @throws Mixup.ParseException if the statement cannot be parsed
     */
    public void addStatement(Mixup.MixupTokenizer mixupTokenizer, String str) throws Mixup.ParseException {
        Statement parsed = new Statement(mixupTokenizer, str);
        this.statementList.add(parsed);
    }

    /**
     * Parses a single statement from text and appends it to the program.
     *
     * @param str text of one statement, starting with its keyword
     * @throws Mixup.ParseException if the statement cannot be parsed
     */
    public void addStatement(String str) throws Mixup.ParseException {
        Mixup.MixupTokenizer tokenizer = new Mixup.MixupTokenizer(str);
        String keyword = tokenizer.advance(legalKeywords);
        addStatement(tokenizer, keyword);
    }

    /** Returns the program text: every statement's rendering followed by ";\n". */
    public String toString() {
        StringBuffer text = new StringBuffer("");
        for (Iterator it = this.statementList.iterator(); it.hasNext();) {
            text.append(it.next().toString()).append(";\n");
        }
        return text.toString();
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Opens a line reader for the given name. An existing file with that path wins; otherwise
     * the name is looked up as a system class-loader resource.
     *
     * @param str file path or resource name
     * @return a reader over the file or resource
     * @throws IllegalArgumentException if neither a file nor a resource with that name exists
     * @throws IOException if the file cannot be opened
     */
    public static LineNumberReader mixupReader(String str) throws IOException, FileNotFoundException {
        File onDisk = new File(str);
        if (!onDisk.exists()) {
            InputStream resource = ClassLoader.getSystemResourceAsStream(str);
            if (resource != null) {
                InputStreamReader decoded = new InputStreamReader(resource);
                return new LineNumberReader(new BufferedReader(decoded));
            }
            throw new IllegalArgumentException("No file named '" + str + "' found on classpath");
        }
        return mixupReader(onDisk);
    }

    /**
     * Opens a buffered line reader over the given file.
     *
     * @param file file to read
     * @return a reader over the file's contents
     * @throws FileNotFoundException if the file cannot be opened
     */
    private static LineNumberReader mixupReader(File file) throws IOException, FileNotFoundException {
        FileReader source = new FileReader(file);
        BufferedReader buffered = new BufferedReader(source);
        return new LineNumberReader(buffered);
    }

    /**
     * Command-line entry point: {@code programFile [textFile/directory [outfile]]}.
     *
     * <p>Loads and prints the program. With a second argument, the labels are loaded and the
     * program is evaluated on them; the result is saved to the third argument if given,
     * otherwise every type and its spans are printed. Any exception prints the usage line and
     * a stack trace.
     *
     * @param strArr command-line arguments
     */
    public static void main(String[] strArr) {
        try {
            MixupProgram program = new MixupProgram(new File(strArr[0]));
            System.out.println("program:\n" + program.toString());
            if (strArr.length <= 1) {
                return;
            }

            MonotonicTextLabels labels = (MonotonicTextLabels) FancyLoader.loadTextLabels(strArr[1]);
            program.eval(labels, labels.getTextBase());

            if (strArr.length > 2) {
                new TextLabelsLoader().saveTypesAsOps(labels, new File(strArr[2]));
                return;
            }

            // No output file: dump every type with its spans.
            for (Iterator types = labels.getTypes().iterator(); types.hasNext();) {
                String typeName = (String) types.next();
                System.out.println("Type " + typeName + ":");
                Span.Looper spans = labels.instanceIterator(typeName);
                while (spans.hasNext()) {
                    System.out.println("\t'" + spans.nextSpan().asString() + "'");
                }
            }
        } catch (Exception e) {
            System.out.println("usage: programFile textFile/directory [outfile]");
            e.printStackTrace();
        }
    }

    /**
     * Looks up a class by name, converting a failed lookup into a
     * {@link NoClassDefFoundError} whose cause is the original exception.
     *
     * @param str fully qualified class name
     * @return the loaded class
     * @throws NoClassDefFoundError if the class cannot be found
     */
    static Class class$(String str) {
        try {
            return Class.forName(str);
        } catch (ClassNotFoundException e) {
            // initCause returns Throwable, so build the error first and throw the typed
            // reference; throwing the initCause result directly does not compile.
            NoClassDefFoundError error = new NoClassDefFoundError();
            error.initCause(e);
            throw error;
        }
    }

    static {
        Class cls;
        if (class$edu$cmu$minorthird$text$mixup$MixupProgram == null) {
            cls = class$("edu.cmu.minorthird.text.mixup.MixupProgram");
            class$edu$cmu$minorthird$text$mixup$MixupProgram = cls;
        } else {
            cls = class$edu$cmu$minorthird$text$mixup$MixupProgram;
        }
        log = Logger.getLogger(cls);
        legalKeywords = new HashSet();
        legalKeywords.add("defTokenProp");
        legalKeywords.add("defSpanProp");
        legalKeywords.add("defSpanType");
        legalKeywords.add("defDict");
        legalKeywords.add("declareSpanType");
        legalKeywords.add("provide");
        legalKeywords.add("require");
        legalKeywords.add("//");
        legalKeywords.add(AbstractFormatter.DEFAULT_ROW_SEPARATOR);
        legalKeywords.add("asdfghjkl");
    }
}
