package edu.cmu.minorthird.text;

import cern.colt.matrix.impl.AbstractFormatter;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/cmu/minorthird/text/Tokenizer.class */
/**
 * Breaks text into tokens, either by repeatedly matching a regular expression
 * ({@link #REGEX}) or by splitting around a delimiter expression ({@link #SPLIT}).
 *
 * <p>The configuration is held in public, mutable fields, so instances are not
 * safe to reconfigure while another thread is tokenizing with them.
 */
public class Tokenizer {
    /** Parse mode: each match of {@link #regexPattern} yields one token (capture group 1). */
    public static final int REGEX = 0;
    /** Parse mode: the text is split around matches of {@link #splitString}. */
    public static final int SPLIT = 1;

    /** Active parse mode; {@link #REGEX} unless changed. */
    public int parseType = REGEX;
    /**
     * Delimiter used in {@link #SPLIT} mode. It is handed to {@link String#split(String)},
     * so it is interpreted as a regular expression.
     */
    // NOTE(review): the default is presumably a single space - confirm against the Colt constant.
    public String splitString = AbstractFormatter.DEFAULT_COLUMN_SEPARATOR;
    /**
     * Pattern used in {@link #REGEX} mode. Capture group 1 is the token. The default
     * captures a run of digits, a run of ASCII letters, or one non-word character,
     * with optional whitespace on either side of the group.
     */
    public String regexPattern = "\\s*([0-9]+|[a-zA-Z]+|\\W)\\s*";

    /** Creates a tokenizer in {@link #REGEX} mode with the default pattern and delimiter. */
    public Tokenizer() {
    }

    /**
     * Creates a tokenizer with the given parse mode and the default pattern and delimiter.
     *
     * @param i the parse mode, normally {@link #REGEX} or {@link #SPLIT}
     */
    public Tokenizer(int i) {
        this.parseType = i;
    }

    /**
     * Creates a tokenizer with the given parse mode and a custom expression for that mode.
     *
     * @param i   the parse mode, normally {@link #REGEX} or {@link #SPLIT}
     * @param str the regex pattern when {@code i} is {@link #REGEX}, or the delimiter when
     *            {@code i} is {@link #SPLIT}; ignored for any other mode
     */
    public Tokenizer(int i, String str) {
        this.parseType = i;
        if (i == REGEX) {
            this.regexPattern = str;
        } else if (i == SPLIT) {
            this.splitString = str;
        }
    }

    /**
     * Tokenizes a plain string with {@link #regexPattern}.
     *
     * <p>This overload always uses the regular expression; it does not consult
     * {@link #parseType}.
     *
     * @param str the text to tokenize
     * @return the text of capture group 1 for every match, in order of appearance
     */
    public String[] splitIntoTokens(String str) {
        ArrayList<String> tokens = new ArrayList<String>();
        Matcher matcher = Pattern.compile(this.regexPattern).matcher(str);
        while (matcher.find()) {
            tokens.add(matcher.group(1));
        }
        return tokens.toArray(new String[tokens.size()]);
    }

    /**
     * Tokenizes {@code str2} according to {@link #parseType} and wraps each token in a
     * {@link TextToken} tied to {@code document}.
     *
     * @param document the document passed to every created {@link TextToken}
     * @param str      passed through to the {@link TextToken} constructor in {@link #SPLIT}
     *                 mode only (presumably a document identifier - confirm against TextToken)
     * @param str2     the text to tokenize
     * @return the tokens in order of appearance, or {@code null} when {@link #parseType}
     *         is neither {@link #REGEX} nor {@link #SPLIT}
     */
    public TextToken[] splitIntoTokens(Document document, String str, String str2) {
        if (this.parseType == REGEX) {
            return tokenizeByRegex(document, str2);
        }
        if (this.parseType == SPLIT) {
            return tokenizeBySplit(document, str, str2);
        }
        return null;
    }

    /** Builds one token per regex match, positioned on the span of capture group 1. */
    private TextToken[] tokenizeByRegex(Document document, String text) {
        ArrayList<TextToken> tokens = new ArrayList<TextToken>();
        Matcher matcher = Pattern.compile(this.regexPattern).matcher(text);
        while (matcher.find()) {
            int start = matcher.start(1);
            tokens.add(new TextToken(document, start, matcher.end(1) - start));
        }
        return tokens.toArray(new TextToken[0]);
    }

    /** Builds one token per piece produced by splitting the text around the delimiter. */
    private TextToken[] tokenizeBySplit(Document document, String str, String text) {
        String[] pieces = text.split(this.splitString);
        ArrayList<TextToken> tokens = new ArrayList<TextToken>(pieces.length);
        int searchFrom = 0;
        for (int k = 0; k < pieces.length; k++) {
            String piece = pieces[k];
            int offset = text.indexOf(piece, searchFrom);
            tokens.add(new TextToken(document, str, offset, piece.length(), piece));
            // Resume the search after this token; otherwise a later token with the same
            // (or a contained) text would be located at this token's position again.
            searchFrom = offset + piece.length();
        }
        return tokens.toArray(new TextToken[0]);
    }
}
