package com.wcohen.ss.tokens;

import com.wcohen.ss.api.Token;
import com.wcohen.ss.api.Tokenizer;
import java.util.ArrayList;
import java.util.Iterator;

/* loaded from: input_file:com/wcohen/ss/tokens/NGramTokenizer.class */
/**
 * Tokenizer that expands every token of a wrapped {@link Tokenizer} into character n-grams.
 *
 * <p>Each token value produced by the inner tokenizer is delimited as
 * {@code "^" + value + "$"}. Every substring of that delimited string whose length lies in
 * {@code [minNGramSize, maxNGramSize]} is emitted as a token, ordered by start position and
 * then by length. Interning, token iteration and the maximum token index are all delegated to
 * the inner tokenizer.
 */
public class NGramTokenizer implements Tokenizer {
    private final int minNGramSize;
    private final int maxNGramSize;
    private final boolean keepOldTokens;
    private final Tokenizer innerTokenizer;

    /**
     * Shared instance: n-grams of 3 to 5 characters, delimited original tokens kept, built on
     * {@code SimpleTokenizer.DEFAULT_TOKENIZER}.
     */
    public static NGramTokenizer DEFAULT_TOKENIZER = new NGramTokenizer(3, 5, true, SimpleTokenizer.DEFAULT_TOKENIZER);

    /**
     * Creates an n-gram tokenizer on top of another tokenizer.
     *
     * @param minNGramSize smallest n-gram length to emit (inclusive)
     * @param maxNGramSize largest n-gram length to emit (inclusive)
     * @param keepOldTokens if {@code true}, the delimited form of each inner token
     *     ({@code "^value$"}) is emitted in addition to its n-grams
     * @param innerTokenizer tokenizer that splits the input and interns all produced tokens
     */
    public NGramTokenizer(int minNGramSize, int maxNGramSize, boolean keepOldTokens, Tokenizer innerTokenizer) {
        this.minNGramSize = minNGramSize;
        this.maxNGramSize = maxNGramSize;
        this.keepOldTokens = keepOldTokens;
        this.innerTokenizer = innerTokenizer;
    }

    /**
     * Splits {@code str} with the inner tokenizer and returns the n-grams of every resulting token.
     *
     * @param str text to tokenize
     * @return for each inner token, in order: the delimited token itself (only when
     *     {@code keepOldTokens} is set) followed by its n-grams
     */
    @Override // com.wcohen.ss.api.Tokenizer
    public Token[] tokenize(String str) {
        Token[] innerTokens = this.innerTokenizer.tokenize(str);
        ArrayList<Token> nGrams = new ArrayList<Token>();
        for (Token innerToken : innerTokens) {
            // "^" and "$" mark the start and end of the token so that prefix and suffix
            // n-grams are distinguishable from n-grams taken from the middle.
            String delimited = "^" + innerToken.getValue() + "$";
            if (this.keepOldTokens) {
                nGrams.add(intern(delimited));
            }
            int length = delimited.length();
            for (int start = 0; start < length; start++) {
                for (int size = this.minNGramSize; size <= this.maxNGramSize; size++) {
                    int end = start + size;
                    // `end` is exclusive, so it may equal `length`. Using `<` here would drop
                    // every n-gram that ends on the trailing "$" marker.
                    // Note: when the whole delimited token fits the size range it is emitted
                    // here as an n-gram as well as by keepOldTokens above.
                    if (end <= length) {
                        nGrams.add(this.innerTokenizer.intern(delimited.substring(start, end)));
                    }
                }
            }
        }
        // Allocate Token[] rather than a concrete implementation array: the inner tokenizer
        // may intern tokens of any Token implementation.
        return nGrams.toArray(new Token[nGrams.size()]);
    }

    /**
     * Interns {@code str} through the inner tokenizer.
     *
     * @param str token value to intern
     * @return the token returned by the inner tokenizer for {@code str}
     */
    @Override // com.wcohen.ss.api.Tokenizer
    public Token intern(String str) {
        return this.innerTokenizer.intern(str);
    }

    /**
     * Delegates to the inner tokenizer's token iterator.
     *
     * @return the iterator supplied by the inner tokenizer
     */
    @Override // com.wcohen.ss.api.Tokenizer
    public Iterator tokenIterator() {
        return this.innerTokenizer.tokenIterator();
    }

    /**
     * Delegates to the inner tokenizer.
     *
     * @return the inner tokenizer's maximum token index
     */
    @Override // com.wcohen.ss.api.Tokenizer
    public int maxTokenIndex() {
        return this.innerTokenizer.maxTokenIndex();
    }

    /**
     * Demo entry point: tokenizes each command-line argument with {@link #DEFAULT_TOKENIZER}
     * and prints the index and value of every token, numbering tokens across all arguments.
     *
     * @param strArr strings to tokenize
     */
    public static void main(String[] strArr) {
        NGramTokenizer tokenizer = DEFAULT_TOKENIZER;
        int tokenCount = 0;
        for (int argIndex = 0; argIndex < strArr.length; argIndex++) {
            System.out.println("argument " + argIndex + ": '" + strArr[argIndex] + "'");
            Token[] tokens = tokenizer.tokenize(strArr[argIndex]);
            for (Token token : tokens) {
                tokenCount++;
                System.out.println("token " + tokenCount + ":" + " id=" + token.getIndex() + " value: '" + token.getValue() + "'");
            }
        }
    }
}
