/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.trees.international.arabic;

import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.objectbank.TokenizerFactory;
import edu.stanford.nlp.process.AbstractTokenizer;
import edu.stanford.nlp.process.Tokenizer;
import edu.stanford.nlp.trees.international.arabic.ArabicLexer;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.Reader;
import java.io.Serializable;
import java.io.Writer;
import java.util.Iterator;

/**
 * Tokenizer that reads {@link Word} tokens from a {@link Reader} by delegating
 * to an {@link ArabicLexer}.
 *
 * <p>End-of-line tokens are recognised by identity comparison with the shared
 * sentinel {@link ArabicLexer#crValue}. They are either passed through to the
 * caller or skipped, depending on the {@code eolIsSignificant} flag given at
 * construction time.
 */
public class ArabicTokenizer
extends AbstractTokenizer<Word> {
    /** Underlying lexer; {@code null} when the tokenizer was built with a {@code null} reader. */
    private final ArabicLexer lexer;
    /** When {@code true}, end-of-line sentinel tokens are returned instead of skipped. */
    private final boolean eolIsSignificant;

    /**
     * Fetches the next token from the lexer.
     *
     * <p>When end-of-line tokens are not significant, consecutive
     * {@link ArabicLexer#crValue} tokens are skipped until a different value
     * is produced by the lexer.
     *
     * @return the next token; {@code null} if there is no lexer or an I/O error
     *         occurred; otherwise whatever the lexer returned (presumably
     *         {@code null} at end of input; confirm against {@code ArabicLexer})
     */
    @Override
    protected Word getNext() {
        if (this.lexer == null) {
            return null;
        }
        try {
            Word token = this.lexer.next();
            // Identity comparison is intentional: crValue is a shared sentinel object.
            while (token == ArabicLexer.crValue && !this.eolIsSignificant) {
                token = this.lexer.next();
            }
            return token;
        }
        catch (IOException e) {
            // Best effort: report the failure and stop producing tokens rather
            // than propagating. Returning null (instead of a stale end-of-line
            // token left over from the skip loop) keeps the result consistent
            // with the eolIsSignificant setting.
            System.err.println("ArabicTokenizer: I/O error while reading input; no further tokens will be returned: " + e);
            return null;
        }
    }

    /**
     * Creates a tokenizer that skips end-of-line tokens.
     *
     * @param r the character source; may be {@code null}, in which case the
     *          tokenizer produces no tokens
     */
    public ArabicTokenizer(Reader r) {
        this(r, false);
    }

    /**
     * Creates a tokenizer over the given reader.
     *
     * @param r the character source; may be {@code null}, in which case the
     *          tokenizer produces no tokens
     * @param eolIsSignificant whether end-of-line tokens should be returned
     *          to the caller rather than skipped
     */
    public ArabicTokenizer(Reader r, boolean eolIsSignificant) {
        this.eolIsSignificant = eolIsSignificant;
        this.lexer = (r != null) ? new ArabicLexer(r) : null;
    }

    /**
     * Returns a factory whose tokenizers skip end-of-line tokens.
     *
     * @return a new tokenizer factory
     */
    public static TokenizerFactory<Word> factory() {
        return new ArabicTokenizerFactory(false);
    }

    /**
     * Returns a factory whose tokenizers use the given end-of-line setting.
     *
     * @param eolIsSignificant whether the created tokenizers return
     *          end-of-line tokens
     * @return a new tokenizer factory
     */
    public static TokenizerFactory<Word> factory(boolean eolIsSignificant) {
        return new ArabicTokenizerFactory(eolIsSignificant);
    }

    /**
     * Command-line entry point: tokenizes a UTF-8 file and prints one token per
     * line to standard output (encoded as UTF-8). With {@code -cr} as the first
     * argument, end-of-line tokens are printed as {@code ***CR***}.
     *
     * @param args {@code [-cr] filename}; the last argument is the file to read
     * @throws IOException if the file cannot be opened or closed
     */
    public static void main(String[] args) throws IOException {
        boolean eolIsSignificant = args.length > 0 && args[0].equals("-cr");
        // The flag alone is not enough: a filename must follow it.
        int requiredArgs = eolIsSignificant ? 2 : 1;
        if (args.length < requiredArgs) {
            System.err.println("usage: java edu.stanford.nlp.trees.international.arabic.ArabicTokenizer [-cr] filename");
            return;
        }
        Reader reader = new InputStreamReader(new FileInputStream(args[args.length - 1]), "UTF-8");
        try {
            ArabicTokenizer tokenizer = new ArabicTokenizer(reader, eolIsSignificant);
            // Auto-flushing writer; not closed because it wraps System.out.
            PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.out, "UTF-8"), true);
            while (tokenizer.hasNext()) {
                Word w = tokenizer.next();
                if (w == ArabicLexer.crValue) {
                    pw.println("***CR***");
                    continue;
                }
                pw.println(w);
            }
            pw.flush();
        }
        finally {
            // Release the file handle even if tokenization fails part-way.
            reader.close();
        }
    }

    /**
     * Serializable factory producing {@link ArabicTokenizer} instances that
     * share one end-of-line setting.
     */
    static class ArabicTokenizerFactory
    implements TokenizerFactory<Word>,
    Serializable {
        private static final long serialVersionUID = 6759533831515214642L;
        /** End-of-line setting passed to every tokenizer this factory creates. */
        private final boolean eolIsSignificant;

        /** Creates a factory whose tokenizers skip end-of-line tokens. */
        public ArabicTokenizerFactory() {
            this(false);
        }

        /**
         * Creates a factory with the given end-of-line setting.
         *
         * @param eolIsSignificant whether created tokenizers return
         *          end-of-line tokens
         */
        public ArabicTokenizerFactory(boolean eolIsSignificant) {
            this.eolIsSignificant = eolIsSignificant;
        }

        /**
         * Returns a token iterator over the reader; this is the same object
         * {@link #getTokenizer(Reader)} would return.
         *
         * @param r the character source
         * @return an iterator over the tokens read from {@code r}
         */
        @Override
        public Iterator<Word> getIterator(Reader r) {
            return this.getTokenizer(r);
        }

        /**
         * Creates a new tokenizer over the reader using this factory's
         * end-of-line setting.
         *
         * @param r the character source
         * @return a new {@link ArabicTokenizer}
         */
        @Override
        public Tokenizer<Word> getTokenizer(Reader r) {
            return new ArabicTokenizer(r, this.eolIsSignificant);
        }
    }
}

