/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.trees;

import edu.stanford.nlp.io.NumberRangesFileFilter;
import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.trees.EnglishPTBTreebankCorrector;
import edu.stanford.nlp.trees.LabeledScoredTreeFactory;
import edu.stanford.nlp.trees.LabeledScoredTreeReaderFactory;
import edu.stanford.nlp.trees.MemoryTreebank;
import edu.stanford.nlp.trees.PennTreeReader;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeReader;
import edu.stanford.nlp.trees.TreeReaderFactory;
import edu.stanford.nlp.trees.TreeVisitor;
import edu.stanford.nlp.trees.Treebank;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.util.FilePathProcessor;
import edu.stanford.nlp.util.FileProcessor;
import edu.stanford.nlp.util.Filter;
import edu.stanford.nlp.util.Timing;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileFilter;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.Reader;
import java.io.Writer;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;

public final class DiskTreebank
extends Treebank {
    /** Debugging switch left over from development; currently unused. */
    private static final boolean PRINT_FILENAMES = false;
    /** Paths registered via {@code loadPath}, in registration order. */
    private final List<File> filePaths = new ArrayList<File>();
    /** Filters parallel to {@code filePaths}: entry i applies to path i. */
    private final List<FileFilter> fileFilters = new ArrayList<FileFilter>();
    /** File most recently opened by an iterator over this treebank; {@code null} until one is opened. */
    private File currentFile;

    /**
     * Creates an empty treebank whose trees are read with a
     * {@link LabeledScoredTreeReaderFactory}.
     */
    public DiskTreebank() {
        this(new LabeledScoredTreeReaderFactory());
    }

    /**
     * Creates an empty treebank that uses a {@link LabeledScoredTreeReaderFactory}
     * and the given character encoding.
     *
     * @param encoding the encoding name handed on to the superclass
     */
    public DiskTreebank(String encoding) {
        this(new LabeledScoredTreeReaderFactory(), encoding);
    }

    /**
     * Creates an empty treebank that reads trees with the given factory.
     *
     * @param trf the factory handed on to the superclass
     */
    public DiskTreebank(TreeReaderFactory trf) {
        super(trf);
    }

    /**
     * Creates an empty treebank with the given tree reader factory and encoding.
     *
     * @param trf      the factory handed on to the superclass
     * @param encoding the encoding name handed on to the superclass
     */
    public DiskTreebank(TreeReaderFactory trf, String encoding) {
        super(trf, encoding);
    }

    /**
     * Creates an empty treebank that uses a {@link LabeledScoredTreeReaderFactory}.
     *
     * @param initialCapacity forwarded to the two-argument constructor, which does
     *                        not use it
     */
    public DiskTreebank(int initialCapacity) {
        this(initialCapacity, new LabeledScoredTreeReaderFactory());
    }

    /**
     * Creates an empty treebank that reads trees with the given factory.
     *
     * @param initialCapacity ignored; only {@code trf} is passed on
     * @param trf             the factory used to read trees
     */
    public DiskTreebank(int initialCapacity, TreeReaderFactory trf) {
        this(trf);
    }

    /**
     * Forgets every path and filter registered so far, so that a subsequent
     * iteration visits no files.
     */
    @Override
    public void clear() {
        filePaths.clear();
        fileFilters.clear();
    }

    /**
     * Registers a path and its filter for later traversal. Nothing is read here;
     * the pair is only appended to the lists that iterators walk.
     *
     * @param path the file or directory to remember
     * @param filt the filter stored alongside {@code path}
     */
    @Override
    public void loadPath(File path, FileFilter filt) {
        filePaths.add(path);
        fileFilters.add(filt);
    }

    /**
     * Hands every tree of this treebank, in iteration order, to the visitor.
     *
     * @param tp the visitor invoked once per tree
     */
    @Override
    public void apply(TreeVisitor tp) {
        Iterator<Tree> trees = iterator();
        while (trees.hasNext()) {
            tp.visitTree(trees.next());
        }
    }

    /**
     * Returns the file most recently opened by an iterator over this treebank.
     * Iterators load the following file as soon as the previous one is exhausted,
     * so after the last tree of a file has been returned this may already name
     * the next file.
     *
     * @return the most recently opened file, or {@code null} if none was opened yet
     */
    public File getCurrentFile() {
        return currentFile;
    }

    /**
     * Returns a fresh iterator over all trees in the registered paths. Each call
     * builds a new iterator that starts again from the first file.
     */
    @Override
    public Iterator<Tree> iterator() {
        return new DiskTreebankIterator();
    }

    /**
     * Command-line driver for loading a treebank from disk and printing or
     * transforming it.
     *
     * <p>Arguments are {@code [-flags]* treebankPath [fileRanges]}. If a second
     * positional argument is present it is used as a number-range file filter,
     * otherwise files are selected by the {@code -suffix} extension (default
     * {@code mrg}). With {@code -normalized} a default {@code DiskTreebank} is
     * used and any {@code -trf}/{@code -tlp}/{@code -encoding} choice is not
     * passed to it. Output is written in the {@code -encoding} charset (default
     * UTF-8), except for the summary, the sentence-length statistics and the
     * {@code -maxLength} listing, which go to {@code System.out} directly.
     *
     * <p>A value-taking option without a value, or a missing treebank path, is
     * reported on standard error and ends the run instead of failing with an
     * {@code ArrayIndexOutOfBoundsException}.
     *
     * @param args command-line arguments; an empty array prints the usage text
     * @throws IOException if the output encoding is unsupported or writing the
     *                     {@code -decimate} files fails
     */
    public static void main(String[] args) throws IOException {
        if (args.length == 0) {
            System.err.println("This main method will let you variously manipulate and view a treebank.");
            System.err.println("Usage: java DiskTreebank [-flags]* treebankPath fileRanges");
            System.err.println("Useful flags include:");
            System.err.println("\t-maxLength n\t-suffix ext\t-treeReaderFactory class");
            System.err.println("\t-pennPrint\t-encoding enc\t-tlp class\t-sentenceLengths");
            System.err.println("\t-summary\t-decimate\t-yield\t-correct\t-punct");
            return;
        }
        int i = 0;
        int maxLength = -1;
        boolean normalized = false;
        boolean decimate = false;
        boolean pennPrintTrees = false;
        boolean correct = false;
        boolean summary = false;
        boolean timing = false;
        boolean printYield = false;
        boolean punct = false;
        boolean sentenceLengths = false;
        String decimatePrefix = null;
        String encoding = "UTF-8";
        String suffix = "mrg";
        TreeReaderFactory trf = null;
        TreebankLanguagePack tlp = null;
        while (i < args.length && args[i].startsWith("-")) {
            String flag = args[i];
            // Validate once, up front, so every args[i + 1] below is in range.
            if (DiskTreebank.optionTakesValue(flag) && i + 1 >= args.length) {
                System.err.println("Missing value for option: " + flag);
                return;
            }
            if (flag.equals("-maxLength")) {
                maxLength = Integer.parseInt(args[i + 1]);
                i += 2;
            } else if (flag.equals("-normalized")) {
                normalized = true;
                ++i;
            } else if (flag.equalsIgnoreCase("-tlp")) {
                try {
                    tlp = (TreebankLanguagePack)Class.forName(args[i + 1]).newInstance();
                    trf = tlp.treeReaderFactory();
                }
                catch (Exception e) {
                    // The class is cast to TreebankLanguagePack, so name that type.
                    System.err.println("Couldn't instantiate as TreebankLanguagePack: " + args[i + 1]);
                    return;
                }
                i += 2;
            } else if (flag.equals("-treeReaderFactory") || flag.equals("-trf")) {
                try {
                    trf = (TreeReaderFactory)Class.forName(args[i + 1]).newInstance();
                }
                catch (Exception e) {
                    System.err.println("Couldn't instantiate as TreeReaderFactory: " + args[i + 1]);
                    return;
                }
                i += 2;
            } else if (flag.equals("-suffix")) {
                suffix = args[i + 1];
                i += 2;
            } else if (flag.equals("-decimate")) {
                decimate = true;
                decimatePrefix = args[i + 1];
                i += 2;
            } else if (flag.equals("-encoding")) {
                encoding = args[i + 1];
                i += 2;
            } else if (flag.equals("-correct")) {
                correct = true;
                ++i;
            } else if (flag.equals("-summary")) {
                summary = true;
                ++i;
            } else if (flag.equals("-yield")) {
                printYield = true;
                ++i;
            } else if (flag.equals("-punct")) {
                punct = true;
                ++i;
            } else if (flag.equals("-pennPrint")) {
                pennPrintTrees = true;
                ++i;
            } else if (flag.equals("-timing")) {
                timing = true;
                ++i;
            } else if (flag.equals("-sentenceLengths")) {
                sentenceLengths = true;
                ++i;
            } else {
                // Unknown flags are reported and skipped rather than treated as fatal.
                System.err.println("Unknown option: " + flag);
                ++i;
            }
        }
        if (i >= args.length) {
            System.err.println("No treebank path given.");
            return;
        }
        if (trf == null) {
            trf = new TreeReaderFactory(){

                @Override
                public TreeReader newTreeReader(Reader in) {
                    return new PennTreeReader(in, new LabeledScoredTreeFactory());
                }
            };
        }
        Treebank treebank = normalized ? new DiskTreebank() : new DiskTreebank(trf, encoding);
        // Auto-flushing writer over System.out; deliberately never closed.
        final PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.out, encoding), true);
        String fileRanges = i + 1 < args.length ? args[i + 1] : null;
        if (fileRanges != null) {
            treebank.loadPath(args[i], (FileFilter)new NumberRangesFileFilter(fileRanges, true));
        } else {
            treebank.loadPath(args[i], suffix, true);
        }
        if (summary) {
            System.out.println(treebank.textualSummary());
        }
        if (sentenceLengths) {
            DiskTreebank.sentenceLengths(treebank, args[i], fileRanges, pw);
        }
        if (punct) {
            DiskTreebank.printPunct(treebank, tlp, pw);
        }
        if (correct) {
            // Only the printing/decimation steps below see the corrected trees.
            treebank = new EnglishPTBTreebankCorrector().transformTrees(treebank);
        }
        if (pennPrintTrees) {
            treebank.apply(new TreeVisitor(){

                @Override
                public void visitTree(Tree tree) {
                    tree.pennPrint(pw);
                    pw.println();
                }
            });
        }
        if (printYield) {
            treebank.apply(new TreeVisitor(){

                @Override
                public void visitTree(Tree tree) {
                    pw.println(tree.yield().toString());
                }
            });
        }
        if (decimate) {
            BufferedWriter w1 = null;
            BufferedWriter w2 = null;
            BufferedWriter w3 = null;
            try {
                w1 = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(decimatePrefix + "-train.txt"), encoding));
                w2 = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(decimatePrefix + "-dev.txt"), encoding));
                w3 = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(decimatePrefix + "-test.txt"), encoding));
                treebank.decimate(w1, w2, w3);
            }
            finally {
                // Closing flushes buffered output; nesting ensures each writer
                // is closed even if closing an earlier one throws.
                try {
                    if (w1 != null) {
                        w1.close();
                    }
                }
                finally {
                    try {
                        if (w2 != null) {
                            w2.close();
                        }
                    }
                    finally {
                        if (w3 != null) {
                            w3.close();
                        }
                    }
                }
            }
        } else if (maxLength >= 0) {
            for (Tree t : treebank) {
                if (t.yield().length() > maxLength) continue;
                System.out.println(t);
            }
        } else if (timing) {
            DiskTreebank.runTiming(treebank);
        }
    }

    /** Tells whether the given command-line flag must be followed by a value. */
    private static boolean optionTakesValue(String flag) {
        return flag.equals("-maxLength")
            || flag.equalsIgnoreCase("-tlp")
            || flag.equals("-treeReaderFactory")
            || flag.equals("-trf")
            || flag.equals("-suffix")
            || flag.equals("-decimate")
            || flag.equals("-encoding");
    }

    /**
     * Prints every tagged word whose tag is accepted by the language pack's
     * punctuation-tag filter, one per line.
     *
     * @param treebank the trees to scan
     * @param tlp      supplies the punctuation-tag filter; if {@code null} a
     *                 message is written to standard error and nothing is printed
     * @param pw       destination for the matching tagged words
     */
    private static void printPunct(Treebank treebank, TreebankLanguagePack tlp, PrintWriter pw) {
        if (tlp == null) {
            System.err.println("The -punct option requires you to specify -tlp");
            return;
        }
        Filter<String> isPunctTag = tlp.punctuationTagAcceptFilter();
        for (Tree tree : treebank) {
            Sentence<TaggedWord> taggedWords = tree.taggedYield();
            for (TaggedWord word : taggedWords) {
                if (isPunctTag.accept(word.tag())) {
                    pw.println(word);
                }
            }
        }
    }

    /**
     * Times three ways of walking the treebank and reports the word or tree
     * count each one produced: the for-each iterator, a {@link TreeVisitor}
     * passed to {@code apply}, and {@code size()}.
     *
     * <p>The timer is restarted before every measurement, and the visitor pass
     * reports its own count rather than repeating the iterator's.
     *
     * @param treebank the treebank to traverse
     */
    private static void runTiming(Treebank treebank) {
        System.out.println();
        Timing.startTime();
        int num = 0;
        for (Tree t : treebank) {
            num += t.yield().length();
        }
        Timing.endTime("traversing corpus, counting words with iterator");
        System.err.println("There were " + num + " words in the treebank.");

        // One-element array so the anonymous visitor can update a local counter.
        final int[] visitorWords = new int[1];
        Timing.startTime();
        treebank.apply(new TreeVisitor(){

            @Override
            public void visitTree(Tree t) {
                visitorWords[0] += t.yield().length();
            }
        });
        System.err.println();
        Timing.endTime("traversing corpus, counting words with TreeVisitor");
        System.err.println("There were " + visitorWords[0] + " words in the treebank.");
        System.err.println();
        Timing.startTime();
        System.err.println("This treebank contains " + treebank.size() + " trees.");
        Timing.endTime("size of corpus");
    }

    /**
     * Prints sentence-length statistics for a treebank to {@code System.out}: a
     * count for every length from 0 to 150, one combined line for longer
     * sentences, then the average, median and maximum length. The text of the
     * longest sentence is written to {@code pw}.
     *
     * <p>The median is the middle length, or the mean of the two middle lengths
     * for an even number of sentences. It is taken over all sentences, including
     * those longer than 150 words. For an empty treebank both average and median
     * are reported as 0.0.
     *
     * @param treebank the trees to measure
     * @param name     label printed after "Files" in the heading
     * @param range    optional second label for the heading; omitted when {@code null}
     * @param pw       receives the text of the longest sentence
     */
    public static void sentenceLengths(Treebank treebank, String name, String range, PrintWriter pw) {
        final int maxleng = 150;
        // lengthCounts[n] = number of sentences with exactly n words. It starts
        // with room for 0..151 and grows on demand so the median stays exact
        // for longer sentences too.
        int[] lengthCounts = new int[maxleng + 2];
        int numSents = 0;
        int longestSeen = 0;
        int totalWords = 0;
        String longSent = "";
        DecimalFormat nf = new DecimalFormat("0.0");
        for (Tree t : treebank) {
            ++numSents;
            int len = t.yield().length();
            if (len >= lengthCounts.length) {
                int[] grown = new int[Math.max(len + 1, lengthCounts.length * 2)];
                System.arraycopy(lengthCounts, 0, grown, 0, lengthCounts.length);
                lengthCounts = grown;
            }
            ++lengthCounts[len];
            totalWords += len;
            if (len > longestSeen) {
                longestSeen = len;
                longSent = t.toString();
            }
        }
        System.out.print("Files " + name + ' ');
        if (range != null) {
            System.out.print(range + ' ');
        }
        System.out.println("consists of " + numSents + " sentences");
        int runningTotal = 0;
        for (int i = 0; i <= maxleng; ++i) {
            runningTotal += lengthCounts[i];
            System.out.println("  " + lengthCounts[i] + " of length " + i + " (running total: " + runningTotal + ')');
        }
        // Whatever is not in the 0..150 buckets is longer than 150 words.
        int overLong = numSents - runningTotal;
        if (overLong > 0) {
            runningTotal += overLong;
            System.out.println("  " + overLong + " of length " + (maxleng + 1) + " to " + longestSeen + " (running total: " + runningTotal + ')');
        }
        double average = 0.0;
        double median = 0.0;
        if (numSents > 0) {
            average = (double)totalWords / (double)numSents;
            // 1-based ranks of the middle element(s); equal when numSents is odd.
            int lowerRank = (numSents + 1) / 2;
            int upperRank = numSents / 2 + 1;
            median = ((double)DiskTreebank.lengthAtRank(lengthCounts, lowerRank) + (double)DiskTreebank.lengthAtRank(lengthCounts, upperRank)) / 2.0;
        }
        System.out.println("Average length: " + nf.format(average) + "; median length: " + nf.format(median));
        System.out.println("Longest sentence is of length: " + longestSeen);
        pw.println(longSent);
    }

    /** Returns the length of the rank-th shortest sentence (1-based) recorded in the histogram. */
    private static int lengthAtRank(int[] histogram, int rank) {
        int seen = 0;
        for (int len = 0; len < histogram.length; ++len) {
            seen += histogram[len];
            if (seen >= rank) {
                return len;
            }
        }
        throw new IllegalArgumentException("rank " + rank + " exceeds the " + seen + " recorded sentences");
    }

    /**
     * Iterator over the trees of the enclosing treebank. The list of files is
     * collected once, at construction; files are then loaded one at a time into
     * an in-memory treebank, and the next file is loaded as soon as the trees of
     * the current one run out. Removal is not supported.
     */
    private class DiskTreebankIterator implements Iterator<Tree> {
        /** Index into {@code files} of the file currently loaded; -1 before the first load. */
        private int fileUpto = -1;
        /** Index of the next tree to return from {@code currentFileTrees}. */
        private int treeUpto;
        /** Names of all files accepted by the registered path/filter pairs. */
        private final List<String> files = new ArrayList<String>();
        /** Trees of the file currently being served. */
        private final MemoryTreebank currentFileTrees;
        /** Whether another tree is available; recomputed after every {@code next()}. */
        private boolean hasNext;

        private DiskTreebankIterator() {
            FileProcessor collector = new FileProcessor(){

                @Override
                public void processFile(File file) {
                    DiskTreebankIterator.this.files.add(file.toString());
                }
            };
            int pathCount = filePaths.size();
            for (int p = 0; p < pathCount; ++p) {
                FilePathProcessor.processPath(filePaths.get(p), fileFilters.get(p), collector);
            }
            currentFileTrees = new MemoryTreebank(DiskTreebank.this.treeReaderFactory(), DiskTreebank.this.encoding());
            hasNext = primeNextFile();
        }

        /**
         * Advances to the next position that holds a tree, loading further files
         * as needed and skipping files that yield no trees.
         *
         * @return {@code true} if a tree is available at {@code treeUpto},
         *         {@code false} once all files are exhausted
         */
        private boolean primeNextFile() {
            for (;;) {
                if (fileUpto >= files.size()) {
                    return false;
                }
                if (treeUpto < currentFileTrees.size()) {
                    return true;
                }
                // Current file is used up: drop its trees and move to the next one.
                currentFileTrees.clear();
                ++fileUpto;
                treeUpto = 0;
                if (fileUpto < files.size()) {
                    String fname = files.get(fileUpto);
                    DiskTreebank.this.currentFile = new File(fname);
                    currentFileTrees.loadPath(fname);
                }
            }
        }

        @Override
        public boolean hasNext() {
            return hasNext;
        }

        @Override
        public Tree next() {
            if (hasNext) {
                Tree result = currentFileTrees.get(treeUpto++);
                hasNext = primeNextFile();
                return result;
            }
            throw new NoSuchElementException();
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }
}

