/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.parser.lexparser;

import edu.stanford.nlp.io.RuntimeIOException;
import edu.stanford.nlp.ling.HasTag;
import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.objectbank.TokenizerFactory;
import edu.stanford.nlp.parser.lexparser.AbstractTreebankParserParams;
import edu.stanford.nlp.parser.lexparser.BaseLexicon;
import edu.stanford.nlp.parser.lexparser.Lexicon;
import edu.stanford.nlp.parser.lexparser.Options;
import edu.stanford.nlp.parser.lexparser.WordSegmenter;
import edu.stanford.nlp.process.SerializableFunction;
import edu.stanford.nlp.process.WordSegmentingTokenizer;
import edu.stanford.nlp.trees.DiskTreebank;
import edu.stanford.nlp.trees.HeadFinder;
import edu.stanford.nlp.trees.MemoryTreebank;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeReaderFactory;
import edu.stanford.nlp.trees.TreeTransformer;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.trees.international.arabic.ArabicHeadFinder;
import edu.stanford.nlp.trees.international.arabic.ArabicTreeReaderFactory;
import edu.stanford.nlp.trees.international.arabic.ArabicTreebankLanguagePack;
import edu.stanford.nlp.trees.international.arabic.Buckwalter;
import edu.stanford.nlp.trees.tregex.ParseException;
import edu.stanford.nlp.trees.tregex.TregexMatcher;
import edu.stanford.nlp.trees.tregex.TregexPattern;
import edu.stanford.nlp.trees.tregex.TregexPatternCompiler;
import edu.stanford.nlp.util.Filter;
import edu.stanford.nlp.util.Function;
import edu.stanford.nlp.util.Pair;
import java.io.PrintWriter;
import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

public class ArabicTreebankParserParams
extends AbstractTreebankParserParams {
    /** Running, human-readable log of the options applied so far; printed by display(). */
    private String optionsString = "ArabicTreebankParserParams\n";
    // The five tree-reader flags below (together with discardX and
    // collapse3LetterPrepositionVariants) are handed to the ArabicTreeReaderFactory constructor.
    private boolean retainNPTmp = false;
    private boolean retainPRD = false;
    private boolean changeNoLabels = false;
    // Settings handed to the ArabicCollinizer constructor.
    private boolean collinizerRetainsPunctuation = false;
    private Pattern collinizerPruneRegex = null;
    private boolean discardX = false;
    private boolean collapse3LetterPrepositionVariants = false;
    /** Shared zero-length array returned by sisterSplitters(). */
    private static final String[] EMPTY_STRING_ARRAY = new String[0];
    /** Head finder implementation instantiated by headFinder(); replaceable via the "-hf" option. */
    Class<? extends HeadFinder> headFinderClass = ArabicHeadFinder.class;
    /**
     * Annotation patterns currently switched on, mapped to the function that produces the
     * label suffix for a matching node.
     * NOTE(review): suffixes are appended in this map's iteration order, which for a HashMap
     * depends on TregexPattern.hashCode() -- confirm that the order is stable enough.
     */
    private HashMap<TregexPattern, Function<TregexMatcher, String>> activeAnnotations = new HashMap<TregexPattern, Function<TregexMatcher, String>>();
    /** All known annotation patterns, keyed by the command-line flag that activates them. */
    private Map<String, Pair<TregexPattern, Function<TregexMatcher, String>>> annotationPatterns = new HashMap<String, Pair<TregexPattern, Function<TregexMatcher, String>>>();
    private static final String genitiveNodeTregexString = "@NP > @NP $- /^N/";
    private static final String copularVerbForms = "/^(kAn|kAnt|ykwn|sykwn|tkwn|ykn|stkwn|ykwnw|ybdw|tbdw|sybdw|stbdw|bdY|ybdy|tbdy|stbdy|sybdy)$/";
    // NOTE(review): the alternation below contains "ybd\>tstTyE" with no '|' between what look
    // like two separate forms, and it ends with an empty alternative ("|)"). Confirm whether
    // both are intentional before touching the pattern; it is reproduced unchanged here.
    private static final String sbarVerbForms = "/^(qAl|\\>DAf|AEln|\\>wDH|ymkn|\\>Eln|\\*krt|\\>kd|AElnt|Akd|qAlt|\\>DAft|AfAd|y\\*kr|yjb|\\{Etbr|\\>wDHt|AEtbr|sbq|\\*kr|tAbE|nqlt|SrH|r\\>Y|\\>fAd|AfAdt|yqwl|\\>kdt|\\>Elnt|Akdt|yrY|tEtbr|AEtqd|yEtbr|tfyd|ytwqE|AEtbrt|ynbgy|Tlbt|qrr|ktbt|\\>blg|\\>\\$Ar|ywDH|t\\&kd|Tlb|r\\>t|yEny|nryd|nEtbr|yftrD|k\\$f|\\{Etbrt|AwDH|ytEyn|ykfy|y\\&kd|yErf|ydrk|tZhr|tqwl|tbd\\>|nEtqd|nErf|AErf|Elm|Awrdt|AwDHt|AqtrH|yryd|yErfAn|yElm|ybd\\>tstTyE|tHAwl|tEny|nrY|n\\>ml|)$/";
    /** Tregex compiler bound to an ArabicHeadFinder; used for most of the annotation patterns. */
    private static final TregexPatternCompiler tregexPatternCompiler = new TregexPatternCompiler(new ArabicHeadFinder());
    private static final String markPRDverbString = "-markPRDverbs";
    private static final long serialVersionUID = 1L;

    /**
     * Builds parser parameters on top of a new {@link ArabicTreebankLanguagePack} and
     * fills the table of available annotation patterns.
     */
    public ArabicTreebankParserParams() {
        super(new ArabicTreebankLanguagePack());
        initializeAnnotationPatterns();
    }

    /**
     * Creates a tree reader factory configured from the current option flags.
     *
     * @return a new {@link ArabicTreeReaderFactory} built from the retainNPTmp, retainPRD,
     *         changeNoLabels, discardX and collapse3LetterPrepositionVariants settings
     */
    @Override
    public TreeReaderFactory treeReaderFactory() {
        return new ArabicTreeReaderFactory(
                retainNPTmp,
                retainPRD,
                changeNoLabels,
                discardX,
                collapse3LetterPrepositionVariants);
    }

    /**
     * @return a new in-memory treebank that reads trees through {@link #treeReaderFactory()}
     */
    @Override
    public MemoryTreebank memoryTreebank() {
        TreeReaderFactory readerFactory = treeReaderFactory();
        return new MemoryTreebank(readerFactory);
    }

    /**
     * @return a new disk-backed treebank that reads trees through {@link #treeReaderFactory()}
     */
    @Override
    public DiskTreebank diskTreebank() {
        TreeReaderFactory readerFactory = treeReaderFactory();
        return new DiskTreebank(readerFactory);
    }

    /**
     * Instantiates the configured head finder class through its no-argument constructor.
     * If instantiation fails for any reason, the failure is reported on stderr and an
     * {@link ArabicHeadFinder} is returned instead.
     *
     * @return a new head finder instance; never null
     */
    @Override
    public HeadFinder headFinder() {
        try {
            // Class.newInstance() is deprecated; going through the constructor object is the
            // supported replacement and wraps constructor failures instead of leaking them raw.
            return this.headFinderClass.getDeclaredConstructor().newInstance();
        }
        catch (Exception e) {
            // A constructor failure arrives wrapped; report the underlying cause when there is one.
            Throwable reported = e.getCause() != null ? e.getCause() : e;
            System.err.println("Error while instantiating class " + this.headFinderClass + ": " + reported);
            System.err.println("Using ArabicHeadFinder instead.");
            return new ArabicHeadFinder();
        }
    }

    /**
     * @return a new {@link ArabicCollinizer} built from the language pack and the current
     *         collinizer settings (punctuation retention and prune regex)
     */
    @Override
    public TreeTransformer collinizer() {
        return new ArabicCollinizer(tlp, collinizerRetainsPunctuation, collinizerPruneRegex);
    }

    /**
     * @return the same kind of transformer as {@link #collinizer()}; no separate
     *         EVALB-specific variant is defined
     */
    @Override
    public TreeTransformer collinizerEvalb() {
        TreeTransformer sameAsCollinizer = collinizer();
        return sameAsCollinizer;
    }

    /**
     * @return always the shared empty array; this class defines no sister splitters
     */
    @Override
    public String[] sisterSplitters() {
        final String[] noSplitters = EMPTY_STRING_ARRAY;
        return noSplitters;
    }

    /**
     * Annotates the label of a single node in place.
     * Every active annotation pattern is matched against {@code root}; for each pattern that
     * matches at {@code t}, the suffix produced by its function is appended to the node's
     * category. If {@code t} is a preterminal, its tag is set to the new category as well.
     *
     * @param t    the node whose label is rewritten
     * @param root the root of the tree containing {@code t}
     * @return {@code t} itself, after its label has been updated
     */
    @Override
    public Tree transformTree(Tree t, Tree root) {
        StringBuilder category = new StringBuilder(t.label().value());
        for (Map.Entry<TregexPattern, Function<TregexMatcher, String>> annotation : activeAnnotations.entrySet()) {
            TregexMatcher matcher = annotation.getKey().matcher(root);
            if (matcher.matchesAt(t)) {
                category.append(annotation.getValue().apply(matcher));
            }
        }
        final String annotated = category.toString();
        t.label().setValue(annotated);
        if (t.isPreTerminal()) {
            // Keep the tag in sync with the rewritten category on preterminals.
            ((HasTag) t.label()).setTag(annotated);
        }
        return t;
    }

    /** Prints the accumulated option log to stderr. */
    @Override
    public void display() {
        System.err.println(optionsString);
    }

    /**
     * Populates {@code annotationPatterns} with every annotation this class knows about,
     * keyed by the command-line flag that activates it. Nothing is activated here; see
     * setOptionFlag for activation.
     *
     * If a pattern fails to compile, the error is reported on stderr and registration stops
     * at that point: patterns registered before the failure stay available, later ones do not.
     */
    private void initializeAnnotationPatterns() {
        try {
            // Patterns compiled with the static TregexPattern.compile.
            registerAnnotationPattern("-markFem", TregexPattern.compile("__ <<# /p$/"), new SimpleStringFunction("-FEM"));
            registerAnnotationPattern("-markGappedVP", TregexPattern.compile("@VP > @VP $- __ $ /^(?:CC|CONJ)/ !< /^V/"), new SimpleStringFunction("-gappedVP"));
            registerAnnotationPattern("-markGappedVPConjoiners", TregexPattern.compile("/^(?:CC|CONJ)/ $ (@VP > @VP $- __ !< /^V/)"), new SimpleStringFunction("-gappedVP"));
            registerAnnotationPattern("-gpAnnotatePrepositions", TregexPattern.compile("/^(?:IN|PREP)$/ > (__ > __=gp)"), new AddRelativeNodeFunction("^^", "gp"));
            registerAnnotationPattern("-gpEquivalencePrepositions", TregexPattern.compile("/^(?:IN|PREP)$/ > (@PP >+(/^PP/) __=gp)"), new AddEquivalencedNodeFunction("^^", "gp"));
            registerAnnotationPattern("-gpEquivalencePrepositionsVar", TregexPattern.compile("/^(?:IN|PREP)$/ > (@PP >+(/^PP/) __=gp)"), new AddEquivalencedNodeFunctionVar("^^", "gp"));
            registerAnnotationPattern("-genitiveMark", TregexPattern.compile(genitiveNodeTregexString), new SimpleStringFunction("-genitive"));
            registerAnnotationPattern("-markGenitiveParent", TregexPattern.compile("@NP < (@NP > @NP $- /^N/)"), new SimpleStringFunction("-genitiveParent"));

            // Patterns compiled with the shared compiler that carries an ArabicHeadFinder.
            registerAnnotationPattern("-maSdrMark", tregexPatternCompiler.compile("/^N/ <<# (/^[t\\u062a].+[y\\u064a].$/ > @NN|NOUN|DTNN)"), new SimpleStringFunction("-maSdr"));
            registerAnnotationPattern("-maSdrMark2", tregexPatternCompiler.compile("/^N/ <<# (/^(?:[t\\u062a].+[y\\u064a].|<.{3,}|A.{3,})$/ > @NN|NOUN|DTNN)"), new SimpleStringFunction("-maSdr"));
            registerAnnotationPattern("-maSdrMark3", tregexPatternCompiler.compile("/^N/ <<# (/^(?:[t\\u062a<A].{3,})$/ > @NN|NOUN|DTNN)"), new SimpleStringFunction("-maSdr"));
            registerAnnotationPattern("-maSdrMark4", tregexPatternCompiler.compile("/^N/ <<# (/^(?:[t\\u062a<A].{3,})$/ > (@NN|NOUN|DTNN > (@NP < @NP)))"), new SimpleStringFunction("-maSdr"));
            registerAnnotationPattern("-maSdrMark5", tregexPatternCompiler.compile("/^N/ <<# (__ > (@NN|NOUN|DTNN > (@NP < @NP)))"), new SimpleStringFunction("-maSdr"));
            registerAnnotationPattern("-mjjMark", tregexPatternCompiler.compile("@JJ|DTJJ < /^m/ $+ @PP ># @ADJP "), new SimpleStringFunction("-mjj"));
            registerAnnotationPattern("-splitPUNC", tregexPatternCompiler.compile("@PUNC < __=term"), new AnnotatePunctuationFunction());
            registerAnnotationPattern("-markPPwithPPdescendant", tregexPatternCompiler.compile("__ !< @PP << @PP [ >> @PP | == @PP ]"), new SimpleStringFunction("-inPPdominatesPP"));
            registerAnnotationPattern("-markNPwithSdescendant", tregexPatternCompiler.compile("__ !< @S << @S [ >> @NP | == @NP ]"), new SimpleStringFunction("-inNPdominatesS"));
            registerAnnotationPattern("-markContainsVerb", tregexPatternCompiler.compile("__ << (/^[CIP]?V/ < (__ !< __))"), new SimpleStringFunction("-withV"));
            registerAnnotationPattern("-retainNPTmp", tregexPatternCompiler.compile("__ >># /^NP-TMP/"), new SimpleStringFunction("-TMP"));
            registerAnnotationPattern("-markRightRecursiveNP", tregexPatternCompiler.compile("__ <<- @NP [>>- @NP | == @NP]"), new SimpleStringFunction("-rrNP"));
            registerAnnotationPattern("-markBaseNP", tregexPatternCompiler.compile("@NP !< @NP !< @VP !< @SBAR !< @ADJP !< @ADVP !< @S !< @QP !< @UCP !< @PP"), new SimpleStringFunction("-base"));
            registerAnnotationPattern("-markStrictBaseNP", tregexPatternCompiler.compile("@NP !<  (__ < (__ < __))"), new SimpleStringFunction("-base"));
            registerAnnotationPattern("-markContainsSBAR", tregexPatternCompiler.compile("__ << @SBAR"), new SimpleStringFunction("-withSBAR"));
            registerAnnotationPattern("-markPhrasalNodesDominatedBySBAR", tregexPatternCompiler.compile("__ < (__ < __) >> @SBAR"), new SimpleStringFunction("-domBySBAR"));
            registerAnnotationPattern("-markCoordinateNPs", tregexPatternCompiler.compile("@NP < @CC|CONJ"), new SimpleStringFunction("-coord"));
            registerAnnotationPattern("-splitCC", tregexPatternCompiler.compile("@CC|CONJ < __=term"), new AddRelativeNodeRegexFunction("-", "term", "-*([^-].*)"));
            registerAnnotationPattern("-splitIN", tregexPatternCompiler.compile("@PP <<# (@IN|PREP < __=term)"), new AddRelativeNodeFunction("-", "term"));
            // The verb-form alternations live in class constants; concatenating compile-time
            // constants yields exactly the same pattern text as spelling them out inline.
            registerAnnotationPattern("-markCopularVerbTags", tregexPatternCompiler.compile("/^V/ < " + copularVerbForms), new SimpleStringFunction("-copular"));
            registerAnnotationPattern("-markSBARVerbTags", tregexPatternCompiler.compile("/^V/ < " + sbarVerbForms), new SimpleStringFunction("-SBARverb"));
            registerAnnotationPattern("-markNounNPargTakers", tregexPatternCompiler.compile("@NN|NNS|NNP|NNPS|DTNN|DTNNS|DTNNP|DTNNPS ># (@NP < @NP)"), new SimpleStringFunction("-NounNParg"));
            registerAnnotationPattern("-markNounAdjVPheads", tregexPatternCompiler.compile("@NN|NNS|NNP|NNPS|JJ|DTJJ|DTNN|DTNNS|DTNNP|DTNNPS ># @VP"), new SimpleStringFunction("-VHead"));
            registerAnnotationPattern("-markPronominalNP", tregexPatternCompiler.compile("@NP < @PRP"), new SimpleStringFunction("-PRP"));
            registerAnnotationPattern("-markMultiCC", tregexPatternCompiler.compile("__ < (@CC $.. @CC)"), new SimpleStringFunction("-multiCC"));
            registerAnnotationPattern("-markHasCCdaughter", tregexPatternCompiler.compile("__ < @CC"), new SimpleStringFunction("-CCdtr"));
            registerAnnotationPattern("-markAcronymNP", tregexPatternCompiler.compile("@NP !<  (__ < (__ < __)) < (/^NN/ < /^.$/ $ (/^NN/ < /^.$/)) !< (__ < /../)"), new SimpleStringFunction("-acro"));
            registerAnnotationPattern("-markAcronymNN", tregexPatternCompiler.compile("/^NN/ < /^.$/ $ (/^NN/ < /^.$/) > (@NP !<  (__ < (__ < __)) !< (__ < /../))"), new SimpleStringFunction("-acro"));
        }
        catch (ParseException e) {
            // Every flag above is distinct, so the map size equals the number of patterns
            // registered successfully; the one that failed is the next in sequence.
            int nth = this.annotationPatterns.size() + 1;
            System.err.println("Parse exception on " + ordinalOf(nth) + " annotation pattern initialization:" + e);
        }
    }

    /** Stores one annotation pattern and its suffix function under the given option flag. */
    private void registerAnnotationPattern(String flag, TregexPattern pattern, Function<TregexMatcher, String> annotator) {
        this.annotationPatterns.put(flag, new Pair<TregexPattern, Function<TregexMatcher, String>>(pattern, annotator));
    }

    /** Formats a positive integer as an English ordinal such as 1st, 2nd, 3rd, 11th or 21st. */
    private static String ordinalOf(int n) {
        int lastTwo = n % 100;
        if (lastTwo >= 11 && lastTwo <= 13) {
            return n + "th";
        }
        switch (n % 10) {
            case 1:
                return n + "st";
            case 2:
                return n + "nd";
            case 3:
                return n + "rd";
            default:
                return n + "th";
        }
    }

    /**
     * Processes the option at {@code args[i]}, if this class recognises it.
     *
     * Recognised options are every flag registered in {@code annotationPatterns}, plus
     * -retainNPTmp, -discardX, -changeNoLabels, -collapse3LetterPrepositionVariants,
     * -markPRDverbs, -collinizerRetainsPunctuation, -collinizerPruneRegex REGEX,
     * -hf CLASSNAME, -arabicFactored and -arabicTokenizerModel FILE. Options that take a
     * value read it from {@code args[i + 1]}.
     *
     * @param args the full argument array
     * @param i    index of the option to examine
     * @return the index of the first argument after the option and its value, or {@code i}
     *         unchanged when the option is not recognised
     */
    @Override
    public int setOptionFlag(String[] args, int i) {
        final String flag = args[i];
        boolean didSomething = false;
        if (this.annotationPatterns.containsKey(flag)) {
            if (flag.equals("-retainNPTmp")) {
                // "-retainNPTmp" doubles as an annotation flag, so the dedicated branch below
                // is never reached while the pattern is registered. Set the tree-reader flag
                // here too; presumably the reader must keep NP-TMP labels for the
                // "__ >># /^NP-TMP/" pattern to have anything to match.
                this.optionsString = this.optionsString + "Retaining NP-TMP marking.\n";
                this.retainNPTmp = true;
            }
            Pair<TregexPattern, Function<TregexMatcher, String>> p = this.annotationPatterns.get(flag);
            this.activeAnnotations.put(p.first(), p.second());
            this.optionsString = this.optionsString + "Option " + flag + " added annotation pattern " + p.first() + " with annotation " + p.second() + '\n';
            didSomething = true;
        } else if (flag.equals("-retainNPTmp")) {
            // Fallback: only reachable when the annotation pattern failed to register.
            this.optionsString = this.optionsString + "Retaining NP-TMP marking.\n";
            this.retainNPTmp = true;
            didSomething = true;
        } else if (flag.equals("-discardX")) {
            this.optionsString = this.optionsString + "Discarding X trees.\n";
            this.discardX = true;
            didSomething = true;
        } else if (flag.equals("-changeNoLabels")) {
            this.optionsString = this.optionsString + "Change no labels.\n";
            this.changeNoLabels = true;
            didSomething = true;
        } else if (flag.equals("-collapse3LetterPrepositionVariants")) {
            this.optionsString = this.optionsString + "Collapse 3 Letter Preposition Variants\n";
            this.collapse3LetterPrepositionVariants = true;
            didSomething = true;
        } else if (flag.equals(markPRDverbString)) {
            this.optionsString = this.optionsString + "Mark PRD.\n";
            this.retainPRD = true;
            didSomething = true;
        } else if (flag.equals("-collinizerRetainsPunctuation")) {
            this.optionsString = this.optionsString + "Collinizer retains punctuation.\n";
            this.collinizerRetainsPunctuation = true;
            didSomething = true;
        } else if (flag.equals("-collinizerPruneRegex")) {
            this.optionsString = this.optionsString + "Collinizer prune regex: " + args[i + 1] + '\n';
            this.collinizerPruneRegex = Pattern.compile(args[i + 1]);
            ++i; // skip the regex argument
            didSomething = true;
        } else if (flag.equals("-hf")) {
            try {
                this.headFinderClass = Class.forName(args[i + 1]).asSubclass(HeadFinder.class);
                this.optionsString = this.optionsString + "HeadFinder class: " + args[i + 1] + '\n';
            }
            catch (ClassNotFoundException e) {
                System.err.println("Error -- can't find HeadFinder class" + args[i + 1]);
            }
            catch (ClassCastException e) {
                // asSubclass rejects classes that do not implement HeadFinder; keep the
                // previously configured head finder instead of aborting option parsing.
                System.err.println("Error -- class " + args[i + 1] + " is not a HeadFinder");
            }
            ++i; // skip the class-name argument
            didSomething = true;
        } else if (flag.equals("-arabicFactored")) {
            // Shorthand for a fixed bundle of options, each applied as if given on its own.
            String[] bundle = {"-discardX", "-markNounNPargTakers", "-genitiveMark", "-splitPUNC", "-markContainsVerb", "-splitCC", "-markContainsSBAR"};
            for (String bundled : bundle) {
                this.setOptionFlag(new String[]{bundled}, 0);
            }
            didSomething = true;
        } else if (flag.equals("-arabicTokenizerModel")) {
            String modelFile = args[i + 1];
            try {
                // Loaded reflectively, so this class has no compile-time reference to the segmenter.
                WordSegmenter aSeg = (WordSegmenter)Class.forName("edu.stanford.nlp.wordseg.ArabicSegmenter").newInstance();
                aSeg.loadSegmenter(modelFile);
                System.out.println("aSeg=" + aSeg);
                TokenizerFactory<Word> aTF = WordSegmentingTokenizer.factory(aSeg);
                ((ArabicTreebankLanguagePack)this.treebankLanguagePack()).setTokenizerFactory(aTF);
            }
            catch (RuntimeIOException ex) {
                System.err.println("Couldn't load ArabicSegmenter " + modelFile);
                ex.printStackTrace();
            }
            catch (Exception e) {
                System.err.println("Couldn't instantiate segmenter: edu.stanford.nlp.wordseg.ArabicSegmenter");
                e.printStackTrace();
            }
            ++i; // skip the model-file argument
            didSomething = true;
        }
        if (didSomething) {
            ++i; // step past the flag itself
        }
        return i;
    }

    /**
     * @return a fixed eleven-token sample sentence (ten words and a final period);
     *         the tokens look like Buckwalter-transliterated Arabic -- TODO confirm
     */
    public Sentence<Word> defaultTestSentence() {
        String[] tokens = {"w", "lm", "tfd", "mElwmAt", "En", "ADrAr", "Aw", "DHAyA", "HtY", "AlAn", "."};
        return Sentence.toSentence(tokens);
    }

    /**
     * Command-line utility that prints every tree of a treebank whose yield is no longer
     * than a given length.
     *
     * Arguments: {@code args[0]} is the treebank path, {@code args[1]} the maximum yield
     * length, and the optional {@code args[2]} is either {@code -b2a} or a single option
     * understood by setOptionFlag. With {@code -b2a}, each printed tree is followed by a
     * Penn-style print of the same tree with its leaves converted from Buckwalter to Unicode.
     *
     * @param args command-line arguments as described above
     */
    public static void main(String[] args) {
        if (args.length < 2) {
            // Previously this surfaced as an ArrayIndexOutOfBoundsException.
            System.err.println("Usage: java " + ArabicTreebankParserParams.class.getName() + " treebankPath maxLength [-b2a | option]");
            return;
        }
        int maxLength = Integer.parseInt(args[1]);
        boolean b2a = false;
        ArabicTreebankParserParams tlpp = new ArabicTreebankParserParams();
        if (args.length > 2) {
            if (args[2].equals("-b2a")) {
                b2a = true;
            } else {
                tlpp.setOptionFlag(args, 2);
            }
        }
        DiskTreebank trees = tlpp.diskTreebank();
        trees.loadPath(args[0]);
        PrintWriter pw = tlpp.pw();
        TreeTransformer mapper = new TreeTransformer(){
            private Buckwalter buck = new Buckwalter();

            // Rewrites every leaf value in place and returns the same tree.
            @Override
            public Tree transformTree(Tree t) {
                for (Tree tr : t) {
                    if (!tr.isLeaf()) continue;
                    tr.setValue(this.buck.buckwalterToUnicode(tr.value()));
                }
                return t;
            }
        };
        for (Tree t : trees) {
            if (t.yield().size() > maxLength) continue;
            pw.println(t);
            if (!b2a) continue;
            mapper.transformTree(t).pennPrint(pw);
        }
        // Make sure buffered output reaches its destination before the JVM exits.
        pw.flush();
    }

    /**
     * @return a new {@link BaseLexicon} created with its no-argument constructor
     */
    @Override
    public Lexicon lex() {
        Lexicon lexicon = new BaseLexicon();
        return lexicon;
    }

    /**
     * Creates a lexicon from the given options. When no unknown-word model is configured,
     * {@code op.uwModel} is first set to the Arabic unknown-word model class name; the
     * passed-in options object is modified in that case.
     *
     * @param op lexicon options; {@code uwModel} may be filled in as a side effect
     * @return a new {@link BaseLexicon} built from {@code op}
     */
    @Override
    public Lexicon lex(Options.LexOptions op) {
        final String arabicUnknownWordModel = "edu.stanford.nlp.parser.lexparser.ArabicUnknownWordModel";
        if (null == op.uwModel) {
            op.uwModel = arabicUnknownWordModel;
        }
        return new BaseLexicon(op);
    }

    /**
     * Annotation function that classifies the punctuation token bound to the node named
     * {@code "term"} and returns a suffix for its class, or the empty string when the
     * token fits none of the classes.
     */
    private static class AnnotatePunctuationFunction
    implements SerializableFunction<TregexMatcher, String> {
        static final String key = "term";
        private static final Pattern endOfSentence = Pattern.compile("^(\\.|\\?.*)$");
        private static final Pattern comma = Pattern.compile(",");
        private static final Pattern dash = Pattern.compile("^-.*$");
        private static final Pattern quote = Pattern.compile("^\"$");
        private static final Pattern lrb = Pattern.compile("^-LRB-$");
        private static final Pattern rrb = Pattern.compile("^-RRB-$");
        private static final long serialVersionUID = 1L;

        // Checked in this order; the brackets come before the dash because the dash
        // pattern would also match "-LRB-" and "-RRB-".
        private static final Pattern[] CLASS_PATTERNS = {endOfSentence, comma, lrb, rrb, dash, quote};
        private static final String[] CLASS_SUFFIXES = {"-eos", "-comma", "-lrb", "-rrb", "-dash", "-quote"};

        private AnnotatePunctuationFunction() {
        }

        /**
         * @param m matcher whose node named {@code "term"} holds the punctuation token
         * @return the suffix of the first class whose pattern matches the whole token,
         *         or {@code ""} if none does
         */
        @Override
        public String apply(TregexMatcher m) {
            final String token = m.getNode(key).label().value();
            for (int idx = 0; idx < CLASS_PATTERNS.length; idx++) {
                if (CLASS_PATTERNS[idx].matcher(token).matches()) {
                    return CLASS_SUFFIXES[idx];
                }
            }
            return "";
        }

        @Override
        public String toString() {
            return "AnnotatePunctuationFunction";
        }
    }

    /**
     * Annotation function that returns {@code annotationMark + "VSA"} when the label of the
     * named node starts with S, V or A, and the empty string otherwise.
     */
    private static class AddEquivalencedNodeFunctionVar
    implements SerializableFunction<TregexMatcher, String> {
        private final String annotationMark;
        private final String key;
        private static final long serialVersionUID = 1L;

        /**
         * @param annotationMark prefix placed before the "VSA" marker
         * @param key            name of the matched node whose label is inspected
         */
        public AddEquivalencedNodeFunctionVar(String annotationMark, String key) {
            this.annotationMark = annotationMark;
            this.key = key;
        }

        @Override
        public String apply(TregexMatcher m) {
            final String label = m.getNode(key).label().value();
            final boolean inClass = label.startsWith("S") || label.startsWith("V") || label.startsWith("A");
            return inClass ? annotationMark + "VSA" : "";
        }

        @Override
        public String toString() {
            return "AddEquivalencedNodeFunctionVar[" + annotationMark + "," + key + "]";
        }
    }

    /**
     * Annotation function that returns {@code annotationMark + "S"} when the label of the
     * named node starts with S, {@code annotationMark + "V"} when it starts with V, and
     * the empty string otherwise.
     */
    private static class AddEquivalencedNodeFunction
    implements SerializableFunction<TregexMatcher, String> {
        private final String annotationMark;
        private final String key;
        private static final long serialVersionUID = 1L;

        /**
         * @param annotationMark prefix placed before the class letter
         * @param key            name of the matched node whose label is inspected
         */
        public AddEquivalencedNodeFunction(String annotationMark, String key) {
            this.annotationMark = annotationMark;
            this.key = key;
        }

        @Override
        public String apply(TregexMatcher m) {
            final String label = m.getNode(key).label().value();
            // S is tested before V, as the first matching initial decides the result.
            for (String initial : new String[]{"S", "V"}) {
                if (label.startsWith(initial)) {
                    return annotationMark + initial;
                }
            }
            return "";
        }

        @Override
        public String toString() {
            return "AddEquivalencedNodeFunction[" + annotationMark + "," + key + "]";
        }
    }

    /**
     * Annotation function that appends the label of a named node, optionally reduced by a
     * regular expression: when the regex matches the whole label, only its first capture
     * group is used; otherwise the label is used unchanged.
     */
    private static class AddRelativeNodeRegexFunction
    implements SerializableFunction<TregexMatcher, String> {
        private String annotationMark;
        private String key;
        private Pattern pattern;
        private static final long serialVersionUID = 1L;

        /**
         * @param annotationMark prefix placed before the (possibly reduced) label
         * @param key            name of the matched node whose label is used
         * @param regex          expression with at least one capture group; if it does not
         *                       compile, the problem is reported on stderr and labels are
         *                       used unreduced
         */
        public AddRelativeNodeRegexFunction(String annotationMark, String key, String regex) {
            this.annotationMark = annotationMark;
            this.key = key;
            try {
                this.pattern = Pattern.compile(regex);
            }
            catch (PatternSyntaxException pse) {
                System.err.println("Bad pattern: " + regex);
                this.pattern = null;
            }
        }

        /**
         * @param m matcher holding the node named by {@code key}
         * @return {@code annotationMark} followed by capture group 1 of the label when the
         *         regex matches it, or by the whole label otherwise
         */
        @Override
        public String apply(TregexMatcher m) {
            String val = m.getNode(this.key).label().value();
            if (this.pattern != null) {
                Matcher mat = this.pattern.matcher(val);
                // Test the regex matcher, not the tree matcher: group(1) is only valid
                // after this Matcher has matched, and throws IllegalStateException otherwise.
                if (mat.matches()) {
                    val = mat.group(1);
                }
            }
            return this.annotationMark + val;
        }

        @Override
        public String toString() {
            return "AddRelativeNodeRegexFunction[" + this.annotationMark + ',' + this.key + ',' + this.pattern + ']';
        }
    }

    /**
     * Annotation function that returns {@code annotationMark} followed by the label value
     * of the node bound to {@code key} in the match.
     */
    private static class AddRelativeNodeFunction
    implements SerializableFunction<TregexMatcher, String> {
        private final String annotationMark;
        private final String key;
        private static final long serialVersionUID = 1L;

        /**
         * @param annotationMark prefix placed before the node's label
         * @param key            name of the matched node whose label is appended
         */
        public AddRelativeNodeFunction(String annotationMark, String key) {
            this.annotationMark = annotationMark;
            this.key = key;
        }

        @Override
        public String apply(TregexMatcher m) {
            final String relativeLabel = m.getNode(key).label().value();
            return annotationMark + relativeLabel;
        }

        @Override
        public String toString() {
            return "AddRelativeNodeFunction[" + annotationMark + "," + key + "]";
        }
    }

    /** Annotation function that ignores the match and always returns one fixed string. */
    private static class SimpleStringFunction
    implements SerializableFunction<TregexMatcher, String> {
        private final String result;
        private static final long serialVersionUID = 1L;

        /**
         * @param result the string returned by every call to {@link #apply}
         */
        public SimpleStringFunction(String result) {
            this.result = result;
        }

        /**
         * @param tregexMatcher unused
         * @return the string given at construction
         */
        @Override
        public String apply(TregexMatcher tregexMatcher) {
            return result;
        }

        @Override
        public String toString() {
            return "SimpleStringFunction[" + result + "]";
        }
    }

    /**
     * Tree filter that delegates to the language pack's punctuation-tag reject filter,
     * applying it to the value of the tree node it is given.
     */
    private static class PunctuationTreeRejectFilter
    implements Filter<Tree> {
        final Filter<String> punctLabelFilter;
        private static final long serialVersionUID = 1L;

        /**
         * @param tlp language pack supplying the punctuation-tag reject filter
         */
        PunctuationTreeRejectFilter(TreebankLanguagePack tlp) {
            punctLabelFilter = tlp.punctuationTagRejectFilter();
        }

        /**
         * @param tree node to test
         * @return whatever the underlying label filter returns for {@code tree.value()}
         */
        @Override
        public boolean accept(Tree tree) {
            final String nodeValue = tree.value();
            return punctLabelFilter.accept(nodeValue);
        }
    }

    /**
     * Tree transformer that normalises a tree: it drops the start-symbol root, prunes nodes
     * whose label matches an optional regex, reduces non-leaf labels to their basic
     * category, renames ADVP to PRT and, unless configured otherwise, prunes punctuation.
     */
    private static class ArabicCollinizer
    implements TreeTransformer,
    Serializable {
        private TreebankLanguagePack tlp;
        private boolean retainPunctuation;
        private Pattern collinizerPruneRegex;
        private Filter<Tree> punctuationRejecter;
        private static final long serialVersionUID = 1L;

        /**
         * @param tlp                  language pack used for start-symbol detection, basic
         *                             categories and the punctuation filter
         * @param retainPunctuation    when true, punctuation nodes are left in the result
         * @param collinizerPruneRegex nodes whose label value fully matches this pattern are
         *                             rejected by the prune filter; null disables that pruning
         */
        public ArabicCollinizer(TreebankLanguagePack tlp, boolean retainPunctuation, Pattern collinizerPruneRegex) {
            this.tlp = tlp;
            this.retainPunctuation = retainPunctuation;
            this.collinizerPruneRegex = collinizerPruneRegex;
            this.punctuationRejecter = new PunctuationTreeRejectFilter(tlp);
        }

        /**
         * Returns a normalised copy of the given tree; the input is not modified.
         *
         * @param t the tree to normalise
         * @return the normalised copy, or null when nothing is left of the tree
         */
        @Override
        public Tree transformTree(Tree t) {
            if (this.tlp.isStartSymbol(t.value())) {
                t = t.firstChild();
            }
            if (t == null) {
                // Presumably a childless start-symbol root; report it the same way as a
                // fully pruned tree rather than failing on deepCopy().
                return null;
            }
            Tree result = t.deepCopy();
            if ((result = result.prune(new Filter<Tree>(){
                private static final long serialVersionUID = 1669994102700201499L;

                // Accepts a node unless a prune regex is set and fully matches its label value.
                @Override
                public boolean accept(Tree tree) {
                    return ArabicCollinizer.this.collinizerPruneRegex == null || tree.label() == null || !ArabicCollinizer.this.collinizerPruneRegex.matcher(tree.label().value()).matches();
                }
            })) == null) {
                return null;
            }
            for (Tree node : result) {
                if (node.label() == null) {
                    // Nothing to normalise; the ADVP check below used to dereference the
                    // missing label even though the basic-category step guarded against it.
                    continue;
                }
                if (!node.isLeaf()) {
                    node.label().setValue(this.tlp.basicCategory(node.label().value()));
                }
                // Constant-first comparison also tolerates a null label value.
                if ("ADVP".equals(node.label().value())) {
                    node.label().setValue("PRT");
                }
            }
            if (this.retainPunctuation) {
                return result;
            }
            return result.prune(this.punctuationRejecter);
        }
    }
}

