/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.parser.lexparser;

import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.ling.Tag;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.ling.WordTag;
import edu.stanford.nlp.parser.lexparser.BaseUnknownWordModel;
import edu.stanford.nlp.parser.lexparser.ChineseLexicon;
import edu.stanford.nlp.parser.lexparser.IntTaggedWord;
import edu.stanford.nlp.parser.lexparser.Lexicon;
import edu.stanford.nlp.parser.lexparser.Options;
import edu.stanford.nlp.parser.lexparser.Train;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.trees.Tree;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

/**
 * Unknown-word model for Chinese.
 *
 * <p>Scoring first applies a few surface-pattern rules (dates, numbers, ordinals and
 * proper names containing a middle-dot style separator). A word that matches one of
 * those patterns gets log-probability {@code 0} for the single tag the rule allows and
 * negative infinity for every other tag. Words that match no pattern are scored either
 * from per-tag statistics keyed on the word's first character (or, optionally, on the
 * Unicode general category of that character), or by the Good-Turing estimate supplied
 * by the superclass.
 *
 * <p>NOTE(review): {@link #train} keys the per-tag table by the tree node's own
 * {@code Label}, whereas {@link #score} looks it up with a freshly built {@code Tag}.
 * This relies on those label types comparing equal - confirm against the label classes.
 */
public class ChineseUnknownWordModel extends BaseUnknownWordModel {
    /** Character encoding name; not referenced anywhere in this class. */
    private static final String encoding = "GB18030";

    /** When true, non-OTHER_LETTER first characters are bucketed by Unicode general category. */
    boolean useUnicodeType = false;

    /** Contains an ASCII digit, a full-width digit, or one of a fixed set of Chinese numeral characters. */
    private static final String numberMatch = ".*[0-9\uff10-\uff19\u4e00\u4e8c\u4e09\u56db\u4e94\u516d\u4e03\u516b\u4e5d\u5341\u767e\u5343\u4e07\u4ebf\u96f6\u3007\u25cb\u25ef].*";
    /** Contains a numeral (as above) and ends in one of U+5E74, U+6708, U+65E5, U+53F7. */
    private static final String dateMatch = ".*[0-9\uff10-\uff19\u4e00\u4e8c\u4e09\u56db\u4e94\u516d\u4e03\u516b\u4e5d\u5341\u767e\u5343\u4e07\u4ebf\u96f6\u3007\u25cb\u25ef].*[\u5e74\u6708\u65e5\u53f7]";
    /** Starts with U+7B2C. */
    private static final String ordinalMatch = "\u7b2c.*";
    /** Contains one of several dot-like separator characters (U+00B7, U+0387, U+2022, U+2024, U+2027, U+2219, U+22C5, U+30FB). */
    private static final String properNameMatch = ".*[\u00b7\u0387\u2022\u2024\u2027\u2219\u22c5\u30fb].*";

    // Precompiled once: score() sits on the parser's hot path and String.matches()
    // would otherwise recompile each expression on every call.
    private static final Pattern NUMBER_PATTERN = Pattern.compile(numberMatch);
    private static final Pattern DATE_PATTERN = Pattern.compile(dateMatch);
    private static final Pattern ORDINAL_PATTERN = Pattern.compile(ordinalMatch);
    private static final Pattern PROPER_NAME_PATTERN = Pattern.compile(properNameMatch);

    /** Key under which the "never seen this first character" mass is stored for each tag. */
    private static final String UNKNOWN = "UNK";

    /** Every first-character class observed during {@link #train}. */
    private Set<String> seenFirst = new HashSet<String>();
    private static final long serialVersionUID = 221L;

    /**
     * Creates the model. First-character scoring is enabled by default and is replaced by
     * Good-Turing scoring when {@code ChineseLexicon.useGoodTuringUnknownWordModel} is set.
     *
     * @param op  lexicon options; {@code op.useUnicodeType} is copied into this model
     * @param lex the lexicon handed on to the superclass
     */
    public ChineseUnknownWordModel(Options.LexOptions op, Lexicon lex) {
        super(op, lex);
        this.useFirst = true;
        if (ChineseLexicon.useGoodTuringUnknownWordModel) {
            this.useGoodTuring();
        }
        this.useUnicodeType = op.useUnicodeType;
    }

    /** Switches this model from first-character scoring to Good-Turing scoring. */
    final void useGoodTuring() {
        this.useGT = true;
        this.useFirst = false;
    }

    /**
     * Returns the equivalence class used for first-character statistics.
     *
     * <p>An empty word has no first character; it is bucketed under the empty string
     * instead of letting {@code substring}/{@code charAt} throw.
     *
     * <p>NOTE(review): this looks at a single UTF-16 code unit, so a word that begins with
     * a supplementary (surrogate-pair) character is keyed on its high surrogate only. Left
     * unchanged here so keys stay compatible with previously trained models.
     *
     * @param word the word to classify
     * @return the first character as a string, or, when {@link #useUnicodeType} is on and
     *         that character's general category is not {@code OTHER_LETTER}, the decimal
     *         category number
     */
    private String firstCharacterClass(String word) {
        if (word.isEmpty()) {
            return "";
        }
        if (this.useUnicodeType) {
            int category = Character.getType(word.charAt(0));
            if (category != Character.OTHER_LETTER) {
                return Integer.toString(category);
            }
        }
        return word.substring(0, 1);
    }

    /** Maps a yes/no decision onto the two log-probabilities the pattern rules can produce. */
    private static float allowIf(boolean permitted) {
        return permitted ? 0.0f : Float.NEGATIVE_INFINITY;
    }

    /**
     * Scores a word/tag pair for a word treated as unknown.
     *
     * @param itw the word/tag pair to score
     * @return a log-probability: {@code 0} or negative infinity when one of the surface
     *         patterns applies, otherwise the trained first-character value, the
     *         superclass Good-Turing value, or negative infinity when neither scheme is
     *         enabled or the tag has no trained statistics
     */
    @Override
    public float score(IntTaggedWord itw) {
        String word = itw.wordString();
        String tag = itw.tagString();
        Tag tagL = new Tag(tag);

        // Pattern rules are tried in a fixed order; the first one that matches decides.
        if (DATE_PATTERN.matcher(word).matches()) {
            return allowIf(tag.equals("NT"));
        }
        if (NUMBER_PATTERN.matcher(word).matches()) {
            // A numeral starting with the ordinal prefix may only be OD, any other numeral only CD.
            boolean ordinal = ORDINAL_PATTERN.matcher(word).matches();
            return allowIf(tag.equals(ordinal ? "OD" : "CD"));
        }
        if (PROPER_NAME_PATTERN.matcher(word).matches()) {
            return allowIf(tag.equals("NR"));
        }

        if (!this.useFirst) {
            return this.useGT ? this.scoreGT(tagL) : Float.NEGATIVE_INFINITY;
        }

        String first = this.firstCharacterClass(word);
        if (!this.seenFirst.contains(first)) {
            if (this.useGT) {
                return this.scoreGT(tagL);
            }
            first = UNKNOWN;
        }

        // The cast tolerates a superclass table that is declared without type arguments.
        @SuppressWarnings("unchecked")
        ClassicCounter<String> wordProbs = (ClassicCounter<String>) this.tagHash.get(tagL);
        if (wordProbs == null) {
            // No first-character statistics were trained for this tag.
            return Float.NEGATIVE_INFINITY;
        }
        String key = wordProbs.containsKey(first) ? first : UNKNOWN;
        return (float) wordProbs.getCount(key);
    }

    /**
     * Trains the model from a collection of trees.
     *
     * <p>Runs the superclass Good-Turing training, then for every preterminal counts the
     * first-character class of its word under its tag. Words whose running count is still
     * below two, in trees after the first {@code Train.fractionBeforeUnseenCounting}
     * share of the collection, are additionally tallied per tag in {@code unSeenCounter}.
     * Finally the per-tag counts are converted to log relative frequencies in
     * {@code tagHash}, with one pseudo-observation reserved for the {@code "UNK"} class.
     *
     * @param trees the training trees
     */
    @Override
    public void train(Collection<Tree> trees) {
        if (this.useFirst) {
            System.err.println("ChineseUWM: treating unknown word as the average of their equivalents by first-character identity. useUnicodeType: " + this.useUnicodeType);
        }
        if (this.useGT) {
            System.err.println("ChineseUWM: using Good-Turing smoothing for unknown words.");
        }
        this.trainUnknownGT(trees);

        ClassicCounter<IntTaggedWord> seenCounter = new ClassicCounter<IntTaggedWord>();
        Map<Label, ClassicCounter<String>> firstCounts = new HashMap<Label, ClassicCounter<String>>();
        ClassicCounter<Label> tagCounts = new ClassicCounter<Label>();
        int indexToStartUnkCounting = (int)((double)trees.size() * Train.fractionBeforeUnseenCounting);
        IntTaggedWord iTotal = new IntTaggedWord(-1, -1);

        int treeNumber = 0;
        for (Tree tree : trees) {
            ++treeNumber;
            for (Tree node : tree) {
                if (!node.isPreTerminal()) {
                    continue;
                }
                Label tagL = node.label();
                String word = node.firstChild().label().value();
                String first = this.firstCharacterClass(word);

                ClassicCounter<String> perTag = firstCounts.get(tagL);
                if (perTag == null) {
                    perTag = new ClassicCounter<String>();
                    firstCounts.put(tagL, perTag);
                }
                perTag.incrementCount(first);
                tagCounts.incrementCount(tagL);
                this.seenFirst.add(first);

                IntTaggedWord iW = new IntTaggedWord(word, ".*.");
                seenCounter.incrementCount(iW);
                // Only the tail of the training data contributes, and only while the word is still rare.
                if (treeNumber > indexToStartUnkCounting && seenCounter.getCount(iW) < 2.0) {
                    this.unSeenCounter.incrementCount(new IntTaggedWord(".*.", tagL.value()));
                    this.unSeenCounter.incrementCount(iTotal);
                }
            }
        }

        for (Map.Entry<Label, ClassicCounter<String>> entry : firstCounts.entrySet()) {
            Label tagLab = entry.getKey();
            ClassicCounter<String> perTag = entry.getValue();

            // The cast tolerates a superclass table that is declared without type arguments.
            @SuppressWarnings("unchecked")
            ClassicCounter<String> logProbs = (ClassicCounter<String>) this.tagHash.get(tagLab);
            if (logProbs == null) {
                logProbs = new ClassicCounter<String>();
                this.tagHash.put(tagLab, logProbs);
            }

            // The unknown class is treated as if it had been observed once with every tag,
            // so the tag total grows by one to match.
            tagCounts.incrementCount(tagLab);
            perTag.setCount(UNKNOWN, 1.0);

            double tagTotal = tagCounts.getCount(tagLab);
            for (String first : perTag.keySet()) {
                logProbs.setCount(first, Math.log(perTag.getCount(first) / tagTotal));
            }
        }
    }

    /** Prints the outcome of matching one sample against one pattern, for {@link #main}. */
    private static void reportMatch(Pattern pattern, String sample, String kind) {
        String verdict = pattern.matcher(sample).matches() ? "hooray " : "Uh-oh ";
        System.out.println(verdict + kind + "!");
    }

    /**
     * Ad-hoc smoke test: checks a few sample strings against the surface patterns and
     * prints equality results for {@code TaggedWord} and {@code WordTag} instances.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        System.out.println("Testing unknown matching");
        reportMatch(PROPER_NAME_PATTERN, "\u5218\u00b7\u9769\u547d", "names");
        reportMatch(NUMBER_PATTERN, "\uff13\uff10\uff10\uff10", "numbers");
        reportMatch(NUMBER_PATTERN, "\u767e\u5206\u4e4b\u56db\u5341\u4e09\u70b9\u4e8c", "numbers");
        reportMatch(NUMBER_PATTERN, "\u767e\u5206\u4e4b\u4e09\u5341\u516b\u70b9\u516d", "numbers");
        reportMatch(DATE_PATTERN, "\u4e09\u6708", "dates");

        System.out.println("Testing tagged word");
        ClassicCounter<TaggedWord> c = new ClassicCounter<TaggedWord>();
        TaggedWord tw1 = new TaggedWord("w", "t");
        c.incrementCount(tw1);
        TaggedWord tw2 = new TaggedWord("w", "t2");
        System.out.println(c.containsKey(tw2));
        System.out.println(tw1.equals(tw2));
        WordTag wt1 = ChineseUnknownWordModel.toWordTag(tw1);
        WordTag wt2 = ChineseUnknownWordModel.toWordTag(tw2);
        WordTag wt3 = new WordTag("w", "t2");
        System.out.println(wt1.equals(wt2));
        System.out.println(wt2.equals(wt3));
    }

    /** Copies the word and tag of a {@code TaggedWord} into a new {@code WordTag}. */
    private static WordTag toWordTag(TaggedWord tw) {
        return new WordTag(tw.word(), tw.tag());
    }

    /**
     * Not supported by this model.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public String getSignature(String word, int loc) {
        throw new UnsupportedOperationException();
    }
}

