package edu.cmu.minorthird.text.learn;

import cern.colt.matrix.impl.AbstractFormatter;
import edu.cmu.minorthird.text.BasicTextBase;
import edu.cmu.minorthird.text.FancyLoader;
import edu.cmu.minorthird.text.MonotonicTextLabels;
import edu.cmu.minorthird.text.MutableTextLabels;
import edu.cmu.minorthird.text.Span;
import edu.cmu.minorthird.text.SpanDifference;
import edu.cmu.minorthird.text.TextLabelsLoader;
import edu.cmu.minorthird.text.mixup.MixupProgram;
import edu.cmu.minorthird.util.BasicCommandLineProcessor;
import edu.cmu.minorthird.util.CommandLineProcessor;
import edu.cmu.minorthird.util.IOUtil;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import org.apache.log4j.Logger;

/* loaded from: input_file:edu/cmu/minorthird/text/learn/ExtractorNameMatcher.class */
public class ExtractorNameMatcher {
    /** Class logger; assigned in the static initializer. */
    private static Logger log;
    /** File the serialized ExtractorAnnotator is loaded from (set by the loadFrom option). */
    private File fromFile;
    /** Optional file the resulting labels are written to (set by the saveAs option). */
    private File saveAs;
    /** Labels loaded through the labels option; input to the annotator in doMain. */
    private MonotonicTextLabels textLabels;
    /** Labels carrying the predictions: passed to the constructor or produced in doMain. */
    private MonotonicTextLabels annLabels;
    /** Span type of the extractor's predictions; the constructors set it to "_prediction". */
    private String predType;
    /** Span type of the true names used for evaluation in doMain; the constructors set it to "". */
    private String spanType;
    /** transformDict moves a name to the deleted list when its hScore is below this value. */
    private static double threshold;
    /** Annotator deserialized from fromFile in doMain. */
    private ExtractorAnnotator ann;
    /** Span difference of the post-matching evaluation; null until doMain has run. */
    private SpanDifference finalSD;
    /** Distinct predicted name strings, sorted longest first in doMain. */
    private ArrayList nameDict;
    /** Marker appended to bare initials by transformToken and stripped again by transformDict. */
    private static final String DIV = "@#!";
    /** Equals the maximum number of tokens in a lookup window built by applyDict. */
    private static final int WINDOW_SIZE = 5;
    /** Equals the number of trailing document tokens applyDict scans for high-risk names. */
    private static final int SIG_SIZE = 2;
    /** File named "fixEnv.mixup" (relative path); assigned in the static initializer. */
    private static final File fixMixup;
    /** Name variants that contain no DIV marker (filled by transformDict). */
    private ArrayList lowRiskNameList;
    /** Name variants made up of initials only (filled by transformDict). */
    private ArrayList highRiskNameList;
    /** Name variants rejected because of a low hScore (filled by transformDict). */
    private ArrayList deletedNameList;
    /** Lines printed by MyCLP.usage(); assigned in the static initializer. */
    private static final String[] USAGE;
    /** Cache slot for this class's Class object, filled by the static initializer (looks compiler-generated). */
    static Class class$edu$cmu$minorthird$text$learn$ExtractorNameMatcher;

    /* loaded from: input_file:edu/cmu/minorthird/text/learn/ExtractorNameMatcher$MyCLP.class */
    /**
     * Command-line processor that stores option values in a matcher.
     * Each one-argument method carries the name of an option listed in
     * USAGE (presumably dispatched by name by BasicCommandLineProcessor).
     */
    public class MyCLP extends BasicCommandLineProcessor {
        /** Matcher whose fields receive the parsed option values. */
        private final ExtractorNameMatcher owner;

        public MyCLP(ExtractorNameMatcher extractorNameMatcher) {
            owner = extractorNameMatcher;
        }

        /** Remembers the file the serialized annotator is read from. */
        public void loadFrom(String str) {
            File source = new File(str);
            owner.fromFile = source;
        }

        /** Remembers the file the resulting labels are written to. */
        public void saveAs(String str) {
            File target = new File(str);
            owner.saveAs = target;
        }

        /** Loads the labels identified by the given key via FancyLoader. */
        public void labels(String str) {
            MutableTextLabels loaded = (MutableTextLabels) FancyLoader.loadTextLabels(str);
            owner.textLabels = loaded;
        }

        /** Sets the span type of the true names. */
        public void spanType(String str) {
            owner.spanType = str;
        }

        /** Prints every USAGE line to standard output. */
        @Override // edu.cmu.minorthird.util.BasicCommandLineProcessor, edu.cmu.minorthird.util.CommandLineProcessor
        public void usage() {
            for (String line : ExtractorNameMatcher.USAGE) {
                System.out.println(line);
            }
        }
    }

    /**
     * Returns the token precision of the span difference stored by doMain.
     * Fails with a NullPointerException when doMain has not run yet.
     */
    public double getTokenPrecision() {
        final SpanDifference difference = this.finalSD;
        return difference.tokenPrecision();
    }

    /**
     * Returns the token recall of the span difference stored by doMain.
     * Fails with a NullPointerException when doMain has not run yet.
     */
    public double getTokenRecall() {
        final SpanDifference difference = this.finalSD;
        return difference.tokenRecall();
    }

    /**
     * Creates a matcher that works on labels which already carry the
     * predictions, so doMain does not load an annotator.
     *
     * @param monotonicTextLabels labels holding the predicted spans
     */
    public ExtractorNameMatcher(MonotonicTextLabels monotonicTextLabels) {
        // the no-argument constructor performs the common field initialisation
        this();
        this.annLabels = monotonicTextLabels;
    }

    /**
     * Creates a matcher with nothing loaded; the inputs are expected to be
     * supplied through the command-line processor returned by getCLP().
     */
    public ExtractorNameMatcher() {
        // span types
        predType = "_prediction";
        spanType = "";

        // nothing loaded or computed yet
        fromFile = null;
        saveAs = null;
        textLabels = null;
        annLabels = null;
        ann = null;
        finalSD = null;

        // name collections start out empty
        nameDict = new ArrayList();
        lowRiskNameList = new ArrayList();
        highRiskNameList = new ArrayList();
        deletedNameList = new ArrayList();
    }

    /** Returns a new command-line processor bound to this matcher. */
    public CommandLineProcessor getCLP() {
        final MyCLP processor = new MyCLP(this);
        return processor;
    }

    /**
     * Runs the name-matching pipeline: obtains the predicted labels (loading
     * and applying the serialized annotator when none were passed to the
     * constructor), builds the name dictionary from the predictions, prints
     * the low-risk / high-risk / deleted name lists, applies the dictionary
     * to every document, evaluates the fix-up mixup program, optionally saves
     * the labels, and prints the evaluation before and after matching.
     *
     * @throws IllegalStateException    if -loadFrom or -labels is missing, or
     *                                  the fix-up mixup program cannot be loaded
     * @throws IllegalArgumentException if the annotator cannot be read
     */
    public void doMain() {
        if (this.annLabels == null) {
            this.annLabels = annotateFromFile();
        }

        // distinct predicted names, longest first
        HashSet predictedNames = new HashSet();
        Span.Looper predictions = this.annLabels.instanceIterator(this.predType);
        while (predictions.hasNext()) {
            predictedNames.add(((Span) predictions.next()).asString());
        }
        this.nameDict = new ArrayList(predictedNames);
        Collections.sort(this.nameDict, new Comparator() {
            @Override // java.util.Comparator
            public int compare(Object obj, Object obj2) {
                int first = ((String) obj).length();
                int second = ((String) obj2).length();
                return second < first ? -1 : (second == first ? 0 : 1);
            }
        });

        transformDict(new FreqAnal(this.annLabels, this.predType));
        printNameList("Low Risk Names:", this.lowRiskNameList);
        printNameList("High Risk Names:", this.highRiskNameList);
        printNameList("Deleted Names:", this.deletedNameList);

        applyDict();
        loadFixMixup().eval(this.annLabels, this.annLabels.getTextBase());
        if (this.saveAs != null) {
            saveLabels();
        }

        System.out.println("============================================================");
        System.out.println("Pre names-matching:");
        System.out.println(new SpanDifference(
                this.annLabels.instanceIterator(this.predType),
                this.annLabels.instanceIterator(this.spanType),
                this.annLabels.closureIterator(this.spanType)).toSummary());
        System.out.println("Post names-matching:");
        this.finalSD = new SpanDifference(
                this.annLabels.instanceIterator(this.predType + "_updated_fixed"),
                this.annLabels.instanceIterator(this.spanType),
                this.annLabels.closureIterator(this.spanType));
        System.out.println(this.finalSD.toSummary());
    }

    /** Loads the serialized annotator from fromFile and applies it to textLabels. */
    private MonotonicTextLabels annotateFromFile() {
        if (this.fromFile == null) {
            throw new IllegalStateException("need to specify -loadFrom");
        }
        if (this.textLabels == null) {
            // fail with a readable message instead of an NPE inside the annotator
            throw new IllegalStateException("need to specify -labels");
        }
        try {
            this.ann = (ExtractorAnnotator) IOUtil.loadSerialized(this.fromFile);
            return (MonotonicTextLabels) this.ann.annotatedCopy(this.textLabels);
        } catch (IOException e) {
            // keep the cause so the underlying I/O failure stays visible
            throw new IllegalArgumentException("can't load annotator from " + this.fromFile + ": " + e, e);
        }
    }

    /** Prints a heading followed by the names, numbered from 1. */
    private void printNameList(String heading, ArrayList names) {
        System.out.println(heading);
        int rank = 0;
        for (Iterator it = names.iterator(); it.hasNext();) {
            rank++;
            System.out.println(rank + ". " + it.next());
        }
    }

    /** Loads the fix-up mixup program, failing loudly instead of returning null. */
    private MixupProgram loadFixMixup() {
        // The historical hard-coded location is tried first for backward
        // compatibility; otherwise fall back to fixEnv.mixup in the working directory.
        File mixupFile = new File("c:\\minorthird\\apps\\names\\fixEnv.mixup");
        if (!mixupFile.exists()) {
            mixupFile = fixMixup;
        }
        try {
            return new MixupProgram(mixupFile);
        } catch (Exception e) {
            throw new IllegalStateException("can't load mixup program from " + mixupFile + ": " + e, e);
        }
    }

    /** Saves the labels to saveAs; on failure reports it and tries a default file name. */
    private void saveLabels() {
        try {
            new TextLabelsLoader().saveTypesAsOps(this.annLabels, this.saveAs);
        } catch (IOException e) {
            File fallback = new File("name-matching-labels.env");
            System.out.println("can't save labels to " + this.saveAs + ": " + e + "; trying " + fallback);
            try {
                new TextLabelsLoader().saveTypesAsOps(this.annLabels, fallback);
            } catch (Exception e2) {
                System.out.println(e2);
            }
        }
    }

    /**
     * Scans every document of the annotated text base and adds each
     * dictionary match to the type predType + "_updated". Low-risk names are
     * looked up across the whole document; high-risk names only in windows
     * starting within the last SIG_SIZE tokens.
     */
    private void applyDict() {
        int processed = 0;
        Span.Looper documents = this.annLabels.getTextBase().documentSpanIterator();
        while (documents.hasNext()) {
            Span document = documents.nextSpan();
            processed++;
            // multiply by the float first so the ratio is not truncated by integer division
            float percent = 100.0f * processed / this.annLabels.getTextBase().size();
            System.out.println(percent + "% Working on " + document.getDocumentId() + "...");

            markMatches(document, this.lowRiskNameList, 0);
            // clamp so documents shorter than SIG_SIZE tokens do not yield a negative start
            markMatches(document, this.highRiskNameList, Math.max(0, document.size() - SIG_SIZE));
        }
    }

    /** Slides a WINDOW_SIZE-token window over the document from start and labels every match. */
    private void markMatches(Span document, ArrayList names, int start) {
        String updatedType = this.predType + "_updated";
        int position = start;
        while (position < document.size()) {
            Span window = document.subSpan(position, Math.min(document.size() - position, WINDOW_SIZE));
            Span match = dictLookup(names, window);
            if (match == null) {
                position++;
                continue;
            }
            System.out.println("! Found: " + flatten(match) + " matches " + flatten(window));
            this.annLabels.addToType(match, updatedType);
            // skip past the match; always advance at least one token so an
            // empty match cannot stall the scan
            position += Math.max(1, match.size());
        }
    }

    /** Returns the span text with every whitespace run replaced by the column separator. */
    private String flatten(Span span) {
        return span.asString().replaceAll("[\r\n\\s]+", AbstractFormatter.DEFAULT_COLUMN_SEPARATOR);
    }

    /**
     * Looks for the first dictionary name that the span's text starts with,
     * followed by a non-word character or the end of the text.
     *
     * @param arrayList names to try, in order
     * @param span      window of document tokens to match against
     * @return the leading part of span covering the matched name's tokens,
     *         or null when no name matches
     */
    private Span dictLookup(ArrayList arrayList, Span span) {
        // the normalised window text is the same for every name, so compute it once
        String text = span.asString()
                .replaceAll("[\r\n\\s]+", AbstractFormatter.DEFAULT_COLUMN_SEPARATOR)
                .toLowerCase();
        BasicTextBase tokenizer = new BasicTextBase();
        for (Iterator it = arrayList.iterator(); it.hasNext();) {
            String name = (String) it.next();
            if (text.matches("(?i)(?s)^\\Q" + name + "\\E(\\W|$).*")) {
                int nameTokens = tokenizer.splitIntoTokens(name).length;
                // never ask for more tokens than the window holds
                return span.subSpan(0, Math.min(nameTokens, span.size()));
            }
        }
        return null;
    }

    /**
     * Expands every name in nameDict into its variants and files each variant
     * (with the DIV marker removed) into one of the three lists:
     * deleted when its hScore is known and below the threshold, low risk when
     * the variant had no DIV marker, high risk when it consisted only of
     * marked initials. Other variants are dropped. Finally each list is
     * de-duplicated and sorted.
     */
    private void transformDict(FreqAnal freqAnal) {
        for (Iterator names = this.nameDict.iterator(); names.hasNext();) {
            ArrayList variants = transformName((String) names.next());
            for (Iterator candidates = variants.iterator(); candidates.hasNext();) {
                String variant = (String) candidates.next();
                String plain = variant.replaceAll(DIV, "");
                Double score = freqAnal.getHScore(plain);

                if (score != null && score.doubleValue() < threshold) {
                    this.deletedNameList.add(plain);
                    continue;
                }
                if (variant.indexOf(DIV) == -1) {
                    this.lowRiskNameList.add(plain);
                } else if (variant.matches("(\\w@#!)+")) {
                    this.highRiskNameList.add(plain);
                }
            }
        }
        this.lowRiskNameList = uniqueSortedList(this.lowRiskNameList);
        this.highRiskNameList = uniqueSortedList(this.highRiskNameList);
        this.deletedNameList = uniqueSortedList(this.deletedNameList);
    }

    /**
     * Generates the variants of a predicted name. The name is lower-cased,
     * stripped of everything except letters, hyphens and spaces, and split on
     * hyphens/spaces. Names of one to four tokens are expanded by
     * {@code transform} using a fixed set of token-index patterns; names with
     * any other token count produce no variants.
     *
     * @param str the predicted name
     * @return the trimmed variants that contain at least two word characters
     *         and do not end in a hyphen
     */
    private ArrayList transformName(String str) {
        String[] tokens = str.toLowerCase().trim().replaceAll("[^a-zA-Z\\- ]+", "").split("[\\- ]+");

        // each inner array lists the token positions combined into one variant family
        Object[] candidates;
        switch (tokens.length) {
            case 1:
                candidates = transform(tokens, new int[][]{{0}});
                break;
            case 2:
                candidates = transform(tokens, new int[][]{{0, 1}, {0}});
                break;
            case 3:
                candidates = transform(tokens, new int[][]{{0, 1, 2}, {0, 2}, {2}, {0}});
                break;
            case 4:
                candidates = transform(tokens, new int[][]{{0, 1, 2, 3}, {0, 1, 3}, {0, 3}, {3}, {0}});
                break;
            default:
                candidates = new Object[0];
                break;
        }

        ArrayList variants = new ArrayList();
        for (Object candidate : candidates) {
            String variant = ((String) candidate).trim();
            boolean longEnough = variant.replaceAll("\\W", "").length() >= 2;
            if (longEnough && !variant.endsWith("-")) {
                variants.add(variant);
            }
        }
        return variants;
    }

    /**
     * Builds name variants from per-token variants. Every token is expanded
     * with {@code transformToken}; then, for each index pattern, all
     * concatenations that pick one variant per listed token position are
     * emitted, with the first position varying slowest.
     *
     * @param strArr the name tokens
     * @param iArr   index patterns; each inner array lists token positions to
     *               concatenate, in order (empty patterns produce nothing)
     * @return the concatenated variants as Strings, pattern by pattern
     */
    private Object[] transform(String[] strArr, int[][] iArr) {
        Object[][] tokenVariants = new Object[strArr.length][];
        for (int i = 0; i < strArr.length; i++) {
            tokenVariants[i] = transformToken(strArr[i], i == 0, i == strArr.length - 1);
        }

        ArrayList combined = new ArrayList();
        for (int[] pattern : iArr) {
            if (pattern.length > 0) {
                appendCombinations(tokenVariants, pattern, 0, "", combined);
            }
        }
        return combined.toArray();
    }

    /** Recursively appends every concatenation for pattern positions depth..end to out. */
    private void appendCombinations(Object[][] tokenVariants, int[] pattern, int depth, String prefix, ArrayList out) {
        if (depth == pattern.length) {
            out.add(prefix);
            return;
        }
        Object[] choices = tokenVariants[pattern[depth]];
        for (int i = 0; i < choices.length; i++) {
            appendCombinations(tokenVariants, pattern, depth + 1, prefix + choices[i], out);
        }
    }

    /**
     * Returns a new list holding the distinct strings of the argument, longest
     * first; strings of equal length are ordered lexicographically so the
     * result does not depend on hash iteration order.
     *
     * @param arrayList list of Strings, possibly with duplicates
     * @return a de-duplicated, sorted copy
     */
    private ArrayList uniqueSortedList(ArrayList arrayList) {
        ArrayList unique = new ArrayList(new HashSet(arrayList));
        Collections.sort(unique, new Comparator() {
            @Override // java.util.Comparator
            public int compare(Object obj, Object obj2) {
                String first = (String) obj;
                String second = (String) obj2;
                if (first.length() != second.length()) {
                    return second.length() < first.length() ? -1 : 1;
                }
                return first.compareTo(second);
            }
        });
        return unique;
    }

    /**
     * Lists the spellings of one name token. For the last token these are the
     * token itself and its initial followed by "."; for any other token they
     * are the token followed by the column separator, the token followed by
     * "-", and its initial followed by ". ". In both cases the initial
     * followed by the DIV marker comes last. An empty token has no spellings.
     *
     * @param str the token
     * @param z   whether the token is the first one (not used here)
     * @param z2  whether the token is the last one
     * @return the spellings as Strings
     */
    private Object[] transformToken(String str, boolean z, boolean z2) {
        ArrayList spellings = new ArrayList();
        if (str.length() != 0) {
            String initial = str.substring(0, 1);
            if (z2) {
                spellings.add(str);
                spellings.add(initial + ".");
            } else {
                spellings.add(str + AbstractFormatter.DEFAULT_COLUMN_SEPARATOR);
                spellings.add(str + "-");
                spellings.add(initial + ". ");
            }
            spellings.add(initial + DIV);
        }
        return spellings.toArray();
    }

    /**
     * Command-line entry point: parses the arguments into a fresh matcher and
     * runs it. Any exception is reported with a stack trace.
     */
    public static void main(String[] strArr) {
        try {
            ExtractorNameMatcher matcher = new ExtractorNameMatcher();
            CommandLineProcessor options = matcher.getCLP();
            options.processArguments(strArr);
            matcher.doMain();
        } catch (Exception failure) {
            failure.printStackTrace();
        }
    }

    /**
     * Resolves a class by name, converting the checked lookup failure into a
     * NoClassDefFoundError that carries the original exception as its cause.
     *
     * @param str fully qualified class name
     * @return the loaded class
     * @throws NoClassDefFoundError if no class with that name can be found
     */
    static Class class$(String str) {
        try {
            return Class.forName(str);
        } catch (ClassNotFoundException e) {
            // initCause returns Throwable, so the error must be thrown through
            // its own variable for the method to compile without a throws clause
            NoClassDefFoundError error = new NoClassDefFoundError(e.getMessage());
            error.initCause(e);
            throw error;
        }
    }

    // Assigns the logger, the hScore threshold, the mixup file and the usage text.
    static {
        Class self = class$edu$cmu$minorthird$text$learn$ExtractorNameMatcher;
        if (self == null) {
            // first use: resolve the class by name and remember it in the cache slot
            self = class$("edu.cmu.minorthird.text.learn.ExtractorNameMatcher");
            class$edu$cmu$minorthird$text$learn$ExtractorNameMatcher = self;
        }
        log = Logger.getLogger(self);
        threshold = 16.0d;
        fixMixup = new File("fixEnv.mixup");
        USAGE = new String[]{
            "ExtractorNameMatcher: increase recall of a previously-learned extractor, applying a name matching scheme",
            "",
            "Parameters:",
            " -loadFrom FILE     where to load a previously-learner extractor from",
            " -labels KEY        the key for the labels, in which names are to be extracted",
            " [-spanType String] the span type of the true names. The default is set to true_name",
            " [-saveAs FILE]     a file to save the new post-name matching labels",
            ""
        };
    }
}
