package edu.cmu.minorthird.text;

import cern.colt.matrix.impl.AbstractFormatter;
import edu.cmu.minorthird.text.Span;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.StringTokenizer;
import java.util.Vector;
import org.apache.log4j.Logger;

/* loaded from: input_file:edu/cmu/minorthird/text/ExtractAbbrev.class */
public class ExtractAbbrev extends AbstractAnnotator {
    /** log4j logger for this annotator; assigned in the constructor. */
    private Logger log;
    /** Annotation name reported through setAnnotatedBy once doAnnotate has finished. */
    public static final String PROVIDED_ANNOTATION = "abbrev";
    /** Span type that doAnnotate assigns to the short form of each extracted pair. */
    public static final String SHORT_FORM_TYPE = "abbrevShort";
    /** Span type that doAnnotate assigns to the long form of each extracted pair. */
    public static final String LONG_FORM_TYPE = "abbrevLong";
    /** Property set on a long-form span; its value is the running pair number as a string. */
    public static final String LONG_FORM_PROP = "expansion";
    /** Property set on a short-form span; its value is the running pair number as a string. */
    public static final String SHORT_FORM_PROP = "acronym";
    /** Known-good definitions for test mode: short form -> Vector of long forms (filled by loadTrueDefinitions). */
    private HashMap mTestDefinitions;
    /** Test-mode counter: extracted pairs that isTrueDefinition accepted. */
    private int truePositives;
    /** Test-mode counter: extracted pairs that isTrueDefinition rejected. */
    private int falsePositives;
    /** Printed by main in test mode; NOTE(review): never incremented anywhere in this file. */
    private int falseNegatives;
    /** Printed by main in test mode; NOTE(review): never incremented anywhere in this file. */
    private int trueNegatives;
    /** Tab character; NOTE(review): not referenced by name in this file, the literal '\t' is used instead. */
    private static final char DELIMITER = '\t';
    /** True when main was started with -testlist; extractAbbrPair then prints TP/FP verdicts. */
    private boolean testMode;
    /** Pairs collected while annotating, stored flat as short, long, short, long, ... */
    private List accum;
    /** True while doAnnotate is running; makes extractAbbrPair collect into accum instead of printing. */
    private boolean annotationMode;
    /** Cache for the Class object looked up through class$(String); populated by the constructor. */
    static Class class$edu$cmu$minorthird$text$ExtractAbbrev;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:edu/cmu/minorthird/text/ExtractAbbrev$StringSpan.class */
    /**
     * A window {@code [lo, hi)} onto a backing string that remembers its absolute
     * character offsets. All search methods ({@code indexOf}, {@code lastIndexOf},
     * {@code charAt}) work on the window's own text, so the indices they accept and
     * return are relative to {@code lo}; {@code substring} takes relative indices
     * and yields a span over the same backing string.
     */
    public static class StringSpan {
        /** Zero-length span over the empty string. */
        public static final StringSpan EMPTY = new StringSpan("", 0, 0);
        /** Backing string shared by every span derived from this one. */
        String base;
        /** Absolute start offset into {@code base} (inclusive). */
        int lo;
        /** Absolute end offset into {@code base} (exclusive). */
        int hi;
        /** Cached copy of {@code base.substring(lo, hi)}. */
        String mySubstring;

        /**
         * Creates a span over {@code str} covering the absolute range {@code [i, i2)}.
         */
        public StringSpan(String str, int i, int i2) {
            base = str;
            lo = i;
            hi = i2;
            mySubstring = str.substring(i, i2);
        }

        /**
         * Creates a span over the same backing string as {@code stringSpan}; {@code i}
         * and {@code i2} are offsets relative to the start of {@code stringSpan}.
         */
        public StringSpan(StringSpan stringSpan, int i, int i2) {
            this(stringSpan.base, stringSpan.lo + i, stringSpan.lo + i2);
        }

        /** Returns the absolute start offset of this span in the backing string. */
        public int offset() {
            return lo;
        }

        /** Returns the number of characters covered by this span. */
        public int length() {
            return hi - lo;
        }

        /** Returns the character at span-relative index {@code i}. */
        public char charAt(int i) {
            return mySubstring.charAt(i);
        }

        /** Span-relative index of the first {@code c}, or -1. */
        public int indexOf(char c) {
            return mySubstring.indexOf(c);
        }

        /** Span-relative index of the first {@code c} at or after {@code i}, or -1. */
        public int indexOf(char c, int i) {
            return mySubstring.indexOf(c, i);
        }

        /** Span-relative index of the first occurrence of {@code str}, or -1. */
        public int indexOf(String str) {
            return mySubstring.indexOf(str);
        }

        /** Span-relative index of the last occurrence of {@code str}, or -1. */
        public int lastIndexOf(String str) {
            return mySubstring.lastIndexOf(str);
        }

        /** Span-relative index of the last occurrence of {@code str} starting at or before {@code i}, or -1. */
        public int lastIndexOf(String str, int i) {
            return mySubstring.lastIndexOf(str, i);
        }

        /** Returns the text covered by this span. */
        public String asString() {
            return mySubstring;
        }

        /** Returns the sub-span covering span-relative range {@code [i, i2)}. */
        public StringSpan substring(int i, int i2) {
            return new StringSpan(this, i, i2);
        }

        /** Returns the sub-span from span-relative index {@code i} to the end of this span. */
        public StringSpan substring(int i) {
            return new StringSpan(base, lo + i, hi);
        }

        /**
         * Returns a new span with leading and trailing whitespace (as defined by
         * {@link Character#isWhitespace(char)}) excluded; this span is left untouched.
         */
        public StringSpan trim() {
            int start = lo;
            int end = hi;
            while (start < end && Character.isWhitespace(base.charAt(start))) {
                start++;
            }
            while (end > start && Character.isWhitespace(base.charAt(end - 1))) {
                end--;
            }
            return new StringSpan(base, start, end);
        }
    }

    /**
     * Creates an extractor with empty test definitions, zeroed counters, an empty
     * pair accumulator, and both test mode and annotation mode switched off.
     */
    public ExtractAbbrev() {
        // Resolve (and cache in the static field) the Class object used to name the logger.
        if (class$edu$cmu$minorthird$text$ExtractAbbrev == null) {
            class$edu$cmu$minorthird$text$ExtractAbbrev = class$("edu.cmu.minorthird.text.ExtractAbbrev");
        }
        this.log = Logger.getLogger(class$edu$cmu$minorthird$text$ExtractAbbrev);

        this.mTestDefinitions = new HashMap();
        this.accum = new ArrayList();

        this.testMode = false;
        this.annotationMode = false;

        this.truePositives = 0;
        this.falsePositives = 0;
        this.falseNegatives = 0;
        this.trueNegatives = 0;
    }

    /**
     * Extracts abbreviation pairs from every document of the labels' text base and
     * records them as spans: the short form gets type {@link #SHORT_FORM_TYPE}, the
     * long form gets type {@link #LONG_FORM_TYPE}, and both receive a property
     * ({@link #SHORT_FORM_PROP} / {@link #LONG_FORM_PROP}) whose value is the same
     * running pair number, so the two halves can be matched up later. Pair numbers
     * start at 1 and keep counting across documents.
     *
     * @param monotonicTextLabels labels to read documents from and add annotations to
     */
    @Override // edu.cmu.minorthird.text.AbstractAnnotator
    protected void doAnnotate(MonotonicTextLabels monotonicTextLabels) {
        this.annotationMode = true;
        try {
            int pairNumber = 0;
            Span.Looper documents = monotonicTextLabels.getTextBase().documentSpanIterator();
            while (documents.hasNext()) {
                this.accum.clear();
                Span document = documents.nextSpan();
                String contents = document.getDocumentContents();
                extractAbbrPairsFromString(contents);
                // accum holds the pairs flat: short, long, short, long, ...
                Iterator it = this.accum.iterator();
                while (it.hasNext()) {
                    StringSpan shortForm = (StringSpan) it.next();
                    StringSpan longForm = (StringSpan) it.next();
                    Span shortSpan = document.charIndexSubSpan(shortForm.lo, shortForm.hi);
                    // Only pay for the substring/concatenation work when it will be logged.
                    if (this.log.isDebugEnabled()) {
                        this.log.debug(new StringBuffer().append("shortSpan[").append(shortForm.lo).append("..").append(shortForm.hi).append("] of doc: near '").append(contents.substring(shortForm.lo, shortForm.hi)).append("'").toString());
                        this.log.debug(new StringBuffer().append("shortForm='").append(shortForm.asString()).append("' shortSpan='").append(shortSpan.asString()).append("'").toString());
                    }
                    Span longSpan = document.charIndexSubSpan(longForm.lo, longForm.hi);
                    monotonicTextLabels.addToType(shortSpan, SHORT_FORM_TYPE);
                    monotonicTextLabels.addToType(longSpan, LONG_FORM_TYPE);
                    pairNumber++;
                    String pairId = Integer.toString(pairNumber);
                    monotonicTextLabels.setProperty(shortSpan, SHORT_FORM_PROP, pairId);
                    monotonicTextLabels.setProperty(longSpan, LONG_FORM_PROP, pairId);
                }
            }
        } finally {
            // Always leave annotation mode, even if extraction or labelling throws;
            // otherwise extractAbbrPair would keep collecting instead of printing.
            this.annotationMode = false;
        }
        monotonicTextLabels.setAnnotatedBy(PROVIDED_ANNOTATION);
    }

    /**
     * Returns a fixed placeholder message; neither argument is consulted.
     *
     * @param textLabels ignored
     * @param span ignored
     * @return the constant string "No explanation implemented."
     */
    @Override // edu.cmu.minorthird.text.AbstractAnnotator, edu.cmu.minorthird.text.Annotator
    public String explainAnnotation(TextLabels textLabels, Span span) {
        return "No explanation implemented.";
    }

    /**
     * Tells whether {@code str} is acceptable as a short form: it must contain at
     * least one letter and must start with a letter, a digit, or an opening parenthesis.
     */
    private boolean isValidShortForm(String str) {
        if (!hasLetter(str)) {
            // Also covers the empty string, so charAt(0) below is always safe.
            return false;
        }
        char first = str.charAt(0);
        return first == '(' || Character.isLetterOrDigit(first);
    }

    /** Returns true when at least one character of {@code str} is a letter. */
    private boolean hasLetter(String str) {
        char[] chars = str.toCharArray();
        for (char c : chars) {
            if (Character.isLetter(c)) {
                return true;
            }
        }
        return false;
    }

    /** Returns true when at least one character of {@code str} is upper case. */
    private boolean hasCapital(String str) {
        char[] chars = str.toCharArray();
        for (char c : chars) {
            if (Character.isUpperCase(c)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Loads the reference definitions used in test mode. Each line of the file is
     * expected to be {@code shortForm<TAB>longForm}; both parts are trimmed and the
     * long form is appended to the Vector stored under the short form in
     * {@code mTestDefinitions}.
     *
     * <p>Lines without a tab are reported on stderr and skipped, so a single bad
     * line no longer aborts the whole load. Read failures are reported on stderr;
     * whatever was loaded up to that point is kept. The reader is always closed.
     *
     * @param str path of the definitions file
     */
    private void loadTrueDefinitions(String str) {
        HashMap definitions = this.mTestDefinitions;
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(str));
            String line;
            while ((line = reader.readLine()) != null) {
                int tab = line.indexOf('\t');
                if (tab < 0) {
                    // Diagnostics go to stderr so they never mix with the result rows on stdout.
                    System.err.println("Skipping definition line without a tab: " + line);
                    continue;
                }
                String shortForm = line.substring(0, tab).trim();
                String longForm = line.substring(tab).trim();
                // Must stay a Vector: isTrueDefinition casts the stored value to Vector.
                Vector longForms = (Vector) definitions.get(shortForm);
                if (longForms == null) {
                    longForms = new Vector();
                    definitions.put(shortForm, longForms);
                }
                longForms.add(longForm);
            }
        } catch (IOException e) {
            System.err.println("Could not read definitions from " + str + ": " + e.getMessage());
            e.printStackTrace();
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing a read-only file fails.
                }
            }
        }
    }

    /**
     * Checks an extracted pair against the loaded reference definitions.
     *
     * @param str short form; looked up exactly as given
     * @param str2 long form; compared case-insensitively with each stored long form
     * @return true when {@code str} is known and one of its long forms matches {@code str2}
     */
    private boolean isTrueDefinition(String str, String str2) {
        Object stored = this.mTestDefinitions.get(str);
        if (stored != null) {
            Vector longForms = (Vector) stored;
            for (int k = 0; k < longForms.size(); k++) {
                if (longForms.get(k).toString().equalsIgnoreCase(str2)) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Reads the whole file, joining its lines with
     * {@code AbstractFormatter.DEFAULT_COLUMN_SEPARATOR} (each line is followed by
     * one separator), and runs the extractor over the joined text.
     *
     * <p>Any failure is reported on stderr and otherwise swallowed, as before; the
     * reader is now always closed.
     *
     * @param str path of the text file to scan
     */
    private void extractAbbrPairsFromFile(String str) {
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(str));
            // A single builder avoids re-copying the accumulated text for every line.
            StringBuilder text = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                // NOTE(review): presumably a single blank, used to join lines — confirm against colt.
                text.append(line).append(AbstractFormatter.DEFAULT_COLUMN_SEPARATOR);
            }
            extractAbbrPairsFromString(text.toString());
        } catch (Exception e) {
            // Deliberately broad: this is the best-effort boundary of the command-line tool.
            System.err.println("Abbreviation extraction failed for " + str + ": " + e);
            e.printStackTrace();
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing a read-only file fails.
                }
            }
        }
    }

    /**
     * Cuts {@code str} into sentences at every period followed by exactly two
     * whitespace characters and hands each sentence to
     * {@code extractAbbrPairsFromSentence} as a span that keeps its absolute
     * offsets into {@code str}.
     */
    private void extractAbbrPairsFromString(String str) {
        // The delimiter always consumes three characters: '.' plus two whitespace characters.
        final int delimiterLength = 3;
        String[] sentences = str.split("\\.\\s{2}");
        int start = 0;
        for (String sentence : sentences) {
            int end = start + sentence.length();
            extractAbbrPairsFromSentence(new StringSpan(str, start, end));
            start = end + delimiterLength;
        }
    }

    /**
     * Walks through one sentence looking for parenthesised text preceded by a blank,
     * i.e. {@code "... text (inside) ..."}. For each such occurrence the text from
     * the previous {@code ". "} / {@code ", "} up to the parenthesis is taken as the
     * long-form candidate and the text inside the parentheses as the short-form
     * candidate. If the parenthesised text has more than two tokens or is longer
     * than the preceding text, the roles are swapped: the parenthesised text becomes
     * the long form and the single word right before the parenthesis becomes the
     * short form (kept only if it contains a capital letter). Candidates with a
     * valid short form are passed on to {@code extractAbbrPair}.
     *
     * <p>Scanning stops at the first opening parenthesis that yields no candidate
     * (for example when no closing parenthesis follows).
     *
     * @param stringSpan the sentence to scan
     */
    private void extractAbbrPairsFromSentence(StringSpan stringSpan) {
        StringSpan sentence = stringSpan;
        StringSpan longForm = StringSpan.EMPTY;
        StringSpan shortForm = StringSpan.EMPTY;
        int closeParen = -1;
        this.log.debug(new StringBuffer().append("finding pairs in '").append(sentence.asString()).append("'").toString());
        int openParen = sentence.indexOf(" (");
        do {
            if (openParen > -1) {
                // Step from the blank onto the '(' itself.
                openParen++;
            }
            int lastClauseEnd = Math.max(sentence.lastIndexOf(". "), sentence.lastIndexOf(", "));
            if (openParen == -1) {
                if (lastClauseEnd != -1) {
                    // No parenthesis left: drop everything up to the last clause break.
                    sentence = sentence.substring(lastClauseEnd + 2);
                }
            } else {
                closeParen = sentence.indexOf(')', openParen);
                if (closeParen > -1) {
                    int clauseStart = Math.max(sentence.lastIndexOf(". ", openParen), sentence.lastIndexOf(", ", openParen));
                    if (clauseStart == -1) {
                        // -2 so that clauseStart + 2 below lands on the start of the sentence.
                        clauseStart = -2;
                    }
                    longForm = new StringSpan(sentence, clauseStart + 2, openParen);
                    shortForm = new StringSpan(sentence, openParen + 1, closeParen);
                }
            }
            if (shortForm.length() > 0 || longForm.length() > 0) {
                if (shortForm.length() > 1 && longForm.length() > 1) {
                    if (shortForm.indexOf('(') > -1) {
                        // Nested parenthesis: extend the candidate to the next ')' if there is one.
                        int outerClose = sentence.indexOf(')', closeParen + 1);
                        if (outerClose > -1) {
                            shortForm = new StringSpan(sentence, openParen + 1, outerClose);
                            closeParen = outerClose;
                        }
                    }
                    int commaCut = shortForm.indexOf(", ");
                    if (commaCut > -1) {
                        shortForm = shortForm.substring(0, commaCut);
                    }
                    int semicolonCut = shortForm.indexOf("; ");
                    if (semicolonCut > -1) {
                        shortForm = shortForm.substring(0, semicolonCut);
                    }
                    if (new StringTokenizer(shortForm.asString()).countTokens() > 2 || shortForm.length() > longForm.length()) {
                        // The parenthesised text looks like the expansion: swap the roles and
                        // take the word immediately before the parenthesis as the short form.
                        longForm = shortForm;
                        shortForm = new StringSpan(sentence, sentence.lastIndexOf(AbstractFormatter.DEFAULT_COLUMN_SEPARATOR, openParen - 2) + 1, openParen - 1);
                        if (!hasCapital(shortForm.asString())) {
                            shortForm = StringSpan.EMPTY;
                        }
                    }
                    if (isValidShortForm(shortForm.asString())) {
                        extractAbbrPair(shortForm.trim(), longForm.trim());
                    }
                }
                // Continue scanning after the closing parenthesis.
                sentence = sentence.substring(closeParen + 1);
            } else if (openParen > -1) {
                // An opening parenthesis that produced no candidate ends the scan. The previous
                // code also computed a substring here for long remainders but discarded the
                // result, so that branch had no effect and has been removed.
                return;
            }
            shortForm = StringSpan.EMPTY;
            longForm = StringSpan.EMPTY;
            openParen = sentence.indexOf(" (");
        } while (openParen > -1);
    }

    /**
     * Aligns the short form against the long-form candidate from right to left.
     * Every letter or digit of the short form (compared in lower case) must be found
     * in the candidate, in order, when walking backwards; other short-form characters
     * are skipped. The first character of the short form must additionally match at a
     * position that is not preceded by a letter or digit.
     *
     * @param stringSpan the short form
     * @param stringSpan2 the long-form candidate
     * @return the tail of {@code stringSpan2} starting right after the last
     *         {@code AbstractFormatter.DEFAULT_COLUMN_SEPARATOR} at or before the final
     *         match position, or {@code null} when the candidate is exhausted before
     *         all short-form characters were matched
     */
    private StringSpan findBestLongForm(StringSpan stringSpan, StringSpan stringSpan2) {
        int longPos = stringSpan2.length() - 1;
        int shortPos = stringSpan.length() - 1;
        while (shortPos >= 0) {
            char wanted = Character.toLowerCase(stringSpan.charAt(shortPos));
            if (Character.isLetterOrDigit(wanted)) {
                // Move left while the current character does not match, or while the first
                // short-form character would match in the middle of a word.
                while ((longPos >= 0 && Character.toLowerCase(stringSpan2.charAt(longPos)) != wanted)
                        || (shortPos == 0 && longPos > 0 && Character.isLetterOrDigit(stringSpan2.charAt(longPos - 1)))) {
                    longPos--;
                }
                if (longPos < 0) {
                    return null;
                }
                longPos--;
            }
            shortPos--;
        }
        int wordStart = stringSpan2.lastIndexOf(AbstractFormatter.DEFAULT_COLUMN_SEPARATOR, longPos) + 1;
        return stringSpan2.substring(wordStart);
    }

    /**
     * Tries to turn a short-form / long-form candidate into an accepted pair and
     * reports it. The candidate is dropped when the short form is a single
     * character, when no alignment is found, or when the aligned long form fails
     * one of the plausibility checks below.
     *
     * <p>Accepted pairs are appended to {@code accum} (short, then long) in
     * annotation mode. In test mode they are printed with a TP/FP verdict and the
     * matching counter is incremented; otherwise, outside annotation mode, they are
     * printed as {@code short<TAB>long}.
     *
     * @param stringSpan the short form
     * @param stringSpan2 the long-form candidate
     */
    private void extractAbbrPair(StringSpan stringSpan, StringSpan stringSpan2) {
        String shortText = stringSpan.asString();
        this.log.debug("finding long form for '" + shortText + "' and '" + stringSpan2.asString() + "'");
        if (stringSpan.length() == 1) {
            return;
        }
        StringSpan best = findBestLongForm(stringSpan, stringSpan2);
        if (best == null) {
            return;
        }
        String bestText = best.asString();
        int wordCount = new StringTokenizer(bestText, " \t\n\r\f-").countTokens();

        // Number of letters and digits in the short form.
        int alnumCount = 0;
        for (int k = 0; k < stringSpan.length(); k++) {
            if (Character.isLetterOrDigit(stringSpan.charAt(k))) {
                alnumCount++;
            }
        }

        // Plausibility checks; any hit rejects the pair.
        if (best.length() < stringSpan.length()) {
            return;
        }
        if (best.indexOf(shortText + AbstractFormatter.DEFAULT_COLUMN_SEPARATOR) > -1) {
            return;
        }
        if (bestText.endsWith(shortText)) {
            return;
        }
        if (wordCount > alnumCount * 2 || wordCount > alnumCount + 5 || alnumCount > 10) {
            return;
        }

        if (this.annotationMode) {
            this.accum.add(stringSpan);
            this.accum.add(best);
        }
        if (this.testMode) {
            String row = shortText + '\t' + bestText + '\t';
            if (isTrueDefinition(shortText, bestText)) {
                System.out.println(row + "TP");
                this.truePositives++;
            } else {
                this.falsePositives++;
                System.out.println(row + "FP");
            }
        } else if (!this.annotationMode) {
            System.out.println(shortText + '\t' + bestText);
        }
    }

    /** Prints the command-line help to stderr and terminates the JVM with exit status 1. */
    private static void usage() {
        String[] helpLines = {
            "Usage: ExtractAbbrev [-options] <filename>",
            "       <filename> contains text from which abbreviations are extracted",
            "       -testlist <file> = list of true abbreviation definition pairs",
            "       -usage or -help = this message",
        };
        for (int k = 0; k < helpLines.length; k++) {
            System.err.println(helpLines[k]);
        }
        System.exit(1);
    }

    /**
     * Command-line entry point. Accepts {@code -testlist <file>}, {@code -usage},
     * {@code -help} and one input file name, which must be the last argument.
     * Extracted pairs are printed to stdout; with {@code -testlist} each pair is
     * marked TP/FP against the given list and a summary line of the counters is
     * printed at the end.
     *
     * @param strArr command-line arguments
     */
    public static void main(String[] strArr) {
        ExtractAbbrev extractor = new ExtractAbbrev();
        String inputFile = null;
        String testListFile = null;
        final int lastIndex = strArr.length - 1;
        for (int pos = 0; pos < strArr.length; pos++) {
            String arg = strArr[pos];
            if (arg.equals("-testlist")) {
                if (pos == lastIndex) {
                    // The option needs a value; usage() does not return.
                    usage();
                }
                testListFile = strArr[++pos];
                extractor.testMode = true;
            } else if (arg.equals("-usage") || arg.equals("-help")) {
                usage();
            } else {
                inputFile = arg;
                if (pos != lastIndex) {
                    // The input file has to be the final argument.
                    usage();
                }
            }
        }
        if (inputFile == null) {
            usage();
        }
        if (extractor.testMode) {
            extractor.loadTrueDefinitions(testListFile);
        }
        extractor.extractAbbrPairsFromFile(inputFile);
        if (extractor.testMode) {
            System.out.println("TP: " + extractor.truePositives
                    + " FP: " + extractor.falsePositives
                    + " FN: " + extractor.falseNegatives
                    + " TN: " + extractor.trueNegatives);
        }
    }

    /**
     * Looks up a class by its fully qualified name.
     *
     * @param str fully qualified class name
     * @return the Class object for {@code str}
     * @throws NoClassDefFoundError if the class cannot be found; the message is that
     *         of the underlying {@link ClassNotFoundException}, which is attached as the cause
     */
    static Class class$(String str) {
        try {
            return Class.forName(str);
        } catch (ClassNotFoundException e) {
            // NoClassDefFoundError has no (message, cause) constructor, so attach the cause explicitly.
            NoClassDefFoundError error = new NoClassDefFoundError(e.getMessage());
            error.initCause(e);
            throw error;
        }
    }
}
