package websphinx;

import java.net.URL;
import java.util.Enumeration;
import rcm.util.Str;

/* loaded from: input_file:websphinx/Tagexp.class */
/**
 * A {@link Regexp} whose regular expression is derived from a tag expression.
 *
 * <p>The tag expression is tokenized with {@link HTMLParser}; every tag token is
 * rewritten into a regexp fragment that matches the canonical tag form written by
 * {@link #canonicalizeTag(StringBuffer, Tag, int)}, and every other token is passed
 * through {@link #translateText(StringBuffer, String)}. The original expression is
 * kept in {@link #stringRep} and is what {@link #equals(Object)},
 * {@link #hashCode()} and {@link #toString()} are based on.
 */
public class Tagexp extends Regexp {
    /** The tag expression exactly as it was passed to the constructor. */
    String stringRep;

    /** Parser shared by all calls to {@link #toRegexp(String)}, which locks on it while tokenizing. */
    static HTMLParser parser = new HTMLParser();

    /**
     * Builds a tag expression from its textual form.
     *
     * @param str the tag expression; it is translated with {@link #toRegexp(String)}
     *            and the result is handed to the {@link Regexp} constructor
     */
    public Tagexp(String str) {
        super(toRegexp(str));
        this.stringRep = str;
    }

    /**
     * Two tag expressions are equal when their original expression strings are equal.
     *
     * @param obj the object to compare with
     * @return {@code true} if {@code obj} is a {@code Tagexp} with an equal {@code stringRep}
     */
    @Override
    public boolean equals(Object obj) {
        if (!(obj instanceof Tagexp)) {
            return false;
        }
        return ((Tagexp) obj).stringRep.equals(this.stringRep);
    }

    /**
     * Hash code consistent with {@link #equals(Object)}: derived solely from the
     * original expression string, so equal tag expressions land in the same bucket.
     *
     * @return the hash code of {@code stringRep}
     */
    @Override
    public int hashCode() {
        return this.stringRep.hashCode();
    }

    /**
     * @return the original tag expression passed to the constructor
     */
    @Override
    public String toString() {
        return this.stringRep;
    }

    /**
     * Creates a matcher that applies this tag expression to a region.
     *
     * @param region the region to search
     * @return a new {@link TagexpMatcher} bound to this pattern and {@code region}
     */
    @Override
    public PatternMatcher match(Region region) {
        return new TagexpMatcher(this, region);
    }

    /**
     * Translates a tag expression into the regular expression used by the superclass.
     *
     * @param str the tag expression to translate
     * @return the regexp text built from the expression's tokens
     */
    public static String toRegexp(String str) {
        Page page;
        // The parser instance is shared, so the Page is constructed under its lock.
        synchronized (parser) {
            page = new Page((URL) null, str, parser);
        }

        StringBuffer regexp = new StringBuffer();
        for (Region token : page.getTokens()) {
            if (token instanceof Tag) {
                canonicalizeTagPattern(regexp, (Tag) token);
            } else {
                translateText(regexp, token.toString());
            }
        }
        return regexp.toString();
    }

    /**
     * Appends the canonical text form of a tag to a buffer.
     *
     * <p>The form is {@code <}, an optional {@code /} for end tags, the tag name,
     * {@code #i#}, then for each attribute (sorted by name) a space, the name, an
     * optional {@code =value} with the value encoded by
     * {@link #encodeAttrValue(String)}, and a trailing space; finally {@code >}.
     * Comment tags append nothing.
     *
     * @param stringBuffer buffer that receives the canonical form
     * @param tag          the tag to canonicalize
     * @param i            number written between the two {@code #} marks after the tag name
     */
    public static void canonicalizeTag(StringBuffer stringBuffer, Tag tag, int i) {
        String tagName = tag.getTagName();
        // Identity comparison kept as in the original; presumably Tag.COMMENT is a
        // shared constant instance -- TODO confirm against Tag.
        if (tagName == Tag.COMMENT) {
            return;
        }

        stringBuffer.append('<');
        if (tag.isEndTag()) {
            stringBuffer.append('/');
        }
        stringBuffer.append(tagName);
        stringBuffer.append('#');
        stringBuffer.append(String.valueOf(i));
        stringBuffer.append('#');

        if (tag.countHTMLAttributes() > 0) {
            // The array is laid out as name, value, name, value, ...
            String[] attrs = tag.getHTMLAttributes();
            sortAttrs(attrs);
            for (int k = 0; k < attrs.length; k += 2) {
                String attrName = attrs[k];
                String attrValue = attrs[k + 1];
                stringBuffer.append(' ');
                stringBuffer.append(attrName);
                // Region.TRUE is compared by identity (sentinel check); such
                // attributes are written without an "=value" part.
                if (attrValue != Region.TRUE) {
                    stringBuffer.append('=');
                    stringBuffer.append(encodeAttrValue(attrValue));
                }
                stringBuffer.append(' ');
            }
        }
        stringBuffer.append('>');
    }

    /**
     * Appends a regexp fragment that matches canonical tags (as written by
     * {@link #canonicalizeTag(StringBuffer, Tag, int)}) fitting a pattern tag.
     *
     * <p>The tag number is matched by {@code \d+}, and {@code [^>]*} is inserted
     * after the number and after every attribute so that extra attributes in the
     * matched tag are tolerated. Comment tags append nothing.
     *
     * @param stringBuffer buffer that receives the regexp fragment
     * @param tag          the pattern tag taken from the tag expression
     */
    static void canonicalizeTagPattern(StringBuffer stringBuffer, Tag tag) {
        String tagName = tag.getTagName();
        // Identity comparison kept as in the original (see canonicalizeTag).
        if (tagName == Tag.COMMENT) {
            return;
        }

        stringBuffer.append('<');
        if (tag.isEndTag()) {
            stringBuffer.append('/');
        }
        // A wildcard in the tag name must not run past the '#' that ends the name.
        translatePattern(stringBuffer, tagName, "#");
        stringBuffer.append('#');
        stringBuffer.append("\\d+");
        stringBuffer.append('#');
        stringBuffer.append("[^>]*");

        if (tag.countHTMLAttributes() > 0) {
            // The array is laid out as name, value, name, value, ...
            String[] attrs = tag.getHTMLAttributes();
            sortAttrs(attrs);
            for (int k = 0; k < attrs.length; k += 2) {
                String attrName = attrs[k];
                String attrValue = attrs[k + 1];
                stringBuffer.append(' ');
                translatePattern(stringBuffer, attrName, "= >");
                if (attrValue != Region.TRUE) {
                    stringBuffer.append('=');
                    translatePattern(stringBuffer, encodeAttrValue(attrValue), " >");
                }
                stringBuffer.append(' ');
                stringBuffer.append("[^>]*");
            }
        }
        stringBuffer.append('>');
    }

    /**
     * Sorts a flat name/value array in place by ascending name, using an insertion
     * sort that moves each name together with the value that follows it.
     *
     * @param strArr array laid out as name, value, name, value, ...
     */
    static void sortAttrs(String[] strArr) {
        for (int next = 2; next < strArr.length; next += 2) {
            String name = strArr[next];
            String value = strArr[next + 1];
            int slot = next;
            // Shift larger-named pairs one pair to the right to open a slot.
            while (slot > 0 && strArr[slot - 2].compareTo(name) > 0) {
                strArr[slot] = strArr[slot - 2];
                strArr[slot + 1] = strArr[slot - 1];
                slot -= 2;
            }
            strArr[slot] = name;
            strArr[slot + 1] = value;
        }
    }

    /**
     * Percent-encodes the characters that delimit the canonical tag form.
     *
     * @param str the raw attribute value
     * @return the value with {@code %}, space, {@code <} and {@code >} replaced by
     *         {@code %25}, {@code %20}, {@code %3C} and {@code %3E}
     */
    static String encodeAttrValue(String str) {
        // '%' must be handled first so the escapes produced below are not re-encoded.
        if (str.indexOf('%') != -1) {
            str = Str.replace(str, "%", "%25");
        }
        if (str.indexOf(' ') != -1) {
            str = Str.replace(str, " ", "%20");
        }
        if (str.indexOf('<') != -1) {
            str = Str.replace(str, "<", "%3C");
        }
        if (str.indexOf('>') != -1) {
            str = Str.replace(str, ">", "%3E");
        }
        return str;
    }

    /**
     * Converts a wildcard string to a regexp with {@link Wildcard#toRegexp(String)}
     * and appends it to a buffer, replacing every unescaped {@code .} with a
     * negated character class so the match cannot cross a delimiter.
     *
     * @param stringBuffer buffer that receives the translated pattern
     * @param str          the wildcard pattern to translate
     * @param str2         characters placed inside the {@code [^...]} class that replaces {@code .}
     * @return the entire contents of {@code stringBuffer} after appending
     *         (not only the newly appended part)
     */
    static String translatePattern(StringBuffer stringBuffer, String str, String str2) {
        String regexp = Wildcard.toRegexp(str);
        boolean escaped = false;
        for (int pos = 0, end = regexp.length(); pos < end; pos++) {
            char ch = regexp.charAt(pos);
            if (escaped) {
                // Character following a backslash is copied verbatim.
                stringBuffer.append(ch);
                escaped = false;
            } else if (ch == '\\') {
                stringBuffer.append(ch);
                escaped = true;
            } else if (ch == '.') {
                stringBuffer.append("[^");
                stringBuffer.append(str2);
                stringBuffer.append(']');
            } else {
                stringBuffer.append(ch);
            }
        }
        return stringBuffer.toString();
    }

    /**
     * Appends a non-tag token of the tag expression to the regexp, replacing each
     * {@code .} with a group that matches one whole canonical tag.
     *
     * @param stringBuffer buffer that receives the translated text
     * @param str          the token text
     */
    static void translateText(StringBuffer stringBuffer, String str) {
        stringBuffer.append(Str.replace(str, ".", "(?:<[^>]*>)"));
    }

    /**
     * Command-line driver: matches a tag expression against one or more pages and
     * prints every match together with its {@link Region}-valued object labels.
     *
     * @param strArr {@code strArr[0]} is the pattern (underscores are turned into
     *               spaces); the remaining elements are the source URLs
     * @throws Exception if loading a page or matching fails
     */
    public static void main(String[] strArr) throws Exception {
        if (strArr.length < 2) {
            System.err.println("usage: Tagexp <pattern> <source URL>*");
            return;
        }

        Tagexp tagexp = new Tagexp(strArr[0].replace('_', ' '));
        for (int i = 1; i < strArr.length; i++) {
            Page page = new Page(new Link(strArr[i]));
            System.out.println("-----------" + strArr[i]);

            PatternMatcher matcher = tagexp.match(page);
            for (Region hit = matcher.nextMatch(); hit != null; hit = matcher.nextMatch()) {
                System.out.println("[" + hit.getStart() + "," + hit.getEnd() + "]" + hit);

                Enumeration<?> labels = hit.enumerateObjectLabels();
                while (labels.hasMoreElements()) {
                    String label = (String) labels.nextElement();
                    Object labelled = hit.getObjectLabel(label);
                    // Only labels whose value is itself a Region are printed.
                    if (labelled instanceof Region) {
                        Region sub = (Region) labelled;
                        System.out.println("    " + label + "=[" + sub.getStart() + ","
                                + sub.getEnd() + "]" + sub);
                    }
                }
            }
        }
    }
}
