/*
 * Decompiled with CFR 0.152.
 */
package example.hw2;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Partitioner;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class Indexing
extends Configured
implements Tool {
    /**
     * Configures and synchronously runs the inverted-index job.
     *
     * <p>The job reads a sequence file, maps with {@link InvertedIndexerMapper},
     * partitions with {@link InvertedIndexerPartitioner}, reduces with
     * {@link InvertedIndexerReducer} on 5 reduce tasks and writes text output.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @return 0 when the job was submitted and {@code JobClient.runJob} returned
     *         normally; 2 when fewer than two arguments were supplied
     * @throws Exception whatever job set-up or {@code JobClient.runJob} throws
     */
    public int run(String[] args) throws Exception {
        // Fail with a usage message instead of an ArrayIndexOutOfBoundsException
        // when the input or the output path is missing.
        if (args == null || args.length < 2) {
            System.err.println("Usage: Indexing <input path> <output path>");
            return 2;
        }

        JobConf conf = new JobConf(this.getConf(), Indexing.class);
        conf.setJobName("Build Inverted Indexing");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        conf.setMapperClass(InvertedIndexerMapper.class);
        conf.setPartitionerClass(InvertedIndexerPartitioner.class);
        conf.setReducerClass(InvertedIndexerReducer.class);
        conf.setNumReduceTasks(5);

        conf.setInputFormat(SequenceFileInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path[]{new Path(args[0])});
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
        return 0;
    }

    /**
     * Command-line entry point.
     *
     * <p>When no arguments are given, the job runs on the bundled sample
     * ({@code ./input-seq/Wiki-1k.seq}) and writes to {@code ./output}, which is
     * deleted first. Otherwise the arguments are handed to {@link ToolRunner}
     * unchanged. The process exits with the code returned by the tool.
     *
     * @param args optional input path and output path
     * @throws Exception propagated from {@code ToolRunner.run}
     */
    public static void main(String[] args) throws Exception {
        String[] jobArgs = args;
        if (args.length < 1) {
            String sampleInput = "Wiki-1k.seq";
            jobArgs = new String[]{"./input-seq/" + sampleInput, "./output"};
            // Clear the default output directory left behind by a previous run.
            Indexing.deleteDir(new File(jobArgs[1]));
        }
        int exitCode = ToolRunner.run(new Configuration(), new Indexing(), jobArgs);
        System.exit(exitCode);
    }

    /**
     * Recursively deletes a file or directory.
     *
     * <p>Children are deleted depth-first; the walk stops at the first child that
     * cannot be deleted. If the directory cannot be listed ({@code File.list()}
     * returns {@code null}, e.g. on an I/O error), no children are visited and
     * the final {@code delete()} decides the outcome.
     *
     * @param dir file or directory to remove
     * @return {@code true} if {@code dir} itself was deleted; {@code false} if it
     *         did not exist or any deletion failed
     */
    public static boolean deleteDir(File dir) {
        if (dir.isDirectory()) {
            String[] children = dir.list();
            // list() returns null when the directory cannot be read; the original
            // loop dereferenced it unconditionally and threw a NullPointerException.
            if (children != null) {
                for (String child : children) {
                    if (!deleteDir(new File(dir, child))) {
                        return false;
                    }
                }
            }
        }
        return dir.delete();
    }

    /**
     * Returns the entries of {@code unsorted} ordered by ascending key, with each
     * value list sorted ascending.
     *
     * <p>The value lists are sorted in place, so the lists held by
     * {@code unsorted} are reordered as well and are shared with the result.
     *
     * @param unsorted map from an integer key to a mutable list of integers
     * @return a {@link LinkedHashMap} whose iteration order is ascending key order
     */
    public static HashMap<Integer, List<Integer>> sortByKeyIntListInt(HashMap<Integer, List<Integer>> unsorted) {
        List<Map.Entry<Integer, List<Integer>>> entries =
                new ArrayList<Map.Entry<Integer, List<Integer>>>(unsorted.entrySet());
        Collections.sort(entries, new Comparator<Map.Entry<Integer, List<Integer>>>() {

            public int compare(Map.Entry<Integer, List<Integer>> left, Map.Entry<Integer, List<Integer>> right) {
                return left.getKey().compareTo(right.getKey());
            }
        });

        LinkedHashMap<Integer, List<Integer>> result = new LinkedHashMap<Integer, List<Integer>>();
        for (Map.Entry<Integer, List<Integer>> entry : entries) {
            List<Integer> sortedValues = entry.getValue();
            Collections.sort(sortedValues);
            result.put(entry.getKey(), sortedValues);
        }
        return result;
    }

    /**
     * Suffix-stripping word stemmer; the structure (steps 1-6, measure
     * {@code m()}, {@code cvc}, {@code doublec}) matches the Porter stemming
     * algorithm.
     *
     * <p>Usage: feed the letters of one word with {@link #add(char)} or
     * {@link #add(char[], int)}, call {@link #stem()}, then read the result with
     * {@link #toString()} or {@link #getResultBuffer()} /
     * {@link #getResultLength()}. {@code stem()} resets the input position, so
     * the same instance can be reused for the next word.
     *
     * <p>The vowel tests only recognise the lower-case letters a, e, i, o, u, y.
     * Instances hold mutable state and perform no synchronisation.
     */
    public static class Stemmer {
        /** Number of extra slots allocated whenever the buffer has to grow. */
        private static final int INC = 50;

        /** Working buffer holding the word being stemmed. */
        private char[] b = new char[INC];
        /** Number of characters added so far (next write offset into {@code b}). */
        private int i = 0;
        /** Length of the stemmed word left in {@code b} by the last {@code stem()}. */
        private int i_end = 0;
        /** Index of the last stem character; set by {@code ends()} and {@code step6()}. */
        private int j;
        /** Index of the last character of the current word. */
        private int k;

        /**
         * Appends one character to the word being built, growing the buffer by
         * {@code INC} slots when it is full.
         */
        public void add(char ch) {
            if (this.i == this.b.length) {
                char[] new_b = new char[this.i + INC];
                System.arraycopy(this.b, 0, new_b, 0, this.i);
                this.b = new_b;
            }
            this.b[this.i++] = ch;
        }

        /**
         * Appends the first {@code wLen} characters of {@code w} to the word being
         * built, growing the buffer first when they would reach its end.
         */
        public void add(char[] w, int wLen) {
            if (this.i + wLen >= this.b.length) {
                char[] new_b = new char[this.i + wLen + INC];
                System.arraycopy(this.b, 0, new_b, 0, this.i);
                this.b = new_b;
            }
            for (int c = 0; c < wLen; ++c) {
                this.b[this.i++] = w[c];
            }
        }

        /** Returns the word produced by the last call to {@link #stem()}. */
        @Override
        public String toString() {
            return new String(this.b, 0, this.i_end);
        }

        /** Returns the length of the word produced by the last {@link #stem()}. */
        public int getResultLength() {
            return this.i_end;
        }

        /**
         * Returns the internal buffer itself (not a copy); only the first
         * {@link #getResultLength()} characters are the stemmed word.
         */
        public char[] getResultBuffer() {
            return this.b;
        }

        /**
         * Tells whether {@code b[i]} is a consonant. The letters a, e, i, o, u are
         * never consonants; 'y' is a consonant at index 0 or when the preceding
         * character is not a consonant.
         */
        private final boolean cons(int i) {
            switch (this.b[i]) {
                case 'a': 
                case 'e': 
                case 'i': 
                case 'o': 
                case 'u': {
                    return false;
                }
                case 'y': {
                    return i == 0 ? true : !this.cons(i - 1);
                }
            }
            return true;
        }

        /**
         * Counts the vowel-run/consonant-run pairs in {@code b[0..j]}: leading
         * consonants are skipped, then every consonant run that follows a vowel
         * run adds one.
         */
        private final int m() {
            int n = 0;
            int i = 0;
            // Skip the optional leading consonant run.
            while (true) {
                if (i > this.j) {
                    return n;
                }
                if (!this.cons(i)) break;
                ++i;
            }
            ++i;
            while (i <= this.j) {
                // Still inside a vowel run.
                if (!this.cons(i)) {
                    ++i;
                    continue;
                }
                // A consonant after a vowel run completes one pair.
                ++i;
                ++n;
                // Skip the rest of this consonant run.
                while (true) {
                    if (i > this.j) {
                        return n;
                    }
                    if (!this.cons(i)) break;
                    ++i;
                }
                ++i;
            }
            return n;
        }

        /** Tells whether {@code b[0..j]} contains at least one vowel. */
        private final boolean vowelinstem() {
            for (int i = 0; i <= this.j; ++i) {
                if (this.cons(i)) continue;
                return true;
            }
            return false;
        }

        /** Tells whether {@code b[j-1]} and {@code b[j]} are the same consonant. */
        private final boolean doublec(int j) {
            if (j < 1) {
                return false;
            }
            if (this.b[j] != this.b[j - 1]) {
                return false;
            }
            return this.cons(j);
        }

        /**
         * Tells whether {@code b[i-2..i]} is consonant-vowel-consonant with the
         * final consonant being none of w, x, y.
         */
        private final boolean cvc(int i) {
            if (i < 2 || !this.cons(i) || this.cons(i - 1) || !this.cons(i - 2)) {
                return false;
            }
            char ch = this.b[i];
            return ch != 'w' && ch != 'x' && ch != 'y';
        }

        /**
         * Tells whether {@code b[0..k]} ends with {@code s}. On a match, {@code j}
         * is set to the index just before the suffix; otherwise {@code j} is left
         * untouched.
         */
        private final boolean ends(String s) {
            int l = s.length();
            int o = this.k - l + 1;
            if (o < 0) {
                return false;
            }
            for (int i = 0; i < l; ++i) {
                if (this.b[o + i] == s.charAt(i)) continue;
                return false;
            }
            this.j = this.k - l;
            return true;
        }

        /** Overwrites the buffer from {@code j + 1} with {@code s} and moves {@code k} to its end. */
        private final void setto(String s) {
            int l = s.length();
            int o = this.j + 1;
            for (int i = 0; i < l; ++i) {
                this.b[o + i] = s.charAt(i);
            }
            this.k = this.j + l;
        }

        /** Applies {@link #setto(String)} only when the stem {@code b[0..j]} has {@code m() > 0}. */
        private final void r(String s) {
            if (this.m() > 0) {
                this.setto(s);
            }
        }

        /**
         * Removes plural "s" endings and "ed"/"ing" endings, restoring a trailing
         * "e" or undoing a doubled consonant where the rules below apply.
         */
        private final void step1() {
            if (this.b[this.k] == 's') {
                if (this.ends("sses")) {
                    this.k -= 2;
                } else if (this.ends("ies")) {
                    this.setto("i");
                } else if (this.b[this.k - 1] != 's') {
                    --this.k;
                }
            }
            if (this.ends("eed")) {
                if (this.m() > 0) {
                    --this.k;
                }
            } else if ((this.ends("ed") || this.ends("ing")) && this.vowelinstem()) {
                this.k = this.j;
                if (this.ends("at")) {
                    this.setto("ate");
                } else if (this.ends("bl")) {
                    this.setto("ble");
                } else if (this.ends("iz")) {
                    this.setto("ize");
                } else if (this.doublec(this.k)) {
                    --this.k;
                    char ch = this.b[this.k];
                    // A doubled l, s or z is kept as a pair.
                    if (ch == 'l' || ch == 's' || ch == 'z') {
                        ++this.k;
                    }
                } else if (this.m() == 1 && this.cvc(this.k)) {
                    this.setto("e");
                }
            }
        }

        /** Turns a terminal "y" into "i" when the stem before it contains a vowel. */
        private final void step2() {
            if (this.ends("y") && this.vowelinstem()) {
                this.b[this.k] = 'i';
            }
        }

        /**
         * Maps double suffixes to single ones (e.g. "ization" to "ize") when the
         * stem before the suffix has {@code m() > 0}. The switch on the
         * penultimate character only narrows which suffixes are tried.
         */
        private final void step3() {
            if (this.k == 0) {
                return;
            }
            switch (this.b[this.k - 1]) {
                case 'a': {
                    if (this.ends("ational")) {
                        this.r("ate");
                        break;
                    }
                    if (!this.ends("tional")) break;
                    this.r("tion");
                    break;
                }
                case 'c': {
                    if (this.ends("enci")) {
                        this.r("ence");
                        break;
                    }
                    if (!this.ends("anci")) break;
                    this.r("ance");
                    break;
                }
                case 'e': {
                    if (!this.ends("izer")) break;
                    this.r("ize");
                    break;
                }
                case 'l': {
                    if (this.ends("bli")) {
                        this.r("ble");
                        break;
                    }
                    if (this.ends("alli")) {
                        this.r("al");
                        break;
                    }
                    if (this.ends("entli")) {
                        this.r("ent");
                        break;
                    }
                    if (this.ends("eli")) {
                        this.r("e");
                        break;
                    }
                    if (!this.ends("ousli")) break;
                    this.r("ous");
                    break;
                }
                case 'o': {
                    if (this.ends("ization")) {
                        this.r("ize");
                        break;
                    }
                    if (this.ends("ation")) {
                        this.r("ate");
                        break;
                    }
                    if (!this.ends("ator")) break;
                    this.r("ate");
                    break;
                }
                case 's': {
                    if (this.ends("alism")) {
                        this.r("al");
                        break;
                    }
                    if (this.ends("iveness")) {
                        this.r("ive");
                        break;
                    }
                    if (this.ends("fulness")) {
                        this.r("ful");
                        break;
                    }
                    if (!this.ends("ousness")) break;
                    this.r("ous");
                    break;
                }
                case 't': {
                    if (this.ends("aliti")) {
                        this.r("al");
                        break;
                    }
                    if (this.ends("iviti")) {
                        this.r("ive");
                        break;
                    }
                    if (!this.ends("biliti")) break;
                    this.r("ble");
                    break;
                }
                case 'g': {
                    if (!this.ends("logi")) break;
                    this.r("log");
                }
            }
        }

        /**
         * Shortens or removes "-icate", "-ative", "-alize", "-iciti", "-ical",
         * "-ful" and "-ness" when the stem before the suffix has {@code m() > 0}.
         */
        private final void step4() {
            switch (this.b[this.k]) {
                case 'e': {
                    if (this.ends("icate")) {
                        this.r("ic");
                        break;
                    }
                    if (this.ends("ative")) {
                        this.r("");
                        break;
                    }
                    if (!this.ends("alize")) break;
                    this.r("al");
                    break;
                }
                case 'i': {
                    if (!this.ends("iciti")) break;
                    this.r("ic");
                    break;
                }
                case 'l': {
                    if (this.ends("ical")) {
                        this.r("ic");
                        break;
                    }
                    if (!this.ends("ful")) break;
                    this.r("");
                    break;
                }
                case 's': {
                    if (!this.ends("ness")) break;
                    this.r("");
                }
            }
        }

        /**
         * Removes one of the listed suffixes ("-al", "-ance", "-ment", ...) when
         * the stem before it has {@code m() > 1}. Each case returns early when no
         * suffix matched; a {@code break} falls through to the shared measure
         * check at the bottom.
         */
        private final void step5() {
            if (this.k == 0) {
                return;
            }
            switch (this.b[this.k - 1]) {
                case 'a': {
                    if (this.ends("al")) break;
                    return;
                }
                case 'c': {
                    if (this.ends("ance") || this.ends("ence")) break;
                    return;
                }
                case 'e': {
                    if (this.ends("er")) break;
                    return;
                }
                case 'i': {
                    if (this.ends("ic")) break;
                    return;
                }
                case 'l': {
                    if (this.ends("able") || this.ends("ible")) break;
                    return;
                }
                case 'n': {
                    if (this.ends("ant") || this.ends("ement") || this.ends("ment") || this.ends("ent")) break;
                    return;
                }
                case 'o': {
                    // "ion" only counts when the stem ends in 's' or 't'.
                    if (this.ends("ion") && this.j >= 0 && (this.b[this.j] == 's' || this.b[this.j] == 't') || this.ends("ou")) break;
                    return;
                }
                case 's': {
                    if (this.ends("ism")) break;
                    return;
                }
                case 't': {
                    if (this.ends("ate") || this.ends("iti")) break;
                    return;
                }
                case 'u': {
                    if (this.ends("ous")) break;
                    return;
                }
                case 'v': {
                    if (this.ends("ive")) break;
                    return;
                }
                case 'z': {
                    if (this.ends("ize")) break;
                    return;
                }
                default: {
                    return;
                }
            }
            if (this.m() > 1) {
                this.k = this.j;
            }
        }

        /**
         * Drops a final "e" when {@code m() > 1}, or when {@code m() == 1} and the
         * characters before it are not consonant-vowel-consonant; then reduces a
         * final "ll" to "l" when {@code m() > 1}.
         */
        private final void step6() {
            int a;
            this.j = this.k;
            if (this.b[this.k] == 'e' && ((a = this.m()) > 1 || a == 1 && !this.cvc(this.k - 1))) {
                --this.k;
            }
            if (this.b[this.k] == 'l' && this.doublec(this.k) && this.m() > 1) {
                --this.k;
            }
        }

        /**
         * Stems the word accumulated through {@code add(...)}. Words of one or two
         * characters are left unchanged. Afterwards the result is available via
         * {@link #toString()} and the input position is reset to 0 for the next
         * word.
         */
        public void stem() {
            this.k = this.i - 1;
            if (this.k > 1) {
                this.step1();
                this.step2();
                this.step3();
                this.step4();
                this.step5();
                this.step6();
            }
            this.i_end = this.k + 1;
            this.i = 0;
        }
    }

    /**
     * Reducer that merges all postings of one term into a single index line.
     *
     * <p>Input key: {@code "<partition>.<term>"}; input values:
     * {@code "<docID>,<position>"} with both parts parsed as integers.
     * Output key: the term (text after the first '.'); output value:
     * {@code docCount,doc1,n1,p1,...,pn1,doc2,n2,...} with documents in ascending
     * id order and positions in ascending order.
     */
    public static class InvertedIndexerReducer
    extends MapReduceBase
    implements Reducer<Text, Text, Text, Text> {
        public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
            // Group the positions by document id.
            HashMap<Integer, List<Integer>> positionsByDoc = new HashMap<Integer, List<Integer>>();
            while (values.hasNext()) {
                String[] fields = values.next().toString().split("\\,");
                int docID = Integer.parseInt(fields[0]);
                int position = Integer.parseInt(fields[1]);
                List<Integer> postings = positionsByDoc.get(docID);
                if (postings == null) {
                    postings = new ArrayList<Integer>();
                    positionsByDoc.put(docID, postings);
                }
                postings.add(position);
            }

            HashMap<Integer, List<Integer>> ordered = Indexing.sortByKeyIntListInt(positionsByDoc);

            // Emit the separator before every field after the first, which yields
            // the same text as appending trailing commas and trimming the last one.
            StringBuilder line = new StringBuilder();
            line.append(ordered.size());
            for (Map.Entry<Integer, List<Integer>> posting : ordered.entrySet()) {
                List<Integer> positions = posting.getValue();
                line.append(',').append(posting.getKey());
                line.append(',').append(positions.size());
                for (int position : positions) {
                    line.append(',').append(position);
                }
            }

            // Strip the "<partition>." prefix that the mapper put in front of the term.
            Text term = new Text(key.toString().split("\\.")[1]);
            output.collect(term, new Text(line.toString()));
        }
    }

    /**
     * Partitioner that routes a record by the integer prefix of its key.
     *
     * <p>Keys have the form {@code "<number>.<rest>"}; the partition is that
     * number modulo the partition count. The value is ignored.
     */
    private static class InvertedIndexerPartitioner
    implements Partitioner<Text, Text> {
        private InvertedIndexerPartitioner() {
        }

        /** No job settings are read. */
        public void configure(JobConf arg0) {
        }

        /**
         * @param key           composite key whose text before the first '.' is an integer
         * @param value         unused
         * @param numPartitions number of partitions
         * @return the key's numeric prefix modulo {@code numPartitions}
         */
        public int getPartition(Text key, Text value, int numPartitions) {
            String compositeKey = key.toString();
            String partitionPrefix = compositeKey.split("\\.")[0];
            int partitionId = Integer.parseInt(partitionPrefix);
            return partitionId % numPartitions;
        }
    }

    /**
     * Mapper that turns one document into {@code (partition.term, docID,position)}
     * records.
     *
     * <p>Input key: a name containing {@code doc<ID>.txt}; the text between the
     * last "doc" and the last ".txt" is used as the document id. Input value: the
     * document text. For every whitespace-separated token that survives
     * {@code WordParse} the mapper emits key {@code "<pID>.<stem>"} and value
     * {@code "<docID>,<position>"}, where {@code pID} is derived from the hash of
     * the document id.
     *
     * <p>The output {@link Text} objects and the {@link Stemmer} are static and
     * shared by all instances; no synchronisation is performed.
     */
    public static class InvertedIndexerMapper
    extends MapReduceBase
    implements Mapper<Text, Text, Text, Text> {
        /** Number of partition ids the document-id hash is folded into. */
        private static final int numPartitionDefault = 5;
        /**
         * Position assigned to the first token of a document.
         * NOTE(review): the reason for starting at 12 is not evident from this
         * file; the value is kept unchanged.
         */
        private static final int FIRST_TOKEN_POSITION = 12;
        /** Number of leading line matches that are always dropped. */
        private static final int IGNORED_LEADING_LINES = 3;

        /** Matches one line at a time (empty lines included). */
        private static final Pattern LINE_PATTERN = Pattern.compile("^(.*)$", Pattern.MULTILINE);
        /** Matches a bracketed span such as {@code [12]} or {@code [edit]}. */
        private static final Pattern BRACKETED_PATTERN = Pattern.compile("\\[[^\\[]*\\]");
        /** Matches every character that is not an ASCII letter. */
        private static final Pattern NON_LETTER_PATTERN = Pattern.compile("[^A-Za-z]");

        private static final String[] stopWords = new String[]{"a", "an", "and", "are", "as", "at", "for", "i", "if", "in", "is", "it", "of", "on", "so", "that", "the", "to"};
        private static final String[] skipParseParts = new String[]{"Views", "Personal tools", "Navigation", "Interaction", "Toolbox", "Languages", "References"};
        /** Stems that are never indexed. */
        private static final Set<String> stopWordSet;
        /** Lower-cased section titles; the first line equal to one of them ends parsing. */
        private static final Set<String> skipParsePartsSet;

        private static final Text keyOutput = new Text();
        private static final Text valueOutput = new Text();
        private static final Stemmer s = new Stemmer();

        private boolean caseSensitive = false;

        static {
            // Built once at class-load time so the tables exist even when
            // configure() has not been called.
            stopWordSet = new HashSet<String>(Arrays.asList(stopWords));
            skipParsePartsSet = new HashSet<String>();
            for (String title : skipParseParts) {
                skipParsePartsSet.add(title.toLowerCase(Locale.ROOT));
            }
        }

        /** No job settings are read; the lookup tables are initialised statically. */
        public void configure(JobConf job) {
        }

        /**
         * Emits one record per indexable token of the document.
         *
         * @param key      name containing {@code doc<ID>.txt}
         * @param value    document text
         * @param output   receives {@code ("<pID>.<stem>", "<docID>,<position>")}
         * @param reporter unused
         * @throws IOException if the collector fails
         */
        public void map(Text key, Text value, OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
            String valueStr = value.toString();
            if (!this.caseSensitive) {
                // Locale.ROOT keeps the ASCII mapping independent of the JVM's
                // default locale (e.g. Turkish dotless i).
                valueStr = valueStr.toLowerCase(Locale.ROOT);
            }

            String keyStr = key.toString();
            String docID = keyStr.substring(keyStr.lastIndexOf("doc") + 3, keyStr.lastIndexOf(".txt"));
            // Taking the remainder before abs() yields the same id as
            // abs(hash) % n for every hash except Integer.MIN_VALUE, where abs()
            // overflows and the old expression produced a negative id.
            int pID = Math.abs(docID.hashCode() % numPartitionDefault);

            StringTokenizer itr = new StringTokenizer(this.DeleteRedundantWikipedia(valueStr));
            // The position advances for every token, including skipped ones.
            int pos = FIRST_TOKEN_POSITION;
            while (itr.hasMoreTokens()) {
                String aword = this.WordParse(itr.nextToken());
                if (aword.length() > 0) {
                    keyOutput.set(pID + "." + aword);
                    valueOutput.set(docID + "," + pos);
                    output.collect(keyOutput, valueOutput);
                }
                ++pos;
            }
        }

        /**
         * Returns the indexable stems of {@code valueStr}, each preceded by one
         * space. Not called anywhere in this class.
         *
         * @param valueStr document text
         * @return the concatenated stems, or an empty string when none survive
         */
        private String TextParse(String valueStr) {
            StringBuilder outputStr = new StringBuilder();
            StringTokenizer tokenizer = new StringTokenizer(this.DeleteRedundantWikipedia(valueStr));
            while (tokenizer.hasMoreTokens()) {
                String aWord = this.WordParse(tokenizer.nextToken());
                if (aWord.length() > 0) {
                    outputStr.append(' ').append(aWord);
                }
            }
            return outputStr.toString();
        }

        /**
         * Keeps the body lines of a document: the first three line matches and
         * all empty lines are dropped, everything from the first line equal to a
         * skip-section title onwards is discarded, and bracketed spans are
         * removed from the lines that remain.
         *
         * @param valueStr document text
         * @return the kept lines, each preceded by a newline
         */
        private String DeleteRedundantWikipedia(String valueStr) {
            StringBuilder kept = new StringBuilder();
            Matcher matcher = LINE_PATTERN.matcher(valueStr);
            int lineCnt = 0;
            while (matcher.find()) {
                String aline = matcher.group(0);
                // The counter advances for empty lines too.
                if (lineCnt++ < IGNORED_LEADING_LINES || aline.length() == 0) {
                    continue;
                }
                if (skipParsePartsSet.contains(aline)) {
                    // Nothing after a skip-section title is kept.
                    break;
                }
                kept.append('\n').append(BRACKETED_PATTERN.matcher(aline).replaceAll(""));
            }
            return kept.toString();
        }

        /**
         * Normalises one token: tokens starting with "http" or "file" are
         * rejected, non-letters are removed, the rest is stemmed, and stop words
         * are rejected.
         *
         * @param aWord raw token
         * @return the stem, or an empty string when the token must not be indexed
         */
        private String WordParse(String aWord) {
            // "https" is already covered by the "http" prefix.
            if (aWord.startsWith("http") || aWord.startsWith("file")) {
                return "";
            }
            String letters = NON_LETTER_PATTERN.matcher(aWord).replaceAll("");
            if (letters.length() == 0) {
                return "";
            }
            s.add(letters.toCharArray(), letters.length());
            s.stem();
            String stem = s.toString();
            if (stem.length() == 0 || stopWordSet.contains(stem)) {
                return "";
            }
            return stem;
        }

        /** Nothing to release. */
        public void close() throws IOException {
        }

        static enum Counters {
            INPUT_WORDS;

        }
    }
}

