/*
 * Decompiled with CFR 0.152.
 */
import java.util.Random;
import java.util.Vector;

/**
 * Tabular Q-learning agent that walks a {@link Maze} and learns, for every
 * cell and action, an estimate of the accumulated cost of acting from there.
 *
 * <p>Values in this class are <em>costs</em>: the preferred action in a state
 * is the one with the smallest Q-value (see {@link #getBestAction(double[])}).
 * Each call to {@link #step()} performs one update of the form
 * {@code Q(s,a) = (1 - lr) * Q(s,a) + lr * (cost + min_a' Q(s',a'))}.
 *
 * <p>NOTE(review): {@code calcTrueValues()} is private and never invoked in
 * this file, so {@code optVals} stays exactly as its constructor left it when
 * {@link #evalPolicy()} computes its score, and {@code optPolicy} stays
 * {@code null}. Confirm whether that is intentional.
 */
public class QLearning {
    /** Number of actions per state; sizes the Q-table and the exploration split. */
    private static final int NUM_ACTIONS = 4;
    /** Policy entry of a state whose Q-values have not been updated yet. */
    private static final int NO_ACTION = -1;
    /** Policy evaluation stops as soon as any state value exceeds this bound. */
    private static final double MAX_VALUE_ALLOWED = 1000.0;
    /** Constant of the learning-rate decay {@code N * max / (N + episodes)}. */
    private static final double LR_DECAY_CONSTANT = 1000.0;
    /** Progress is printed every this many completed episodes. */
    private static final int REPORT_INTERVAL = 1000;

    /** Learning rate used as-is, or as the starting point when decaying. */
    private double maxLearningRate;
    /** Noise parameter handed to {@code Action.performAction} and used by {@link #evalPolicy()}. */
    private double pjog;
    /** Exploration parameter: {@link #chooseAction} explores when its random draw is below this. */
    private double epsilon;
    /** Cost charged for every valid transition. */
    private final int pathCost = 1;
    private Maze myMaze;
    /** Greedy (minimum-Q) action per cell, or {@code -1} where nothing was learned yet. */
    private int[][] policy;
    /** Q-table indexed as {@code [x][y][action]}. */
    private double[][][] qsa;
    /** When true the learning rate shrinks as episodes accumulate. */
    private boolean decayingLR;
    /** True when the most recently chosen action was the greedy one. */
    public boolean isBestAct = true;
    /** True when the most recent step attempted an invalid transition. */
    public boolean receivedPenalty = false;
    State start;
    State currState;
    private ValueFunction currValues;
    private int numEpisodes;
    double learningRate;
    ValueFunction optVals;
    ValueFunction evaluatedVals;
    int[][] optPolicy;
    boolean isOptValCalc;
    /** Convergence threshold on the largest per-sweep change in {@link #evalPolicy()}. */
    double PRECISION = 0.01;

    /**
     * Creates an agent for the given maze, positioned at cell (0, 0).
     *
     * @param _maze       maze to learn on; its {@code width} and {@code height} size all tables
     * @param _pjog       noise parameter forwarded to {@code Action.performAction}
     * @param _lr         (maximum) learning rate
     * @param _epsilon    exploration parameter used by action selection
     * @param _decayingLR whether the learning rate decays with the episode count
     */
    public QLearning(Maze _maze, double _pjog, double _lr, double _epsilon, boolean _decayingLR) {
        this.myMaze = _maze;
        this.pjog = _pjog;
        this.maxLearningRate = _lr;
        this.epsilon = _epsilon;
        this.decayingLR = _decayingLR;
        this.start = new State(0, 0);
        this.currState = new State(0, 0);
        this.currValues = new ValueFunction(this.myMaze.width, this.myMaze.height);
        this.policy = new int[this.myMaze.width][this.myMaze.height];
        this.qsa = new double[this.myMaze.width][this.myMaze.height][NUM_ACTIONS];
        this.initialize();
        this.evaluatedVals = new ValueFunction(this.myMaze.width, this.myMaze.height);
        this.optVals = new ValueFunction(this.myMaze.width, this.myMaze.height);
    }

    /**
     * Resets the learner: restores the learning rate, moves the agent back to
     * the start state, clears the episode counter, zeroes every Q-value and
     * marks every policy entry as "no action".
     */
    public void initialize() {
        this.learningRate = this.maxLearningRate;
        this.currState.copy(this.start);
        this.numEpisodes = 0;
        this.currValues.initialize();
        for (double[][] column : this.qsa) {
            for (double[] actionValues : column) {
                for (int a = 0; a < actionValues.length; a++) {
                    actionValues[a] = 0.0;
                }
            }
        }
        for (int[] column : this.policy) {
            for (int y = 0; y < column.length; y++) {
                column[y] = NO_ACTION;
            }
        }
    }

    /**
     * Updates one tunable parameter from its textual value. Unknown keys are
     * ignored.
     *
     * <p>A new learning rate only replaces {@code maxLearningRate}; the rate
     * actually applied by {@link #step()} is refreshed at the next episode
     * boundary or by {@link #initialize()}.
     *
     * @param name  one of the {@link Properties} keys
     * @param value textual value; a double for the numeric keys, and
     *              {@code "true"} (case-insensitive) to enable the boolean key
     * @throws NumberFormatException if a numeric value cannot be parsed
     */
    public void setProperty(int name, String value) {
        if (name == Properties.PJOG) {
            this.pjog = Double.parseDouble(value);
        } else if (name == Properties.Epsilon) {
            this.epsilon = Double.parseDouble(value);
        } else if (name == Properties.LearningRate) {
            this.maxLearningRate = Double.parseDouble(value);
        } else if (name == Properties.DecayingLR) {
            // Same truth semantics as the deprecated new Boolean(String).
            this.decayingLR = Boolean.parseBoolean(value);
        }
    }

    /**
     * Advances learning by one move.
     *
     * <p>If the agent is standing on a goal, the episode is closed instead:
     * the agent returns to the start state, the episode counter grows, the
     * learning rate is recomputed and no Q-value is touched.
     *
     * @return {@code true} if this call closed an episode, {@code false} if it
     *         performed a Q-value update
     */
    public boolean step() {
        if (this.reachedGoal(this.currState)) {
            this.currState.copy(this.start);
            ++this.numEpisodes;
            this.learningRate = this.decayingLR
                    ? LR_DECAY_CONSTANT * this.maxLearningRate / (LR_DECAY_CONSTANT + (double) this.numEpisodes)
                    : this.maxLearningRate;
            if (this.numEpisodes % REPORT_INTERVAL == 0) {
                System.out.println(this.numEpisodes + "," + this.learningRate);
            }
            return true;
        }

        int x = this.currState.x;
        int y = this.currState.y;
        int currAction = this.chooseAction(this.currState, Math.random());
        double oldQ = this.qsa[x][y][currAction];
        State nextState = Action.performAction(this.currState, currAction, this.pjog);

        double transitionCost;
        if (this.myMaze.isValidTransition(this.currState, nextState)) {
            transitionCost = this.pathCost;
            this.receivedPenalty = false;
        } else {
            // Invalid move: charge the maze's value for it and stay in place.
            transitionCost = this.myMaze.getReward(this.currState, nextState);
            this.receivedPenalty = true;
            nextState.copy(this.currState);
        }

        double nextStateQmin = this.getMinQsa(nextState);
        this.qsa[x][y][currAction] =
                oldQ * (1.0 - this.learningRate) + this.learningRate * (transitionCost + nextStateQmin);
        this.policy[x][y] = this.getBestAction(this.qsa[x][y]);
        this.currState.copy(nextState);
        return false;
    }

    /**
     * Runs one episode: resets the agent to the start state and steps until a
     * goal state is reached.
     *
     * <p>NOTE(review): {@code numIterations} is not used by this method;
     * confirm whether it was meant to bound the number of episodes or steps.
     *
     * @param numIterations currently ignored
     */
    public void execute(int numIterations) {
        this.currState.copy(this.start);
        while (!this.reachedGoal(this.currState)) {
            this.step();
        }
    }

    /**
     * Refreshes and returns the state-value table, where each cell holds the
     * smallest Q-value of that cell.
     *
     * @return the internal value function (not a copy)
     */
    public ValueFunction getValueFunction() {
        for (int i = 0; i < this.myMaze.width; i++) {
            for (int j = 0; j < this.myMaze.height; j++) {
                this.currValues.stateValue[i][j] = this.getMinQsa(new State(i, j));
            }
        }
        return this.currValues;
    }

    /** @return the internal policy table (not a copy) */
    public int[][] getPolicy() {
        return this.policy;
    }

    /** @return the internal Q-table (not a copy) */
    public double[][][] getQsa() {
        return this.qsa;
    }

    /** @return the agent's current state object (not a copy) */
    public State getCurrState() {
        return this.currState;
    }

    /**
     * Picks the action to take in {@code currState}.
     *
     * <p>The interval {@code [0, epsilon)} is split into equal slices, one per
     * action; if {@code randNum} falls into slice {@code i}, action {@code i}
     * is taken. Otherwise the greedy action is taken. Also records in
     * {@link #isBestAct} whether the result equals the greedy action.
     *
     * @param currState state to act in
     * @param randNum   random draw; {@link #step()} passes {@code Math.random()}
     * @return index of the chosen action
     */
    private int chooseAction(State currState, double randNum) {
        int bestAction = this.getBestAction(this.qsa[currState.x][currState.y]);
        double sliceWidth = this.epsilon / (double) NUM_ACTIONS;
        int choosenAction = bestAction;
        for (int i = 0; i < NUM_ACTIONS; i++) {
            if (randNum < (double) (i + 1) * sliceWidth) {
                choosenAction = i;
                break;
            }
        }
        Utility.show("BestAction:" + bestAction);
        Utility.show("Rand" + randNum);
        Utility.show("ChoosenAction:" + choosenAction);
        this.isBestAct = choosenAction == bestAction;
        return choosenAction;
    }

    /**
     * Returns the index of the smallest entry; ties go to the lowest index.
     *
     * @param actions Q-values of one state, one entry per action
     * @return index of the minimum value
     */
    private int getBestAction(double[] actions) {
        int bestAction = 0;
        double min = actions[0];
        for (int i = 1; i < actions.length; i++) {
            if (min > actions[i]) {
                min = actions[i];
                bestAction = i;
            }
        }
        return bestAction;
    }

    /**
     * Returns the smallest Q-value stored for the given state.
     *
     * @param st state to look up
     * @return minimum over the state's action values
     */
    private double getMinQsa(State st) {
        double[] actionValues = this.qsa[st.x][st.y];
        return actionValues[this.getBestAction(actionValues)];
    }

    /** Tells whether {@code s} is contained in the maze's goal collection. */
    private boolean reachedGoal(State s) {
        return this.myMaze.goals.contains(s);
    }

    /** @return the policy stored by {@code calcTrueValues()}, or {@code null} if it never ran */
    public int[][] getOptPolicy() {
        return this.optPolicy;
    }

    /** @return the values produced by the latest {@link #evalPolicy()} call */
    public ValueFunction getEvaluatedVals() {
        return this.evaluatedVals;
    }

    /** @return the reference values that {@link #evalPolicy()} scores against */
    public ValueFunction getOptVals() {
        return this.optVals;
    }

    /**
     * Runs value iteration on the maze until its {@code step()} reports
     * completion, then stores the resulting values and policy as reference.
     *
     * <p>NOTE(review): nothing in this file calls this method.
     */
    private void calcTrueValues() {
        ValueIteration valitr = new ValueIteration(this.myMaze, this.pjog, 0.01);
        while (!valitr.step()) {
            // Iterate until the solver reports completion.
        }
        this.optVals = valitr.getValueFunction();
        this.optPolicy = valitr.getPolicy();
        this.isOptValCalc = true;
    }

    /**
     * Sums, over all cells, the absolute difference between the reference
     * values and the evaluated values.
     */
    private double computeScore() {
        double netScore = 0.0;
        double[][] evaluated = this.evaluatedVals.stateValue;
        for (int i = 0; i < evaluated.length; i++) {
            for (int j = 0; j < evaluated[i].length; j++) {
                netScore += Math.abs(this.optVals.stateValue[i][j] - evaluated[i][j]);
            }
        }
        return netScore;
    }

    /**
     * Evaluates the current greedy policy by repeated sweeps over all cells
     * and scores the result against {@code optVals}.
     *
     * <p>In each sweep, goal cells and cells without a learned action get the
     * value 0. For every other cell the value is the path cost plus the
     * weighted sum over the maze's successors of that cell: the successor
     * matching the policy's action is weighted {@code 1 - pjog}, every other
     * successor {@code pjog / 3}. A valid successor contributes its value
     * from the previous sweep; an invalid one contributes the maze's reward
     * for that move plus the cell's own previous value.
     *
     * <p>Sweeps stop once the largest change is below {@link #PRECISION} or
     * any value exceeds the allowed maximum. The final values are copied into
     * {@code evaluatedVals}.
     *
     * @return sum of absolute differences between {@code optVals} and the
     *         evaluated values
     */
    public double evalPolicy() {
        this.evaluatedVals.initialize();
        ValueFunction evalVals = new ValueFunction(this.myMaze.width, this.myMaze.height);
        ValueFunction prevEvalVals = new ValueFunction(this.myMaze.width, this.myMaze.height);
        prevEvalVals.initialize();

        boolean valueConverged = false;
        while (!valueConverged) {
            evalVals.initialize();
            double maxDelta = 0.0;
            double maxV = 0.0;
            for (int i = 0; i < this.myMaze.width; i++) {
                for (int j = 0; j < this.myMaze.height; j++) {
                    State state = new State(i, j);
                    if (this.myMaze.goals.contains(state)) {
                        evalVals.stateValue[i][j] = 0.0;
                        continue;
                    }
                    // The element type of getSuccessors is declared outside this file,
                    // so the copy stays a raw Vector and elements are cast on read.
                    @SuppressWarnings({"rawtypes", "unchecked"})
                    Vector allNext = new Vector(this.myMaze.getSuccessors(state));
                    if (NO_ACTION == this.policy[i][j]) {
                        evalVals.stateValue[i][j] = 0.0;
                        continue;
                    }
                    State desiredNextState = Action.performAction(state, this.policy[i][j]);
                    double v = 0.0;
                    for (int m = 0; m < allNext.size(); m++) {
                        State s = (State) allNext.get(m);
                        double prob = desiredNextState.equals(s)
                                ? 1.0 - this.pjog
                                : this.pjog / (double) (NUM_ACTIONS - 1);
                        double safe = this.myMaze.isValidTransition(state, s)
                                ? prevEvalVals.stateValue[s.x][s.y]
                                : this.myMaze.getReward(state, s) + prevEvalVals.stateValue[i][j];
                        v += prob * safe;
                    }
                    v += this.pathCost;
                    evalVals.stateValue[i][j] = v;
                    if (maxV < v) {
                        maxV = v;
                    }
                    double delta = Math.abs(evalVals.stateValue[i][j] - prevEvalVals.stateValue[i][j]);
                    if (maxDelta < delta) {
                        maxDelta = delta;
                    }
                }
            }
            // Stop on convergence, or bail out once values grow past the allowed bound.
            if (maxDelta < this.PRECISION || maxV > MAX_VALUE_ALLOWED) {
                valueConverged = true;
            }
            this.copyValues(evalVals, prevEvalVals);
        }
        this.copyValues(evalVals, this.evaluatedVals);
        return this.computeScore();
    }

    /** Copies every cell within the maze's width and height from {@code source} to {@code target}. */
    private void copyValues(ValueFunction source, ValueFunction target) {
        for (int i = 0; i < this.myMaze.width; i++) {
            for (int j = 0; j < this.myMaze.height; j++) {
                target.stateValue[i][j] = source.stateValue[i][j];
            }
        }
    }

    /** Integer keys accepted by {@link QLearning#setProperty(int, String)}. */
    static class Properties {
        public static final int PJOG = 1;
        public static final int LearningRate = 2;
        public static final int Epsilon = 3;
        public static final int DecayingLR = 4;

        Properties() {
        }
    }
}

