#!/usr/bin/env python3


import argparse
import json

from cfr_util import *




###########################################################################
# Starting from here, you should fill in the implementation of the
# different functions


def expected_utility_pl1(game, sf_strategy_pl1, sf_strategy_pl2):
    """Returns the expected utility for Player 1 in the game, when the two
    players play according to the given sequence-form strategies.

    Each strategy is first passed to `assert_is_valid_sf_strategy` together
    with the corresponding player's decision problem taken from `game`
    (presumably this rejects invalid strategies; see `cfr_util`).

    The utility computation itself is not implemented yet, so once the
    validation has run this function always raises `NotImplementedError`.
    """

    # Validate Player 1's strategy first, then Player 2's, each against the
    # decision problem stored in `game` for that player.
    validations = (
        ("decision_problem_pl1", sf_strategy_pl1),
        ("decision_problem_pl2", sf_strategy_pl2),
    )
    for problem_key, sf_strategy in validations:
        assert_is_valid_sf_strategy(game[problem_key], sf_strategy)

    # FINISH: compute and return Player 1's expected utility.
    raise NotImplementedError


def uniform_sf_strategy(tfsdp):
    """Returns the uniform sequence-form strategy for the given tree-form
    sequential decision process.

    Not implemented yet: calling this function currently always raises
    `NotImplementedError`.
    """

    # FINISH: build and return the uniform strategy for `tfsdp`.
    raise NotImplementedError


class RegretMatching(object):
    """Regret matching over a fixed, finite set of actions.

    The minimizer keeps, for every action, the cumulative regret of not
    having always played that action. Each strategy it outputs plays every
    action with probability proportional to the positive part of its
    cumulative regret, and falls back to the uniform distribution when no
    action has positive regret.

    Calls are expected to alternate `next_strategy`, `observe_utility`, ...,
    starting with `next_strategy`.
    """

    def __init__(self, action_set):
        """Creates a regret minimizer for the actions in `action_set`
        (any iterable of hashable actions)."""
        self.action_set = set(action_set)

        # Cumulative regret of each action; everything starts at zero, so
        # the first strategy is uniform.
        self.cumulative_regret = {action: 0.0 for action in self.action_set}

        # Strategy most recently returned by `next_strategy`. It is the
        # strategy that the next observed utility is evaluated against.
        self.last_strategy = None

    def next_strategy(self):
        """Returns a dictionary mapping each action in `self.action_set` to
        the probability of picking that action.

        The dictionary is empty when the action set is empty.
        """
        positive_regret = {
            action: max(regret, 0.0)
            for action, regret in self.cumulative_regret.items()
        }
        total = sum(positive_regret.values())

        if total > 0:
            strategy = {
                action: regret / total
                for action, regret in positive_regret.items()
            }
        else:
            # No action has positive regret: play uniformly. With an empty
            # action set the comprehension body never runs, so there is no
            # division by zero.
            num_actions = len(self.action_set)
            strategy = {
                action: 1.0 / num_actions for action in self.action_set
            }

        self.last_strategy = strategy
        # Hand out a copy so that callers mutating the result cannot corrupt
        # the strategy remembered for the next regret update.
        return dict(strategy)

    def observe_utility(self, utility):
        """Updates the cumulative regrets given the utility of each action.

        `utility` must be a dictionary whose keys are exactly the actions in
        `self.action_set`; its values are the utilities of the actions. The
        regret of an action grows by its utility minus the expected utility
        of the last strategy returned by `next_strategy`.
        """
        assert isinstance(utility, dict) and utility.keys() == self.action_set

        strategy = self.last_strategy
        if strategy is None:
            # `observe_utility` was called before any `next_strategy`;
            # evaluate against the strategy that would have been returned.
            strategy = self.next_strategy()

        expected_utility = sum(
            strategy[action] * utility[action] for action in self.action_set
        )
        for action in self.action_set:
            self.cumulative_regret[action] += (
                utility[action] - expected_utility
            )


class Cfr(object):
    """Skeleton of a CFR-style regret minimizer for a tree-form sequential
    decision process, holding one local regret minimizer per decision node.

    `next_strategy` and `observe_utility` are not implemented yet and
    currently always raise `NotImplementedError`.
    """

    def __init__(self, tfsdp, rm_class=RegretMatching):
        """Stores `tfsdp` and creates one `rm_class` instance for every node
        of `tfsdp` whose "type" is "decision"."""
        self.tfsdp = tfsdp

        # Local regret minimizers keyed by decision-node id; each one is
        # constructed from the actions available at its node.
        minimizers = {}
        for node in tfsdp:
            if node["type"] != "decision":
                continue
            local_minimizer = rm_class(node["actions"])
            minimizers[node["id"]] = local_minimizer
        self.local_regret_minimizers = minimizers

    def next_strategy(self):
        # FINISH: not implemented yet.
        raise NotImplementedError

    def observe_utility(self, utility):
        # FINISH: not implemented yet.
        raise NotImplementedError


def run_cfr(game, iterations):
    """Returns a pair of sequence-form strategies that are the result of
    running CFR on the game for the specified number of iterations.

    Not implemented yet: calling this function currently always raises
    `NotImplementedError`.
    """
    # FINISH: run CFR on `game` for `iterations` iterations.
    raise NotImplementedError


def run_dcfr(game, iterations):
    """Returns a pair of sequence-form strategies that are the result of
    running DCFR(alpha=1.5, beta=0, gamma=2) on the game for the specified
    number of iterations.

    Not implemented yet: calling this function currently always raises
    `NotImplementedError`.
    """
    # This method is only necessary for the extra credit part of the homework.
    # If you are not attempting the extra credit, you may leave it unimplemented.
    # FINISH
    raise NotImplementedError

def run_pcfrp(game, iterations):
    """Returns a pair of sequence-form strategies that are the result of
    running PCFR+ on the game for the specified number of iterations.

    Not implemented yet: calling this function currently always raises
    `NotImplementedError`.
    """
    # This method is only necessary for the extra credit part of the homework.
    # If you are not attempting the extra credit, you may leave it unimplemented.
    # FINISH
    raise NotImplementedError
