"""
@author: Nihar B. Shah

The Count-Randomize-Least squares (CRL) estimator of the paper "Feeling the Bern: Adaptive Estimators for Bernoulli Probabilities of Pairwise Comparisons" by N. B. Shah, S. Balakrishnan, M. J. Wainwright (http://arxiv.org/abs/1603.06881)

INPUTS:
Y is an n x n matrix with entries in {0,1}, where n = number of items. The entries of Y above and below the diagonal are related as Y[i,j] = 1 - Y[j,i]
randomize_threshold is a non-negative real number. The set chosen in the randomize step is the largest set of consecutively ranked items whose numbers of pairwise wins all differ from each other by less than randomize_threshold*sqrt(n)*log(n)

OUTPUT:
An n x n matrix with entries in the interval [0,1]

DEPENDENCIES:
This code uses the Iso package in R for the least squares step. In order to use the package, please do the following:
- You must have R installed: https://cran.cnr.berkeley.edu/
- Please download the Iso package for R: https://cran.r-project.org/web/packages/Iso/index.html
- If the Iso package is a compressed archive, uncompress it
- Now install the package in R. One way to install it is to open R, go to the Packages menu, select add package, and then select the folder where you downloaded the Iso package.
- Install the rpy2 package which allows for interfacing python with R. On OSX, go to the terminal and type "pip install rpy2" (without the quotes). See http://rpy.sourceforge.net/rpy2/doc-dev/html/overview.html for more details.
If you are using an external IDE for python such as Spyder, you may also need to change the PYTHONPATH to point to where the rpy2 package is installed.
"""

import numpy
from pandas import *
from rpy2.robjects.packages import importr
import rpy2.robjects as ro
import pandas.rpy.common as com
from rpy2.robjects import r
from rpy2.robjects import numpy2ri


def _largest_cluster(sorted_counts, width):
    """Find the longest run of near-tied items in a descending count sequence.

    sorted_counts must be in non-increasing order. Returns the inclusive
    (start, end) positions of the longest contiguous run whose first and last
    entries differ by strictly less than width. Among runs of equal length the
    earliest one is kept, and (0, 0) is returned when no two entries qualify.
    """
    best_start = best_end = 0
    lo = 0
    for hi in range(len(sorted_counts)):
        # Because the counts are non-increasing, the left edge of the widest
        # admissible window ending at hi can only move right as hi grows.
        while lo < hi and not (sorted_counts[lo] - sorted_counts[hi] < width):
            lo += 1
        if hi - lo > best_end - best_start:
            best_start, best_end = lo, hi
    return best_start, best_end


def CRL(Y, randomize_threshold = .5):
    """Count-Randomize-Least squares (CRL) estimate of pairwise probabilities.

    Parameters:
        Y: n x n array-like with entries in {0,1}, where Y[i,j] = 1 - Y[j,i]
            off the diagonal. The diagonal is ignored (it is replaced by 0.5
            before the least squares step).
        randomize_threshold: non-negative real number. Items in the largest
            group of consecutively ranked items whose win counts differ by
            less than randomize_threshold*sqrt(n)*log(n) are ordered
            uniformly at random.

    Returns:
        An n x n numpy array, indexed by the original item order, holding the
        bivariate isotonic least squares fit computed by biviso from R's Iso
        package.

    Raises:
        ValueError: if Y is not a square two-dimensional matrix.

    Side effects:
        Activates the rpy2 numpy converter, loads the R package "Iso", and
        draws from numpy's global random state.
    """
    Y = numpy.asarray(Y)
    if Y.ndim != 2 or Y.shape[0] != Y.shape[1]:
        raise ValueError("Y must be a square n x n matrix")
    n = len(Y)

    #STEP 1: COUNT
    counters = numpy.sum(Y, 1)
    #perm[k] is the index of the item ranked k-th (position 0 = most wins)
    perm = numpy.argsort(-counters)

    #STEP 2: RANDOMIZE
    #Find largest cluster with counter differences below the threshold
    width = randomize_threshold*numpy.sqrt(n)*numpy.log(n)
    maxclusterstart, maxclusterend = _largest_cluster(counters[perm], width)

    #Randomize within chosen cluster (right-hand side is a copy, so the
    #in-place slice assignment is safe)
    maxclusterperm = numpy.random.permutation(numpy.arange(maxclusterstart, maxclusterend + 1))
    perm[maxclusterstart:maxclusterend + 1] = perm[maxclusterperm]

    #STEP 3: LEAST SQUARES
    numpy2ri.activate()
    ro.r.library(package="Iso")#, lib_loc="~/Downloads/Iso")

    #Iso package requires items to be in permuted (ranked) order:
    #row/column k of Yperm must hold the item ranked k-th, i.e.
    #Yperm[i,j] = Y[perm[i],perm[j]]. (Writing Yperm[perm[i],perm[j]] = Y[i,j]
    #would apply the inverse permutation and leave the items unsorted.)
    ranked = numpy.ix_(perm, perm)
    Yperm = Y[ranked].astype(float)
    numpy.fill_diagonal(Yperm, 0.5)

    #The Iso package of R requires increasing from left to right and increasing from top to bottom
    #Our definition is decreasing from top to bottom
    #So will reverse rows now and reverse back later
    Ypermrev = Yperm[::-1, :].copy()

    #Change the thresholds below for trading off accuracy and computation time
    Mhatpermrev = numpy.array(ro.r.biviso(Ypermrev, eps = 1e-6, eps2 = 1e-6, fatal='FALSE', ncycle = 20000))

    #Undo the row reversal
    Mhatperm = Mhatpermrev[::-1, :]

    #Undo the ranking permutation: entry (i,j) of the ranked fit belongs to
    #the original item pair (perm[i], perm[j])
    Mhat = numpy.zeros((n, n))
    Mhat[ranked] = Mhatperm

    return Mhat
