#!/bin/python3
# Author CF
# Date: Oct. 2024
# Goal: reads in a csv file of n x n similarities
#       and recovers the points through SVD


import numpy as np
import numpy.linalg as npl
import random as r
import math
import matplotlib.pyplot as plt
import pandas as pd
# import scipy.linalg as spl
import click


def my_scatter(a):
    """Scatter-plot column 0 against column 1 of ``a`` and show the figure.

    ``a`` must be 2-dimensional (its shape is unpacked into exactly two
    values) and have at least two columns; any further columns are ignored.
    Both axes are fixed to the range [-1.1, 1.1].

    Raises:
        AssertionError: if ``a`` has fewer than two columns.
    """
    _rows, m = np.shape(a)
    assert m>1, f"needs at least 2 columns ({m=} given)"

    xs, ys = a[:, 0], a[:, 1]
    plt.scatter(xs, ys)

    # Same fixed window on both axes, x first and then y.
    for set_limits in (plt.xlim, plt.ylim):
        set_limits(-1.1, 1.1)

    plt.title('HW4 Q3c: recovered points')
    plt.grid()
    plt.show()

def get_rank(s, epsilon=0.001):
    """Return the 'effective' rank of a sequence of singular values.

    ``s`` is expected to be sorted in descending order.  The result is the
    number of leading entries that are strictly greater than ``epsilon``;
    counting stops at the first entry that is not.

    Args:
        s: iterable of singular values, largest first.
        epsilon: threshold at or below which a value is treated as zero.

    Returns:
        The count of leading values > ``epsilon``: 0 for an empty input,
        ``len(s)`` when every value exceeds the threshold.
    """
    rank = 0
    # Iterating (rather than indexing with an unbounded counter) keeps the
    # scan inside the sequence when every value exceeds the threshold.
    for value in s:
        if not value > epsilon:
            break
        rank += 1
    return rank



def my_svd(a):
    """Compute a reduced SVD of ``a`` truncated to its effective rank.

    The effective rank is whatever ``get_rank`` reports for the singular
    values returned by ``numpy.linalg.svd``.

    Returns:
        A tuple ``(U, S, Vt)`` holding the first ``rank`` columns of U, the
        first ``rank`` singular values, and the first ``rank`` rows of Vt.
    """
    left, sigma, right_t = npl.svd(a, full_matrices=False)
    keep = get_rank(sigma)
    return left[:, :keep], sigma[:keep], right_t[:keep, :]


@click.command()
@click.argument('fname', type=click.Path(exists=True, readable=True), nargs=1)
@click.option('--verbose', '-v', is_flag=True, default=False,
              help='Also print the intermediate matrices.')
def main(fname, verbose=False):
    """Recover points from the similarity matrix stored in the CSV file FNAME.

    The file is read without a header row.  The script reports the effective
    rank of the matrix (Q3a/Q3b), scatter-plots the first two coordinates of
    the recovered points (Q3c), and shows a heat map of the absolute
    difference between the given and the reconstructed similarities (Q3d).
    """
    similarities = pd.read_csv(fname, header=None).to_numpy()

    if verbose:
        print('----- similarity matrix s ------')
        print(f'{np.shape(similarities)=}')
        print(similarities)

    # Vt is not needed: the points are rebuilt from U and S alone.
    U, S, _ = my_svd(similarities)
    rank = np.shape(U)[1]
    print(f'HW4 Q3a: effective rank of similarity matrix is: {rank} ')
    # S holds the singular values of the similarity matrix itself; their
    # square roots are only the per-coordinate scale factors used below.
    print(f'         (not required) Its singular values are:  {S}')

    print(f'HW4 Q3b: min. dimensionality is {rank}, exactly the rank')

    # One recovered point per row: scale column j of U by sqrt(S[j]).
    # NOTE(review): points @ points.T reproduces the input only when the
    # similarity matrix is symmetric positive semi-definite - confirm that
    # this holds for the data being analysed.
    U2 = U * np.sqrt(S)
    if verbose:
        k, l = np.shape(U2)
        print(f'U2: {k=} {l=}')
        print(U2)
    print("HW4 Q3c: plotting the reconstructed points in 2d")
    my_scatter(U2)

    new_similarities = np.matmul(U2, np.transpose(U2))
    abs_diff = np.absolute(similarities - new_similarities)

    if verbose:
        print(abs_diff)
        print(f'{abs_diff.max()=}')
    print("HW4 Q3d: plotting the diff of similarities")
    plt.imshow(abs_diff, cmap='jet')
    plt.title('HW4 Q3d: diff of given vs recovered sim. scores')
    # Fixed colour scale from 0 to 0.01 for the difference image.
    plt.clim(0, 0.01)
    plt.colorbar()
    plt.show()






# Run the click command only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
