import numpy as np

def load_data(N_val=10):
    """ Load the training dataset and split off a validation set.

        The arrays are read from 'data/regression_train_input.npy' and
        'data/regression_train_output.npy' (paths relative to the current
        working directory). The last N_val rows become the validation set
        and all rows before them become the training set.

        N_val: number of trailing rows to hold out for validation
            (defaults to 10)

        Returns tuple of length 4: (X_train, y_train, X_val, y_val)
        where X_train and y_train hold the first N - N_val rows of the input
        and output arrays, and X_val and y_val hold the last N_val rows.
        (The input file is expected to be N-x-M and the output file N-x-1;
        the shapes are whatever the .npy files contain.)

        Raises ValueError if the two files disagree on the number of rows or
        if N_val is negative or larger than the number of rows.
    """
    X = np.load('data/regression_train_input.npy')
    y = np.load('data/regression_train_output.npy')

    N = len(y)
    if len(X) != N:
        raise ValueError(
            "input has %d rows but output has %d rows" % (len(X), N))

    # Without this check a too-large N_val makes N_train negative, and the
    # slices below would silently wrap around and return a wrong split.
    if not 0 <= N_val <= N:
        raise ValueError(
            "N_val must be between 0 and %d, got %r" % (N, N_val))

    N_train = N - N_val

    X_train = X[:N_train]
    y_train = y[:N_train]
    X_val = X[N_train:]
    y_val = y[N_train:]

    return (X_train, y_train, X_val, y_val)


def polynomial_kernel(x, z, d):
    """ Return the result of applying the polynomial kernel of degree
        up to d on the two input vectors.
        K(x, z) = (x^Tz+1)^d

        x: Mx1 numpy ndarray
        z: Mx1 numpy ndarray
        d: degree of the polynomial (the exponent in the formula above)

        Returns: float value after applying kernel to x and z
    """
    # Flatten both inputs so the inner product is a plain scalar, whether
    # the vectors arrive as Mx1 columns or as 1-D arrays of length M.
    inner_product = float(np.dot(np.ravel(x), np.ravel(z)))
    return (inner_product + 1.0) ** d


def rbf_kernel(x, z, gamma):
    """ Return the result of applying the radial basis function kernel
        on the two input vectors, given the hyperparameter gamma.
        K(x, z) = exp(-gamma * ||x - z||^2)

        NOTE(review): the formula above is the common "gamma"
        parameterization of the RBF kernel; confirm it matches the
        convention intended for this assignment.

        x: Mx1 numpy ndarray
        z: Mx1 numpy ndarray
        gamma: float value of hyperparameter

        Returns: float value after applying kernel to x and z
    """
    # Flatten so the squared distance is a scalar for Mx1 or 1-D inputs.
    difference = np.ravel(x) - np.ravel(z)
    squared_distance = np.dot(difference, difference)
    return float(np.exp(-gamma * squared_distance))



def predict_naive_kernel_regression(X, X_train, y_train, kernel_function):
    """ Predict an output for every row of X with the naive kernel estimate.

        Each prediction is the sum, over all training points, of the training
        output multiplied by the kernel value between the query row and that
        training input.

        X: NxM numpy ndarray; one prediction is produced per row.
        X_train: N_train-x-M numpy ndarray of training inputs
        y_train: N_train-x-1 numpy array of training outputs
        kernel_function: Callable taking two Mx1 numpy ndarrays and
            returning a float value.

        Returns: Nx1 numpy ndarray whose i-th entry is the prediction for
            the i-th row of X
    """
    num_features = X_train.shape[1]
    column_shape = (num_features, 1)

    predictions = np.zeros((X.shape[0], 1))
    for q, query_row in enumerate(X):
        query = np.reshape(query_row, column_shape)
        for t, train_row in enumerate(X_train):
            # View the t-th training input as an Mx1 column vector
            train_point = np.reshape(train_row, column_shape)

            # Weight the t-th training output by the kernel between the
            # query and the t-th training input. This is the naive
            # weighting, which the real kernel regression is meant to
            # correct.
            predictions[q] += y_train[t] * kernel_function(query, train_point)

    return predictions
    
def predict_kernel_regression(X, X_train, y_train, kernel_function, lamb=0.01):
    """ Predict the output values y for the given input design matrix X.

        Uses regularized kernel regression: with K the N_train-x-N_train
        matrix of kernel values between all pairs of training inputs,
        solve (K + lamb * I) alpha = y_train for alpha, then predict
        y = K(X, X_train) alpha.

        NOTE(review): this is the standard kernel ridge regression
        formulation; confirm it matches the derivation intended for this
        assignment.

        X: Input matrix in NxM numpy ndarray, where we want to predict the output
            for the vector in each row of X.
        X_train: Design matrix of training input in N_train-x-M numpy ndarray
        y_train: Training output in N_train-x-1 numpy array
        kernel_function: Function that takes two arguments that are each
            Mx1 numpy ndarrays and returns a float value.
        lamb: float value of regularization hyperparameter, lambda (Note, this is a
            different hyperparameter than the hyperparameter used in RBF kernels)

        Returns: Nx1 numpy ndarray, where the i-th entry is the predicted value
            corresponding the i-th row vector in X

        Raises numpy.linalg.LinAlgError if K + lamb * I is singular (possible
        when lamb is 0).
    """
    X = np.asarray(X)
    X_train = np.asarray(X_train)
    N_train, M = X_train.shape
    N = X.shape[0]

    # Accept y_train as either an N_train-x-1 column or a flat vector.
    targets = np.asarray(y_train, dtype=float).reshape(N_train, -1)

    # Reshape every row once to the Mx1 form that kernel_function expects.
    train_cols = [np.reshape(row, (M, 1)) for row in X_train]
    query_cols = [np.reshape(row, (M, 1)) for row in X]

    def kernel_matrix(rows, cols):
        # Entry (i, j) is the kernel value of rows[i] and cols[j]. The final
        # reshape keeps the result 2-D even when one of the lists is empty.
        values = [[kernel_function(a, b) for b in cols] for a in rows]
        return np.array(values, dtype=float).reshape(len(rows), len(cols))

    K_train = kernel_matrix(train_cols, train_cols)
    K_query = kernel_matrix(query_cols, train_cols)

    # Solve the linear system directly instead of forming an explicit
    # inverse; it is cheaper and numerically better behaved.
    alpha = np.linalg.solve(K_train + lamb * np.eye(N_train), targets)

    return K_query @ alpha


def mse(y, y_hat):
    """ Return the mean squared error between y and y_hat.

        y: true output values
        y_hat: predicted output values

        The difference is taken elementwise, squared, and averaged over all
        entries. Note: the two inputs should have the same shape; NumPy
        broadcasting would otherwise pair up entries that do not correspond
        (e.g. an (N,) array against an (N, 1) array yields N-x-N differences).

        Returns: the mean of the squared differences
    """
    return np.mean((y - y_hat) ** 2)

