//Henry Leung (hgl)
//15418 Final Project

//Parallel Graph Isomorphism Solver in OpenMP

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/time.h>
#include <omp.h>
#include <string.h>

#define MAX_N 1000 // upper bound on the vertex count; every static matrix is MAX_N x MAX_N
#define INFINITY (-1) // "no path" sentinel; parenthesised so it expands safely inside larger expressions
#define SPACING 16 // stride, in ints, used to keep per-thread slots of a shared array apart
// Smaller of two values. Function-like macro: each argument may be evaluated twice,
// so do not pass expressions with side effects.
#define min( a, b ) ( ((a) < (b)) ? (a) : (b) )

// One entry of a singly linked stack of matrix cells: records the (i, j)
// coordinates of a cell so that a change made to it can be undone later.
typedef struct changeNode changeNode;

struct changeNode {
    changeNode* next; // entry underneath this one, or NULL at the bottom
    int i;            // row of the recorded cell
    int j;            // column of the recorded cell
};

// Global variables

int procs = 1;    // requested thread count (-p); also the number of per-thread change stacks
int n = 0;        // number of vertices, taken from the first integer of the input
int verbose = 0;  // non-zero (-v) makes main echo the matrices after each phase

/* Shared state for the whole solver. Every matrix is statically sized
 MAX_N x MAX_N; only the leading n x n corner is read or written. */

// Candidate matrix and its scratch twin: entry [i][j] is non-zero while
// vertex i of A is still allowed to map to vertex j of B.
int P[MAX_N][MAX_N];
int newP[MAX_N][MAX_N];

// Adjacency matrices of the two input graphs
int adjA[MAX_N][MAX_N];
int adjB[MAX_N][MAX_N];

// Per-vertex degrees (count of positive entries in the adjacency row)
int degA[MAX_N];
int degB[MAX_N];

// Sorted copies of the degree arrays, compared element by element in main
int sortedDegA[MAX_N];
int sortedDegB[MAX_N];

// All-pairs shortest-distance matrices; the dist*/newDist* pairs are the two
// buffers the Floyd-Warshall loop alternates between
int distA[MAX_N][MAX_N];
int distB[MAX_N][MAX_N];
int newDistA[MAX_N][MAX_N];
int newDistB[MAX_N][MAX_N];

// Array of stack heads, one per thread id, used by push() and pop()
changeNode** allChanges;

// Current ("old") and scratch ("new") candidate matrices; the two pointers
// are swapped after each filtering pass
int (*oldPtrP)[MAX_N][MAX_N] = &P;
int (*newPtrP)[MAX_N][MAX_N] = &newP;

// Wall-clock timer: current time of day in seconds, with the microsecond
// part carried in the fraction.

double gettime()
{
    struct timeval now;
    double seconds;

    gettimeofday(&now, NULL);
    seconds = now.tv_sec;
    seconds += now.tv_usec / 1000000.0;
    return seconds;
}

// Usage function: prints the command-line synopsis to stdout.
// program is the executable name shown in the first line (normally argv[0]).

void Usage(char *program) {
    printf("Usage: %s [options]\n", program);
    printf("-p <num>\tNumber of processors to use\n");
    printf("-i <file>\tInput file name\n");
    printf("-o <file>\tOutput file name\n");
    // -v is accepted by main's getopt string, so it belongs in the help text too
    printf("-v <num>\tVerbosity; non-zero prints the intermediate matrices\n");
    printf("-h \t\tDisplay this help\n");
}

// Phase timestamps, in seconds as returned by gettime(). Each one is taken
// where the named phase begins, so a phase's duration is the next timestamp
// minus its own.
double initializationStart,   // top of main, before argument parsing
       computationStart,
       computationEnd;

double degComputeStart,       // degree counting
       degSortAndCompStart,   // sorting and comparing the degree arrays
       degFilterStart,        // degree-based filtering of P
       floydStart,            // all-pairs shortest distances
       distFilterStart,       // distance-based filtering of P
       checkPStart,           // first check of P
       exhaustiveSearchStart, // backtracking search
       exhaustiveSearchEnd;


// Forward declarations of the helpers defined after main.
// (Prototypes for functions this file neither defines nor calls were dropped.)
void parallelMergeSort(int* array, int length);
int floyd(int direct, int indirect1, int indirect2);
void distFilter(int i,int j);
void exhaustiveSearch(int lastRow);
changeNode* pop();
void push(int a, int b);
int checkP();

int main(int argc, char **argv)
{
    initializationStart = gettime();
    int opt;
    int i, j,k;
    int tid;
    int blockSize;
    
    //parse arguments
    while ((opt = getopt(argc, argv, "p:i:o:v:")) != -1)
    {
        switch (opt)
        {
            case 'p':
            {
                procs = atoi(optarg);
                break;
            }
            case 'i':
            {
                if (dup2(open(optarg, O_RDONLY), STDIN_FILENO) < 0)
                {
                    perror("");
                    exit(EXIT_FAILURE);
                }
                break;
            }
            case 'o':
            {
                if (dup2(open(optarg, O_CREAT | O_WRONLY, 0644), STDOUT_FILENO) < 0)
                {
                    perror("");
                    exit(EXIT_FAILURE);
                }
                break;
            }
            case 'v':
            {
                verbose = atoi(optarg);
                break;
            }
            default:
                Usage(argv[0]);
                exit(1);
        }
    }
    
    omp_set_num_threads(procs);
    
    // read adjacency matix (stored in file as the full matrix)
    scanf("%d", &n);
    
    //Copy contents of input file into adjacency matrix A
    if(verbose) printf("\nAdjacency matrix A\n");
    for (i = 0; i < n; i++){
        for (j = 0; j < n; j++){
            scanf("%d", &adjA[i][j]);
            if(verbose)
                printf("%d ",adjA[i][j]);
        }
        if(verbose) printf("\n");
    }
    
    //Copy contents of input file into adjacency matrix B
    if(verbose) printf("\nAdjacency matrix B\n");
    for (i = 0; i < n; i++){
        for (j = 0; j < n; j++){
            scanf("%d", &adjB[i][j]);
            if(verbose)
                printf("%d ",adjB[i][j]);
        }
        if(verbose) printf("\n");
    }
    
    degComputeStart = gettime();
    
    //Parallel compute degree matrices
#pragma	omp	parallel default(shared) private(i,tid,j)
    {
        tid = omp_get_thread_num();
        blockSize = n/procs + 1;
        for(i = 0; i < blockSize; i++){
            if(i*procs+tid >= n)
                break;
            for(j = 0; j<n;j++){
                degA[i*procs+tid] += (adjA[i*procs+tid][j] > 0);
                degB[i*procs+tid] += (adjB[i*procs+tid][j] > 0);
            }
        }
    }
    if(verbose){
        printf("\nParallel compute degree matrices:\n");
        printf("\ndegA: ");
        for(i = 0; i<n; i++)
            printf("%d ",degA[i]);        
        printf("\ndegB: ");
        for(i = 0; i<n; i++)
            printf("%d ",degB[i]);
    }
    
    degSortAndCompStart = gettime();
    
    //Sort degree matrices
    memcpy(sortedDegA,degA,sizeof(int)*n);
    memcpy(sortedDegB,degB,sizeof(int)*n);
    
#pragma omp parallel
    {
#pragma omp single
        {
            parallelMergeSort(sortedDegA, n);
            parallelMergeSort(sortedDegB, n);
        }
    }
    if(verbose){
        printf("\n\nParallel merge sort degree matrices:\n");
        printf("\nsorted degA: ");
        for(i = 0; i<n; i++)
            printf("%d ",sortedDegA[i]);        
        printf("\nsorted degB: ");
        for(i = 0; i<n; i++)
            printf("%d ",sortedDegB[i]);
    }
    
    //Compare sorted degrees
    for(i = 0; i < n; i++){
        if(sortedDegA[i]!=sortedDegB[i]){
            printf("NOT ISOMORPHIC\n");
            exit(1);
        }
    }
    
    degFilterStart = gettime();
    //Parallel preprocess P with degree filtering
#pragma	omp	parallel default(shared) private(i,tid,j)
    {
        tid = omp_get_thread_num();
        blockSize = n/procs + 1;
        for(i = 0; i < blockSize; i++){
            if(i*procs+tid >= n)
                break;
            for(j = 0; j<n;j++){
                (*oldPtrP)[i*procs+tid][j] = (degA[i*procs+tid] == degB[j]);
            }
        }
    }
    if(verbose){
        printf("\n\nParallel preprocess P with degree filtering\n");
        printf("\nP:\n");
        for(i = 0; i<n; i++){
            for(j = 0; j<n; j++)
                printf("%d ",(*oldPtrP)[i][j]);
            printf("\n");
        }
    }
    
    floydStart = gettime();
    //Parallel Floyd-Warshall to compute all-pair shortest distances
    for(i = 0; i<n; i++)
        for(j = 0; j<n; j++)
            distA[i][j] = adjA[i][j];
    for(i = 0; i<n; i++)
        for(j = 0; j<n; j++)
            distB[i][j] = adjB[i][j];
    
    int (*oldPtrA)[MAX_N][MAX_N] = &distA;
    int (*oldPtrB)[MAX_N][MAX_N] = &distB;
    int (*newPtrA)[MAX_N][MAX_N] = &newDistA;
    int (*newPtrB)[MAX_N][MAX_N] = &newDistB;
    int (*tempA)[MAX_N][MAX_N];
    int (*tempB)[MAX_N][MAX_N];
    
    for(k = 0; k<n; k++){
#pragma	omp	parallel default(shared) private(i,tid,j)
        {
            tid = omp_get_thread_num();
            blockSize = n/procs + 1;
            for(i = 0; i < blockSize; i++){
                if(i*procs+tid >= n)
                    break;
                for(j = 0; j<n;j++){
                    (*newPtrA)[i*procs+tid][j] = floyd((*oldPtrA)[i*procs+tid][j],(*oldPtrA)[i*procs+tid][k],(*oldPtrA)[k][j]);
                    (*newPtrB)[i*procs+tid][j] = floyd((*oldPtrB)[i*procs+tid][j],(*oldPtrB)[i*procs+tid][k],(*oldPtrB)[k][j]);
                }
            }
        }
        tempA = oldPtrA;
        tempB = oldPtrB;
        oldPtrA = newPtrA;
        oldPtrB = newPtrB;
        newPtrA = tempA;
        newPtrB = tempB;
    }
    if(verbose){
        printf("\n\nParallel Floyd-Warshall to compute all-pair shortest distances\n");
        printf("\ndistA:\n");
        for(i = 0; i<n; i++){
            for(j = 0; j<n; j++)
                printf("%d ",newDistA[i][j]);
            printf("\n");
        }
        printf("\ndistB:\n");
        for(i = 0; i<n; i++){
            for(j = 0; j<n; j++)
                printf("%d ",newDistB[i][j]);
            printf("\n");
        }
    }
    ;
    distFilterStart = gettime();
    //Parallel preprocess P with distance filtering
    for(i = 0; i < n; i++)
        for(j = 0; j < n; j++)
            (*newPtrP)[i][j] = (*oldPtrP)[i][j];
#pragma	omp	parallel default(shared) private(i,tid,j)
    {
        tid = omp_get_thread_num();
        blockSize = n/procs + 1;
        for(i = 0; i < blockSize; i++){
            if(i*procs+tid >= n)
                break;
            for(j = 0; j<n;j++){
                if((*oldPtrP)[i*procs+tid][j])
                    distFilter(i*procs+tid,j);
            }
        }
    }
    if(verbose){
        printf("\n\nParallel preprocess P with distance filtering\n");
        printf("\nP:\n");
        for(i = 0; i<n; i++){
            for(j = 0; j<n; j++)
                printf("%d ",(*newPtrP)[i][j]);
            printf("\n");
        }
    }
    tempA = oldPtrP;
    oldPtrP = newPtrP;
    newPtrP = tempA;
    
    checkPStart = gettime();
    //Parallel check for impossibility or certainty of isomorphism
    if(checkP()==1){
        printf("\n\nISOMORPHISM FOUND:\n");
        for(i = 0; i < n; i++){
            for(j = 0; j < n; j++){
                printf("%d ",(*oldPtrP)[i][j]);
            }
        }
    }
    
    exhaustiveSearchStart = gettime();
    //Array of change stack heads
    allChanges = malloc(sizeof(changeNode*)*procs);
    for(i = 0;i<procs;i++)
        allChanges[i] = NULL;
    
    
    
    exhaustiveSearch(-1);
    
    exhaustiveSearchEnd = gettime();
    
    printf("\n\nNOTISOMORPHIC\n\n");
    
    printf("load matrices: %f seconds\n",degComputeStart - initializationStart);
    printf("degree matrices compute: %f seconds\n",degSortAndCompStart - degComputeStart);
    printf("degrees filter: %f seconds\n",degFilterStart - degSortAndCompStart);
    printf("degrees merge sort and compare: %f seconds\n",floydStart - degSortAndCompStart);
    printf("Floyd-Warshall: %f seconds\n",distFilterStart - floydStart);
    printf("distance filter: %f seconds\n",checkPStart - distFilterStart);
    printf("check permutation matrix: %f seconds\n",exhaustiveSearchStart - checkPStart);
    printf("exhaustive search: %f seconds\n",exhaustiveSearchEnd - exhaustiveSearchStart);
    return 0;
}


/*
 Sorts array[0..length-1] in ascending order, in place, with a bottom-up merge
 sort. Each pass merges adjacent sorted runs of `width` elements; every merge
 in a pass is issued as an OpenMP task and the pass ends with a taskwait, so
 the merges of one pass finish before the next pass starts.

 array:  the integers to sort
 length: number of elements; values of 0 or 1 leave the array untouched

 Exits the process if a merge buffer cannot be allocated.
*/
void parallelMergeSort(int* array, int length)
{
    int width;
    for(width = 1; width < length; width *= 2){
        int start;
        // A merge is only needed where a right-hand run exists; a lone run at
        // the tail is already sorted and is left where it is.
        for(start = 0; start + width < length; start += 2*width){
#pragma omp task firstprivate(array, length, start, width)
            {
                int leftEnd = start + width;
                int rightEnd = (start + 2*width < length) ? start + 2*width : length;
                int count = rightEnd - start;
                int left = start;
                int right = leftEnd;
                int out = 0;
                int *merged = malloc(sizeof *merged * count);

                if(merged == NULL){
                    perror("parallelMergeSort: malloc");
                    exit(EXIT_FAILURE);
                }

                while(left < leftEnd && right < rightEnd){
                    if(array[right] < array[left])
                        merged[out++] = array[right++];
                    else
                        merged[out++] = array[left++];
                }
                // Copy whatever remains of the run that was not exhausted
                while(left < leftEnd)
                    merged[out++] = array[left++];
                while(right < rightEnd)
                    merged[out++] = array[right++];

                memcpy(&array[start], merged, sizeof *merged * count);
                free(merged);
            }
        }
#pragma omp taskwait
    }
}

// One relaxation step of the shortest-path update, where -1 stands for
// "no path". Given the current distance `direct` and the two legs of a detour
// (`indirect1`, `indirect2`), returns:
//   - `direct` when either leg is -1 (this is -1 if `direct` is -1 as well),
//   - the detour length when only `direct` is -1,
//   - the smaller of `direct` and the detour length otherwise.
int floyd(int direct, int indirect1, int indirect2){
    const int unreachable = -1;
    int hasDetour = (indirect1 != unreachable) && (indirect2 != unreachable);

    // Without a usable detour the current value stands, reachable or not
    if(!hasDetour)
        return direct;

    int detour = indirect1 + indirect2;
    if(direct == unreachable)
        return detour;

    return min(direct, detour);
}

/*
 Distance filter for one candidate pair: decides whether vertex i of A may
 still map to vertex j of B.

 Every row k of the current candidate matrix (*oldPtrP) that holds exactly one
 non-zero entry, in column l, is treated as a settled mapping k -> l. The pair
 (i, j) survives only if newDistA[i][k] == newDistB[j][l] for ALL such rows.
 The verdict (1 or 0) is written to (*newPtrP)[i][j]. If no row is settled,
 (*newPtrP)[i][j] is left untouched.
*/
void distFilter(int i,int j){
    int k, l;
    int consistent = 1;    // cleared as soon as one settled row disagrees
    int sawSettledRow = 0;

    for(k = 0; k < n && consistent; k++){
        int ones = 0;
        int assigned_j = -1;
        for(l = 0; l < n; l++){
            if((*oldPtrP)[k][l]){
                assigned_j = l;
                ones++;
            }
        }
        if(ones == 1){
            sawSettledRow = 1;
            // One mismatch rules the pair out; later rows must not overwrite that
            if(newDistA[i][k] != newDistB[j][assigned_j])
                consistent = 0;
        }
    }

    if(sawSettledRow)
        (*newPtrP)[i][j] = consistent;
}


changeNode* pop(){
    int tid = omp_get_thread_num();
    if(allChanges[tid]){
        changeNode* temp = allChanges[tid];
        allChanges[tid] = allChanges[tid]->next;
        return temp;
    }
    return 0;
}

void push(int a, int b){
    int tid = omp_get_thread_num();
    changeNode* newNode = (changeNode*)malloc(sizeof(changeNode));
    newNode->next = allChanges[tid];
    allChanges[tid] = newNode;
    allChanges[tid]->i = a;
    allChanges[tid]->j = b;
}

//Parallel check for impossibility or certainty of isomorphism.
//Scans the rows of the current candidate matrix (*oldPtrP) and returns:
//  0 - some row has no non-zero entry (that vertex has no candidate left),
//  1 - every row has exactly one non-zero entry,
//  2 - neither of the above, i.e. P is still undecided.
//For n == 0 there are no rows and the result is 1.
int checkP(){
    int everyRowSettled = 1;   // stays 1 while each row has exactly one candidate
    int noRowEmpty = 1;        // stays 1 while each row has at least one candidate
    int i;

    // The && reduction gives each thread private flags and combines them at
    // the end, so no shared scratch storage is written inside the loop.
#pragma omp parallel for schedule(static, 1) default(shared) reduction(&&: everyRowSettled, noRowEmpty)
    for(i = 0; i < n; i++){
        int j;
        int count = 0;
        for(j = 0; j < n; j++)
            count += (*oldPtrP)[i][j];
        if(count != 1)
            everyRowSettled = 0;
        if(count == 0)
            noRowEmpty = 0;
    }

    if(!noRowEmpty)
        return 0;
    if(everyRowSettled)
        return 1;
    return 2;
}

/*
 Backtracking search over the candidate matrix (*oldPtrP).

 Starting after row `lastRow` (pass -1 to start at row 0), finds the first row
 that does not hold exactly one candidate and tries each of its candidates j in
 turn:
   1. pin the row to column j,
   2. in parallel, clear every candidate (p, q) whose distance to the pinned
      pair disagrees (newDistA[p][i] != newDistB[q][j]), recording each cleared
      cell on the clearing thread's change stack,
   3. ask checkP() for a verdict: 1 prints the matrix and the timings and ends
      the process; 2 recurses on the following rows; 0 is a dead end.
 If the process is still running after step 3, the candidate failed: the cells
 cleared in step 2 and the row itself are restored before the next candidate
 is tried. The function therefore returns with the candidate matrix holding
 the same contents it had on entry (possibly in the other buffer, since
 oldPtrP/newPtrP are swapped on every attempt).

 Returns without doing anything if no row after `lastRow` is undecided.
*/
void exhaustiveSearch(int lastRow)
{
    //Search for the next unassigned row, beginning from right after the most recently assigned row
    int count = 1;
    int i = lastRow;
    int j;
    int k;
    int r;
    int iso;
    int numChanges[procs];   // cells cleared by each thread for the current candidate
    int (*tempP)[MAX_N][MAX_N];

    while(count == 1){
        count = 0;
        i++;
        if(i >= n)
            return;
        for(j = 0; j < n; j++)
            count += (*oldPtrP)[i][j];
    }

    int savedRow[n];
    for(j = 0; j < n; j++)//save row
        savedRow[j] = (*oldPtrP)[i][j];

    for(j = 0; j < n; j++){
        if(!savedRow[j])
            continue;

        //Pick one to assign: row i becomes the unit row for column j
        for(k = 0; k < n; k++)
            (*oldPtrP)[i][k] = (k == j);

        //Parallel distance filter based on that assumption
        for(k = 0; k < n; k++)
            for(r = 0; r < n; r++)
                (*newPtrP)[k][r] = (*oldPtrP)[k][r];

        for(k = 0; k < procs; k++)
            numChanges[k] = 0;

#pragma omp parallel default(shared)
        {
            int p, q;
            int tid = omp_get_thread_num();

            // Rows are dealt out round-robin; each thread logs its own clears
#pragma omp for schedule(static, 1)
            for(p = 0; p < n; p++){
                for(q = 0; q < n; q++){
                    if((*oldPtrP)[p][q]){
                        int keep = (newDistA[p][i] == newDistB[q][j]);
                        (*newPtrP)[p][q] = keep;
                        if(!keep){
                            push(p,q);
                            (numChanges[tid])++;
                        }
                    }
                }
            }
        }
        // The filtered matrix becomes the current one
        tempP = oldPtrP;
        oldPtrP = newPtrP;
        newPtrP = tempP;

        //Prune?
        iso = checkP();
        if(iso == 1){
            printf("\nISOMORPHISM FOUND:\n");
            for(k = 0; k < n; k++){
                for(r = 0; r < n; r++){
                    printf("%d ",(*oldPtrP)[k][r]);
                }
                printf("\n");
            }

            exhaustiveSearchEnd = gettime();
            printf("load matrices: %f seconds\n",degComputeStart - initializationStart);
            printf("degree matrices compute: %f seconds\n",degSortAndCompStart - degComputeStart);
            // Degree filtering runs between degFilterStart and floydStart
            printf("degrees filter: %f seconds\n",floydStart - degFilterStart);
            // Sorting and comparing the degrees runs between degSortAndCompStart and degFilterStart
            printf("degrees merge sort and compare: %f seconds\n",degFilterStart - degSortAndCompStart);
            printf("Floyd-Warshall: %f seconds\n",distFilterStart - floydStart);
            printf("distance filter: %f seconds\n",checkPStart - distFilterStart);
            printf("check permutation matrix: %f seconds\n",exhaustiveSearchStart - checkPStart);
            printf("exhaustive search: %f seconds\n",exhaustiveSearchEnd - exhaustiveSearchStart);
            // NOTE(review): status 1 on the success path is kept as-is; confirm
            // whether callers rely on it before changing it.
            exit(1);
        }

        //Undecided: keep recursing on the rows after this one
        if(iso == 2)
            exhaustiveSearch(i);

        // Reaching this point means the candidate failed, either immediately
        // (iso == 0) or after the recursion ran out of options. In both cases
        // the assumption has to be rolled back, otherwise the remaining
        // candidates would be tried against an over-pruned matrix.

        //Parallel undo changes from bad assumption; each thread unwinds its own stack
#pragma omp parallel default(shared)
        {
            int tid = omp_get_thread_num();
            while(numChanges[tid] > 0){
                changeNode* undone = pop();
                (*oldPtrP)[undone->i][undone->j] = 1;
                free(undone);
                (numChanges[tid])--;
            }
        }
        //Undo the assumption itself by restoring saved row
        for(r = 0; r < n; r++){
            (*oldPtrP)[i][r] = savedRow[r];
        }
    }
}


