#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <vector>
#include <string>
#include <fstream>
#include <cstdlib>
#include <sstream>
#include <iostream>
#include <unistd.h>
#include <limits.h>
#include <assert.h>
#include <set>
#include <algorithm>
#include <limits>
#include <time.h>
#include <map>

using namespace std;

// A quickie program that will compute the largest shared subsequence

//fill in diff with the shared characters of a and b, using zero to
//delimit differences.  Return the number of shared characters
int difference(const vector<int>& a, const vector<int>& b, vector<int>& diff)
{
    int len = a.size();
    assert(len == b.size());
    diff.resize(len);
    int cnt = 0; //number of common chars
    
    for(int i = 0; i < len; i++)
    {
	if(a[i] == b[i])
	{
	    diff[i] = a[i];
	    cnt++;
	}
	else
	    diff[i] = 0;
    }

    return cnt;
}

void myerror(char *msg)
{
    fprintf(stderr, "ERROR: %s\n", msg);
    exit(-1);
}


// print out usage message and exit
void usage(char * progname)
{
    fprintf(stderr,"Usage: %s  -f file -k <sequence length>\n",progname);
    exit(-1);
}

int lookc(FILE *f)
{
    int c = fgetc(f);
    assert(ungetc(c,f) == c);
    return c;
}

bool verbose;

int main(int argc, char *argv[])
{
    int i;
    int k = 0;
    FILE *file = NULL;
    
    /* process cmdline arguments */

    for(i = 1; i < argc; i++)
    {
	if(strcmp(argv[i],"-v") == 0)
	{
	    verbose = true;
	}
	else if(strcmp(argv[i],"-f") == 0)
	{
	    i++;
	    if(i == argc)
		usage(argv[0]);
	    file = fopen(argv[i],"r");
	    if(!file)
		myerror("Could not open file.");	    
	}
	else if(strcmp(argv[i],"-k") == 0)
	{
	    i++;
	    if(i == argc)
		usage(argv[0]);
	    k = atoi(argv[i]);		
	}
    }

    if(!file)
	usage(argv[0]);

    int len = 0;
    vector< vector<int> > alignment;
    
    //read in file
    do
    {
	int node;
	char buf[256];
	vector<int> single_subgraph;
	while(lookc(file) != '\n')
	{
	    fscanf(file, "%d %s",&node, buf);
	    assert(fgetc(file) == '\t'); //absorb the tab
	    single_subgraph.push_back(node);
	}
	if(len == 0)
	    len = single_subgraph.size();
	else if(len != single_subgraph.size())
	    assert(0);

	alignment.push_back(single_subgraph);
    }
    while(fgetc(file) == '\n' && lookc(file) != EOF);


    if(k >= len)
	myerror("k needs to be smaller than the subgraph size");

    if(k <= 0)
	k = len - 1;

    typedef map< vector<int>, set< vector<int> > > mmap_t;
    mmap_t mmap;
    //find the most common subsequence of length k    
    for(int i = 0; i < alignment.size(); i++)
    {
	//compare it to every other subgraph
	for(int j = 0; j < i; j++)
	{
	    vector<int> diff;
	    if(difference(alignment[i],alignment[j],diff) == k)
	    {
		mmap[diff].insert(alignment[i]);
		mmap[diff].insert(alignment[j]);
	    }
	}
    }


    //sort the sequences by popularity
    multimap< int, vector<int> > sorted_diffs;
   
 
    //now display how popular each subsequence is
    for(mmap_t::iterator itr = mmap.begin(); itr != mmap.end(); itr++)
    {
	sorted_diffs.insert(pair<int, vector<int> >(itr->second.size(), itr->first));
    }
    
    //now print them out in sorted order
    for(multimap<int, vector<int> >::iterator itr = sorted_diffs.begin(); itr != sorted_diffs.end(); itr++)
    {
	for(int i = 0; i < itr->second.size(); i++)
	    cout << itr->second[i] << " ";
	cout << "\t" << itr->first << endl;
    }
    
    return 0;
}

