
// Name        : cleax.cpp
// Author      : Ming-Chi Tsai
// Version     :
// Copyright   : 
//============================================================================


// My Includes
#include "cleax.h"

// Includes
#include <algorithm>
#include <cmath>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>


#include "algorithm/consensus_algorithm.h"
#include "algorithm/metropolis_algorithm.h"
#include "algorithm/model_construction_algorithm.h"
#include "debug/likelihood_test.h"
#include "dmo/admixture_model.h"
#include "dmo/genealogy.h"
#include "dmo/partition.h"
#include "dmo/properties.h"
#include "utilities/random/randomc.h"
#include "utilities/random/stocc.h"
#include "utilities/entropy_utilities.h"
#include "utilities/set_utilities.h"
#include "properties_constants.h"


// Constants
// Program-mode identifiers. None of them is referenced by the code in this
// file; main() selects its mode with literal bit values instead.
// NOTE(review): "ProgModelFull" and "ProgModeConcensus" look like misspellings
// of "ProgModeFull" / "ProgModeConsensus" -- confirm before renaming.
const static int ProgModelFull = 0;
const static int ProgModeConcensus = 1;
const static int ProgModeMetropolis = 2;

// Global variables
// One partition per SNP column; filled by load_snp_data() and released by
// cleanup().
std::vector<partition*> thePartitions;
// Debug switch, off by default; not read anywhere in this file.
bool isDebug = 0;
// StochasticLib1 instance constructed during static initialisation with the
// current time as its argument. It is neither used nor deleted in this file.
StochasticLib1* pRandom = new StochasticLib1((int)time(0));

/**
 * Reads model bipartitions from a text file, one bipartition per line.
 *
 * Every '0' and '1' character of a line becomes one element of that line's
 * bipartition; all other characters are ignored. A line without any '0'/'1'
 * still yields a partition (built from an empty vector). Each line read is
 * echoed to stdout.
 *
 * @param input_file path of the file to read
 * @return the partitions in file order. They are allocated with `new` and are
 *         not freed by this function. The result is empty when the file
 *         cannot be opened (an error is printed to stderr in that case).
 */
const std::vector<partition*> load_partition_data(const std::string& input_file) {

	std::vector<partition*> parts;

	std::ifstream fin;
	fin.open(input_file.data(), std::ios::in);
	if (!fin.is_open()) {
		// Without this check a missing file used to spin forever: a failed
		// stream never reports eof().
		fprintf(stderr, "ERROR: unable to open partition file: %s\n", input_file.c_str());
		return parts;
	}

	// Looping on the result of getline (instead of eof()) stops on any read
	// failure and still processes a final line that lacks a trailing newline.
	std::string line;
	while (std::getline(fin, line)) {
		printf("line: %s\n" , line.c_str());  fflush(stdout);

		std::vector<bool> part;
		for (std::string::size_type j = 0; j < line.size(); ++j) {
			const char ch = line[j];
			if (ch == '\0') {
				// An embedded NUL ends the line.
				break;
			} else if (ch == '0') {
				part.push_back(false);
			} else if (ch == '1') {
				part.push_back(true);
			}
		}
		parts.push_back(new partition(part));
	}

	fin.close();

	return parts;
}

// Loads a weighted set of model bipartitions from a text file and builds an
// admixture model from it.
//
// The file is scanned line by line (each line is echoed to stdout) for two
// section headers, matched by prefix:
//   "Weights" - the NEXT line holds space-separated integer weights. The last
//               weight read so far is installed as the set's null weight.
//   "Models"  - the following lines hold one bipartition each ('0'/'1'
//               characters, everything else ignored). At most
//               (number of weights - 1) lines are consumed, so the "Weights"
//               section has to come first.
// Model i is then added to the set with weight i.
//
// Lines are limited to 9999 characters; a longer line ends parsing.
// A file that cannot be opened is reported on stderr and afterwards treated
// like an empty file, so the function still returns whatever
// model_construction_algorithm builds from an empty model set.
//
// Returns the model produced by
// model_construction_algorithm::simple_admixture_model(). Nothing allocated
// here is freed by this function.
admixture_model* load_model_data(const std::string& input_file) {

	std::ifstream fin;
	fin.open(input_file.data(), std::ios::in);
	if (!fin.is_open()) {
		fprintf(stderr, "ERROR: unable to open model file: %s\n", input_file.c_str());
	}

	const int linesize = 10000;
	char line[linesize];

	// Initialize variables
	masked_model_partition_set* pModelSet = new masked_model_partition_set(std::vector<bool>());
	std::vector<partition*> model;
	std::vector<int> weights;
	std::vector<int> popassign;
	//std::vector<int> seqeunced_cnt;
	int popsize = 0;

	// Loop on the read itself: testing eof() up front never terminates once
	// the stream is in a failed state (unopened file, overlong line).
	while (fin.getline(line, linesize)) {

		printf("line: %s\n", line);

		// If line is weights, we expect the next line to be a list of weights
		if (strncmp("Weights", line, 7) == 0) {
			printf("Reading Weights\n"); fflush(stdout);
			if (fin.getline(line, linesize)) {
				for (char* pch = strtok(line, " "); pch != NULL; pch = strtok(NULL, " ")) {
					printf("%s ", pch);
					fflush(stdout);
					weights.push_back(atoi(pch));
				}
				// A blank weights line leaves the vector empty; indexing its
				// last element would be undefined behaviour.
				if (!weights.empty()) {
					pModelSet->setNullWeight(weights.back());
				}
			}
		} else if (strncmp("Models", line, 6) == 0) {
			printf("Reading Models\n"); fflush(stdout);

			// The last weight is the null weight, all others belong to models.
			const int expected = (int)weights.size() - 1;
			int i = 0;

			// Check the count first so no line beyond the section is consumed,
			// and only keep lines that were actually read.
			while (i < expected && fin.getline(line, linesize)) {
				std::vector<bool> part;
				for (int j = 0; j < linesize; ++j) {
					const char ch = line[j];
					if (ch == '\0') {
						break;
					} else if (ch == '0') {
						part.push_back(false);
					} else if (ch == '1') {
						part.push_back(true);
					}
				}
				model.push_back(new partition(part));
				++i;
			}
		}
		// Disabled: parsing of an "Ascertainments" section. While it stays
		// disabled popsize remains 0 and popassign remains empty.
//		else if (strncmp("Ascertainments", line, 14) == 0) {
//			printf("Reading Ascertainments\n"); fflush(stdout);
//			if (!fin.eof()) {
//				fin.getline(line, linesize);
//				popsize = atoi(line);
//			}
//
//
//			if (!fin.eof()) {
//				fin.getline(line, linesize);
//
//				char* pch = strtok(line, " ");
//				while (pch != NULL) {
//					fflush(stdout);
//					int wt = atoi(pch);
//					popassign.push_back(wt);
//					pch = strtok(NULL, " ");
//				}
//			}
//
//
//			if (!fin.eof()) {
//				fin.getline(line, linesize);
//
//				char* pch = strtok(line, " ");
//				while (pch != NULL) {
//					fflush(stdout);
//					int wt = atoi(pch);
//					seqeunced_cnt.push_back(wt);
//					pch = strtok(NULL, " ");
//				}
//			}
//		}
	}

	// Close file
	fin.close();

	// Add model. Never index past the partitions that were really read.
	const int numWeighted = (int)weights.size() - 1;
	const int numModels = std::min(numWeighted, (int)model.size());
	if (numModels < numWeighted) {
		fprintf(stderr, "WARNING: %d model weights given but only %d model bipartitions read from %s\n",
				numWeighted, (int)model.size(), input_file.c_str());
	}
	for (int i = 0; i < numModels; ++i) {
		pModelSet->add_model_partition(model[i], weights[i]);
	}

	// Create model set
	printf("%s\n", pModelSet->toString().data()); fflush(stdout);
	std::vector<masked_model_partition_set*> model_sets;
	model_sets.push_back(pModelSet);

	// Create potential model
	admixture_model* pModel  = NULL;
	if (popsize > 0) {
		model_construction_algorithm model_constructor(model_sets, popsize, popassign);
		pModel = model_constructor.simple_admixture_model();
	} else {
		model_construction_algorithm model_constructor(model_sets);
		pModel = model_constructor.simple_admixture_model();
	}
	printf("%s\n", pModel->toString().data());
	fflush(stdout);

	// Debug
	for (int i = 0; i < (int)model_sets.size(); ++i) {
		printf("[%d]NULL\n", model_sets[i]->getNullWeight());
		for (int j = 0; j < (int)model_sets[i]->size(); ++j) {
			printf("[%d]%s\n", model_sets[i]->getPartition(j)->getWeight(), model_sets[i]->getPartition(j)->getBase64String().data());
		}
	}

	return pModel;
}

// Loads a 0/1 SNP matrix from a text file into the global thePartitions.
//
// Each line that contains at least one '0' or '1' is a row (one chromosome);
// every '0'/'1' character on it is one column entry (one SNP). All other
// characters, e.g. separating spaces, are ignored. The number of columns is
// taken from the first row. One partition is created per column and appended
// to thePartitions (released later by cleanup()).
//
// Rows that are shorter than the first row are padded with 0, entries beyond
// the column count are dropped; both cases are reported once on stderr.
// If the file cannot be opened an error is printed to stderr and
// thePartitions is left untouched.
void load_snp_data(const std::string& input_file) {
	std::ifstream fin;
	fin.open(input_file.data(), std::ios::in);
	if (!fin.is_open()) {
		// A failed stream never reports eof(), so the former eof()-driven
		// loops would not have terminated here.
		fprintf(stderr, "ERROR: unable to open SNP file: %s\n", input_file.c_str());
		return;
	}

	// Column-major 0/1 matrix: matrix[col][row]. It is grown while reading, so
	// a single pass suffices and every write is in range by construction.
	std::vector<std::vector<bool> > matrix;
	int rows = 0;
	int cols = 0;
	bool ragged = false;

	// std::getline also delivers a final line that has no trailing newline.
	std::string line;
	while (std::getline(fin, line)) {
		int col = 0;
		for (std::string::size_type k = 0; k < line.size(); ++k) {
			const char ch = line[k];
			if (ch != '0' && ch != '1') {
				continue;
			}
			const bool bit = (ch == '1');
			if (rows == 0) {
				// The first row defines the columns.
				matrix.push_back(std::vector<bool>(1, bit));
			} else if (col < cols) {
				matrix[col].push_back(bit);
			} else {
				ragged = true;	// surplus entry, dropped
			}
			++col;
		}

		if (col == 0) {
			continue;	// line without sequence data
		}
		if (rows == 0) {
			cols = col;
		}
		// Keep all columns the same length when a row is too short.
		for (int missing = col; missing < cols; ++missing) {
			matrix[missing].push_back(false);
			ragged = true;
		}
		++rows;
	}
	fin.close();

	if (ragged) {
		fprintf(stderr, "WARNING: rows of differing length in SNP file: %s\n", input_file.c_str());
	}

	printf("NumOfRows: %d\n", rows);
	printf("NumOfCols: %d\n", cols);

	// Initialize partition vector
	for (int i = 0; i < cols; ++i) {
		partition* pPartition = new partition(matrix[i]);
		thePartitions.push_back(pPartition);
	}

}

// Clean up stored data structure at the end of the program
void cleanup() {
	for (int i = 0; i < (int)thePartitions.size(); ++i) {
		delete thePartitions[i];
	}
	thePartitions.clear();
}

int main(int argc, char* argv[]) {

	/* initialize random seed: */
	srand ( time(NULL) );

	if (argc < 2) {
		printf("USAGE: cleax [property_file] [-options ...]\n");
		//printf("\t -f input_file\t Location of the input data.");
		return EXIT_SUCCESS;
	}
	else {
		// load property file
		printf("Loading property file: %s\n", argv[1]);
		properties prop;
		prop.load(argv[1]);

		// TODO: Make sure all necessary properties are set
		// Determine the mode
		// bit 1 = consensus
		// bit 2 =
		int mode = 3;
		if (strcmp(prop.get_properties(PropertiesConstantMode).data(), "ConsensusOnly") == 0) {
			mode = 1;
			printf("Mode: Find Consensus Tree Only\n");
		} else if (strcmp(prop.get_properties(PropertiesConstantMode).data(), "MarkovOnly") == 0) {
			mode = 10;
			printf("Mode: Run Markov Chain Only\n");
		} else if (strcmp(prop.get_properties(PropertiesConstantMode).data(), "ComputeOnly") == 0) {
			mode = 16;
			printf("Mode: Computes Weights Given Model Bipartitions\n");
		}
		else {
			printf("Mode: Normal Mode\n");
		}
		fflush(stdout);

		// Open output file
		std::string outfile(prop.get_properties(PropertiesConstantOutputFile));
		std::ofstream outstream(outfile.data());

		admixture_parameters* pParamsIni = NULL;
		admixture_model* pModel = NULL;
		double theta = 0.0;
		if (mode & 1 /* Identify model biparition (Normal&Consensus Mode) */) {
			// load input data
			load_snp_data(prop.get_properties(PropertiesConstantConsensusInputFile));

			// initialize consensus algorithm
			int m = thePartitions[0]->size();
			// For now k is always going to be 3
			int maxParts = atoi(prop.get_properties(PropertiesConstantMaxParts).data());
			int maxEMIters = 1000;
			if (prop.get_properties(PropertiesConstantNumEMIters).size() > 0) {
				maxEMIters = atoi(prop.get_properties(PropertiesConstantNumEMIters).data());
			}
			double pen = m;
			if (prop.get_properties(PropertiesConstantPenalty).size() > 0) {
				pen = atof(prop.get_properties(PropertiesConstantPenalty).data());
			}

			printf("Penalty: %.1f\nMaxEMIters: %d\nMaxParts: %d\n", pen, maxEMIters, maxParts);
			fflush(stdout);

			consensus_algorithm consensus(thePartitions, maxParts, maxEMIters, pen);
			consensus.run();

			printf("Finished Running\n");
			fflush(stdout);

			//-----------------------------------------------------------
			// Creates model and weight vector
			std::vector<masked_model_partition_set*> model_sets;

			// Creates initial unmasked model bipartition set
			masked_model_partition_set* pModelPartitionSet = consensus.getModelPartitionSet();
			model_sets.push_back(pModelPartitionSet);

			model_construction_algorithm model_construction(model_sets);
			pModel = model_construction.simple_admixture_model(&consensus);

			// Output to text file
			for (int i = 0; i < (int)pModel->getData()->getNumberOfModelSets(); ++i) {
				printf("%s", pModel->getData()->getModelSet(i)->toString().data());
				outstream << pModel->getData()->getModelSet(i)->toString();
			}
		} else if (mode & 8 /* Load weights and model bipartition from file (Markov Mode)*/) {
			// load input data
			printf("Loading model file: %s\n", prop.get_properties(PropertiesConstantMCMCInputFile).data());
			fflush(stdout);
			pModel = load_model_data(prop.get_properties(PropertiesConstantMCMCInputFile));
		} else if (mode & 16 /* Compute weights (Compute Mode)*/) {
			//
			// load input data
			printf("Loading snp file: %s\n", prop.get_properties(PropertiesConstantConsensusInputFile).data());
			load_snp_data(prop.get_properties(PropertiesConstantConsensusInputFile));
			printf("Laoding partition data: %s\n", prop.get_properties(PropertiesConstantModelPartitionInputFile).data());
			std::vector<partition*> parts = load_partition_data(prop.get_properties(PropertiesConstantModelPartitionInputFile));

			consensus_algorithm consensus(thePartitions, parts);

			// Creates initial unmasked model bipartition set
			masked_model_partition_set* pModelPartitionSet = consensus.getModelPartitionSet(std::vector<bool>(0), 0, 1);

			printf("Score: %f\n", consensus.getModelScore());

			// Output to text file
			printf("%s", pModelPartitionSet->toString().data());
			outstream << pModelPartitionSet->toString();
			fflush(stdout);

			// Free memory
			delete pModelPartitionSet;
		}

		if (mode & 2) {
			printf("%s", pModel->toString().data());
			fflush(stdout);

			int numThetaLocks = atoi(prop.get_properties(PropertiesCosntantNumThetaLocks).data());
			int numBurnIns = atoi(prop.get_properties(PropertiesCosntantNumBurnIns).data());
			int numIters = atoi(prop.get_properties(PropertiesConstantNumMCMCIters).data());
			int numGenealogies = atoi(prop.get_properties(PropertiesConstantNumGenealogies).data());
			double mutRate = atof(prop.get_properties(PropertiesConstantMutationRate).data());
			double popSize = atof(prop.get_properties(PropertiesConstantEffectivePopulationSize).data());
			double seqLength = atof(prop.get_properties(PropertiesConstantSequenceLength).data());
			double initialTMax = atof(prop.get_properties(PropertiesConstantInitialTMax).data());

			if (initialTMax <= 0.0) {
				initialTMax = 1.0;
			}

			theta = (theta == 0) ? 4*seqLength*popSize*mutRate : theta;
			if (numIters <= 0) {
				numIters = 20000;
			}

			if (numGenealogies <= 0) {
				numGenealogies = 30;
			}

			if (numBurnIns <= 0) {
				numBurnIns = 1000;
			}

			outstream.precision(10);
			outstream.width(10);

			printf("Extern Theta: %f\n", theta);
			printf("InitialTMax: %f\n", initialTMax);
			fflush(stdout);

			metropolis_algorithm metropolis(pModel, numIters, numGenealogies, numBurnIns, numThetaLocks, theta, initialTMax);
			metropolis.run(pParamsIni, &outstream);
		}

		outstream.close();
	}

	// Clean up data structures
	cleanup();

	return EXIT_SUCCESS;
}
