/*
 * metropolis_algorithm.h
 *
 *  Created on: Sep 6, 2011
 *      Author: Tony
 */

#ifndef METROPOLIS_ALGORITHM_H_
#define METROPOLIS_ALGORITHM_H_

#include <iostream>
#include <vector>

// Forward declarations. Every type below that this header actually uses
// appears only behind a pointer (or inside a container of pointers), so the
// full definitions are not needed here.
// NOTE(review): `genealogy` is not referenced anywhere in this header, and
// `metropolis_cache` is fully defined further down in this same file.
class admixture_hyperparameters;
class admixture_model;
class admixture_parameters;
class admixture_output;
class coalescence_algorithm;
class genealogy;
class metropolis_cache;
class StochasticLib1;

// Interface of the Metropolis sampler for admixture parameters.
//
// An instance is configured through its constructor with an admixture_model
// and the iteration settings, run() is called with an initial parameter set,
// and the resulting admixture_output samples are read back via getSamples().
// Only declarations live in this header; the member function definitions are
// not visible here.
//
// NOTE(review): the class stores raw pointers and declares a destructor but
// no copy constructor or copy assignment operator, so the compiler-generated
// copies would duplicate the pointers member-wise. Whether that is safe
// depends on who owns the pointees, which this header does not show.
class metropolis_algorithm {
private:
	//------------------------------------------------------------------
	// Members

	// Sample outputs, stored as raw pointers (presumably what getSamples()
	// exposes; ownership of the pointees is not visible from this header)
	std::vector<admixture_output*> samples;

	// Number of Markov chain iterations
	int numIters;

	// Number of genealogies
	int numGenealogies;

	// Number of burn-in iterations
	int numBurnIns;

	// Number of theta locks
	// NOTE(review): the exact meaning of a "theta lock" is not documented
	// here; presumably set from the constructor's `thetalock` argument.
	int numThetaLocks;

	// Externally supplied theta
	double extern_theta;

	// Information threshold
	double infoThres;

	// Entropy of one bit
	double oneBitEntropy;

	// The number of individuals that are used to discover SNP sites; must be
	// less than the number of samples in the data.
	// NOTE(review): this is a count but is stored as a double.
	double snpDiscoverySampleSize;

	// The initial maximum time, in units of 4N generations
	double initialTMax;

	// Admixture model describing the parameters needed to run the algorithm
	admixture_model* pModel;

	// Stochastic sampler (random variate generator)
	StochasticLib1* pStoc;

	// Cache (currently disabled)
	//metropolis_cache* pCache;

	// Additional working state; the purpose of key_idx is not documented in
	// this header.
	int key_idx;
	double m; // number of samples (stored as a double)
	double penalty; // penalty score for assigning to null
	std::vector<std::vector<double> > logkfact; // cache (presumably of log(k!) values, judging by the name; confirm in the implementation)

public:
	//------------------------------------------------------------------
	// Constructors

	// Constructs the sampler.
	//   pModel         - admixture model to run against
	//   numIters       - number of Markov chain iterations
	//   numGenealogies - number of genealogies
	//   burnins        - number of burn-in iterations (presumably stored in numBurnIns)
	//   thetalock      - number of theta locks (presumably stored in numThetaLocks)
	//   extern_theta   - externally supplied theta
	//   initialTMax    - initial maximum time, in units of 4N generations
	metropolis_algorithm(admixture_model* pModel, int numIters, int numGenealogies, int burnins, int thetalock, double extern_theta, double initialTMax);

	// Virtual destructor, so the class can be deleted through a base pointer
	// if it is ever derived from.
	virtual ~metropolis_algorithm();

public:
	//------------------------------------------------------------------
	// Methods

	// Runs the algorithm starting from the initial parameters pParamsIni.
	// pOutput is an optional output file stream and defaults to NULL; what is
	// written to it is determined by the implementation.
	void run(admixture_parameters* pParamsIni, std::ofstream* pOutput = NULL);

	// Returns a read-only reference to the vector of sample outputs.
	// NOTE(review): the method itself is not declared const, so it cannot be
	// called on a const metropolis_algorithm.
	const std::vector<admixture_output*>& getSamples();

private:
	//------------------------------------------------------------------
	// Helpers

	// Enumerates through the genealogy and computes the weights for each
	// model bipartition. The result is returned by value as a vector of
	// vectors of doubles.
	const std::vector<std::vector<double> > estimate_lambda(coalescence_algorithm* pCoalescence, admixture_parameters* pParams);

	// Computes the squared error ratio for the given lambdas and parameters
	double squared_error(const std::vector<std::vector<double> >& lambdas, admixture_parameters* pParam);

	// Computes the likelihood ratio from the old and new weights together
	// with the old and new parameter sets and the hyperparameters
	double likelihood_ratio(const std::vector<std::vector<double> >& weights_old, const std::vector<std::vector<double> >& weights_new, admixture_parameters* pOldParam, admixture_parameters* pNewParam, admixture_hyperparameters* pHyperParams);

	// Samples new parameters into pParams.
	// NOTE(review): the encodings of `commands`, `events` (default -2) and
	// `direction` (default 0) are not documented in this header; see the
	// implementation.
	void parameters_sample(admixture_parameters* pParams, admixture_hyperparameters* pHyperParams, int commands, int events = -2, int direction = 0);

	// Creates a parameter set for the chain from the given hyperparameters.
	// Returns a raw pointer; presumably the caller is responsible for the
	// returned object (confirm in the implementation).
	admixture_parameters* parameters_create(admixture_hyperparameters* pHyperParams);

	// Samples new events into pParams
	void parameters_events_sample(admixture_parameters* pParams);

	// Updates the hyperparameters for the chain given the current lambdas.
	// NOTE(review): the roles of iter_frac, sigma_std and key_model_idx are
	// not documented in this header.
	void hyperparameters_update(admixture_hyperparameters* pHyperParams, const std::vector<double>& lambdas, double iter_frac, double sigma_std, int key_model_idx);

	// Creates a hyperparameter object for the chain. Returns a raw pointer;
	// presumably the caller is responsible for the returned object (confirm
	// in the implementation).
	admixture_hyperparameters* hyperparameters_create();

};

// Cache of SNP discovery probabilities.
//
// All data members are plain values or standard containers, and no destructor
// or copy operations are declared, so the compiler-generated ones apply. Only
// declarations live in this header; the member function definitions are not
// visible here.
class metropolis_cache {
private:
	// Cache for computing nchoosek
	// NOTE(review): the original comment sits directly above `n`; which of
	// the members below it is meant to cover is not clear from this header.
	int n;
	std::vector<int> discovery_sizes;
	std::vector<int> pop_sizes;
	// Cached probabilities; presumably backing compute_snp_discovery_prob()
	// and compute_snp_discovery_prob_by_pop() respectively, judging by the
	// names (confirm in the implementation).
	std::vector<double> snp_discovery_prob_cache;
	std::vector<std::vector<double> > snp_undiscovery_by_pop_prob_cache;

public:
	//--------------------------------------------------------------------------
	// Constructor
	//metropolis_cache();

	// Constructs the cache.
	//   numOfSamps           - number of samples
	//   numOfSeqSamps        - number of sequenced samples
	//   variantCallErrorProb - variant call error probability
	//   ascertainment        - ascertainment values (presumably per population; TODO confirm)
	//   pop_sizes            - population sizes; note that this parameter has
	//                          the same name as the pop_sizes data member
	// A default constructor is not available (its declaration is commented out).
	metropolis_cache(int numOfSamps, int numOfSeqSamps, double variantCallErrorProb, const std::vector<int>& ascertainment, const std::vector<int>& pop_sizes);

public:
	//--------------------------------------------------------------------------
	// Methods

	// Returns the probability that a branch with k leaves will be discovered
	// by n sequenced individuals
	double compute_snp_discovery_prob(int k);

	// Returns the probability that a branch with k leaves, grouped by
	// population, will be discovered by n sequenced individuals.
	// NOTE(review): the number of leaves is passed here as `nleaf`; the
	// meaning of each popvec entry is not documented in this header.
	double compute_snp_discovery_prob_by_pop(const std::vector<bool>& popvec, int nleaf);

private:
	//--------------------------------------------------------------------------
	// Helpers

	// Helper that computes an ascertainment probability from j, n and k.
	// NOTE(review): the roles of the three arguments are not documented in
	// this header; the `n` parameter has the same name as the `n` data member
	// and hides it inside the function body.
	double compute_ascertainment_prob3(int j, int n, int k);
};

#endif /* METROPOLIS_ALGORITHM_H_ */
