/*******************************************************************************
 * Copyright (c) 2012 Darya Filippova.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the GNU Public License v3.0
 * which accompanies this distribution, and is available at
 * http://www.gnu.org/licenses/gpl.html
 * 
 * Contributors:
 *     Darya Filippova - initial API and implementation
 ******************************************************************************/


import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import java.text.DecimalFormat;
import java.util.Comparator;
import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation;
import org.apache.commons.math3.stat.descriptive.moment.Mean;
import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
/**
 * This class finds an arrangement of optimal dense subgraphs on a weighted
 * adjacency matrix. Filters out subgraphs that are less dense than expected.
 * 
 * @author lynxoid
 *
 */
public class FastHeirarchicalSubgraphFinder {

	/**
	 * Creates a finder. The outer class declares no fields, so there is
	 * nothing to set up here.
	 */
	public FastHeirarchicalSubgraphFinder() {
		// intentionally empty
	}
	
	/**
	 * 
	 */
	// not called
	// public ArrayList<Clique> getCliques(MyMTJMatrix m, boolean cutoff, float alpha) {		
	
	// 	double[] OPT = computeOPT(new OParams(m,alpha));
	// 	for (int i =0; i < OPT.length; i++) {
	// 		System.out.println(i+" "+OPT[i]);
	// 	}

	// 	return new ArrayList<Clique>();
	// }

	/**
	 * Runs the dynamic program on {@code m} and walks its traceback to collect
	 * the active intervals of the optimal arrangement.
	 *
	 * The traceback starts at the last index and repeatedly jumps to the index
	 * just before the start of the current interval. Only intervals flagged
	 * {@code active} are kept; the result is reversed at the end so that it is
	 * ordered from the beginning of the matrix to its end.
	 *
	 * A summary line with the final OPT value and the sum of the collected
	 * interval scores is printed to standard output (skipped for an empty
	 * matrix, which has no OPT entry to report).
	 *
	 * @param m      weighted adjacency matrix to decompose
	 * @param cutoff not used by this method; kept so existing callers compile
	 * @param alpha  exponent handed to {@link OParams} as {@code gamma}
	 * @return the active intervals in increasing order of position; empty when
	 *         the matrix has fewer than two rows
	 */
	public ArrayList<Interval> getCliquesRob(MyMTJMatrix m, boolean cutoff, float alpha) {
		OResults res = computeOPTWithTraceback(new OParams(m, alpha));
		ArrayList<Interval> domainIntervals = new ArrayList<Interval>();

		int numIntervals = res.intervals.length;
		if (numIntervals == 0) {
			// Empty matrix: there is no interval to trace back from and no
			// OPT entry to print. Previously this indexed intervals[-1].
			return domainIntervals;
		}

		double sumOfIntervalScores = 0.0;
		int currentIndex = numIntervals - 1;
		// Pre-test loop: index 0 can only hold the singleton interval [0,0],
		// which is never a valid block, so the walk stops before visiting it
		// (this also covers the 1x1 matrix case).
		while (currentIndex > 0) {
			Interval current = res.intervals[currentIndex];
			if (current.active) {
				sumOfIntervalScores += current.score;
				domainIntervals.add(current);
			}
			// continue with the interval that ends right before this one starts
			currentIndex = current.start - 1;
		}

		// the traceback visited intervals back to front
		Collections.reverse(domainIntervals);

		int globalOptIdx = res.OPT.length - 1;
		System.out.println("OPT["+globalOptIdx+"] = "+res.OPT[globalOptIdx]+", sum of interval scores = "+sumOfIntervalScores);
		return domainIntervals;
	}

	/**
	 * Given a matrix M, computes all S_{ij} for all i < j.
	 * 
	 * Compute sums of weights for all ordered [i..j] subgraphs
	 * ii to ij
	 *   ......
	 *       jj
	 * 
	 * @param m
	 * @return
	 */
	// public double [][] getSumsMatrix(MyMTJMatrix M) {
	// 	int n = M.getSize();
	// 	double [][] sums = new double[n][n];
	// 	double [] column_sums;
		
	// 	// init densities
	// 	for (int i = 0; i < n; i++) {
	// 		sums[i][i] = 0.0f;//M[i][i]; ignore self co-cluster items
	// 	}

	// 	for (int i = 1; i < n; i++) { // column
	// 		column_sums = new double [i+1];
	// 		column_sums[i] = 0;//M[i][i]; // ignore self co-cluster items
	// 		for (int j = i-1; j >= 0; j--) { // row
	// 			column_sums[j] = column_sums[j+1] + M.getElement(j, i) /*M[j][i]*/ ;
	// 			sums[j][i] = sums[j][i-1] + column_sums[j];
	// 		}
	// 	}
	// 	return sums;
	// }
	

	/**
	 * Given a matrix M, computes all S_{ij} for all i < j.
	 * 
	 * Compute sums of weights for all ordered [i..j] subgraphs
	 * ii to ij
	 *   ......
	 *       jj
	 * 
	 * @param m
	 * @return
	 */
	// Jan 21, 2015: not called from anywhere in the code
	// public double [][] getSumsMatrixWithStatistics(MyMTJMatrix M, double alpha, double [] means, double [] stddevs) {
	//  	int n = M.getSize();
	// 	double [][] sums = new double[n][n];
	// 	double [] column_sums;
		
	// 	// init densities
	// 	for (int i = 0; i < n; i++) {
	// 		sums[i][i] = 0.0f;//M[i][i]; ignore self co-cluster items
	// 	}

	// 	Mean [] meansCompute = new Mean[n];
	// 	StandardDeviation [] stddevsCompute = new StandardDeviation[n];
	// 	int [] numSamps = new int[n];

	// 	for (int i = 0; i < n; i++) {
	// 		meansCompute[i] = new Mean();
	// 		stddevsCompute[i] = new StandardDeviation();
	// 	}

	// 	for (int i = 1; i < n; i++) { // column
	// 		column_sums = new double [i+1];
	// 		column_sums[i] = 0;//M[i][i]; // ignore self co-cluster items
	// 		for (int j = i-1; j >= 0; j--) { // row
	// 			column_sums[j] = column_sums[j+1] + M.getElement(j, i) /*M[j][i]*/ ;
	// 			sums[j][i] = sums[j][i-1] + column_sums[j];

	// 			int blockLength = i - j;
	// 			double s = sums[j][i] / Math.pow(blockLength, alpha);
	// 			meansCompute[blockLength].increment(s);
	// 			stddevsCompute[blockLength].increment(s);
	// 			numSamps[blockLength]++;
	// 		}
	// 	}

	// 	for (int i = 0; i < n; i++) {
	// 		means[i] = meansCompute[i].getResult();
	// 		stddevs[i] = stddevsCompute[i].getResult();
	// 		if (numSamps[i] < 100) { // require at least 100 samples to compute a Z-score
	// 			means[i] = Double.MAX_VALUE;
	// 			stddevs[i] = 1.0;
	// 		}
	// 	}

	// 	return sums;
	// }
	
	// Jan 21, 2015
	// public class BoundaryInclude {
	// 	public int boundary;
	// 	public boolean include;
	// 	BoundaryInclude(int boundary_, boolean include_) { 
	// 		boundary = boundary_;
	// 		include = include_;
	// 	}
	// };

	// public class BoundaryIncludeComparator implements Comparator<BoundaryInclude> {

	// 	@Override
	// 	public int compare(BoundaryInclude bi1, BoundaryInclude bi2) {
	// 		return bi1.boundary - bi2.boundary;
	// 	}
	// }

	/**
	 * Dynamic program to compute optimal arrangement of subgraphs to maximize 
	 * the sum of densities.
	 * TODO: ignore subgraphs of size < 2
	 * 
	 * @param density
	 * @return
	 */
	// Jan 21, 2015: not called
	// public Set<BoundaryInclude> findMaxDensity(double [][] density, float alpha, double [] means, double [] stddevs) {
	// 	// initialize
	// 	Set<BoundaryInclude> vertex_indices = new HashSet<BoundaryInclude>();
	// 	int i, j, n = density.length;
	// 	double [] d_opt = new double[n];
	// 	double s;
	// 	int [] arrows = new int[n];
	// 	boolean [] included = new boolean[n];

	// 	// init density matrix
	// 	for (i = 0; i < n; i++) {
	// 		d_opt[i] = density[i][i]; // density/1.0f
	// 	}
	// 	System.out.println("density.length = " + density.length + ", means.length = " + means.length + "\n");
	// 	// DP to compute optimal decomposition into subgraphs/patches/areas
	// 	// suppose we only allow subgraphs of size 2 and greater
	// 	arrows[0] = -1;
	// 	for (i = 1; i < n; i++) {
	// 		d_opt[i] = (density[0][i] / Math.pow(i, alpha) - means[i]) / stddevs[i];
	// 		arrows[i] = -1;
	// 		for (j = 1; j < i; j++) {
	// 			// s = d_opt[j] + density[j+1][i] / ( Math.pow(i-j, alpha) );

	// 			// Z - score
	// 			int blockLen = i - j + 1;
	// 			double zscore = ((density[j+1][i] / Math.pow(i-j, alpha) - means[blockLen]) / stddevs[blockLen]);
	// 			boolean include = true;
	// 			if (zscore < 0) { zscore = 0.0; include = false; }
	// 			s = d_opt[j] + zscore;

	// 			if (s > d_opt[i]) {
	// 				d_opt[i] = s;
	// 				arrows[i] = j; // traceback to [i][j]
	// 				included[i] = include;
	// 			}
	// 		}
	// 	}

	// 	// max is at d_opt[n-1] - traceback from there
	// 	vertex_indices.add(new BoundaryInclude(n-1, included[n-1]));
	// 	j = arrows[n-1];
	// 	boolean include = included[j];
	// 	while (j > 0) {
	// 		vertex_indices.add(new BoundaryInclude(j, include));
	// 		j = arrows[j];
	// 		if ( j > 0 ) { include = included[j]; }
	// 	}
		
	// 	return vertex_indices;
	// }

	/**
	 * Input parameters for the dynamic program. Constructing an instance
	 * precomputes the block sums of the matrix, the per-block-length
	 * statistics, and the largest block score.
	 */
	class OParams {
		/** The input matrix. */
		public MyMTJMatrix A;
		/**
		 * sums[j][i] (j &lt;= i) is the sum of the entries A(r,c) with
		 * j &lt;= r &lt;= c &lt;= i; the value is mirrored into sums[i][j].
		 */
		public double[][] sums;
		/** mu[len]: mean of sums/len^gamma over all blocks with d(j,i) == len. */
		public double[] mu;
		/** sigma[len]: standard deviation of the same samples. */
		public double[] sigma;
		/** median[len]: 50th percentile of the same samples. */
		public double[] median;
		/** numSamps[len]: number of blocks that contributed to index len. */
		public int[] numSamps;
		/** Largest q(i,j) over all i &lt; j, never below 0. */
		public double qmax;
		/** Exponent applied to the block length when scaling a block sum. */
		public double gamma;
		/** Size of the matrix as reported by A.getSize(). */
		int n;

		public OParams(MyMTJMatrix Ap,double gammap) {
			A = Ap;
			gamma = gammap;
			n = A.getSize();

			sums = new double[n][n];
			mu = new double[n];
			sigma = new double[n];
			median = new double[n];

			computeSumMuSigma();
			computeQmax();
		}

		/**
		 * Fills {@code sums} and derives {@code mu}, {@code sigma},
		 * {@code median} and {@code numSamps} for every block length.
		 */
		void computeSumMuSigma() {
			// a block length needs at least this many samples to get real statistics
			final int minSamples = 100;

			// one accumulator of each kind per block length
			final Mean[] meanByLength = new Mean[n];
			final StandardDeviation[] stdDevByLength = new StandardDeviation[n];
			final DescriptiveStatistics[] statsByLength = new DescriptiveStatistics[n];
			numSamps = new int[n];

			for (int len = 0; len < n; len++) {
				meanByLength[len] = new Mean();
				stdDevByLength[len] = new StandardDeviation();
				statsByLength[len] = new DescriptiveStatistics();
			}

			// a block of a single index holds just its diagonal entry
			for (int v = 0; v < n; v++) {
				sums[v][v] = A.getElement(v, v);
			}

			for (int col = 1; col < n; col++) {
				// running sum of column `col`, starting at the diagonal and
				// growing upwards one row at a time
				double columnTail = A.getElement(col, col);
				for (int row = col - 1; row >= 0; row--) {
					columnTail = columnTail + A.getElement(row, col);

					// block [row..col] = block [row..col-1] plus the new column part
					final double blockSum = sums[row][col - 1] + columnTail;
					sums[row][col] = blockSum;
					sums[col][row] = blockSum;

					final int len = d(row, col);
					final double scaled = blockSum / Math.pow(len, gamma);
					meanByLength[len].increment(scaled);
					stdDevByLength[len].increment(scaled);
					statsByLength[len].addValue(scaled);
					numSamps[len]++;
				}
			}

			for (int len = 0; len < n; len++) {
				if (numSamps[len] < minSamples) {
					// too few samples: use sentinels instead of the estimates
					mu[len] = Double.MAX_VALUE;
					sigma[len] = 1.0;
					median[len] = Double.MAX_VALUE;
				} else {
					mu[len] = meanByLength[len].getResult();
					sigma[len] = stdDevByLength[len].getResult();
					median[len] = statsByLength[len].getPercentile(50);
				}
			}
		}

		/**
		 * Stores in {@code qmax} the largest q(i,j) over all pairs i &lt; j,
		 * or 0 when no pair scores above 0.
		 */
		void computeQmax() {
			qmax = 0;
			for (int start = 0; start < n; start++) {
				for (int end = start + 1; end < n; end++) {
					final double score = q(start, end, this);
					if (score > qmax) {
						qmax = score;
					}
				}
			}
		}
	}

	/**
	 * Scaled weight of the block between indices {@code k} and {@code l}:
	 * the precomputed sum {@code p.sums[k][l]} divided by the block length
	 * {@code d(k,l)} raised to the power {@code p.gamma}.
	 *
	 * @param k first index of the block
	 * @param l last index of the block
	 * @param p precomputed parameters
	 * @return the scaled block sum
	 */
	double s(int k, int l, OParams p) {
		final double blockWeight = p.sums[k][l];
		final double lengthScale = Math.pow(d(k, l), p.gamma);
		return blockWeight / lengthScale;
	}

	/**
	 * Score of the block between indices {@code k} and {@code l}: its scaled
	 * weight {@code s(k,l,p)} minus the mean {@code p.mu} recorded for blocks
	 * of the same length.
	 *
	 * History: the mean-based form was enabled on Jan 21, 2015. The earlier
	 * (WABI) code subtracted {@code p.median[d(k,l)]} instead. Division by
	 * {@code p.sigma} is disabled in both variants.
	 *
	 * @param k first index of the block
	 * @param l last index of the block
	 * @param p precomputed parameters
	 * @return scaled block weight minus the mean for that block length
	 */
	double q(int k, int l, OParams p) {
		final double scaledWeight = s(k, l, p);
		final double meanForLength = p.mu[d(k, l)];
		return scaledWeight - meanForLength;
	}

	/**
	 * Block length used as the index into the per-length statistics.
	 *
	 * @param i first index
	 * @param j second index
	 * @return {@code j - i}
	 */
	int d(int i, int j) {
		final int length = j - i;
		return length;
	}

	/**
	 * Output of the dynamic program: the table of optimal scores together
	 * with the interval chosen as the last block for every end index.
	 */
	class OResults {
		/** OPT[l] is the best total score found for the prefix ending at l. */
		public double[] OPT;

		/** intervals[l] is the last block of the arrangement ending at l. */
		public Interval[] intervals;
	}

	/**
	 * Dynamic program over end indices. For every end index {@code l} it picks
	 * the start {@code k < l} of the last block that maximises
	 * {@code OPT[k-1] + max(q(k,l), 0)} (with {@code OPT[-1]} taken as 0) and
	 * records both the best total and the chosen block.
	 *
	 * The block stored in {@code intervals[l]} is flagged active only when its
	 * own score is strictly positive. When no candidate scores above 0 (which
	 * is always the case for {@code l == 0}, where no start {@code k < l}
	 * exists) the stored interval starts at 0, has score 0 and is inactive.
	 *
	 * The score of the winning block is remembered while scanning instead of
	 * being recomputed afterwards. Recomputing would evaluate {@code q(0,0)}
	 * for {@code l == 0}, which divides by {@code Math.pow(0, gamma)} and can
	 * yield an infinite score for a block that was never a candidate.
	 *
	 * @param p precomputed parameters
	 * @return the OPT table and, for each end index, the chosen last interval
	 */
	OResults computeOPTWithTraceback(OParams p) {
		final int n = p.n;

		OResults res = new OResults();
		res.OPT = new double[n];
		res.intervals = new Interval[n];

		for (int l = 0; l < n; l++) {
			double maxScore = 0;
			int breakPoint = 0;
			// score of the block [breakPoint, l] that produced maxScore
			double lastBlockScore = 0;

			for (int k = 0; k < l; k++) {
				// best score of everything that ends before the new block starts
				double optBefore = (k > 0) ? res.OPT[k-1] : 0.0;

				// a block with a negative score contributes nothing
				double newBlockScore = Math.max( q(k,l,p), 0 );
				double candidateOPT = optBefore + newBlockScore;
				if (candidateOPT > maxScore) {
					maxScore = candidateOPT;
					breakPoint = k;
					lastBlockScore = newBlockScore;
				}
			}

			res.OPT[l] = maxScore;
			boolean lastBlockActive = lastBlockScore > 0;
			res.intervals[l] = new Interval(breakPoint, l, lastBlockScore, lastBlockActive);
		}

		return res;
	}



	/**
	 * Score-only version of the dynamic program: fills the table of best
	 * totals without recording which blocks were chosen.
	 *
	 * For every end index the best total is the maximum, over all starts
	 * strictly before it, of the best total ending just before the start
	 * (0 when the start is index 0) plus {@code max(q(start,end), 0)}. The
	 * total is 0 when no start improves on 0.
	 *
	 * @param p precomputed parameters
	 * @return array whose entry {@code l} is the best total for end index l
	 */
	double[] computeOPT(OParams p) {
		final int size = p.n;
		final double[] best = new double[size];

		for (int end = 0; end < size; end++) {
			double top = 0;

			for (int start = 0; start < end; start++) {
				final double prefix = (start == 0) ? 0 : best[start - 1];
				final double total = prefix + Math.max(q(start, end, p), 0);
				if (total > top) {
					top = total;
				}
			}

			best[end] = top;
		}

		return best;
	}

	


	/**
	 * Given a list of indices where the matrix is cut, returns a set of Clique
	 * that each represent a subgraph. Each subgraph contains indices that ended
	 * up in it.
	 * If do_cutoff is true, then the function returns only those subgraphs that
	 * have density higher than expected (weight * S_size / 2).
	 * 
	 * @param set
	 * @param sums
	 * @param weight
	 * @param do_cutoff
	 * @return
	 */
	// Jan 21, 2015: not called
// 	public ArrayList<Clique> constructCliques(Set<BoundaryInclude> set, double[][] sums, double [] means, double [] stddevs, double avg_weight, double edge_prob, boolean do_cutoff, int n, float alpha) {
// 		ArrayList<BoundaryInclude> list = new ArrayList<BoundaryInclude>(set);

// 		Collections.sort(list, new BoundaryIncludeComparator());
// 		Clique cliq;
// 		ArrayList<Clique> cliques = new ArrayList<Clique>();
// 		double clique_density, avg_density, s_pq;
// 		int h_pq;
// 		DecimalFormat df = new DecimalFormat("#.###");
// 		double maxCliqueQuality = -Double.MAX_VALUE;
// 		System.out.println("set.size() = " + set.size() + "\n");
// 		int prevIndex = 0, currentIndex;
// 		boolean includeCurrentIndex = false;
// 		int all_sizes = 0, max_size = 0, min_size = n;
// 		for (int i = 0; i < set.size(); i++) {
// 			currentIndex = list.get(i).boundary;
// 			includeCurrentIndex = list.get(i).include;
// 			s_pq = sums[prevIndex][currentIndex];
// 			h_pq = currentIndex - prevIndex + 1;
// 			all_sizes += h_pq;

// 			clique_density = ((s_pq / h_pq) - means[h_pq]) / stddevs[h_pq];
// 			// avg_density = (h_pq - 1) * s_pq / ( (n-1) * (n-2) ); // submitted to BMC Bioinf
// 			int m_s = h_pq * (h_pq-1) / 2; // max num of edges in the subgraph
// 			avg_density = avg_weight * edge_prob * m_s / Math.pow(h_pq, alpha);
// 			double other_density = avg_weight * m_s / Math.pow(h_pq, alpha);
			
// 			// cliques w/ density less than avg_edge_density * (k choose 2) / v don't make the cut
// 			// if (clique_density > weight * size || !do_cutoff) { // OLD
// 			// if (clique_density > avg_density || !do_cutoff) {
			
// 			// if ( (clique_density > avg_density && avg_density > 0) || !do_cutoff) { // original
// 			if ( includeCurrentIndex ) { //(clique_density > 0) || !do_cutoff) { // z-score
// 				cliq = new Clique();
// 				maxCliqueQuality = Math.max(clique_density, maxCliqueQuality);
// 				cliq.setQualityValue(clique_density);
// 				for (int j = prevIndex; j <= currentIndex; j++) {
// 					cliq.addIndex(j);
// 				}
// 				cliques.add(cliq);
				
// 				if (h_pq > max_size) max_size = h_pq;
// 				if (h_pq > 15) System.err.println(prevIndex + " " + currentIndex);
// 				if (min_size > h_pq) min_size = h_pq;
				
// //				System.out.println("size " + h_pq + " avg_dens " + df.format(avg_density) + " cliq_dens " + clique_density + " ratio " + (clique_density / avg_density) );
// 			}
// 			prevIndex = currentIndex + 1;
// 		}
		
// 		double scale = 1.0 / maxCliqueQuality;
// 		for (Clique c : cliques) {
// 			c.setQualityValue( c.getQualityValue() * scale );
// 		}

// 		System.out.println("Total cliques: " + list.size() + ", after filtering: " + cliques.size());
// 		System.out.println("Avg core size " + (all_sizes * 1.0f/ set.size()) );
// 		System.out.println("Max core size " + max_size );
// 		System.out.println("Min core size " + min_size );
// //		System.out.println("Median core size " + cliques.get( list.size() / 2 ) );
		
// 		return cliques;
// 	}
}
