/*
 * =+====1====+====2====+====3====+====4====+====5====+====6====+====7====+====8====+====9====+====0
 
Copyright (c) 2010, Kornel Laskowski
All rights reserved.

Redistribution and use in source and binary forms, with or without modification, are permitted
provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice, this list of
      conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright notice, this list of
      conditions and the following disclaimer in the documentation and/or other materials provided
      with the distribution.
    * Neither the name of Sigtactica Research or of the Royal Institute of Technology (KTH) or of
      Carnegie Mellon University nor the names of their contributors may be used to endorse or
      promote products derived from this software without specific prior written permission.
 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 ===+====1====+====2====+====3====+====4====+====5====+====6====+====7====+====8====+====9====+== */

#include <stdlib.h>
#include <assert.h>
#include <stdio.h>
#include <limits.h>
#include <math.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <getopt.h>
#include "bit.h"
#include "sutils.h"
#include "sort.h"
#include "DblMatrix.h"
#include "FileName.h"
#include "tmcount.h"
#include "tm.h"
#include "mpedo_2_0.h"

/* =+====1====+====2====+====3====+====4====+====5====+====6====+====7====+====8====+====9====+== */

/*
 * Second argument of every tmcountALLOC() and tmALLOC() call in this file.
 * NOTE(review): presumably the number of values a single per-column state can take
 * (QReadAndEncode() thresholds each column to one bit) -- confirm against tmcount.h / tm.h.
 */
#define BINARY 2

/* -+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+-- */

/*
 * getopt(3) globals; only optarg is read in this file.
 * NOTE(review): <unistd.h> and <getopt.h>, both included above, normally declare these
 * already -- confirm before dropping the explicit externs.
 */
extern char *  optarg;
extern int     optind;
extern int     opterr;
extern int     optopt;

/* -+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+-- */

/* Prefix used in diagnostics and in Usage(); main() overwrites it with argv[ 0 ]. */
char *  progName = "mpedo";

/*
 * Option defaults applied in main() before the command line is parsed.
 * For the two Kmax values, -1 is the "not set" sentinel tested before the tmcount / tm / mpedo
 * allocations, where it selects K. unkFraction is forwarded unchanged to mpedo_2_0EXPORT().
 */
static const int     default_KmaxModel     = -1;
static const int     default_KmaxTopology  = -1;
static const double  default_unkFraction   =  0.00001;

/* =+====1====+====2====+====3====+====4====+====5====+====6====+====7====+====8====+====9====+== */

/*
 * Writes the one-line usage synopsis, prefixed with the current progName, to stderr.
 */
void Usage()
{
	fprintf( stderr, "Usage: %s -QDir <dirName>\n", progName );
}

/* -+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+-- */

/*
 * Reads a T x K matrix of doubles from inPathName with DblMatrixREAD() and packs every row into
 * one int: bit k of q[ t ] is set exactly when Q[ t ][ k ] > 0.5.
 *
 *   inPathName  path handed to DblMatrixREAD()
 *   qPtr        out: malloc()ed array of T encoded rows; the caller owns it and must free() it
 *   TPtr        out: number of rows
 *   KPtr        out: number of columns (= number of bits used per encoded row)
 *
 * Aborts via assert() when the allocation fails, when T is negative, or when K does not fit in
 * the non-sign bits of an int (shifting a signed int into or past its sign bit is undefined).
 */
void QReadAndEncode
	(
		const char *  inPathName,
		int **        qPtr,
		int *         TPtr,
		int *         KPtr
	)
{
	double **  Q;
	int        T;
	int        K;
	int *      q;
	int        t;

	DblMatrixREAD( inPathName, &Q, &T, &K );

	assert( T >= 0 );
	assert( ( K >= 0 ) && ( K < (int) ( sizeof( int ) * CHAR_BIT ) - 1 ) );

	/* malloc( 0 ) may legitimately return NULL; always request at least one element so that an
	   empty matrix is not mistaken for an allocation failure. */
	q = (int *) malloc( ( ( T > 0 ) ? (size_t) T : (size_t) 1 ) * sizeof( int ) );
	assert( q != (int *) NULL );

	for ( t = 0; t < T; t ++ )
	{
		unsigned int  bits;
		int           k;

		/* Accumulate in an unsigned so that the shifts are well defined; the range check on K
		   above guarantees that the result fits in a non-negative int. */
		bits = 0u;
		for ( k = 0; k < K; k ++ )
		{
			if ( Q[ t ][ k ] > 0.5 )
			{
				bits |= ( 1u << k );
			}
		}
		q[ t ] = (int) bits;
	}

	DblMatrixFREE( &Q, T, K );

	*qPtr = q;
	*TPtr = T;
	*KPtr = K;

	return;
}

/* -+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+-- */

/*
 * Returns how many of the bit positions 0 .. K-1 hold the same value, as reported by GETBIT(),
 * in q1 and in q2. The result is 0 when K <= 0.
 */
int QCompare
	(
		const int  q1,
		const int  q2,
		const int  K
	)
{
	int  nAgree = 0;
	int  bit = 0;

	while ( bit < K )
	{
		nAgree += ( GETBIT( q1, bit ) == GETBIT( q2, bit ) ) ? 1 : 0;
		bit ++;
	}

	return nAgree;
}

/* -+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+-- */

/*
 * Accumulators for one scoring condition: either every scored transition, or only the
 * transitions whose source and destination states differ.
 */
typedef struct
{
	double  sumKNormNegLogProb;                /* sum of -log2( prob ) / K */
	double  sumT;                              /* number of transitions tallied */
	double  sumKGuessStochasticCorrectNum;     /* bits matched by tmPREDSTOCHASTIC() guesses */
	double  sumKGuessStochasticCorrectDen;     /* bits compared (K per transition) */
	double  sumKGuessDeterministicCorrectNum;  /* bits matched by tmPREDDETERMINISTIC() guesses */
	double  sumKGuessDeterministicCorrectDen;  /* bits compared (K per transition) */
} ScoreSums;

/* Resets every accumulator in *sums to zero. */
static void ScoreSumsClear( ScoreSums * sums )
{
	sums->sumKNormNegLogProb = 0.0;
	sums->sumT = 0.0;

	sums->sumKGuessStochasticCorrectNum = 0.0;
	sums->sumKGuessStochasticCorrectDen = 0.0;

	sums->sumKGuessDeterministicCorrectNum = 0.0;
	sums->sumKGuessDeterministicCorrectDen = 0.0;

	return;
}

/* Adds the scores of a single transition to *sums. */
static void ScoreSumsTally
	(
		ScoreSums *   sums,
		const double  instKNormNegLogProb,
		const int     guessStochasticKCorrect,
		const int     guessDeterministicKCorrect,
		const int     K
	)
{
	sums->sumKNormNegLogProb += instKNormNegLogProb;
	sums->sumT += 1.0;

	sums->sumKGuessStochasticCorrectNum += guessStochasticKCorrect;
	sums->sumKGuessStochasticCorrectDen += K;

	sums->sumKGuessDeterministicCorrectNum += guessDeterministicKCorrect;
	sums->sumKGuessDeterministicCorrectDen += K;

	return;
}

/* Adds every accumulator of *src to the matching accumulator of *dst. */
static void ScoreSumsAdd( ScoreSums * dst, const ScoreSums * src )
{
	dst->sumKNormNegLogProb += src->sumKNormNegLogProb;
	dst->sumT += src->sumT;

	dst->sumKGuessStochasticCorrectNum += src->sumKGuessStochasticCorrectNum;
	dst->sumKGuessStochasticCorrectDen += src->sumKGuessStochasticCorrectDen;

	dst->sumKGuessDeterministicCorrectNum += src->sumKGuessDeterministicCorrectNum;
	dst->sumKGuessDeterministicCorrectDen += src->sumKGuessDeterministicCorrectDen;

	return;
}

/*
 * Prints one report line to stdout. PP is 2 raised to the mean K-normalised negative log2
 * probability, ES / ED are the percentages of bits matched by the stochastic / deterministic
 * guesses; "(all)" is computed from *sums and "(sub)" from *sumsChange. A condition with no
 * tallied transitions divides zero by zero and is therefore printed as NaN.
 */
static void ScoreSumsPrint
	(
		const char *       label,
		const ScoreSums *  sums,
		const ScoreSums *  sumsChange
	)
{
	printf
		(
			"%s PP(all): %-18.16g PP(sub): %-18.16g ES(all): %6.2f ES(sub): %6.2f ED(all): %6.2f ED(sub): %6.2f\n",
			label,
			pow( 2.0, ( sums->sumKNormNegLogProb / sums->sumT ) ),
			pow( 2.0, ( sumsChange->sumKNormNegLogProb / sumsChange->sumT ) ),
			100.0 * sums->sumKGuessStochasticCorrectNum / sums->sumKGuessStochasticCorrectDen,
			100.0 * sumsChange->sumKGuessStochasticCorrectNum / sumsChange->sumKGuessStochasticCorrectDen,
			100.0 * sums->sumKGuessDeterministicCorrectNum / sums->sumKGuessDeterministicCorrectDen,
			100.0 * sumsChange->sumKGuessDeterministicCorrectNum / sumsChange->sumKGuessDeterministicCorrectDen
		);

	return;
}

/*
 * Scores the transition qFrom -> qTo under *tm. The result is always tallied into *sums and,
 * when the two states differ, into *sumsChange as well. The model is queried in a fixed order
 * (tmPROB, tmPREDSTOCHASTIC, tmPREDDETERMINISTIC) for every transition.
 */
static void ScoreTransition
	(
		TM *         tm,
		const int    qFrom,
		const int    qTo,
		const int    K,
		ScoreSums *  sums,
		ScoreSums *  sumsChange
	)
{
	double  prob;
	double  instKNormNegLogProb;
	int     guessStochastic;
	int     guessStochasticKCorrect;
	int     guessDeterministic;
	int     guessDeterministicKCorrect;

	prob = tmPROB( tm, qFrom, qTo );
	instKNormNegLogProb = - log2( prob ) / K;

	guessStochastic = tmPREDSTOCHASTIC( tm, qFrom );
	guessStochasticKCorrect = QCompare( guessStochastic, qTo, K );

	guessDeterministic = tmPREDDETERMINISTIC( tm, qFrom );
	guessDeterministicKCorrect = QCompare( guessDeterministic, qTo, K );

	ScoreSumsTally
		(
			sums,
			instKNormNegLogProb,
			guessStochasticKCorrect,
			guessDeterministicKCorrect,
			K
		);

	if ( qFrom != qTo )
	{
		ScoreSumsTally
			(
				sumsChange,
				instKNormNegLogProb,
				guessStochasticKCorrect,
				guessDeterministicKCorrect,
				K
			);
	}

	return;
}

/*
 * Builds a model from the transitions into q[ tFirst ], q[ tFirst + 2 ], ... and then scores
 * those same transitions with it. The predecessor of q[ t ] is q[ t - 1 ]; for t == 0 the
 * predecessor is state 0. tFirst must be 0 or 1. A Kmax argument of -1 selects K.
 */
static void TrainAndTest
	(
		const int *   q,
		const int     T,
		const int     K,
		const int     KmaxTopology,
		const int     KmaxModel,
		const double  unkFraction,
		const int     tFirst,
		ScoreSums *   sums,
		ScoreSums *   sumsChange
	)
{
	const int  Ktopology = ( KmaxTopology == -1 ) ? K : KmaxTopology;
	const int  Kmodel = ( KmaxModel == -1 ) ? K : KmaxModel;
	TMCount    tmCount;
	MPEDO_2_0  mpedo;
	TM         tm;
	int        t;

	/* Count the selected transitions. */
	tmcountALLOC( &tmCount, BINARY, K, Ktopology );
	tmcountCLEAR( &tmCount );
	for ( t = tFirst; t < T; t += 2 )
	{
		const int  qFrom = ( t == 0 ) ? 0 : q[ t - 1 ];
		const int  qTo = q[ t ];

		tmcountINCREMENT( &tmCount, qFrom, qTo );
	}

	/* Counts -> mpedo model -> transition model; each intermediate is released once consumed. */
	mpedo_2_0ALLOC( &mpedo, Kmodel );
	mpedo_2_0CLEAR( &mpedo );
	mpedo_2_0IMPORT( &mpedo, &tmCount );
	tmcountFREE( &tmCount );

	tmALLOC( &tm, BINARY, K, Ktopology );
	tmCLEAR( &tm );
	mpedo_2_0EXPORT( &mpedo, &tm, unkFraction );
	mpedo_2_0FREE( &mpedo );

	/* Score the very transitions the model was built from. */
	for ( t = tFirst; t < T; t += 2 )
	{
		const int  qFrom = ( t == 0 ) ? 0 : q[ t - 1 ];
		const int  qTo = q[ t ];

		ScoreTransition( &tm, qFrom, qTo, K, sums, sumsChange );
	}

	tmFREE( &tm );

	return;
}

/*
 * Entry point. Echoes the command line, parses the long options, checks that -QDir names a
 * directory, then processes every file returned by FileNameGETLIST( QDirName, "Q", ... ) in
 * sorted name order: the file is read with QReadAndEncode(), a model is trained and scored on
 * the transitions into even-indexed rows ("C") and another on those into odd-indexed rows
 * ("D"), and one report line is printed. A final "ALL" line pools the sums of all files.
 *
 * Returns 0 on success; exits with status -1 on bad arguments or an unusable -QDir.
 */
int main( int argc, char * argv[] )
{
	int              KmaxModel;
	int              KmaxTopology;
	double           unkFraction;
	char *           QDirName;
	char **          fileNameTab;
	int              nFiles;
	unsigned long *  sortedIndexTab;
	ScoreSums        allSums;
	ScoreSums        allSumsChange;

	progName = argv[ 0 ];

	KmaxModel = default_KmaxModel;
	KmaxTopology = default_KmaxTopology;
	unkFraction = default_unkFraction;
	QDirName = (char *) NULL;

	{
		int  argIdx;

		printf( "Command line:" );
		for ( argIdx = 0; argIdx < argc; argIdx ++ )
		{
			printf( " %s", argv[ argIdx ] );
		}
		printf( "\n" );
	}

	while ( 1 )
	{
		int  optChar;
		int  option_index = 0;
		static struct option  long_options[] =
			{
				{ "KmaxModel",    1, 0, 1 },
				{ "KmaxTopology", 1, 0, 2 },
				{ "unkFraction",  1, 0, 3 },
				{ "QDir",         1, 0, 4 },
				{ 0,              0, 0, 0 }  /* getopt_long() requires an all-zero terminator */
			};
		optChar = getopt_long_only( argc, argv, "", long_options, &option_index );
		if ( optChar == -1 )
		{
			break;
		}

		switch( optChar )
		{
		case 1: /* KmaxModel */
			KmaxModel = str2int( optarg );
			break;
		case 2: /* KmaxTopology */
			KmaxTopology = str2int( optarg );
			break;
		case 3: /* unkFraction */
			unkFraction = str2dbl( optarg );
			break;
		case 4: /* QDir */
			QDirName = optarg;
			break;
		case '?':
		default:
			fprintf( stderr, "%s: badly formatted argument list\n", progName );
			Usage();
			exit( -1 );
		}
	}

	/* NOTE(review): the values parsed for -KmaxModel and -KmaxTopology have never reached the
	   models; the per-file code has always substituted the file's own K for both. That
	   behaviour is preserved below. Confirm whether the options were meant to take effect. */
	(void) KmaxModel;
	(void) KmaxTopology;

	if ( QDirName == (char *) NULL )
	{
		fprintf( stderr, "%s: -QDir not specified\n", progName );
		Usage();
		exit( -1 );
	}
	else
	{
		struct stat  statBuf;

		if ( stat( QDirName, &statBuf ) != 0 )
		{
			fprintf
				(
					stderr,
					"%s: Cannot access -QDir value [%s]\n",
					progName,
					QDirName
				);
			exit( -1 );
		}

		if ( ! S_ISDIR( statBuf.st_mode ) )
		{
			fprintf
				(
					stderr,
					"%s: -QDir value [%s] exists, but not a directory\n",
					progName,
					QDirName
				);
			exit( -1 );
		}
	}

	FileNameGETLIST
		(
			QDirName,
			"Q",
			&fileNameTab,
			&nFiles
		);

	/* Request at least one element: malloc( 0 ) may return NULL, which would otherwise make an
	   empty directory look like an allocation failure. */
	sortedIndexTab =
		(unsigned long *) malloc
			(
				( ( nFiles > 0 ) ? (size_t) nFiles : (size_t) 1 ) * sizeof( unsigned long )
			);
	assert( sortedIndexTab != (unsigned long *) NULL );
	if ( nFiles > 0 )
	{
		/* The sort routine works on 1-offset arrays (NRC convention), so the indices it stores
		   are 1-based as well. */
		strindexx( nFiles, fileNameTab - 1, sortedIndexTab - 1 ); /* NRC 1-offset */
	}

	ScoreSumsClear( &allSums );
	ScoreSumsClear( &allSumsChange );

	{
		int  testFileIdx;

		for ( testFileIdx = 0; testFileIdx < nFiles; testFileIdx ++ )
		{
			char *     inPathName;
			int        T;
			int        K;
			int *      q;
			ScoreSums  testSums;
			ScoreSums  testSumsChange;

			/* sortedIndexTab holds 1-based indices, hence the "- 1". */
			inPathName = fileNameTab[ sortedIndexTab[ testFileIdx ] - 1 ];
			QReadAndEncode( inPathName, &q, &T, &K );

			if ( T < 1 )
			{
				/* There is no q[ 0 ] to train or test on. */
				fprintf
					(
						stderr,
						"%s: [%s] contains no rows, skipping\n",
						progName,
						inPathName
					);
				free( (void *) q );
				continue;
			}

			ScoreSumsClear( &testSums );
			ScoreSumsClear( &testSumsChange );

			/* Train on C, test on C: transitions into rows 0, 2, 4, ... */
			TrainAndTest( q, T, K, K, K, unkFraction, 0, &testSums, &testSumsChange );

			/* Train on D, test on D: transitions into rows 1, 3, 5, ... */
			TrainAndTest( q, T, K, K, K, unkFraction, 1, &testSums, &testSumsChange );

			free( (void *) q );

			ScoreSumsPrint( inPathName, &testSums, &testSumsChange );

			ScoreSumsAdd( &allSums, &testSums );
			ScoreSumsAdd( &allSumsChange, &testSumsChange );
		}
	}

	ScoreSumsPrint( "ALL", &allSums, &allSumsChange );

	free( (void *) sortedIndexTab );

	return 0;
}

/* =+====1====+====2====+====3====+====4====+====5====+====6====+====7====+====8====+====9====+== */

