/*
 * =+====1====+====2====+====3====+====4====+====5====+====6====+====7====+====8====+====9====+====0
 
Copyright (c) 2011, Kornel Laskowski
All rights reserved.

Redistribution and use in source and binary forms, with or without modification, are permitted
provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice, this list of
      conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright notice, this list of
      conditions and the following disclaimer in the documentation and/or other materials provided
      with the distribution.
    * Neither the name of Sigtactica Research or of the Royal Institute of Technology (KTH) or of
      Carnegie Mellon University nor the names of their contributors may be used to endorse or
      promote products derived from this software without specific prior written permission.
 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 ===+====1====+====2====+====3====+====4====+====5====+====6====+====7====+====8====+====9====+== */

#include <stdlib.h>
#include <assert.h>
#include <stdio.h>
#include <math.h>
#include <float.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <getopt.h>
#include <string.h>
#include <ctype.h>
#include "sutils.h"
#include "sort.h"
#include "FileName.h"
#include "q.h"
#include "metric.h"
#include "spedo_n_nu.h"

/* =+====1====+====2====+====3====+====4====+====5====+====6====+====7====+====8====+====9====+== */

/* NOTE(review): BINARY is not referenced by Usage() or main() in this file; purpose presumably
 * the size of a two-symbol alphabet -- confirm before relying on it. */
#define BINARY 2

/* -+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+-- */

/* getopt parser state. <getopt.h>, included above, already declares these objects; the explicit
 * redeclarations are redundant but harmless. Only optarg is read directly in main(). */
extern char *  optarg;
extern int     optind;
extern int     opterr;
extern int     optopt;

/* -+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+-- */

/* Program name used as the prefix of diagnostics and in the usage line; main() overwrites this
 * initial value with argv[ 0 ]. */
char *  progName = "spedo";

/* Values main() uses when the corresponding command-line option is not given. */
static const int     default_KmaxModel     =  3;        /* -KmaxModel */
static const int     default_nu            =  0;        /* -nu */
static const double  default_unkFraction   =  0.00001;  /* -unkFraction */
static const double  default_add           =  0;        /* -add */
static const double  default_addbo         =  0;        /* -add,bo */

/* =+====1====+====2====+====3====+====4====+====5====+====6====+====7====+====8====+====9====+== */

/*
 * Usage: write a one-line synopsis of the accepted command-line options to stderr.
 *
 * The line is prefixed with the global progName. The synopsis lists every option that main()
 * parses, including -add,bo (mutually exclusive with -add) and the MIN keyword accepted by
 * -unkFraction. The function does not terminate the program; callers exit themselves.
 */
void Usage( void )
{
	fprintf
		(
			stderr,
			"Usage: %s -history <definitionString> -QDir <dirName> [ -KmaxModel <integer> ] [ -nu <integer> ] [ -unkFraction <float>|MIN ] [ -add <double> | -add,bo <double> ]\n",
			progName
		);

	return;
}

/* -+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+-- */

/*
 * main: train and evaluate three models per test file and print per-file and overall metrics.
 *
 * Command line (long options, single or double dash):
 *   -history <definitionString>   required; parsed by spedo_n_nuPARSEDEFSTRING
 *   -QDir <dirName>               required; must be an existing directory
 *   -KmaxModel <integer>          optional
 *   -nu <integer>                 optional; must be non-negative
 *   -unkFraction <float>|MIN      optional; must lie in [0,1]; MIN (any case) selects DBL_MIN
 *   -add <double>                 optional; non-negative; selects spedo_n_nuBUILD_add
 *   -add,bo <double>              optional; non-negative; selects spedo_n_nuBUILD_addbo
 * -add and -add,bo are mutually exclusive; when neither is given spedo_n_nuBUILD_ml is used
 * with unkFraction.
 *
 * For every file returned by FileNameGETLIST( QDirName, "Q", ... ), visited in the order given
 * by strindexx, three models are built:
 *   self - trained on the test file only,
 *   glob - trained on all files,
 *   rest - trained on all files except the test file.
 * Each model is scored on the test file; the per-file metrics are printed and accumulated, and
 * three "ALL ..." summary lines are printed at the end.
 *
 * Note that every training file is re-read for every test file, so the number of file reads
 * grows with the square of the number of files.
 *
 * Returns 0 on success. Invalid arguments cause a diagnostic on stderr and exit( -1 ).
 */
int main( int argc, char * argv[] )
{
	int                KmaxModel;
	int                nu;
	double             unkFraction;
	double             add;
	int                add_flag;
	double             addbo;
	int                addbo_flag;
	char *             QDirName;
	char *             definitionString;
	int                depth;
	SubTreeType *      subTreeTypeTab;
	int *              subTreeDelayTab;
	int                delayMax;
	char **            fileNameTab;
	int                nFiles;
	unsigned long *    sortedIndexTab;
	Metric             all_metric_self;
	Metric             all_metric_glob;
	Metric             all_metric_rest;

	progName = argv[ 0 ];

	KmaxModel = default_KmaxModel;
	unkFraction = default_unkFraction;
	nu = default_nu;
	add = default_add;
	addbo = default_addbo;
	QDirName = (char *) NULL;
	definitionString = (char *) NULL;

	/* Echo the command line so that the output is self-describing. */
	{
		int  argIdx;

		printf( "Command line:" );
		for ( argIdx = 0; argIdx < argc; argIdx ++ )
		{
			printf( " %s", argv[ argIdx ] );
		}
		printf( "\n" );
	}

	add_flag = 0;
	addbo_flag = 0;

	while ( 1 )
	{
		int  optChar;
		int  option_index = 0;
		static struct option  long_options[] =
			{
				{ "KmaxModel",    1, 0, 1 },
				{ "add",          1, 0, 2 },
				{ "add,bo",       1, 0, 3 },
				{ "QDir",         1, 0, 4 },
				{ "history",      1, 0, 5 },
				{ "nu",           1, 0, 6 },
				{ "unkFraction",  1, 0, 7 },
				{ 0,              0, 0, 0 }  /* all-zero terminator required by getopt_long */
			};
		optChar = getopt_long_only( argc, argv, "", long_options, &option_index );
		if ( optChar == -1 )
		{
			break;
		}

		switch( optChar )
		{
		case 1: /* KmaxModel */
			KmaxModel = str2int( optarg );
			break;
		case 2: /* add */
			add = str2dbl( optarg );
			add_flag = 1;
			break;
		case 3: /* add,bo */
			addbo = str2dbl( optarg );
			addbo_flag = 1;
			break;
		case 4: /* QDir */
			QDirName = optarg;
			break;
		case 5: /* history */
			definitionString = optarg;
			break;
		case 6: /* nu */
			nu = str2int( optarg );
			break;
		case 7: /* unkFraction */
			{
				char *  cPtr;

				/* Upper-case in place so that "min", "Min", ... all match "MIN". The cast
				 * to unsigned char keeps toupper() defined for bytes with the high bit set. */
				for ( cPtr = optarg; *cPtr != '\0'; cPtr ++ )
				{
					*cPtr = (char) toupper( (unsigned char) *cPtr );
				}
			}
			if ( strcmp( optarg, "MIN" ) == 0 )
			{
				unkFraction = DBL_MIN;
			}
			else
			{
				unkFraction = str2dbl( optarg );
			}
			break;
		case '?':
		default:
			fprintf( stderr, "%s: badly formatted argument list\n", progName );
			Usage();
			exit( -1 );
		}
	}

	/* Written as !( x >= 0 ) so that a NaN is rejected as well. */
	if ( !( add >= 0 ) || !( addbo >= 0 ) )
	{
		fprintf( stderr, "%s: -add and -add,bo values must be non-negative\n", progName );
		Usage();
		exit( -1 );
	}

	if ( ( unkFraction < 0.0 ) || ( unkFraction > 1.0 ) )
	{
		fprintf( stderr, "%s: unkFraction must lie in [0,1]\n", progName );
		Usage();
		exit( -1 );
	}

	if ( nu < 0 )
	{
		fprintf( stderr, "%s: nu must be non-negative\n", progName );
		Usage();
		exit( -1 );
	}

	if ( ( add_flag + addbo_flag ) > 1 )
	{
		fprintf( stderr, "%s: only one of -add and -add,bo can be specified\n", progName );
		Usage();
		exit( -1 );
	}

	if ( QDirName == (char *) NULL )
	{
		fprintf( stderr, "%s: -QDir not specified\n", progName );
		Usage();
		exit( -1 );
	}
	else
	{
		struct stat  statBuf;

		if ( stat( QDirName, &statBuf ) != 0 )
		{
			fprintf
				(
					stderr,
					"%s: Cannot access -QDir value [%s]\n",
					progName,
					QDirName
				);
			exit( -1 );
		}

		if ( ! S_ISDIR( statBuf.st_mode ) )
		{
			fprintf
				(
					stderr,
					"%s: -QDir value [%s] exists, but not a directory\n",
					progName,
					QDirName
				);
			exit( -1 );
		}
	}

	if ( definitionString == (char *) NULL )
	{
		fprintf( stderr, "%s: -history not specified\n", progName );
		Usage();
		exit( -1 );
	}
	else
	{
		if
			(
				spedo_n_nuPARSEDEFSTRING
					(
						definitionString,
						&depth,
						&subTreeTypeTab,
						&subTreeDelayTab,
						&delayMax
					)
				!= 0
			)
		{
			fprintf( stderr, "%s: -history string is mis-formatted\n", progName );
			Usage();
			exit( -1 );
		}
	}

	FileNameGETLIST
		(
			QDirName,
			"Q",
			&fileNameTab,
			&nFiles
		);

	/* Always request at least one element: malloc( 0 ) may legitimately return NULL, which
	 * would otherwise trip the assertion when the directory yields no files. */
	sortedIndexTab =
		(unsigned long *) malloc( ( ( nFiles > 0 ) ? (size_t) nFiles : 1 ) * sizeof( unsigned long ) );
	assert( sortedIndexTab != (unsigned long *) NULL );
	if ( nFiles > 0 )
	{
		/* Nothing to sort for an empty list, so the call is skipped in that case. */
		strindexx( nFiles, fileNameTab - 1, sortedIndexTab - 1 ); /* NRC 1-offset */
	}

	metricCLEAR( &all_metric_self );
	metricCLEAR( &all_metric_glob );
	metricCLEAR( &all_metric_rest );

	{
		int  testFileIdx;

		for ( testFileIdx = 0; testFileIdx < nFiles; testFileIdx ++ )
		{
			SPEDO_n_nu  spedo_self;   /* foreach test meeting r in R: train on r, test on r */
			SPEDO_n_nu  spedo_glob;   /* foreach test meeting r in R: train on R, test on r */
			SPEDO_n_nu  spedo_rest;   /* foreach test meeting r in R: train on R-r, test on r */
			int        trainFileIdx;
			Metric     metric_self;
			Metric     metric_glob;
			Metric     metric_rest;
			char *     inPathName_test;
			int        T_test;
			int        K_test;
			int *      q_test;

			spedo_n_nuALLOC( &spedo_self, 2, KmaxModel, nu, depth, subTreeTypeTab, subTreeDelayTab, delayMax );
			spedo_n_nuCLEAR( &spedo_self );

			spedo_n_nuALLOC( &spedo_glob, 2, KmaxModel, nu, depth, subTreeTypeTab, subTreeDelayTab, delayMax );
			spedo_n_nuCLEAR( &spedo_glob );

			spedo_n_nuALLOC( &spedo_rest, 2, KmaxModel, nu, depth, subTreeTypeTab, subTreeDelayTab, delayMax );
			spedo_n_nuCLEAR( &spedo_rest );

			/*
			 * Train all three models.
			 */

			for ( trainFileIdx = 0; trainFileIdx < nFiles; trainFileIdx ++ )
			{
				char *  inPathName_train;
				int     T_train;
				int     K_train;
				int *   q_train;

				/* sortedIndexTab holds 1-based indices (NRC convention), hence the - 1. */
				inPathName_train = fileNameTab[ sortedIndexTab[ trainFileIdx ] - 1 ];
				QReadAndEncode( inPathName_train, &q_train, &T_train, &K_train );

				{
					int  t;

					for ( t = 0; t < T_train; t ++ )
					{
						if ( trainFileIdx == testFileIdx )
						{
							spedo_n_nuACCUM( &spedo_self, K_train, q_train, t );
						}

						spedo_n_nuACCUM( &spedo_glob, K_train, q_train, t );

						if ( trainFileIdx != testFileIdx )
						{
							spedo_n_nuACCUM( &spedo_rest, K_train, q_train, t );
						}
					}
				}

				free( (void *) q_train );
			}

			/*
			 * Smooth all three models.
			 */

			if ( add_flag )
			{
				spedo_n_nuBUILD_add( &spedo_self, EvalType_ALL, add );
				spedo_n_nuBUILD_add( &spedo_glob, EvalType_ALL, add );
				spedo_n_nuBUILD_add( &spedo_rest, EvalType_ALL, add );
			}
			else if ( addbo_flag )
			{
				spedo_n_nuBUILD_addbo( &spedo_self, EvalType_ALL, addbo );
				spedo_n_nuBUILD_addbo( &spedo_glob, EvalType_ALL, addbo );
				spedo_n_nuBUILD_addbo( &spedo_rest, EvalType_ALL, addbo );
			}
			else
			{
				spedo_n_nuBUILD_ml( &spedo_self, EvalType_ALL, unkFraction );
				spedo_n_nuBUILD_ml( &spedo_glob, EvalType_ALL, unkFraction );
				spedo_n_nuBUILD_ml( &spedo_rest, EvalType_ALL, unkFraction );
			}

			/*
			 * Test all three models.
			 */

			metricCLEAR( &metric_self );
			metricCLEAR( &metric_glob );
			metricCLEAR( &metric_rest );

			inPathName_test = fileNameTab[ sortedIndexTab[ testFileIdx ] - 1 ];
			QReadAndEncode( inPathName_test, &q_test, &T_test, &K_test );

			{
				int  t;

				for ( t = 0; t < T_test; t ++ )
				{
					metricACCUM( &metric_self, &spedo_self, EvalType_ALL, K_test, q_test, t );
					metricACCUM( &metric_glob, &spedo_glob, EvalType_ALL, K_test, q_test, t );
					metricACCUM( &metric_rest, &spedo_rest, EvalType_ALL, K_test, q_test, t );
				}
			}

			free( (void *) q_test );

			printf( "\t%s\n", inPathName_test );
			printf( "\t\tself " );
			metricPRINT( &metric_self );
			printf( "\t\tglob " );
			metricPRINT( &metric_glob );
			printf( "\t\trest " );
			metricPRINT( &metric_rest );
			fflush( stdout );

			metricADD( &all_metric_self, &metric_self );
			metricADD( &all_metric_glob, &metric_glob );
			metricADD( &all_metric_rest, &metric_rest );

			spedo_n_nuFREE( &spedo_self );
			spedo_n_nuFREE( &spedo_glob );
			spedo_n_nuFREE( &spedo_rest );
		}
	}

	free( (void *) subTreeTypeTab );
	free( (void *) subTreeDelayTab );

	{
		int  fileIdx;

		for ( fileIdx = 0; fileIdx < nFiles; fileIdx ++ )
		{
			free( (void *) (fileNameTab[ fileIdx ]) );
		}
		free( (void *) fileNameTab );

		free( (void *) sortedIndexTab );
	}

	/*
	 * Overall summaries: one "ALL <model> ..." line per model, each followed by the
	 * accumulated metric. definitionString points into argv and is still valid here.
	 */
	{
		const char *  labelTab[ 3 ];
		Metric *      metricTab[ 3 ];
		int           modelIdx;

		labelTab[ 0 ] = "self";
		labelTab[ 1 ] = "glob";
		labelTab[ 2 ] = "rest";
		metricTab[ 0 ] = &all_metric_self;
		metricTab[ 1 ] = &all_metric_glob;
		metricTab[ 2 ] = &all_metric_rest;

		for ( modelIdx = 0; modelIdx < 3; modelIdx ++ )
		{
			printf( "ALL %s %s KmaxModel %d", labelTab[ modelIdx ], definitionString, KmaxModel );
			if ( add_flag )
			{
				printf( " add %f ", add );
			}
			else if ( addbo_flag )
			{
				printf( " add,bo %f ", addbo );
			}
			else
			{
				printf( " ml %f ", unkFraction );
			}
			metricPRINT( metricTab[ modelIdx ] );
		}
	}
	fflush( stdout );

	return 0;
}

/* =+====1====+====2====+====3====+====4====+====5====+====6====+====7====+====8====+====9====+== */

