/*
 * =+====1====+====2====+====3====+====4====+====5====+====6====+====7====+====8====+====9====+====0
 
Copyright (c) 2011, Kornel Laskowski
All rights reserved.

Redistribution and use in source and binary forms, with or without modification, are permitted
provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice, this list of
      conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright notice, this list of
      conditions and the following disclaimer in the documentation and/or other materials provided
      with the distribution.
    * Neither the name of Sigtactica Research or of the Royal Institute of Technology (KTH) or of
      Carnegie Mellon University nor the names of their contributors may be used to endorse or
      promote products derived from this software without specific prior written permission.
 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 ===+====1====+====2====+====3====+====4====+====5====+====6====+====7====+====8====+====9====+== */

#include <stdlib.h>
#include <assert.h>
#include <stdio.h>
#include <math.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <getopt.h>
#include "sutils.h"
#include "sort.h"
#include "FileName.h"
#include "q.h"
#include "metric.h"
#include "spedo_n_nu.h"

/* =+====1====+====2====+====3====+====4====+====5====+====6====+====7====+====8====+====9====+== */

/*
 * Symbolic constant with value 2.  NOTE(review): presumably the size of the
 * binary symbol alphabet; it is not referenced by the code shown in this
 * file -- confirm before relying on it.
 */
#define BINARY 2

/* -+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+-- */

/* State variables of the getopt family, re-declared here explicitly. */
extern char *  optarg;
extern int     optind;
extern int     opterr;
extern int     optopt;

/* -+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+-- */

/* Program name used as the prefix of every diagnostic; main() overwrites it with argv[ 0 ]. */
char *  progName = "spedo";

/* Values taken by the optional command-line settings when they are not given. */
static const int     default_KmaxModel     =  3;
static const int     default_nu            =  0;
static const double  default_unkFraction   =  1.0e-5;
static const double  default_add           =  0.0;
static const double  default_addbo         =  0.0;

/* =+====1====+====2====+====3====+====4====+====5====+====6====+====7====+====8====+====9====+== */

/*
 * Print a one-line synopsis of the command-line options to stderr, using the
 * global progName as the program name.
 *
 * The synopsis lists every option accepted by main()'s option table,
 * including -add,bo, which is mutually exclusive with -add.
 */
void Usage()
{
	fprintf
		(
			stderr,
			"Usage: %s -history <definitionString> -trainQDir <dirName> -devQDir <dirName> -testQDir <dirName> [ -KmaxModel <integer> ] [ -nu <integer> ] [ -unkFraction <float> ] [ -add <double> | -add,bo <double> ]\n",
			progName
		); 

	return;
}

/* -+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+-- */

/*
 * Exit with a diagnostic unless the directory supplied for option
 * -<optionName> was given on the command line, can be stat()ed, and is a
 * directory.  The usage synopsis is printed only when the option is missing.
 */
static void checkQDirOrExit( const char * optionName, const char * dirName )
{
	struct stat  statBuf;

	if ( dirName == (const char *) NULL )
	{
		fprintf( stderr, "%s: -%s not specified\n", progName, optionName );
		Usage();
		exit( -1 );
	}

	if ( stat( dirName, &statBuf ) != 0 )
	{
		fprintf
			(
				stderr,
				"%s: Cannot access -%s value [%s]\n",
				progName,
				optionName,
				dirName
			);
		exit( -1 );
	}

	if ( ! S_ISDIR( statBuf.st_mode ) )
	{
		fprintf
			(
				stderr,
				"%s: -%s value [%s] exists, but not a directory\n",
				progName,
				optionName,
				dirName
			);
		exit( -1 );
	}

	return;
}

/* -+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+-- */

/*
 * Obtain the list of "Q" files in qDirName via FileNameGETLIST() and build,
 * via strindexx(), an index table over the file names.
 *
 * On return *fileNameTabPtr and *nFilesPtr hold what FileNameGETLIST()
 * produced and *sortedIndexTabPtr holds a malloc()ed index table; the caller
 * releases all of them (see freeQFileList()).
 */
static void loadSortedQFileList
	(
		char *            qDirName,
		char ***          fileNameTabPtr,
		int *             nFilesPtr,
		unsigned long **  sortedIndexTabPtr
	)
{
	size_t  nAlloc;

	FileNameGETLIST
		(
			qDirName,
			"Q",
			fileNameTabPtr,
			nFilesPtr
		);

	/*
	 * malloc( 0 ) is allowed to return NULL, which would trip the assertion
	 * below for an empty directory; always request at least one element.
	 */
	nAlloc = ( *nFilesPtr > 0 ) ? (size_t) *nFilesPtr : 1;

	*sortedIndexTabPtr = (unsigned long *) malloc( nAlloc * sizeof( unsigned long ) );
	assert( *sortedIndexTabPtr != (unsigned long *) NULL );
	strindexx( *nFilesPtr, *fileNameTabPtr - 1, *sortedIndexTabPtr - 1 ); /* NRC 1-offset */

	return;
}

/* -+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+-- */

/*
 * Release a file list obtained with loadSortedQFileList(): every file name,
 * the table of names, and the index table.
 */
static void freeQFileList
	(
		char **          fileNameTab,
		int              nFiles,
		unsigned long *  sortedIndexTab
	)
{
	int  fileIdx;

	for ( fileIdx = 0; fileIdx < nFiles; fileIdx ++ )
	{
		free( (void *) (fileNameTab[ fileIdx ]) );
	}
	free( (void *) fileNameTab );
	free( (void *) sortedIndexTab );

	return;
}

/* -+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+-- */

/*
 * Score every file of one data set against the model.
 *
 * Files are visited in the order given by sortedIndexTab.  For each file the
 * path and a line "<label> <metric>" are printed to stdout, and the per-file
 * metric is added into *allMetric, which is cleared on entry.
 */
static void evalDataSet
	(
		const char *     label,
		char **          fileNameTab,
		unsigned long *  sortedIndexTab,
		int              nFiles,
		SPEDO_n_nu *     spedo,
		Metric *         allMetric
	)
{
	int  fileIdx;

	metricCLEAR( allMetric );

	for ( fileIdx = 0; fileIdx < nFiles; fileIdx ++ )
	{
		char *  inPathName;
		int     T;
		int     K;
		int *   q;
		int     t;
		Metric  fileMetric;

		metricCLEAR( &fileMetric );

		/* Entries of sortedIndexTab are 1-offset (NRC convention), hence the "- 1". */
		inPathName = fileNameTab[ sortedIndexTab[ fileIdx ] - 1 ];
		QReadAndEncode( inPathName, &q, &T, &K );

		for ( t = 0; t < T; t ++ )
		{
			metricACCUM( &fileMetric, spedo, EvalType_ALL, K, q, t );
		}

		free( (void *) q );

		printf( "\t%s\n", inPathName );
		printf( "\t\t%s ", label );
		metricPRINT( &fileMetric );
		fflush( stdout );

		metricADD( allMetric, &fileMetric );
	}

	return;
}

/* -+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+-- */

/*
 * Print the data-set summary line: the label, the history definition,
 * KmaxModel, the smoothing method with its parameter, and the accumulated
 * metric.
 */
static void printSummary
	(
		const char *  label,
		const char *  definitionString,
		int           KmaxModel,
		const char *  smoothName,
		double        smoothValue,
		Metric *      allMetric
	)
{
	printf
		(
			"ALL %s %s KmaxModel %d %s %f ",
			label,
			definitionString,
			KmaxModel,
			smoothName,
			smoothValue
		);
	metricPRINT( allMetric );
	fflush( stdout );

	return;
}

/* -+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+-- */

/*
 * Program entry point.
 *
 * Steps: echo the command line to stdout; parse and validate the options;
 * list the Q files of the train, dev and test directories; accumulate model
 * statistics over every training file; build the model with the selected
 * smoothing (-add, -add,bo, or the default "ml" with unkFraction); evaluate
 * the model on the train, dev and test sets, printing per-file and summary
 * metrics; release the file lists.
 *
 * Returns 0 on success.  Every command-line or directory error prints a
 * diagnostic to stderr and terminates via exit( -1 ).
 */
int main( int argc, char * argv[] ) 
{
	int                KmaxModel;
	double             unkFraction;
	int                nu;
	double             add;
	int                add_flag;
	double             addbo;
	int                addbo_flag;
	char *             trainQDirName;
	char *             devQDirName;
	char *             testQDirName;
	char *             definitionString;
	int                depth;
	SubTreeType *      subTreeTypeTab;
	int *              subTreeDelayTab;
	int                delayMax;
	SPEDO_n_nu          spedo;
	char **            trainFileNameTab;
	int                trainNFiles;
	unsigned long *    trainSortedIndexTab;
	char **            devFileNameTab;
	int                devNFiles;
	unsigned long *    devSortedIndexTab;
	char **            testFileNameTab;
	int                testNFiles;
	unsigned long *    testSortedIndexTab;
	const char *       smoothName;
	double             smoothValue;

	progName = argv[ 0 ];

	KmaxModel = default_KmaxModel;
	unkFraction = default_unkFraction;
	nu = default_nu;
	add = default_add;
	addbo = default_addbo;
	add_flag = 0;
	addbo_flag = 0;
	trainQDirName = (char *) NULL;
	devQDirName = (char *) NULL;
	testQDirName = (char *) NULL;
	definitionString = (char *) NULL;

	/*
	 * Echo the command line so that the output records how it was produced.
	 */

	{
		int  argIdx;

		printf( "Command line:" );
		for ( argIdx = 0; argIdx < argc; argIdx ++ )
		{
			printf( " %s", argv[ argIdx ] );
		}
		printf( "\n" );
	}

	/*
	 * Parse the options.
	 */

	while ( 1 )
	{
		int  optChar;
		int  option_index = 0;
		static struct option  long_options[] =
			{
				{ "KmaxModel",    required_argument, NULL, 1 },
				{ "add",          required_argument, NULL, 2 },
				{ "add,bo",       required_argument, NULL, 3 },
				{ "trainQDir",    required_argument, NULL, 4 },
				{ "devQDir",      required_argument, NULL, 5 },
				{ "testQDir",     required_argument, NULL, 6 },
				{ "history",      required_argument, NULL, 7 },
				{ "nu",           required_argument, NULL, 8 },
				{ "unkFraction",  required_argument, NULL, 9 },
				/* getopt_long requires an all-zero entry to mark the end of the table. */
				{ NULL,           0,                 NULL, 0 }
			};

		optChar = getopt_long_only( argc, argv, "", long_options, &option_index );
		if ( optChar == -1 )
		{
			break;
		}

		switch( optChar )
		{
		case 1: /* KmaxModel */
			KmaxModel = str2int( optarg );
			break;
		case 2: /* add */
			add = str2dbl( optarg );
			add_flag = 1;
			break;
		case 3: /* add,bo */
			addbo = str2dbl( optarg );
			addbo_flag = 1;
			break;
		case 4: /* trainQDir */
			trainQDirName = optarg;
			break;
		case 5: /* devQDir */
			devQDirName = optarg;
			break;
		case 6: /* testQDir */
			testQDirName = optarg;
			break;
		case 7: /* history */
			definitionString = optarg;
			break;
		case 8: /* nu */
			nu = str2int( optarg );
			break;
		case 9: /* unkFraction */
			unkFraction = str2dbl( optarg );
			break;
		case '?':
		default:
			fprintf( stderr, "%s: badly formatted argument list\n", progName );
			Usage();
			exit( -1 );
		}
	}

	/*
	 * Validate the option values.  The additive constants are checked
	 * explicitly rather than with assert() so that the check survives
	 * NDEBUG builds; the negated ">=" form also rejects NaN.
	 */

	if ( add_flag && ! ( add >= 0 ) )
	{
		fprintf( stderr, "%s: add must be non-negative\n", progName );
		Usage();
		exit( -1 );
	}

	if ( addbo_flag && ! ( addbo >= 0 ) )
	{
		fprintf( stderr, "%s: add,bo must be non-negative\n", progName );
		Usage();
		exit( -1 );
	}

	if ( nu < 0 )
	{
		fprintf( stderr, "%s: nu must be non-negative\n", progName );
		Usage();
		exit( -1 );
	}

	if ( ( unkFraction < 0.0 ) || ( unkFraction > 1.0 ) )
	{
		fprintf( stderr, "%s: unkFraction must lie in [0,1]\n", progName );
		Usage();
		exit( -1 );
	}

	if ( add_flag && addbo_flag )
	{
		fprintf( stderr, "%s: only one of -add, -add,bo can be specified\n", progName );
		Usage();
		exit( -1 );
	}

	checkQDirOrExit( "trainQDir", trainQDirName );
	checkQDirOrExit( "devQDir", devQDirName );
	checkQDirOrExit( "testQDir", testQDirName );

	if ( definitionString == (char *) NULL )
	{
		fprintf( stderr, "%s: -history not specified\n", progName );
		Usage();
		exit( -1 );
	}

	if
		(
			spedo_n_nuPARSEDEFSTRING
				(
					definitionString,
					&depth,
					&subTreeTypeTab,
					&subTreeDelayTab,
					&delayMax
				)
			!= 0 
		)
	{
		fprintf( stderr, "%s: -history string is mis-formatted\n", progName );
		Usage();
		exit( -1 );
	}

	/*
	 * List the Q files of each data set.
	 */

	loadSortedQFileList( trainQDirName, &trainFileNameTab, &trainNFiles, &trainSortedIndexTab );
	loadSortedQFileList( devQDirName, &devFileNameTab, &devNFiles, &devSortedIndexTab );
	loadSortedQFileList( testQDirName, &testFileNameTab, &testNFiles, &testSortedIndexTab );

	/*
	 * Train the model.
	 */

	/* NOTE(review): the literal 2 presumably corresponds to the BINARY macro -- confirm. */
	spedo_n_nuALLOC( &spedo, 2, KmaxModel, nu, depth, subTreeTypeTab, subTreeDelayTab, delayMax );
	spedo_n_nuCLEAR( &spedo );

	{
		int  trainFileIdx;

		for ( trainFileIdx = 0; trainFileIdx < trainNFiles; trainFileIdx ++ )
		{
			char *  inPathName_train;
			int     T_train;
			int     K_train;
			int *   q_train;
			int     t;

			inPathName_train = trainFileNameTab[ trainSortedIndexTab[ trainFileIdx ] - 1 ];
			QReadAndEncode( inPathName_train, &q_train, &T_train, &K_train );

			for ( t = 0; t < T_train; t ++ )
			{
				spedo_n_nuACCUM( &spedo, K_train, q_train, t );
			}

			free( (void *) q_train );
		}
	}

	/*
	 * Smooth the model, remembering the method name and parameter for the
	 * summary lines printed below.
	 */

	if ( add_flag )
	{
		spedo_n_nuBUILD_add( &spedo, EvalType_ALL, add );
		smoothName = "add";
		smoothValue = add;
	}
	else if ( addbo_flag )
	{
		spedo_n_nuBUILD_addbo( &spedo, EvalType_ALL, addbo );
		smoothName = "add,bo";
		smoothValue = addbo;
	}
	else
	{
		spedo_n_nuBUILD_ml( &spedo, EvalType_ALL, unkFraction );
		smoothName = "ml";
		smoothValue = unkFraction;
	}

	/*
	 * Test the model on all three datasets.
	 */

	{
		Metric  all_trainMetric;

		evalDataSet( "train", trainFileNameTab, trainSortedIndexTab, trainNFiles, &spedo, &all_trainMetric );
		printSummary( "train", definitionString, KmaxModel, smoothName, smoothValue, &all_trainMetric );
	}

	{
		Metric  all_devMetric;

		evalDataSet( "dev", devFileNameTab, devSortedIndexTab, devNFiles, &spedo, &all_devMetric );
		printSummary( "dev", definitionString, KmaxModel, smoothName, smoothValue, &all_devMetric );
	}

	{
		Metric  all_testMetric;

		evalDataSet( "test", testFileNameTab, testSortedIndexTab, testNFiles, &spedo, &all_testMetric );
		printSummary( "test", definitionString, KmaxModel, smoothName, smoothValue, &all_testMetric );
	}

	/*
	 * Release what was allocated above.
	 * NOTE(review): the model set up by spedo_n_nuALLOC() is not released
	 * here; confirm whether spedo_n_nu.h offers a matching release routine.
	 */

	free( (void *) subTreeTypeTab );
	free( (void *) subTreeDelayTab );

	freeQFileList( trainFileNameTab, trainNFiles, trainSortedIndexTab );
	freeQFileList( devFileNameTab, devNFiles, devSortedIndexTab );
	freeQFileList( testFileNameTab, testNFiles, testSortedIndexTab );

	return 0;
}

/* =+====1====+====2====+====3====+====4====+====5====+====6====+====7====+====8====+====9====+== */

