/************************************************************************
 *                                                                      *
 *  Program package "tooldiag":                                         *
 *                                                                      *
 *                                                                      *
 *  Version 1.3                                                         *
 *  Date: 15 November 1993                                              *
 *                                                                      *
 *  NOTE: This program package is copyrighted in the sense that it      *
 *  may be used for scientific purposes. The package as a whole, or     *
 *  parts thereof, cannot be included or used in any commercial         *
 *  application without written permission granted by the author.       *
 *  No programs contained in this package may be copied for commercial  *
 *  distribution.                                                       *
 *                                                                      *
 *  All comments  concerning this program package may be sent to the    *
 *  e-mail address 'tr@fct.unl.pt'.                                     *
 *                                                                      *
 ************************************************************************/

#include <stdio.h>
#include <string.h>
#include "def.h"

/* Shared data set description; this file reads its nrClass, nrFeat,
   sumSampl and per-class C[] entries (name, numSampl, S). */
extern universe *U;
/* When non-zero, the routines below print extra progress messages. */
extern bool verbose;
/* Inspects a file by name; the result is compared against TRUE, FALSE and
   EMPTY by the caller in this file. */
extern bool is_ascii_file();
/* Per-sample flags indexed by the running sample number; consulted by
   split_train_test() after it calls split_tt() (presumably filled there --
   TODO confirm). */
extern bool *train_smp, *test_smp;

/* Set to TRUE by intFaceLoop() when the user quits; polled by intface(). */
static bool done;
/* Scratch buffer for short interactive answers (menu choice, "press enter"). */
static str80 buf;
/* Scratch buffer for longer lines of input. */
static str100 linebuf;
/* Default file names offered by split_train_test(). */
static char defNameTrain[] = "_train", defNameTest[] = "_test";

/* Upper bound used for string lengths in this file. */
#define MAXSTRLEN 100


/*
 * Read one line from stdin into dst (capacity cap bytes, cap >= 1).
 * The line terminator is removed; an overlong line is truncated and its
 * remainder is discarded so it cannot be mistaken for the next answer.
 * Returns 1 on success, 0 on end of input (dst is then the empty string).
 */
static int merge_read_line( char *dst, size_t cap )
{
 size_t len;
 int c;

 if( fgets( dst, (int) cap, stdin ) == NULL )
 {
   dst[0] = '\0';
   return 0;
 }
 len = strcspn( dst, "\r\n" );
 if( dst[len] == '\0' )          /* no terminator seen: drop the rest */
   while( (c = getchar()) != '\n' && c != EOF )
     ;
 dst[len] = '\0';
 return 1;
}

/*
 * Open an input data file in the mode reported by is_ascii_file().
 * *isAscii receives that result.  Returns the stream, or NULL after the
 * user has been told that the file could not be found/opened.
 */
static FILE *merge_open_input( char *name, int *isAscii )
{
 FILE *fp = NULL;

 *isAscii = is_ascii_file( name );
 switch( *isAscii )
 {
   case TRUE : fp = fopen( name, f_open_text_r ); break;
   case FALSE : fp = fopen( name, f_open_bin_r ); break;
   case EMPTY : break;
   default: fprintf(stderr, "Error in merge_data_files, exit...\n"); exit(1);
 }
 if( fp == NULL )
 {
   printf("File not found...");
   merge_read_line( buf, sizeof buf );
 }
 return fp;
}

/*
 * Read one sample (dim feature values followed by a class name) from fp.
 * ASCII files hold whitespace separated tokens; binary files hold raw
 * feature values followed by the class name and a newline.  The name is
 * truncated to nameCap-1 characters.
 * Returns 1 if a complete sample was read, 0 on end of file or bad data.
 */
static int merge_read_record( FILE *fp, int isAscii, float *vec, int dim,
                              char *name, size_t nameCap )
{
 char fmt[32];
 size_t len;
 int k, c;

 if( isAscii )
 {
   for( k = 0; k < dim; k++ )
     if( fscanf( fp, "%f", &(vec[k]) ) != 1 )
       return 0;
   /* bounded "%<n>s" so an overlong name cannot overflow the buffer */
   sprintf( fmt, "%%%lus", (unsigned long) (nameCap - 1) );
   if( fscanf( fp, fmt, name ) != 1 )
     return 0;
   /* discard what is left of an overlong name; matches nothing otherwise */
   (void) fscanf( fp, "%*[^ \t\r\n\f\v]" );
   return 1;
 }

 if( fread( vec, sizeof vec[0], (size_t) dim, fp ) != (size_t) dim )
   return 0;
 len = 0;
 while( (c = fgetc( fp )) != EOF && c != '\n' )
   if( len + 1 < nameCap )
     name[len++] = (char) c;
 name[len] = '\0';
 return ( c == '\n' || len > 0 );
}

/*
 * Interactive merge of two data files into a third one.
 *
 * Asks for two input files and one output file (all names must differ) and
 * for the output mode (binary is the default, 'a' selects ASCII).  The
 * output header is the sum of both feature counts; every output sample is
 * the feature values of the first file followed by those of the second one
 * and the common class name.  Merging stops at the end of the shorter file,
 * or with an error message as soon as the class names of corresponding
 * samples differ.  All streams and buffers are released on every path.
 */
static void merge_data_files()
{
 FILE *in1 = NULL, *in2 = NULL, *out = NULL;
 str80 in1Nam, in2Nam, outNam;
 str80 classNam1, classNam2;
 int in1Ascii, in2Ascii;
 bool outAscii;
 FeatVector featVec1 = NULL, featVec2 = NULL;
 int k, dim1 = 0, dim2 = 0;

 printf("\n--- Merging two data files ---\n");
 printf("\tMerge file: "); merge_read_line( in1Nam, sizeof in1Nam );
 if( in1Nam[0] == '\0' )
 {
   printf("File name empty..."); merge_read_line( buf, sizeof buf );
   return;
 }
 in1 = merge_open_input( in1Nam, &in1Ascii );
 if( in1 == NULL )
   return;

 printf("\t  and file: "); merge_read_line( in2Nam, sizeof in2Nam );
 if( in2Nam[0] == '\0' )
 {
   printf("File name empty..."); merge_read_line( buf, sizeof buf );
   goto cleanup;
 }
 if( strcmp(in1Nam,in2Nam)==0 )
 {
   printf("File names identical..."); merge_read_line( buf, sizeof buf );
   goto cleanup;
 }
 in2 = merge_open_input( in2Nam, &in2Ascii );
 if( in2 == NULL )
   goto cleanup;

 printf("\t   to file: "); merge_read_line( outNam, sizeof outNam );
 if( outNam[0] == '\0' )
 {
   printf("File name empty..."); merge_read_line( buf, sizeof buf );
   goto cleanup;
 }
 if( strcmp(in1Nam,outNam)==0 || strcmp(outNam,in2Nam)==0 )
 {
   printf("File names identical..."); merge_read_line( buf, sizeof buf );
   goto cleanup;
 }

 outAscii = FALSE;
 printf(" Write %s in b)inary or a)scii mode ?b\b", outNam );
 merge_read_line( buf, sizeof buf );
 if( buf[0] == 'a' )
   outAscii = TRUE;
 if( outAscii )
   out = fopen( outNam, f_open_text_w );
 else
   out = fopen( outNam, f_open_bin_w );
 if( out == NULL )
 {
   printf("Cannot open %s! Exitus...\n", outNam );
   goto cleanup;
 }

 if( verbose )
   printf("\tMerging %s and %s to %s\n", in1Nam, in2Nam, outNam );

 /* the first data line of each file carries its number of features */
 dataline( in1, linebuf );
 if( sscanf( linebuf, "%d", &dim1 ) != 1 || dim1 <= 0 )
 {
   printf("Invalid number of features in %s...", in1Nam );
   merge_read_line( buf, sizeof buf );
   goto cleanup;
 }
 dataline( in2, linebuf );
 if( sscanf( linebuf, "%d", &dim2 ) != 1 || dim2 <= 0 )
 {
   printf("Invalid number of features in %s...", in2Nam );
   merge_read_line( buf, sizeof buf );
   goto cleanup;
 }

 /* element size is taken from the vector itself, not from a pointer type */
 featVec1 = (FeatVector) malloc( sizeof *featVec1 * (size_t) dim1 );
 featVec2 = (FeatVector) malloc( sizeof *featVec2 * (size_t) dim2 );
 if( featVec1 == NULL || featVec2 == NULL )
 {
   printf("Out of memory...");
   merge_read_line( buf, sizeof buf );
   goto cleanup;
 }
 fprintf( out, "%d\n", dim1 + dim2 );

 /* scan the input files sample by sample and write the merged samples */
 while( merge_read_record( in1, in1Ascii, featVec1, dim1,
                           classNam1, sizeof classNam1 ) &&
        merge_read_record( in2, in2Ascii, featVec2, dim2,
                           classNam2, sizeof classNam2 ) )
 {
   if( strcmp(classNam1,classNam2) != 0 )
   {
     printf("ERROR: Detected different class names in files %s and %s\n",
            in1Nam, in2Nam );
     fprintf(stderr,"\t>>>%s<<<  ----  >>>%s<<< ...", classNam1, classNam2 );
     merge_read_line( buf, sizeof buf );
     break;
   }
   if( outAscii )
   {
     for( k = 0; k < dim1; k++ )
       fprintf( out, "%f ", featVec1[k] );
     for( k = 0; k < dim2; k++ )
       fprintf( out, "%f ", featVec2[k] );
   }
   else
   {
     fwrite( featVec1, sizeof *featVec1, (size_t) dim1, out );
     fwrite( featVec2, sizeof *featVec2, (size_t) dim2, out );
   }
   fprintf( out, "%s\n", classNam1 );
 }

cleanup:
 if( featVec1 != NULL ) { FREE( featVec1 ); }
 if( featVec2 != NULL ) { FREE( featVec2 ); }
 if( in1 != NULL ) fclose( in1 );
 if( in2 != NULL ) fclose( in2 );
 if( out != NULL ) fclose( out );
}


static void split_train_test()
{
 FILE *outTrain = NULL, *outTest = NULL, *out;
 str80 outNam1, outNam2, classNam, tmpStr;
 bool outAscii, ok;
 float percentTrain, f = 70.0;
 int s, k, dim, class, smp;

 printf("\n--- Splitting the actual data set into training and test ---\n");
 printf("\tTraining file name: %s\b\b\b\b\b\b", defNameTrain ); gets( linebuf );
 strcpy( outNam1, DATA_DIR );
 if( linebuf[0] != '\0' )
   strcpy( tmpStr, linebuf );
 else
   strcpy( tmpStr, defNameTrain );
 strcat( outNam1, tmpStr );

 strcpy( outNam2, DATA_DIR );
 printf("\t    Test file name: %s\b\b\b\b\b", defNameTest ); gets( linebuf );
 if( linebuf[0] != '\0' )
 {
   if( strcmp( tmpStr, linebuf ) == 0 )
   {
      printf("File names identical, using default.\n");
      strcpy( outNam1, DATA_DIR );
      strcat( outNam1, defNameTrain );
      strcpy( tmpStr, defNameTest );
   }
   else
     strcpy( tmpStr, linebuf );
 }
 else
   strcpy( tmpStr, defNameTest );
 strcat( outNam2, tmpStr );

 outAscii = FALSE;
 printf(" Write in b)inary or a)scii mode ?b\b" ); gets( buf );
 if( buf[0] == 'a' )
   outAscii = TRUE;
 if( outAscii )
 {
   outTrain = fopen( outNam1, f_open_text_w );
   outTest = fopen( outNam2, f_open_text_w );
 }
 else
 {
   outTrain = fopen( outNam1, f_open_bin_w );
   outTest = fopen( outNam2, f_open_bin_w );
 }
 if( outTrain == NULL || outTest == NULL )
 {
   if( outTrain == NULL )
     printf("Cannot open %s!\n", outNam1 );
   if( outTest == NULL )
     printf("Cannot open %s!\n", outNam2 );
   printf("\tExitus...\n" );
   if( outTrain != NULL ) fclose( outTrain );
   if( outTest != NULL ) fclose( outTest );
   exit(1);
 }

 printf("Split data into ? percent training data? 70\b\b");
 do
 {
   gets( linebuf );
   if( linebuf[0] == '\0' )
     ok = TRUE; 
   else
   {
     sscanf( linebuf,"%f", &f );
     ok = ( f >= 0.0 && f <= 100.0 );
   }
   if( ! ok )
     printf("Invalid value! Again ? ");
   else
     percentTrain = f;
 }
 while( ! ok );

 if( verbose )
   printf("\tSplitting samples into\n\t%s and\n\t%s\n", outNam1, outNam2 );

 split_tt( U->sumSampl, percentTrain );

 fprintf( outTrain, "%d\n", U->nrFeat );
 fprintf( outTest, "%d\n", U->nrFeat );

 class = 0; smp = 0;
 for( s = 0; s < U->sumSampl; s++ )
 {
   if( train_smp[s] )
     out = outTrain;
   else if ( test_smp[s] )
     out = outTest;
   else { fprintf(stderr,"Sample neither train nor test. Exit...\n");exit(1);}

   if( outAscii )
     for( k = 0; k < U->nrFeat; k++ )
       fprintf( out, "%f ", U->C[class].S[smp*U->nrFeat+k] );
   else
     fwrite( &(U->C[class].S[smp*U->nrFeat]), sizeof(FeatVector*),
		U->nrFeat, out );
   fprintf( out, "%s\n", U->C[class].name );
   /* fprintf( stdout, "out=%d  s=%d  smp=%d  train=%d  test=%d  %s\n",
	(int)out, s, smp, train_smp[s], test_smp[s], U->C[class].name );	/**/
   smp++;
   if( smp == U->C[class].numSampl )
   {
     class++;
     smp = 0;
   }
 }
 fclose(outTrain); fclose(outTest); out = NULL;
}


/*
 * Print the interface menu once, read one answer and dispatch on its
 * first character:
 *   '?'        help for this menu
 *   '1' / '2'  LVQ / SNNS sub-menus
 *   '3' / '4'  merge two data files / split into train and test
 *   'q' / 'Q'  leave the menu
 *   other      show the current universe
 * The file-level flag `done` is FALSE on return unless the user quit or
 * standard input is exhausted.
 */
void intFaceLoop()
{
 size_t len;
 int c;

 printf("\n>>>>>----- INTERFACE MENU -----<<<<<<\n");
 printf("(1) Learning Vector Quantizer (LVQ)\n");
 printf("(2) Stuttgart Neural Network Simulator (SNNS)\n");
 printf("(3) Merge two data files to a single data file\n");
 printf("(4) Split a data file randomly into two (train & test)\n");
 printf("(Q)uit\n\n");
 printf("Choice: ");

 done = FALSE;
 if( fgets( buf, (int) sizeof buf, stdin ) == NULL )
 {
   /* no more input: leave the menu instead of looping forever */
   buf[0] = '\0';
   done = TRUE;
   return;
 }
 len = strcspn( buf, "\r\n" );
 if( buf[len] == '\0' )          /* overlong answer: discard the rest */
   while( (c = getchar()) != '\n' && c != EOF )
     ;
 buf[len] = '\0';

 switch( buf[0] )
  {
   case '?': help( LOOP_INTERFACE, buf ); break;
   case '1': lvqLoop(); break;
   case '2': snnsLoop(); break;
   case '3': merge_data_files(); break;
   case '4': split_train_test(); break;
   case 'q': case 'Q': done = TRUE; break;
   default: showUniv( stdout ); break;
  }
}


/*
 * Entry point of the interface menu.  Runs intFaceLoop() until it reports
 * completion through `done`, provided a universe with more than one class
 * is available; otherwise asks the user to load one and waits for Enter.
 */
void intface()
{
 int c;

 if( U != NULL && U->nrClass > 1 )
 {
   do
   {
     intFaceLoop();
   }
   while( !done );
 }
 else
 {
   printf("Please load universe first !...");
   /* bounded read; anything beyond the buffer is discarded */
   if( fgets( buf, (int) sizeof buf, stdin ) != NULL &&
       strchr( buf, '\n' ) == NULL )
     while( (c = getchar()) != '\n' && c != EOF )
       ;
 }
}
