/************************************************************************
 *                                                                      *
 *  Program package "tooldiag":                                         *
 *                                                                      *
 *                                                                      *
 *  Version 1.3                                                         *
 *  Date: 15 November 1993                                              *
 *                                                                      *
 *  NOTE: This program package is copyrighted in the sense that it      *
 *  may be used for scientific purposes. The package as a whole, or     *
 *  parts thereof, cannot be included or used in any commercial         *
 *  application without written permission granted by the author.       *
 *  No programs contained in this package may be copied for commercial  *
 *  distribution.                                                       *
 *                                                                      *
 *  All comments  concerning this program package may be sent to the    *
 *  e-mail address 'tr@fct.unl.pt'.                                     *
 *                                                                      *
 ************************************************************************/

#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include "def.h"

/* A class file holding fewer samples than this triggers a warning in
   loadUnivDir(); the "10" left in the comment is an earlier threshold. */
#define MINSAMPLE /*10*/ 6
/* Upper bound on the number of characters read for a class name when a
   binary data file is parsed in loadUnivFile(). */
#define MAXSTRLEN 100

/* Routines and flags defined in other modules of the package. */
extern bool err_directory();
extern bool next_directory();
/* When set, the loaders print more detailed progress information. */
extern bool verbose;
extern bool is_ascii_file();

/* global variables */

/* The one universe of the program; U is pointed at it by init_admin(). */
universe univ, *U = NULL;

/* dataDir and dataFile receive the user's input in loadUniv().
   featNameFile is not referenced in this part of the file -- presumably the
   name of the feature-name file; confirm against its users. */
str100 dataDir, dataFile, featNameFile;
/* Full path of the class file currently read by loadUnivDir(). */
static str100 classFile;
/* Scratch buffer for header lines and for keyboard input. */
static str80 linebuf;
/* Characters of the rotating progress indicator written to stderr. */
static char wheel[] = "|/-\\";
/* Result of is_ascii_file() for the data being loaded; the loaders also
   compare it against EMPTY to detect a file that could not be examined. */
static bool ascii_file = FALSE;

/* Path separator of the host system. */
#ifdef DOS
static char slash = '\\';
#else
static char slash = '/';
#endif

/*
 * init_Class - put a class record into its empty state.
 *
 * C: the record to reset.  Its buffers are NOT released here; the pointers
 *    are simply overwritten with NULL, so the caller has to free them
 *    beforehand if they were allocated.
 */
void init_Class( C )
Class *C;
{
 /* no samples, hence no a priori probability */
 C->numSampl = 0;
 C->a_priori_prob = 0.0;

 /* sample matrix and per-feature statistics are not allocated */
 C->S = NULL;
 C->sqrsum = NULL;
 C->stddev = NULL;
 C->mean = NULL;

 /* empty class name */
 *(C->name) = '\0';
}


void init_universe()
{
 strcpy( U->name, "UNDEFINED" );
 U->nrClass = 0;
 U->sumSampl = 0;
 U->nrFeat = 0;
 U->normalized = FALSE;
 U->min = NULL;
 U->max = NULL;
 U->nrSelFeat = 0;
 U->FSV = NULL;
 U->C = NULL;
}


void free_universe()
{
 int i;

 if( U == NULL )
   return;

 for( i = 0; i < U->nrClass; i++ )
 {
   FREE( U->C[i].S );
   FREE( U->C[i].mean ); FREE( U->C[i].stddev ); FREE( U->C[i].sqrsum );
   init_Class( &(U->C[i]) );
 }
 FREE( U->min ); FREE( U->max );
 FREE( U->FSV );
 FREE( U->C );
 init_universe();
 free_feat_names();
}


/*
 * init_admin - one-time set-up of this module.
 *
 * Makes the global pointer U refer to the static universe 'univ' and puts
 * that universe into its empty state.
 */
void init_admin()
{
 /* U has to be valid before init_universe() writes through it */
 U = &univ;

 init_universe();
}


void showUniv( f )
FILE *f;
{
 int i;

 fprintf( f, "\n-----------------------------------------------------------\n");
 fprintf( f, "    UNIVERSE                      : %20s", U->name );
 if( U->normalized )
   fprintf( f, " (normalized to [0,1])");
 fprintf( f, "\n      OBJECT CLASSES              : %20d\n", U->nrClass );
 fprintf( f, "      Nr. of all samples          : %20d\n", U->sumSampl );
 fprintf( f, "      Nr. of all      features    : %20d\n", U->nrFeat );
 fprintf( f, "      Nr. of selected features    : %20d\n", U->nrSelFeat );
 fprintf( f, "                                            Nr. of samples\n" );
 for( i = 0; i < U->nrClass; i++ )
   fprintf( f, " Nr. %3d : %30s   %5d\n", i+1, U->C[i].name, U->C[i].numSampl);
 fprintf( f, "-----------------------------------------------------------\n\n");
}


void loadUnivDir()
{
 FILE *cf = NULL;
 int i, j, row, k, w = 0, dim, cptr, sumSampl = 0, offset = 1;
 bool found;
 str100 fileName;
 float value, bias;
 char chr;

 if( verbose )
   { fprintf( stderr, "Searching sample file in %s\n", dataDir ); }
 else
   { fprintf( stderr, "Searching directory...\n"); }
 /* first count the files */
 open_directory( dataDir );
 if( err_directory() )
   { printf("Cannot open directory %s ...", dataDir ); gets(linebuf); return; }

 free_universe();
 strcpy( U->name, dataDir );
 if( dataDir[strlen(dataDir)-1] != slash )
 {
   dataDir[strlen(dataDir)] = slash;
   dataDir[strlen(dataDir)+1] = '\0';
 }
 /* copy only the name of the data file to the universe name */
 i = strlen( dataDir ) - 2;
 while( dataDir[i] != slash && i > 0 )
   i--;
 if( i == 0 )
   offset = 0; 
 strncpy( U->name, &(dataDir[i+offset]), strlen(&(dataDir[i+offset]))-1 );
 U->name[strlen(&(dataDir[i+offset]))-1] = '\0';

 do
 {
   found = next_directory( dataDir, fileName );
   if( found )
     (U->nrClass)++;
 }
 while( found );
 close_directory();
 if( verbose )
   printf("Found %d files\n", U->nrClass );

 if( U->nrClass <= 0 || U->nrClass > MAXCLASS )
   { printf(" Number of classes invalid! Exitus...\n" ); exit(1); }
 /* allocate space for the universe */
 FREE( U->C );
 U->C = (Class*)malloc( U->nrClass* sizeof( struct Class_ ) );
 for( i = 0; i < U->nrClass; i++ )
   init_Class( &(U->C[i]) );

 /* scan through the files */
 open_directory( dataDir );
 i = 0;
 if( ! verbose )
   { fprintf( stderr, "Loading ... "); fflush( stderr ); }
 do
 {
   found = next_directory( dataDir, fileName );
   if( found )
   {
     if( ! verbose )
       fprintf( stderr,"%c\b", wheel[(w++) % strlen(wheel)] ); fflush( stderr );

     strcpy( classFile, dataDir ); 
     strcat( classFile, fileName );

     /* check the first file in the directory if all data is in ascii
  		1.) Everything ascii data
  		2.) Feature data in binary format
     */
     if( i == 0 )
       ascii_file = is_ascii_file( classFile );
     if( ascii_file == EMPTY )
       { printf("Cannot open %s! Exitus...\n", classFile ); exit(1); }
     if( ascii_file )
       cf = fopen( classFile, f_open_text_r ); 
     else
       cf = fopen( classFile, f_open_bin_r );
     if( cf == NULL )
       { printf("Cannot open %s! Exitus...\n", classFile ); exit(1); }
     if( verbose )
       printf("Scanning file: %s\n", fileName );
     /* read the name of the class */
     dataline( cf, linebuf ); sscanf( linebuf, "%s", U->C[i].name );
     /* groupname if exists */
     dataline( cf, linebuf );
     /* if the group name exits ignore it */
     cptr = 0;
     while( linebuf[cptr] == ' ' )
       cptr++;
     if( linebuf[cptr] >= '0' && linebuf[cptr] <= '9' )
       sscanf( linebuf, "%d", &dim );
     else
     {
       /* Feature vector dimension */
       dataline( cf, linebuf ); sscanf( linebuf, "%d", &dim );
     } 
     /* check consistency */
     if( i == 0 )  /* first file ? */
     {
       U->nrFeat = dim;
       FREE( U->min ); FREE( U->max );
       U->min = (FeatVector) malloc(sizeof(FeatVector*) * U->nrFeat);
       U->max = (FeatVector) malloc(sizeof(FeatVector*) * U->nrFeat);
       for( k = 0; k < U->nrFeat; k++ )
       {
         U->min[k] = INFINITY; U->max[k] = -INFINITY;
       }
     }
     else
       if( dim != U->nrFeat )
       {
         printf("Dim. of feature vector inconsistent: %d != %d! Exitus..\n",
		dim, U->nrFeat ); exit(1);
       }
     /* feature vector dimension */
     dataline( cf, linebuf ); sscanf( linebuf, "%d", &(U->C[i].numSampl) );
     if( U->C[i].numSampl < MINSAMPLE )
       printf("Warning file %s has only %d samples!\n",
                fileName, U->C[i].numSampl );
     sumSampl += U->C[i].numSampl;
     /* now allocate space for one class */
     dim = sizeof(FeatVector*) * U->nrFeat * U->C[i].numSampl;
     FREE( U->C[i].S );
     FREE( U->C[i].mean ); FREE( U->C[i].stddev ); FREE( U->C[i].sqrsum );
     if( (U->C[i].S = (FeatVector) malloc(dim*sizeof(FeatVector*))) == NULL )
       { printf("No space for buffer 'U->C[i].S'! Exitus...\n"); exit(1); }
     U->C[i].mean = (FeatVector) malloc(sizeof(FeatVector*) * U->nrFeat);
     U->C[i].stddev = (FeatVector) malloc(sizeof(FeatVector*) * U->nrFeat);
     U->C[i].sqrsum = (FeatVector) malloc(sizeof(FeatVector*) * U->nrFeat);
     /* init mean and standard deviation and minimum and maximum values */
     for( k = 0; k < U->nrFeat; k++ )
     {
       U->C[i].mean[k] = 0.0;
       U->C[i].stddev[k] = 0.0;
       U->C[i].sqrsum[k] = 0.0;
     }
     /* read the samples and update mean and standard deviation */
     for( row = 0; row < U->C[i].numSampl; row++ )
     {
       for( k = 0; k < U->nrFeat; k++ )
       {
         if( ascii_file )
           fscanf( cf, "%f", &value );
         else
           fread( &value, sizeof(float), 1, cf );
         /* printf("i=%d  row=%d  k=%d ascii=%d   value=%f\n",
		i, row, k, ascii_file, value ); DBG; /**/
         U->C[i].S[row*U->nrFeat+k] = value;
         /* new mean value */
         U->C[i].mean[k] = 1.0/((float)(row+1))*(row*U->C[i].mean[k]+value);

         /* new auxiliary variable "sqrsum" */
         U->C[i].sqrsum[k] += value * value;

         /* new standart deviation */
         U->C[i].stddev[k] = (float)sqrt(fabs((1.0/(float)(row+1) *
                 U->C[i].sqrsum[k] - (U->C[i].mean[k] * U->C[i].mean[k]))));

         /* update minimum and maximum */
         if( value < U->min[k] )
           U->min[k] = value;
         if( value > U->max[k] )
           U->max[k] = value;
       }
     /* showFV( U->nrFeat, &(U->C[i].S[row*U->nrFeat]) ); /**/
     } 
     /* compensate for the bias of the standard deviation */
     if( U->C[i].numSampl > 1 )
     {
       bias = (float)(U->C[i].numSampl)/(float)(U->C[i].numSampl-1);
       for( k = 0; k < U->nrFeat; k++ )
         U->C[i].stddev[k] *= bias;
     }
     /* printf("mean:         "); showFV( U->nrFeat, U->C[i].mean ); /**/
     /* printf("standard dev: "); showFV( U->nrFeat, U->C[i].stddev ); /**/
     fclose( cf );
     i++; /* next class */
   }
 }
 while( found );
 if( ! verbose )
   printf("\n");
 
 U->sumSampl = sumSampl; 
 for( i = 0; i < U->nrClass; i++ )
 {
   U->C[i].a_priori_prob = (float)U->C[i].numSampl / (float)U->sumSampl;
   if( verbose )
     printf(" A priori probability of class %d=%5.2f%%\n",
		i+1, 100.0*U->C[i].a_priori_prob );
 }
 close_directory();
}


void loadUnivFile()
{
 FILE *data = NULL;
 char **classNames = NULL, **buf1 = NULL;
 int *samplesPerClass = NULL, *buf2 = NULL;
 str100 nameBuf;
 int nrClass = 0, featDim, k, i, j, w = 0, c, dim, row, offset = 1;
 float dummy, value, bias;
 FeatVector featBuf = NULL;

 /* look first type of file */
 ascii_file = is_ascii_file( dataFile );
 if( ascii_file == EMPTY )
   { printf("Cannot open %s!...", dataFile ); gets( linebuf ); return; }
 if( ascii_file )
   data = fopen( dataFile, f_open_text_r ); 
 else
   data = fopen( dataFile, f_open_bin_r );
 if( data == NULL )
   { printf("Cannot open %s!...\n", dataFile ); gets( linebuf ); return; }

 free_universe();

 /* copy only the name of the data file to the universe name */
 i = strlen( dataFile ) - 1;
 while( dataFile[i] != slash && i > 0 )
   i--;
 if( i == 0 )
   offset = 0;
 strncpy( U->name, &(dataFile[i+offset]), strlen(&(dataFile[i+offset])) );
 U->name[strlen(&(dataFile[i+offset]))] = '\0';

 fprintf( stderr, "Scanning file: %s\n", dataFile );
 /* feature vector dimension */
 dataline( data, linebuf ); sscanf( linebuf, "%d", &featDim );
 /* count the classes */
 while( !feof( data ) )
 {
   k = 0;
   while( k < featDim && !feof( data ) )
   {
     if( ascii_file )
       fscanf( data, "%f", &dummy );
     else
       fread( &dummy, sizeof(float), 1, data );
     k++;
   }
   if( !feof( data ) )
   {
     if( ascii_file )
       fscanf( data, "%s", nameBuf );
     else
     {
       j = 0;
       do
       {
         fread( &(nameBuf[j]), sizeof(char), 1, data );
         if( nameBuf[j] != '\n' )
           j++;
       }
       while( nameBuf[j] != '\n' && j < MAXSTRLEN );
       nameBuf[j] = '\0';
     }
     if( nameBuf[0] == '\0' )	/* empty name, error */
        { fprintf( stderr, "Found an empty class name. Exit...\n"); exit(1); }
     /* check if the name already exist */
     c = 0;
     while( c < nrClass && !(strcmp(nameBuf,classNames[c]) == 0) )
       c++;
     if( c == nrClass )
     {
       /* new class */
       buf1 = (char**) malloc( (1+nrClass) * sizeof(char*) );
       buf2 = (int*) malloc( (1+nrClass) * sizeof(int) );
       /* copy old values */
       for( i = 0; i < nrClass; i++ )
       {
         buf1[i] = (char*) malloc( (1+strlen(classNames[i])) * sizeof(char) );
         strcpy( buf1[i], classNames[i] );
         FREE( classNames[i] );
         buf2[i] = samplesPerClass[i];
       }
       /* insert new value */
       buf1[nrClass] = (char*) malloc( (1+strlen(nameBuf)) * sizeof(char) );
       strcpy( buf1[nrClass], nameBuf );
       buf2[nrClass] = 1;
       FREE( classNames ); FREE( samplesPerClass );
       classNames = buf1; samplesPerClass = buf2; buf1 = NULL; buf2 = NULL;
       nrClass++;
     }
     else
       (samplesPerClass[c])++;
   }
 }
 fclose( data );
 for( i = 0; i < nrClass; i++ )
   U->sumSampl += samplesPerClass[i];
 U->nrFeat = featDim;
 FREE( U->min ); FREE( U->max );
 U->min = (FeatVector) malloc(sizeof(FeatVector*) * U->nrFeat);
 U->max = (FeatVector) malloc(sizeof(FeatVector*) * U->nrFeat);
 for( k = 0; k < U->nrFeat; k++ )
 {
   U->min[k] = INFINITY; U->max[k] = -INFINITY;
 }
 U->nrClass = nrClass;
 if( U->nrClass <= 0 || U->nrClass > MAXCLASS )
   { printf(" Number of classes invalid! Exitus...\n" ); exit(1); }
 /* allocate space for the universe */
 FREE( U->C );
 U->C = (Class*)malloc( U->nrClass* sizeof( struct Class_ ) );
 for( i = 0; i < U->nrClass; i++ )
 {
   init_Class( &(U->C[i]) );
   strcpy( U->C[i].name, classNames[i] );
   FREE( classNames[i] );

   /* now allocate space for one class */
   dim = sizeof(FeatVector*) * U->nrFeat * samplesPerClass[i];
   FREE( U->C[i].S );
   FREE( U->C[i].mean ); FREE( U->C[i].stddev ); FREE( U->C[i].sqrsum );
   if( (U->C[i].S = (FeatVector) malloc(dim*sizeof(FeatVector*))) == NULL )
     { printf("No space for buffer 'U->C[i].S'! Exitus...\n"); exit(1); }
   U->C[i].mean = (FeatVector) malloc(sizeof(FeatVector*) * U->nrFeat);
   U->C[i].stddev = (FeatVector) malloc(sizeof(FeatVector*) * U->nrFeat);
   U->C[i].sqrsum = (FeatVector) malloc(sizeof(FeatVector*) * U->nrFeat);
   /* init mean and standard deviation */
   for( k = 0; k < U->nrFeat; k++ )
   {
     U->C[i].mean[k] = 0.0;
     U->C[i].stddev[k] = 0.0;
     U->C[i].sqrsum[k] = 0.0;
   }
 }
 FREE( classNames );

 /* open file again to store the feature values */
 data = fopen( dataFile, f_open_text_r ); 
 if( data == NULL )
   { printf("Cannot open %s! Exitus...\n", dataFile ); exit(1); }

 featBuf = (FeatVector) malloc(sizeof(FeatVector*) * U->nrFeat);
 /* feature vector dimension */
 dataline( data, linebuf ); sscanf( linebuf, "%f", &dummy );

 fprintf( stderr, "Loading ... "); fflush( stderr );
 for( i = 0; i < U->sumSampl; i++ )
 {
   fprintf( stderr, "%c\b", wheel[(w++) % strlen(wheel)] ); fflush( stderr );

   if( ascii_file )
   {
     for( k = 0; k < U->nrFeat; k++ )
       fscanf( data, "%f", &(featBuf[k]) );
     fscanf( data, "%s", nameBuf );
   }
   else
   {
     fread( featBuf, sizeof(float), U->nrFeat, data );
     j = 0;
     do
     {
       fread( &(nameBuf[j]), sizeof(char), 1, data );
       if( nameBuf[j] != '\n' )
         j++;
     }
     while( nameBuf[j] != '\n' && j < MAXSTRLEN );
     nameBuf[j] = '\0';
   }
   c = 0;
   while( c < U->nrClass && !(strcmp(nameBuf,U->C[c].name) == 0) )
     c++;
   if( c == U->nrClass )
     { printf("loadUnivFile> Cannot find class %s\n Exitus...\n",nameBuf );
       exit(1); }

   /* copy the sample buffer to the class */
   row = U->C[c].numSampl;
   for( k = 0; k < U->nrFeat; k++ )
   {
     U->C[c].S[row*U->nrFeat+k] = featBuf[k];
     value = featBuf[k];
     /* new mean value */
     U->C[c].mean[k] = 1.0/((float)(row+1))*(row*U->C[c].mean[k]+value);

     /* new auxiliary variable "sqrsum" */
     U->C[c].sqrsum[k] += value * value;

     /* new standart deviation */
     U->C[c].stddev[k] = (float)sqrt(fabs((1.0/(float)(row+1) *
             U->C[c].sqrsum[k] - (U->C[c].mean[k] * U->C[c].mean[k]))));

     /* update minimum and maximum */
     if( value < U->min[k] )
       U->min[k] = value;
     if( value > U->max[k] )
       U->max[k] = value;
   }
   (U->C[c].numSampl)++;
 }
 fprintf( stderr, "\n");

 for( i = 0; i < U->nrClass; i++ )
 {
   U->C[i].a_priori_prob = (float)U->C[i].numSampl / (float)U->sumSampl;
   /* compensate for the bias of the standard deviation */
   if( U->C[i].numSampl > 1 )
   {
     bias = (float)(U->C[i].numSampl)/(float)(U->C[i].numSampl-1);
     for( k = 0; k < U->nrFeat; k++ )
       U->C[i].stddev[k] *= bias;
   }
   if( verbose )
     printf(" A priori probability of class %d=%5.2f%%\n",
		i+1, 100.0*U->C[i].a_priori_prob );
   if( U->C[i].numSampl != samplesPerClass[i] )
     { printf("loadUnivFile> Inconsistent data\n Exitus...\n");
       printf("U->C[%d].numSampl=%d != samplesPerClass[%d]=%d\n",
	i, U->C[i].numSampl, i, samplesPerClass[i] ); exit(1); }
 }
/*
 for( c = 0; c < U->nrClass; c++ )
   for( i = 0; i < U->C[c].numSampl; i++ )
   {
     for( k = 0; k < U->nrFeat; k++ )
       printf("%f ", U->C[c].S[i*U->nrFeat+k] );
     printf(" %s\n", U->C[c].name );
   }
 printf(" MIN MAX\n");
 for( k = 0; k < U->nrFeat; k++ )
   printf("%f ", U->min[k] );
 printf("\n");
 for( k = 0; k < U->nrFeat; k++ )
   printf("%f ", U->max[k] );
 printf("\n");
*/
 FREE( samplesPerClass );
 FREE( featBuf );
 fclose( data );
}


/*
 * read_reply - read one line from stdin into a buffer of known size.
 *
 * buf:  destination, always NUL-terminated on return
 * size: size of 'buf' in bytes (at least 1)
 *
 * The line terminator is not stored.  Input that does not fit is discarded
 * up to the end of the line so that it is not taken as the next answer.
 * Returns 1 if a line was read and 0 at end of input or on a read error
 * (buf is then the empty string).
 */
static int read_reply( char *buf, size_t size )
{
 size_t len;
 int ch;

 if( fgets( buf, (int)size, stdin ) == NULL )
 {
   buf[0] = '\0';
   return 0;
 }
 len = strcspn( buf, "\r\n" );
 if( buf[len] == '\0' )
 {
   /* no terminator seen: the line was longer than the buffer */
   while( (ch = getchar()) != EOF && ch != '\n' )
     ;
 }
 buf[len] = '\0';
 return 1;
}


/*
 * loadUniv - ask the user where the data comes from and load it.
 *
 * The answer 'f'/'F' reads a file name into 'dataFile' and calls
 * loadUnivFile(); 'd'/'D' reads a directory name into 'dataDir' and calls
 * loadUnivDir().  An empty answer or the end of the input leaves the
 * function without loading anything; any other answer repeats the question.
 */
void loadUniv()
{
 bool ok = FALSE;

 while( ! ok )
 {
   printf("Load data from (f)ile or from (d)irectory? ");
   /* at end of input there is nobody left to ask */
   if( !read_reply( linebuf, sizeof(linebuf) ) )
     return;
   switch( linebuf[0] )
   {
     case '\0' : return;
     case 'f': case 'F': printf("Load data from file? ");
       read_reply( dataFile, sizeof(dataFile) );
       loadUnivFile(); ok = TRUE; break;
     case 'd': case 'D': printf("Load data from directory? ");
       read_reply( dataDir, sizeof(dataDir) );
       loadUnivDir(); ok = TRUE; break;
     default: printf("Unknown option - nothing done...");
       read_reply( linebuf, sizeof(linebuf) ); break;
   }
 }
}
