/************************************************************************
 *                                                                      *
 *  Program package "tooldiag":                                         *
 *                                                                      *
 *                                                                      *
 *  Version 1.3                                                         *
 *  Date: 15 November 1993                                              *
 *                                                                      *
 *  NOTE: This program package is copyrighted in the sense that it      *
 *  may be used for scientific purposes. The package as a whole, or     *
 *  parts thereof, cannot be included or used in any commercial         *
 *  application without written permission granted by the author.       *
 *  No programs contained in this package may be copied for commercial  *
 *  distribution.                                                       *
 *                                                                      *
 *  All comments  concerning this program package may be sent to the    *
 *  e-mail address 'tr@fct.unl.pt'.                                     *
 *                                                                      *
 ************************************************************************/

#include <stdio.h>
#include <math.h>
#include <string.h>
#include "def.h"

/* Data set under analysis; defined in another translation unit.  This file
   reads its name, nrFeat, nrClass, sumSampl and the per-class C[] entries
   (numSampl, S). */
extern universe *U;
/* When set, the names of the generated files and the gnuplot command line
   are echoed to stdout. */
extern bool verbose;

/* buf:     holds the user's answer to the y/n prompt in correlation().
   gnuFile: path of the file gen_gnuplot() is currently writing. */
static str80 buf, gnuFile;
/* Shell command line assembled by gen_gnuplot() and handed to system(). */
static char cmd[200];

/* EXEC is placed directly in front of "gnuplot" in the command line:
   empty when DOS is defined, "exec " otherwise. */
#ifdef DOS
#define EXEC ""
#else
#define EXEC "exec "
#endif

#define TOL (5.0)	/* Divisor for the plot margin: every axis bound is moved outward by |bound|/TOL (i.e. 20%) */
/*
 * open_in_data_dir: build "<DATA_DIR><name>" in the file-level buffer
 * gnuFile and open that file for text writing.
 *
 *   name   file name to append to DATA_DIR
 *
 * Returns the open stream.  Terminates the program with exit(1) when the
 * path does not fit into gnuFile or the file cannot be opened.  In verbose
 * mode the path is echoed to stdout.
 */
static FILE *open_in_data_dir( name )
char *name;
{
 FILE *fp = NULL;
 int len;

 len = snprintf( gnuFile, sizeof(gnuFile), "%s%s", DATA_DIR, name );
 if( len < 0 || (size_t)len >= sizeof(gnuFile) )
 {
   printf("File name %s%s is too long! Exitus...\n", DATA_DIR, name );
   exit(1);
 }
 fp = fopen( gnuFile, f_open_text_w );
 if( fp == NULL )
   { printf("Cannot open %s! Exitus...\n", gnuFile ); exit(1); }
 if( verbose )
   printf("Dumping %s\n", gnuFile );
 return fp;
}

/*
 * gen_gnuplot: write the sample points to "<DATA_DIR>_correl_", write a
 * gnuplot batch file "<DATA_DIR>_tmp.gnu" that plots them, and (unless DOS
 * is defined) run gnuplot on that batch file through system().
 *
 *   x, y      coordinates of the points, dim entries each
 *   dim       number of points
 *   first     feature number shown in the x axis label
 *   second    feature number shown in the y axis label
 *   nrClass   number of entries in classes
 *   classes   class numbers listed in the plot title
 *
 * Uses the file-level buffers gnuFile and cmd.  Terminates the program
 * with exit(1) when an output file cannot be created.
 */
static void gen_gnuplot( x, y, dim, first, second, nrClass, classes )
float *x, *y;
int dim, first, second;
int nrClass, *classes;
{
 int i, len;
 FILE *gf = NULL;
 float minX = INFINITY, minY = INFINITY, maxX = -INFINITY, maxY = -INFINITY;

 /* dump the points and collect their bounding box on the way */
 gf = open_in_data_dir( "_correl_" );
 for( i = 0; i < dim; i++ )
 {
   fprintf( gf, "%f  %f\n", x[i], y[i] );
   if( x[i] < minX )
     minX = x[i];
   if( x[i] > maxX )
     maxX = x[i];
   if( y[i] < minY )
     minY = y[i];
   if( y[i] > maxY )
     maxY = y[i];
 }
 fclose( gf );

 /* move every bound outward by |bound|/TOL (20% for TOL = 5) so that the
    extreme points do not sit on the frame of the plot */
 minX -= (float)fabs( (double)minX/TOL );
 minY -= (float)fabs( (double)minY/TOL );
 maxX += (float)fabs( (double)maxX/TOL );
 maxY += (float)fabs( (double)maxY/TOL );

 /* generate the gnuplot batch file */
 gf = open_in_data_dir( "_tmp.gnu" );
 fprintf( gf, "#\n#  Batch file to visualize linear correlation\n" );
 fprintf( gf, "#  Generated automatically !\n#\n" );
 fprintf( gf, "# Universe %s\n", U->name );
 fprintf( gf, "set title \"LINEAR CORRELATION FOR CLASS");
 if( nrClass > 1 )
   fprintf( gf, "ES" );
 fprintf( gf, " " );
 for( i = 0; i < nrClass; i++ )
   fprintf( gf, "%d ", classes[i] );
 fprintf( gf, "\"\n");
 fprintf( gf, "set xlabel \"First feature = %d\"\n", first );
 fprintf( gf, "set ylabel \"Second feature = %d\"\n", second );
 fprintf( gf, "set xrange [%f:%f]\n", minX, maxX );
 fprintf( gf, "set yrange [%f:%f]\n", minY, maxY );
 fprintf( gf, "plot \"_correl_\"\n" );
 fprintf( gf, "pause -1 \"Hit return to exit...\"\n" );
 /* the stream is closed exactly once; closing it a second time after the
    gnuplot run would be undefined behaviour */
 fclose( gf );

 /* two-line shell script: change into DATA_DIR, then start gnuplot */
 len = snprintf( cmd, sizeof(cmd), "cd %s\n\t%sgnuplot %s",
		 DATA_DIR, EXEC, gnuFile );
 if( len < 0 || (size_t)len >= sizeof(cmd) )
 {
   fprintf( stderr, "gen_gnuplot> Command line too long; gnuplot not started\n" );
   return;
 }
 if( verbose )
   printf("\n --- Execute:\n\t%s\n", cmd );
#ifndef DOS
 system( cmd );
#endif
}


/*
 * corr: compute and print the linear (Pearson) correlation between two
 * features over the samples of the given classes, then plot the sample
 * pairs with gen_gnuplot().
 *
 *   allClass  TRUE when every class of the universe is analysed; in that
 *             case the per-class minimum of two samples is not enforced
 *   nrClass   number of entries in classes
 *   classes   1-based class numbers to include
 *   first     1-based number of the first feature
 *   second    1-based number of the second feature
 *
 * The caller is responsible for passing class and feature numbers that are
 * valid for U; they are used as array indices without further checks.
 *
 * Mean, standard deviation and covariance are all sample statistics
 * (divisor n-1), so that Cov/(Sx*Sy) is the correlation coefficient in
 * [-1,1].
 *
 * Terminates the program with exit(1) when there are too few samples, when
 * memory cannot be allocated, or when one of the features is constant.
 */
void corr( allClass, nrClass, classes, first, second )
bool allClass;
int nrClass, *classes, first, second;
{
 int i, j, row, class, nrPoints = 0;
 float *x = NULL, *y = NULL;
 double sumX = 0.0, sumY = 0.0;		/* plain sums for the means */
 double meanX, meanY, devX, devY;
 double sqX = 0.0, sqY = 0.0, cross = 0.0;	/* sums of deviation products */
 float Ex, Ey;			/* expected values */
 float Sx, Sy;			/* standard deviation */
 float Cov, CorCoeff;	/* Covariance and linear correlation coefficient */

 /* Count the points from the very classes that are copied below, so the
    buffers always match what the fill loop writes. */
 for( i = 0; i < nrClass; i++ )
 {
   class = classes[i]-1;
   if( ! allClass && U->C[class].numSampl < 2 )
   {
     fprintf(stderr,"corr> Not enough samples: %d; Exit...\n",
		 U->C[class].numSampl ); exit(1);
   }
   nrPoints += U->C[class].numSampl;
 }
 /* the n-1 divisor below needs at least two points */
 if( nrPoints < 2 )
 {
   fprintf(stderr,"corr> Not enough samples: %d; Exit...\n", nrPoints );
   exit(1);
 }

 x = (float*) malloc( (size_t)nrPoints * sizeof(float) );
 y = (float*) malloc( (size_t)nrPoints * sizeof(float) );
 if( x == NULL || y == NULL )
 {
   fprintf(stderr,"corr> Out of memory; Exit...\n" ); exit(1);
 }

 /* gather the two selected feature columns of every sample */
 row = 0;
 for( i = 0; i < nrClass; i++ )
 {
   class = classes[i]-1;
   for( j = 0; j < U->C[class].numSampl; j++ )
   {
     x[row] = U->C[class].S[j*U->nrFeat+(first-1)];
     y[row] = U->C[class].S[j*U->nrFeat+(second-1)];
     row++;
   }
 }

 /* first pass: means */
 for( i = 0; i < row; i++ )
 {
   sumX += x[i]; sumY += y[i];
 }
 meanX = sumX / (double)row; meanY = sumY / (double)row;

 /* second pass: sums of squared and mixed deviations from the means;
    this avoids the cancellation of E[xy] - E[x]E[y] */
 for( i = 0; i < row; i++ )
 {
   devX = (double)x[i] - meanX; devY = (double)y[i] - meanY;
   sqX += devX*devX; sqY += devY*devY; cross += devX*devY;
 }

 if( sqX == 0.0 || sqY == 0.0 )
 {
   fprintf(stderr,"corr> Cannot calculate correlation coefficient");
   fprintf(stderr," because standard deviation is 0; Exit...\n" ); exit(1);
 }

 Ex = (float)meanX; Ey = (float)meanY;
 Sx = (float)sqrt( sqX/(double)(row-1) );
 Sy = (float)sqrt( sqY/(double)(row-1) );
 Cov = (float)( cross/(double)(row-1) );
 /* the common 1/(n-1) factors cancel */
 CorCoeff = (float)( cross / sqrt( sqX*sqY ) );

 printf("\n>>>----------------------------------------------------<<<\n");
 printf("         RESULT OF LINEAR CORRELATION ANALYSIS            \n");
 printf("            Feature nr 1: %d\n", first );
 printf("               Mean=%7.3f   Standard deviation=%7.3f\n", Ex, Sx );
 printf("            Feature nr 2: %d\n", second );
 printf("               Mean=%7.3f   Standard deviation=%7.3f\n\n", Ey, Sy );
 printf("            Covariance=%7.3f\n", Cov );
 printf("    ===>    Correlation coefficient=%7.3f   <===\n", CorCoeff );
 printf(">>>----------------------------------------------------<<<\n\n");

 gen_gnuplot( x, y, row, first, second, nrClass, classes );
 FREE( x ); FREE( y );
}


void correlation()
{
 bool ok, allClass, already;
 int i, j, first, second, nrClass, *classes = NULL;

 printf(">>>--- Correlation analysis between two features ---<<<\n");
 printf("First feature? ");
 do
 {
   get_d( &first );
   ok = ( first > 0 && first <= U->nrFeat );
   if( ! ok )
     printf("Invalid value! Again ? ");
 }
 while( ! ok );
 printf("Second feature? ");
 do
 {
   get_d( &second );
   ok = ( second > 0 && second <= U->nrFeat && first != second );
   if( ! ok )
     printf("Invalid value! Again ? ");
 }
 while( ! ok );
 printf("Correlation of all classes (y/n)?y\b"); gets( buf );
 allClass = ((buf[0]=='y') || (buf[0]=='Y') || (buf[0]=='\0'));
 if( allClass )
 {
   nrClass = U->nrClass;
   classes = (int*) malloc( U->nrClass * sizeof(int) );
 }


 if( ! allClass )
 {
   printf("Number of classes to be analyzed? ");
   do
   {
     get_d( &nrClass );
     ok = ( nrClass > 0 && nrClass < U->nrClass );
     if( ! ok )
       printf("Invalid value! Again ? ");
   }
   while( ! ok );
   classes = (int*) malloc( nrClass * sizeof(int) );
   for( i = 0; i < nrClass; i++ )
   {
     printf("Class nr.%d ? ", i+1 );
     do
     {
       get_d( &(classes[i]) );
       ok = ( classes[i] > 0 && classes[i] <= U->nrClass );
       already = FALSE;
       if( ok )
         for( j = 0; j < i; j++ )
           already = already || (classes[j] == classes[i]);
       ok = ! already && ok;

       if( ! ok )
         printf("Invalid value! Again ? ");
     }
     while( ! ok );
   }
 }
 else
   for( i = 0; i < nrClass; i++ )
     classes[i] = i+1;
 corr( allClass, nrClass, classes, first, second );
 FREE( classes );
}
