/************************************************************************
 *                                                                      *
 *  Program package "tooldiag":                                         *
 *                                                                      *
 *                                                                      *
 *  Version 1.3                                                         *
 *  Date: 15 November 1993                                              *
 *                                                                      *
 *  NOTE: This program package is copyrighted in the sense that it      *
 *  may be used for scientific purposes. The package as a whole, or     *
 *  parts thereof, cannot be included or used in any commercial         *
 *  application without written permission granted by the author.       *
 *  No programs contained in this package may be copied for commercial  *
 *  distribution.                                                       *
 *                                                                      *
 *  All comments  concerning this program package may be sent to the    *
 *  e-mail address 'tr@fct.unl.pt'.                                     *
 *                                                                      *
 ************************************************************************/

/*
 *    COPYRIGHT COMMENT
*/
/************************************************************************
 *                                                                      *
 *  Program package 'lvq_pak':                                          *
 *                                                                      *
 *  sammon.c                                                            *
 *  -generates a Sammon mapping from a given list                       *
 *                                                                      *
 *  Version 2.0                                                         *
 *  Date: 31 Jan 1992                                                   *
 *                                                                      *
 *  NOTE: This program package is copyrighted in the sense that it      *
 *  may be used for scientific purposes. The package as a whole, or     *
 *  parts thereof, cannot be included or used in any commercial         *
 *  application without written permission granted by its producents.   *
 *  No programs contained in this package may be copied for commercial  *
 *  distribution.                                                       *
 *                                                                      *
 *  All comments  concerning this program package may be sent to the    *
 *  e-mail address 'lvq@cochlea.hut.fi'.                                *
 *                                                                      *
 ************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "def.h"

/* Factor applied to every coordinate correction step in sammon() */
#define MAGIC (0.2)
/* Value substituted in sammon() for a mutual distance that is exactly 0 */
#define EPSILON (0.00000000001)

/* Symbols defined elsewhere in the program */
/* The universe (classes, samples, selected features) all routines here work on */
extern universe *U;
/* When set, the routines print progress messages */
extern bool verbose;
/* Called in sammon() with two feature vectors and the number of features */
extern float Euclidian_Distance();
/* When set, gen_lvq_data_file() also offers to save the feature names */
extern bool feat_description;
/* Feature descriptions; indexed here by the rank of a selected feature */
extern char **feature_desc;

/* Scratch buffers for console answers and for the file names built from DATA_DIR
   (linebuf is not referenced in the part of the file visible here) */
static str80 buf, linebuf, name, lvqFile, gnuFile, featNameFile;
/* Shell command assembled in gen_plot() to start gnuplot */
static char cmd[200];
/* Set by lvqLoop(): TRUE when the user chose to quit, FALSE otherwise */
static bool done;

/*
 * gen_lvq_data_file
 *
 * Interactively exports the universe U, restricted to the currently
 * selected features, as text files below DATA_DIR:
 *
 *   1. Data file: the first line holds the number of selected features;
 *      every following line holds the selected feature values of one
 *      sample followed by the name of its class.
 *   2. Feature-name file (only offered if feat_description is set): the
 *      number of selected features followed by one description per line.
 *
 * An empty answer to a file-name prompt skips that file.  A name that
 * does not fit into the file-name buffer is rejected.  The program is
 * terminated with exit(1) if a file cannot be opened for writing.
 *
 * Console input is read with fgets() so that an over-long answer cannot
 * overflow the fixed-size buffers (gets() offers no such protection).
 */
void gen_lvq_data_file()
{
 int i, j, k;
 FILE *lvq = NULL, *feats = NULL;

 if( U->nrSelFeat == 0 )
 {
   printf(" Select features first please!..." );
   if( fgets( buf, sizeof buf, stdin ) == NULL ) buf[0] = '\0';
   return;
 }

 strcpy( lvqFile, DATA_DIR );
 printf("Saving the data in LVQ format in file:\n\t\t%s", lvqFile );
 if( fgets( name, sizeof name, stdin ) == NULL ) name[0] = '\0';
 name[ strcspn( name, "\r\n" ) ] = '\0';        /* drop the line terminator */
 if( name[0] != '\0' )
 {
   /* the complete path must fit into lvqFile, terminator included */
   if( strlen( lvqFile ) + strlen( name ) >= sizeof lvqFile )
   {
     printf(" File name too long!..." );
     if( fgets( buf, sizeof buf, stdin ) == NULL ) buf[0] = '\0';
     return;
   }
   strcat( lvqFile, name );
   lvq = fopen( lvqFile, f_open_text_w );
   if( lvq == NULL )
     { printf("Cannot open %s! Exitus...\n", lvqFile ); exit(1); }
   if( verbose )
     printf("Generating LVQ-File: %s\n", lvqFile );

   fprintf( lvq, "%d\n", U->nrSelFeat );
   for( i = 0; i < U->nrClass; i++ )
   {
     for( j = 0; j < U->C[i].numSampl; j++ )
     {
       /* sample j of class i starts at offset j*nrFeat; pick the selected features */
       for( k = 0; k < U->nrSelFeat; k++ )
         fprintf( lvq, "%f ", U->C[i].S[j*U->nrFeat+U->FSV[k].rank] );
       fprintf( lvq, "%s\n", U->C[i].name );
     }
   }
   fclose( lvq );
 }

 if( ! feat_description )
   return;

 strcpy( featNameFile, DATA_DIR );
 printf("Saving the feature names in file:\n\t\t%s", featNameFile );
 if( fgets( name, sizeof name, stdin ) == NULL ) name[0] = '\0';
 name[ strcspn( name, "\r\n" ) ] = '\0';
 if( name[0] != '\0' )
 {
   if( strlen( featNameFile ) + strlen( name ) >= sizeof featNameFile )
   {
     printf(" File name too long!..." );
     if( fgets( buf, sizeof buf, stdin ) == NULL ) buf[0] = '\0';
     return;
   }
   strcat( featNameFile, name );
   feats = fopen( featNameFile, f_open_text_w );
   if( feats == NULL )
     { printf("Cannot open %s! Exitus...\n", featNameFile ); exit(1); }
   if( verbose )
     printf("Generating feature description-File: %s\n", featNameFile );

   fprintf( feats, "%d\n", U->nrSelFeat );
   for( k = 0; k < U->nrSelFeat; k++ )
     fprintf( feats, "%s\n", feature_desc[U->FSV[k].rank] );
   fclose( feats );
 }
}


/*
 * filter_selected_lvq
 *
 * Interactively copies an LVQ-format text file, keeping only the
 * currently selected features of every record.
 *
 * The input file must start with its feature dimension, which has to be
 * equal to U->nrFeat.  Each following record consists of U->nrFeat
 * numbers and a name.  If U->normalized is set, every value is rescaled
 * with U->min / U->max before it is written.  The output file (below
 * DATA_DIR) starts with U->nrSelFeat, followed by one line per record.
 *
 * Reading stops at end of file or at the first record that cannot be
 * parsed completely; in the latter case a warning is printed.
 * All error paths close the files that were already opened.
 */
static void filter_selected_lvq()
{
 str80 raw, filtered;
 FILE *r, *f;
 str100 nameBuf;
 char nameFmt[16];
 int featDim = -1, k, i;
 FeatVector featBuf = NULL;

 if( U->nrSelFeat == 0 )
 {
   printf(" Select features first please!..." );
   if( fgets( buf, sizeof buf, stdin ) == NULL ) buf[0] = '\0';
   return;
 }

 printf("Name of the file to be filtered? ");
 if( fgets( raw, sizeof raw, stdin ) == NULL ) raw[0] = '\0';
 raw[ strcspn( raw, "\r\n" ) ] = '\0';          /* drop the line terminator */
 r = fopen( raw, f_open_text_r );
 if( r == NULL )
 {
   printf(" Cannot open %s...", raw );
   if( fgets( buf, sizeof buf, stdin ) == NULL ) buf[0] = '\0';
   return;
 }

 strcpy( filtered, DATA_DIR );
 printf("Name of the output file with filtered features?\n\t\t%s", filtered );
 if( fgets( buf, sizeof buf, stdin ) == NULL ) buf[0] = '\0';
 buf[ strcspn( buf, "\r\n" ) ] = '\0';
 /* the complete path must fit into 'filtered', terminator included */
 if( strlen( filtered ) + strlen( buf ) >= sizeof filtered )
 {
   printf(" File name too long!..." );
   if( fgets( buf, sizeof buf, stdin ) == NULL ) buf[0] = '\0';
   fclose( r );
   return;
 }
 strcat( filtered, buf );
 /* Opening the output truncates it, so it must not be the input file.
    Compare both the bare answer and the full path. */
 if( strcmp( raw, buf ) == 0 || strcmp( raw, filtered ) == 0 )
 {
   printf(" Sorry file names are the same!..." );
   if( fgets( buf, sizeof buf, stdin ) == NULL ) buf[0] = '\0';
   fclose( r );
   return;
 }
 f = fopen( filtered, f_open_text_w );
 if( f == NULL )
 {
   printf(" Cannot open %s...", filtered );
   if( fgets( buf, sizeof buf, stdin ) == NULL ) buf[0] = '\0';
   fclose( r );
   return;
 }

 if( verbose )
   printf("Filtering\n\t\t%s\nTO\n\t\t%s\n", raw, filtered );

 /* try to read the dimension; featDim stays -1 if no number is found */
 dataline( r, buf );
 if( sscanf( buf, "%d", &featDim ) != 1 || featDim != U->nrFeat )
 {
   printf("Universe and file have different feature dimensions: %d != %d...",
	U->nrFeat, featDim  );
   if( fgets( buf, sizeof buf, stdin ) == NULL ) buf[0] = '\0';
   fclose( r ); fclose( f );
   return;
 }

 featBuf = (FeatVector) malloc( sizeof *featBuf * (size_t) U->nrFeat );
 if( featBuf == NULL )
 {
   printf(" No space for the feature buffer..." );
   if( fgets( buf, sizeof buf, stdin ) == NULL ) buf[0] = '\0';
   fclose( r ); fclose( f );
   return;
 }

 /* "%<n>s" with n = capacity of nameBuf - 1, so a long name cannot overflow it */
 sprintf( nameFmt, "%%%lus", (unsigned long) (sizeof nameBuf - 1) );

 fprintf( f, "%d\n", U->nrSelFeat );
 for( ;; )
 {
   for( k = 0; k < U->nrFeat; k++ )
   {
     if( fscanf( r, "%f", &(featBuf[k]) ) != 1 )
       break;
     /* normalize, if training data was also normalized */
     if( U->normalized )
       featBuf[k] = (featBuf[k] - U->min[k]) / (U->max[k] - U->min[k]);
   }
   if( k < U->nrFeat )                   /* end of file or unreadable value */
     break;
   if( fscanf( r, nameFmt, nameBuf ) != 1 )
     break;

   /* A record is complete as soon as its name was read, even if the
      file ends right after it without a newline. */
   for( i = 0; i < U->nrSelFeat; i++ )
     fprintf( f, "%f ", featBuf[ U->FSV[i].rank ] );
   fprintf( f, "%s\n", nameBuf );
 }
 if( !feof( r ) )
   printf("Warning: stopped at a malformed record in %s\n", raw );

 FREE( featBuf );
 fclose( r ); fclose( f );
}

/* Prefix put in front of "gnuplot" in the shell command built by gen_plot():
   empty when compiled for DOS, "exec " otherwise. */
#ifdef DOS
#define EXEC ""
#else
#define EXEC "exec "
#endif

/*
 * gen_plot
 *
 * Writes the 2-D points (x[n], y[n]) to gnuplot data files, one file per
 * class of U, and then writes the batch file "_tmp.gnu" that plots all
 * of them.  All files are created below DATA_DIR.  The points must be
 * ordered class by class, the samples of a class consecutively.
 *
 * A data file is named after its class; when compiled for DOS it is
 * named "_class.<class index>" instead.  Unless compiled for DOS, gnuplot
 * is started on the batch file through system().
 *
 * The program is terminated with exit(1) if a file cannot be opened.
 */
static void gen_plot( x, y )
float *x, *y;
{
 FILE *out = NULL;
 int cls, smp, last;
 int pt = 0;                    /* running index into x[] and y[] */

 /* one data file per class */
 for( cls = 0; cls < U->nrClass; cls++ )
 {
#ifdef DOS
   sprintf( gnuFile, "%s_class.%d", DATA_DIR, cls );
#else
   sprintf( gnuFile, "%s%s", DATA_DIR, U->C[cls].name );
#endif
   out = fopen( gnuFile, f_open_text_w );
   if( out == NULL )
   {
     printf("Cannot open %s! Exitus...\n", gnuFile );
     exit(1);
   }
   if( verbose )
     printf("Dumping %s\n", gnuFile );

   fprintf( out, "# %s\n", U->C[cls].name );
   for( smp = 0; smp < U->C[cls].numSampl; smp++, pt++ )
     fprintf( out, "%f\t%f\n", x[pt], y[pt] );
   fclose( out );
 }

 /* the gnuplot batch file */
 sprintf( gnuFile, "%s%s", DATA_DIR, "_tmp.gnu" );
 out = fopen( gnuFile, f_open_text_w );
 if( out == NULL )
 {
   printf("Cannot open %s! Exitus...\n", gnuFile );
   exit(1);
 }
 if( verbose )
   printf("Dumping %s\n", gnuFile );

 fputs( "#\n#  Batch file to visualize sammon plot\n", out );
 fputs( "#  Generated automatically !\n#\n", out );
 fprintf( out, "# Universe %s\n", U->name );
 fputs( "set title \"SAMMON PLOT\"\n", out );
 fputs( "plot ", out );

 /* comma separated list of the data files; the last entry ends the line */
 last = U->nrClass - 1;
#ifdef DOS
 for( cls = 0; cls < last; cls++ )
   fprintf( out, "\"_class.%d\", ", cls );
 fprintf( out, "\"_class.%d\"\n", last );
#else
 for( cls = 0; cls < last; cls++ )
   fprintf( out, "\"%s\", ", U->C[cls].name );
 fprintf( out, "\"%s\"\n", U->C[last].name );
#endif
 fputs( "pause -1 \"Hit return to exit...\"", out );
 fclose( out );

 /* change into DATA_DIR first: the batch file names the data files without a path */
 sprintf( cmd, "cd %s\n\t%sgnuplot %s", DATA_DIR, EXEC, gnuFile );
 if( verbose )
   printf("\n --- Execute:\n\t%s\n", cmd );
#ifndef DOS
 system( cmd );
#endif
}

/* Iteration counts for sammon(): a user value must be > 0 and < RLEN_MAX,
   otherwise RLEN_DEFAULT is used. */
#define RLEN_MAX 32000
#define RLEN_DEFAULT 100
void sammon()
{
 int rlen;
 int i, j, k, row;
 int noc = 0;
 float e1x, e1y, e2x, e2y;
 float dpj;
 float dq, dr, dt;
 float *x = NULL, *y = NULL;
 float *xu = NULL, *yu = NULL, *dd = NULL;
 float xd, yd;
 float xx, yy;
 float e, tot;
 int mutual;
 float d, ee;
 FeatVector Samples = NULL, smp = NULL, smp1 = NULL;

 if( U->nrClass == 0 )
   { printf(" Please load universe first !..." ); gets( buf ); return; }
 if( U->nrSelFeat == 0 )
   { printf(" Select features first please!..." ); gets( buf ); return; }
 if( U->nrSelFeat == 1 )
 {
   printf("It does not make sense to map unidimensional ");
   printf("data to n dimensions!..." ); gets( buf ); return;
 }

 /* Load samples with selected features only in buffer */
 Samples = (FeatVector)malloc(U->nrSelFeat*U->sumSampl*sizeof(FeatVector*));
 if( Samples == NULL )
   { printf("No space for buffer 'Samples'! Exitus...\n"); exit(1); }

 row = 0;
 for( i = 0; i < U->nrClass; i++ )
   for( j = 0; j < U->C[i].numSampl; j++ )
   {
     for( k = 0; k < U->nrSelFeat; k++ )
       Samples[ row*U->nrSelFeat + k ] = U->C[i].S[j*U->nrFeat+U->FSV[k].rank];
     row++;
   }

 /* How many entries? */
 noc = U->sumSampl;

 /* Allocate dynamical memory */
 x = (float *) malloc(sizeof(float) * noc);
 y = (float *) malloc(sizeof(float) * noc);

 if( U->nrSelFeat == 2 )
 {
   printf("Number of selected features is 2 - Mapping the data directly\n");
   printf(" to 2 dimensions, without using the Sammon mapping.\n");
   for( i = 0; i < row; i++ )
   {
     x[i] = Samples[ i*U->nrSelFeat + 0 ];
     y[i] = Samples[ i*U->nrSelFeat + 1 ];
   }
   gen_plot( x, y );

   FREE( Samples );
   FREE( x ); FREE( y );
   return;
 }
 rlen = RLEN_DEFAULT;
 printf("Sammon: Number of iterations (default=%d): ", rlen );
 gets(buf);
 if( buf[0] != '\0' )
   sscanf( buf, "%d", &rlen );
 if( rlen <= 0 || rlen >= RLEN_MAX )
 {
   printf("Value %d for iterations is invalid. Setting to default %d...",
	rlen, RLEN_DEFAULT );
   rlen = RLEN_DEFAULT;
   gets( buf );
 }

 xu = (float *) malloc(sizeof(float) * noc);
 yu = (float *) malloc(sizeof(float) * noc);
 dd = (float *) malloc(sizeof(float) * (noc * (noc - 1) / 2));

 /* Initialize the tables */
 for (i = 0; i < noc; i++)
 {
   x[i] = (float) i / (float) noc;
   y[i] = (float) (i + i % 2) / (float) noc;
 }

 /* Compute the mutual distances between entries */
 mutual = 0;
 for (j = 1; j < noc; j++)
 {
   smp = &(Samples[j*U->nrSelFeat]); 
   for (k = 0; k < j; k++)
   {
     smp1 = &(Samples[k*U->nrSelFeat]); 
     dd[mutual] = Euclidian_Distance( smp, smp1, U->nrSelFeat );
     if( dd[mutual] == 0.0 )
     {
       printf("Warning: distance between samples %d and %d is 0\n", j, k );
       printf("\tSetting distance to a minimum %e\n", EPSILON );
       dd[mutual] = EPSILON;
     }
     /* showFV( U->nrSelFeat, smp ); showFV( U->nrSelFeat, smp1 );
     printf("Mutual[%d][%d] = %7.5f", j, k, dd[mutual] ); NL; /**/
     mutual++;
   }
 }

 /* Iterate */
 for (i = 0; i < rlen; i++) {
   /* printf("\r iterations to go:         %6d  - sample=", rlen - i -1 ); /**/
   printf("\r iterations to go:         %6d", rlen-i-1 ); fflush(stdout); /**/
   for (j = 0; j < noc; j++) {
     /* printf("%5d\b\b\b\b\b", j ); fflush(stdout); /**/
     e1x = e1y = e2x = e2y = 0.0;
     for (k = 0; k < noc; k++) {
     if (j == k)
       continue;
     xd = x[j] - x[k];
     yd = y[j] - y[k];
     dpj = (float) sqrt((double) (xd * xd + yd * yd));

     /* Calculate derivatives */
     if (k > j)
       dt = dd[k * (k - 1) / 2 + j];
     else
       dt = dd[j * (j - 1) / 2 + k];
       if( dt != 0.0 )
       {
         dq = dt - dpj;
         dr = dt * dpj;
         e1x += xd * dq / dr;
         e1y += yd * dq / dr;
         e2x += (dq - xd * xd * (1.0 + dq / dpj) / dpj) / dr;
         e2y += (dq - yd * yd * (1.0 + dq / dpj) / dpj) / dr;
       }
     }
     /* Correction */
     xu[j] = x[j] + MAGIC * e1x / (float)fabs((double)e2x);
     yu[j] = y[j] + MAGIC * e1y / (float)fabs((double)e2y);
   }

   /* Move the center of mass to the center of picture */
   xx = yy = 0.0;
   for (j = 0; j < noc; j ++) { 
     xx += xu[j];
     yy += yu[j];
   }
   xx /= (float)noc;
   yy /= (float)noc;
   for (j = 0; j < noc; j ++) {
     x[j] = xu[j] - xx;
     y[j] = yu[j] - yy;
   }

   /* Error in distances */
   e = tot = 0.0;
   mutual = 0;
   for (j = 1; j < noc; j ++) {
     for (k = 0; k < j; k ++) {
       d = dd[mutual];
       tot += d;
       xd = x[j] - x[k];
       yd = y[j] - y[k];
       ee = d - (float) sqrt((double)( xd * xd + yd * yd ));
       e += (ee * ee / d);
       mutual++;
     }
   }
   e /= tot;
   /* fprintf(stdout, "Mapping error: %7.3f\n", e); /**/
 }
 FREE( xu ); FREE( yu ); FREE( dd );


 printf("\n");
 gen_plot( x, y );

 FREE( Samples );
 FREE( x ); FREE( y );
}


/*
 * lvqLoop
 *
 * Prints the LVQ menu, reads one choice from the console and dispatches:
 *   '?'      help for this menu
 *   '1'      gen_lvq_data_file()
 *   '2'      filter_selected_lvq()
 *   'q'/'Q'  sets the file-static flag 'done'
 *   other    shows the universe
 *
 * End of input on stdin is treated like 'q'.  The choice is read with
 * fgets() so that an over-long answer cannot overflow the buffer.
 */
void lvqLoop()
{
 printf("\n>>>>>----- Learning Vector Quantizer (LVQ) -----<<<<<<\n");
 printf("(1) Generate data file\n");
 printf("(2) Filter only selected features from a file\n");
 printf("(Q)uit\n\n");
 printf("Choice: ");

 done = FALSE;
 if( fgets( buf, sizeof buf, stdin ) == NULL )
   { done = TRUE; return; }
 /* help() receives the answer: hand it over without the line terminator */
 buf[ strcspn( buf, "\r\n" ) ] = '\0';
 switch( buf[0] )
  {
   case '?': help( LOOP_LVQ, buf ); break;
   case '1': gen_lvq_data_file(); break;
   case '2': filter_selected_lvq(); break;
   case 'q': case 'Q': done = TRUE; break;
   default: showUniv( stdout ); break;
  }
}
