/************************************************************************
 *                                                                      *
 *  Program package "tooldiag":                                         *
 *                                                                      *
 *                                                                      *
 *  Version 1.3                                                         *
 *  Date: 15 November 1993                                              *
 *                                                                      *
 *  NOTE: This program package is copyrighted in the sense that it      *
 *  may be used for scientific purposes. The package as a whole, or     *
 *  parts thereof, cannot be included or used in any commercial         *
 *  application without written permission granted by the author.       *
 *  No programs contained in this package may be copied for commercial  *
 *  distribution.                                                       *
 *                                                                      *
 *  All comments  concerning this program package may be sent to the    *
 *  e-mail address 'tr@fct.unl.pt'.                                     *
 *                                                                      *
 ************************************************************************/

#include <stdio.h>
#include "def.h"
#include "matrix.h"

extern universe *U;
extern MatElem trace();
extern bool feat_description;
extern char **feature_desc;

static str80 buf;

static int maxFeat = MAXFEAT;

/* #define F_RATIO_THRESH (4.0) */
#define F_RATIO_THRESH (0.0)
static float F_Ratio_Thresh = F_RATIO_THRESH;


/*
 *   tests if feature with index j is already in the selected pool
*/
static bool already_selected( j, nrSlctFeat, FSV )
int j, nrSlctFeat;
FeatSelectVector FSV;
{
 int i;  bool sel = FALSE;

 for( i = 0; i < nrSlctFeat; i++ )
   sel = ( sel || FSV[i].rank == j );
 return( sel );
}
 

/*
 *   calculate the F-Ratio for a given T and W matrix
 *   with number of predictor features = 1
*/
static float calc_F_Ratio( nrSlctFeat, T, W )
int nrSlctFeat;
Matrix *T, *W;
{
 int i, f1, f2; 
 MatElem dummy, t, w;
 float F = 0.0;
 Matrix M11, M12, M21, M22, M_1_2,  Aux1, Aux2;

 init_Matrix( &M11 ); init_Matrix( &M12 ); init_Matrix( &M21 ); 
 init_Matrix( &M22 ); init_Matrix( &M_1_2 );
 init_Matrix( &Aux1 ); init_Matrix( &Aux2 );

 /* calculate t_2_1 */
 Split_Matrix( T, &M11, &M12, &M21, &M22, nrSlctFeat, 1 ); /**/

 Invert_Matrix( &M11, &dummy );
 Mult_Matrix( &M21, &M11, &Aux1 );
 Mult_Matrix( &Aux1, &M12, &Aux2 );
 Subtract_Matrix( &M22, &Aux2, &M_1_2 );
 t = M_1_2.Elem[0];
   
 /* calculate w_2_1 */
 Split_Matrix( W, &M11, &M12, &M21, &M22, nrSlctFeat, 1 );
 Invert_Matrix( &M11, &dummy );
 Mult_Matrix( &M21, &M11, &Aux1 );
 Mult_Matrix( &Aux1, &M12, &Aux2 );
 Subtract_Matrix( &M22, &Aux2, &M_1_2 );
 w = M_1_2.Elem[0];

 f1 = U->nrClass - 1; f2 = 0;
 for( i = 0; i < U->nrClass; i++ )
   f2 += U->C[i].numSampl; 
 f2 = f2 - U->nrClass - nrSlctFeat;

 F = (float)(((t-w)/(MatElem)f1) / (w/(MatElem)f2));
 /* printf("t: %.20lf  w: %.20lf  f1: %d  f2: %d  F: %f\n", t,w,f1,f2, F); /**/
 /* ------------------------------------------------------------- */

 Matrix_Free( &M11 ); Matrix_Free( &M12 ); Matrix_Free( &M21 ); 
 Matrix_Free( &M22 ); Matrix_Free( &M_1_2 );
 Matrix_Free( &Aux1 ); Matrix_Free( &Aux2 );

 return( F );
}


/*
 *   try to select d features from a pool of D = U->nrFeat
 *   with the Multiple Covariance Analysis
 *   return the number of really selected features
*/
static int covar( d )
int d;
{
 bool selected = FALSE, firstSubstituted = FALSE;
 int nrSlctFeat, available, j, maxj;
 FeatSelectVector FSV = NULL;
 int predictor;
 Matrix T, A, W;
 float F, maxF, *F_Ratios = NULL;

 if( d < maxFeat ) maxFeat = d;

 F_Ratios = (float*) malloc( U->nrFeat * sizeof(float) );
 if( F_Ratios == NULL ) { printf("No space for buffer 'F_Ratios'\n"); exit(1); }

 FSV = (feature*) malloc( U->nrFeat * sizeof(struct feature_) );
 if( FSV == NULL ) { printf("\nNo space for buffer 'FSV'\n"); exit(1); }

 init_Matrix( &T ); init_Matrix( &A ); init_Matrix( &W );
 
 /* choose the first selected feature */
 
 FSV[0].rank = 0;
 nrSlctFeat = 1;

 available = U->nrFeat - nrSlctFeat;
 selected = ( (nrSlctFeat == maxFeat) || (available == 0) );
 while( ! selected )
 {
   /*									*/
   /* for all available features calculate the F-Ratio			*/
   /* then choose the maximum ; if maximum is over threshold put	*/
   /* feature in selected pool.						*/
   /*									*/
   /* stop if one of the following conditions is true :			*/
   /*   1.) the number of selected features passes over a predefined	*/
   /*       limit							*/
   /*   2.) no more features are available 				*/
   /*   3.) none of the predictors passes over the quality theshold	*/

   available = U->nrFeat - nrSlctFeat;

   for( j = 0; j < U->nrFeat; j++ )
   {
     F_Ratios[j] = -INFINITY;
     /* test if feature is already selected */
     if( !already_selected( j, nrSlctFeat, FSV ) )
     {
       predictor = j;
       /* printf("Next predictor = %d ...", j ); gets(buf); /**/
       printf("Selecting feature Nr. %d ...  %3d%%\r",
           nrSlctFeat+1, (int)( 100.0 * (j+1)/(float)U->nrFeat) ); /**/
       fflush( stdout );

       /* ----------  Calculate the F-Ratio for the predictor   --------*/

       /* put the predictor as the last selected element */
       FSV[nrSlctFeat].rank = predictor;

       mcovar_mats( FSV, nrSlctFeat+1, &T, &A, &W ); /**/
       /* scatter_mats_duda( FSV, nrSlctFeat+1, &T, &A, &W ); /**/
       /* scatter_mats_kittler( FSV, nrSlctFeat+1, &T, &A, &W ); /**/

       F = calc_F_Ratio( nrSlctFeat, &T, &W ); /**/

       /* An alternative selection criterion: Kittler pp. 236 */
       /* F = (float)(trace(&A) / trace(&W)); /**/

       /*  write the F_Ratio to the buffer  */
       /* printf(" F_Ratio of predictor %d = %f\n", j, F ); /**/
       F_Ratios[j] = F;
     }
   }

   /* get the maximum F-Ratio */
   maxF = -INFINITY;
   for( j = 0; j < U->nrFeat; j++ )
   {
     if( F_Ratios[j] > maxF )
     {
       maxF = F_Ratios[j]; maxj = j;
     }
   }
   printf("\nPredictor %d had maximum F-Ratio: %7.3f", maxj, maxF );
   printf(" -  F_Ratio_Thresh: %7.3f\n", F_Ratio_Thresh);
   if( maxF >= F_Ratio_Thresh ) {
     FSV[nrSlctFeat].rank = maxj;
     FSV[nrSlctFeat].crit = maxF;
     nrSlctFeat++;
   }
   /* special case: change the first arbitralily selected feature */
   if( nrSlctFeat == 2  && ! firstSubstituted )
   {
     printf("Substituting first feature by second !...\n");
     firstSubstituted = TRUE;
     FSV[0].rank = FSV[1].rank; FSV[0].crit = FSV[1].crit;
     nrSlctFeat--;
   }
   else
   { 
     available--;
     selected = 
       ((nrSlctFeat == maxFeat) || (available == 0) || (maxF < F_Ratio_Thresh));
   }

   printf("Available features: %d\n", available);
   printf("\n--- SELECTED FEATURES --- : %d\n", nrSlctFeat);
   for( j = 0; j < nrSlctFeat; j++ )
   {
     printf("     Nr. %3d = %3d\tF-Ratio: %f", j+1, FSV[j].rank, FSV[j].crit );
     if( feat_description )
       printf("\tName: %s", feature_desc[FSV[j].rank] ); 
     printf("\n");
   }
   printf("\n");

 }/* while not selected */

 /*  Now write the features to the universe */

 FREE( U->FSV );
 U->FSV = FSV;
 U->nrSelFeat = nrSlctFeat;
 
 FREE( F_Ratios );

 Matrix_Free( &T ); Matrix_Free( &A ); Matrix_Free( &W );
 maxFeat = MAXFEAT; 
 return( nrSlctFeat );
}


void featSelectMCOVAR()
{
 int nrSelectFeat; bool ok;

 printf(">>>--- Selecting features ---<<<\n");
 printf("Maximum number of features to be selected (1-%d)? ", U->nrFeat);
 do
 {
   get_d( &nrSelectFeat );
   ok = ( nrSelectFeat > 0 && nrSelectFeat <= U->nrFeat );
   if( ! ok )
     printf("Invalid value! Again ? ");
 }
 while( ! ok );
 nrSelectFeat = covar( nrSelectFeat );
 printf("Selected %d features.\n", nrSelectFeat );
 save_selected_feat();
}
