/****************************************************************/
/* Copyright 1993 : Johns Hopkins University			*/
/*                  Department of Computer Science		*/
/****************************************************************/
/* Contact : murthy@cs.jhu.edu                                  */
/****************************************************************/
/* File Name : load_data.c 					*/
/* Author : Sreerama K. Murthy					*/
/* Last modified : October 1993					*/
/* Contains modules : 	load_points				*/
/*			allocate_point_array			*/
/*			shuffle_points				*/
/* Uses modules in :	oc1.h 					*/
/*			util.c					*/
/* Is used by modules in :	mktree.c			*/
/* Remarks       :	Routines in this file assume that the	*/
/*			dimensionality of the data, and the	*/
/*			number of classes are known. So, these	*/
/*			should be set in mktree.c. 		*/
/*			Throughout this program and other files	*/
/*			in OC1, "points" refer to data samples	*/
/*			or examples. This is due to the 	*/
/*			geometrical interpretation of the	*/
/*			parametric space used by OC1.		*/
/****************************************************************/		

#include "oc1.h"


extern int no_of_dimensions;
extern int no_of_categories;
extern int unlabeled;

/************************************************************************/
/* Module name : load_points						*/
/* Functionality : Reads data points from the input file into the	*/
/*		   array train_points.					*/
/*		   Dynamically allocates space for the points read.	*/
/*		   Counts the number of points read.			*/
/* Parameters :	infile :	File pointer to the input file.		*/
/*		points_ptr : 	pointer to the array into which the	*/
/*				data points are to be loaded.		*/
/* Returns :	Number of points read.					*/
/* Calls modules :	error (util.c)					*/
/*			allocate_point_array				*/	
/* Is called by modules :	read_data (mktree.c)			*/
/*				main (display.c)			*/
/* Remarks :	Assumes that the no_of_dimensions and no_of_categories 	*/
/* 		are set.		 				*/ 
/************************************************************************/
int load_points(infile,points_ptr)
FILE *infile;
POINT ***points_ptr;
{

 int points_allocated = 0,i,j,categories_unknown = FALSE;
 int point_count,*category_array;
 float temp;
 char c;
 POINT **allocate_point_array();
 POINT **array_name = NULL;
 
 if (!unlabeled && !no_of_categories) 
  {
   categories_unknown = TRUE; 
   no_of_categories = 1;
   category_array = (int *)malloc(no_of_categories * sizeof(int));
   category_array -= 1;
  }

 if (!no_of_dimensions) no_of_dimensions = MAX_DIMENSIONS;

 point_count = 0;
 while (TRUE)
  {
    point_count++;
    if (point_count > points_allocated)
     {
       if (points_allocated == 0) 
        {
         array_name = allocate_point_array(array_name,10,0);
         points_allocated = 10;
        }
       else 
        {
         array_name = allocate_point_array(array_name,points_allocated*2,
                              points_allocated);
         points_allocated *= 2;
        }

     }

   if (point_count == 1 && no_of_dimensions == MAX_DIMENSIONS)
     /*count the number of dimensions of the first line in the datafile,
       and set it as the no_of_dimensions. Use this value for reading in
       the subsequent lines. The last entry of any line is taken as the
       category value, if "unlabeled != TRUE". */
    {
      float temp = HUGE;
      int dim=0;

      while (TRUE)
       {
        c = (char)getc(infile);
        if (c == '\n')
         {
           if (temp != HUGE)
            { 
              if (unlabeled == TRUE)
                  array_name[1]->dimension[++dim] = temp;
              else
               {
                i = array_name[1]->category = (int)temp;
                if (temp - i != 0)
                  error("LOAD_POINTS: Float category values unacceptable.");
                if (categories_unknown)
                  category_array[1] = array_name[1]->category;
                else if (i < 1 || i > no_of_categories)
                 { 
                 printf ("Only category values between 1 and %d allowed.\n",
                         no_of_categories);
                 error("LOAD_POINTS: Data point with invalid category.");
                 }
               }
                
              no_of_dimensions = dim; 
              for (i=1;i<=points_allocated;i++)
               {
                 array_name[i]->dimension += 1;
                 array_name[i]->dimension = (float *)realloc
                                            (array_name[i]->dimension,
                                             no_of_dimensions * sizeof(float));
                 array_name[i]->dimension -= 1;
               }
              break;
            }
         }
        if (isspace(c)) continue;
        if (isalpha(c)) error("LOAD_POINTS: Invalid character in datafile.");
        if (temp != HUGE)
         {
          if (++dim > no_of_dimensions)
            error("LOAD_POINTS: Too many dimensions. Adjust the constant MAX_DIMENSIONS.");
          array_name[point_count]->dimension[dim] = temp;
         }

        ungetc(c,infile);
        fscanf(infile,"%f",&temp);
       }
    }
   else
    {
      for (j=1;j<=no_of_dimensions;j++)
       {
         i = fscanf(infile,"%f",&(array_name[point_count]->dimension[j]));
         if (i != 1)
          {
           if (j>1)
             error("LOAD_POINTS : Lines containing unequal number of attributes in datafile.");
           else
             break;
          }
       }
     if (i != 1) break;
     if (unlabeled == TRUE)
      {
        array_name[point_count]->val = (double)0.0;
        continue;
      }

     if (fscanf(infile,"%d",&i) != 1)
         error("LOAD_POINTS : Lines containing unequal number of attributes in datafile.");

     if (categories_unknown)
      {
        for (j=1;j<=no_of_categories;j++)
          if (i == category_array[j]) break;
        if (j > no_of_categories)
         {
          no_of_categories++;
          category_array += 1;
          category_array = (int *)realloc(category_array,
                                          no_of_categories * sizeof(int));
          category_array -= 1;
          category_array[no_of_categories] = i;
         }
      }
     else if (i<1 || i>no_of_categories)
      { 
        printf ("Only category values between 1 and %d allowed.\n",
                no_of_categories);
        error("LOAD_POINTS: Data point with invalid category.");
      }

     array_name[point_count]->category = i;

    }

   array_name[point_count]->val = (double)0.0;
  }

point_count--;
if (point_count != points_allocated)
  array_name = allocate_point_array(array_name,point_count,points_allocated);

if ( !unlabeled && categories_unknown)
 {
  /*There are no_of_categories classes.
    If all these numbers are between 1 and no_of_categories, then
    we don't need any remapping. */
  for (i=1;i<=no_of_categories;i++)
   if (category_array[i] < 1 || category_array[i] > no_of_categories)
     break;

  if (i <= no_of_categories)
   {
    printf("Remapping class numbers:\n");
    for (i=1;i<=no_of_categories;i++)
      if (i != category_array[i])
         printf("\t%d To %d\n",category_array[i],i);
    for (i=1;i<=point_count;i++)
      for (j=1;j<=no_of_categories;j++)
       if (category_array[j] == array_name[i]->category) 
         {
          array_name[i]->category = j;
          break;
         }
   }
 }

*points_ptr = array_name;

return(point_count);
}


/************************************************************************/
/* Module name : allocate_point_array					*/
/* Functionality :	Allocates or reallocates "array_name" to be an	*/
/*			array of pointers (to POINT structures), of	*/
/*			size "size". Fully allocates all the POINT	*/
/*			structures also, each with a dimension vector	*/
/*			obtained from vector(1,no_of_dimensions).	*/
/*			When the array shrinks, the POINT structures	*/
/*			beyond "size" and their dimension vectors are	*/
/*			released.					*/
/* Parameters :	array_name : name of the array to be (re)allocated.	*/
/*			     Indices start from 1.			*/
/*		size	   : number of points to be allocated.		*/
/*		prev_size  : 0 if array_name doesn't exist already	*/
/*			     current size otherwise.			*/
/* Returns :	pointer to the allocated array (1-based).		*/
/* Calls modules :	error (util.c)					*/
/*			vector (util.c)					*/
/* Is called by modules : 	load_points				*/
/* Remarks :	Reads the global no_of_dimensions.			*/
/*		Assumes error() does not return - TODO confirm.		*/
/*		Releasing a dimension vector assumes that the block	*/
/*		returned by vector(1,n) starts at element [1]; this	*/
/*		is the same layout load_points relies on when it	*/
/*		reallocates these vectors - verify against util.c.	*/
/************************************************************************/
POINT **allocate_point_array(array_name,size,prev_size)
POINT **array_name;
int size,prev_size;
{
 int i,keep;
 unsigned slots;
 struct point **base;

 /* Never request a zero-byte block: malloc/realloc may then return */
 /* NULL, which would be mistaken for an allocation failure.        */
 keep = (size > 0) ? size : 0;
 slots = (unsigned)((keep > 0) ? keep : 1);

 if (prev_size == 0)
  {
   /* A non-NULL array with no points in it: drop the old pointer */
   /* block. free() returns nothing, so there is no status to test. */
   if (array_name != NULL)
     free((char *)(array_name+1));

   base = (struct point **)malloc(slots * sizeof(struct point *));
   if (!base)
     error("ALLOCATE_POINT_ARRAY: Memory Allocation Failure 1.");

   array_name = base - 1; /* All indices start from 1*/

   for (i=1;i<=size;i++)
    {
     array_name[i] = (struct point *)malloc((unsigned) sizeof(struct point));
     if (!array_name[i])
       error("ALLOCATE_POINT_ARRAY : Memory Allocation failure 2.");
    }

   for (i=1;i<=size;i++)
     array_name[i]->dimension = vector(1,no_of_dimensions);
  }
 else
  {
   /* Shrinking: release the points that fall off the end before */
   /* their pointers are lost in the reallocation.               */
   for (i=keep+1;i<=prev_size;i++)
    {
     free((char *)(array_name[i]->dimension + 1));
     free((char *)array_name[i]);
    }

   /* Keep the old pointer until the reallocation has succeeded. */
   base = (struct point **)realloc
            (array_name + 1, slots * sizeof(struct point *));
   if (!base)
     error("ALLOCATE_POINT_ARRAY: Memory Allocation Failure 3.");

   array_name = base - 1; /* All indices start from 1*/

   if (prev_size >= size) return(array_name);

   for (i=prev_size+1;i<=size;i++)
    {
     array_name[i] = (struct point *)malloc((unsigned) sizeof(struct point));
     if (!array_name[i])
       error("ALLOCATE_POINT_ARRAY : Memory Allocation failure 4.");
    }

   for (i=prev_size+1;i<=size;i++)
     array_name[i]->dimension = vector(1,no_of_dimensions);
  }

 return(array_name);
}


/************************************************************************/
/* Module name :	shuffle_points					*/
/* Functionality :	Pseudo-randomly reorders the entries of the	*/
/*			1-based pointer array "array_name":		*/
/*			for every slot from 1 to count, the pointer in	*/
/*			that slot is exchanged with the pointer in the	*/
/*			slot numbered (int)myrandom(1,count).		*/
/* Parameters :	array_name : Point array which is to be shuffled.	*/
/*		count	: Number of entries in the array.		*/
/* Returns : Nothing.							*/
/* Calls modules :	myrandom (util.c)				*/
/* Is called by modules :	no caller appears in this file.		*/
/* Remarks :	Only pointers are exchanged; the POINT structures	*/
/*		themselves are neither copied nor reallocated.		*/
/*		Presumably myrandom(1,count) yields a value whose	*/
/*		integer part lies in 1..count - verify in util.c.	*/
/*		NOTE(review): every slot is exchanged with a slot	*/
/*		drawn from the whole range, which is not the		*/
/*		Fisher-Yates scheme; kept as documented.		*/
/************************************************************************/
shuffle_points(array_name,count)
POINT **array_name;
int count;
{
 int slot = 1;

 while (slot <= count)
  {
   /* pick the partner slot, then exchange the two pointers */
   int partner = (int)myrandom(1,count);
   POINT *held = array_name[partner];

   array_name[partner] = array_name[slot];
   array_name[slot] = held;

   slot++;
  }
}

/************************************************************************/
/************************************************************************/
