/* $Header: /soma/users/miyata/planet/src/RCS/cluster.c,v 5.6.0.5 91/02/13 15:41:10 miyata Exp $ */
static char rcsid[] = "$Header: /soma/users/miyata/planet/src/RCS/cluster.c,v 5.6.0.5 91/02/13 15:41:10 miyata Exp $";
#include <stdio.h>
#include <math.h>
#include "alloc.h"
#include "error.h"

/*
 * Node of the binary cluster tree built by cluster().
 * Leaves correspond to input patterns; a node with subtrees represents
 * the merge of its two children.
 */
typedef struct _tree {
  float	*pat;		/* pattern vector; for a merged node, the size-weighted
			 * average of the vectors of its two subtrees */
  int	size;		/* number of input patterns under this node (1 for a leaf) */
  int	root;		/* TRUE until the node is attached as another node's
			 * subtree, then FALSE */
  int	leaf;		/* index of the input pattern for a leaf;
			 * FALSE for a node that has subtrees */
  float	y;		/* output row, assigned by y_tree() */
  float distance;	/* square root of the squared Euclidean distance at which
			 * the two subtrees were merged (set by cluster()) */
  struct _tree *r_tree, *l_tree;	/* subtrees; only used when leaf == FALSE */
} BiTree;

#define XMAX 80		/* width, in characters, of each row of the tree drawing */
#define NONE (-2)	/* value of pair1 in cluster() when no pair of roots is left */
#define TRUE 1
#define FALSE -1	/* NOTE: -1, not 0.  It is also stored in BiTree.leaf,
			 * where 0 is a valid pattern index, so always compare
			 * explicitly against TRUE/FALSE; never test truthiness */
#define Lname 32	/* per-pattern byte allowance used by cluster() when
			 * sizing its name-list buffers */
#define BUFSIZE 256	/* size of the token buffer in read_names() */

/* Forward declarations for the functions that do not return int. */
float distance();
BiTree *new_tree();

/*
 * Program entry point.
 *
 *   argv[1]  optional pattern file; stdin is read when it is absent
 *   argv[2]  optional file holding one name per pattern
 *
 * Reads the patterns (and names, if given), then hands them to cluster(),
 * which prints the merge sequence and the resulting tree.
 * Returns 0 on success and 1 when an input cannot be opened or read.
 */
int
main ( argc, argv )
     int argc; char *argv[];
{
  FILE  *fp;
  /* read_pattern() frees *patternP when it is non-NULL, so the pointer
   * and the counts must start out cleared rather than as stack garbage. */
  float **pattern = NULL;
  int	lpat = 0, npat = 0;
  char  **name = NULL;

  if ( argc < 2 ) fp = stdin;

  else IfErr ( fp = fopen ( argv[1], "r" ) ) {
    fprintf ( stderr, "cannot open file %s\n", argv[1] );
    return( 1 );
  }
  IfErr( read_pattern ( fp,  &pattern, &lpat, &npat ) ) {
    fprintf ( stderr, "%s: cannot read pattern\n", ERR_MSG);
    return( 1 );
  }
  if ( fp != stdin ) fclose( fp );	/* done with the pattern file */
  printf ( "read %d patterns:  size = %d\n", npat, lpat );

  if ( argc > 2 ) {
    IfErr( name = new_array_of( npat, char* ) ) {
      fprintf ( stderr, "not enough core\n" );
      return( 1 );
    }
    IfErr( fp = fopen( argv[2], "r" ) ) {
      fprintf ( stderr, "cannot open file %s\n", argv[2] );
      return( 1 );
    }
    IfErr( read_names( fp, name, npat ) ) {
      fprintf( stderr, "%s: cannot read names\n", ERR_MSG);
      return( 1 );
    }
    fclose( fp );
  }
  cluster( pattern, name, lpat, npat );
  return( 0 );
}

/*
 * Read npat whitespace-separated names from fp.
 *
 *   fp    open stream positioned at the first name
 *   name  array of at least npat slots; name[i] receives a copy of the
 *         i-th token made with new_string()
 *   npat  number of names to read
 *
 * Returns OK, or fails through Erreturn() when the stream ends early or
 * a copy cannot be allocated.  A token longer than BUFSIZE-1 characters
 * is split by fscanf() rather than overrunning the buffer.
 */
int
read_names ( fp, name, npat )
     FILE *fp; char **name; int npat;
{
  register int i;
  char buffer[BUFSIZE];
  char format[16];

  /* Build "%<BUFSIZE-1>s" so the field width always tracks BUFSIZE. */
  sprintf( format, "%%%ds", BUFSIZE - 1 );

  for( i=0; i< npat ; i++ ) {
    IfEOF( fscanf ( fp, format, buffer ) ) Erreturn("not enough names");
    IfErr( name[i] = new_string( buffer, NULL )) Erreturn("not enough core");
  }
  return( OK );
}

/*
 * Append the labels of every leaf under `tree` to the string `str`.
 *
 *   tree  subtree to walk
 *   name  label of each pattern, or NULL to use the pattern index
 *   str   NUL-terminated buffer to append to; each leaf adds a blank
 *         followed by its label.  The caller must supply enough room.
 *
 * The right subtree is always listed before the left one.
 * Returns the number of leaves visited.
 */
int
names( tree, name, str )
     BiTree *tree; char **name, *str;
{
  char *end;
  int  count;

  if( tree->leaf == FALSE ) {
    /* Two separate statements: the order of the operands of `+' is
     * unspecified, and the order decides how the labels are listed. */
    count  = names( tree->r_tree, name, str );
    count += names( tree->l_tree, name, str );
    return( count );
  }

  /* Write at the current end of the string.  Passing str as both the
   * destination and a %s argument of sprintf() is undefined behaviour. */
  for( end = str; *end != '\0'; end++ )
    ;
  if( name ) sprintf( end, " %s", name[ tree->leaf ] );
  else sprintf( end, " %d", tree->leaf );
  return( 1 );
}

cluster ( pattern, name, lpat, npat )
     float **pattern; char **name; int lpat, npat;
{
  register int i,j,k;
  float	   dist,min_dist;
  int	   pair1, pair2, n_names;
  BiTree   *item = new_array_of ( npat, BiTree );
  float    **dist_matrix = new_2d_array_of( npat, npat, float );
  char	   *names1 = new_array_of( npat*Lname, char );
  char	   *names2 = new_array_of( npat*Lname, char );
  for ( i=0; i< npat ; i++ ) {
    item[i].pat = new_array_of ( lpat, float );
    for ( j=0; j< lpat ; j++ ) item[i].pat[j] = pattern[i][j];
    item[i].root = TRUE;
    item[i].size = 1;
    item[i].leaf = i;
  }
  for( i=0; i < npat; i++ )
    for( j=0; j < i; j++ )
      dist_matrix[i][j] = distance( item[i].pat,item[j].pat, lpat );

  for ( ; ; ) {
    min_dist = 999999999.9;
    pair1 = NONE;
    for ( i=0; i < npat ; i++ ) 
      for ( j=0; j < i ; j++ ) {
	if ( item[i].root == FALSE || item[j].root == FALSE ) continue;
	if( (dist = dist_matrix[i][j]) < min_dist ) {
	  min_dist = dist ;
	  pair1 = i;
	  pair2 = j;
	}
      }
    if ( pair1 == NONE ) break;		/* analysis finished */
    min_dist = sqrt( min_dist );
    *names1 = *names2 = NULL;
    n_names = names( &item[pair1], name, names1 ) +
	      names( &item[pair2], name, names2 );
    if( n_names > 10 ) printf( "minimum distance = %f\n(%s )\n(%s )\n", 
			      min_dist, names1, names2 );
    else printf( "minimum distance = %f\t(%s )\t(%s )\n", 
		min_dist, names1, names2 );
    item[ pair1 ].l_tree = new_tree ( &item[ pair1 ], lpat ); /* copy */
    item[ pair1 ].r_tree = &item[ pair2 ];
    item[ pair1 ].leaf = FALSE;	/* ith item cannot be a leaf it has subtrees */
    for ( k=0; k< lpat ; k++ )  /* pat of non-leaf is weighted average of    *
				 * pat's of its right & left subtrees */
      item[pair1].pat[k] = 
	(item[pair1].pat[k]*item[pair1].size
	 + item[pair2].pat[k]*item[pair2].size )
	  / (item[pair1].size + item[pair2].size );
    item[pair1].size = item[pair1].size + item[pair2].size ;
    item[pair1].distance = min_dist ;

    item[pair2].root = FALSE;	/* jth item is no longer a root.its a subtree*/
    for( i = 0; i < pair1 ; i++ ) { 	/* compute dist b/w new and other pat*/
      if( item[i].root == FALSE ) continue;
      dist_matrix[pair1][i] = distance(item[pair1].pat, item[i].pat, lpat );
    }
    for( i = pair1+1 ; i < npat; i++ ) {
      if( item[i].root == FALSE ) continue;
      dist_matrix[i][pair1] = distance(item[pair1].pat, item[i].pat, lpat );
    }
  }
  for ( i=0; i<npat ; i++ ) 
    if ( item[i].root == TRUE ) break;  /* there should be only one root */
  printf ( "Resulting Tree = \n" );
  print_tree ( &item[i], name, npat );
}

/*
 * Squared Euclidean distance between two vectors.
 *
 *   pat1, pat2  vectors of at least lpat floats
 *   lpat        number of elements to compare
 *
 * Returns the sum of the squared element differences (no square root is
 * taken); 0 when lpat is not positive.
 */
float
distance ( pat1, pat2, lpat )
     float *pat1, *pat2; int lpat;
{
  float sum = 0.0;
  float diff;
  int   k = 0;

  while ( k < lpat ) {
    diff = pat1[k] - pat2[k];
    sum += diff * diff;
    k++;
  }
  return ( sum );
}

/*
 * Make a heap copy of a tree node.
 *
 *   item  node to copy
 *   lpat  number of elements in item->pat
 *
 * Every field is copied, and the pattern vector is duplicated so the
 * copy does not share storage with the original.  The subtree pointers
 * are copied as they are; the subtrees themselves are not duplicated.
 * Returns the new node.
 */
BiTree *
new_tree ( item, lpat )
     BiTree *item; int lpat;
{
  register int i;
  BiTree *tree = new ( BiTree );

  /* Structure assignment also carries over size and y, which a
   * field-by-field copy is prone to leave unset. */
  *tree = *item;

  tree->pat = new_array_of ( lpat, float );
  for (i=0; i< lpat ; i++ ) tree->pat[i] = item->pat[i];
  return ( tree );
}

/*
 * Width, in characters, of the widest leaf label.
 *
 *   name  labels of the npat patterns, or NULL when the leaves are
 *         labelled with their index (0 .. npat-1)
 *   npat  number of patterns
 *
 * With names: the length of the longest name (0 when npat is 0).
 * Without names: the number of digits of the largest index, but never
 * less than 3.
 */
int
max_lname( name, npat )
  char **name; int npat;
{
  register int i;
  int max = 0, lname;

  if( name == NULL ) {
    /* Count the digits of npat-1, the largest index printed with %d. */
    max = 1;
    for( i = npat - 1; i >= 10; i /= 10 ) max++;
    return( max < 3 ? 3 : max );
  }
  for( i=0; i < npat; i++ )
    if( (lname = (int) strlen( name[i] )) > max ) max = lname;
  return( max );
}

int Y;

print_tree ( tree, name, npat )
     BiTree *tree; char **name; int npat;
{
  register int i,j; char **space;
  float total_distance();
  int lname = max_lname( name, npat );  /* maximum name length */
  float xscale = (XMAX - tree_depth(tree)*2 -lname-3) / total_distance(tree) ;
  Y = 0;
  y_tree( tree );
  space = new_2d_array_of( tree->size, XMAX, char );
  for( i=0; i< tree->size; i++ )
    for( j=0; j < XMAX ; j++ ) space[i][j] = ' ';
  draw_tree_in_space( tree, name, space, 0, xscale, 0.0 );
  print_space( space, XMAX, tree->size );
  printf("scale: %f per character.\n", 1.0/xscale );
  free_2d_array( space, tree->size );
}

/*
 * Write the rows of a character grid to stdout, one per line.
 *
 *   space  array of `size' rows, each at least `depth' characters long
 *   depth  row width; the last character of every row is overwritten
 *          with a NUL so that the row can be printed as a string
 *   size   number of rows
 *
 * A row that already contains a NUL is printed only up to that point.
 * Always returns 0; the value carries no meaning.
 */
int
print_space( space, depth, size )
     char **space; int depth, size;
{
  register int i;
  for( i=0; i<size; i++ ) {
    space[i][depth-1] = '\0';	/* a character, not the pointer constant NULL */
    puts( space[i] );
  }
  return( 0 );
}

/*
 * Draw the subtree rooted at `tree' into the character grid `space'.
 *
 *   tree      node to draw; its row is (int) tree->y
 *   name      label of each pattern, or NULL to label leaves by index
 *   space     grid of rows to draw into
 *   x         column at which the branch leading to this node starts
 *   xscale    columns per unit of distance
 *   distance  length of the branch leading to this node; the recursive
 *             calls pass the parent's tree->distance
 *
 * The float parameters are declared K&R style and the call in
 * print_tree() has no prototype in scope, so callers pass them as
 * double.  Keep that in mind before converting this to a prototype.
 *
 * NOTE(review): nothing here checks x or y against the grid size; the
 * caller has to choose xscale and the row width so the drawing fits.
 */
draw_tree_in_space( tree, name, space, x, xscale, distance )
BiTree *tree; char **name; char **space; int x; float xscale, distance;
{
  int y_l,y_r, y = (int) tree->y; char char_x; int x_next;
  if( tree->leaf != FALSE ) {		/* terminal node */
    /* branch: at least one '-', up to column x + distance*xscale */
    for( x_next = x + distance * xscale; x <= x_next; x++ )
      space[y][x] = '-';
    /* label; sprintf also stores a NUL, which ends the printed row here */
    if( name == NULL ) sprintf( &space[y][x], "> %d", tree->leaf );
    else sprintf( &space[y][x], "> %s", name[tree->leaf] );
  }
  else {				/* non-terminal -> expand subtrees */
    y_l = (int) tree->l_tree->y; y_r = (int) tree->r_tree->y;
    if( (y_l - y_r) < 2 ) {
      /* children on adjacent rows: move the character left of the branch
       * down one row and draw the branch with '_' instead of '-' */
      if( x > 0 ) { space[y+1][x-1] = space[y][x-1]; space[y][x-1] = ' '; }
      char_x = '_';
    }
    else  char_x = '-';
    for( x_next = x+distance*xscale; x <= x_next; x++ ) space[y][x] = char_x;

    /* x is now the connector column; the subtrees start one column later */
    draw_tree_in_space(tree->l_tree, name, space, x+1, xscale, tree->distance);
    /* connector: '/' on the right subtree's row, '|' on the rows in
     * between, '\' on the left subtree's row */
    space[y=y_r][x] = '/';
    for( y++ ; y < y_l ; y++ ) space[y][x] = '|';
    space[y][x] = '\\';
    draw_tree_in_space(tree->r_tree, name, space, x+1, xscale, tree->distance);
  }
}

/*
 * Number of nodes on the longest path from `tree' down to a leaf.
 * A leaf on its own has depth 1.
 */
tree_depth( tree )
     BiTree *tree;
{
  int right, left;

  if( tree->leaf != FALSE ) return( 1 );

  right = tree_depth( tree->r_tree );
  left  = tree_depth( tree->l_tree );
  return( 1 + ( ( right > left ) ? right : left ) );
}

/*
 * Largest sum of merge distances along any path from `tree' to a leaf.
 *
 * A node with subtrees contributes its own distance plus the larger of
 * the totals of its two subtrees.  A leaf contributes 0: it has no merge
 * distance of its own, and its distance field is not read here because
 * cluster() only stores a merge distance in nodes that have subtrees.
 */
float
total_distance( tree )
     BiTree *tree;
{
  float dist_r, dist_l;

  if( tree->leaf != FALSE ) return( 0.0 );

  dist_r = total_distance( tree->r_tree );
  dist_l = total_distance( tree->l_tree );
  return( tree->distance + ( ( dist_r > dist_l ) ? dist_r : dist_l ) );
}

/*
 * Assign an output row (tree->y) to every node under `tree'.
 *
 * Leaves take successive values of the global Y, the right subtree being
 * numbered before the left one.  A node with subtrees is placed halfway
 * between the values returned for its two children.
 *
 * The function returns int, so the value handed back to the parent is
 * tree->y truncated to an integer, while tree->y keeps the fraction.
 */
y_tree ( tree )
     BiTree *tree;
{
  float upper, lower;

  if( tree->leaf != FALSE ) {
    tree->y = Y++;
    return( tree->y );
  }
  upper = y_tree( tree->r_tree );
  lower = y_tree( tree->l_tree );
  tree->y = (upper + lower)/2;
  return( tree->y );
}

#define Blksize 128	/* rows added to the pattern array whenever it fills up */

/*
 * Read a table of floating-point patterns from a text stream.
 *
 *   Pfile      open stream; the number of values on its first line
 *              fixes the number of elements per pattern
 *   patternP   in:  NULL, or an array from an earlier call, which is
 *                   freed once the new one has been read
 *              out: array of the patterns read
 *   lpatternP  out: number of elements per pattern
 *   npatternP  in:  number of rows of the old *patternP, if there is one
 *              out: number of patterns read
 *
 * Values are read with fscanf("%f") until end of file, so a pattern may
 * span several lines; the stream must end on a pattern boundary.
 *
 * Returns OK.  On failure the function leaves through Erreturn() or
 * Erreturn1() and the three output arguments are left untouched.
 */
int
read_pattern( Pfile, patternP, lpatternP, npatternP )
     FILE	*Pfile; 			/* ptrs to pattern file */
     float	***patternP;
     int	*lpatternP, *npatternP;
{
  register int	i;
  int   status = 0;
  int	Asize = Blksize ;		/* current array size */

        /***** these local variables are temporary storage and are ****
	 ***** copied to the real variables if there is no error ******/
  int	lpattern, npattern;		/* size and # of patterns */
  float	**pattern;

  lpattern = nstrings ( Pfile );
	/* With no values on the first line the read loop below would
	 * never call fscanf and so could never see the end of the file. */
  if( lpattern <= 0 ) Erreturn("no values on first line of pattern file");

				/* allocate space for input/target arrays */
  if Err( pattern = new_2d_array_of ( Asize, lpattern, float ) ) 
    Erreturn("cannot allocate memory for patterns" );
 
	/**** this loop reads in one pattern from the pattern file ****
	 **** and stores it in the next row of the pattern buffer  ****/

  for( npattern=0;  ; npattern++ ) { 

    if( npattern >= Asize ) {     /* need to allocate more space for arrays */
      IfErr( pattern = change_2d_array_size(
	    pattern, Asize, lpattern, Asize+Blksize, lpattern, float ))
	Erreturn( "cannot allocate memory for pattern " ); 
      Asize += Blksize;	/* array size is now Blksize bigger */
    }
	
    for ( i=0; i< lpattern ; i++ ) {
      IfEOF ( status = fscanf (Pfile, "%f", &pattern[npattern][i] ) ) {
	if ( i==0 ) break;	/* clean end of file between two patterns */
	free_2d_array ( pattern, Asize );	/* file ended inside a pattern */
	Erreturn1("cannot read pattern # %d", npattern ); 
      }
      IfErr ( status ) {	/* input that is not a number */
	free_2d_array ( pattern, Asize );
	Erreturn1("cannot read pattern # %d", npattern); 
      }
    }
    IfEOF ( status ) break;
  }
	/* if there is any error, these pointers below aren't changed */

		/* free any space already allocated for patterns */
  if( *patternP != NULL ) free_2d_array ( *patternP, *npatternP );

  *npatternP = npattern;	/* # of patterns read in from file */
  *lpatternP = lpattern;	/* # of elements in pattern */
  *patternP = pattern;		/* input array */
  return( OK );		/* patterns were read in without error */
}

/*
 * Count the blank- or tab-separated strings on the current line of fp.
 *
 * The line ends at a newline or at end of file.  The stream is put back
 * where it was found, so the caller can read the same line again; this
 * needs a seekable stream (on a stream where ftell() fails the line
 * stays consumed).
 *
 * Returns the number of strings, 0 for an empty line or an empty file.
 */
int
nstrings ( fp )
  FILE *fp;
{
  int  count = 0;
  int  c;		/* int, not char: must be able to hold EOF */
  long offset = ftell(fp);

  for( ; ; ) {
    do c = getc( fp ); while( c == ' ' || c == '\t' );	/* skip blanks */
    if( c == '\n' || c == EOF ) break;

    count++;			/* c is the first character of a string */
    do c = getc( fp );
    while( c != ' ' && c != '\t' && c != '\n' && c != EOF );
    if( c == EOF ) break;	/* last line has no trailing newline */
    ungetc( c, fp );		/* let the next round see the delimiter */
  }
  if( offset >= 0 ) fseek( fp, offset, SEEK_SET );
  return( count );
}
