/* File:     QuadCount.cc
 * Purpose:  To implement an API for computing pair counts,
 *           and for setting / getting related parameters.
 *           Uses the pairwise, quadratic, table-less method.
 *
 * RCS:
 ************************************************************
 * $Id: CrossCount.cc,v 1.1 2002/01/03 10:58:31 lw2j Exp $
 * $Log:  CrossCount.cc,v $
 *
 * Forked off of CrossCount.cc.
 *
 * Revision 1.1  2002/01/03  10:58:31  lw2j
 * Initial revision
 * 
 * Revision 1.6  2001/09/03  19:36:11  lw2j
 * Now allows the choice of DBLayer implementation.
 *
 * Revision 1.5  2001/08/23  16:19:14  lw2j
 * Added the zero_translate option.
 *
 * Revision 1.4  2001/08/22  23:38:45  lw2j
 * Fixed odd off-by-one bug using two_tables -- 'twas caused by
 * the counter trying to fetch_plusplus newdb instead of prev_db,
 * (when generating a new prev_db because it had been passed in a
 * NULL).
 *
 * Revision 1.3  2001/08/22  17:20:18  lw2j
 * Untabified.
 *
 * Revision 1.2  2001/08/22  16:50:51  lw2j
 * Added the two_tables method, which may give more speed
 * when increasing the radius at the cost of storage.
 *
 * Revision 1.1  2001/08/22  04:27:01  lw2j
 * Initial revision
 *
 ************************************************************
 */

#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <errno.h>
#include <iostream.h>
#include <string.h>
#include "QuadCount.h"


#ifndef MAX2
#define MAX2(__DEFMAC_A, __DEFMAC_B) (((__DEFMAC_A) >= (__DEFMAC_B)) ? (__DEFMAC_A) : (__DEFMAC_B))
#endif



/*******************************************************************/


/* Default constructor.  Starts with no data wrappers attached and
 * with these defaults: logarithm base 2, 39 radii spanning
 * 2^-20 .. 2^18, and a retained count window of 0.0001 .. 0.8.
 * log_div caches log(base).
 */
QuadCount::QuadCount()
  : dw0(NULL),
    dw1(NULL),
    base(2),
    radius_min(1.0 / 1048576.0),   /* 2^-20 */
    radius_max(262144.0),          /* 2^18  */
    radius_count(39),
    min_frac(0.0001),
    max_frac(0.8)
{
  /* Set in the body, after every initializer above has run. */
  log_div = log(base);
}


/* Destructor.  Deliberately empty: nothing is released here, and the
 * dw0 / dw1 wrappers are left untouched.
 */
QuadCount::~QuadCount() {}




/* Do the math.  It iterates over both wrappers, updating the
 * counters.  Then, it checks to see what range of counters
 * passes the min_frac and max_frac tests.
 */
int QuadCount::compute(unsigned int& point_count,
                      double *&log_radius_array,
                      double *&log_count_array) {

  double       radius_mult = 0;
  double*      data_vec0 = NULL;
  double*      data_vec1 = NULL;

  unsigned int rad_idx = 0;
  unsigned int data_idx0 = 0;
  unsigned int data_idx1 = 0;
  unsigned int left      = 0;
  unsigned int right     = 0;
  unsigned int max_count = 0;

  double *temp_array        = NULL;
  double *temp_radius_array = NULL;
  double *temp_count_array  = NULL;

  /* Some basic sanity checking. */
  if ((!dw0) || (!dw1) || (base <= 0) ||
      (min_frac < 0) || (min_frac > 1)  ||
      (max_frac < 0) || (max_frac > 1)  ||
      (min_frac > max_frac)      ||
      (radius_min >= radius_max) ||
      (radius_count < 1)) {
    errno = EINVAL;
    return -1;
  }

  max_count   = data_count0 * data_count1;
  radius_mult = exp(log(radius_max / radius_min) /
                    (radius_count - 1));

#ifdef PARANOID
  if (radius_mult != floor(radius_mult)) {
    cerr << "Non-integer radius multiplier of " << radius_mult << " is " << endl;
    cerr << "forbidden when the cross-counting library is built with" << endl;
    cerr << "-DPARANOID." << endl;
    errno = EINVAL;
    return -1;
  }
#endif /* PARANOID */


  /* Initialize counters.  These are in normal scale, not log; we'll
   * log 'em later. 
   */
  point_count = radius_count;  /* We may trim these. */
  log_radius_array = new double[point_count];
  log_count_array = new double[point_count];
  log_radius_array[0] = radius_min;

  for (rad_idx=1; rad_idx < radius_count; rad_idx++) {
    log_radius_array[rad_idx] = radius_mult * log_radius_array[rad_idx-1]; 
    log_count_array[rad_idx] = 0;
  }

  /* Count. */

  if (dw0 == dw1) {
    /* Same wrapper, must use dynamic memory allocation */
    for (data_idx0=0; data_idx0 < data_count0; data_idx0++) {
      data_vec0 = dw0->get_vector_dynamic(data_idx0);
      for (data_idx1=0; data_idx1 < data_count1; data_idx1++) {
        data_vec1 = dw1->get_vector_dynamic(data_idx1);
        
        update_counters(log_radius_array, log_count_array, 
                        data_vec0, data_vec1);
        delete [] data_vec1;
      }
      delete [] data_vec0;
    }
  } else {
    for (data_idx0=0; data_idx0 < data_count0; data_idx0++) {
      data_vec0 = dw0->get_vector(data_idx0);
      for (data_idx1=0; data_idx1 < data_count1; data_idx1++) {
        data_vec1 = dw1->get_vector(data_idx1);
        
        update_counters(log_radius_array, log_count_array, 
                        data_vec0, data_vec1);
      }
    }
  }


  /* Identify the region to use. */
  for (left=0; left < radius_count; left++) {
    if ((log_count_array[left] >= (min_frac * max_count)) &&
        (log_count_array[left] > 0)) {
      break;
    }
  }

  /* Nobody had enough points?!  Should be impossible... */
  assert(left < radius_count);

  for (right=radius_count-1; right >= left; right--) {
    if (log_count_array[right] <= (max_frac * max_count)) {
      break;
    }

    if ((right == left) || (right == 0)) {
      point_count = 0;
      delete [] log_count_array;
      delete [] log_radius_array;
      log_count_array = NULL;
      log_radius_array = NULL;
    }
  }


  /* Memmove the region in each array. */
  assert(0 <= left);
  assert(left <= right);
  assert(right < radius_count);

  point_count = right - left + 1;

  temp_count_array  = new double[point_count];
  temp_radius_array = new double[point_count];

  memmove((char*) temp_count_array,
          (char*) (&(log_count_array[left])),
          (sizeof(double)) * (right-left+1));
  memmove((char*) temp_radius_array,
          (char*) (&(log_radius_array[left])),
          (sizeof(double)) * (right-left+1));

  temp_array      = log_count_array;
  log_count_array = temp_count_array;
  delete [] temp_array;

  temp_array       = log_radius_array;
  log_radius_array = temp_radius_array;
  delete [] temp_array;


  /* Logify. */
  for (rad_idx=0; rad_idx < point_count; rad_idx++) {
    log_count_array[rad_idx] = log(log_count_array[rad_idx]) / log_div;
    log_radius_array[rad_idx] = log(log_radius_array[rad_idx]) / log_div;
  }

  return 0;
}



/* Fold one pair of vectors into the per-radius counters.
 *
 * The Euclidean distance between data_vec0 and data_vec1 is taken
 * over data_dims0 coordinates.  Then, walking from the largest radius
 * downward, every counter whose radius is >= that distance is bumped
 * by one; the walk stops at the first radius that is too small.
 *
 * The early stop relies on the radii being in ascending order, which
 * is how compute() builds them.  Despite the parameter names, when
 * called from compute() both arrays hold plain (not yet logged)
 * values.  The number of radii comes from the radius_count member, so
 * it is not passed in.
 */
void QuadCount::update_counters(double* log_radius_array,
                                double* log_count_array,
                                const double* data_vec0,
                                const double* data_vec1) {
  /* Sum of squared coordinate differences. */
  double sum_sq = 0;

  for (unsigned int axis = 0; axis < data_dims0; axis++) {
    const double delta = data_vec0[axis] - data_vec1[axis];
    sum_sq += (delta * delta);
  }

  const double dist = sqrt(sum_sq);

  /* 'slot' is one past the next radius to test, so the unsigned index
   * never has to go below zero.
   */
  unsigned int slot = radius_count;

  while ((slot > 0) && (dist <= log_radius_array[slot - 1])) {
    slot--;
    log_count_array[slot]++;
  }
}

