/* File:     QuadCount.h
 * Purpose:  To provide an API for computing pair counts,
 *           and for setting / getting related parameters.
 *           Forked from CrossCount.h.
 *
 *           The primary difference is that this is designed
 *           to use the quadratic (pairwise!) algorithm, 
 *           which may be the only feasible algorithm on
 *           small sets.
 *
 * RCS:
 ************************************************************
 * $Id: CrossCount.h,v 1.1 2002/01/03 10:58:31 lw2j Exp $
 * $Log:	CrossCount.h,v $
 *
 * Forked from CrossCount.h.
 * 
 * Revision 1.1  2002/01/03  10:58:31  lw2j
 * Initial revision
 * 
 * Revision 1.4  2001/09/03  19:36:11  lw2j
 * Now knows about different DBLayer implementations.
 *
 * Revision 1.3  2001/08/22  17:20:18  lw2j
 * Untabified.
 *
 * Revision 1.2  2001/08/22  16:50:51  lw2j
 * Added support for two_tables.
 *
 * Revision 1.1  2001/08/22  04:27:01  lw2j
 * Initial revision
 *
 *
 *
 ************************************************************
 */


#ifndef QUADCOUNT_H
#define QUADCOUNT_H

/* I/O is expected to follow the DataWrapper virtual class.
 * For a non-seekable source such as a stream, or for small
 * data, MemoryWrapper might be either necessary (for the
 * former) or faster (for the latter).  For data that is too
 * large to easily fit in memory, DiskWrapper may be
 * necessary.  QuadCount will restrict itself to the
 * DataWrapper generic API in any case.
 *
 * This class will neither allocate nor deallocate the
 * wrapper -- that's the user's job.
 *
 * It should theoretically work for both cross-pair counts and
 * normal pair counts.  However, one should use caution when
 * passing two pointers to the same wrapper: if the wrapper is
 * disk-based, this may require substantial disk I/O.  It also
 * ignores exponents; essentially, the exponent is fixed at 2.
 */

#include <stdlib.h>
#include <assert.h>
#include <math.h>
#include "DataWrapper.h"

/* QuadCount
 *
 * Holds the parameters for a pair-count computation and exposes
 * compute(), which produces the log-log (radius, count) points.
 *
 * No database options appear here because no database is used;
 * the counters are updated directly while the quadratic loop runs.
 * The data itself is reached only through the DataWrapper
 * pointers, so the storage behind them is opaque to this class.
 * For the same reason there is no notion of "two_table".
 *
 * The wrappers are supplied by the caller through set_wrappers().
 */
class QuadCount {
 public:
  QuadCount();
  ~QuadCount();

  /* ---- accessors ---- */

  /* First data source, as last given to set_wrappers(). */
  DataWrapper *get_wrapper0() const {
    return dw0;
  }

  /* Second data source, as last given to set_wrappers(). */
  DataWrapper *get_wrapper1() const {
    return dw1;
  }

  /* Logarithm base. */
  double get_base() const {
    return base;
  }

  /* Smallest radius. */
  double get_radius_min() const {
    return radius_min;
  }

  /* Largest radius. */
  double get_radius_max() const {
    return radius_max;
  }

  /* Lower trimming fraction. */
  double get_min_frac() const {
    return min_frac;
  }

  /* Upper trimming fraction. */
  double get_max_frac() const {
    return max_frac;
  }

  /* Upper bound on the number of radii. */
  unsigned int get_radius_count() const {
    return radius_count;
  }

  /* ---- mutators ---- */

  /* Install both data sources and cache their dimensionality and
   * cardinality.
   *
   * Both pointers must be non-NULL (checked with assert).  The two
   * sets may differ in cardinality, but they must share the same
   * dimensionality -- otherwise they do not live in the same space
   * and pairing their points is meaningless; a mismatch trips
   * assert(0).
   */
  void set_wrappers(DataWrapper *dw_new0, DataWrapper *dw_new1) {
    dw0 = dw_new0;
    dw1 = dw_new1;

    /* A NULL wrapper is never useful. */
    assert(dw0);
    assert(dw1);

    data_dims0  = dw0->get_dims();
    data_count0 = dw0->get_count();
    data_dims1  = dw1->get_dims();
    data_count1 = dw1->get_count();

    if (data_dims0 != data_dims1) {
      /* Convenient spot for a breakpoint. */
      assert(0);
    }
  }

  /* Set the logarithm base and refresh the cached log(base). */
  void set_base(double base_new) {
    base    = base_new;
    log_div = log(base_new);
  }

  /* Set the smallest radius. */
  void set_radius_min(double radius_min_new) {
    radius_min = radius_min_new;
  }

  /* Set the largest radius. */
  void set_radius_max(double radius_max_new) {
    radius_max = radius_max_new;
  }

  /* Set the upper bound on the number of radii. */
  void set_radius_count(unsigned int radius_count_new) {
    radius_count = radius_count_new;
  }

  /* Set the lower trimming fraction. */
  void set_min_frac(double min_frac_new) {
    min_frac = min_frac_new;
  }

  /* Set the upper trimming fraction. */
  void set_max_frac(double max_frac_new) {
    max_frac = max_frac_new;
  }

  /* Run the computation with the parameters currently set.
   *
   * On success, returns 0 and:
   *   - point_count receives the number of log-log points actually
   *     generated, which may be substantially less than
   *     radius_count;
   *   - log_radius_array receives a new()'d array of exactly that
   *     many radii, each already passed through log_base;
   *   - log_count_array receives a new()'d array of the matching
   *     counts, each already passed through log().
   *
   * On failure, returns -1 and tries to set errno appropriately.
   */
  int compute(unsigned int &point_count,
              double *&log_radius_array,
              double *&log_count_array);

 protected:
  /* ---- methods ---- */

  /* Counter update step used by the quadratic loop; takes the two
   * output arrays and one data vector from each source.
   */
  void update_counters(double *log_radius_array,
                       double *log_count_array,
                       const double *data_vec0,
                       const double *data_vec1);

  /* ---- data ----
   * Bracketed values are the nominal defaults.
   */

  DataWrapper *dw0;
  DataWrapper *dw1;

  /* Base for logarithms; really just a scaling factor.  [2] */
  double base;

  /* Minimum radius.  [2^-20] */
  double radius_min;

  /* Maximum radius.  [2^18] */
  double radius_max;

  /* Maximum number of radii.  [39] */
  unsigned int radius_count;

  /* Minimum fraction of pairs to count (otherwise, trimmed).
   * [0.1]
   */
  double min_frac;

  /* Maximum fraction of pairs to count (otherwise, trimmed).
   * [0.9]
   */
  double max_frac;

  /* The next four have no direct accessors or mutators, because
   * they are derived from the wrappers in set_wrappers().
   */
  unsigned int data_dims0;   /* dw0->get_dims()  */
  unsigned int data_count0;  /* dw0->get_count() */
  unsigned int data_dims1;   /* dw1->get_dims()  */
  unsigned int data_count1;  /* dw1->get_count() */

  /* Also derived: log(base), refreshed by set_base(). */
  double log_div;
};

#endif /* QUADCOUNT_H */
