/* File:     driver.cc
 * Purpose:  To supply a simple driver for using the
 *           box-counting library.  Parameters are expected
 *           to be supplied via command-line arguments only.
 *
 *           If no filename is specified, input will be read
 *           from STDIN (which, incidentally, forces use of
 *           the MemoryWrapper).
 *
 *           Unlike my Perl pair_counting library,
 *           normalization is not supported.  That could be
 *           implemented by creating a new DataWrapper
 *           derived class, which would compute stats during
 *           load-file and normalize when returning vectors,
 *           or one could normalize the input before ever
 *           sending it to the wrapper.
 *
 *           The old option names are handled via a
 *           compatibility Perl script wrapper.
 *
 * RCS:
 ************************************************************
 * $Id: driver.cc,v 1.7 2002/01/03 10:58:31 lw2j Exp $
 * $Log:	driver.cc,v $
// Revision 1.7  2002/01/03  10:58:31  lw2j
// Now allows CrossCount.
// 
// Revision 1.6  2001/09/03  19:36:11  lw2j
// Modified to allow choosing between ExtHash and BerkeleyLayer.
//
// Revision 1.5  2001/08/23  16:55:09  lw2j
// Added SPEEDER #define.
//
// Revision 1.4  2001/08/23  16:19:14  lw2j
// Added the zero_translate option.
//
// Revision 1.3  2001/08/22  18:41:29  lw2j
// Added --speed macro.
//
// Revision 1.2  2001/08/22  17:20:18  lw2j
// Untabified.
//
// Revision 1.1  2001/08/22  16:50:51  lw2j
// Initial revision
//
 ************************************************************
 */

#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <unistd.h>
#include <getopt.h>
#include <iostream.h>
#include <string.h>
#include <errno.h>

#include "DiskWrapper.h"
#include "MemoryWrapper.h"
#include "BoxCount.h"
#include "CrossCount.h"
#include "QuadCount.h"



/**********************************************************/
/* Parses the command line, applies the options to the supplied
 * counting objects (box, cross, quad), opens the input stream(s)
 * (fh0, fh1), creates the data wrapper(s) (dw0, dw1) and loads the
 * data into them.  mode_cross and mode_quad report which counting
 * mode was selected.  Prints the usage text and exits on bad input.
 */
static void parse_arguments(int argc, char **argv,
                            BoxCount *&box,
                            DataWrapper *&dw0,
                            FILE *&fh0,
                            CrossCount *&cross,
                            DataWrapper *&dw1,
                            FILE *&fh1,
                            bool &mode_cross,
                            QuadCount *&quad,
                            bool &mode_quad);

/* Writes the usage text to cerr, labelled with the basename of
 * argv0, then terminates the process with exit status 1.
 */
static void print_usage_and_exit(char *argv0);
/**********************************************************/

/* Long-option table handed to getopt_long().
 *
 * Every entry has flag == NULL and val == 0, so getopt_long() returns
 * 0 for each of them and the option is identified solely by its index
 * in this table.  The order of the rows must therefore stay in step
 * with the OPT_* enumeration that follows.
 */
static struct option long_options[] = {
  /* name              has_arg            flag  val */
  {"exponent",       required_argument, NULL, 0},  /* exponent to use */
  {"base",           required_argument, NULL, 0},  /* logarithm base */
  {"radius_min",     required_argument, NULL, 0},  /* minimum radius to use */
  {"radius_max",     required_argument, NULL, 0},  /* maximum radius to use */
  {"radius_count",   required_argument, NULL, 0},  /* use at most this many */
  {"singleton_max",  required_argument, NULL, 0},  /* max frac to be singletons */
  {"occupancy_max",  required_argument, NULL, 0},  /* max frac to be in one cell */
  {"data_memory",    optional_argument, NULL, 0},  /* use MemoryWrapper? */
  {"counter_memory", optional_argument, NULL, 0},  /* in-memory cell counts? */
  {"two_table",      optional_argument, NULL, 0},  /* use two_table algorithm? */
  {"zero_translate", optional_argument, NULL, 0},  /* translate the data so minima=origin */
  {"speed",          no_argument,       NULL, 0},  /* both memory options, and two_table */
  {"db_type",        required_argument, NULL, 0},  /* database type for occupancy counts */
  {"pairs",          no_argument,       NULL, 0},  /* do pair-counting */
  {"cross",          no_argument,       NULL, 0},  /* do cross-product counting */
  {"quad",           no_argument,       NULL, 0},  /* use the quadratic method *eek* */
  {"min_frac",       required_argument, NULL, 0},  /* for quadratic only */
  {"max_frac",       required_argument, NULL, 0},  /* for quadratic only */
  {NULL,             0,                 NULL, 0}   /* required terminator */
};


/* Indices into long_options[]; getopt_long() reports the matched
 * option through its index, so each value here must equal the
 * position of the corresponding row in that table.
 */
enum {
  OPT_EXPONENT       =  0,  /* --exponent       */
  OPT_BASE           =  1,  /* --base           */
  OPT_RADIUS_MIN     =  2,  /* --radius_min     */
  OPT_RADIUS_MAX     =  3,  /* --radius_max     */
  OPT_RADIUS_COUNT   =  4,  /* --radius_count   */
  OPT_SINGLETON_MAX  =  5,  /* --singleton_max  */
  OPT_OCCUPANCY_MAX  =  6,  /* --occupancy_max  */
  OPT_DATA_MEMORY    =  7,  /* --data_memory    */
  OPT_COUNTER_MEMORY =  8,  /* --counter_memory */
  OPT_TWO_TABLE      =  9,  /* --two_table      */
  OPT_ZERO_TRANSLATE = 10,  /* --zero_translate */
  OPT_SPEED          = 11,  /* --speed          */
  OPT_DB_TYPE        = 12,  /* --db_type        */
  OPT_PAIRS          = 13,  /* --pairs          */
  OPT_CROSS          = 14,  /* --cross          */
  OPT_QUAD           = 15,  /* --quad           */
  OPT_FRAC_MIN       = 16,  /* --min_frac       */
  OPT_FRAC_MAX       = 17   /* --max_frac       */
};

/**********************************************************/


/* N.B. -- Options not specified use the defaults specified
 * by the BoxCounting library.  Normally, these are fairly
 * sane.
 *
 * The BoxCounting library itself handles most parameter
 * checking, such as guaranteeing that the minimum radius
 * is strictly less than the maximum radius.
 */

/* Write the full usage/help text to cerr and terminate with status 1.
 *
 * argv0 is the program path as invoked; only the part after the last
 * '/' is shown.  That basename also decides which counting mode the
 * text advertises as the default (a name containing "cross" means
 * --cross, anything else means --pairs).  This function never returns.
 */
static void print_usage_and_exit(char *argv0) {
  /* basename:  everything after the final '/', or all of argv0 */
  const char *last_slash = strrchr(argv0, '/');
  const char *prog       = last_slash ? (last_slash + 1) : argv0;

  /* Synopsis. */
  cerr << prog << " -- Usage:" << endl
       << prog << "\t [--exponent   N]" << endl
       << "\t [--base             N]" << endl
       << "\t [--radius_min       N]" << endl
       << "\t [--radius_max       N]" << endl
       << "\t [--radius_count     N]" << endl
       << "\t [--singleton_max    N]" << endl
       << "\t [--occupancy_max    N]" << endl
       << "\t [--data_memory   [=N]]" << endl
       << "\t [--counter_memory[=N]]" << endl
       << "\t [--two_table     [=N]]" << endl
       << "\t [--zero_translate[=N]]" << endl
       << "\t [--speed             ]" << endl
       << "\t [--db_type          N]" << endl
       << "\t [--pairs             ]" << endl
       << "\t [--cross             ]" << endl
       << "\t [--quad              ]" << endl
       << "\t [--min_frac         N]" << endl
       << "\t [--max_frac         N]" << endl
       << "\t [FILENAME [FILENAME ]]" << endl
       << endl;

  /* Input file. */
  cerr << "The data filename must be the final argument, if present;" << endl
       << "otherwise, it defaults to standard input (which also forces" << endl
       << "data_memory to be true)." << endl
       << endl;

  /* Exponent and logarithm base. */
  cerr << "The exponent defaults to 2 (correlation fractal dimension)." << endl
       << "The base is the base for logarithms; defaults to 2, and is" << endl
       << "purely cosmetic since it applies to both counts and radii." << endl
       << endl;

  /* Radius series. */
  cerr << "The radius minimum, maximum, and number determine the extent" << endl
       << "and multiplier of the geometric series [def. 39 radii between" << endl
       << "2^{-18} and 2^{20}, inclusive]." << endl
       << endl;

  /* Stopping fractions. */
  cerr << "The singleton and occupancy fractions ([0,1]; both default to" << endl
       << "0.95) dictate how many objects may be the sole occupants of" << endl
       << "cells before the radius stops shrinking, and how many may be" << endl
       << "in the same cell before the radius stops increasing." << endl
       << endl;

  /* Memory flags. */
  cerr << "The data_memory and counter_memory flags (both default to 0)" << endl
       << "specify whether the data file and the cell counts are stored" << endl
       << "memory (1=true)" << endl
       << endl;

  /* Two-table tweak. */
  cerr << "The two-table boolean flag (again, default 0) specifies whether" << endl
       << "the two-table algorithm tweak is used.  This may or may not be" << endl
       << "faster." << endl
       << endl;

  /* Zero translation. */
  cerr << "The zero_translation flag (default:  1/TRUE) controls whether" << endl
       << "the data is translated by subtracting the minimum along each axis." << endl
       << endl;

  /* Counting mode; the last sentence depends on the program name. */
  cerr << "The 'cross' and 'pairs' flags are mutually exclusive, as the" << endl
       << "dictate the mode of operation.  'cross' means cross-product" << endl
       << "counting, which requires two separate inputs and ignores the" << endl
       << "exponent.  'pairs' means pair-counting via the standard box-" << endl
       << "counting-method.";

  cerr << (strstr(prog, "cross") ? "  The default is --cross."
                                 : "  The default is --pairs.");

  cerr << endl << endl;

  /* Quadratic method. */
  cerr << "The --quad option enables the use of a quadratic-order pairwise" << endl
       << "algorithm.  It is slow, but on small data sets it may be the" << endl
       << "only algorithm capable of giving good results.  Choosing this" << endl
       << "nullifies many of the other options as irrelevant, such as the" << endl
       << "database type, the exponent, the two_table flag, or the occupancy" << endl
       << "parameters.  It enables two otherwise irrelevant parameters," << endl
       << "namely min_frac and max_frac, which stipulate the minimum [0.0001] and" << endl
       << "maximum [0.8] fractions of pairs for a radius to be used." << endl
       << endl << endl;

  /* --speed shortcut. */
  cerr << "The option 'speed' merely is a fast way to turn on data_memory," << endl
       << "counter_memory, and two_table." << endl;
#ifdef SPEEDER
  cerr << endl
       << "This binary has been compiled with -DSPEEDER, which means that" << endl
       << "--speed defaults to ON." << endl;
#endif

  /* Database back ends compiled into this binary. */
  cerr << "The 'db_type' option specifies which database implementation" << endl
       << "is used to store occupancy counts.  The following are " << endl
       << "supported by this binary: " << endl;

#if (USE_BERKELEY == 1)
  cerr << "   " << BoxCount::DB_Berkeley << " -> Berkeley Database" << endl;
#endif

#if (USE_EXTHASH == 1)
  cerr << "   " << BoxCount::DB_ExtHash  << " -> Extensible Hash (MEMORY ONLY)" << endl;
#endif

  cerr << "The default is the first on the list." << endl;

  exit(1);
}




/* Note that two valid objects -- box (for box-counting) and cross
 * (for cross-products) must be supplied.  We'll apply settings to
 * both as apropos, and set mode_cross accordingly.
 *
 * It is the caller's responsibility to dispose of the unneeded
 * counting object.
 */

static void parse_arguments(int argc, char **argv,
                            BoxCount *&box,
                            DataWrapper *&dw0,
                            FILE *&fh0,
                            CrossCount *&cross,
                            DataWrapper *&dw1,
                            FILE *&fh1,
                            bool &mode_cross,
                            QuadCount *&quad,
                            bool &mode_quad) {
  char        *argv0          = argv[0];  /* in case it's permuted? */
  double       optarg_dbl     = 0;
  bool         data_memory    = false;
  bool         optarg_bool    = false;
  bool         zero_translate = true;  /* NOTICE */
  int          c              = 0;
  unsigned int optarg_ui      = 0;

  char *pc = argv0;
  char *t  = argv0;

  /* find basename... assume '/' as IFS */
  while ((t = strchr(pc, '/'))) {
    pc = t+1;
  }


  if (strstr(pc, "cross")) {
    mode_cross = true;
  } else {
    /* Pair-counting. */
    mode_cross = false;
  }

  dw0   = NULL;
  dw1   = NULL;
  fh0   = NULL;
  fh1   = NULL;


#ifdef SPEEDER
  /* default to --speed:  sacrifice memory for speed, enable tweaks */
  data_memory = true;
  box->set_two_table(true);
  box->set_in_memory(true);
  cross->set_two_table(true);
  cross->set_in_memory(true);
#endif


  while (1) {
    int option_index       = 0;

    /* For convenience down below.  Must reflect order of
     * items in long_options[].
     */

    c = getopt_long(argc, argv, "h", long_options, &option_index);
    if (c == EOF) {
      break;
    }

    switch(c) {
    case 'h':
      print_usage_and_exit(argv0);
      break;
    case 0:  /* long option found */
      /* Read the argument. */

      switch(option_index) {
      case OPT_EXPONENT:  /* these take doubles */
      case OPT_BASE:
      case OPT_RADIUS_MIN:
      case OPT_RADIUS_MAX:
      case OPT_SINGLETON_MAX:
      case OPT_OCCUPANCY_MAX:
      case OPT_FRAC_MIN:
      case OPT_FRAC_MAX:
        if (sscanf(optarg, "%lf", &optarg_dbl) != 1) {
          print_usage_and_exit(argv0);
        }

        switch(option_index) {
        case OPT_EXPONENT:
          box->set_exponent(optarg_dbl);
          /* Irrelevant to cross-product and quad. */
          break;
        case OPT_BASE:
          box->set_base(optarg_dbl);
          cross->set_base(optarg_dbl);
          quad->set_base(optarg_dbl);
          break;
        case OPT_RADIUS_MIN:
          box->set_radius_min(optarg_dbl);
          cross->set_radius_min(optarg_dbl);
          quad->set_radius_min(optarg_dbl);
          break;
        case OPT_RADIUS_MAX:
          box->set_radius_max(optarg_dbl);
          cross->set_radius_max(optarg_dbl);
          quad->set_radius_max(optarg_dbl);
          break;
        case OPT_SINGLETON_MAX:
          box->set_singleton_max(optarg_dbl);
          cross->set_singleton_max(optarg_dbl);
          break;
        case OPT_OCCUPANCY_MAX:
          box->set_occupancy_max(optarg_dbl);
          cross->set_occupancy_max(optarg_dbl);
          break;
        case OPT_FRAC_MIN:
          quad->set_min_frac(optarg_dbl);
          break;
        case OPT_FRAC_MAX:
          quad->set_max_frac(optarg_dbl);
          break;
        default:
          /* should be unreachable! */
          assert(0);
        }
        break;  /* the options which take doubles */

      case OPT_RADIUS_COUNT:  /* these take an unsigned integer */
      case OPT_DB_TYPE:
        if (sscanf(optarg, "%u", &optarg_ui) != 1) {
          print_usage_and_exit(argv0);
        }

        switch(option_index) {
        case OPT_RADIUS_COUNT:
          box->set_radius_count(optarg_ui);
          cross->set_radius_count(optarg_ui);
          break;
        case OPT_DB_TYPE:
          box->set_db_type(optarg_ui);
          cross->set_db_type(optarg_ui);
          break;
        default:
          /* unreachable */
          assert(0);
        }
        break;
      case OPT_DATA_MEMORY:  /* these take an optional boolean [0,1] */
      case OPT_COUNTER_MEMORY:
      case OPT_TWO_TABLE:
      case OPT_ZERO_TRANSLATE:
      case OPT_PAIRS:
      case OPT_CROSS:
      case OPT_QUAD:
        if (!optarg) {
          /* they default to true if option specified, but
           * boolean not
           */
          optarg_bool = true;
        } else {
          if ((sscanf(optarg, "%u", &optarg_ui) != 1) ||
              (optarg_ui > 1)) {
            print_usage_and_exit(argv0);
          }
          optarg_bool = optarg_ui ? true : false;
        }

        switch(option_index) {
        case OPT_DATA_MEMORY:
          data_memory = optarg_bool;
          break;
        case OPT_COUNTER_MEMORY:
          box->set_in_memory(optarg_bool);
          cross->set_in_memory(optarg_bool);
          break;
        case OPT_TWO_TABLE:
          box->set_two_table(optarg_bool);
          cross->set_two_table(optarg_bool);
          break;
        case OPT_ZERO_TRANSLATE:
          zero_translate = optarg_bool;
          break;
        case OPT_PAIRS:
          mode_cross = false;
          break;
        case OPT_CROSS:
          mode_cross = true;
          break;
        case OPT_QUAD:
          mode_quad = true;
          break;
        default:
          /* unreachable */
          assert(0);
        }
        break;
      case OPT_SPEED:
        /* abbreviation */
        box->set_in_memory(true);
        box->set_two_table(true);
        cross->set_in_memory(true);
        cross->set_two_table(true);
        data_memory = true;
        break;
      default:
        /* unreachable */
        assert(0);
      }  /* switch(option_index) -- which option? */
      break;

    case '?':
      print_usage_and_exit(argv0);
      break;
    default:
      assert(0);
    }  /* switch(c) -- option or no? */
  }




  /* Now for the filenames. */
  if (optind == argc) {
    /* No filenames supplied.  Verify that the mode is 'pairs'. */
    if (mode_cross) {
      print_usage_and_exit(argv0);
    }

    /* Pair mode.  The input comes from STDIN. */
    fh0       = stdin;
    data_memory = true;
  } else if (optind == (argc - 1)) {
    /* One filename provided. */
    if (mode_cross) {
      /* So we'll use this one, plus STDIN. */
      fh1         = stdin;
      data_memory = true;
    }

    fh0 = fopen(argv[optind], "r");
    if (!fh0) {
      perror("fopen() error in parse_arguments:  ");
      exit(1);
    }
  } else if (optind == (argc - 2)) {
    /* Two filenames.  Meaningless unless mode_cross. */
    if (mode_cross) {
      fh0 = fopen(argv[optind], "r");
      if (!fh0) {
        perror("fopen() error in parse_arguments:  ");
        exit(1);
      }
      fh1 = fopen(argv[optind+1], "r");
      if (!fh1) {
        perror("fopen() error in parse_arguments:  ");
        exit(1);
      }
    } else {
      print_usage_and_exit(argv0);
    }
  } else {
    /* More than two filenames. */
    print_usage_and_exit(argv0);
  }


  if (data_memory) {
    dw0 = new MemoryWrapper;

    if (mode_cross) {
      dw1 = new MemoryWrapper;
    }
  } else {
    dw0 = new DiskWrapper;

    if (mode_cross) {
      dw1 = new DiskWrapper;
    }
  }

  /* Do this *before* loading the file, so MemoryWrapper doesn't
   * have to make another pass.
   */
  dw0->set_zero_translation(zero_translate);
  dw0->load_file(fh0);

  if (dw1) {
    dw1->set_zero_translation(zero_translate);
    dw1->load_file(fh1);
  }
}




/* Program entry point.
 *
 * Creates the three counting objects, lets parse_arguments() configure
 * them and load the data, runs whichever one the selected mode calls
 * for, and prints one "log_radius log_count" pair per line on stdout.
 *
 * Exit status is 0 on success and 1 if the computation reported
 * failure (a diagnostic including errno is written to cerr).
 */
int main(int argc, char **argv) {
  BoxCount     *box        = new BoxCount;
  CrossCount   *cross      = new CrossCount;
  QuadCount    *quad       = new QuadCount;
  DataWrapper  *dw0        = NULL;
  FILE         *fh0        = NULL;
  DataWrapper  *dw1        = NULL;
  FILE         *fh1        = NULL;
  double       *log_radii  = NULL;
  double       *log_counts = NULL;
  unsigned int  points     = 0;
  unsigned int  idx        = 0;
  bool          mode_cross = false;
  bool          mode_quad  = false;
  bool          failed     = false;

  /* parse_arguments() dereferences all three unconditionally. */
  assert(box);
  assert(cross);
  assert(quad);

  parse_arguments(argc, argv, box,
                  dw0, fh0, cross, dw1, fh1,
                  mode_cross, quad, mode_quad);

  if (mode_quad) {
    /* Quadratic method:  pair the data with itself unless in cross mode. */
    if (mode_cross) {
      quad->set_wrappers(dw0, dw1);
    } else {
      quad->set_wrappers(dw0, dw0);
    }
    if (quad->compute(points, log_radii, log_counts)) {
      failed = true;
    }
  } else if (mode_cross) {
    cross->set_wrappers(dw0, dw1);
    if (cross->compute(points, log_radii, log_counts)) {
      failed = true;
    }
  } else {
    box->set_wrapper(dw0);
    if (box->compute(points, log_radii, log_counts)) {
      failed = true;
    }
  }

  if (failed) {
    cerr << "Computation did not proceed.  Check errno... " << errno << endl;
  }

  /* 20 digits should be usually be enough for our purposes... */
  cout.precision(20);

  for (idx = 0; idx < points; idx++) {
    cout << log_radii[idx] << " " << log_counts[idx] << endl;
  }

  /* Deallocate resources.  fclose() is called outside assert() so the
   * streams are still closed when compiled with NDEBUG.
   */
  if ((fh0 != NULL) && (fh0 != stdin)) {
    int close_status = fclose(fh0);
    assert(!close_status);
    (void) close_status;
  }

  if ((fh1 != NULL) && (fh1 != stdin)) {
    int close_status = fclose(fh1);
    assert(!close_status);
    (void) close_status;
  }

  delete [] log_radii;
  delete [] log_counts;
  delete box;
  delete cross;
  delete quad;
  delete dw0;
  delete dw1;

  exit(failed ? 1 : 0);
}

