/* File:     diskwrapper.cc
 * Purpose:  To implement the DataWrapper interface with an
 *           on-disk, single-line-buffer, fseek method.
 *           Will not work with STDIN, because the stream
 *           must be seekable (it is rewound and re-read).
 *           Slow, but uses minimal memory.
 *
 * RCS:
 ************************************************************
 * $Id: DiskWrapper.cc,v 1.3 2001/08/23 16:19:14 lw2j Exp $
 * $Log:	DiskWrapper.cc,v $
// Revision 1.3  2001/08/23  16:19:14  lw2j
// Added the zero_translate option.
// 
// Revision 1.2  2001/08/22  17:20:18  lw2j
// Untabified.
// 
// Revision 1.1  2001/08/21  03:40:34  lw2j
// Initial revision
// 
 *
 ************************************************************
 */


#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <errno.h>
#include <string.h>
#include "DiskWrapper.h"


/* Attach this wrapper to FILE fh_in.
 *
 * The stream is rewound, scanned once from start to finish to
 * count the vectors it holds (cardinality), to establish how
 * many doubles each vector has (dimensionality) and to record
 * the per-dimension minima, and then rewound again so that
 * subsequent reads start at the first vector.  Lines that
 * process_string() reduces to nothing, and lines in which
 * find_doubles() finds no doubles, are ignored.
 *
 * Any state left over from a previous load is discarded first.
 *
 * Returns 1 on success, 0 on failure.  On failure the object is
 * left "empty" (fh, buf and minima are NULL; cardinality and
 * dimensionality are 0) and errno describes the problem:
 *
 *   EINVAL  fh_in is NULL, or two vectors in the file have a
 *           different number of doubles
 *   other   whatever fseek() or the underlying read reported
 *           (e.g. the stream is not seekable); EIO if the
 *           library reported a failure without setting errno
 */
int DiskWrapper::load_file(FILE *fh_in) {
  char           line_buf[DataWrapper::MAX_LINE_SIZE];
  double*        dbl_array   = NULL;
  unsigned int   number      = 0;
  unsigned int   i           = 0;
  int            mismatch    = 0;
  int            saved_errno = 0;

  /* Reset values.  These will indicate bogosity if
   * load_file() fails at any stage.  (delete [] on a NULL
   * pointer is a no-op.)
   */
  delete [] buf;
  buf = NULL;

  delete [] minima;
  minima = NULL;

  dimensionality = 0;
  cardinality    = 0;
  fh             = NULL;

  if (!fh_in) {
    errno = EINVAL;
    return 0;
  }

  /* fseek() is only specified to return nonzero on failure,
   * not necessarily a negative value.
   */
  if (fseek(fh_in, 0, SEEK_SET) != 0) {
    /* Error condition... probably not
     * seekable.  Retain errno.*/
    return 0;
  }

  /* Drop any stale error indicator so that the ferror() test
   * after the scan only reflects errors from this pass.
   */
  clearerr(fh_in);

  /* Find count and dimensionality. */

  while (!mismatch &&
         fgets(line_buf, DataWrapper::MAX_LINE_SIZE, fh_in)) {
    DataWrapper::process_string(line_buf);

    /* Only care if it's not truncated to nothingness. */
    if (line_buf[0] == '\0') {
      continue;
    }

    /* A limit of 0 is passed on purpose: we want to be paranoid
     * and count all the doubles in every line just this once.
     */
    number = DataWrapper::find_doubles(dbl_array, line_buf, 0);

    if (number > 0) {
      if (!dimensionality) {
        /* First vector: this is how many dimensions we'll
         * expect from all subsequent vectors, and its values
         * seed the minima.
         */
        dimensionality = number;

        minima = new double[dimensionality];
        assert(minima);
        memcpy(minima, dbl_array, sizeof(double) * dimensionality);

        cardinality++;
      } else if (dimensionality != number) {
        /* Inconsistent vector length.  Leave the loop through
         * the common path so dbl_array is still released.
         */
        mismatch = 1;
      } else {
        /* Update minima, if need be */
        for (i=0; i < dimensionality; i++) {
          if (dbl_array[i] < minima[i]) {
            minima[i] = dbl_array[i];
          }
        }

        cardinality++;
      }
    }

    /* The array handed back by find_doubles() is ours to free,
     * whether or not the line turned out to be usable.
     */
    if (dbl_array) {
      delete [] dbl_array;
      dbl_array = NULL;
    }
  }

  /* Work out whether anything went wrong: a bad line, a read
   * error (as opposed to plain end-of-file), or a failure to
   * get back to the start.
   */
  if (mismatch) {
    saved_errno = EINVAL;
  } else if (ferror(fh_in)) {
    saved_errno = errno ? errno : EIO;
  } else if (fseek(fh_in, 0, SEEK_SET) != 0) {
    saved_errno = errno ? errno : EIO;
  }

  if (saved_errno) {
    /* Return to the empty state so that no accessor can see
     * half-computed values, then report the original error.
     */
    delete [] minima;
    minima         = NULL;
    dimensionality = 0;
    cardinality    = 0;

    errno = saved_errno;
    return 0;
  }

  current_obj_no = 0;

  fh = fh_in;

  /* For the non-dynamic version. */
  buf = new double[dimensionality];
  assert(buf);

  return 1;
}




/* Get a vector, by index.
 *
 * The file is read strictly forwards, one line at a time, and
 * current_obj_no remembers the index of the next object the
 * stream is positioned at.  Asking for an index at or beyond
 * that position just reads ahead; asking for an earlier one
 * rewinds to the start of the file and rescans.
 *
 * index  zero-based object number; must be < cardinality.
 * dst    where to put the doubles.  Handed to find_doubles():
 *        if dst is NULL it gets allocated, if not it's used.
 *
 * Returns the vector (with the per-dimension minima subtracted
 * if zero_translation is set), or NULL with errno set:
 *
 *   ENOENT  no file is loaded, or dimensionality is 0
 *   EINVAL  index is out of range, or the line did not yield
 *           exactly 'dimensionality' doubles
 *   EIO     the file ended, or could not be read, before the
 *           requested object was reached
 */

double* DiskWrapper::get_vector_common(unsigned int index,
                                       double* dst) {
  char line_buf[DataWrapper::MAX_LINE_SIZE];
  unsigned int found           = 0;
  unsigned int i               = 0;
  int          have_line       = 0;
  const int    caller_supplied = (dst != NULL);

  if (!fh) {
    /* File never specified. */
    errno = ENOENT;
    return NULL;
  }

  if (dimensionality == 0) {
    /* Something bogus -- either a very silly user, or a bug.
     */
    errno = ENOENT;
    return NULL;
  }

  if (index >= cardinality) {
    /* Out of range. */
    errno = EINVAL;
    return NULL;
  }

  if (index < current_obj_no) {
    /* Already past it; start over.  fseek() is only specified
     * to return nonzero on failure, not necessarily negative.
     */
    if (fseek(fh, 0, SEEK_SET) != 0) {
      /* This really shouldn't happen, since it worked a minimum
       * of twice before.
       */
      assert(0);

      /* Return retained in case assert redefined to a no-op for
         a release build. */
      return NULL;
    }
    current_obj_no = 0;
  }

  /* Read object lines until we are holding the one for 'index'.
   *
   * A line that is not truncated to nothingness was parsed by
   * load_file(), so it is a valid vector with the appropriate
   * number of doubles -- otherwise load_file() would have
   * failed.  A line that IS all blank doesn't hold an object,
   * so it is skipped without counting.
   */
  for (;;) {
    have_line = 0;

    while (fgets(line_buf, DataWrapper::MAX_LINE_SIZE, fh)) {
      DataWrapper::process_string(line_buf);

      if (line_buf[0] != '\0') {
        have_line = 1;
        break;
      }
    }

    if (!have_line) {
      /* Ran out of file (or hit a read error) before reaching
       * the object -- the file no longer matches what
       * load_file() saw.  line_buf holds nothing usable, so do
       * not parse it.  Marking the position as "past
       * everything" makes the next call rewind.
       */
      current_obj_no = cardinality;
      errno = EIO;
      return NULL;
    }

    if (current_obj_no == index) {
      break;
    }

    current_obj_no++;
  }

  /* We've found the line... so we're actually advanced
   * PAST it.
   */
  current_obj_no = index+1;

  /* Parse it.  If dst=NULL, it gets allocated.  If not, it's used.
   */

  found = DataWrapper::find_doubles(dst, line_buf, dimensionality);

  /* This should ALWAYS be the case -- if it's not, either our
   * parser is broken or a fundamental assumption is.
   */
  assert(found == dimensionality);

  /* Check retained in case assert redefined to a no-op for a
   * release build: never touch or return a short or missing
   * vector.
   */
  if (found != dimensionality || !dst) {
    if (!caller_supplied && dst) {
      /* Allocated on our behalf; release it the same way
       * load_file() releases find_doubles() arrays.
       */
      delete [] dst;
    }
    errno = EINVAL;
    return NULL;
  }

  if (zero_translation) {
    /* Subtract minima. */

    for (i=0; i < dimensionality; i++) {
      dst[i] -= minima[i];
    }
  }

  return dst;
}

