: #Find perl...
eval 'exec perl5 -w -S $0 "$@"'
if 0;

#!/usr/bin/perl5 -w
#
############################################################
# $Id: Wrapper.pm,v 1.5 2002/01/09 15:19:04 lw2j Exp $
#
############################################################
# Revision purpose:  To provide a universal wrapper.
#
# By 'universal', I mean that it needs to support certain
# features that I tend to use.
#
# First, it has to accept a filehandle as the input source.
# This permits flat files, data from another process via
# open2(), STDIN input, and so forth.
#
# Second, it has to be able to buffer the entire data in
# memory, if desired.  This is mandatory for STDIN/open2()
# support as well as anything else that isn't rewindable,
# since we need to be able to support multiple passes as
# well as random access.  Perhaps we should explicitly
# support limits on memory consumption, and create a temp
# file?
#
# Third, it needs to be able to pretend that not all
# columns in the data exist.  This should be implemented by
# supporting a 'mask' containing exactly the desired
# columns, in order, 0-based.
#
# Fourth, it needs to be able to pretend that not all rows
# exist.  This should be implemented similarly to column
# masking.  This will be useful for segmentation testing.
#
# Subject to the above, it then needs to support a simple
# interface, consisting of --
#
#   How many records are there?
#   How many dimensions do they have?
#   Fetch me record number... (0-based)
#
############################################################
#
#  Data may be separated by either whitespace or commas.
#  Beginning/trailing whitespace will be ignored.
#  Semicolons and octothorpes may be used to denote comments.
#
#     get_count($)       # return how many items there are
#     get_dims($)        # return embedding dimensionality
#     get_obj($$)        # return the object by 0-based index, as an
#                        # array of scalars
#
# get_count(), naturally, requires reading in ALL the data.  So if
# you're going to use it on a pipe, well, set the buffer mode
# first.  get_dims() includes the same requirement, to a more
# limited degree -- it requires reading at least one vector from
# the $fh, and if it's not buffered when it should have been, that
# vector is lost for good.  Both obey masking -- if you only
# ask for rows 0, 17, and 36 out of 4000, get_count will return
# 3, for instance.
#
#
#  Masking support:
#     set_colmask($@)    # Give it a list of column indices.  If
#                        # none, that really means all.
#     clear_colmask($)   # Empty the mask, meaning use all columns.
#     get_colmask($)     # Returns as an array.
#
#     set_rowmask($@)    # \
#     clear_rowmask($)   #  >---  Isomorphic to colmasking.
#     get_rowmask($@)    # /
#
#     set_colmask_rle()  # RLE-style mask specification.
#     set_rowmask_rle()  # Ditto.
#
#     'RLE-style' means that the mask is specified as a list with
#     an even number of elements, consisting of segment offsets
#     and segment lengths (off0, len0, off1, len1, ...), where
#     the first segment starts at off0 and continues to (off0 +
#     len0 - 1), and so forth.  Note that get_colmask and get_rowmask
#     will return the mask in RLE-format if RLE was used.
#
#
#  Obsoleted:
#     set_mask           # \
#     clear_mask         #  >--- Old API, mapped to colmask
#     get_mask           # /
#
#  Miscellaneous:
#     set_buffer($$)     # 0 -> no buffering, 1 -> use buffering
#                        # Should be set during load if $fh is not
#                        # seekable.  2 -> assume IEEE double-
#                        # precision floating-point arithmetic and
#                        # pack in order to save memory.
#                        #
#                        # See the constants in the class constructor.
#
#     clear_buffer($)    # *shrug* For reducing memory consumption.
#     load_file($$)      # Load data from the provided filehandle.
#                        # Clears masks and buffers.
#     get_file()         # Return the filehandle currently used.
#
#     set_contents(@)    # Give it a list of list references, each
#                        # corresponding to a vector in unpacked
#                        # form.  Then, this will ignore the current
#                        # 'fh' setting; write these vectors to a
#                        # temporary file (in case the user later
#                        # turns off buffering...).  This function
#                        # will return the tempfile's file handle
#                        # and filename pair, so that the programmer
#                        # can close the handle and mktemp::untemp()
#                        # the filename before program exit.
#                        #
#                        # The user is also responsible for closing
#                        # the previously-used filehandle, if any.
#                        #
#                        # This may be of very limited use to most
#                        # users.
#
##########################################################################
# $Log:	Wrapper.pm,v $
# Revision 1.5  2002/01/09  15:19:04  lw2j
# Reset line number upon a reload.
# 
# Revision 1.4  2002/01/03  10:54:12  lw2j
# Numerous improvements such as RLE-style compression of buffers.
# It also allows directly specifying vectors via an array; the
# array will get written to a temporary file in case the user
# prefers an on-disk buffer policy (albeit in this case, it'd be
# a bit odd, since the array already exists... *shrug*).
# 
# Revision 1.3  2001/12/17  12:03:25  lw2j
# Fixed NOBUF handling.
#
# Revision 1.2  2001/12/17  11:44:20  lw2j
# Included old masking API (set_mask -> set_colmask, et al)
#
# Revision 1.1  2001/12/15  15:30:10  lw2j
# Initial revision
#
##########################################################################
#
# Package Header
#
##########################################################################

package Wrapper;
require Exporter;
require mktemp;


use strict;
use POSIX;

##########################################################################

BEGIN {
    use Exporter ();
    use Symbol;
    use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

    $VERSION     = 1.0;
    @ISA         = qw(Exporter);
    @EXPORT =  qw(
                  new
                  get_file
                  load_file
                  set_buffer
                  clear_buffer

                  get_count
                  get_dims
                  get_obj

                  set_colmask
                  clear_colmask
                  get_colmask

                  set_colmask_rle

                  set_mask
                  clear_mask
                  get_mask

                  set_rowmask
                  clear_rowmask
                  get_rowmask

                  set_rowmask_rle

                  set_contents
                  );
    %EXPORT_TAGS = ();
    @EXPORT_OK   = ();

    return 1;
}



# new:  creates blessed Wrapper objects with default params
#       Accepts a file reference as well.
sub new($$) {
    my $class    = shift;
    my $file_ref = shift;
    my $self     = +{};

    bless $self, $class;

    # Naming convention:
    # foo   => safe for external use
    # _foo  => internal-only data
    # __foo => internal-only function


    $self->{'_buffer'}    = +[];   # When buffering.
    $self->{'_line_num'}  = 0;     # Line number we're about to read.

    # Constants for users to use, if they choose.
    $self->{'NO_BUFFER'} = 0;
    $self->{'BUFFER'} = 1;
    $self->{'BUFFER_PACK'} = 2;

    # Constants for masking styles.
    $self->{'NO_MASK'} = 0;
    $self->{'MASK'} = 1;
    $self->{'MASK_RLE'} = 2;

    $self->clear_colmask();
    $self->clear_rowmask();

    # Used for handling buffering.  Give it a true row number
    # (in other words, use __get_rownum first).
    #
    # We default to BUFFER_PACK to reduce the chance of
    # accidental data loss by get_dims() or get_count() before
    # setting the buffer.
    $self->set_buffer($self->{'BUFFER_PACK'});

   if (defined($file_ref)) {
       $self->{'_fh'} = $file_ref;
       # load_file needs to do the seek tests.
       $self->load_file($file_ref);
    }

    return $self;
}



# Read the raw data when it's not buffered.  We need a filehandle
# ($self->{'_fh'}).  We also NEED it to be a seekable stream.
sub __row_reader_NOBUF($$) {
    my $self = shift;
    my $idx  = shift;
    my $line = undef;
    my $fh   = $self->{'_fh'};

    if (!defined($fh)) {
        die "Cannot get object if no file is defined.";
    }

    if (($idx < 0) ||
        (defined($self->{'_ct'}) && (($self->{'_ct'} <= $idx)))) {
        # Vector out of bounds.
        return ();
    }

    if ($self->{'_line_num'} > $idx) {
        # Passed it.  We need to seek back.  This should at some point
        # be changed to track tells (ftells) for every 250 lines or
        # so later on.
        if (!seek($fh, 0, SEEK_SET)) {
            # Non-seekable.  And if we're here, we didn't buffer.
            die "Cannot retrieve previous data from non-buffered, non-seekable stream.";
        } else {
            $self->{'_line_num'} = 0;
        }
    }


    while ($self->{'_line_num'} <= $idx) {
        $line = <$fh>;

        if (!defined($line)) {
            # This is the end, my friend, 'tho we fread for a minute plus
            # a slice.
            my $old_ct = $self->{'_ct'};

            if (defined($old_ct) && ($old_ct != $self->{'_line_num'})) {
                # Uh-oh.  Inconsistency.
                die "Programming logic error:  count mismatch.";
            }
            $self->{'_ct'} = $self->{'_line_num'};
            return ();
        }

        $self->{'_line_num'} = $self->{'_line_num'}++;
        $line =~ s/\;.*$//o;      # Support semicolon comments
        $line =~ s/\s+$//o;       # Trim trailing space
        $line =~ s/\s\s*/ /go;    # Shrink whitespace to single spaces
        $line =~ s/\s*,\s*/,/go;  # Remove spaces around commas.
        $line =~ s/,/ /go;        # Change commas to individual spaces

        # Is this still an interesting line?
        if ($line =~ /\S/) {
            # Yes.  Count it.
            $self->{'_line_num'} = $self->{'_line_num'} + 1;
        }
    }

    # At this point, the line we want should be in '$line'.
    defined($line) || die "Programming logic error in NOBUF obj retrieval.";

    my @array = split / /, $line;

    # And confirm dimensionality.
    if (defined($self->{'_dims'}) &&
        ($self->{'_dims'} != (scalar @array))) {
        die "Inconsistent dimensionality.";
    }

    $self->{'_dims'} = scalar @array;

    return @array;
}




# Read the raw data when it's buffered, but not packed.  First,
# check the buffer.  If it's there, we use that; if not, we fall
# back to __row_reader_NOBUF($$)
sub __row_reader_BUF($$) {
    my $self  = shift;
    my $idx   = shift;
    my $line  = undef;
    my $fh    = $self->{'_fh'};
    my @array = ();


    if (($idx < 0) ||
        (defined($self->{'_ct'}) && (($self->{'_ct'} <= $idx)))) {
        # Vector out of bounds.
        return ();
    }

    # Check _buffer.
    if (scalar(@{$self->{'_buffer'}}) <= $idx) {
        # It's not in the array.  We'll want to read from _line_num
        # up to $idx, inclusive.

      if (!defined($fh)) {
        die "Cannot get object if no file is defined.";
      }


      my $current = scalar(@{$self->{'_buffer'}});
      defined($current) || die;
      for (; $current <= $idx; $current++) {
        @array = $self->__row_reader_NOBUF($current);

        if (!scalar(@array)) {
          # __NOBUF already sets the _ct value for us when it
                # hits EOF.
          return ();
        }

        # A reference to a copy, not a reference to a local
        # variable which would be identical for every item we
        # read during this _BUF invocation.
        push @{$self->{'_buffer'}}, +[ @array ];
        }

      return @array;
    }

    # It's in _buffer since we made it this far.
    @array = @{$self->{'_buffer'}->[$idx]};

    return @array;
}



# Read the raw data when it's buffered and packed.  First,
# check the buffer.  If it's there, we use that; if not, we fall
# back to __row_reader_NOBUF($$)
sub __row_reader_PACK($$) {
    my $self  = shift;
    my $idx   = shift;
    my $line  = undef;
    my $fh    = $self->{'_fh'};
    my @array = ();


    if (($idx < 0) ||
        (defined($self->{'_ct'}) && (($self->{'_ct'} <= $idx)))) {
        # Vector out of bounds.
        return ();
    }

    # Check _buffer.
    if (scalar(@{$self->{'_buffer'}}) <= $idx) {
        # It's not in the array.  We'll want to read from _line_num
        # up to $idx, inclusive.

        my $current = scalar(@{$self->{'_buffer'}});

        if (!defined($fh)) {
          die "Cannot get object if no file is defined.";
        }

        defined($current) || die;
        for (; $current <= $idx; $current++) {
            @array = $self->__row_reader_NOBUF($current);

            if (!scalar(@array)) {
                # _NOBUF already sets the _ct value for us when it
                # hits EOF.
                return ();
            }

            # Pack it.  _NOBUF already set the _dims, too.
            my $pack_format = "d" .  ($self->{'_dims'});
            my $packed      = pack $pack_format, @array;

            # A reference to a copy, not a reference to a local
            # variable which would be identical for every item we
            # read during this _PACK invocation.
            push @{$self->{'_buffer'}}, $packed;
        }

        return @array;
    }

    # It's in _buffer since we made it this far.
    my $packed      = $self->{'_buffer'}->[$idx];
    my $pack_format = "d" . $self->{'_dims'};

    @array = unpack $pack_format, $packed;

    return @array;
}





# Change the buffer policy.  Flush it if the new value is different.
sub set_buffer($$) {
    my $self       = shift;
    my $new_bufset = shift;

    if ((defined($self->{'_buffer'})) &&
        ($new_bufset == $self->{'_buffer'})) {
        # No change.
        return;
    }

    # Note that changing from an unbuffered to a buffered method only
    # makes sense if either the file handle is seekable, or if we
    # don't care about what we've already read (if anything), because
    # we weren't preemptively buffering in NO_BUFFER mode.

    if ($new_bufset == $self->{'NO_BUFFER'}) {
        $self->{'__row_reader'} = sub {
          my $idx = shift @_;
          return ($self->__row_reader_NOBUF($idx)) };
        $self->{'__row_reader'} = sub {
          my $idx = shift @_; return ($self->__row_reader_NOBUF($idx));
        };
        $self->clear_buffer();
    } elsif ($new_bufset == $self->{'BUFFER'}) {
        if ((defined($self->{'buffer'})) &&
            ($self->{'_buffer'} == $self->{'BUFFER_PACK'})) {
            # Convert by unpacking everything in the buffer.
            my $itm  = undef;
            my $dims = $self->{'_dims'};

            if (defined($dims)) {
                # If it's NOT defined, then there won't be any entries
                # to unpack.
                my $pk_fmt = "d" . $dims;

                foreach $itm (@{$self->{'_buffer'}}) {
                    $itm = +[ unpack $pk_fmt, $itm ];
                }
            }
        } else {
            $self->clear_buffer();
        }
        $self->{'__row_reader'} = sub { my $idx = shift @_;
            return ($self->__row_reader_BUF($idx)) };
    } elsif ($new_bufset == $self->{'BUFFER_PACK'}) {
        if ((defined($self->{'buffer'})) &&
            ($self->{'_buffer'} == $self->{'BUFFER'})) {
            # Convert by packing everything in the buffer.
            my $itm  = undef;
            my $dims = $self->{'_dims'};

            if (defined($dims)) {
                # If it's NOT defined, then there won't be any entries
                # to pack.
                my $pk_fmt = "d" . $dims;

                foreach $itm (@{$self->{'_buffer'}}) {
                    $itm = pack $pk_fmt, @{$itm};
                }
            }
        } else {
            $self->clear_buffer();
        }
        $self->{'__row_reader'} = sub {
            my $idx = shift @_;
            return ($self->__row_reader_PACK($idx)) };
    } else {
        die "Wrapper::set_buffer got unknown policy of '$new_bufset'.";
    }
}



# Empty the contents of the buffer.  Or, rather, remove the reference
# to the old one and let Perl's garbage collector handle it.
sub clear_buffer($) {
    my $self = shift;

    $self->{'_buffer'} = +[];
}



# Set the column mask, where the mask consists of the attribute
# indices to use, in the order specified.  Attributes may be
# repeated as often as desired.
#
# If none are specified, then the default of 'all' is used.
sub set_colmask($@) {
    my $self    = shift;
    my @colmask = @_;

    $self->{'_colmask'} = +[ @colmask ];

    if (!scalar(@colmask)) {
      $self->{'__col_mapper'} = sub { return @_ };
      $self->{'_colmask_type'} = $self->{'NO_MASK'};
    } else {
      if (defined($self->{'_dims'})) {
        # Verify that all requested columns are actually in range.
        map {
          if (($_ < 0) || ($_ >= $self->{'_dims'})) {
            die "Bogus column requested in mask:  '$_'.";
          }
        } @colmask;
      }

      $self->{'_colmask_type'} = $self->{'MASK'};
      $self->{'__col_mapper'} = sub { return @_[@colmask] };
    }
}


# Set it to an empty colmask.  Equivalent to set_colmask($).
sub clear_colmask($) {
    my $self = shift;

    $self->{'_colmask'} = +[];
    $self->{'__col_mapper'} = sub { return @_ };
    $self->{'_colmask_type'} = $self->{'NO_MASK'};
}


# Return the current colmask.
sub get_colmask($) {
    my $self = shift;

    return @{$self->{'_colmask'}};
}


# Old API, for compatibility purposes
sub set_mask($@) {
  my $self = shift;

  return $self->set_colmask(@_);
}

# Old API, for compatibility purposes
sub get_mask($) {
  my $self = shift;

  return $self->get_colmask();
}

# Old API, for compatibility purposes
sub clear_mask($@) {
  my $self = shift;

  $self->clear_colmask();
}



# Pseudo-RLE specification of masking, in order to save space.
# With the standard colmask functions, doing something like
# specifying the second and fourth thousand columns requires a
# thousand-item array.  The _rle variations provide a different
# specification style:
#
# instead of
#
#  $Wrapper_obj->set_mask(1000..1999, 3000..3999)
#
# we use
#
#  $Wrapper_obj->set_mask(1000, 1000, 3000, 1000)
#
# In other words, it's (offset, length, [offset, length [...]]).
#
# get_colmask() will return in the same format.
#
# This should save space, but will probably be a fair bit slower
# since it can't just look up a single item in a table anymore,
# but has to do a linear scan of the mask.
sub set_colmask_rle($@) {
  my $self    = shift;
  my @colmask = @_;

  (!(scalar(@colmask) % 2)) || die "set_colmask_rle() given an odd-sized mask.";

  $self->{'_colmask'} = +[ @colmask ];

  if (!scalar(@colmask)) {
    $self->{'__col_mapper'} = sub { my $col = shift;  return $col };
    $self->{'_colmask_type'} = $self->{'NO_MASK'};
  } else {
    $self->{'_colmask_type'} = $self->{'MASK_RLE'};
    $self->{'__col_mapper'} =  sub {
      my $col  = shift;
      my $left = $col;
      my $i    = 0;
      my $ct   = scalar(@colmask) / 2;
      my $colmask = $self->{'_colmask'};

      for ($i=0; $i < $ct; $i++) {
        my $offset = $colmask->[2*$i];
        my $seglen = $colmask->[(2*$i) + 1];

        if ($left <= $seglen) {
          return ($offset + ($left - 1));
        } else {
          $left -= $seglen;
        }
      }

      # Out of range.
      return undef;
    };
  }

}




# Row mask handling.  Empty mask => all columns.  Otherwise,
# it's a level of indirection.
sub set_rowmask($@) {
    my $self    = shift;
    my @rowmask = @_;

    $self->{'_rowmask'} = +[ @rowmask ];

    if (!scalar(@rowmask)) {
        $self->{'__row_mapper'} = sub { my $row = shift;  return $row };
        $self->{'_rowmask_type'} = $self->{'NO_MASK'};
    } else {
      if (defined($self->{'_ct'})) {
        # Verify that all requested rows are actually in range.
        map {
          if (($_ < 0) || ($_ >= $self->{'_ct'})) {
            die "Bogus row requested in mask:  '$_'.";
          }
        } @rowmask;
      }
      $self->{'_rowmask_type'} = $self->{'MASK'};
      $self->{'__row_mapper'} =  sub { my $row = shift; return $rowmask[$row] };
    }
}


# Clear the mask.
sub clear_rowmask($) {
    my $self    = shift;

    $self->{'_rowmask'} = +[];
    $self->{'__row_mapper'} = sub { my $row = shift;  return $row };
    $self->{'_rowmask_type'} = $self->{'NO_MASK'};
}


# Get the current row mask.
sub get_rowmask($) {
    my $self = shift;

    return @{$self->{'_rowmask'}};
}





# [The row analog of set_colmask_rle].
#
# Pseudo-RLE specification of masking, in order to save space.
# With the standard rowmask functions, doing something like
# specifying the second and fourth thousand rows requires a
# thousand-item array.  The _rle variations provide a different
# specification style:
#
# instead of
#
#  $Wrapper_obj->set_mask(1000..1999, 3000..3999)
#
# we use
#
#  $Wrapper_obj->set_mask(1000, 1000, 3000, 1000)
#
# In other words, it's (offset, length, [offset, length [...]]).
#
# get_rowmask() will return in the same format.
#
# This should save space, but will probably be a fair bit slower
# since it can't just look up a single item in a table anymore,
# but has to do a linear scan of the mask.
sub set_rowmask_rle($@) {
  my $self    = shift;
  my @rowmask = @_;

  (!(scalar(@rowmask) % 2)) || die "set_rowmask_rle() given an odd-sized mask.";

  $self->{'_rowmask'} = +[ @rowmask ];

  if (!scalar(@rowmask)) {
    $self->{'__row_mapper'} = sub { my $row = shift;  return $row };
    $self->{'_rowmask_type'} = $self->{'NO_MASK'};
  } else {
    $self->{'__row_mapper'} =  sub {
      my $row  = shift;
      my $left = $row;
      my $i    = 0;
      my $ct   = scalar(@rowmask) / 2;
      my $rowmask = $self->{'_rowmask'};

      for ($i=0; $i < $ct; $i++) {
        my $offset = $rowmask->[2*$i];
        my $seglen = $rowmask->[(2*$i) + 1];

        if ($left <= $seglen) {
          return ($offset + ($left - 1));
        } else {
          $left -= $seglen;
        }
      }

      # Out of range.
      return undef;
    };
    $self->{'_rowmask_type'} = $self->{'MASK_RLE'};
  }

}



# How many vectors are there?
sub get_count($) {
    my $self = shift;
    my $total = undef;

    # Masking?
    if ($self->{'_rowmask_type'} eq $self->{'MASK'}) {
      # A normal (non-RLE) mask.  Note that this count may be wrong if
      # the user has specified rows that do not exist, and has not
      # called get_count() with an unmasked set first.  The reason is
      # that we cannot verify the mask's sanity without first figuring
      # out the real count.

      return scalar($self->{'MASK'});
    } elsif ($self->{'_rowmask_type'} eq $self->{'MASK_RLE'}) {
      # RLE masking.  The same caveat about unverifiable masks applies.
      # Since it's an RLE mask, we'll add up all the segment lengths.
      # Note that the mask is already known to be non-empty and of
      # even length.
      my $rowmask_ct  = 0;
      my $rowmask_len = (scalar @{$self->{'MASK'}}) / 2;
      my $i           = 0;

      for ($i=0; $i < $rowmask_len; $i++) {
        $rowmask_ct += $self->{'MASK'}->{(2*$i)+1};
      }

      return $rowmask_ct;
    }


    if (defined($self->{'_ct'})) {
      $total = $self->{'_ct'};
    } else {
      # Otherwise, read... and buffer, if need be.
      defined($self->{'_buffer'}) || die "Buffer needs to be set first.";

      # The buffer-setting methods already set __row_reader(), so we should
      # be OK.

      my $idx = $self->{'_line_num'};

      while (scalar($self->{'__row_reader'}->($idx))) {
            $idx++;
            # consistency check
            die unless ($idx == $self->{'_line_num'});
          }

      # Ditto.
      die unless ($idx == $self->{'_ct'});
      $total = $idx;
    }

    return $total;
 }


# How many dimensions are there?
sub get_dims($) {
    my $self = shift;
    my $total = undef;

    # Masking?
    if ($self->{'_colmask_type'} eq $self->{'MASK'}) {
      # A normal (non-RLE) mask.  Note that this count may be wrong if
      # the user has specified cols that do not exist, and has not
      # called get_count() with an unmasked set first.  The reason is
      # that we cannot verify the mask's sanity without first figuring
      # out the real count.

      return scalar($self->{'MASK'});
    } elsif ($self->{'_colmask_type'} eq $self->{'MASK_RLE'}) {
      # RLE masking.  The same caveat about unverifiable masks applies.
      # Since it's an RLE mask, we'll add up all the segment lengths.
      # Note that the mask is already known to be non-empty and of
      # even length.
      my $colmask_ct  = 0;
      my $colmask_len = (scalar @{$self->{'MASK'}}) / 2;
      my $i           = 0;

      for ($i=0; $i < $colmask_len; $i++) {
        $colmask_ct += $self->{'MASK'}->{(2*$i)+1};
      }

      return $colmask_ct;
    }


    if (defined($self->{'_dims'})) {
        $total = $self->{'_dims'};
    } else {
        # Otherwise, read ONE vector and buffer, if need be.
        defined($self->{'_buffer'}) || die "Buffer needs to be set first.";

        # The buffer-setting methods already set __row_reader(), so we should
        # be OK.

        if (($total = scalar($self->{'__row_reader'}->(0))) > 0) {
            # should be fine, unless it wasn't buffered (because the
            # user didn't request it) and it should have been (in
            # which case the zero vector is now consumed).
        }

        # Sanity check.
        die unless ($total == $self->{'_dims'});
    }

    return $total;
}



# Get an actual object and return an array of its values.  Give it a
# zero-based index -- a true row index if there is no row mask, a
# 'virtual' row index if there is one.
#
# Buffer support must have been decided upon by now.
sub get_obj($$) {
    my $self = shift;
    my $idx  = shift;

    my $row_mapper = $self->{'__row_mapper'};
    my $col_mapper = $self->{'__col_mapper'};
    my $row_reader = $self->{'__row_reader'};

    defined($row_mapper) || die "Undefined row mapper.";
    defined($col_mapper) || die "Undefined col mapper.";
    defined($row_reader) || die "Undefined row reader.";

    defined($idx) || return ();
    my $true_rowid = $row_mapper->($idx);
    defined($true_rowid) || die "Ugh!";

    my @row_raw    = $row_reader->($true_rowid);
    scalar(@row_raw) || die "Thud!";

    my @row_good = $col_mapper->(@row_raw);
    scalar(@row_good) || die "No!";

    return @row_good;
}

return 1;





# Return the filehandle currently in use.
sub get_file($) {
  my $self = shift;
  my $fh = $self->{'_fh'};

  return $fh;
}



# Most of the work has actually been moved to the set- and get-
# functions, or pushed on to request time (lazy execution of
# get_count(), for instance.)

sub load_file($$) {
    my $self = shift;
    my $fh   = shift;

    $self->{'_fh'} = $fh;
		$self->{'_line_num'} = 0;

    delete $self->{'_ct'};
    delete $self->{'_dims'};
    $self->clear_buffer();
    $self->clear_rowmask();
    $self->clear_colmask();
}



# Create a temporary file using the provided list of list references.
# Returns the file handle and filename from the mktemp package.
# Note that we close and reopen the file handle, so although mktemp
# has it for writing, we return it for reading.

sub set_contents($@) {
  my $self = shift;
  my $lref = undef;
  my ($fh, $fname) = mktemp::mktemp();

  while (defined($lref = shift)) {
    my @vec  = @{$lref};
    my $line = join(" ", @vec);

    print $fh $line, "\n";
  }

  close($fh) || die "Ack!  Thpppt.  $!";


  open($fh, $fname) || die "Arrrrrrgh.  $!";
  $self->load_file($fh);
  return ($fh, $fname);
}
