: # Find perl...
eval 'exec perl5 -w -S $0 "$@"'
if 0;

#!/usr/bin/perl5 -w
#
############################################################
#
# This is merely a Perl wrapper for the new box-counting
# C++ library.  It supports the old-style parameter
# interface, and can invoke the slow method via the old
# Pair_Count.pm system.  In-package normalization
# requires invoking the much slower Pair_Count.pm
# box-counting code, and is *definitely* not recommended

#
# The C++ library itself supports neither normalization
# nor attribute masking.  Both can be implemented as
# wrappers if desired (albeit passing a wrapper to this
# module forces --data_memory, which might be expensive
# in RAM), or by modifying the data beforehand, or by
# adding new options to the driver and subclassing one
# of the included C++ wrappers.
#
# The major API-visible difference is that data_o is now
# folded into the new parameter 'source'.  Source may
# be one of the following:
#
# 1.  A reference to a wrapper object with the old-style
#     API.  This will *force* the use of --data_memory,
#     since the C++ library has no hooks to read from a
#     Perl module.
#
# 2.  A scalar corresponding to a filename.
#
# As before, it may not be null.
#
#
# Addendum:  The C++ core now supports cross-products
# (ignores exponent; instead, give *two* data sources,
# and it multiplies occupancies in corresponding cells).
# Accordingly, you can set 'source2' as well as 'source',
# and then invoke cross_count().
#
#############################################################

#
# Package Header
#
##########################################################################

package FDC;
require Exporter;


##### <change_me>
#
# Change this to where you placed the modules.
#
use lib '/afs/cs.cmu.edu/user/lw2j/shared/FracDim/FracDim';
##### </change_me>

use strict;
use Symbol;

# For bidirectional pipes, when taking data from a Perl module
# wrapper and sending it to the compiled program.
use IPC::Open2;

# For transparent slow-method/normalization support.
require Pair_Count;


# Data I/O layer.  The unified Wrapper class permits specifying
# both row and column masks, as well as a few options regarding
# buffering.  Basically, it can do a bit more than SimpleWrapper
# and DiskWrapper combined.
require Wrapper;



##########################################################################

BEGIN {
    use Exporter ();
    use Symbol;
    use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

    $VERSION     = 1.0;
    @ISA         = qw(Exporter);
    @EXPORT =  qw(
                  new
                  clear
                  get_param
                  set_params
                  pair_count
									cross_count

                  set_default_params
                  set_param_compat_map
                 );
    %EXPORT_TAGS = ();
    @EXPORT_OK   = ();

    return 1;
}






# new:  creates blessed FDC objects with default params
#
# Standard usage:
#
# my $fracObj = FDC::new();
#
# Extended usage:
#
# my $fracObj = FDC::new( param => value, param => value, ... ).
sub new($@) {
    my $class = shift;
    my $self  = +{};

    bless $self, $class;

    $self->{'pair_obj'} = new Pair_Count;  # only for method== 'slow'
    $self->set_compat_param_map();
    $self->set_default_params();
    $self->set_params(@_);

    return $self;
}


# Pass-through, since this module doesn't store data.
sub clear($) {
    my $self = shift;
    $self->{'pair_obj'}->clear();
}

# A hash table which maps certain old-style parameter names
# to new ones.
sub set_compat_param_map($) {
    my $self = shift;

    $self->{'compat_param_map'} =
        +{
            'q'         => 'exponent',
            'r_min'     => 'radius_min',
            'r_max'     => 'radius_max',
            'r_count'   => 'radius_count',
            'data_o'    => 'source',
            's_max'     => 'singleton_max',
            'o_max'     => 'occupancy_max',
            'method'    => 'method',     # 'slow' forces pass-through
            'intervals' => 'intervals',  # ignored unless pass-through
            'normalize' => 'normalize'   # non-zero forces pass-through
        };

    # These should NOT be passed auto (--long value), either because
    # they're unsupported by the compiled library (the first three), or
    # they're handled specially (the rest).
    $self->{'irregular'} =
        +{
            'method'     => 1,
            'intervals'  => 1,
            'normalize'  => 1,
            'source'     => 1,
				   	'source2'    => 1,
            'executable' => 1,
            'speed'      => 1,
					  'quad'       => 1
        };

    $self->{'pam_marap_tapmoc'} = +{};

    # invert table
    my @old_keys = keys %{$self->{'compat_param_map'}};
    my $old = undef;

    foreach $old (@old_keys) {
        my $val = $self->{'compat_param_map'}->{$old};

        $self->{'pam_marap_tapmoc'}->{$val} = $old;
    }
}



sub set_default_params($) {
    my $self = shift;

    # default:  2^-20 to 2^18, should result in a multiplier of 2
    #
    # I *highly* advise choosing values that give integral
    # multipliers, as various optimizations can't happen
    # otherwise...
    #
    # ...and, actually, certain theoretical guarantees (such
    # as monotonicity!) will break if you don't.

    $self->{'params'} =
        +{
            exponent       => 2,
            base           => 2,
            radius_min     => 0.00000095367431640625,
            radius_max     => 262144,
            radius_count   => 39,
            singleton_max  => 0.95,
            occupancy_max  => 0.95,
            data_memory    => 0,
            counter_memory => 0,
            two_table      => 0,
            zero_translate => 1,
					  db_type        => 1,         # External hashing
            source         => undef,     # map to 'data_o' if 'slow'
            source2        => undef,     # For cross products only.
            intervals      => 20,        # pass-thru if 'slow'
            method         => 'fast',
            speed          => 1,        # Define to use --speed.
            normalize      => 0,         # Please don't turn this on,
            quad           => undef,     # FDC's Quadratic support.
            min_frac       => 0.0001,    # Quad only.
            max_frac       => 0.8,     # Quad only.
            executable     => '/tmp/FOO/FDC-20020114/driver' # Change me.
         };
}

# Will die() unless asked for a VALID parameter (Value just
# needs to exist; it can be undef).

sub get_param($$) {
    my $self = shift;
    my $param = shift;

    # THIS much at least should exist...
    die unless exists($self->{'params'});

    # As should the parameter.
    if (exists($self->{'params'}->{$param})) {
        return ($self->{'params'}->{$param});
    } elsif (exists($self->{'params'}->{$self->{'compat_param_map'}->{$param}})) {
        # old-style parameter
        return $self->{'params'}->{$self->{'compat_param_map'}->{$param}};
    } else {
        die "get_param() queried for bogus parameter:  '$param'";
    }
}




# Set zero or more parameters, doing some minimal sanity checking.
#
# Standard usage:
#
# $fdObj->set_params(exponent => 2, radius_count => 200);
sub set_params($@) {
    my $self  = shift;
    my $param = undef;
    my $arg   = undef;

    while (defined($param = shift)) {
        $arg = shift;

        if (exists(($self->{'params'})->{$param})) {
      $self->{'params'}->{$param} = $arg;
        } elsif (exists($self->{'params'}->{$self->{'compat_param_map'}->{$param}})) {
            $self->{'params'}->{$self->{'compat_param_map'}->{$param}} = $arg;
        } else {
            die "Unknown param/arg pair '$param':'$arg'";
         }
    }

    # Some sanity checks.

    if (defined($self->{'params'}->{'method'})) {
        my $method = $self->{'params'}->{'method'};

        if (($method ne 'fast') && ($method ne 'slow')) {
            # (and only use 'slow' if you mean it!
            die "Parameter 'method' must be either 'fast' or 'slow'.";
        }

        if ($method eq 'slow') {
            if (defined($self->{'params'}->{'intervals'}) &&
                ($self->{'params'}->{'intervals'} < 10)) {

                # Lower this threshold if you REALLY want to.  Do NOT use
                # just 1, however.
                die "Using below 10 intervals is REALLY not recommended.";
            }
        }
    } else {
        die "The 'method' parameter is mandatory.";
    }

    {
        # Propagate parameters to the Pair_Count object.
        my $old_param = undef;
        my $new_param = undef;
        my $val       = undef;
        my %old_p     = ();

        while (($new_param, $old_param) = each %{$self->{'pam_marap_tapmoc'}}) {
            $old_p{$old_param} = $self->{'params'}->{$new_param};
        }

        #
        $self->{'pair_obj'}->set_params(%old_p);
    }

    return $self;
}



# Returns two references corresponding to (copied) arrays
# containing the log_radius and log_count data.
#
# The data is not automatically clear()-ed afterwards.
sub pair_count($) {
  my $self       = shift;
  my $log_radii_ref  = +[];
  my $log_counts_ref = +[];
  my $data_memory = $self->{'params'}->{'data_memory'};
  my $source      = $self->{'params'}->{'source'};
  my $normalize   = $self->{'params'}->{'normalize'};
  my $method      = $self->{'params'}->{'method'};


  if (!defined($source)) {
      die "A source -- either a wrapper or a filename -- MUST be specified.";
  }

  # Do we need to use the Perl code?
  if (($normalize) || ($method eq 'slow')) {
      my $fh = undef;
      my $source_pass = $source;
      my $pc_obj      = $self->{'pair_obj'};

      # Were we given a filename or a wrapper?

      if (!(ref $source)) {
          # Should be a filename.  Load it into a wrapper.

          $fh = gensym();
          open($fh, $source) || die "Failed to open '$source':  $!";

					$source_pass = new Wrapper($fh);
          if ($data_memory) {
						$source_pass->set_buffer($self->{'BUFFER_PACK'});
          } else {
						$source_pass->set_buffer($self->{'NO_BUFFER'});
					}
      }

      # Override it.
      $pc_obj->set_params(data_o => $source_pass);
      ($log_radii_ref, $log_counts_ref) = $pc_obj->pair_count();

      # Clean up.
      $pc_obj->set_params(undef);
      $source_pass = undef;
      if (defined($fh)) {
          close($fh) || die "Failed to close '$source':  $!";
      }
  } else {
      # Fast method, no normalization.
      # Use the compiled implementation.

      # First, figure out the option list.
      my $arguments   = undef;
      my @option_list = ();
      my $reader      = gensym();
      my $writer      = undef;  # needed if $source is a wrapper
      my $executable  = $self->{'params'}->{'executable'};
      my $open2_pid   = undef;

			# Pair-counting.
			push @option_list, "--pairs";

      {
          my $option = undef;
          my $val    = undef;

          while (($option, $val) = each %{$self->{'params'}}) {
              if (!exists($self->{'irregular'}->{$option})) {
                  push @option_list, "--$option=$val";
              }
          }
      }


      if (defined($self->{'params'}->{'speed'}) &&
          ($self->{'params'}->{'speed'})) {
          push @option_list, "--speed";
      }

      if (defined($self->{'params'}->{'quad'}) &&
          ($self->{'params'}->{'quad'})) {
          push @option_list, "--quad";
      }

      $arguments = join(" ", @option_list);

      # If it's a $source is a scalar, then we can use that
      # directly; if it's a wrapper, we need to get its
      # statistics, and then pipe in the data via a
      # bidirectional pipe.  Ugh.

      if (!(ref $source)) {
          # The easier branch -- a straight file.
          $arguments = $arguments . " $source";
          open($reader, "$executable $arguments |") || die
              "Failed to run open '$executable $arguments':  $!";
      } else {
          # Arrrrrgh.  It's a wrapper... need to pipe it in.
          my $writer = gensym();
          my $count = $source->get_count();
          my $i=0;

          # The executable will automatically impose data_memory
          # when no filename is specified, so we don't have to
          # override that argument.

          $open2_pid = open2($reader, $writer, "$executable $arguments") ||
              die "Failed to open2 '$executable $arguments':  $!";

          for($i=0; $i < $count; $i++) {
              my @vec      = $source->get_obj($i);
              my $vec_text = join(" ", @vec);

              print $writer $vec_text, "\n";
          }
          close($writer) || die "Failed to close writer:  $!";
      }

      # Get ready...

      # Read output from executable.
      {
          my $line = <$reader>;

          defined($line) || last;
          chomp($line);
          defined($line) || last;
          $line =~ s/^\s+//go;

          my ($log_radius, $log_count) = split /\s+/, $line;

          push @{$log_radii_ref},  $log_radius;
          push @{$log_counts_ref}, $log_count;

          redo;
      }

      close($reader);

      if (defined($open2_pid)) {
          # Avoid zombies!
          while (!(waitpid $open2_pid, 0)) {};
      }
  }

  return ($log_radii_ref, $log_counts_ref);
}



# Returns two references corresponding to (copied) arrays
# containing the log_radius and log_count data.
#
# The data is not automatically clear()-ed afterwards.
#
# Since this routine invokes the cross-counting routines,
# it a) must use the FDC core, b) ignores exponents, and
# c) requires source2 to be defined, as well as source.
#

sub cross_count($) {
  my $self       = shift;
  my $log_radii_ref  = +[];
  my $log_counts_ref = +[];
  my $data_memory = $self->{'params'}->{'data_memory'};
  my $source      = $self->{'params'}->{'source'};
  my $source2     = $self->{'params'}->{'source2'};
  my $normalize   = $self->{'params'}->{'normalize'};
  my $method      = $self->{'params'}->{'method'};


  if ((!defined($source)) || (!defined($source2))) {
      die "Two sources -- either wrappers or filenames -- MUST be specified.";
  }

  if (ref($source)) {
		if (ref($source2)) {
			die "At least one of the sources must be a file, rather than a wrapper.";
		}

		# Swap them, for simplicity (filename goes first).
		my $temp = $source2;
		$source2 = $source;
		$source = $temp;
	}

  # Do we need to use the Perl code?
  if (($normalize) || ($method eq 'slow')) {
		die "Normalization/Slow + Cross-Product is an unsupported combination.";
  } else {
      # Fast method, no normalization.
      # Use the compiled implementation.

      # First, figure out the option list.
      my $arguments   = undef;
      my @option_list = ();
      my $reader      = gensym();
      my $writer      = undef;  # needed if $source is a wrapper
      my $executable  = $self->{'params'}->{'executable'};
      my $open2_pid   = undef;

			# Cross-products.
			push @option_list, "--cross";

      {
          my $option = undef;
          my $val    = undef;

          while (($option, $val) = each %{$self->{'params'}}) {
              if (!exists($self->{'irregular'}->{$option})) {
                  push @option_list, "--$option=$val";
              }
          }
      }

      if (defined($self->{'params'}->{'speed'}) &&
          ($self->{'params'}->{'speed'})) {
          push @option_list, "--speed";
      }


      if (defined($self->{'params'}->{'quad'}) &&
          ($self->{'params'}->{'quad'})) {
          push @option_list, "--quad";
      }

      $arguments = join(" ", @option_list);

      # If it's a $source is a scalar, then we can use that
      # directly; if it's a wrapper, we need to get its
      # statistics, and then pipe in the data via a
      # bidirectional pipe.  Ugh.

      if ((!(ref $source)) && (!(ref $source2))) {
				# The easiest branch -- two files.
				$arguments = $arguments . " $source $source2";
				open($reader, "$executable $arguments |") || die
					"Failed to run open '$executable $arguments':  $!";
			} elsif ((!ref $source) && (ref $source2)) {
				# One standard file ($source) and one wrapper
				# (source2).  We need to use open2().

				my $writer = gensym();
				my $count  = $source2->get_count();
				my $i=0;

				# The executable will automatically impose data_memory
				# when only one filename is specified, so we don't have to
				# override that argument.

				$arguments = $arguments . " $source";
				$open2_pid = open2($reader, $writer, "$executable $arguments") ||
					die "Failed to open2 '$executable $arguments':  $!";

				for($i=0; $i < $count; $i++) {
					my @vec      = $source2->get_obj($i);
					my $vec_text = join(" ", @vec);

					print $writer $vec_text, "\n";
				}
				close($writer) || die "Failed to close writer:  $!";
			} else {
				# Logic error.  This should be unreachable.
				die "Logic error.";
			}



      # Get ready...

      # Read output from executable.
      {
          my $line = <$reader>;

          defined($line) || last;
          chomp($line);
          defined($line) || last;
          $line =~ s/^\s+//go;

          my ($log_radius, $log_count) = split /\s+/, $line;

          push @{$log_radii_ref},  $log_radius;
          push @{$log_counts_ref}, $log_count;

          redo;
      }

      close($reader);

      if (defined($open2_pid)) {
          # Avoid zombies!
          while (!(waitpid $open2_pid, 0)) {};
      }
  }

  return ($log_radii_ref, $log_counts_ref);
}

