: # Find perl...
eval 'exec perl5 -w -S $0 "$@"'
if 0;

#!/usr/bin/perl5 -w
#
############################################################
# Major architectural change, 2001/05/23:
#
# ***** THIS IS A NEW FILE *****
#
# It uses the old name for compatibility reasons, but
# most of the old file was moved to Pair_Count.pm for
# reasons of architectural changes.
#
# Verbose option moved to this level.
#
# DiskFracDim.pm is now a shell interface that mostly
# mirrors the Pair_Count interface for compatibility
# reasons, plus references the flat-trimming and
# line-fitting.  Most of the functions in Pair_Count
# don't need to be called directly often, so they're
# no longer exported by this module.
#
############################################################
# $Id: DiskFracDim.pm,v 1.11 2002/01/09 15:06:06 lw2j Exp $
#
# $Log:	DiskFracDim.pm,v $
# Revision 1.11  2002/01/09  15:06:06  lw2j
# Fixed the broken get_loglog() and get_loglog_trimmed() versions.
# Changed log_count to log_pairs.
# 
# Revision 1.10  2002/01/03  10:51:57  lw2j
# Added get_loglog().
# 
# Revision 1.9  2001/08/22  17:21:29  lw2j
# Untabified (tab=8).
#
# Revision 1.8  2001/05/29  18:04:21  lw2j
# Radical reorganization -- much moved to individual modules now.
# Also added get_param.
#
# Revision 1.7  2001/02/28  13:25:19  lw2j
# Changed
#   unshift @log_r,  log($radius);
# to
#   unshift @log_r,  log($radius)/$log2;
#
# in line 640 of DiskFracDim.pm; it must have been missed when
# converting logs to log_{2}.
#
# Revision 1.6  2001/01/26  12:09:14  lw2j
# Fixed typo in comment for lts().
#
# Revision 1.5  2000/11/12  14:41:06  lw2j
# Changed cell ID computation scheme; it now uses packed ulongs
# instead of ASCII.  Max dimensionality should then be a bit higher.
#
# Revision 1.4  2000/10/24  18:00:46  lw2j
# Now supports $q=1.  Logarithms now base 2.
#
# Revision 1.3  2000/10/06  19:19:44  lw2j
# Fixed bugs involving q != 2.  Barred q=1, for now.
#
# Revision 1.2  2000/10/03  14:08:01  lw2j
# Added magic perl invoker.
#
# Revision 1.1  2000/07/18  13:27:11  lw2j
# Initial revision
#
############################################################
#
# Since this is still the main interface, the comments on the
# workings of Pair_Count.pm are duplicated both here and
# there.
#
#
# Purpose:
#  To provide simple code for computing the 'correlation' fractal
#  dimension $D_{q}$.
#
#  It's {\em very} simple code, so the data is expected to be accessed
#  via a wrapper, for reasons of flexibility.  This code is
#
#                   ***** NOT *****
#
#  optimized for speed in an industrial sense.
#
#  That is, the module accepts as input an object reference.  This
#  object reference must support:
#
#     get_count($)       # return how many items there are
#     get_dims($)        # return embedding dimensionality
#     get_obj($$)        # return the object by 0-based index, as an
#                        # array of scalars
#
#  The object should not change while we're processing it...
#
#  This object ref will be 'data_o' below.
#
#  Other parameters that should be worried about:
#
#     method             # 'fast' (inexact box-count) or 'slow'
#                        # (iterate over pairwise distances, but
#                        #  more exact; DO NOT USE if you care
#                        # about speed)
#     r_min              # minimum radius for our approximation
#     r_max              # maximum radius for our approximation
#     r_count            # maximum number of radii to use (>= 1)
#     intervals          # if defined (default 20) and the method
#                        # is 'slow', the log-distance axis is
#                        # divided into this many intervals, and
#                        # the point with the LOWEST pair-count is
#                        # chosen for the line-fitting if any
#                        # exists.
#     q                  # normally 2, the degree
#
#  *** NOTE ***
#
#     This module has been modified significantly; it now stops
#     the computation upon calculating the log-log points.  In
#     other words, it no longer removes flat regions, calculates
#     the least-squares fit, or actually give the fractal dimension
#     itself.  Therefore, another program/module needs to compute
#     the actual FD.  When $q=2, it's equal to the pair-count
#     slope.  When $q=1$, ditto.  When $q is something else,
#     the slope needs to be divided by $q-1.
#
#
#  The default values tend to be fairly sane, but if the data
#  you have has ranges significantly different than mine, you
#  may want to adjust the radii (for instance).  See
#  'set_default_params'.
#
#  Logarithms, incidentally, should now be base 2 throughout.
#  In cases where the scaling applies to both axes, it does
#  not matter from the point of determining fractal dimension --
#  since it does not affect the slope -- but it does matter
#  in terms of absolute numbers, and for $q=1 (based on
#  entropy, which normally uses bits).
#
#  The methods of greatest concern are new, set_params, clear
#  and fracdim.  The 'fracdim' interface has changed -- read
#  the comments.
#
#  Sample usage:
#    See 'FracTest.pl', which attempts to load a data file 'tri'.
#
#
#
#  WARNING re: high-dimensional sets:
#     Old "cell ID" computations -- needed in box_put and
#     box_put_int -- have changed format.
#
#     It used to be that a cell, which corresponds to an
#     N-tuple of integers, with each dimension ranging from
#        (0, ..., (max-min)/radius)
#     was identified simply by the concatenation of this
#     tuple separated by single spaces.
#
#     This has now changed to native-format unsigned longs
#     (i.e. binary format), in order to resolve an apparent
#     issue involving many dimensions; there may be a limit
#     on the size of a DBM key.  In any event, this change
#     seems to have fixed anomalous behavior regarding not
#     being able to retrieve a key/value pair that had just
#     been fixed.
#
#     If so, this will still break for many dimensions
#     depending on the limit, in which case one may need to
#     generate something like an MD5 hash.  These message-
#     digest functions, however, are not 1:1 and therefore
#     doing so discards any guarantee of correctness.  In
#     addition, this breaks the optimizations (namely the
#     box_put_int + all_boxes_int cycle) because it depends
#     on being able to invert the function, so that it can
#     iterate over keys/value pairs instead of data items.
#
#
##########################################################################
#
# Package Header
#
##########################################################################

package DiskFracDim;
require Exporter;
require Pair_Count;     # Most of the 'guts' moved here.
require Trim_Flat;
require Robust_LSFit;

##### <change_me>
#
# Change this to where you placed the modules.
#
use lib '/afs/cs.cmu.edu/user/lw2j/shared/FracDim/FracDim';
##### </change_me>

use strict;
use POSIX;

##########################################################################

BEGIN {
    use Exporter ();
    use Symbol;
    use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

    $VERSION     = 1.0;
    @ISA         = qw(Exporter);
    @EXPORT =  qw(
                  new
                  clear

                  get_param
                  set_params
                  set_default_params

                  fracdim

                  get_loglog
                  get_loglog_trimmed
                  print_loglog
                 );
    %EXPORT_TAGS = ();
    @EXPORT_OK   = ();

    return 1;
}



# new:  creates blessed Pair_Count objects with default params
#
# Standard usage:
#
# my $fracObj = Pair_Count::new();
#
# Extended usage:
#
# my $fracObj = Pair_Count::new( param => value, param => value, ... ).
sub new($@) {
    my $class = shift;
    my $self  = +{};

    bless $self, $class;

    $self->{'pair_obj'} = new Pair_Count;
    $self->clear();

    $self->set_default_params();
    $self->set_params(@_);

    return $self;
}




# A pass-through if the parameter can't be found locally.
# Note that my Pair_Count.pm get_param() will die() if given
# a bogus parameter.
sub get_param($$) {
    my $self = shift;
    my $pnam = shift;

    if (exists($self->{'params'}->{$pnam})) {
        return $self->{'params'}->{$pnam};
    } else {
        defined($self->{'pair_obj'}) || die;

        return ($self->{'pair_obj'}->get_param($pnam));
    }
}


# A pass-through.
#
# One change:  'verbose' has been retained at this level, since
# a) it printed *trimmed* log-logs, and b) Pair_Count.pm never
# does the trimming itself.
#
sub set_default_params($) {
    my $self = shift;

    defined($self->{'pair_obj'}) || die;

    $self->{'pair_obj'}->set_default_params();

    $self->{'params'} = +{
        verbose => 0,
        'q'     => 2
    };

    die unless defined($self->{'params'}->{'q'});

    # Make sure that they're in sync.
    $self->{'pair_obj'}->set_params('q' => 2);
}


# Forget about old data.  If you only call 'new' once, and
# use set_params to switch data objects, you'll want to
# clear() the old data out of there.
#
# Standard usage:
#
# $fdObj->clear();
sub clear($) {
    my $self = shift;

    if (defined($self->{'pair_obj'})) {
        $self->{'pair_obj'}->clear();
    }

    delete $self->{'log_radius'};
    delete $self->{'log_pairs'};
    delete $self->{'log_radius_trimmed'};
    delete $self->{'log_pairs_trimmed'};

    return $self;
}




# Set zero or more parameters.  All but verbose are handled
# by Pair_Count.
#
# Standard usage:
#
# $fdObj->set_params('q' => 2, r_count => 200);
#
sub set_params($@) {
    my $self  = shift;
    my $param = undef;
    my $arg   = undef;

    my @pass_through_params = ();

    defined($self->{'pair_obj'}) || die;

    while (defined($param = shift)) {
        $arg = shift;

        if (exists($self->{'params'}->{$param})) {
            $self->{'params'}->{$param} = $arg;

            if ($param eq 'q') {
                # Both levels need to know this.
                push @pass_through_params, $param;
                push @pass_through_params, $arg;
            }
        } else {
            push @pass_through_params, $param;
            push @pass_through_params, $arg;
        }
    }

    # Most arguments are handled (and checked) at the
    # Pair_Count level.
    $self->{'pair_obj'}->set_params(@pass_through_params);

    return $self;
}


# Returns two references corresponding to (copied) arrays
# containing the log_radius and log_pairs data.
#
# The data is not automatically clear()-ed afterwards.
#
# The interface has changed.  To preserve compatibility
# with old scripts, which expected
#
#  ($fd, $b, $r)
#
# we return instead
#
#  ($fd, $b, $r, $a)
#
# Note -- $a != $fd, when $q != 1 and $q != 2.
# $a is the true slope of the least-squares-fit line,
# and needs to be scaled for FD determination
# according to a factor of 1/($q-1).
#
# This method also stores copies of the log/log data,
# before (log_radius, log_pairs) and after
# (log_radius_trimmed, log_pairs_trimmed) trimming.
# The added memory consumption should be minimal,
# since normally the number of radii is quite small.
#
sub fracdim($) {
  my $self  = shift;

  defined($self->{'pair_obj'}) || die;

  # Pair_count returns non-trimmed log-logs.

  my ($log_rd_ref, $log_ct_ref) = $self->{'pair_obj'}->pair_count();

  $self->{'log_radius'} = +[ @{$log_rd_ref} ];
  $self->{'log_pairs'}  = +[ @{$log_ct_ref} ];

  # Trim them.
  ($log_rd_ref, $log_ct_ref) = Trim_Flat::trim_flat($log_rd_ref,
                                                       $log_ct_ref);

  # Again, copy for our purposes.  Or, rather, that for those of
  # our users.
  $self->{'log_radius_trimmed'} = +[ @{$log_rd_ref} ];
  $self->{'log_pairs_trimmed'}  = +[ @{$log_ct_ref} ];

  # If verbose, print them.
  if ($self->{'params'}->{'verbose'}) {
      $self->print_loglog($log_rd_ref, $log_ct_ref);
  }

  # Employ robust linefitting.
  #
  # y=a*b+r
  my ($a, $b, $r) = Robust_LSFit::robust_lsfit($log_rd_ref,
                                               $log_ct_ref);

  # Find $q.
  my $q = $self->{'params'}->{'q'};


  # Adjust FD.

  my $fd = undef;

  # $q = 1 is special...
  if ($q ==1) {
      $fd = $a;
  } else {
      # A NOOP for the normal case of $q=2.
      $fd = $a / ($q-1);
  }

  # We used to NOT return $a.
  return ($fd, $b, $r, $a);
}



# Returns references to copies of the radius and pair-count data,
# before flat regions have been removed (if the computation has
# made it that far).
sub get_loglog($) {
  my $self = shift;

  my $log_radius_ref = +[ @{$self->{'log_radius'}} ];
  my $log_pairs_ref  = +[ @{$self->{'log_pairs'}} ];

  return ($log_radius_ref, $log_pairs_ref);
}



# Returns references to copies of the radius and pair-count data,
# after flat regions have been removed (if the computation has
# made it that far).
sub get_loglog_trimmed($) {
  my $self = shift;

  my $log_radius_ref = +[ @{$self->{'log_radius_trimmed'}} ];
  my $log_pairs_ref  = +[ @{$self->{'log_pairs_trimmed'}} ];

  return ($log_radius_ref, $log_pairs_ref);
}



# report log/log points (the final ones fed next into line-fitting)
sub print_loglog($$$) {
    my $self  = shift;
    my $log_radius_ref  = shift;
    my $log_pairs_ref   = shift;

    my @log_radius = @{$log_radius_ref};
    my @log_pairs  = @{$log_pairs_ref};

    my $radius_ct  = scalar @log_radius;
    my $pairs_ct   = scalar @log_pairs;

    my $idx   = 0;

    ((defined($radius_ct)) && (defined($pairs_ct)) &&
     ($radius_ct == $pairs_ct)) ||
        die "Strange log-log data!";

    print ";; ===== begin-log-log =====\n";

    for ($idx=0; $idx < $radius_ct; $idx++) {
        my $lr = $log_radius[$idx];
        my $lp = $log_pairs[$idx];

        print "; $lr $lp\n";
    }
    print ";; ===== end-log-log ===== \n";
}





