#!/usr/local/bin/perl5 -w
#
# Purpose:
#  To supply miscellaneous helpers.
#
##################################################################
#
# Package Header
#
##################################################################
##################################################################

package util;

require ArrayFoo;
require Exporter;


##########################################################################

BEGIN {
  use Exporter ();
  use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

  $VERSION     = 1.0;
  @ISA         = qw(Exporter);
  @EXPORT_OK   = qw(
    @DIST_LIST
    gaussPair
    gaussRV

    sigmoid
    vecproj

    deep_copy

    quantize
    quantize_gen_cdf
    quantize_gen_ppf
    quantize_gen_pdf
  );
  %EXPORT_TAGS = ();
  @EXPORT      = ();

  return TRUE;
}

our @DIST_LIST = ();


sub deep_copy(@);

@DIST_LIST = (
  +[
  'quantize',
  \&quantize_est,
  +[ "0%", "10%", "20%",
  "30%", "40%", "50%",
  "60%", "70%", "80%",
  "90%", "100%"
  ],
  \&quantize_gen_cdf,
  \&quantize_gen_ppf,
  \&quantize_gen_pdf
  ]
);


##########################################################################

BEGIN {
  use constant M_PI => 4 * atan2(1, 1);

  sub gaussPair() {
    my $seed0;
    my $seed1;
    my $out0;
    my $out1;

    $seed0 = rand;
    $seed1 = rand;

    $out0 = ((-2 * (log $seed0)) ** 0.5) * (cos (2*M_PI*$seed1));
    $out1 = ((-2 * (log $seed0)) ** 0.5) * (sin (2*M_PI*$seed1));

    return ($out0, $out1);
  }

}

BEGIN {
  my $count = 0;
  my $out0;
  my $out1;

  sub gaussRV() {
    if ($count == 0) {
      ($out0, $out1) = &gaussPair();
    } else {
      $out0 = $out1;
    }

    $count = 1 - $count;

    return $out0;
  }
}

sub sigmoid($) {
  my $outval = shift;

  $outval =  1/(1+exp(-$outval));

  return $outval;
}





# take references to vectors A, B
# returns projection of B onto A

sub vecproj($$) {
  my $aref = shift;
  my $bref = shift;
  my @a = @{$aref};
  my @b = @{$bref};
  my @p = ();
  my $x = 0;
  my $num_attr = scalar @a;

  {
    my $aTb = 0;
    my $aTa = 0;
    my $idx = 0;

    for ($idx=0; $idx < $num_attr; $idx++) {
      $aTb += $a[$idx] * $b[$idx];
      $aTa += $a[$idx] * $a[$idx];
    }

    $x = $aTb / $aTa;
  }

  {
    my $idx = 0;

    for ($idx=0; $idx < $num_attr; $idx++) {
      $p[$idx] = $x * $a[$idx];
    }
  }

  return @p;
}




# Hierarchically duplicate a scalar or reference.
sub deep_copy(@) {
  my @inputs = @_;
  my @results = ();
  my $arg = undef;

  foreach $arg (@inputs) {
    if (!defined($arg)) {
      push @results, undef;
    } elsif (!ref($arg)) {
      push @results, $arg;
    }  elsif (ref($arg) eq 'SCALAR') {
      my $new_itm = $$arg;
      push @results, \$new_itm;
    } elsif (ref($arg) eq 'ARRAY') {
      my $new_ref = +[];

      map { push @$new_ref, deep_copy($_) } @$arg;
      push @results, $new_ref;
    } elsif ((ref($arg) eq 'CODE') ||
    (ref($arg) eq 'GLOB') ||
    (ref($arg) eq 'LVALUE')) {
      # Not going to handle these here.
      push @results, $arg;
    } elsif (ref($arg) eq 'REF') {
      my $item = deep_copy($$arg);

      push @results, \$item;
    } else {
      my %new_hash = ();
      my $key = undef;
      my $val = undef;

      while (($key, $val) = each %$arg) {
        $new_hash{$key} = deep_copy($val);
      }
      push @results, \%new_hash;
    }
  }

  if (wantarray()) {
    return @results;
  } else {
    return $results[0];
  }
}



# Perform an exact, in-memory quantizing scheme.  Arguments are given
# hash-style.  Return the quantiles in ref-to-array form.
sub quantize(@) {
  my %args = @_;

  my $data_ref = $args{'data_ref'};
  defined($data_ref) || return undef;

  # At most use this many quantiles.
  my $ct       = $args{'count'};
  defined($ct)   || return undef;

  # Defaults to 1.  Set to 0 if you want it to use the closeset value.
  my $int      = $args{'interpolate'};
  # Default value.
  (defined($int)) || ($int = 1);

  my $min_pct  = $args{'min_pct'};
  (defined($min_pct)) || ($min_pct = 0);

  my $max_pct  = $args{'max_pct'};
  (defined($max_pct)) || ($max_pct = 1);

  die unless (($min_pct >= 0) && ($min_pct <= 1));
  die unless (($max_pct >= 0) && ($max_pct <= 1));

  # Not checking for backwards order.  Feel free to do so if that's
  # really what you want.

  my @quantiles = ();
  my $n         = scalar(@$data_ref);
  my $i         = 0;

  for ($i=0; $i < $ct; $i++) {
    my $pct = $min_pct + ((($max_pct - $min_pct) * $i) / ($ct - 1));
    my $idx = $pct * $n;

    if ($idx == int($idx)) {
      $idx = ($idx < $n) ? $idx : ($n-1);

      push @quantiles, ArrayFoo::arrSelect($data_ref, $idx);
    } elsif ($int) {
      my $idx_lo = int($idx);
      my $idx_hi = $idx_lo + 1;

      ($idx_hi < $n) || die;

      my $val_lo = ArrayFoo::arrSelect($data_ref, $idx_lo);
      my $val_hi = ArrayFoo::arrSelect($data_ref, $idx_hi);

      my $val = $val_lo + (($val_hi - $val_lo) * ($idx - $idx_lo));
      push @quantiles, $val;
    }
  }

  return \@quantiles;
}


# A "fake" distribution.  Give it a number of quantiles to use, or it'll
# pick a mere 11 by default.
sub quantize_est($;$) {
  my $data_ref = shift;
  my $count    = shift;

  (defined($count)) || ($count = 11);

  my @quantiles = quantize(data_ref => $data_ref, count => $count);
  my $cdf = quantize_gen_cdf(\@quantiles);
  my $ppf = quantize_gen_ppf(\@quantiles);
  my $pdf = quantize_gen_pdf(\@quantiles);

  return ($cdf, \@quantiles, $ppf, $pdf);
}



# Note:  Quantiles assumed to be evenly distributed within [0,1].
sub quantize_gen_cdf($) {
  my $param_ref = shift;

  defined($param_ref) || return undef;

  my $ct        = scalar @$param_ref;

  return sub {
    my $x = shift @_;

    defined($x) || return undef;

    if ($x <= $param_ref->[0]) {
      return 0;
    }

    if ($x >= $param_ref->[-1]) {
      return 1;
    }

    # Search param_ref for bounds and assume linear interpolation.
    my $lo_idx = 0;
    my $hi_idx = $ct - 1;

    while (($hi_idx - $lo_idx) > 1) {
      my $mid_idx = int(($lo_idx + $hi_idx)/2);
      my $mid_val = $param_ref->[$mid_val];

      if ($mid_val < $x) {
        $lo_idx = $mid_idx;
      }  elsif ($mid_val > $x) {
        $hi_idx = $mid_idx;
      }  else {
        $lo_idx = $hi_idx = $mid_idx;
      }
    }

    my $mid_pct = (($lo_idx + $hi_idx) / (2 * ($ct-1)));
    return $mid_pct;
  };
}



# Note:  Quantiles assumed to be evenly distributed within [0,1].
sub quantize_gen_ppf($) {
  my $param_ref = shift;
  defined($param_ref) || return undef;

  my $ct = scalar @$param_ref;

  return sub {
    my $pct = shift @_;

    (defined($pct) && ($pct >= 0) && ($pct <= 1)) || return undef;

    my $idx = $pct * $ct;
    ($idx < $ct) || ($idx = ($ct-1));

    if ($idx == int($idx)) {
      return $param_ref->[$idx];
    } else {
      my $lo_idx = int($idx);
      my $hi_idx = $lo_idx+1;

      my $lo_val = $param_ref->[$lo_idx];
      my $hi_val = $param_ref->[$hi_idx];
      my $val    = $lo_val + (($hi_val - $lo_val) * ($idx - $lo_idx));

      return $val;
    }
  };
}


# Again, note:  Quantiles assumed to be evenly distributed within [0,1].
sub quantize_gen_pdf($) {
  my $param_ref = shift;

  defined($param_ref) || return undef;
  my $ct        = scalar @$param_ref;

  return sub {
    my $x = shift @_;

    defined($x) || return undef;

    if ($x <= $param_ref->[0]) {
      return 0;
    }

    if ($x >= $param_ref->[-1]) {
      return 0;
    }

    # Search param_ref for bounds and assume linear interpolation.
    my $lo_idx = 0;
    my $hi_idx = $ct - 1;

    search:  while (($hi_idx - $lo_idx) > 1) {
      my $mid_idx = int(($lo_idx + $hi_idx)/2);
      my $mid_val = $param_ref->[$mid_val];

      if ($mid_val < $x) {
        $lo_idx = $mid_idx;
      }  elsif ($mid_val > $x) {
        $hi_idx = $mid_idx;
      }  else {
        die unless ($mid_idx < ($ct-1));

        $lo_idx = $mid_idx;
        $hi_idx = $mid_idx+1;
        last search;
      }
    }

    my $lo_val = $param_ref->[$lo_idx];
    my $hi_val = $param_ref->[$hi_idx];
    my $d      = 1/(abs($hi_val - $lo_val) * ($ct-1));

    return $d;
  };
}


return 1;

##########################################################################
