#!/usr/local/bin/perl5 -w
#
# Purpose:
#
# To provide some basic numerical methods.
#
##########################################################################
# $Id: Numerical.pm,v 1.78 2004/07/07 13:08:08 lw2j Exp $
# $Log:	Numerical.pm,v $
# Revision 1.78  2004/07/07  13:08:08  lw2j
# Golub-Reinsch should auto-terminate after a number of
# iterations.
# 
# Revision 1.75  2004/04/02  15:01:42  lw2j
# More anti-infinite-loop traps placed.
#
# Revision 1.74  2004/01/23  17:27:13  lw2j
# Fix for m<n matrices in lsq_solve_backwards.
#
# Revision 1.70  2004/01/14  15:02:24  lw2j
# Fixed bugged interface to SVD; other assorted code cleanups.
#
# Revision 1.69  2004/01/13  18:32:37  lw2j
# Tweaked determinant computation (should be no functional change here).
# Added explicit function for PLDU decomposition.
#
# Revision 1.68  2004/01/06  19:14:13  lw2j
# Added explicit LU, LDU factorizations.
# Added non-algorithmic-specific aliases e.g. matrix_inverse, matrix_svd.
# Made matrix functions normally be named with 'matrix_' prefix.
#
# Revision 1.67  2003/12/30  15:52:35  lw2j
# Added missing 'vector_new' method.
#
# Revision 1.66  2003/07/28  14:50:43  lw2j
# Added 'use lib' line.
#
# Revision 1.65  2003/06/27  14:47:07  lw2j
# Tweaked inverse search to be a bit more paranoid.
#
# Revision 1.64  2003/05/31  14:33:57  lw2j
# Fixed spelling error, Marquadt -> Marquardt.
#
# Revision 1.63  2003/04/14  15:40:27  lw2j
# rls_logparabolic_breakpoints now imposes a minimum count of 10 (for
# very small sets, it could have been 0...).
#
# Revision 1.62  2003/03/19  15:09:25  lw2j
# More tweaks made to breakpoint detection.  RLS logparabolic method
# models now use much closer lambda values, for instance. Min/max
# search for local extrema now only looks for local minima.
#
# Revision 1.61  2003/03/14  13:56:11  lw2j
# Tweaked logparabolic breakpoint generator to have a nonzero minimum
# deviation.
#
# Revision 1.58  2003/03/07  09:56:04  lw2j
# Fixes made to simplex integration comparisons, tweaks made to
# tolerances.
#
# Revision 1.57  2003/03/04  16:22:24  lw2j
# Tweaking RLS.
#
# Revision 1.56  2003/02/28  16:26:07  lw2j
# Added support for recursive least-squares linear fitting.
#
# Revision 1.55  2003/02/24  18:02:41  lw2j
# Tweaking of EM parameters.
#
# Revision 1.54  2003/02/18  17:01:09  lw2j
# Fixed typo in EM's truncation/merging code.
#
# Revision 1.53  2003/02/18  13:38:39  lw2j
# More work towards EM's auto-merging of components.  It should now be
# aware of truncations, and handle them 'specially in order to prevent
# the possibility of points being truncated away -- that is, it should
# use the more inclusive truncation points.
#
# Revision 1.52  2003/02/14  15:11:13  lw2j
# Unified EM path (truncated and normal have been merged).  API changed...
#
# Revision 1.51  2003/02/13  15:01:40  lw2j
# Minor tweaks.
#
# Revision 1.50  2003/02/11  16:34:20  lw2j
# Merely added a debugging printf var.
#
# Revision 1.49  2003/02/07  16:39:31  lw2j
# Removed debugging messages.
#
# Revision 1.48  2003/02/06  12:25:07  lw2j
# Trivial tweaks.
#
# Revision 1.47  2003/01/28  16:34:21  lw2j
# Added 'lsq_linfit_SVD', a wrapper for doing least-squares linear
# fitting.
#
# Revision 1.46  2002/12/04  17:18:54  lw2j
# Simplex algorithm now supports a constraint function, which
# is even allowed to change the parameters.
#
# Revision 1.45  2002/12/02  17:43:07  lw2j
# Simplex wrapping and computation should, theoretically, not explode
# when given undef parameters.  This is theoretically an issue, because
# the truncated normal estimator can give undefs for either truncation
# point.
#
# Revision 1.44  2002/12/02  17:05:26  lw2j
# Nelder-Mead simplex routine now takes an optional maximum number of
# iterations.
#
# Revision 1.43  2002/12/02  16:33:10  lw2j
# The truncated EM path now doesn't bother passing on components
# with prob <= 0, even as undefs.
#
# Revision 1.42  2002/12/01  16:58:53  lw2j
# The "mixture of truncated components" estimator now should handle
# degenerate cases better (e.g. component now vanishes when it has
# too little support to estimate).
#
# Revision 1.41  2002/11/30  14:52:19  lw2j
# Fixed mixture of truncated EM some more.
#
# Revision 1.40  2002/11/26  16:58:51  lw2j
# Tweaked LM_perturb so its steps aren't binary...
#
# Revision 1.39  2002/11/19  13:02:07  lw2j
# Fixed a sort in SVD computation -- <=> doesn't handle undefs.
# Levenberg-Marquardt will now use the SVD algorithm to produce a
# pseudoinverse if Gaussian elimination fails.
# Other tweaks...
#
# Revision 1.37  2002/11/11  16:47:48  lw2j
# Numerous fixes made to support mixtures of truncated normals.
# Fairly obscure fixes made to Levenberg-Marquardt that kick in
# when not all columns matter on one iteration, but then lambda
# gets changed and the columns weren't being replaced where they
# should have been.
#
# Revision 1.36  2002/11/08  18:17:59  lw2j
# Added weights to Levenberg-Marquardt, SSQ.
# Adding capabilities for mixtures of truncated normals, still very
# much a work in progress.
#
# Revision 1.35  2002/10/25  17:37:46  lw2j
# Tweaked with much more stringent, exhaustive em-searching.
#
# Revision 1.34  2002/10/17  14:46:30  lw2j
# Added more sanity checks to loop convergence tests.  Tweaked EM,
# so it needs to make substantial progress (5% better) to keep
# iterating.  Changed inverse search to have a maximum number of
# iterations.  Termination probability appears to have increased...
#
# Revision 1.33  2002/10/07  12:25:41  lw2j
# Applied saner indenter script.
#
# Revision 1.32  2002/10/07  11:39:57  lw2j
# Log-likelihood treats negative probabilities as 0.
#
# Revision 1.31  2002/10/05  13:13:22  lw2j
# Changed indentation style (well, changed the script actually).
#
# Revision 1.30  2002/09/24  16:17:53  lw2j
# Decreased tolerance in Simpson's rule.
#
# Revision 1.29  2002/09/19  10:47:52  lw2j
# Fix for EM:  return undef in the degenerate case where /no/ mixtures are active.
# This implies that either truncated distributions were used and are all out of
# range of all the data, or numerical issues are preventing the close-to-zero
# responsibilities from being noticed, AFAIK.
#
# Revision 1.28  2002/09/06  15:58:29  lw2j
# Fixed bug in pseudoinverse computations -- D+ needed a transpose.
#
# Revision 1.27  2002/09/06  15:19:43  lw2j
# The matrix pseudoinverse function now returns the full
# results of the SVD as well.
#
# Revision 1.26  2002/09/04  17:39:58  lw2j
# EM algorithm tweaked to better handle failure modes like mixture
# components with 0 probability.
#
# Revision 1.25  2002/09/01  12:20:35  lw2j
# Now supports computing the pseudoinverse of a matrix, and using
# that to solve linear equations.
#
# Revision 1.24  2002/08/31  17:28:34  lw2j
# The Golub-Reinsch SVD code now handles rank-1 matrices; or, rather,
# it's handled as a special case.  Oh, and matrix_rank and
# matrix_nullspace() are now supported as well.
#
# Revision 1.23  2002/08/30  18:16:20  lw2j
# Fixed column-sorting bug in SVD.  Problem:  still need to break a
# division-by-zero issue with, say,
#   2 2
#   1 1
# being a matrix.
#
# Revision 1.22  2002/08/30  17:50:02  lw2j
# Implemented Golub-Reinsch SVD.
#
# It's /very/ ugly because it's a straight port from the original,
# ugly, mostly uncommented Algol-60 code found in
#
# "Singular Value Decomposition and Least Squares Solutions", by
# Golub and Reinsch, found in _Numerische Mathematik_.  Vol 14:4, 1970.
#
# Revision 1.21  2002/08/23  14:18:57  lw2j
# Newton's method now allows for a leapfrog differentiator.
# Levenberg-Marquardt gets another wrapper; this one does random
# restarts based on perturbing previous end states.
#
# Revision 1.20  2002/08/22  15:22:07  lw2j
# LM, if it /does/ reach a singular matrix, returns the results it
# got before it did so.
#
# Revision 1.19  2002/08/22  14:57:24  lw2j
# Levenberg-Marquardt code now has
#   a) a wrapper, to simplify calling it, and
#   b) code that deals with the case that not all parameters make a
#      noticeable difference (e.g. J^{T} x J is singular).
#
#      When this happens, the offending columns and corresponding
#      rows are removed from the product, as well as from J^{T} x r.
#
#      If NO variables appear to matter, then iteration should stop.
#
# Revision 1.18  2002/08/22  11:35:28  lw2j
# Computing the partial derivative matrices has been moved to a
# separate function (out of Levenberg_Marquardt).
#
# Revision 1.17  2002/08/22  11:11:32  lw2j
# Supports two matrix inverters, one based on Gaussian elimination
# (via the PA=LU decomposition) and another one using the QR factorization.
# The QR version has numerical issues, however, and may be best avoided.
#
# Revision 1.16  2002/08/22  10:25:34  lw2j
# Changed semantics of matrix_preprocess for when it's already a matrix
# (it now does nothing and returns the original reference).
#
# Added Levenberg_Marquardt(), which does LM-style nonlinear lsq fitting.
# If you don't have an array of partial derivative generators (it needs
# to construct Jacobians), it'll use a leapfrog differentiator, which
# is piling approximations on approximations so be warned.
#
# Revision 1.15  2002/08/21  23:41:46  lw2j
# _preprocess functions have lost the underscore.
# Matrix inversion is supported; it uses Gaussian elimination.
# There's now a "matrix_fix" function to turn low-magnitude values
# into 0.
#
# Revision 1.14  2002/08/21  17:30:40  lw2j
# Simplex:  if function being evaluated returns undef, substitute
# $__VERY_BIG (default:  1e50).
#
# Revision 1.13  2002/08/21  11:26:20  lw2j
# Tweak Nelder-Mead to use the expanded simplex /only/ if it's better
# than the reflected one.
#
# Revision 1.12  2002/08/16  16:39:38  lw2j
# Simplex wrapper maker now generates an array ref instead of a list
# if desired.
#
# Revision 1.11  2002/08/12  14:55:49  lw2j
# Added 'return 1' at end.
#
# Revision 1.10  2002/07/18  15:56:09  lw2j
# Eliminated a duplicate function call in the Nelder-Mead simplex
# algorithm.  Indentified.
#
# Revision 1.9  2002/07/15  15:45:21  lw2j
# Indented/untabified.
#
# Revision 1.8  2002/07/15  15:43:43  lw2j
# Supports Nelder-Mead simplex method for function minimization.
#
# Revision 1.7  2002/07/12  10:57:54  lw2j
# Supports Romberg integration.
#
# Revision 1.6  2002/07/11  16:21:50  lw2j
# Now supports Romberg integration.
#
# Revision 1.5  2002/06/28  13:11:43  lw2j
# Now has N-R iterative numerical solver for 2 equations in 2 unknowns.
# Also has support for a simple "leapfrog" numerical differentiator.
#
# Revision 1.4  2002/06/26  19:16:25  lw2j
# BIC now counts the mixing probabilities as additional parameters.
#
# Revision 1.3  2002/06/26  17:40:34  lw2j
# Now has a log_likelihood computer -- give it data ref and pdf.
# Also, the EM function now computes BIC.
#
# Revision 1.2  2002/06/26  15:07:17  lw2j
# Added support for expectation-maximization (EM), otherwise
# known as fuzzy k-means.  Right now, it operates for ONE start
# and ONE k/initialization; it's up to caller to decide how
# to initialize, or to compute BIC ('tho latter is simple.  EM
# code also gives you log-likelihood).
#
##########################################################################
#
# Package Header
#
##########################################################################

package Numerical;
require Exporter;
use     POSIX;

use strict;
use lib '/usr/lw2j/private/Stat/Perl';

require ArrayFoo;
require AndersonDarling;

##########################################################################

BEGIN {
  use Exporter ();
  use Symbol;
  use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

  $VERSION     = 1.0;
  @ISA         = qw(Exporter);
  @EXPORT      = ();
  %EXPORT_TAGS = ();
  @EXPORT_OK   = qw(
    inverse_search

    Simpsons_rule
    trapezoid_definite
    Romberg_definite
    Romberg_indefinite

    linear_solve
    linear_solve_SVD
    linear_solve_RLS

    lsq_linfit_SVD
    lsq_linfit_RLS


    lsq_solve_forwards
    lsq_solve_backwards

    Newtons_method
    Newton2
    leapfrog_diff
    compute_ssq_residuals

    matrix_print
    matrix_rank
    matrix_new
    vector_new
    matrix_identity
    matrix_duplicate
    matrix_fix
    matrix_multiply
    matrix_add
    matrix_transpose
    matrix_nullspace
    matrix_QR

    matrix_Gaussian_elimination
    matrix_LU
    matrix_LDU
    matrix_PLDU
    matrix_Gaussian_inverse


    matrix_QR_inverse
    matrix_inverse
    matrix_preprocess
    matrix_to_diag
    matrix_SVD_GolubReinsch
    matrix_SVD
    matrix_pseudoinverse
    matrix_determinant

    rls_update

    rls_logparabolic_breakpoints
    density_breakpoints

    diag_to_matrix

    vector_dot_product

    polynomial_evaluate
    polynomial_evaluator_gen
    polynomial_lsfit
    polynomial_guess
    polynomial_RMS
    polynomial_multiply

    info_equidepth
    info_equiwidth

    em_one_iteration
    em_converge

    compute_log_likelihood

    simplex_wrap
    Nelder_Mead_simplex
    vector_preprocess
    vector_normalize

    compute_partial_matrix
    Levenberg_Marquardt
    LM_fit
    LM_perturb
  );

  return 1;
}



# Internal functions.  These may change, are fairly specialized in use,
# or for various other reasons shouldn't be exported.

sub _info_general($);
sub _prep_equiwidth($$);
sub _prep_equidepth($$);
sub _fake_partials_with_leapfrogs($$$$);
sub _matrix_GolubReinsch_helper($);
sub  matrix_SVD_GolubReinsch($;$$);

my $__VERY_BIG = 1e50;

# Ignore the super-rare components automatically.  Do not make this
# undefined or less than zero.
my $__MIN_MIXPROB = 0;
die unless ((defined($__MIN_MIXPROB)) && ($__MIN_MIXPROB >= 0));


# This value is used by the EM implementations when computing
# log-likelihood.
my $__LL_PROB_FLOOR = 1e-5;


sub _NUMERICAL_debug_printf(@) {
  if (0) {
    print "***NUMERICAL_DEBUG:  ", @_;
  }
}


sub new($@) {
  my $class = shift;
  my $self  = +{};

  bless $self, $class;

  return $self;
}


# Generic search algorithm for "inverting" a function when we don't
# have an actual inverse.  Iterative search, oh joy.
#
# Requires:
#    The function to invert, f(x).
#    The y=f(x) value.
#    A lower bound.  Must be valid.
#    A upper bound.  Must be valid.
#
#    The error threshold (optional).  This is error in terms of f(x),
#    not on x, since we don't know the true value of x.
#
#    Maximum number of iterations.
#
# If $f is continuous, and we've got a valid range, we should find it.
# It may just take an awfully long time.
#
# CHANGE:  There is now a maximum number of iterations permitted.  In
# addition, if the search space shrinks below a certain range the
# search will terminate.
#
# Thus, it should always terminate...
sub inverse_search($$$$;$$$) {
  my $fref     = shift;
  my $y        = shift;
  my $lo       = shift;
  my $hi       = shift;
  my $max_err  = shift;
  my $max_iter = shift;
  my $min_range = shift;
  my $lo_val  = &$fref($lo);
  my $hi_val  = &$fref($hi);

  defined($lo) || return undef;
  defined($hi) || return undef;

  defined($lo_val) || return undef;
  defined($hi_val) || return undef;

  if (!defined($max_err)) {
    $max_err = 1e-6;
  }

  if (!defined($max_iter)) {
    $max_iter = 100;
  }

  if (!defined($min_range)) {
    $min_range = 1e-8;
  }

  if ($lo_val > $hi_val) {
    # Swap them, for simplicity.
    my $val_temp = $lo_val;
    my $x_temp   = $lo;

    $lo_val = $hi_val;
    $lo     = $hi;
    $hi_val = $val_temp;
    $hi     = $x_temp;
  }


  if (abs($lo_val-$y) <= $max_err) {
    return $lo;
  }

  if (abs($hi_val-$y) <= $max_err) {
    return $hi;
  }

  if (($lo_val > $y) || ($hi_val < $y)) {
    return undef;
  }

  # First try:  linearly interpolate.
  my $val_range = $hi_val - $lo_val;
  my $x_range   = $hi - $lo;

  my $x = $lo + ($x_range * (($y - $lo_val) / $val_range));
  my $iter = 0;

  search_loop:  {
    my $expected_val = $lo_val + ($val_range * (($x - $lo) / $x_range));
    my $actual_val   = &$fref($x);

    if (!defined($actual_val)) {
      return undef;
    }

    my $err_val      = abs($actual_val - $y);

    if ($err_val <= $max_err) {
      # Good enough.
      return $x;
    }

    # Adjust our bounds.
    if ($actual_val < $y) {
      # Too low.
      $lo     = $x;
      $lo_val = $actual_val;
    } else {
      # Too high.
      $hi     = $x;
      $hi_val = $actual_val;
    }

    # Not good enough.  Are we at least in a linear portion of the
    # curve?
    my $err_linear    = abs($actual_val - $expected_val);
    my $old_val_range = $val_range;

    # Adjust ranges.  Note that $x_range can be negative, which is just
    # dandy.  $val_range, however, should be positive due to our invariant.
    $x_range    = $hi - $lo;
    $val_range  = $hi_val - $lo_val;

    # This should always be true.
    die unless ($hi_val >= $lo_val);

    if ($err_linear <= abs(0.05 * $old_val_range)) {
      # Less than 5% off, relatively speaking.  Use linear interpolation
      # for the next guess.
      my $new_x = $lo + ($x_range * (($y - $lo_val) / $val_range));

      # Make sure it budges just a little, but stays in range.

      if ((abs($new_x - $x) <= $min_range) ||
      (($x <= $lo) && ($x <= $hi)) ||
      (($x >= $lo) && ($x >= $hi))) {
        $x = ($lo + $hi)/2;
      } else {
        $x = $new_x;
      }
    } else {
      # Curved.  Oh, bugger.  Try binary search.
      $x = ($lo + $hi)/2;
    }

    $iter++;

    if (abs($hi - $lo) <= $min_range) {
      # Converged, for all practical purposes.
      return $x;
    }

    if ($iter >= $max_iter) {
      # It's taking too long.
      return $x;
    }

    redo search_loop;
  }
}

# Simpson's Rule for numerical integration.  We need...
#
# ...a reference to the function to evaluate; it must take exactly
#    one argument
# ...a reference to the fourth derivative of the function
# ...a start point (undefined if not lower-bounded)
# ...a stop point (undefined if not upper-bounded)
# ...a middle point (only used if both start and stop are undefined)
# ...an interval size [1/200 if range if start and stop are both
#    defined; 0.02 otherwise]
# ...a cutoff, for use if either start or stop is undefined; if an
#    interval contributes less than this, we stop.
sub Simpsons_rule($$;$$$$$) {
  my $fref   = shift;
  my $f4ref  = shift;
  my $a      = shift;
  my $b      = shift;
  my $m      = shift;
  my $delta  = shift;
  my $cutoff = shift;

  defined($fref) || die;
  defined($f4ref) || die;

  if ((!defined($cutoff)) && ((!defined($a)) || (!defined($b)))) {
    $cutoff = 1e-5;
  }

  if (!defined($delta)) {
    if ((defined($a)) && (defined($b))) {
      $delta = abs($b - $a)/10000;
    } else {
      $delta = 0.00005;
    }
  }

  my $delta5  = $delta ** 5;
  my $sum     = 0;
  my $x_start = $m;
  my $x       = $m;



  {
    my $steps = 0;
    my $max_steps = 10000;  # termination trap

    if (defined($a)) {
      while (((!defined(&$fref($a))) || (!defined(&$f4ref($a+$delta)))) &&
      ($steps < $max_steps)) {
        $a += $delta;
        $steps++;
      }
      $x_start = $a;
    }

    if (defined($b)) {
      while (((!defined(&$fref($b))) || (!defined(&$f4ref($b-$delta)))) &&
      ($steps < $max_steps)) {
        $b -= $delta;
        $steps++;
      }
    }

    if ((defined($a)) && (defined($b)) && ($a >= $b)) {
      return undef;
    }

    defined($delta) || die;

    if ((defined($a) && ((!defined(&$fref($a))) ||
    (!defined(&$f4ref($a+$delta))))) ||
    (defined($b) && ((!defined(&$fref($b))) ||
    (!defined(&$f4ref($b-$delta)))))) {
      return undef;
    }
  }


  if (defined($x_start)) {
    # Sum forwards.
    my $x0 = $x_start;
    my $x1 = $x0 + $delta;
    my $x2 = $x1 + $delta;
    my $max_steps = 10000;
    my $steps     = 0;

    if ((!defined($b)) || ($x2 <= $b)) {
      my $f0 = &$fref($x0);
      my $f1 = &$fref($x1);
      my $f2 = &$fref($x2);

      forward_loop: {
        my $diff = ($delta * ($f0 + (4*$f1) + $f2)/3) -
        (&$f4ref($x1) * $delta5 / 90);

        if ((!defined($b)) && ($diff <= $cutoff)) {
          last forward_loop;
        }

        if ((($diff > 0) && (($sum + $diff) <= $sum)) ||
        (($diff < 0) && (($sum + $diff) >= $sum))) {
          # Overflow / underflow / precision.
          last forward_loop;
        }

        $sum += $diff;

        # Simpson's iterates two intervals at a time.
        $x0 = $x2;
        $x1 = $x2 + $delta;
        $x2 = $x2 + 2*$delta;
        $f0 = $f2;

        if ((defined($b)) && ($x2 > $b)) {
          last forward_loop;
        }

        if (++$steps >= $max_steps) {
          last forward_loop;
        }

        $f1 = &$fref($x1);
        $f2 = &$fref($x2);
        redo forward_loop;
      }
    }
  }

  if (!defined($a)) {
    my $steps = 0;
    my $max_steps = 10000;

    # Sum backwards.
    if (defined($m)) {
      $x_start = $m;
    } else {
      $x_start = $b;
    }

    my $x2 = $x_start;
    my $x1 = $x2 - $delta;
    my $x0 = $x1 - $delta;

    my $f2 = &$fref($x2);
    my $f1 = &$fref($x1);
    my $f0 = &$fref($x0);

    backwards_loop: {
      my $diff = ($delta * ($f0 + (4*$f1) + $f2)/3) -
      (&$f4ref($x1) * $delta5 / 90);

      if ($diff <= $cutoff) {
        last backwards_loop;
      }

      if ((($diff > 0) && (($sum + $diff) <= $sum)) ||
      (($diff < 0) && (($sum + $diff) >= $sum))) {
        # Overflow / underflow / precision.
        return $sum;
      }


      $sum += $diff;

      # Simpson's iterates two intervals at a time.
      $x2 = $x0;
      $x1 = $x2 - $delta;
      $x0 = $x1 - $delta;
      $f2 = $f0;
      $f1 = &$fref($x1);
      $f0 = &$fref($x0);

      if (++$steps >= $max_steps) {
        last backwards_loop;
      }

      redo backwards_loop;
    }
  }
  return $sum;
}




# Perform definite integration using the trapezoid rule.  If you
# want to skip values, set the skip parameter (e.g. skip=1 skips
# every second one).  This option is not too useful except for
# making Romberg integration more efficient.  Likewise, the
# half_ends flag can be ignored unless you're writing a Romberg
# routine.  Setting it to 0 means that the endpoints get full
# weight in the summation, not half.
sub trapezoid_definite($$$$;$$) {
  my $function  = shift;
  my $lo_bound  = shift;
  my $hi_bound  = shift;
  my $h         = shift;
  my $skip      = shift;
  my $half_ends = shift;

  if (!defined($skip)) {
    $skip = 0;
  }

  if (!defined($half_ends)) {
    $half_ends = 1;
  }

  my $lo_val    = &$function($lo_bound);
  my $hi_val    = &$function($hi_bound);

  ($h > 0) || die;  # being silly, now

  defined($lo_val) || return undef;
  defined($hi_val) || return undef;

  my $endpoints = $lo_val+$hi_val;

  if ($h > ($hi_bound - $lo_bound)) {
    $h = $hi_bound - $lo_bound;
  }

  if ($h <= 0) {
    return 0;
  }

  my $jmp = (1+$skip) * $h;
  my $sum = 0;

  my $lo_more = $lo_bound+$jmp;
  my $hi_less = $hi_bound-$jmp;

  # Temporarily ignore endpoints.
  for (my $x=$lo_more; $x <= $hi_less; $x+=$jmp) {
    my $val = &$function($x);
    defined($val) || return undef;
    $sum += $val;
  }


  if ($half_ends == 0) {
    $sum += $endpoints;
  } else {
    $sum += $endpoints/2;
  }

  my $final = $sum*$h;
  return $final;
}


# Using this function will probably be faster if you don't know what
# constitutes a 'good' interval value.
#
# This version does definite integration from lo_bound to hi_bound.
# You probably should define /one/ of the two tolerance settings, as
# these determine when it terminates.
#
# abs_tol specifies minimum change in absolute terms:
#    delta >= abs_tol for iteration to continue.
# rel_tol specifies it in relative terms:
#    delta >= (rel_tol * old_val)
#
# where delta = abs(new-old).
#
# If you like, you can also specify the initial number of intervals.
# It defaults to 1024 for now.
#
# This version is pretty careful about how many times it invokes
# your function -- unless I've made a mistake, it should never
# invoke the function twice on the same xval.
#
# One advantage of this over the included version of Simpson's rule
# is that this version requires no derivatives to be supplied.
#
sub Romberg_definite($$$;$$$@) {
  my $function  = shift;
  my $lo_bound  = shift;
  my $hi_bound  = shift;
  my $abs_tol   = shift;
  my $rel_tol   = shift;
  my $init_ct    = shift;
  my $init_h    = shift;  # overrides init_ct if specified

  my @Romberg_table = ();  # store previous Romberg numbers
  my $max_level = 20;

  if ((!defined($abs_tol)) && (!defined($rel_tol))) {
    $rel_tol = 0.001;
  }

  my $range = $hi_bound - $lo_bound;

  if (!defined($init_ct)) {
    $init_ct = 1024;
  }

  # Initial count
  my $h0 = $range/$init_ct;
  my $level = 1;

  if (defined($init_h)) {
    $h0 = $init_h;
  }

  my $h     = $h0;

  {
    my $R_1_h =
    trapezoid_definite($function, $lo_bound, $hi_bound, $h);

    defined($R_1_h) || return undef;
    $Romberg_table[1] = +[];
    $Romberg_table[1]->[0] = +[ $lo_bound, $hi_bound, $h];
    $Romberg_table[1]->[1] = $R_1_h;
  }

  $level++;
  $h /= 2;

  Romberg_loop:  {
    # Iterate for any given $h.

    # R_{$n, $h}
    # $h <= $h/2 every Romberg_loop iteration
    # $level <= $level+1  as well

    # $n goes from 1 to $level within each loop
    # $n=1:  base case

    # $Romberg_table[$a]->[$b] is
    #
    # R_($b)($h0 / 2^($a-1))

    {
      my $old_Romberg = $Romberg_table[$level-1]->[1];
      my $added_sum   = trapezoid_definite($function, $lo_bound+$h,
      $hi_bound-$h, $h, 1, 0);
      defined($added_sum) || return undef;
      my $new_Romberg = $added_sum + ($old_Romberg/2);

      $Romberg_table[$level] = +[];
      $Romberg_table[$level]->[0] = +[ $lo_bound, $hi_bound, $h];
      $Romberg_table[$level]->[1] = $new_Romberg;
    }

    for (my $n_new=2; $n_new <= $level; $n_new++) {
      my $n        = $n_new-1;
      my $rom_n_h  = $Romberg_table[$level]->[$n];
      my $rom_n_2h = $Romberg_table[$level-1]->[$n];
      my $denom    = (4**$n) - 1;

      my $r_level_new = $rom_n_h + (($rom_n_h - $rom_n_2h) / $denom);

      $Romberg_table[$level]->[$n_new] = $r_level_new;
    }

    my $Romberg_new = $Romberg_table[$level]->[$level];

    if ($level == 1) {
      $level++;
      $h /= 2;
      redo Romberg_loop;
    }

    my $Romberg_old    = $Romberg_table[$level-1]->[$level-1];
    my $diff = abs($Romberg_new - $Romberg_old);

    if ((defined($abs_tol)) && ($diff <= $abs_tol)) {
      last Romberg_loop;
    } elsif ((defined($rel_tol)) && ($diff <= ($rel_tol * $Romberg_old))) {
      last Romberg_loop;
    } else {
      if ($level >= $max_level) {
        last Romberg_loop;
      }

      $level++;
      $h /= 2;
      # print "Romberg loop:  $Romberg_new, $Romberg_old <$lo_bound,$hi_bound> depth $level, diff $diff\n";
      redo Romberg_loop;
    }
  }

  my $Romberg_final = $Romberg_table[$level]->[$level];
  return $Romberg_final;
}



# This version supports APPROXIMATE computation of indefinite
# integrals.  Said approximation will not work nicely on divergent
# integrals...
#
# Basically, it delineates segments and calls Romberg_definite
# on them.  Therefore, it needs to know either
#
#  a) a mid point   plus a segment length (jmp_size), or
#  b) a lower bound plus a segment length, or
#  c) an upper bound plus a segment length
#
# This takes a LOT more arguments, not all of which must be defined.
#
#   function    = reference to function to integrate (mandatory)
#
# Tolerance is also used on a per-interval basis.
sub Romberg_indefinite($$$$$;$$$$) {
  my $function  = shift;
  my $lo_bound  = shift;
  my $mid_point = shift;
  my $hi_bound  = shift;
  my $jmp_size  = shift;
  my $abs_tol   = shift;
  my $rel_tol   = shift;
  my $init_ct    = shift;
  my $init_h    = shift;

  if ((!defined($abs_tol)) && (!defined($rel_tol))) {
    $rel_tol = 0.001;
  }


  if (defined($lo_bound)) {
    if (defined($hi_bound)) {
      # Both ends defined.
      return Romberg_definite($function, $lo_bound, $hi_bound,
      $abs_tol, $rel_tol, $init_ct, $init_h);
    }
  }

  defined($jmp_size) || die("Romberg_indefinite requires segment length.");

  my $sum = 0;

  if ((!defined($mid_point)) && (!defined($lo_bound)) &&
  (!defined($hi_bound))) {
    die "Romberg indefinite requires either a bound or a point.";
  }

  # Scan low, if not lower-bounded.
  if (!defined($lo_bound)) {
    my $temp_hi = defined($hi_bound) ? $hi_bound : $mid_point;
    my $temp_lo = $temp_hi - $jmp_size;
    my $steps   = 0;
    my $max_steps = 10000;

    scan_lower:  {
      my $contribution =
      Romberg_definite($function, $temp_lo, $temp_hi, $abs_tol,
      $rel_tol, $init_ct, $init_h);
      defined($contribution) || return undef;
      my $new_sum = $contribution + $sum;

      if ((($contribution > 0) && ($new_sum <= $sum)) ||
      (($contribution < 0) && ($new_sum >= $sum))) {
        last scan_lower;
      }

      if (defined($abs_tol) && (abs($contribution) < $abs_tol)) {
        $sum = $new_sum;
        last scan_lower;
      } elsif (defined($rel_tol) && (abs($contribution) <
      ($rel_tol * $sum))) {
        $sum = $new_sum;
        last scan_lower;
      }

      if ($steps++ >= $max_steps) {
        $sum = $new_sum;
        last scan_lower;
      }

      $temp_hi  = $temp_lo;
      $temp_lo -= $jmp_size;
      $sum = $new_sum;
      redo scan_lower;
    }
  }

  # Scan high, if not upper-bounded.
  if (!defined($hi_bound)) {
    my $temp_lo = defined($lo_bound) ? $lo_bound : $mid_point;
    my $temp_hi = $temp_lo + $jmp_size;
    my $steps   = 0;
    my $max_steps = 10000;

    scan_higher:  {
      #  print "<$temp_lo, $temp_hi, $abs_tol, $rel_tol>, computing\n";
      my $contribution =
      Romberg_definite($function, $temp_lo, $temp_hi, $abs_tol,
      $rel_tol, $init_ct, $init_h);


      defined($contribution) || return undef;
      my $new_sum = $contribution + $sum;

      if ((($contribution > 0) && ($new_sum <= $sum)) ||
      (($contribution < 0) && ($new_sum >= $sum))) {
        last scan_higher;
      }

      if (defined($abs_tol) && (abs($contribution) < $abs_tol)) {
        $sum = $new_sum;
        last scan_higher;
      } elsif (defined($rel_tol) && (abs($contribution) <
      ($rel_tol * $sum))) {
        $sum = $new_sum;
        last scan_higher;
      }

      if ($steps++ >= $max_steps) {
        $sum = $new_sum;
        last scan_higher;
      }

      $temp_lo  = $temp_hi;
      $temp_hi += $jmp_size;
      $sum = $new_sum;
      redo scan_higher;
    }
  }

  return $sum;
}


# Invokes Gaussian elimination to solve Ax=b.
#
# Arguments:
#   #1 = Matrix A., as a reference to a list of list references,
#        each of which is a row.
#   #2 = A reference to a vector, namely b.
#
# Returns:
#   undef      if inconsistent
#   +[ $x ]    (list reference, containing list reference) if one
#
#  list if infinite:
#   +[ undef ]  completely unconstrained (all variables are free)
#   +[ $x, +[ $idx, $x_h], +[$idx, $x_h ], ...]
#       if constrained and infinite ($x is particular)

sub linear_solve($$) {
  my $A      = shift @_;
  my $b      = shift @_;
  my $row_ct = scalar @$A;
  my $col_ct = scalar @{$A->[0]};

  $A = matrix_preprocess($A);

  if (scalar(@$b) != $row_ct) {
    return undef;
  }

  my @column = vector_preprocess($b);
  $b = \@column;

  my ($P, $L, $U) = matrix_Gaussian_elimination($A);

  # Ax=b
  # PA=LU
  #
  # => PAx=Pb
  #    LUx=Pb
  #
  # Let Ux=c
  # Then...
  #
  #   Lc=Pb

  my $Pb = matrix_multiply($P, $b);
  my $c = lsq_solve_forwards($L, $Pb);
  return lsq_solve_backwards($U, $c);
}



# Solve Lc=b.
#
# Assumes that $L is a lower-triangular square matrix with 1 on the
# diagonal.  Also, b has already been permuted accordingly.  (PA=LU).

sub lsq_solve_forwards($$) {
  my $L = shift;
  my $b = shift;
  my $m = scalar(@$L);
  my $e = +[];

  die unless ($m == scalar(@{$L->[0]}));
  die unless ($m == scalar(@{$b}));
  my $c = +[];

  my @column = vector_preprocess($b);
  $b = \@column;

  my $i=0;

  for ($i=0; $i < $m; $i++) {
    $c->[$i] = $b->[$i];

    my $j=0;
    for ($j=$i+1; $j < $m; $j++) {
      $b->[$j] -= ($L->[$j]->[$i] * $b->[$i]);
    }
  }

  return $c;
}



# Solve Ux=c.
#
# Assumes that $U is an upper-triangular $m-by-$n matrix,
# and that $c was solved forwards from Lc=b.
#
# We return:
#
#    undef if there is no solution (inconsistent)
#    +[ $x ]    if there is exactly one solution ($x a list reference)
#    +[ undef ]     if the problem is (0x=c).
#
#    +[ $x, +[ free index, +[ homogeneous ]],
#          +[ free index, +[ homogeneous ]]...]
#
#    if there are infinite, but constrained solutions.  The free
#    index is the 0-based index indicating which variable is free.
sub lsq_solve_backwards($$) {
  my $U = shift;
  my $c = shift;
  my $m = scalar(@$U);
  my $n = scalar(@{$U->[0]});

  my $x = +[];

  die unless ($m == scalar(@$c));

  my @column = vector_preprocess($c);
  $c = \@column;

  # Identify the number of pivots.  This is equal to the number of
  # rows of $U which are nonzero.
  my $r=0;
  row_loop:  for ($r=0; $r < $m; $r++) {
    my $i=0;

    col_loop:  for ($i=0; $i < $n; $i++) {
      if ($U->[$r]->[$i] != 0) {
        next row_loop;
      }
    }
    last row_loop;
  }

  for (my $i=$r; $i < $m; $i++) {
    if ($c->[$i] != 0) {
      return undef;
    }
  }

  # At least one solution exists.

  if ($r == $n) {
    my $i=0;
    # No free variables.  This is the nice case.
    for ($i=$r-1; $i >= 0; $i--) {
      $x->[$i] = $c->[$i]/($U->[$i]->[$i]);

      my $j=0;
      for ($j=$i-1; $j >= 0; $j--) {
        $c->[$j] -= ($x->[$i]) * ($U->[$j]->[$i]);
      }
    }

    return +[ $x ];
  } elsif ($r == 0) {
    # Completely unconstrained, anything goes.  We say 'undef' because
    # this is special.  We could, of course, simply return
    #
    # +[ +[ 0...0 ], +[0, [1, 0...]], +[1, [0, 1, 0...]] ...]
    #
    # but this is *probably* an error or otherwise worth noting...

    return +[undef];
  } else {
    # 0 < $r < $n.
    # Means $n-$r free variables running around.
    # Compute free list.
    my @base = ();
    my @free = ();
    my %pivot = ();
    my %tovip = ();
    my @answer = ();

    {
      my $row = 0;
      my $col = 0;

      for ($col=0; $col < $n; $col++) {
        if (($row < $m) && ($U->[$row]->[$col] != 0)) {
          $pivot{$col} = $row;
          $tovip{$row} = $col;
          push @base, $col;
          $row++;
        } else {
          push @free, $col;
          $pivot{$col} = undef;
        }
      }
    }

    # Solve for the base variables.  Ignore the free variables.
    my $c_copy = +[ @$c ];

    {
      my $col = $n-1;

      for ($col=$n-1; $col >= 0; $col--) {
        my $row = $pivot{$col};

        if (!defined($row)) {
          $x->[$col] = 0;
        } else {
          $x->[$col] = $c_copy->[$row] / $U->[$row]->[$col];
          my $row_minus=$row-1;
          for ($row_minus=$row-1; $row_minus >= 0; $row_minus--) {
            $c_copy->[$row_minus] -= ($x->[$col] * $U->[$row_minus]->[$col]);
          }
        }
      }
    }

    push @answer, $x;
    {
      # Solve for the free variables.
      my $free_idx = undef;

      foreach $free_idx (@free) {
        my $zero = +[ (0) x $m ];
        my $xh = +[];
        my $i  = 0;

        for ($i=$n-1; $i >= 0; $i--) {
          if ($i == $free_idx) {
            # This free variable.
            $xh->[$i] = 1;
          } elsif (!defined($pivot{$i})) {
            # Free variable, besides this one.
            $xh->[$i] = 0;
            next;
          } else {
            # Base variable.  Pivot in row $pivot{$i}, column $i.
            $xh->[$i] = $zero->[$pivot{$i}] / ($U->[$pivot{$i}]->[$i]);
          }

          my $j=$i-1;

          if ($j >= $m) {
            $j = $m-1;
          }

          for (; $j >=0; $j--) {
            defined($zero->[$j]) || die;

            defined($xh->[$i]) || die;

            defined($U->[$j]->[$i]) || die;

            $zero->[$j] -= ($xh->[$i]) * ($U->[$j]->[$i]);
          }
        }

        push @answer, +[ $free_idx, $xh];
      }
    }

    return \@answer;
  }
}



# Here's another way to solve linear equations.  This provides the
# shortest least squares solution to Ax=b, using the pseudoinverse
# of A.  Now has an 'augment' parameter, normally 0.
#
# Returns an array of coefficients (constant term last) if you
# want it, ref to array otherwise.
sub linear_solve_SVD($$;$) {
  my $A = shift;
  my $b = shift;
  my $augment = shift;

  $augment = (defined($augment) && $augment) ? $augment : 0;

  if (!(defined($A) && defined($b))) {
    return undef;
  }

  $A = matrix_preprocess($A);
  my $B_matrix = matrix_preprocess($b);

  my $A_rows = scalar(@$A);
  my $A_cols = scalar(@{$A->[0]});
  my $B_rows = scalar(@$B_matrix);
  my $B_cols = scalar(@{$B_matrix->[0]});

  ($A_rows == $B_rows) || die;
  ($B_cols == 1) || die;

  if (defined($augment) && $augment) {
    $A = matrix_duplicate($A);
    map { push @{$_}, 1 } @$A;
    $A_cols++;
  }

  _NUMERICAL_debug_printf "SVD lsq solver - Generating pseudoinverse\n";
  my $pseudoinverse = matrix_pseudoinverse($A);
  _NUMERICAL_debug_printf "SVD lsq solver - got pseudoinverse\n";

  if (!defined($pseudoinverse)) {
    return undef;
  }

  my $PI_rows = scalar(@$pseudoinverse);
  my $PI_cols = scalar(@{$pseudoinverse->[0]});

  ($PI_rows == $A_cols) || die;
  ($PI_cols == $A_rows) || die;

  my $projection = matrix_multiply($pseudoinverse, $B_matrix);

  if (!defined($projection)) {
    return undef;
  }

  my @xvec = vector_preprocess($projection);

  (scalar(@xvec) == $A_cols) || die;

  if (wantarray()) {
    return @xvec;
  } else {
    return \@xvec;
  }
}




# This is, really, just a wrapper to linear_solve_SVD() specialized
# to the case of linear modeling.  Namely, it adds an additional
# column of 1s and an additional parameter for the constant term.
# This corresponds to the last coefficient.
#
# Returns an array of coefficients (constant term last) if you
# want it, ref to array otherwise.
sub lsq_linfit_SVD($$) {
  my $A = shift;
  my $b = shift;

  # Augmentation now supported by linear_solve_SVD.

  if (wantarray()) {
    my @output = linear_solve_SVD($A, $b, 1);
    return @output;
  } else {
    my $output = linear_solve_SVD($A, $b, 1);
    return $output;
  }
}




# Use recursive least-squares to solve Ax=b.
sub linear_solve_RLS($$;$) {
  my $A       = shift;
  my $b       = shift;
  my $augment = shift;

  $augment = (defined($augment) && $augment) ? 1 : 0;

  if ((!defined($A)) || (!defined($b))) {
    return undef;
  }

  $A = matrix_preprocess($A);
  $b = vector_preprocess($b);

  my $A_rows = scalar(@$A);
  my $A_cols = scalar(@{$A->[0]});
  my $b_rows = scalar(@$b);

  if ($A_rows != $b_rows) {
    return undef;
  }

  my $d = $A_cols + $augment;
  my $x = +[(0) x $d];
  my $gain = undef;

  if (!($augment)) {
    for (my $i=0; $i < $A_rows; $i++) {
      my $vec = $A->[$i];
      my $out = $b->[$i];

      ($x, $gain) = rls_update($vec, $out, $x, $gain);
    }
  } else {
    for (my $i=0; $i < $A_rows; $i++) {
      my $vec = $A->[$i];
      my $out = $b->[$i];

      $vec = +[ @$vec, 1];
      ($x, $gain) = rls_update($vec, $out, $x, $gain);
    }
  }

  if (wantarray()) {
    return @$x;
  } else {
    return $x;
  }
}


# Use recursive least-squares to solve A'x=b.
# It's really just a macro for calling linear_solve_RLS with
# augmentation.  As with the SVD-based method, the final
# coefficient corresponds to the constant factor.
sub lsq_linfit_RLS($$) {
  my $A = shift;
  my $b = shift;

  if (wantarray()) {
    my @output = linear_solve_RLS($A, $b, 1);
    return @output;
  } else {
    my $output = linear_solve_RLS($A, $b, 1);
    return $output;
  }
}

# Gaussian elimination.
#
#   Argument:
#      A reference to a list.  The elements of the list should
#      be the individual rows of the matrix A, each represented
#      by another list (reference).  Hence, +[ +[ 1, 2], +[3, 4] ]
#      corresponds to
#
#        | 1  2 |
#        | 3  4 |
#
#      You can give it an additional parameter which, if nonzero,
#      will disallow row exchanges.  This is not normally
#      recommended, but will help if you want to force an
#          A=LU
#      decomposition.
#  Assumptions:
#    All the standard assumptions of Gaussian elimination.
#
#  Returns:  (PA = LU)
#    P,L,U
#    or just L,U (A=LU) if exchanges prohibited.
#
#    P = permutation matrix
#    L = lower-triangular matrix
#    U = upper-triangular matrix

sub matrix_Gaussian_elimination($;$) {
  my $A = shift;
  my $permutations_prohibited = (scalar(@_)) ? shift : 0;

  my $m = scalar @$A;
  my $n = scalar @{$A->[0]};
  my $P = matrix_identity($m);
  my $L = matrix_identity($m);
  my $U = matrix_duplicate($A);

  # Make it upper-triangular.  Well, moreso:  put ones on the
  # diagonal, 0 everywhere else.
  #
  # We first need to find pivots.
  {
    my $col       = 0;
    my $pivot_row = 0;   # Start looking in first row.

    for ($col=0; $col < $n; $col++) {
      # Is there a pivot in this column?  Yes, if there is a
      # non-zero value in a row that is in the desired pivot row, or
      # later.
      my $pivot_src = undef;

      if ($pivot_row >= $m) {
        # Ran out of rows in which to find pivots.
        last;
      }

      if ($permutations_prohibited) {
        $pivot_row = $col;
        $pivot_src = $col;

        if (!($U->[$col]->[$col])) {
          # Oops.  Should have allowed row exchanges.
          return undef;
        }
      } else {
        {
          my $pivot_val = undef;
          my $row = 0;
          for ($row=$pivot_row; $row < $m; $row++) {
            my $x = $U->[$row]->[$col];

            if (($x != 0) && ((!defined($pivot_val)) ||
            (abs($pivot_val) < abs($x)))) {
              # Bias towards larger numbers.  We'll be dividing by the pivot,
              # and dividing by really tiny numbers results in numerical
              # instability.

              $pivot_src = $row;
              $pivot_val = $x;
            }
          }
        }

        if (!defined($pivot_src)) {
          # No.  Next column, then; try for the same row.
          # This corresponds to a free variable.
          next;
        }

        if ($pivot_src != $pivot_row) {
          # Swap them.  We prefer larger magnitudes to reduce
          # numerical issues.

          my $r_old = $U->[$pivot_row];
          $U->[$pivot_row] = $U->[$pivot_src];
          $U->[$pivot_src] = $r_old;

          # Update the permutation matrix.  We first generate a
          # m-by-m identity, and then move two 1's.
          my $P_mult = matrix_identity($m);

          $P_mult->[$pivot_src]->[$pivot_src] = 0;
          $P_mult->[$pivot_row]->[$pivot_row] = 0;
          $P_mult->[$pivot_row]->[$pivot_src] = 1;
          $P_mult->[$pivot_src]->[$pivot_row] = 1;

          $P = matrix_multiply($P_mult, $P);

          {
            # Update the L matrix.  If we don't do this, the indices in
            # L will not be internally consistent if ANY row exchanges
            # happen.

            my $L_new = +[];
            my $i     = 0;

            for ($i=0; $i < $m; $i++) {
              my $j=0;
              my $i_new = $i;

              if ($i_new == $pivot_src) {
                $i_new = $pivot_row;
              } elsif ($i_new == $pivot_row) {
                $i_new = $pivot_src;
              }

              $L_new->[$i] = +[];

              for ($j=0; $j < $m; $j++) {
                my $j_new = $j;

                if ($j_new == $pivot_src) {
                  $j_new = $pivot_row;
                } elsif ($j_new == $pivot_row) {
                  $j_new = $pivot_src;
                }

                $L_new->[$i]->[$j] = $L->[$i_new]->[$j_new];
              }
            }

            $L = $L_new;
          }
        }
      }

      # Now, the pivot is in $U->[$pivot_row, $col].  Subtract a
      # multiple of this row from subsequent rows, if need be, and
      # update L with the coefficients.
      my $next_row = $pivot_row+1;
      my $src_row  = $U->[$pivot_row];
      for ($next_row=$pivot_row+1; $next_row < $m; $next_row++) {
        my $factor = $U->[$next_row]->[$col] / $U->[$pivot_row]->[$col];
        my $col2   = 0;

        $L->[$next_row]->[$pivot_row] = $factor;

        for ($col2=0; $col2 < $n; $col2++) {
          $U->[$next_row]->[$col2] -= $factor * ($src_row->[$col2]);
        }
      }

      # Look for the pivot in the next row.
      $pivot_row++;
    }
  }

  $L = matrix_fix($L);
  $U = matrix_fix($U);

  if ($permutations_prohibited) {
    return ($L, $U);
  } else {
    return ($P, $L, $U);
  }
}


# Perform A=LU decomposition using Gaussian elimination.
# May fail since row exchanges are not permitted -- either
# because a 0 was found or because of numerical precision
# problems.  More of a macro than anything else.
sub matrix_LU($) {
  my $A = shift;

  $A = matrix_preprocess($A);

  # Verify squareness.
  my $row_ct     = scalar @$A;

  # Don't accept nonsense.
  ($row_ct > 0) || return undef;

  my $col_ct = scalar @{$A->[0]};

  if ($row_ct != $col_ct) {
    return undef;
  }

  my ($L, $U) = matrix_Gaussian_elimination($A, 1);

  if (!defined($L)) {
    return undef;
  }

  return ($L, $U);
}


# Perform A=LDU decomposition using Gaussian elimination.
# May fail since row exchanges are not permitted -- either
# because a 0 was found or because of numerical precision
# problems.
sub matrix_LDU($) {
  my $A = shift;
  my ($L, $U) = matrix_LU($A);

  if (!defined($L)) {
    return undef;
  }

  # matrix_LU already verified squareness.
  # Determining
  #   A=LDU'
  # from
  #   A=LU
  # is a matter of factoring out the pivots.
  my $n = scalar @$U;
  my $D = matrix_new($n, $n);

  for (my $i=0; $i < $n; $i++) {
    my $pivot = $U->[$i]->[$i];

    $D->[$i]->[$i] = $pivot;
    $U->[$i]->[$i] = 1;

    for (my $j=$i+1; $j < $n; $j++) {
      $U->[$i]->[$j] /= $pivot;
    }
  }

  return ($L, $D, $U);
}




# Perform A=P^{-1}LDU decomposition using Gaussian elimination.
sub matrix_PLDU($) {
  my $A = shift;

  $A = matrix_preprocess($A);

  # Verify squareness.
  my $row_ct     = scalar @$A;

  # Don't accept nonsense.
  ($row_ct > 0) || return undef;

  my $col_ct = scalar @{$A->[0]};

  if ($row_ct != $col_ct) {
    return undef;
  }

  my ($P, $L, $U) = matrix_Gaussian_elimination($A);

  if (!defined($L)) {
    return undef;
  }

  # matrix_LU already verified squareness.
  # Determining
  #   A=LDU'
  # from
  #   A=LU
  # is a matter of factoring out the pivots.
  my $n = scalar @$U;
  my $D = matrix_new($n, $n);

  for (my $i=0; $i < $n; $i++) {
    my $pivot = $U->[$i]->[$i];

    $D->[$i]->[$i] = $pivot;

    if ($pivot != 0) {
      $U->[$i]->[$i] = 1;

      for (my $j=$i+1; $j < $n; $j++) {
        $U->[$i]->[$j] /= $pivot;
      }
    }
  }

  return ($P, $L, $D, $U);
}


# Univariate version only.  Takes
#   a) an initial guess
#   b) a function
#   c) a derivative of that function
#   d) an error threshold (deviation from zero permitted) [1e-6]
#   e) maximum number of iterations [200]
#
## and iteratively attempts to find a zero.
#
#  Returns ($x, $fref($x)).
sub Newtons_method($$;$$$) {
  my $x0      = shift;
  my $fref    = shift;
  my $fdev    = shift;
  my $thresh  = shift;
  my $itermax = shift;

  my $x      = $x0;
  my $iter   = 0;

  if (!defined($thresh)) {
    $thresh = 1e-6;
  }

  if (!defined($itermax)) {
    # We do want termination... and this default value should suffice
    # unless the threshold is set extremely tiny.
    $itermax = 200;
  }

  if (!defined($fdev)) {
    $fdev = sub {
      my $x = shift @_;
      return leapfrog_diff($fref, $x);
    };
  }

  iter_loop:  {
    my $fx    = &$fref($x);
    my $fdevx = &$fdev($x);

    if (abs($fx) < $thresh) {
      if (wantarray()) {
        return ($x, $fx);
      } else {
        return $x;
      }
    }

    # Last iteration, or local minimum?
    if (($iter == $itermax) || ($fdevx == 0)) {
      if (wantarray()) {
        return ($x, $fx);
      } else {
        return $x;
      }
    } else {
      $x = $x - $fx/$fdevx;
      $iter++;
      redo iter_loop;
    }
  }
}


# Newton-Raphson for solving a system of two simultaneous equations in
# two unknowns.
#
# The required parameters are (8):
#     f      (subroutine reference, must take two arguments -- x,y)
#     df/dx  (ditto)
#     df/dy  (ditto)
#     g      (ditto)
#     dg/dx  (ditto)
#     dg/dy  (ditto)
#     x0     (scalar; initial guess)
#     y0     (scalar; initial guess)
# Optional arguments:
#     Maximum error tolerated (absolute value of f(x,y), g(x,y).
#     Maximum number of iterations
#
# Equations are of the form
#     f(x,y) = 0
#     g(x,y) = 0
#
# and may be wildly nonlinear.
#
# Returned values:
#     (x,y)
#
sub Newton2($$$$$$$$;$$) {
  my $f      = shift;
  my $df_dx  = shift;
  my $df_dy  = shift;
  my $g      = shift;
  my $dg_dx  = shift;
  my $dg_dy  = shift;
  my $x0     = shift;
  my $y0     = shift;
  my $errtol = shift;
  my $iter_max = shift;


  if (!defined($errtol)) {
    $errtol = 0.00001;
  }

  if (!defined($iter_max)) {
    $iter_max = 1000;
  }


  my $iter = 0;

  loop:  {
    my $f0      = &$f($x0, $y0);
    my $g0      = &$g($x0, $y0);
    my $df_dx0  = &$df_dx($x0, $y0);
    my $df_dy0  = &$df_dy($x0, $y0);
    my $dg_dx0  = &$dg_dx($x0, $y0);
    my $dg_dy0  = &$dg_dy($x0, $y0);

    (defined($f0) && defined($g0) &&
    defined($df_dx0) && defined($df_dy0) &&
    defined($dg_dx0) && defined($dg_dy0)) ||
    return ($x0, $y0);

    my $denom   = (($df_dx0 * $dg_dy0) -
    ($df_dy0 * $dg_dx0));

    ($denom != 0) || return undef;

    my $dx      = (($g0 * $df_dy0) - ($f0 * $dg_dy0)) / $denom;
    my $dy      = (($f0 * $dg_dx0) - ($g0 * $df_dx0)) / $denom;

    my $x1 = $x0 + $dx;
    my $y1 = $y0 + $dy;

    my $f1 = &$f($x1, $y1);
    my $g1 = &$g($x1, $y1);

    (defined($f1) && defined($g1)) || return ($x0, $y0);

    if ((abs($f1) < $errtol) && (abs($g1) < $errtol)) {
      return ($x1, $y1);
    } elsif ((++$iter) >= $iter_max) {
      # That's enough.
      return ($x1, $y1);
    } else {
      $x0 = $x1;
      $y0 = $y1;
      redo loop;
    }
  }
}


# Leapfrog differentiation.
#
# Given subroutine reference $f, value $x, and delta $h
# (default 0.0001), approximate f'(x) as
#
#   (&$f($x+$h) - &$f($x-$h)) / (2*$h)
#
# which should give small errors for low $h.
sub leapfrog_diff($$;$) {
  my $f = shift;
  my $x = shift;
  my $h = shift;

  $h = (defined($h)) ? $h : 0.00001;

  my $fx_more = &$f($x+$h);
  my $fx_less = &$f($x-$h);

  if ((!defined($fx_more)) || (!defined($fx_less)) ||
  ($h == 0)) {
    return undef;
  }

  my $diff = (($fx_more - $fx_less) / (2*$h));

  return $diff;
}





# A function for computing sum-of-squared residuals.  Give it a ref
# to the function, and refs to two parallel arrays which contain
# input (x) and desired output (y) values.
#
# If wantarray() is true, then the residuals (NOT squared) are
# also returned.
#
# You can now pass in a reference to an array of weights.  The
# residuals WILL be multiplied by the weights.  Note that
# the weights are not normalized in any way....
sub compute_ssq_residuals($$$;$) {
  my $f     = shift;
  my $x_ref = shift;
  my $y_ref = shift;
  my $wtd_ref = shift;

  my $n   = scalar @$x_ref;
  my $ssq = 0;
  my @r  = ();

  ($n == (scalar @$y_ref)) || die;

  if (!defined($wtd_ref)) {
    for (my $i=0; $i < $n; $i++) {
      my $val = 0;
      my $y   = $y_ref->[$i];
      my $x   = $x_ref->[$i];

      $val = ref($x) ? &$f(@$x) : &$f($x);

      defined($val) || die;

      (!(ref $y)) || die;
      (!(ref $val)) || die;

      my $diff = $y - $val;
      my $square = $diff * $diff;

      # Overflow check
      die if (($ssq + $square) < $ssq);


      $ssq += $square;

      $r[$i] = $diff;
    }
  } else {
    for (my $i=0; $i < $n; $i++) {
      my $val = 0;
      my $y   = $y_ref->[$i];
      my $x   = $x_ref->[$i];

      $val = ref($x) ? &$f(@$x) : &$f($x);

      defined($val) || die;
      my $wt = $wtd_ref->[$i];
      $wt = defined($wt) ? $wt : 1;

      my $diff   = ($y - $val) * $wt;
      my $square = $diff * $diff;

      # Overflow check
      die if (($ssq + $square) < $ssq);

      $r[$i] = $diff;
      $ssq += $square;
    }
  }

  if (wantarray()) {
    return ($ssq, \@r);
  } else {
    return $ssq;
  }
}



# Return the dot product of two vectors.  They should be supplied
# as list references.
sub vector_dot_product($$) {
  my $avec = shift;
  my $bvec = shift;

  my $ct   = scalar @$avec;

  if ($ct != (scalar @$bvec)) {
    return undef;
  }

  my $sum = 0;
  my $i   = 0;

  for ($i=0; $i < $ct; $i++) {
    if (!defined($avec->[$i])) {
      return undef;
    }

    if (!defined($bvec->[$i])) {
      return undef;
    }

    $sum += ($avec->[$i]) * ($bvec->[$i]);
  }

  return $sum;
}


# Return the transpose of a matrix.
sub matrix_transpose($) {
  my $A = shift;


  $A = matrix_preprocess($A);
  my $m = scalar @$A;
  my $n = scalar @{$A->[0]};

  my $B = matrix_new($n, $m);

  for (my $i=0; $i < $m; $i++) {
    for (my $j=0; $j < $n; $j++) {
      $B->[$j]->[$i] = $A->[$i]->[$j];
    }
  }

  return $B;
}



# Compute nullspace of A by solving Ax=0.  The new matrix has as
# many columns as A has free variables, and as many rows as A.
sub matrix_nullspace($) {
  my $A         = shift;
  $A = matrix_preprocess($A);
  my $m         = scalar @$A;
  ($m > 0) || return undef;
  my $n         = scalar @{$A->[0]};
  my $rank      = matrix_rank($A);

  if ($rank == $n) {
    # Fully specified!
    return +[];
  }

  my $N         = matrix_new($m, $n-$rank);
  my $solutions = linear_solve($A, +[(0) x $m]);
  my $col       = 0;

  foreach (@$solutions) {
    my $solution = $_;
    ((ref $solution) eq 'ARRAY') || next;
    my ($var_idx, $x_h) = @{$solution};

    ((ref $x_h) eq 'ARRAY') || next;

    $x_h = vector_normalize($x_h);
    for (my $row=0; $row < $n; $row++) {
      $N->[$row]->[$col] = $x_h->[$row];
    }
    $col++;
  }

  # Sanity check.
  die unless ($col == ($n-$rank));

  return $N;
}



# A simple function for printing matrices.
sub matrix_print($) {
  my $A = shift;

  my $rows = scalar @$A;
  my $cols = scalar @{$A->[0]};

  for (my $row=0; $row < $rows; $row++) {
    print "|  ";

    my @join_me = ();

    for (my $col=0; $col < $cols; $col++) {
      my $str = sprintf "% 10.4f", $A->[$row]->[$col];
      push @join_me, $str;
    }

    print join("  ", @join_me), "  |\n";
  }
}



# Compute the rank of a matrix using Gaussian elimination
# (PA=LU factorization).  In array context, return P,L,U
# as well.
sub matrix_rank($) {
  my $A = shift;
  my ($P, $L, $U) = matrix_Gaussian_elimination($A);

  # Count the pivots in U.
  my $r  = 0;
  my $Um = scalar @$U;

  if ($Um == 0) {
    return 0;
  }

  my $Un = scalar @{$U->[0]};

  outer:
  for (my $i=0; $i < $Um; $i++) {
    my $found_nonzero = 0;

    inner:
    for (my $j=$r; $j < $Un; $j++) {
      if ($U->[$i]->[$j] != 0) {
        $found_nonzero = 1;
        $r++;
        last inner;
      }
    }

    ($found_nonzero) || last outer;
  }

  if (wantarray()) {
    return ($r, $P, $L, $U);
  } else {
    return $r;
  }
}



# Given n (rows) and m (cols), create an n-by-m matrix containing
# all zeroes.  If m is not defined, treat it as a request for a
# n-by-1 column vector.
sub matrix_new($;$) {
  my $n = shift;
  my $m = shift;

  defined($n) || die;

  $m = defined($m) ? $m : 1;

  my $A = +[];

  for (1..$n) {
    push @$A, +[ (0) x $m ];
  }

  return $A;
}


# Return a new zero-vector.
sub vector_new($) {
  my $n = shift;

  return +[ (0) x $n ];
}


# Given a size n, return an n-by-n identity matrix.
sub matrix_identity($) {
  my $n = shift;
  my $I = +[];

  defined($n) || die;
  for (my $i=0; $i < $n; $i++) {
    $I->[$i] = +[ (0) x $n ];
    $I->[$i]->[$i] = 1;
  }

  return $I;
}


# Given a matrix, return a duplicate -- a deep copy that doesn't use
# the same references.  This is just an alias for matrix_preprocess.
sub matrix_duplicate($) {
  my $A = shift;

  $A = matrix_preprocess($A);
  my $B = +[];
  map { push @$B, +[ @{$_} ] } @$A;

  return $B;
}



# Take low magnitude values (magnitude < 1e-8) and change them to 0;
# magnitudes that low may be indicative of a precision problem and
# the niceties of IEEE doubles -- since you get a finite number of
# bits, an infinite number of numbers within the possible range
# cannot be represented precisely.

sub matrix_fix($) {
  my $A = shift;

  $A = matrix_preprocess($A);

  my $n = scalar @$A;
  my $m = scalar @{$A->[0]};

  for (my $row=0; $row < $n; $row++) {
    for (my $col=0; $col < $m; $col++) {
      if (abs($A->[$row]->[$col]) < 1e-10) {
        $A->[$row]->[$col] = 0;
      }
    }
  }

  return $A;
}


# Given two matrices, each specified as a list reference of rows,
# each of which is a list reference itself, multiply them and return
# a new matrix.

sub matrix_multiply($$) {
  my $A = shift;
  my $B = shift;

  defined($A) || die;
  defined($B) || die;

  $A = matrix_preprocess($A);
  $B = matrix_preprocess($B);

  my $A_rows = scalar(@$A);
  my $B_rows = scalar(@$B);
  my $A_cols = scalar(@{$A->[0]});
  my $B_cols = scalar(@{$B->[0]});

  ($A_rows > 0) || return undef;
  ($B_rows > 0) || return undef;
  ($A_cols > 0) || return undef;
  ($B_cols > 0) || return undef;

  if (($A_rows == 1) && ($A_cols == 1)) {
    $A = $A->[0]->[0];
  }

  if (($B_rows == 1) && ($B_cols == 1)) {
    $B = $B->[0]->[0];
  }




  {
    my $C = undef;
    my $factor = undef;
    my $C_rows = undef;
    my $C_cols = undef;

    if (!(ref($A))) {
      $factor = $A;
      if (!(ref($B))) {
        # Scalar * Scalar.
        return +[ +[ $A*$B ] ];
      } else {
        # Scalar * Matrix.
        $C = matrix_duplicate($B);
        $C_rows = $B_rows;
        $C_cols = $B_cols;
      }
    } elsif (!ref($B)) {
      # Matrix * Scalar.
      $factor = $B;
      $C = matrix_duplicate($A);
      $C_rows = $A_rows;
      $C_cols = $A_cols;
    }

    if (defined($C)) {
      for (my $row=0; $row < $C_rows; $row++) {
        for (my $col=0; $col < $C_cols; $col++) {
          $C->[$row]->[$col] *= $factor;
        }
      }
      return $C;
    }
  }

  if ($A_cols != $B_rows) {
    return undef;
  }

  # Transpose, so we do row multiplies.
  my $B_transpose = matrix_transpose($B);
  my $C = +[];

  my $i=0;
  my $j=0;

  for ($i=0; $i < $A_rows; $i++) {
    my @new_row = ();

    for ($j=0; $j < $B_cols; $j++) {
      push @new_row, vector_dot_product($A->[$i], $B_transpose->[$j]);
    }
    push @$C, \@new_row;
  }

  return $C;
}



# Add two matrices and return a new one.
sub matrix_add($$) {
  my $A = shift;
  my $B = shift;

  $A = matrix_preprocess($A);
  $B = matrix_preprocess($B);

  my $rows = scalar @$A;
  my $cols = scalar @{$A->[0]};

  die unless ($rows == scalar @$B);
  die unless ($cols == scalar @{$B->[0]});

  my $C = matrix_new($rows, $cols);

  for (my $row=0; $row < $rows; $row++) {
    for (my $col=0; $col < $cols; $col++) {
      $C->[$row]->[$col] = $A->[$row]->[$col] + $B->[$row]->[$col];
    }
  }

  return $C;
}



# Decompose a matrix A into A=QR, where Q is orthonormal and R
# is upper-triangular.  The columns of A should be linearly
# independent...
#
# We return the matrices Q and R, as lists of rows as usual.
# We also return QT since it may be useful (it's the inverse of
# Q for square Q) and we just computed it.
#
sub matrix_QR($) {
  my $A       = shift;
  my $A_rows  = scalar(@$A);
  my $AT      = matrix_transpose($A);  # work with columns, they're now rows
  my $A_cols  = scalar(@$AT);

  # Determine the components q0, q1, .., q($A_cols - 1);
  my $QT       = +[];
  my $i       = 0;

  # Gram-Schmidt.
  for ($i=0; $i < $A_cols; $i++) {
    my $b    = $AT->[$i];
    my $bnew = +[ @$b ];
    my $j = 0;
    # Subtract previous components, if any.

    for ($j=0; $j < $i; $j++) {
      # Specifically, subtract (q_{j}^{T}b)q_{j}.
      my $qj     = $QT->[$j];
      my $factor = 0;

      {
        # Multiply the two vectors to find the scalar.
        my $k=0;
        for ($k=0; $k < $A_rows; $k++) {
          $factor += ($qj->[$k]) * ($bnew->[$k]);
        }
      }

      # Scalar times a vector gets subtracted.
      {
        my $k=0;
        for ($k=0; $k < $A_rows; $k++) {
          $bnew->[$k] -= ($factor * $qj->[$k]);
        }
      }
    }

    # Other components should now be removed.  Normalize the vector.
    {
      my $mag = 0;
      my $k   = 0;

      for ($k=0; $k < $A_rows; $k++) {
        $mag += ($bnew->[$k]) * ($bnew->[$k]);
      }

      $mag = sqrt($mag);

      if ($mag != 0) {
        for ($k=0; $k < $A_rows; $k++) {
          $bnew->[$k] /= $mag;
        }
      }
    }

    push @$QT, +[ @$bnew ];
  }

  # Correct spurious low-magnitude values.
  $QT  = matrix_fix($QT);

  # QT is really a tranpose, because the Q vectors are rows when they
  # should be columns.
  my $Q = matrix_transpose($QT);

  # Compute R.
  my $R = +[];

  {
    my $i = 0;
    for ($i=0; $i < $A_cols; $i++) {
      my @new_row = ();
      my $j=0;

      my $qi = $QT->[$i];

      for ($j=0; $j < $A_cols; $j++) {
        # R_{ij} = q_{i}^{T} * jth column of A
        my $aj = $AT->[$j];
        my $k=0;
        my $sum = 0;
        for ($k=0; $k < $A_rows; $k++) {
          $sum += $qi->[$k] * $aj->[$k];
        }
        push @new_row, $sum;
      }

      push @$R,  \@new_row;
    }
  }


  $R = matrix_fix($R);

  return ($Q, $R, $QT);
}




# Given a lower-triangular matrix, compute its inverse.
sub matrix_lt_inverse($) {
  my $L = shift;
  my $n = scalar @{$L->[0]};

  my $D = matrix_identity($n);
  my $non_unity_diagonal = 0;

  # Verify lower-triangular status, and fix the diagonal if need be.
  for (my $row=0; $row < $n; $row++) {
    my $diag = $L->[$row]->[$row];

    if ($diag == 0) {
      return undef;
    }

    if ($diag != 1) {
      $non_unity_diagonal = 1;
      $D->[$row]->[$row] = 1/$diag;
    }

    for (my $col=$row+1; $col < $n; $col++) {
      if (abs($L->[$row]->[$col]) >= 1e-5) {
        return undef;
      }
    }
  }

  my $X = undef;

  if ($non_unity_diagonal) {
    $X = matrix_multiply($L, $D);
  } else {
    $X = $L;
  }


  # If need be, LD=X where X is lower-triangular.  In this case,
  # L^{-1} = DX^{-1}
  my $E = matrix_identity($n);

  for (my $col=0; $col < ($n-1); $col++) {
    my $elim = matrix_identity($n);

    # Column-wise elimination matrix
    for (my $row=$col+1; $row < $n; $row++) {
      $elim->[$row]->[$col] = -($X->[$row]->[$col]);
    }

    # Order is important here!
    $E = matrix_multiply($elim, $E);
  }

  # E=X^{-1}
  if ($non_unity_diagonal) {
    $E = matrix_multiply($D, $E);
  }

  matrix_fix($E);
  return $E;
}




# Given a matrix, return its inverse as found via elimination.
sub matrix_Gaussian_inverse($) {
  my $A = shift;

  my $n = scalar @$A;
  my $m = scalar @{$A->[0]};

  if ($n != $m) {
    return undef;
  }

  # PA=LU
  # P=LUA^{-1}
  # L^{-1}P = UA^{-1}
  # U^{-1}L^{-1}P = A^{-1}

  my ($r, $P, $L, $U) = matrix_rank($A);

  defined($r) || die;

  if ($r < $n) {
    # Singular.
    return undef;
  }

  my $L_inverse  = matrix_lt_inverse($L);

  if (!defined($L_inverse)) {
    return undef;
  }

  my $UT         = matrix_transpose($U);
  my $UT_inverse = matrix_lt_inverse($UT);

  if (!defined($UT_inverse)) {
    return undef;
  }

  my $U_inverse  = matrix_transpose($UT_inverse);

  my $inverse    = $P;

  $inverse = matrix_multiply($L_inverse, $P);
  $inverse = matrix_multiply($U_inverse, $inverse);

  return $inverse;
}



# Given a matrix, return its inverse as found via QR factorization.
#
# There appear to be some numerical precision issues with this
# approach, so if you get tiny nonzero values where there should be
# 0s, consider trying matrix_Gaussian_inverse instead.
sub matrix_QR_inverse($) {
  my $A = shift;

  my $n = scalar @$A;
  my $m = scalar @{$A->[0]};

  if ($n != $m) {
    return undef;
  }

  # A = QR
  #   Q orthonormal, so Q x QT = I
  # R = upper triangular
  #  (so transpose it and use the lt code)
  #
  # Then:
  #
  # R^{-1}QT = A^{-1}

  my ($Q, $R, $QT) = matrix_QR($A);

  my $RT         = matrix_transpose($R);
  my $RT_inverse = matrix_lt_inverse($RT);

  if (!defined($RT_inverse)) {
    return undef;
  }

  my $R_inverse  = matrix_transpose($RT_inverse);
  my $inverse    = $QT;

  $inverse = matrix_multiply($R_inverse, $inverse);

  return $inverse;
}


# Try Gaussian elimination first, SVD second.
sub matrix_inverse($) {
  my $A = shift;
  my $I = undef;

  $I = matrix_Gaussian_inverse($A);

  if (!defined($I)) {
    $I = matrix_pseudoinverse($A);
    if (!defined($I)) {
      # All zeros, perhaps?
      return undef;
    }
  }

  return $I;
}



# Evaluate a univariate polynomial.  The first argument is
# the array of polynomial coefficients, in descending order
# [x^2, x, 1]; the second, $x.
sub polynomial_evaluate($$) {
  my $params = shift;
  my $x      = shift;
  my $i      = 0;
  my $coeff  = scalar @$params;

  my $y = 0;

  for ($i=0; $i < $coeff; $i++) {
    $y  = $y * $x;
    $y += $params->[$i];
  }

  return $y;
}



# Returns a function that evaluates this specific polynomial.
#
# As with the other functions, it uses the standard descending-
# degree order.
sub polynomial_evaluator_gen($) {
  my $coeff_ref = shift;
  my $coeff_copy = +[ @$coeff_ref ];
  return sub {
    my $x = shift @_;
    return polynomial_evaluate($coeff_copy, $x);
  }
}


# Given univariate vector x, output vector y, and a vector of polynomial
# coefficients, compute RMS error.  The coefficients should be in
# descending order of degree -- Ax^2 + Bx + C => +[ A, B, C ].

sub polynomial_RMS($$$) {
  my $x_ref  = shift;
  my $y_ref  = shift;
  my $params = shift;
  my $coeff  = scalar($params);
  my $ct     = scalar(@$x_ref);
  my $RMS    = 0;

  if ($ct != scalar(@$y_ref)) {
    return undef;
  }

  my $i=0;

  for ($i=0; $i < $ct; $i++) {
    my $x    = $x_ref->[$i];
    my $y    = $y_ref->[$i];
    my $val  = polynomial_evaluate($params, $x);
    my $diff = abs($y - $val);

    $RMS += (($diff * $diff) / $ct);
  }

  $RMS = sqrt($RMS);
  return $RMS;
}




# Symbolically multiply two polynomials.  As usual, they're specified
# as list references of coefficients in descending order.
sub polynomial_multiply($$) {
  my $poly_a = shift;
  my $poly_b = shift;

  if (!(ref $poly_a)) {
    # Scalar?
    $poly_a = +[ $poly_a ];
  }

  if (!(ref $poly_b)) {
    # Scalar?
    $poly_a = +[ $poly_b ];
  }
  my $deg_a  = (scalar @$poly_a)-1;
  my $deg_b  = (scalar @$poly_b)-1;
  my $deg_c  = $deg_a + $deg_b;
  my $poly_C = +[ (0) x ($deg_c+1) ];

  my $idx_a = 0;

  for ($idx_a=0; $idx_a <= $deg_a; $idx_a++) {
    my $idx_b=0;
    my $a_coeff = $a->[$idx_a];

    ($a_coeff != 0) || next;
    for ($idx_b=0; $idx_b <= $deg_b; $idx_b++) {
      my $b_coeff = $b->[$idx_b];
      my $idx_c   = $idx_a + $idx_b;

      $poly_C->[$idx_c] += $a_coeff * $b_coeff;
    }
  }

  return $poly_C;
}




# Given a reference to an input array, an output array, and a vector of
# first guesses of polynomial coefficients (in descending order of degree:
# x^4, x^3, x^2... 1), go through one pass of nonlinear least-squares
# fitting.  The third parameter is an optional convergence threshold;
# RMS must decrease by this percentage (0.05 = 5%), if it decreases at
# all, or iteration stops.  If it increases, we halve the learning
# rate and continue.
#
# We return the final parameter vector, and the root-mean-squared
# error.
#
# The algorithm is Gauss-Newton.
#
# Incidentally, yes, this could be generalized easily to non-polynomials
# if it's given a set of partial derivative functions, and it regenerated
# the V0 matrix every iteration instead of the first.

sub polynomial_lsfit($$$;$) {
  my $x_ref      = shift;
  my $y_ref      = shift;
  my $theta      = shift;
  my $err_thresh = shift;
  my $param_ct   = scalar @$theta;
  my $ct         = scalar(@$x_ref);

  if (!defined($err_thresh)) {
    $err_thresh = 0.001;
  }
  # Compute the V matrix.  Vij = the partial derivative of
  # f(xi) with respect to parameter theta j.  These should
  # be invariant with respect to thetas, actually, because
  # they're polynomial coefficients and the parameters are
  # always first-degree only, and never multiplied by each
  # other.

  my $V0  = +[];

  {
    my $i=0;

    for ($i=0; $i < $ct; $i++) {
      my $j = 0;

      $V0->[$i] = +[];

      my @partial = (0) x $param_ct;
      $partial[0] = 1;

      for ($j=0; $j < $param_ct; $j++) {
        my $Vij = polynomial_evaluate(\@partial, $x_ref->[$i]);
        pop @partial;
        $V0->[$i]->[$j] = $Vij;
      }
    }
  }

  # Compute QR decomposition of V0.
  my ($Q0, $R0) = matrix_QR($V0);

  # Compute QT.
  my $Q0T = matrix_transpose($Q0);

  # Learning rate.
  my $lambda = 1;


  my $f   = sub {
    my $x = shift @_;
    return polynomial_evaluate($theta, $x);
  };

  my ($ssq, $z) =
  compute_ssq_residuals($f, $x_ref, $y_ref);
  $z   = matrix_preprocess($z);
  my $RMS = sqrt($ssq/$ct);


  main_loop: {

    # Multiply Q0T by z.
    my $Q0Tz = matrix_multiply($Q0T, $z);
    my $delta = linear_solve($R0, $Q0Tz);
    my $theta_new = +[ @$theta ];

    {
      my $i=0;

      for ($i=0; $i < $param_ct; $i++) {
        $theta_new->[$i] += $lambda * $delta->[$i];
      }
    }

    my $f_new   = sub {
      my $x = shift @_;
      return polynomial_evaluate($theta_new, $x);
    };



    ($ssq, $z) = compute_ssq_residuals($f_new, $x_ref, $y_ref);
    $z   = matrix_preprocess($z);
    my $RMS_new = sqrt($ssq/$ct);

    if ($RMS_new > $RMS) {
      $lambda *= 0.5;
    } else {
      if (($RMS_new > ((1-$err_thresh) * $RMS)) ||
      ($RMS_new == 0)) {
        $RMS = $RMS_new;
        $theta = $theta_new;
        last main_loop;
      }
    }

    $RMS = $RMS_new;
    $theta = $theta_new;

    redo main_loop;
  }

  return ($theta, $RMS);
}

return 1;



# Iteratively fit polynomials, increasing degree until we either hit
# a user-specified limit, or RMS error falls below a given threshold.
# The user may also specify a lower limit, if he has reason to believe
# that lower-degree polynomials will not work.
#
# We return the set of polynomial coefficents
# (x^{n-1}, .., x^{2}, x, 1) and the corresponding RMS, if we do find
# a satisfactory fit.  Otherwise,  we return undef.
sub polynomial_guess($$$$;$) {
  my $x_ref        = shift;
  my $y_ref        = shift;
  my $max_degree   = shift;
  my $max_RMS      = shift;
  my $min_degree   = shift;
  my $ct           = scalar @$x_ref;

  if (!defined($min_degree)) {
    $min_degree = 0;
  }

  die unless ($min_degree <= $max_degree);

  my $degree = $min_degree;
  my @coeff  = ();
  my $mean   = ArrayFoo::arrMean($y_ref);

  # We have ($degree+1) coefficients to estimate.  We need a
  # first guess.  One approach is to assume that the highest-degree
  # element provides the entire contribution, in which case we might
  # try
  #
  #   y=ax^{k}
  #
  # and set a=mean(y/x^{k}).
  #
  # However, I'm not convinced that makes sense when the polynomial
  # is over range [0..1]...  So I'll guess constant, using the mean!

  for ($degree = $min_degree; $degree <= $max_degree; $degree++) {
    my @coeffs = ();

    # Produce the estimates.
    if ($degree > 0) {
      push @coeffs, (0) x $degree;
    }

    push @coeffs, $mean;
    my ($new_coeffs, $RMS) = polynomial_lsfit($x_ref, $y_ref,
    \@coeffs);
    if ($RMS <= $max_RMS) {
      return ($new_coeffs, $RMS);
    }
  }

  return undef;
}





# Convert data to a row-order matrix.
# Form 1:
#    +[ +[ a_11, a_12, a_13 ], +[ a_21, a_22, a_23 ], ...]
# Form 2:
#    +[ a_11, a_21, a_31 ], +[ a_12, a_22, a_32 ], ...
# Form 3:
#    a_11, a_21, a_31, a_41...

sub matrix_preprocess(@) {
  my @others    = @_;
  my @processed = ();

  die unless scalar(@others);

  if (!(ref($others[0]))) {
    # Flat array.  Case 3.
    map { push @processed, +[ $_ ] } @others;
  } elsif (ref($others[0]) eq 'ARRAY') {
    my $first_ref = $others[0];

    if (!(ref($first_ref->[0]))) {
      # Case 2, parallel arrays in sequence.
      my $ct = scalar @$first_ref;
      for (0..($ct-1)) {
        push @processed, +[];
      }

      my $list = undef;
      foreach $list (@others) {
        for (0..($ct-1)) {
          push @{$processed[$_]}, $list->[$_];
        }
      }
    } else {
      # Case 1.
      # It's fine.
      defined($others[0]) || die;
      return $others[0];
    }
  } else {
    # Unexpected input format.
    die;
  }

  return \@processed;
}



# The reverse:  Assume a single-column matrix, and turn it into
# a simple vector (Form 3).  Return the vector.
sub vector_preprocess(@) {
  my @others    = @_;

  die unless scalar(@others);

  if ((scalar(@others) == 1) && (ref($others[0]) eq 'ARRAY')) {
    @others = @{$others[0]};
  }

  if (!scalar(@others)) {
    return undef;
  }

  if (ref($others[0]) eq 'ARRAY') {
    # Matrix.
    my @vector = ();

    foreach (@others) {
      push @vector, $_->[0];
    }

    return (wantarray() ? @vector : \@vector);
  } elsif (wantarray()) {
    return @others;
  } else {
    return \@others;
  }
}



# Take a reference to a list and return a reference to a normalized
# version of it.
sub vector_normalize($) {
  my $not_normal = shift;
  my $normal     = +[ @$not_normal ];

  my $sum2 = 0;
  my $mag  = 0;

  map { $sum2 += ($_ ** 2) } @$not_normal;

  $mag = sqrt($sum2);

  if ($mag == 0) {
    # all zero...
    return $normal;
  }

  map { $_ /= $mag } @$normal;

  return $normal;
}



# Given a matrix, return a flat vector consisting of the values on
# the diagonal.  We return the vector as a reference.
sub matrix_to_diag($) {
  my $A = shift;

  $A = matrix_preprocess($A);

  my $m = scalar @$A;

  ($m > 0) || return ();

  my $n      = scalar @{$A->[0]};
  my $lesser = ($m < $n) ? $m : $n;
  my @diag   = ();

  for (my $i=0; $i < $lesser; $i++) {
    $diag[$i] = $A->[$i]->[$i];
  }

  return \@diag;
}


# The other way around.  The matrix will be square, and will have 0
# everywhere except the diagonal.  Supply the diagonal as a reference.
sub diag_to_matrix($) {
  my $diag = shift;
  my $n    = scalar @$diag;

  my $A    = matrix_new($n, $n);

  for (my $i=0; $i < $n; $i++) {
    $A->[$i]->[$i] = $diag->[$i];
  }

  return $A;
}



# Compute information using equidepth partitioning.
#
# The first argument must be the number of partitions.
#
# Then, we allow two multiple for the rest:
#    +[ +[ x1, +y1], +[ x2, +y2 ], ...] or
#
#    +[ x1, x2, ... ], +[ y1, y2, ...].
#
#    x1, x2, x3, x4
#
# We will be working with Type 1 (nested list refs) here.
# That is, normal matrix form.


sub info_equidepth($@) {
  my $part_ct   = shift;
  my $proc_data = matrix_preprocess(@_);

  $proc_data = _prep_equidepth($part_ct, $proc_data);
  return _info_general($proc_data);
}


sub info_equiwidth($@) {
  my $part_ct   = shift;
  my $proc_data = matrix_preprocess(@_);
  $proc_data = _prep_equiwidth($part_ct, $proc_data);
  return _info_general($proc_data);
}


# Given a number of intervals and a matrix in row-order form,
# replace all the numbers with interval indices based on
# independent equiwidth discretization along each column.
sub _prep_equiwidth($$) {
  my $part_ct   = shift;
  my $proc_data = shift;

  my $row_ct    = scalar @$proc_data;
  my $col_ct    = scalar (@{$proc_data->[0]});
  my $transpose = matrix_transpose($proc_data);
  my $new_mat   = +[];

  foreach (1..$row_ct) {
    push @$new_mat, +[];
  }

  for (my $col_idx=0; $col_idx < $col_ct; $col_idx++) {
    my $low  = ArrayFoo::arrSelect($transpose->[$col_idx], 0);
    my $high = ArrayFoo::arrSelect($transpose->[$col_idx], $row_ct-1);

    if ($low < $high) {
      for (my $row_idx=0; $row_idx < $row_ct; $row_idx++) {
        my $x = $proc_data->[$row_idx]->[$col_idx];
        $new_mat->[$row_idx]->[$col_idx] =
        int($part_ct * ($x-$low)/($high-$low));
      }
    } else {
      foreach (@$new_mat) {
        push @{$_}, 0;
      }
    }
  }

  return $new_mat;
}



# independent equidepth discretization along each column.
sub _prep_equidepth($$) {
  my $part_ct   = shift;
  my $proc_data = shift;

  my $row_ct    = scalar @$proc_data;
  my $col_ct    = scalar (@{$proc_data->[0]});
  my $transpose = matrix_transpose($proc_data);
  my $new_mat   = +[];

  foreach (1..$row_ct) {
    push @$new_mat, +[];
  }

  for (my $col_idx=0; $col_idx < $col_ct; $col_idx++) {
    my $col_quant = ArrayFoo::arrQuantize($transpose->[$col_idx]);

    foreach (0..($row_ct-1)) {
      push @{$new_mat->[$_]}, int(($col_quant->[$_]) * $part_ct);
    }
  }

  return $new_mat;
}




# AFTER the matrix has been properly discretized -- in other words,
# all data have been replaced with integer tags -- then we compute
# entropy with a row-major perspective (each row = one instance).
#
# This is... box-counting, actually.  With a twist in that the data
# is known to fit in memory if we're using THIS package.  Which means
# that it isn't that large, which means we can be a bit lazy here
# and use a sorting method.
#
sub _info_general($) {
  my $matrix = shift;
  my $row_ct = scalar @$matrix;
  my $col_ct = scalar @{$matrix->[0]};

  my $vec_cmp = sub {
    my $a = shift @_;
    my $b = shift @_;

    foreach (0..($col_ct-1)) {
      my $val = $a->[$_] <=> $b->[$_];

      if ($val != 0) {
        return $val;
      }
    }
    return 0;
  };

  my @ordered_rows = sort {
    &$vec_cmp($a, $b)
  } @{$matrix};

  # Lump aggregates.
  my $previous = undef;
  my $ct       = 0;
  my $sum      = 0;
  my $log2     = log(2);

  foreach (@ordered_rows) {
    if ((!defined($previous)) || (&$vec_cmp($previous, $_))) {
      # Different from the previous one.
      if ($ct != 0) {
        my $p = $ct/$row_ct;
        $sum -= $p*log($p);
      }
      $ct = 1;
      $previous = $_;
    } else {
      $ct++;
    }
  };


  if ($ct != 0) {
    my $p = $ct/$row_ct;
    $sum -= $p*log($p);
  }

  $sum /= $log2;

  return $sum;
}



# Perform ONE iteration of expectation maximization, otherwise
# known as 'fuzzy K-means'.
#
# Give it a reference to either a data matrix (multivariate
# distributions) or vector (univariate distributions), a flag
# indicating whether or not the mix is of truncated components
# (some pruning will happen if so), and a lref of tuples.
#
# Each tuple contains the following:
#
#   mixing probability
#   estimator function (weighted version -- takes [data, weights],
#    returns [mix, params, ppf, pdf])
#   initial pdf
#
# All the estimators I currently have are for univariate distros,
# but this framework should be general enough to allow for
# multivariates as well.  Data will be passed in as scalars if
# 1-D, and list references if more than that.
#
# Procedure:
#   Scan through matrix.  For each <datum, tuple> pair, compute
#   a 'responsibility' level.  Sum of responsibilities for each
#   datum must be 1.  These responsibilities correspond to
#   weights.
#
#   Compute new mixing probabilities as average responsibilities.
#
#   Use weighted estimators to produce new estimates on the
#   weighted data for any that are still active.
#
# Returns:  A list containing...

#   List reference with new tuples.  The first two are the new
#   mixing probability and the estimator.  The third is the new
#   pdf, if any (undef if mixing probability drops to 0).  The
#   fourth is a reference to the estimated parameters, with the
#   same caveat.
#
#   Log likelihood estimate (log in base 2).
#
# It is up to the caller to decide how to choose the initial set
# of tuples (and how many), how to assess goodness, and when to
# stop.
#
# Failure /is/ possible, if the distribution list appears to
# be completely irrelevant to the data (0 responsibility for
# everybody).  In that case, undef is returned.
#
#
# WARNING:  The merging code assumes that, if truncated, it's a
# truncated normal.  Other distributions that use the 'truncate'
# flag should TURN OFF MERGING, which may yield unnecessarily
# inefficient solutions.
#
sub em_one_iteration($;$$$$) {
  my $data_ref     = shift;
  my $distros_ref  = shift;
  my @distros      = @$distros_ref;
  my $truncate     = (scalar @_) ? shift : 0;

  # $trunc_thresh is now relative to maximum responsibility.
  my $trunc_thresh = (scalar @_) ? shift : 0.3;
  my $merge_thresh = (scalar @_) ? shift : 0.97;


  # $trunc_thresh matters only if $truncate is defined and non-zero.
  # It controls how much responsibility is sufficient to NOT prune
  # a component's reach.
  my @distros_new  = ();

  my $k       = scalar @distros;
  my $ct      = scalar @$data_ref;

  # The responsibility matrix will consist of rows, where each
  # row corresponds to a single distribution versus all the data.
  my $responsibilities = +[];

  for (my $data_idx=0; $data_idx < $ct; $data_idx++) {
    my $total_density = 0;
    my @wtd_dense     = ();
    my $sum           = 0;
    my $data          = $data_ref->[$data_idx];

    for (my $distro_idx=0; $distro_idx < $k; $distro_idx++) {
      my ($mix_prob, $estimator, $pdf) = @{$distros[$distro_idx]};

      if ($mix_prob > 0) {
        # Ignore inactive distributions.
        my $density          = &$pdf($data);
        my $density_adjusted = $density * $mix_prob;

        $sum += $density_adjusted;
        push @wtd_dense, $density_adjusted;
      } else {
        push @wtd_dense, 0;
      }
    }

    if ($sum == 0) {
      # NO active distributions.  With normal distributions and non-empty
      # data, this should be impossible -- but it won't be, due to the
      # limits of machine precision and perhaps a truly horrible
      # initialization.
      return undef;
    }

    # Normalize.
    map { $_ /= $sum } @wtd_dense;
    for (my $distro_idx=0; $distro_idx < $k; $distro_idx++) {
      $responsibilities->[$distro_idx]->[$data_idx] =
      $wtd_dense[$distro_idx];
    }
  }


  if (defined($truncate) && $truncate) {
    # Pruning phase.
    my @rg_min = (undef) x $k;
    my @rg_max = (undef) x $k;

    for (my $data_idx=0; $data_idx < $ct; $data_idx++) {
      my $data_itm = $data_ref->[$data_idx];
      my $max_res   = 0;   # maximum responsibility for this one

      for (my $distro_idx=0; $distro_idx < $k; $distro_idx++) {
        my $res = $responsibilities->[$distro_idx]->[$data_idx];

        $max_res = ($max_res > $res) ? $max_res : $res;
      }

      for(my $distro_idx=0; $distro_idx < $k; $distro_idx++) {
        my $res = $responsibilities->[$distro_idx]->[$data_idx];

        # CHANGE:  $trunc_thresh now relative to maximum observed
        # responsibility.
        if (($res == $max_res) || ($res >= ($trunc_thresh*$max_res))) {
          # It's significant.
          my $min = $rg_min[$distro_idx];
          my $max = $rg_max[$distro_idx];

          $rg_min[$distro_idx] =
          (defined($min) && ($min < $data_itm)) ? $min : $data_itm;
          $rg_max[$distro_idx] =
          (defined($max) && ($max > $data_itm)) ? $max : $data_itm;
        }
      }
    }

    for (my $data_idx=0; $data_idx < $ct; $data_idx++) {
      my $data_itm = $data_ref->[$data_idx];
      my $res_sum  = 0;

      for(my $distro_idx=0; $distro_idx < $k; $distro_idx++) {
        # Undefined rg_min means 'truncate out of existence
        # because it's not really that significant anywhere.'
        if ((!defined($rg_min[$distro_idx])) ||
        ($data_itm < $rg_min[$distro_idx]) ||
        ($data_itm > $rg_max[$distro_idx])) {
          $responsibilities->[$distro_idx]->[$data_idx] = 0;
        }
        $res_sum += $responsibilities->[$distro_idx]->[$data_idx];
      }

      if ($res_sum == 0) {
        # This shouldn't be possible except when numerical precision
        # limits do our truncating for us...

        return undef;
      }

      # Normalization.
      for(my $distro_idx=0; $distro_idx < $k; $distro_idx++) {
        $responsibilities->[$distro_idx]->[$data_idx] /= $res_sum;
      }
    }
  }


  # Compute new mixture probabilities and reestimate.
  for (my $distro_idx=0; $distro_idx < $k; $distro_idx++) {
    my $mixing_prob = ArrayFoo::arrMean($responsibilities->[$distro_idx]);
    my $estimator   = $distros[$distro_idx]->[1];
    my @new_tuple   = ($mixing_prob, $estimator, undef, undef);

    if ($mixing_prob > 0) {
      # Active distribution.
      my ($cdf, $param_ref, $ppf, $pdf) =
      &$estimator($data_ref, $responsibilities->[$distro_idx]);

      if (!defined($cdf)) {
        # Should not be reached -- if the pdf is already giving us
        # results, what would stop it from being reestimated?  Hm.

        return undef;
      } else {
        $new_tuple[2] = $pdf;
        $new_tuple[3] = $param_ref;
      }
      push @distros_new, \@new_tuple;
    }
  }

  # Drop components with tiny probabilities.
  my @distros_active = grep { $_->[0] > $__MIN_MIXPROB } @distros_new;

  # Merge components that match extremely closely.
  if (defined($merge_thresh)) {
    my $ct = scalar @distros_active;

    outer_loop:  for (my $i=0; $i < ($ct-1); $i++) {
      my $outer_param_ref = $distros_active[$i]->[3];
      my $pc = scalar @$outer_param_ref;

      inner_loop:  for (my $j=$i+1; $j < $ct; $j++) {
        my $inner_param_ref = $distros_active[$j]->[3];
        ((scalar @$inner_param_ref) == $pc) || die;

        my $p_idx = 0;

        param_check:  foreach $p_idx (0 .. ($pc-1)) {
          my $outer = $outer_param_ref->[$p_idx];
          my $inner = $inner_param_ref->[$p_idx];

          if ((!defined($outer)) && (!defined($inner))) {
            next param_check;
          }

          if ((!defined($outer)) || (!defined($inner))) {
            next inner_loop;
          }

          my $diff   = abs($outer - $inner);
          my $larger = (abs($inner) < abs($outer)) ? abs($outer) :
          abs($inner);
          if ($diff > ($larger * (1-$merge_thresh))) {
            next inner_loop;
          }
        }

        # Criteria met.  Average parameters.

        {
          my $ltrunc = undef;
          my $rtrunc = undef;

          if ($truncate) {
            # We need to treat the final parameters 'specially, so as
            # to retain full coverage.  Do NOT average the truncation
            # points.

            # BUGBUG:  This code ASSUMES that the distribution is a
            # truncated normal, or at least that the first parameter is
            # a mean, the second is standard deviation, and that the
            # third and fourth are left and right truncation points
            # expressed in terms of standard deviations from mean (with
            # - meaning left, and + meaning right).

            my ($outer_mean, $outer_dev, $outer_ltrunc, $outer_rtrunc)
            = @$outer_param_ref;
            my ($inner_mean, $inner_dev, $inner_ltrunc, $inner_rtrunc)
            = @$inner_param_ref;

            # Compute in REAL terms.
            $outer_ltrunc = defined($outer_ltrunc) ?
            (($outer_ltrunc * $outer_dev) + $outer_mean) : undef;
            $outer_rtrunc = defined($outer_rtrunc) ?
            (($outer_rtrunc * $outer_dev) + $outer_mean) : undef;
            $inner_ltrunc = defined($inner_ltrunc) ?
            (($inner_ltrunc * $inner_dev) + $inner_mean) : undef;
            $inner_rtrunc = defined($inner_rtrunc) ?
            (($inner_rtrunc * $inner_dev) + $inner_mean) : undef;

            if (defined($outer_ltrunc) && defined($inner_ltrunc)) {
              $ltrunc = ($outer_ltrunc < $inner_ltrunc) ?
              $outer_ltrunc : $inner_ltrunc;
            }

            if (defined($outer_rtrunc) && defined($inner_rtrunc)) {
              $ltrunc = ($outer_rtrunc > $inner_rtrunc) ?
              $outer_rtrunc : $inner_rtrunc;
            }

            $pc -= 2;
          }

          foreach $p_idx (0 .. ($pc-1)) {
            if (defined($outer_param_ref->[$p_idx])) {
              $outer_param_ref->[$p_idx] =
              ($outer_param_ref->[$p_idx] + $inner_param_ref->[$p_idx])/2;
            }
          }

          if ($truncate) {
            my $outer_mean = $outer_param_ref->[0];
            my $outer_dev  = $outer_param_ref->[1];

            $outer_param_ref->[-2] = defined($ltrunc) ?
            (($ltrunc - $outer_mean) / $outer_dev) : undef;
            $outer_param_ref->[-1] = defined($rtrunc) ?
            (($rtrunc - $outer_mean) / $outer_dev) : undef;
            $pc += 2;
          }
        }


        # Sum probabilities.
        $distros_active[$i]->[0] += $distros_active[$j]->[0];

        my $est   = $distros_active[$i]->[1];

        # Identify PDF generator.
        my $tuple = UniRand::find_by_est($est);
        defined($tuple) || die;
        my $pdf_gen = $tuple->[-1];

        # Fix PDF.
        $distros_active[$i]->[2] = &$pdf_gen($outer_param_ref);

        # Remove $j from mixture list.
        splice @distros_active, $j, 1;

        # Decrement count.
        $ct--;

        # Redo doesn't reevaluate the conditional, so we need to check
        # it ourselves.
        ($j < $ct) || last inner_loop;
        redo inner_loop;
      }
    }
  }

  # Sort by first parameter (which, for normals and truncated normals,
  # will be the mean).
  @distros_active = sort { $a->[3]->[0] <=> $b->[3]->[0] } @distros_active;


  # Renormalize mixing probabilities.
  {
    my $prob_sum = 0;

    map { $prob_sum += $_->[0] } @distros_active;
    map { $_->[0] /= $prob_sum } @distros_active;
  }


  # Compute log_likelihood.  For this, we need a mixture pdf.
  my $mix_pdf = sub {
    my $x    = shift @_;
    my $sum  = 0;

    foreach (@distros_active) {
      my ($prob, $estimator, $pdf, $param_ref) = @{$_};
      $sum += $prob * &$pdf($x);
    }
    return $sum;
  };

  # Third parameter is minimum probability.
  my $ll = compute_log_likelihood($data_ref, $mix_pdf, $__LL_PROB_FLOOR);

  return (\@distros_active, $ll);
}


# Compute the log(base 2) likelihood of data.
#
# Parameters:
#   Reference to list containing data
#   Probability density function.
#
#   Probability floor (optional, default 0).  Probabilities that are
#   less than the floor get treated as the floor.  This effectively
#   caps the maximum penalty per outlier.
#
# Output:
#   Scalar (log-likelihood)
#
#  (or, undef if the data is impossible -- that is, PDF
#  returns 0).
#
sub compute_log_likelihood($$;$) {
  my $data_ref = shift;
  my $pdf      = shift;
  my $pdf_min  = (scalar @_) ? shift : 0;

  my $ct       = scalar @$data_ref;

  # Compute new log likelihood.  Since it's a log, this is
  # the sum of the log of the likelihood of each data item.
  # Each of these is, in turn, a summation...

  my $log_likelihood = 0;
  my $data           = undef;

  foreach $data (@$data_ref) {
    my $l_single = &$pdf($data);

    if (defined($l_single) && ($l_single > 0) && ($l_single < $pdf_min)) {
      $l_single = $pdf_min;
    }

    if ((!defined($l_single)) || ($l_single <= 0)) {
      return undef;
    }

    $log_likelihood += log($l_single);
  }

  # Base 2 log.
  $log_likelihood /= log(2);

  return $log_likelihood;
}



# This takes the same arguments and returns the same results as
# em_one_iteration.  Given the inputs, we run em_one_iteration
# until the log_likelihood converges.  Return the new distributions
#
# Returns:  A list containing...
#   List reference with new tuples.  The first two are the new
#   mixing probability and the estimator.  The third is the new
#   pdf, if any (undef if mixing probability drops to 0).  The
#   fourth is a reference to the estimated parameters, with the
#   same caveat.
#
#   Log-likelihood (base 2).
#
#   Bayesian information criterion.

sub em_converge($;$$) {
  my $data_ref       = shift;
  my $distros_oldref = shift;
  my @distros_old    = defined($distros_oldref) ? @$distros_oldref : ();
  my $truncate       = shift;

  my $ll_old     = undef;
  my $iter           = 0;
  my $ll_new     = undef;
  my @distros_new   =  ();

  em_loop:  {
    $iter++;
    my ($distros_ref, $ll) =
    em_one_iteration($data_ref, \@distros_old, $truncate);

    if (!defined($distros_ref)) {
      # Failure detected!
      $ll_new = $ll_old;
      @distros_new = @distros_old;
      last em_loop;
    }


    if (!defined($ll_old)) {
      $ll_old  = $ll;
      @distros_old = @$distros_ref;
      redo em_loop;
    } elsif ((!defined($ll)) || ($ll <= $ll_old)) {
      $ll_new  = $ll_old;
      @distros_new = @distros_old;

      # converged, or worsened!
      last em_loop;
    } elsif ($ll <= ($ll_old + (0.01 * abs($ll_old)))) {
      # Minimal gain, so stop.

      $ll_new  = $ll;
      @distros_new = @$distros_ref;
      last em_loop;
    } else {
      # Continue iterating.
      @distros_old = @$distros_ref;
      $ll_old  = $ll;
      redo em_loop;
    }
  }

  if (!defined($ll_new)) {
    return undef;
  }

  # BIC = (-2*$ll) + q*log(n)

  my $n   = scalar(@$data_ref);
  my $q   = 0;

  map {
    my ($prob, $param_ref) = @{$_}[0,3];
    if ($prob > 0) {
      # The +1 is for the mixing probability.
      $q += scalar(@$param_ref)+1;
    }
  }
  @distros_new;


  my $BIC = (-2*$ll_new) + ($q * log($n) / log(2));

  # High log-likelihood is good.
  # Low BIC is good.

  return (\@distros_new, $ll_new, $BIC);
}


# Given a reference to a tuple of $ct initial parameters, find
# $ct+1 points in $ct-dimensional space that are $size away from
# the guess and surround it.  This function knows nothing about
# constraints; if you need to constrain it, construct your
# initial simplex more carefully...
#
# This is really just a function to ease the use of
# Nelder_Mean_simplex.
sub simplex_wrap($$) {
  my $guess = shift;
  my $size  = shift;
  my $ct    = scalar @{$guess};  # number of parameters
  my $n     = $ct+1;             # Return this many tuples.

  die unless (($ct > 0) && ($ct == int($ct)));

  my @tuples = ();

  if ($ct == 1) {
    # Bracket.
    my $x = $guess->[0];

    if (defined($x)) {
      return (+[ $x-$size ], +[ $x+$size ]);
    } else {
      return (undef);
    }
  }

  for (my $i=0; $i < $n; $i++) {
    $tuples[$i] = +[ (0) x $ct ];
  }

  my $reduced_size = $size/sqrt($ct-1);
  my $sqrt2        = sqrt(2);

  for (my $i=0; $i < $ct; $i++) {
    $tuples[$i]->[$i] = $size / $sqrt2;

    for (my $k=0; $k < $n; $k++) {
      ($k != $i) || next;
      $tuples[$k]->[$i] = -$reduced_size/$sqrt2;
    }
  }

  # If I did my math right, every one of these tuples (which are
  # currently offset vectors, not actual guesses) should be of
  # length '$size', except the last one.
  #
  # The first $ct vectors get 1 component of length ($size/sqrt(2))
  # and ($ct-1) of length ($size/sqrt(2*($ct-1))), and are of length
  # $size already.
  #
  # The last, however, has $ct components of length
  #   ($size/sqrt(2*($ct-1)))
  #
  # Multiplying it by a factor of sqrt(2($ct-1)/$ct) should work.
  {
    my $factor = sqrt(2*($ct-1)/$ct);
    map { $_ *= $factor } @{$tuples[$ct]};
  }

  # Add in the original vector.
  for (my $j=0; $j < $ct; $j++) {
    if (defined($guess->[$j])) {
      for (my $i=0; $i < $n; $i++) {
        $tuples[$i]->[$j] += $guess->[$j];
      }
    } else {
      for (my $i=0; $i < $n; $i++) {
        $tuples[$i]->[$j] = undef;
      }
    }
  }

  if (wantarray()) {
    return @tuples;
  } else {
    return \@tuples;
  }
}





# See:
#  Nelder, J.A. and R. Mead.  "A Simplex Method for Function Minimization".
#  _Computer Journal_, Volume 7, pages 308-313, 1965.
#
#
# Also see:
#  Wright, M.H.  "Direct Search Methods:  Once Scorned, Now Respectable",
#  in D.F. Griffiths and G.A. Watson (eds), _Numerical Analysis 1995
#  (Proceedings of the 1995 Dundee Biennial Conference in Numerical
#  Analysis)_, 191-2089, Addison Wesley Longman, Harlow, United Kingdom.
#
#
# Code is meant to be a straightforwards implementation of the algorithm
# specified.  As such, it provides a fairly simple black-box function
# minimizer, and one that does not require a first derivative.  However,
# it does require an initial guess region...
#
#
# Parameters:
#   One reference to a function subroutine to minimize.
#
#   Reference to a list of references to lists, each of which is one
#   set of parameters to the function.  For an n-dimensional function,
#   there should be n+1 points in the list to properly enclose an
#   n-dimensional simplex.
#
#   (Optional) alpha = reflection coefficient; positive.
#   (Optional) beta = contraction coefficient; in (0,1).
#   (Optional) gamma = expansion coefficient; > 1.
#   (Optional) tolerance = Maximum permitted "standard error"
#
#  These values default to (1, 0.5, 2, 1e-8) as per the cited paper.
#
#
# Note:
#   To form an n-dimensional simplex, n=number of parameters, n+1 tuples
#   are needed.
#
#   Paper states that "For functions with 2 to 10 variables it was found
#   that the relationship between the nubmer of variables k and the mean
#   number of evaluations N for convergence (using a final value of
#   approximately 2.5 x 10^-9) was well described by
#
#      N=3.16(k+1)^{2.11}
#
#   with default convergence criterion.
#
# Return values (list):
#   A reference to the winning tuple
#   The corresponding Y value
#
#
# Sample usage, to minimize Rosenbrock's parabolic valley, with an
# initial guess of (-1.2, 1) and a simplex size of 1:
#
# sub Rosenbrock($$) {
#   my $x1 = shift;
#   my $x2 = shift;
#
#   my $y  = 100*(($x2-($x1**2))**2);
#
#   $y += (1-$x1) ** 2;
#   return $y;
# }
#
#
# my @initial_simplex = Numerical::simplex_wrap(+[ -1.2, 1], 1);
# my ($P_ref, $yval)  = Numerical::Nelder_Mead_simplex(\&Rosenbrock,
#    \@initial_simplex);
#
# my ($x1, $x2) = @$P_ref;
# print "Minimal value of $yval found at ($x1,$x2)\n";
#
#
# Now you can add a 'constraint' function.  If it's defined, then
# before any function evaluation the constraint function gets
# tested.  It receives a REFERENCE to the parameter list, so the
# constraint function can adjust them as desired.  It should
# return a non-zero defined value if the constraint is fulfilled.
#
sub Nelder_Mead_simplex($$;$$$$$$) {
  my $function  = shift;  # What we minimize
  my $P_ref     = shift;  # Ref to set of input tuples
  my $alpha     = shift;  # Reflection coefficient
  my $beta      = shift;  # Contraction coefficient
  my $gamma     = shift;  # Expansion coefficient
  my $tolerance = shift;
  my $max_iter  = shift;
  my $constraint = shift;
  my @P       = ();

  # Make local copies.
  foreach (@{$P_ref}) {
    push @P, +[ @{$_} ];
  }

  my @Y       = ();     # Y = function(P)
  my @P_bar     = ();     # Centroid
  my $Y_bar     = undef;  # Value at centroid
  my $ct        = scalar @P;
  my $l_idx     = undef;  # Index of low @Y;
  my $h_idx     = undef;  # Index of high @Y.
  my $h2_idx    = undef;  # Index of second-highest.

  ($ct > 1) || die "Nelder-Mead requires more inputs.";

  my $param_ct  = scalar @{$P[0]};

  $alpha = defined($alpha) ? $alpha : 1;
  $beta  = defined($beta)  ? $beta  : 0.5;
  $gamma = defined($gamma) ? $gamma : 2;
  $tolerance = defined($tolerance) ? $tolerance : 1e-8;
  $max_iter  = defined($max_iter)  ? $max_iter  : 100;

  ($alpha > 0) || die "Reflection coeff alpha must be positive.";
  (($beta > 0) && ($beta < 1)) ||
  die "Contraction coefficient must be between 0 and 1.";
  ($gamma > 1) || die "Expansion coefficient must exceed 1.";


  compute_Yi: {
    for (my $i=0; $i < $ct; $i++) {
      my $val = $__VERY_BIG;

      if (!defined($constraint)) {
        $val = &$function($P[$i]);
      } else {
        my $cx = &$constraint($P[$i]);

        if ((defined($cx)) && ($cx != 0)) {
          $val = &$function(@{$P[$i]});
        }
      }

      $val = defined($val) ? $val : $__VERY_BIG;
      $Y[$i] = $val;

      if ((!defined($l_idx)) || ($Y[$l_idx] > $val)) {
        $l_idx = $i;
      }

      if ((!defined($h_idx)) || ($Y[$h_idx] < $val)) {
        $h2_idx = $h_idx;
        $h_idx = $i;
      } elsif ((!defined($h2_idx)) || ($Y[$h2_idx] < $val)) {
        $h2_idx = $i;
      }
    }
  }


  compute_P_bar: {
    @P_bar = (0) x $param_ct;

    foreach (@P) {
      my $tuple = $_;

      for (my $i=0; $i < $param_ct; $i++) {
        ($i != $h_idx) || next;

        if (defined($tuple->[$i])) {
          $P_bar[$i] += $tuple->[$i];
        } else {
          $P_bar[$i] = undef;
        }
      }
    }
    map { defined($_) && ($_ /= ($ct-1)) } @P_bar;

    $Y_bar = $__VERY_BIG;
    if (!defined($constraint)) {
      $Y_bar = &$function(@P_bar);
    } else {
      my $cx = &$constraint(\@P_bar);

      if ((defined($cx)) && ($cx != 0)) {
        $Y_bar = &$function(@P_bar);
      }
    }
    $Y_bar = defined($Y_bar) ? $Y_bar : $__VERY_BIG;
  }

  my $iter = 0;
  Simplex_Loop:  {
    my @P_star    = ();     # Reflection...
    my $Y_star    = undef;

    reflection:  {
      for (my $i=0; $i < $param_ct; $i++) {
        if (defined($P_bar[$i])) {
          $P_star[$i] = ((1+$alpha) * $P_bar[$i]) -
          ($alpha * $P[$h_idx]->[$i]);
        } else {
          $P_bar[$i] = undef;
        }
      }

      $Y_star = $__VERY_BIG;
      if (!defined($constraint)) {
        $Y_star = &$function(@P_star);
      } else {
        my $cx = &$constraint(\@P_star);

        if ((defined($cx)) && ($cx != 0)) {
          $Y_star = &$function(@P_star);
        }
      }
      $Y_star = defined($Y_star) ? $Y_star : $__VERY_BIG;
    }

    if ($Y_star < $Y[$l_idx]) {
      # New minimum.  Try expansion in the same direction.
      my @P_starstar = ();
      my $Y_starstar = undef;

      for (my $i=0; $i < $param_ct; $i++) {
        if (defined($P_bar[$i])) {
          $P_starstar[$i] = ($gamma * $P_star[$i]) +
          ((1-$gamma) * $P_bar[$i]);
        } else {
          $P_starstar[$i] = undef;
        }
      }

      $Y_starstar = $__VERY_BIG;
      if (!defined($constraint)) {
        $Y_starstar = &$function(@P_starstar);
      } else {
        my $cx = &$constraint(\@P_starstar);

        if ((defined($cx)) && ($cx != 0)) {
          $Y_starstar = &$function(@P_starstar);
        }
      }
      $Y_starstar = defined($Y_starstar) ? $Y_starstar : $__VERY_BIG;

      # $P_starstar = expansion point.
      # $P_star     = reflection point.

      # The original Nelder-Mead algorithm uses:
      #   if ($Y_starstar < $Y[$l_idx]) {
      #
      # According to Wright (and my sense of it concurs), it makes
      # more sense to accept the better of the expansion and
      # reflection points.

      if ($Y_starstar < $Y_star) {
        # Expansion accepted.  Replace Ph by P**.
        $P[$h_idx] = +[ @P_starstar ];
        $Y[$h_idx] = $Y_starstar;
      } else {
        # Expansion failed.  Replace Ph by P*.
        $P[$h_idx] = +[ @P_star ];
        $Y[$h_idx] = $Y_star;
      }
    } elsif ($Y_star >= $Y[$h2_idx]) {
      # If P* replaced the high, then Y* would be the highest value
      # in the simplex.  Attempt contraction.
      my @P_h = ();
      my $Y_h = undef;

      if ($Y_star > $Y[$h_idx]) {
        # P_h is better than P_star
        @P_h = @{$P[$h_idx]};
        $Y_h = $Y[$h_idx];
      } else {
        # The other way around (or tie)
        @P_h = @P_star;
        $Y_h = $Y_star;

        $P[$h_idx] = +[ @P_star ];
        $Y[$h_idx] = $Y_star;
      }

      my @P_starstar = ();
      my $Y_starstar = undef;

      for (my $i=0; $i < $param_ct; $i++) {
        if (defined($P_bar[$i])) {
          $P_starstar[$i] = ($beta  * $P_h[$i]) +
          ((1-$beta) * $P_bar[$i]);
        } else {
          $P_starstar[$i] = undef;
        }
      }


      $Y_starstar = $__VERY_BIG;
      if (!defined($constraint)) {
        $Y_starstar = &$function(@P_starstar);
      } else {
        my $cx = &$constraint(\@P_starstar);

        if ((defined($cx)) && ($cx != 0)) {
          $Y_starstar = &$function(@P_starstar);
        }
      }
      $Y_starstar = defined($Y_starstar) ? $Y_starstar : $__VERY_BIG;


      if ($Y_starstar <= $Y_h) {
        # Good.  Replace the point indexed by $h_idx by this one;
        # it's no worse, and the simplex should be smaller.
        $P[$h_idx] = +[ @P_starstar ];
        $Y[$h_idx] = $Y_starstar;
      } else {
        # Contraction failed.  Ouch.  Move all Pis towards Pl.
        for (my $i=0; $i < $ct; $i++) {
          ($i != $l_idx) || next;

          for (my $j=0; $j < $param_ct; $j++) {
            if (defined($P[$i]->[$j])) {
              $P[$i]->[$j] = ($P[$i]->[$j] +$P[$l_idx]->[$j])/2;
            }
          }
          # And recompute.

          $Y[$i] = $__VERY_BIG;
          if (!defined($constraint)) {
            $Y[$i] = &$function(@{$P[$i]});
          } else {
            my $cx = &$constraint($P[$i]);

            if ((defined($cx)) && ($cx != 0)) {
              $Y[$i] = &$function(@{$P[$i]});
            }
          }
          $Y[$i] = defined($Y[$i]) ? $Y[$i] : $__VERY_BIG;
        }
      }
    } else {
      # Y* replacing y_h will help.  Not the new minimum, but will
      # help.
      $P[$h_idx] = +[ @P_star ];
      $Y[$h_idx] = $Y_star;
    }


    # Simplex vertices updated.  Find new hi/lo and P_bar.
    my $Y_dev = undef;

    update_stats: {
      $l_idx = undef;
      $h_idx = undef;
      $h2_idx = undef;
      my @P_sum  = (0) x $param_ct;

      for (my $i=0; $i < $ct; $i++) {
        for (my $j=0; $j < $param_ct; $j++) {
          my $val = $P[$i]->[$j];

          if (defined($val)) {
            $P_sum[$j]  += $val;
          } else {
            $P_sum[$j] = undef;
          }
        }

        my $val = $Y[$i];

        if ((!defined($l_idx)) || ($Y[$l_idx] > $val)) {
          $l_idx = $i;
        }

        if ((!defined($h_idx)) || ($Y[$h_idx] < $val)) {
          $h2_idx = $h_idx;
          $h_idx = $i;
        } elsif ((!defined($h2_idx)) || ($Y[$h2_idx] < $val)) {
          $h2_idx = $i;
        }
      }

      for (my $j=0; $j < $param_ct; $j++) {
        if (defined($P_sum[$j])) {
          $P_sum[$j] -= $P[$h_idx]->[$j];
        }
      }

      map { defined($_) && ($_ /= ($ct-1)) } @P_sum;
      @P_bar = @P_sum;

      $Y_bar = $__VERY_BIG;
      if (!defined($constraint)) {
        $Y_bar = &$function(@P_bar);
      } else {
        my $cx = &$constraint(\@P_bar);

        if ((defined($cx)) && ($cx != 0)) {
          $Y_bar = &$function(@P_bar);
        }
      }
      $Y_bar = defined($Y_bar) ? $Y_bar : $__VERY_BIG;

      {
        my $sum_diff  = 0;
        my $sum2_diff = 0;


        foreach (@Y) {
          my $diff = $_ - $Y_bar;
          $sum_diff  += $diff;
          $sum2_diff += ($diff ** 2);
        }

        $Y_dev = sqrt(($sum2_diff - ($sum_diff * $sum_diff / $ct))/$ct);
      }
    }


    $iter++;

    if (defined($max_iter) && ($iter >= $max_iter)) {
      # We hit the user-specified limit on number of iterations.
      return (\@P_bar, $Y_bar);
    }

    if ($Y_dev < $tolerance) {
      # It converged.
      return (\@P_bar, $Y_bar);
    } else {
      redo Simplex_Loop;
    }
  }
}



# This function is used for supplying ad-hoc derivative generators for
# when none are provided.
#
# $f_gen:  Reference to a subroutine which takes an array of parameters
#          and returns a function.
#
# $h_ref:  If undefined, the default step size is used for ALL
#          parameter dimensions.  If defined and scalar, the value is
#          again used for all.  If a list reference, then any defined
#          values are used for their corresponding parameter
#          dimension.
#
# $d_gen:  list ref for generators of 1st-order partial derivatives
#
#   my $gen   = $d_gen->[$j];              # Derivative generator for
#                                          # parameter $j
#   my $d_ref = &$gen(@$a_ref);            # df/da_{$j}
#   my $deriv = &$d_ref(@{$x_ref->[$i]});  # ...at $x
#
#   Any missing values in $d_gen, we fill in.  If you know there aren't
#   any, you don't need to use this function.
#
#
# This function is useful for, say, doing LM (Levenberg Marquardt)
# nonlinear least-squares fitting for when you /don't/ have an easy
# analytical way to generate all the partial derivatives with respect
# to the parameter values.
sub _fake_partials_with_leapfrogs($$$$) {
  my $f_gen = shift;
  my $h_ref = shift;
  my $d_gen = shift;
  my $k     = shift;

  my $default_h = 1e-4;

  if (defined($h_ref)) {
    if (!ref($h_ref)) {
      # Scalar shorthand.
      $h_ref = +[ ($h_ref) x $k ];
    }
    map { $_ = defined($_) ? $_ : $default_h } @$h_ref;
  } else {
    # Default 'h'.
    $h_ref = +[ ($default_h) x $k ];
  }

  if (!defined($d_gen)) {
    $d_gen = +[ (undef) x $k ];
  } else {
    ($k == scalar (@$d_gen)) || die;
  }


  for (my $i=0; $i < $k; $i++) {
    if (!defined($d_gen->[$i])) {
      my $h = $h_ref->[$i];
      my $i_copy = $i;
      my $h_copy = $h;

      ($h > 0) || die;
      my $leapfrogger = sub {
        my @a = @_;
        my @a_more = @a;
        my @a_less = @a;

        $a_more[$i_copy] += $h_copy;
        $a_less[$i_copy] -= $h_copy;

        my $f_more = &$f_gen(@a_more);
        my $f_less = &$f_gen(@a_less);

        my $differentiator = sub {
          my @x = @_;
          my $y_more = &$f_more(@x);
          my $y_less = &$f_less(@x);
          my $gap    = $y_more - $y_less;
          return (($y_more - $y_less) / (2*$h_copy));
        };

        return $differentiator;
      };

      $d_gen->[$i] = $leapfrogger;
    }
  }

  return $d_gen;
}



# Given:
#   $d_gen:  reference to list of partial derivative generators
#   $x_ref:  reference to list of x values (or tuples)
#       $a:  reference to list of current parameters
#
# Compute the Jacobian matrix of partial derivatives evaluated.
# at $x.
#
# Hence,
#  D_{r,c} = df(x_{r})/da_{c}
#
# If $d_gen points to first-degree partials, the result is the
# Jacobian matrix.
#
# If $d_gen points to second-degree partials instead of first,
# then this produces the Hessian matrix.
sub compute_partial_matrix($$$) {
  my $d_gen = shift;
  my $x_ref = shift;
  my $a     = shift;

  my $n = scalar @$x_ref;
  my $k = scalar @$d_gen;
  my $D = matrix_new($n, $k);

  for (my $col=0; $col < $k; $col++) {
    my $partial_generator = $d_gen->[$col];
    my $partial           = &$partial_generator(@$a);

    defined($partial) || return undef;

    for (my $row=0; $row < $n; $row++) {
      my $x   = $x_ref->[$row];
      my $val = ref($x) ? &$partial(@$x) : &$partial($x);

      defined($val) || return undef;
      $D->[$row]->[$col] = $val;
    }
  }

  $D = matrix_fix($D);
  return $D;
}



# $f_gen:  should be a reference to the function to fit
#          &$f_ref should take a list of arguments.
# $x_ref:  should be an array of list refs
# $y_ref:  should be an array of scalars parallel to $y_ref
# $a_ref:  list reference of parameters.
#
# That is, we try to fit:
#
#   my $f_ref = &$f_gen(@$a_ref);   # generate function
#   my $n     = scalar @$x_ref;
#
#   for (my $i=0; $i < $n; $i++) {
#     my $r = $y_ref->[$i] - &$f_ref(@{$x_ref->[$i]});
#
#     # $r = residual, should be forced towards zero by changing a
#   }
#
# $d_gen:  list ref for generators of partial derivatives
#
#   my $gen   = $d_gen->[$j];              # Derivative generator for
#                                          # parameter $j
#   my $d_ref = &$gen(@$a_ref);            # df/da_{$j}
#   my $deriv = &$d_ref(@{$x_ref->[$i]});  # ...at $x
#
# If it doesn't exist, we use leapfrog differentiation (!):
#   df_a/dx =~  ((f_{a+h}(x) - f_{a-h}(x))/2h);
#
#   This should *** NOT *** take into account any weights.
#
# $h_ref:  list of h steps, 1 for each parameter.  These are only
#   used if the corresponding $d_ref value isn't defined.  Even if
#   these aren't defined, a default value can be used.  If you want
#   to use the SAME h value for all, use a scalar instead of a ref.
#
#
# $tol is absolute tolerance for error in terms of sum-of-squared
# residuals (ssqr).  The default is 1e-8.
#
# $min_abs is minimum absolute reduction in ssqr for iteration to
# continue.  There is no default.
#
# $min_rel is minimum relative reduction in ssqr for continued
# iteration.  The default value is 0.01 -- a 1% improvement.
#
# $min_iter is minimum number of iterations before convergence
# parameters kick in.  There is no default.
#
# $max_iter is maximum number of iterations before convergence
# parameters kick in.  There is no default.
#
# Returned:
#   A reference to the final set of parameters.
#   The final function, if wantarray().
#   SSQ, if wantarray().
#   A reference to the final list of residuals, if wantarray().
#     Note that the residuals are in /matrix/ form.  If you want
#     them as a vector, run 'vector_preprocess' on the reference.
#
#
# N.B.  This has such a ridiculous number of parameters, it might
# make sense to write a wrapper that takes a hash table and maps
# them to the appropriate parameters.
sub Levenberg_Marquardt($$$$;$$$$$$$$$$) {
  my $f_gen  = shift;   # reference to subroutine generator to fit
  my $x_ref  = shift;   # array of inputs
  my $y_ref  = shift;   # array of outputs to match
  my $a_ref  = shift;   # array of initial parameters

  my $d_gen  = shift;   # array of partial derivatives
  my $h_ref  = shift;   # array of h's, for leapfrog
  my $tol    = shift;   # tolerance

  my $min_abs  = shift; # minimum reduction in SSQR (absolute)
  my $min_rel  = shift; # minimum reduction in SSQR (relative)
  my $min_iter = shift; # minimum number of iterations
  my $max_iter = shift; # maximum number of iterations
  my $wtd_ref  = shift; # reference to array of weights
  my $min_lambda = shift;  # minimum lambda
  my $max_lambda = shift;  # maximum lambda

  my $lambda   = 1;

  $tol     = defined($tol)     ? $tol     : 1e-8;
  $min_rel = defined($min_rel) ? $min_rel : 0.01;

  # Note:  if $d_gen is not defined at all, or for any given
  # parameter $d_gen->[$param_idx] isn't, then we resort to
  # "leapfrog" differentiation.

  (defined($f_gen) && defined($y_ref) && defined($x_ref) &&
  defined($a_ref)) || die;

  (ref($f_gen) eq 'CODE') || die;

  my $n = scalar @$y_ref;  # x-y pairs
  my $k = scalar @$a_ref;  # parameters

  ($n == scalar(@$x_ref)) || die;

  map { defined($_) || die } @$a_ref;
  map { defined($_) || die } @$x_ref;
  map { defined($_) || die } @$y_ref;

  # Fill in any missing partial-derivative-generators.
  $d_gen =_fake_partials_with_leapfrogs($f_gen, $h_ref, $d_gen, $k);

  my @a_current = ( @$a_ref );
  my $iter      = 0;


  my $f = &$f_gen(@a_current);
  defined($f) || die;


  # Compute original residuals.

  my $x_vec = +[ @$x_ref ];
  my ($ssq, $r) = compute_ssq_residuals($f, $x_vec, $y_ref);
  $r = matrix_preprocess($r);
  defined($x_ref) || die;
  defined($r) || die;

  # Verify that $r is $n x 1.
  (scalar(@$r == $n) || die);
  (scalar(@{$r->[0]} == 1) || die);



  $x_ref = matrix_preprocess($x_vec);
  defined($x_ref) || die;


  do_iteration:  {
    # Compute Jacobian, with $n rows and $k columns.
    my $J       = compute_partial_matrix($d_gen, $x_ref, \@a_current);

    # If the user provided weights, adjust the Jacobian by multiplying
    # each row by its corresponding weight.
    if (defined($wtd_ref)) {
      for (my $i=0; $i < $n; $i++) {
        my $wt = $wtd_ref->[$i];

        if (defined($wt)) {
          map { $_ *= $wt } @{$J->[$i]};
        }
      }
    }

    # Verify that J is $n x $k.
    (scalar(@$J) == $n) || die;
    (scalar(@{$J->[0]}) == $k) || die;

    my $ssq_old = $ssq;

    # Compute the transpose.
    my $JT = matrix_transpose($J);
    defined($JT) || die;
    defined($J) || die;

    # Verify that JT is $k x $n.
    (scalar(@$JT) == $k) || die;
    (scalar(@{$JT->[0]}) == $n) || die;

    # The product of $JT and $J.
    my $JTJ = matrix_multiply($JT, $J);
    defined($JTJ) || die;

    # Verify that JTJ is $k x $k.
    (scalar(@$JTJ) == $k) || die;
    (scalar(@{$JTJ->[0]}) == $k) || die;

    # J   = $n x $k
    # JT  = $k x $n
    # JTJ = $k x $k
    #  r  = $n x 1
    # JTr = $k x 1
    defined($r) || die;
    my $JTr_full = matrix_multiply($JT, $r);
    my $JTr      = matrix_duplicate($JTr_full);

    # Verify that JTr is $k x 1.
    (scalar(@$JTr) == $k) || die;
    (scalar(@{$JTr->[0]}) == 1) || die;


    # The full equation is
    #
    # a(new) = a(old) + (inverse(JTJ + D) * JTr)
    #
    # But D = lambda * diagonal(JTJ)
    #
    # and we may need to test multiple values of lambda.
    my $cols_active = 0;
    my @active = ();

    try_lambda:  {
      my $D = matrix_new($k, $k);

      ($ssq > 0) || last try_lambda;
      for (my $i=0; $i < $k; $i++) {
        $D->[$i]->[$i] = $lambda * $JTJ->[$i]->[$i];
      }

      my $X  = matrix_add($JTJ, $D);

      $cols_active = 0;
      @active = ();

      # Are all columns of X meaningful?
      outer: for (my $col=0; $col < $k; $col++) {
        inner:  for (my $row=0; $row < $k; $row++) {
          if (abs($X->[$row]->[$col]) >= 1e-5) {
            $cols_active++;
            push @active, $col;
            next outer;
          }
        }
      }

      if ($cols_active == 0) {
        # Saddle point?
        return (\@a_current, $f, $ssq, $r);
      }  elsif ($cols_active < $k) {
        # Not all parameters appear to have an impact on the residuals.
        # J^{T} x J will then be of less than full rank.  So, take a
        # subset...

        @active = sort { $a <=> $b } @active;

        my $new_X = +[];
        my $new_JTr = +[];
        my $i       = undef;

        foreach $i (@active) {
          my $X_row = +[];
          my $j     = 0;

          foreach $j (@active) {
            push @$X_row, $X->[$i]->[$j];
          }
          push @$new_X, $X_row;


          push @$new_JTr, +[ $JTr_full->[$i]->[0] ];
        }


        $X = $new_X;
        $JTr = $new_JTr;
      }

      defined($X) || die;
      my $X_inverse = matrix_inverse($X);

      defined($X_inverse) || die;

      # $X is JTJ + D which should be $k x $k, trimmed to CAxCA,
      # so $X_inverse should be $CA x $CA.
      (scalar(@$X_inverse) == $cols_active) || die;
      (scalar(@{$X_inverse->[0]}) == $cols_active) || die;

      # JTr has been trimmed to $CA x 1.  Verify.
      (scalar(@$JTr) == $cols_active) || die;
      (scalar(@{$JTr->[0]}) == 1) || die;

      # r should be
      if (!defined($X_inverse)) {
        # Singular matrix?!
        #
        # LM can't proceed any further, so return what we
        # have.
        return (\@a_current, $f, $ssq, $r);
      }
      defined($JTr) || die;
      defined($X_inverse) || die;
      my $a_delta = matrix_multiply($X_inverse, $JTr);

      defined($a_delta) || die;

      my @a_new = @a_current;

      for (my $i=0; $i < $cols_active; $i++) {
        defined($a_delta->[$i]) || die;
        defined($a_delta->[$i]->[0]) || die;

        $a_new[$active[$i]] += $a_delta->[$i]->[0];
      }

      my $f_new = &$f_gen(@a_new);
      defined($f_new) || undef;

      my ($ssq_new, $r_new) = compute_ssq_residuals($f_new, $x_vec, $y_ref,
      $wtd_ref);
      $r_new = matrix_preprocess($r_new);

      # print "lambda=$lambda, ssq=$ssq_new, A=(", join(" ", @a_current), ")\n";
      if ($ssq_new < $ssq) {
        # Improvement.
        $f         = $f_new;
        $r         = $r_new;
        $ssq       = $ssq_new;
        @a_current = @a_new;

        # Verify that $r is $n x 1.
        (scalar(@$r == $n) || die);
        (scalar(@{$r->[0]} == 1) || die);

        $lambda /= 5;
        if (defined($min_lambda) && ($lambda < $min_lambda)) {
          $lambda = $min_lambda;
        }

        last try_lambda;
      } elsif (defined($max_lambda) && ($lambda == $max_lambda)) {
        # Tried max, no gain.
        last try_lambda;
      } else {
        $lambda *= 3;

        if (defined($max_lambda)) {
          $lambda = ($lambda < $max_lambda) ? $lambda : $max_lambda;
        }

        if (defined($min_iter) && ($iter <= $min_iter)) {
          redo do_iteration;
        }
        $JTr      = matrix_duplicate($JTr_full);

        $iter++;
        (defined($max_iter) && ($iter == $max_iter)) &&
        last do_iteration;
        redo try_lambda;
      }
    }

    # Lambda loop over.  Check convergence.
    my $gain = $ssq_old - $ssq;

    if (($ssq <= $tol) ||
    (defined($min_abs) && ($gain < $min_abs)) ||
    ($gain < ($min_rel * $ssq_old)) ||
    (defined($max_iter) && ($iter == $max_iter)) ||
    ($ssq == 0)) {
      last do_iteration;
    }

    redo do_iteration;
  }

  if (wantarray()) {
    return (\@a_current, $f, $ssq, $r);
  } else {
    return \@a_current;
  }
}



# This is simply a wrapper.  See contents for details...
#
sub LM_fit(@) {
  my %params = @_;

  my $f_gen  = $params{"f_gen"};
  my $x_ref  = $params{"x_ref"};
  my $y_ref  = $params{"y_ref"};
  my $a_ref  = $params{"a_ref"};

  my $d_gen  = $params{"d_gen"};
  my $h_ref  = $params{"h_ref"};
  my $tol    = $params{"tol"};

  my $min_abs  = $params{"min_abs"};
  my $min_rel  = $params{"min_rel"};
  my $min_iter = $params{"min_iter"};
  my $max_iter = $params{"max_iter"};

  my $wtd_ref = $params{"wtd_ref"};
  my $min_lambda = $params{"min_lambda"};
  my $max_lambda = $params{"max_lambda"};

  if (wantarray()) {
    my @results = Levenberg_Marquardt($f_gen, $x_ref, $y_ref, $a_ref,
    $d_gen, $h_ref, $tol, $min_abs, $min_rel, $min_iter, $max_iter,
    $wtd_ref, $min_lambda, $max_lambda);
    return @results;
  } else {
    my $result = Levenberg_Marquardt($f_gen, $x_ref, $y_ref, $a_ref,
    $d_gen, $h_ref, $tol, $min_abs, $min_rel, $min_iter, $max_iter,
    $wtd_ref, $min_lambda,
    $max_lambda);
    return $result;
  }
}




# Objective:  Reduce the pain suffered when watching LM fall into
# local minima by providing a reasonable way of restarting.
#
# This is a wrapper for LM_fit that runs multiple trials.  You still
# need to give it an initial starting configuration.  However, to
# seed the following runs, it uses one of two strategies:
#
#   - perturb the best-known ending configuration
#   - perturb the previous ending configuration
#
# You can specify the probability of choosing the first one (p_best).
# If probability is undefined, it's based on the SSQ errors (e.g.
# if best SSQ is half that of previous, it has a 2/3 chance of
# selection).
#
# If the /absolute/ tolerance criterion is met, it stops.  If it has
# hit 'max_runs', it stops.
#
# The magnitude of perturbation can be specified either as absolute
# or relative, and can be per attribute or no.  If both absolute and
# relative are given, absolute takes precedence.
sub LM_perturb(@) {
  my %params = @_;

  my $restarts = $params{"restarts"};
  my $p_best   = $params{"p_best"};
  my $tol      = $params{"tol"};
  my $max_runs = $params{"max_runs"};

  my $abs_perturb = $params{"abs_perturb"};
  my $rel_perturb = $params{"rel_perturb"};
  my $k        = scalar @{$params{"a_ref"}};

  my $run      = 0;

  # Either runs must be bounded or tolerance must be.

  (defined($max_runs) || defined($tol)) || die;

  my @best_a   = (undef) x $k;
  my $best_f   = undef;
  my $best_ssq = undef;
  my $best_r   = undef;

  my @prev_a   = (undef) x $k;
  my $prev_f   = undef;
  my $prev_ssq = undef;
  my $prev_r   = undef;

  if (defined($abs_perturb)) {
    if (!ref($abs_perturb)) {
      $abs_perturb = +[ ($abs_perturb) x $k ];
    }
  } else {
    $abs_perturb =[ (undef) x $k ];
  }

  if (defined($rel_perturb)) {
    if (!ref($rel_perturb)) {
      $rel_perturb = +[ ($rel_perturb) x $k ];
    }
  } else {
    $rel_perturb = +[ (undef) x $k ];
  }


  my %param_copy = %params;
  do_run:
  {
    my ($a_ref, $f_ref, $ssq, $r) = LM_fit(%param_copy);

    if (defined($a_ref)) {
      @prev_a   = @{$a_ref};
      $prev_f   = $f_ref;
      $prev_ssq = $ssq;
      $prev_r   = $r;

      if ((!defined($best_ssq)) || ($best_ssq > $ssq)) {
        @best_a   = @{$a_ref};
        $best_f   = $f_ref;
        $best_ssq = $ssq;
        $best_r   = $r;
      }
      # print "Run $run, params (", join(" ", @$a_ref), "):  ssq=$ssq\n";
    }

    $run++;


    if ($ssq == 0) {
      # Finis, really.
      last do_run;
    }

    if (defined($max_runs) && ($run == $max_runs)) {
      # Ran long enough.
      last do_run;
    }

    if (defined($tol) && defined($best_ssq) && ($best_ssq <= $tol)) {
      # Found an acceptable termination point.
      last do_run;
    }

    # If we got an undef on the FIRST run, there's nothing more we
    # can do.
    if (!defined($prev_ssq)) {
      return undef;
    }

    my $prob = $p_best;

    if (!defined($prob)) {
      $prob = $prev_ssq / ($best_ssq + $prev_ssq);
    }

    # OK, we need to choose our starting point for perturbation.
    if (rand(1) < $prob) {
      # All-time best.
      $param_copy{"a_ref"} = +[ @best_a ];
    } else {
      # Previous termination point.
      $param_copy{"a_ref"} = +[ @prev_a ];
    }

    # Now, we perturb it.
    for (my $i=0; $i < $k; $i++) {
      my $mag = $abs_perturb->[$i];

      if (defined($mag)) {
        $param_copy{"a_ref"}->[$i] += (rand(2) - 1)*$mag;
      } elsif (defined($mag = $rel_perturb->[$i])) {
        $param_copy{"a_ref"}->[$i] *= (1 +  ((rand(2)-1)*$mag));
      } else {
        # Default: peturb at most 10%.
        $param_copy{"a_ref"}->[$i] *= (1 + ((rand(2)-1)*0.1));
      }
    }

    redo do_run;
  };

  if (wantarray()) {
    return (\@best_a, $best_f, $best_ssq, $best_r);
  } else {
    return \@best_a;
  }
}



######################################################################
# The SVD code in here is based off of the Algol-60 code found in
#
# Golub, G.H. and C. Reinsch.  "Singular Value Decomposition and
# Least Squares Solutions", in _Numerische Mathematik_, 14:4, 1970,
# pages 403-420.
#
# The 'withu' and 'withv' parameters have been dropped, as if memory
# were an issue sparse matrix support would be more important.
#
# The dimensions of the data are inferred from the matrix, which
# again is expected to be in row-major form:
#
# my $matrix = +[ +[ row_1_values ], +[ row_2_values ], ... ];
#
#
# The two other parameters are optional:  epsilon and tolerance.
# Epsilon is a convergence criterion which helps determine when
# nearly-zero values should be set to zero.  Tolerance should be
# B/e0, where B=smallest positive number and e0 = machine precision:
# 1+e0 > 1.
#
# The original Algol program has a restriction that rows $m$ >=
# columns $n$.  There's some code to automatically take the
# transpose and adjust accordingly if need be.
#
# Variable names will generally be preserved as from the Algol.
#
# Upon success, return four references:
#    Q:   ref to list pf singular values   (size depends on rank)
#   Uc:   ref to Uc matrix ($m x $m)
#    V:   ref to V matrix ($n x $n)
#    D:   ref to ($m x $n) singular value matrix (computable from Q)
#
#  A = UDV^{T}
#
# To produce Uc, the code has been modified as suggested in section
# 5(i) of the paper.
#
#  Uc = ($m x $m)
#       first $r columns are columns space of $A
#       rest are nullspace of $AT
#   V = first $r columns are row space of $A
#       rest are nullspace of $A
#
######################################################################

sub matrix_SVD_GolubReinsch($;$$) {
  my $A   = shift;  # The main matrix.
  my $eps = shift;
  my $tol = shift;
  my $max_iter = 500;
  my $iter     = 0;
  $A = matrix_preprocess($A);

  my $m = scalar @$A;
  ($m > 0) || return undef;
  my $n = scalar @{$A->[0]};

  my $def_epsilon = $POSIX::DBL_EPSILON;

  if (!defined($def_epsilon)) {
    $def_epsilon = 2.2204460492503131e-16;
  }

  my $dbl_min = $POSIX::DBL_MIN;

  if (!defined($dbl_min)) {
    $dbl_min = 2.2250738585072014e-308;
  }

  $eps = defined($eps) ? $eps : $def_epsilon;
  $tol = defined($tol) ? $tol : ($dbl_min / $def_epsilon);

  if ($m < $n) {
    my $AT = matrix_transpose($A);
    my ($Q, $UCprime, $Vprime, $Dprime) =
    matrix_SVD($AT, $eps, $tol);

    if (!(defined($Q) && defined($UCprime) && defined($Vprime)
    && defined($Dprime))) {
      return undef;
    }

    # $AT is $n x $m, with $n >= $m
    # $Q  has at most $m eigenvalues inside
    #
    # $AT = $UCprime x $Dprime x $Vprime^{T}
    # $A  = $Vprime x $Dprime^{T} x $UCprime^{T}
    #

    my $Uc = $Vprime;
    my $D  = matrix_transpose($Dprime);

    my $V  = $UCprime;

    return ($Q, $Uc, $V, $D);
  }

  # $m >= $n
  my $Uc = matrix_new($m, $m);
  my $D  = matrix_new($m, $n);
  my $V  = matrix_new($n, $n);
  my $Q  = +[ (0) x $n ];
  my $e  = +[ (0) x $n ];

  for (my $i=0; $i < $m; $i++) {
    for (my $j=0; $j < $n; $j++) {
      $Uc->[$i]->[$j] = $A->[$i]->[$j];
    }
  }

  {
    # The Golub-Reinsch algorithm as ported below fails on rank-1
    # matrices, so the following code will check for that and attempt
    # to handle this special case.
    my $r = matrix_rank($A);

    if ($r == 0) {
      # All zero?!
      return undef;
    } elsif ($r == 1) {
      # Rank 1.
      $Uc = _matrix_GolubReinsch_helper($A);
      my $AT = matrix_transpose($A);
      $V = _matrix_GolubReinsch_helper($AT);

      # There is exactly one singular value.  This value 's' is
      # such that AV_{0} = sU_{0}.  Find this value.

      my @AV1 = (0) x $m;

      for (my $i=0; $i < $m; $i++) {
        for (my $j=0; $j < $n; $j++) {
          $AV1[$i] += $A->[$i]->[$j] * $V->[$j]->[0];
        }
      }

      my $ratio = undef;
      $Q = undef;
      for (my $i=0; $i < $m; $i++) {
        if ($AV1[$i] == 0) {
          if ($Uc->[$i]->[0] != 0) {
            return undef;
          }
        } else {
          if ($Uc->[$i]->[0] == 0) {
            return undef;
          }

          $Q = +[ $AV1[$i] / $Uc->[$i]->[0] ];
          last;
        }
      }
      if (!defined($Q)) {
        return undef;
      }

      # Now, there's one modification to make:  $Q->[0] may be
      # negative.  If so, we flip it and also U1 so the math
      # still works.
      if ($Q->[0] < 0) {
        $Q->[0] = -($Q->[0]);

        for (my $i=0; $i < $m; $i++) {
          $Uc->[$i]->[0] *= -1;
        }
      }

      # Since there's only one singular value, $D is easy.
      $D->[0]->[0] = $Q->[0];

      return ($Q, $Uc, $V, $D);
    }
  }

  # Everything below, code-wise, is pretty much taken as is from
  # the original implementation as per the paper.

  # Preserving the wonderfully mnemonic variable names of the
  # original code.

  my $c = 0;
  my $f = 0;
  my $g = 0;
  my $h = 0;
  my $s = 0;
  my $x = 0;
  my $y = 0;
  my $z = 0;
  my $l = 0;
  my $l1 = 0;

  # OC designates a comment from the original code.  Others are
  # my own notations.

  # OC:  Householder's reduction to bidiagonal form.
  for (my $i=0; $i < $n; $i++) {
    $e->[$i] = $g;
    $s     = 0;
    $l     = $i+1;

    for (my $j=$i; $j < $m; $j++) {
      $s = $s + ($Uc->[$j]->[$i] ** 2);
    }

    if ($s < $tol) {
      $g = 0;
    } else {
      $f = $Uc->[$i]->[$i];
      $g = ($f < 0) ? sqrt($s) : -sqrt($s);
      $h = ($f * $g) - $s;
      $Uc->[$i]->[$i] = $f-$g;

      for (my $j=$l; $j < $n; $j++) {
        $s=0;
        for (my $k=$i; $k < $m; $k++) {
          $s += $Uc->[$k]->[$i] * $Uc->[$k]->[$j];
        }
        $f = $s/$h;
        for (my $k=$i; $k < $m; $k++) {
          $Uc->[$k]->[$j] += $f * $Uc->[$k]->[$i];
        }
      }
    }

    $Q->[$i]=$g;
    $s = 0;

    for (my $j=$l; $j < $n; $j++) {
      $s += ($Uc->[$i]->[$j]) ** 2;
    }

    if ($s < $tol) {
      $g=0;
    } else {
      $f = $Uc->[$i]->[$i+1];
      $g = ($f < 0) ? sqrt($s) : -sqrt($s);
      $h = ($f * $g) - $s;
      $Uc->[$i]->[$i+1] = $f-$g;

      for (my $j=$l; $j < $n; $j++) {
        $e->[$j] = $Uc->[$i]->[$j] / $h;
      }

      for (my $j=$l; $j < $m; $j++) {
        $s = 0;
        for (my $k=$l; $k < $n; $k++) {
          $s += $Uc->[$j]->[$k] * $Uc->[$i]->[$k];
        }
        for (my $k=$l; $k < $n; $k++) {
          $Uc->[$j]->[$k] += $s * $e->[$k];
        }
      }
    }

    $y = abs($Q->[$i]) + abs($e->[$i]);
    $x = ($y > $x) ? $y : $x;
  }

  # $Uc should now be a bidiagonal form of $A, methinks.

  # OC:  Accumulation of right-hand transformations:

  $g = $e->[$n-1];
  $l = $n-1;

  for (my $i=$n-1; $i >= 0; $i--) {
    # This loop has been slightly altered to avoid what would appear
    # to be out-of-bounds values.  $i goes from $n-2 to 0, instead of
    # $n-1.  $g and $l are defined above.

    if (($g != 0) && ($i < $n-1)) {
      $h = $Uc->[$i]->[$i+1];
      defined($h) || die;
      $h *= $g;

      for (my $j=$l; $j < $n; $j++) {
        $V->[$j]->[$i] = $Uc->[$i]->[$j]/$h;
      }

      for (my $j=$l; $j < $n; $j++) {
        $s=0;
        for (my $k=$l; $k < $n; $k++) {
          $s += $Uc->[$i]->[$k] * $V->[$k]->[$j];
        }
        for (my $k=$l; $k < $n; $k++) {
          $V->[$k]->[$j] += $s * $V->[$k]->[$i];
        }
      }
    }

    for (my $j=$l; $j < $n; $j++) {
      $V->[$i]->[$j] = 0;
      $V->[$j]->[$i] = 0;
    }

    $V->[$i]->[$i] = 1;
    $g = $e->[$i];
    $l = $i;
  }


  # OC:  Accumuluation of left-hand transformations.

  # Modification 5(i):  fill in Uc.
  for (my $i=$n; $i < $m; $i++) {
    for (my $j=$n+1; $j < $m; $j++) {
      $Uc->[$i]->[$j] = 0;
    }
    $Uc->[$i]->[$i] = 1;
  }


  for (my $i=$n-1; $i >= 0; $i--) {
    $l = $i+1;
    $g = $Q->[$i];

    # 5(i)
    for (my $j=$l; $j < $m; $j++) {
      $Uc->[$i]->[$j] = 0;
    }

    if ($g != 0) {
      $h = $Uc->[$i]->[$i] * $g;

      # 5(i)
      for (my $j=$l; $j < $m; $j++) {
        $s=0;
        for (my $k=$l; $k < $m; $k++) {
          $s+= $Uc->[$k]->[$i] * $Uc->[$k]->[$j];
        }
        $f = $s/$h;
        for (my $k=$i; $k < $m; $k++) {
          $Uc->[$k]->[$j] += $f * $Uc->[$k]->[$i];
        }
      }

      for (my $j=$i; $j < $m; $j++) {
        $Uc->[$j]->[$i] /= $g;
      }
    } else {
      for (my $j=$i; $j < $m; $j++) {
        $Uc->[$j]->[$i] = 0;
      }
    }
    ($Uc->[$i]->[$i])++;
  }


  # OC:  diagonalization of the bidiagonal form;
  $eps *= $x;

  for (my $k=$n-1; $k >= 0; $k--) {

    test_f_splitting:
    for ($l=$k; $l >= 0; $l--) {
      if (abs($e->[$l]) <= $eps) {
        goto test_f_convergence;
      }

      # ($l > 0) check added to avoid an obvious out-of-bounds access
      if (($l > 0) && (abs($Q->[$l-1]) <= $eps)) {
        goto cancellation;
      }
    }

    # In Algol 60, the loop variable does not get the last increment
    # it would in C or Perl -- that is,
    #
    # for $i:=1 step 2 until 10 do ...
    #
    # completes with $i=9, not 11 as it would in Perl or C.
    $l = 0;


    # OC:  cancellation of $e->[$l] if $l > 1
    cancellation:
    if ($l > 1) {
      # added to avoid OOB
      $c=0;
      $s=1;
      $l1 = $l-1;
      for (my $i=$l; $i <= $k; $i++) {
        $f = $s * $e->[$i];
        $e->[$i] *= $c;

        if (abs($f) <= $eps) {
          goto test_f_convergence;
        }

        $g = $Q->[$i];
        $h = sqrt(($f*$f) + ($g*$g));
        $Q->[$i] = $h;
        $c = $g/$h;
        $s = -$f/$h;

        for (my $j=0; $j < $m; $j++) {
          $y = $Uc->[$j]->[$l1];
          $z = $Uc->[$j]->[$i];
          $Uc->[$j]->[$l1] = ($y*$c) + ($z*$s);
          $Uc->[$j]->[$i]  = ($z*$c) - ($y*$s);
        }
      }
    }

    test_f_convergence:
    $z = $Q->[$k];
    if (($l == $k) || ($iter++ >= $max_iter)) {
      goto convergence;
    }

    # OC:  Shift from bottom 2x2 minor;
    $x = $Q->[$l];
    $y = $Q->[$k-1];
    $g = $e->[$k-1];
    $h = $e->[$k];
    $f = ((($y-$z) * ($y+$z)) + (($g-$h) * ($g+$h))) / (2*$h*$y);
    $g = sqrt(($f * $f)+1);
    $f = ((($x-$z) * ($x+$z)) +
    ($h * (($y / (($f < 0) ? ($f-$g) : ($f+$g))) - $h)))/$x;

    # OC:  next QR transformation
    $c = 1;
    $s = 1;

    for (my $i=$l+1; $i <= $k; $i++) {
      $g = $e->[$i];
      $y = $Q->[$i];

      defined($s) || die;
      defined($c) || die;
      defined($g) || die;

      $h = $s * $g;
      $g = $c * $g;
      $z = sqrt(($f*$f) + ($h*$h));

      $e->[$i-1] = $z;
      $c = $f/$z;
      $s = $h/$z;

      $f = ($x*$c) + ($g*$s);
      $g = -($x*$s) + ($g*$c);
      $h = $y * $s;
      $y*= $c;

      for (my $j=0; $j < $n; $j++) {
        $x = $V->[$j]->[$i-1];
        $z = $V->[$j]->[$i];

        $V->[$j]->[$i-1] = ($x*$c) + ($z*$s);
        $V->[$j]->[$i]   = -($x*$s) + ($z*$c);
      }

      $z = sqrt(($f*$f) + ($h*$h));
      $Q->[$i-1] = $z;

      # What if $z == 0?

      if ($z == 0) {
        return undef;
      }

      $c = $f/$z;
      $s = $h/$z;

      $f = (($c*$g) + ($s*$y));
      $x = -($s*$g) + ($c*$y);

      for (my $j=0; $j < $m; $j++) {
        $y = $Uc->[$j]->[$i-1];
        $z = $Uc->[$j]->[$i];

        $Uc->[$j]->[$i-1] = ($y*$c) + ($z*$s);
        $Uc->[$j]->[$i]   = -($y*$s) + ($z*$c);
      }
    }

    $e->[$l] = 0;
    $e->[$k] = $f;
    $Q->[$k] = $x;

    goto test_f_splitting;

    convergence:
    if ($z < 0) {
      #OC:  $Q->[$k] is made non-negative

      $Q->[$k] = -$z;
      for (my $j=0; $j < $n; $j++) {
        $V->[$j]->[$k] = -$V->[$j]->[$k];
      }
    }
  };


  ############ END OF ORIGINAL ALGOL CODE ####################

  {
    # The Q vector is unsorted, and may contain zeros.  Let's find the
    # nonzero values, sort them in decreasing order, and also sort the
    # columns of $U and $V accordingly.

    # To do so, first create an augmented version that has the array
    # indices stored with the data.
    my $augmented = +[];

    for (my $i=0; $i < $n; $i++) {
      $augmented->[$i] = +[ $Q->[$i], $i];
    }

    # Then sort by the Q value, in decreasing order.
    @$augmented = sort { ($b->[0] <=> $a->[0]) ||
    (defined($b->[0])) } @$augmented;

    $Q = +[];

    # Correct Q.
    for (my $i=0; ($i < $n) && ($augmented->[$i]->[0] > 0); $i++) {
      $Q->[$i] = $augmented->[$i]->[0];
    }

    # Correct Uc, rearranging the corresponding columns.
    my $Uc_correct = +[];

    for (1..$m) {
      push @$Uc_correct, +[];
    }

    for (my $col=0; $col < $m; $col++) {
      my $col_orig =
      ($col < $n) ? $augmented->[$col]->[1] : $col;

      for (my $row=0; $row < $m; $row++) {
        $Uc_correct->[$row]->[$col] = $Uc->[$row]->[$col_orig];
      }
    }
    $Uc = $Uc_correct;

    my $V_correct  = +[];

    for (1..$n) {
      push @$V_correct, +[];
    }

    for (my $col=0; $col < $n; $col++) {
      my $col_orig = $augmented->[$col]->[1];

      for (my $row=0; $row < $n; $row++) {
        $V_correct->[$row]->[$col] = $V->[$row]->[$col_orig];
      }
    }

    $V = $V_correct;
  }

  {
    # Truncate $Q to delete null eigenvalues.
    my $first_zero = 0;

    for ($first_zero=0; $first_zero < $n; $first_zero++) {
      (defined($Q->[$first_zero]) && ($Q->[$first_zero] != 0)) || last;
    }

    splice @$Q, $first_zero;
  }


  {
    # Create the $m x $n diagonal matrix $D, for convenience.
    my $eigencount = scalar @$Q;

    for (my $i=0; $i < $eigencount; $i++) {
      $D->[$i]->[$i] = $Q->[$i];
    }
  }
  return ($Q, $Uc, $V, $D);
}




# This is mainly a function to save having two nigh-identical copies
# of it embedded in the main SVD_GR code.  It's a VERY specialized
# function.
#
# Given a single-rank matrix of size $m x $n, compute a $m x $m
# matrix where the first column is an unit basis vector of $A,
# and the rest are orthogonal.


sub _matrix_GolubReinsch_helper($) {
  my $A = shift;

  my $m = scalar @$A;
  my $n = scalar @{$A->[0]};
  my $B = matrix_new($m, $m);

  # First, find a non-zero column of A and normalize it.  This
  # will be the "real" vector in $U.

  my $col      = 0;

  outer:
  for ($col=0; $col < $n; $col++) {
    my $all_zero = 1;
    inner:
    for (my $row=0; $row < $m; $row++) {
      if ($A->[$row]->[$col] != 0) {
        $all_zero = 0;
        last inner;
      }
    }

    ($all_zero) || last outer;
  }


  my $vector = +[ (0) x $m ];

  for (my $row=0; $row < $m; $row++) {
    my $val = $A->[$row]->[$col];
    $vector->[$row] = $val;
  }

  $vector = vector_normalize($vector);

  # 'vector' now is a unit basis vector for A.
  for (my $row=0; $row < $m; $row++) {
    my $val = $vector->[$row];
    defined($val) || die;
    $B->[$row]->[0] = $val;
  }

  # Compute the nullspace of AT and add the columns.
  my $AT = matrix_transpose($A);
  my $N  = matrix_nullspace($AT);

  for (my $col=1; $col < $m; $col++) {
    for (my $row=0; $row < $m; $row++) {
      my $val = $N->[$row]->[$col-1];
      $B->[$row]->[$col] = $val;
    }
  }


  return $B;
}


# Shorter alias that doesn't mention algorithm name.
# Should match API of GolubReinsch.
sub matrix_SVD($;$$) {
  my $A = shift;
  my $eps = shift;
  my $tol = shift;

  if (wantarray()) {
    my @outputs = matrix_SVD_GolubReinsch($A, $eps, $tol);
    return @outputs;
  } else {
    my $output = matrix_SVD_GolubReinsch($A, $eps, $tol);
    return $output;
  }
}



# This function uses singular value decomposition to compute the
# pseudoinverse of a matrix.
#
# If matrix A such that
#       A = U x D x V^{T}
#
# with their usual interpretations, then the pseudoinverse A^{+}
# is
#
#     A^{+} = V x D^{+} x U^{T}
#
# where D^{+} is D^{T} with the non-zero values inverted.
#
# You may, if you wish, pass in epsilon and tolerance.  They're
# used only by the SVD routine and have the usual meanings.
#
# If you call this in array context, you also get $Q, $U, $V,
# and $D, because you may find them useful and they were
# expensive to compute.
sub matrix_pseudoinverse($;$$) {
  my $A   = shift;
  my $eps = shift;
  my $tol = shift;

  my $A_rows = scalar (@$A);
  my $A_cols = scalar (@{$A->[0]});

  my ($Q, $U, $V, $D) = matrix_SVD($A, $eps, $tol);

  if (!(defined($Q) && defined($U) && defined($V) && defined($D))) {
    return undef;
  }

  my $UT    = matrix_transpose($U);
  my $Dplus = matrix_transpose($D);

  my $m     = scalar @$D;
  my $n     = scalar @{$D->[0]};


  my $U_rows = scalar (@$U);
  my $U_cols = scalar (@{$U->[0]});
  my $V_rows = scalar (@$V);
  my $V_cols = scalar (@{$V->[0]});
  my $D_rows = scalar (@$D);
  my $D_cols = scalar (@{$D->[0]});

  ($A_rows == $U_rows) || die;
  ($A_cols == $V_rows) || die;
  ($U_cols == $D_rows) || die;
  ($D_cols == $V_cols) || die;

  for (my $i=0; ($i < $m) && ($i < $n); $i++) {
    my $val = $Dplus->[$i]->[$i];

    if ($val > 0) {
      $Dplus->[$i]->[$i] = 1/$val;
    } else {
      # Assumes $D is sorted in order of decreasing singular values.
      # My Golub-Reinsch code is, but if you want to add additional
      # methods be aware of that.
      last;
    }
  }

  if (!defined($V)) {
    die "V not defined";
  }

  if (!defined($Dplus)) {
    die "V not defined";
  }

  my $VDplus        = matrix_multiply($V, $Dplus);

  if (!defined($VDplus)) {
    print "V=\n";
    matrix_print($V);
    print "Dplus=\n";
    matrix_print($Dplus);
    die "VDplus not defined";
  }

  my $pseudoinverse = matrix_multiply($VDplus, $UT);

  if (wantarray()) {
    return ($pseudoinverse, $Q, $U, $V, $D);
  } else {
    return $pseudoinverse;
  }
}



# Given a square matrix, compute the determinant.
# The matrix must be in the usual row-major form.
sub matrix_determinant($) {
  my $A = shift;

  defined($A) || return undef;
  (ref($A) eq 'ARRAY') || return undef;

  my $n = scalar @$A;
  ($n > 0) || return undef;

  # Check squareness.
  ($n == scalar(@{$A->[0]})) || return undef;

  if ($n == 1) {
    # Simple case:  1x1.

    return $A->[0]->[0];
  } elsif ($n == 2) {
    # Simple case:  2x2.
    my ($a, $b) = @{$A->[0]};
    my ($c, $d) = @{$A->[1]};

    return (($a * $d) - ($b * $c));
  } else {
    # Gaussian elimination.
    my ($P, $L, $D, $U) = matrix_PLDU($A);

    # PA=LDU.
    # det(A) = det(L)det(D)det(U)/det(P)
    #
    # L, U are triangular matrices with unit diagonals.
    # D is a diagonal matrix.
    #
    # P is a permutation matrix; the determinant is 1, multiplied
    # by -1^{number of row exchanges}.

    my $det_D = 1;
    my $det_P = 1;

    foreach (0..($n-1)) {
      $det_D *= $D->[$_]->[$_];
    }

    col_loop:
    for (my $col=0; $col < $n; $col++) {
      if ($P->[$col]->[$col] == 1) {
        next;
      } else {
        row_loop:
        for (my $row=0; $row < $n; $row++) {
          if ($P->[$row]->[$col] == 1) {
            ($row != $col) || die;
            my $temp = $P->[$row];
            $P->[$row] = $P->[$col];
            $P->[$col] = $temp;
            $det_P *= -1;
            next col_loop;
          }
        }
      }
    }

    my $det = ($det_D / $det_P);
    return $det;
  }
}



# Recursive least-squares.
#
# Arguments:
#  1.  Reference to vector containing the input values.
#  2.  Scalar corresponding to the series we're trying to match.
#
#  The first two are required; the next three are optional.
#
#  3.  Reference to a vector of current coefficient estimates.
#      If the reference is undefined, or points to undef, or
#      to a vector with incorrect cardinality, we'll use the
#      default of all-zero.
#  4.  Reference to a current gain matrix.  If it's not of the
#      expected size (square of same cardinality as input
#      vector) we use our own.
#  5.  Memory value; 0 is illegal, low means forget most, 1 means
#      forget nothing.  Default is 1.
#
# Outputs:
#  1.  Reference to a new coefficient vector.
#  2.  Reference to a new gain matrix.
#  Error conditions should normally return undefs or terminate.
#
# This function does NOT update the contents of the inputs.  You'll
# have to do that yourself.
#
sub rls_update($$;$$$) {
  my $inputs_v = shift;
  my $output   = shift;
  my $coeff_v  = shift;
  my $gain_m   = shift;
  my $memory   = scalar(@_) ? shift : 1;

  # Tweak to algorithm to avoid exploding matrices.
  my $explode_hack = 0;

  $inputs_v = +[ vector_preprocess($inputs_v) ];
  defined($inputs_v) || return undef;
  defined($output)   || return undef;

  if (!defined($memory)) {
    $memory = 1;
  } elsif (($memory <= 0) || ($memory > 1)) {
    return undef;
  }

  my $ct = scalar @$inputs_v;

  ($ct > 0) || return undef;

  if ((!defined($coeff_v)) ||
  (ref($coeff_v) ne 'ARRAY') ||
  (scalar(@$coeff_v) != $ct) ||
  (ref($coeff_v->[0]))) {
    # A battery of sanity checks.
    $coeff_v = +[ (0) x $ct ];
  }

  if ((!defined($gain_m)) ||
  (ref($gain_m) ne 'ARRAY') ||
  (scalar(@$gain_m) != $ct) ||
  (ref($gain_m->[0]) ne 'ARRAY') ||
  (scalar(@{$gain_m->[0]}) != $ct)) {
    $gain_m = matrix_new($ct, $ct);
    my $eps    = 1e-6;

    foreach (0..($ct-1)) {
      $gain_m->[$_]->[$_] = 1/$eps;
    }
  }

  # Create new gain matrix.
  my $gain_new_m = matrix_duplicate($gain_m);

  if (!$explode_hack) {
    if ($memory != 1) {
      for (my $r_idx=0; $r_idx < $ct; $r_idx++) {
        map { $_ /= $memory } @{$gain_new_m->[$r_idx]};
      }
    }
  }

  my $inputs_m     = matrix_preprocess($inputs_v);
  my $inputs_T_m   = matrix_transpose($inputs_m);

  {
    my $temp = matrix_multiply($inputs_T_m, $gain_m);
    $temp    = matrix_multiply($temp, $inputs_m);

    # should be 1x1
    die unless (scalar(@$temp) == 1);
    die unless (scalar(@{$temp->[0]}) == 1);

    my $val = $temp->[0]->[0];
    $val += $memory;

    # This shouldn't happen...
    die if ($val == 0);

    $val = 1/$val;
    if (!$explode_hack) {
      $val /= $memory;
    }

    # $val will be multiplied by a matrix term we'll add to $gain_new_m.

    my $additive = matrix_multiply($gain_m, $inputs_m);
    $additive    = matrix_multiply($additive, $inputs_T_m);
    $additive    = matrix_multiply($additive, $gain_m);

    for (my $r_idx=0; $r_idx < $ct; $r_idx++) {
      map { $_ *= -$val } @{$additive->[$r_idx]};
    }

    $gain_new_m = matrix_add($gain_new_m, $additive);
  }

  my $coeff_new_v = +[ @$coeff_v ];

  {
    # Compute updates to the coefficient vector.
    my $mod_vec = +[ vector_preprocess(matrix_multiply($gain_new_m,
    $inputs_m)) ];
    my $predict = vector_dot_product($inputs_v, $coeff_v);
    my $error   = $predict - $output;

    map { $_ *= $error } @$mod_vec;

    for (0..($ct-1)) {
      $coeff_new_v->[$_] -= $mod_vec->[$_];
    }
  }

  return ($coeff_new_v, $gain_new_m);
}





# Use recursive-least-squares and a simple log-parabolic model in order to
# try to find points where trends might be shifting.  We return a list
# of them, in sorted order.
#
# Any transformation of the data, such as applying the logarithm,
# should have been already performed.
#
# Two models are used simultaneously with different lambdas -- 1 and
# lambda.
sub rls_logparabolic_breakpoints(@) {
  my %params   = @_;
  my $data     = $params{"data"};
  my $bufsize  = $params{"bufsize"};
  my $margin   = $params{"margin"};
  my $mindev   = $params{"mindev"};
  my $lambda   = $params{"lambda"};
  my $count    = $params{"count"};
  my $sorted   = $params{"sorted"};
  my $x_dense  = $params{"x_dense"};


  (defined($params{"data"})) || return undef;
  my $n        = scalar @$data;

  ($n > 1) || return undef;

  # The parameters mean...
  #
  # bufsize:     Size of buffer used for trailing average computation.
  #              Higher bufsize, the more smooth averages should be.
  # margin:      When deviations jump by this much (in absolute terms)
  #              above the trailing average, we hit the 'reset' button
  #              and reinitialize the RLS models.
  # mindev:      Minimum deviation ratio to qualify.
  # lambda:      Forgetting value for the LOCAL rls fitter.
  # count:       How many breakpoints to return (at most).
  # sorted:      Is the data already sorted?  (non-zero = yes)
  # x_dense:     x, density pairs.

  defined($bufsize) || ($bufsize = 20);
  defined($margin)  || ($margin  = 0.2);
  defined($mindev)  || ($mindev  = 0.001);
  defined($lambda)  || ($lambda  = 0.95);
  defined($count)   || ($count   = (($n < 10000) ? int($n * 0.01) : 100));

  ($bufsize > 0) || return undef;
  ($margin  > 0) || return undef;
  ($count   >= 10) || ($count = 10);

  # Strictly (0,1] and == 1 wouldn't make sense for our purposes anyway.
  (($lambda > 0) && ($lambda < 1)) || return undef;


  if (!defined($sorted)) {
    # First:  Is it sorted?
    $sorted = 1;
    my $old_val   = $data->[0];

    # sort_check
    sort_loop:  for (my $i=1; $i < $n; $i++) {
      if ($old_val > $data->[$i]) {
        $sorted = 0;
        last sort_loop;
      } else {
        $old_val = $data->[$i];
      }
    }
  }

  if (!($sorted)) {
    # Well, then sort it!  More precisely, make a sorted copy.
    $data = +[ sort { $a <=> $b } @$data ];
  }

  # @x_dense stores ($x, $density) pairs.  Densities have not been
  # normalized but should make relative sense.  The '1' indicates
  # 'already-sorted' (unless, of course, we were lied to.)
  #
  # This part will be quite expensive; while it scales linearly,
  # the constant factor will be fairly high.
  if (!defined($x_dense)) {
    my $density = UniRand::density_est($data, 1);
    defined($density) || (return undef);
    $x_dense = +[ UniRand::density_est($data, 1) ];
  }

  my @breakpoints = ();


  # An RLS parabolic model uses recursive-least-squares to fit a
  # 3-parameter model (Ax^{2} + x + 1) on 3 inputs (x^2, x, 1).
  #
  # Note that this violates some assumptions of RLS, namely that
  # the inputs are linearly independent.  However, the method may
  # still be useful for our purposes...
  #
  # Now, RLS accepts one input instance at a time.  Therefore, it
  # has to track /something/ to save state -- in RLS, it's a gain
  # matrix, which is a square matrix with the same number of rows
  # as there are parameters to fit.  The RLS implementation I
  # wrote will give you a default gain matrix if you pass it in
  # undef, so that's what I'll do.  It also needs to track a
  # vector of coefficients that it's guessing.
  #
  # Two models:
  #  global, gets lambda=1 (no forgetting)
  #  local,  gets lambda=$lambda (some forgetting)
  my $coeff_global = undef;
  my $gain_global  = undef;

  my $coeff_local  = undef;
  my $gain_local   = undef;


  # devs_buffer will track recent ($bufsize) deviation ratios, while
  # devs_all tracks ($x, $ratio) pairs for EVERY candidate datum
  # (deviation > $mindev).

  my @dev_buffer = ();
  my $dev_average = 0;
  my @devs_all    = ();

  foreach (@$x_dense) {
    my ($x, $density) = @{$_};

    ($density > 0) || die;

    # rls_update takes a default lambda of 1, so we don't need to
    # specify it for the 'global' model.
    ($coeff_global, $gain_global) =
    rls_update(+[$x**2, $x, 1], log($density),
    $coeff_global, $gain_global);

    ($coeff_local, $gain_local) =
    rls_update(+[$x**2, $x, 1], log($density),
    $coeff_local, $gain_local, $lambda);

    my $length_global = 0;
    my $length_local  = 0;
    my $deviation     = 0;

    for (0..2) {
      $length_global += ($coeff_global->[$_] ** 2);
      $length_local  += ($coeff_local->[$_] ** 2);
      $deviation     += ($coeff_global->[$_] - $coeff_local->[$_]) ** 2;
    }

    $length_global = sqrt($length_global);
    $length_local  = sqrt($length_local);
    $deviation     = sqrt($deviation);

    # We track deviation as a ratio between the absolute difference
    # between the two coefficient vectors and the GREATER of the
    # magnitudes of the two coefficients.
    my $greater = ($length_global > $length_local) ?
    $length_global : $length_local;

    # sanity check
    die if ($greater <= 0);

    my $ratio   = $deviation / $greater;

    if ($ratio > $mindev) {
      push @devs_all, +[ $x, $ratio ];
    }

    if ((scalar(@dev_buffer) == $bufsize) &&
    ($ratio > ($dev_average + $margin))) {
      # Deviation is unusually large according to trailing average.
      # Reset average, buffers.
      $dev_average = 0;
      @dev_buffer = ();

      if (1) {
        # Reset gain matrices, coefficient vectors.
        $gain_global  = undef;
        $coeff_global = undef;
        $gain_local   = undef;
        $coeff_local  = undef;
      } else {
        # Synchronize them.
        $gain_global  = matrix_duplicate($gain_local);
        $coeff_global = +[ @$coeff_local ];
      }
    }

    if ((scalar @dev_buffer) == $bufsize) {
      my $decay = shift @dev_buffer;
      $dev_average -= ($decay / $bufsize);
    }
    push @dev_buffer, $ratio;
    $dev_average += $ratio / $bufsize;
  }

  @devs_all = sort { $b->[1] <=> $a->[1] } @devs_all;

  if (scalar(@devs_all) >= $count) {
    splice @devs_all, ($count-1);
  };

  @devs_all = map { $_->[0] } @devs_all;
  @breakpoints = sort { $a <=> $b } @devs_all;

  if (wantarray()) {
    return (\@breakpoints, $x_dense);
  } else {
    return \@breakpoints;
  }
}



# Density-based breakpoints.  Identify all local minima.
sub density_breakpoints(@) {
  my %params   = @_;
  my $data     = $params{"data"};
  my $count    = $params{"count"};
  my $sorted   = $params{"sorted"};
  my $x_dense  = $params{"x_dense"};
  my $radius   = $params{"radius"};

  (defined($params{"data"})) || return undef;
  my $n        = scalar @$data;

  ($n > 1) || return undef;

  # The parameters mean...
  #
  # count:       How many breakpoints to return (at most).
  # sorted:      Is the data already sorted?  (non-zero = yes)
  # x_dense:     x, density pairs.
  # radius:      How many points on either side are looked at?

  (defined($radius)) || ($radius = 1);

  # Don't be silly.
  ($n >= ((2*$radius)+1)) || return undef;

  if (!defined($sorted)) {
    # First:  Is it sorted?
    $sorted = 1;
    my $old_val   = $data->[0];

    # sort_check
    sort_loop:  for (my $i=1; $i < $n; $i++) {
      if ($old_val > $data->[$i]) {
        $sorted = 0;
        last sort_loop;
      } else {
        $old_val = $data->[$i];
      }
    }
  }

  if (!($sorted)) {
    # Well, then sort it!  More precisely, make a sorted copy.
    $data = +[ sort { $a <=> $b } @$data ];
  }

  # @x_dense stores ($x, $density) pairs.  Densities have not been
  # normalized but should make relative sense.  The '1' indicates
  # 'already-sorted' (unless, of course, we were lied to.)
  #
  # This part will be quite expensive; while it scales linearly,
  # the constant factor will be fairly high.
  if (!defined($x_dense)) {
    my $density = UniRand::density_est($data, 1);
    defined($density) || (return undef);
    $x_dense = +[ $density ];
  }

  my $ct = scalar @$x_dense;

  map { (defined($_) && defined($_->[0]) && defined($_->[1])) || die; } @$x_dense;
  my @breakpoints = ();

  {
    my @buffer_before = ();
    my @buffer_after  = ();
    my $current_val   = $x_dense->[$radius]->[1];

    defined($current_val) || die;

    for (my $i=0; $i < $radius; $i++) {
      push @buffer_before, $x_dense->[$i]->[1];
      push @buffer_after, $x_dense->[$i+$radius+1]->[1];
    }

    for (my $i=$radius; $i < ($ct-$radius); $i++) {
      if ((!(grep { $_ <= $current_val } @buffer_before)) &&
      (!(grep { $_ <= $current_val } @buffer_after))) {
        # local minimum
        push @breakpoints, $x_dense->[$i]->[0];
      }

      if (($i+$radius+1) < $ct) {
        shift @buffer_before;
        push @buffer_before, $current_val;
        $current_val = shift @buffer_after;
        push @buffer_after, $x_dense->[$i+1+$radius]->[1];
      }
    }
  }

  if (wantarray()) {
    return (\@breakpoints, $x_dense);
  } else {
    return (\@breakpoints);
  }
}

return 1;


