#!/usr/local/bin/perl5 -w
#
# Purpose:
#
# To provide for parameter estimation and CDF computation of univariate
# distributions.
#
# The functions should accept a reference to the array, and return:
#   - a reference to a subroutine corresponding to a single-parameter
#     CDF for this distribution, including all parameters.   If it's
#     not a true CDF, it should at least be a valid and useful
#     transformation...
#   - a reference to a list of estimated parameters
#   - a reference to a percentage point function (which in many cases
#     may need to be numerical)
#   - a reference to the probability density function (again, often
#     numerical)
#
#
##########################################################################
# $Id: UniRand.pm,v 1.91 2005/05/05 20:40:13 lw2j Exp $
# $Log:	UniRand.pm,v $
# Revision 1.91  2005/05/05  20:40:13  lw2j
# Fixed bug in normal truncated pdf computation.
# 
# Revision 1.87  2003/12/30  18:02:16  lw2j
# *** empty log message ***
#
# Revision 1.86  2003/10/20  16:16:48  lw2j
# Trivial tweaks.
#
# Revision 1.85  2003/06/27  14:47:07  lw2j
# Tweaked mixture PPF generation; possible bugs fixed when dealing
# with overlapping components, and made them more paranoid about
# the edges.
#
# Revision 1.84  2003/06/23  14:47:06  lw2j
# Tweaks.
#
# Revision 1.83  2003/05/31  14:33:36  lw2j
# Fixed spelling error, Marquadt - Marquardt.
#
# Revision 1.82  2003/05/06  13:32:11  lw2j
# Minor tweaks.
#
# Revision 1.80  2003/04/17  18:01:53  lw2j
# Transferred private _lrefdupe to util.pm as public deep_copy().
#
# Revision 1.79  2003/04/02  10:48:56  lw2j
# Fixed normal_truncated_mix_gen_cdf()'s handling of flat parameter
# lists.
#
# Revision 1.78  2003/03/19  15:09:25  lw2j
# Minor tweaks if any while trying to improve estimation of mixtures
# of truncated normals.
#
# Revision 1.77  2003/03/14  13:56:11  lw2j
# Tweaked density estimator; it should be significantly better now.
#
# Revision 1.75  2003/03/07  09:56:04  lw2j
# Fixes made to Cohen NTE estimator (^ vs **), plus some tweaks.
# It's still inactive however.
#
# Revision 1.74  2003/03/04  16:22:24  lw2j
# Fixed bug in _mix_gen_ppf().
#
# Revision 1.73  2003/02/28  16:26:07  lw2j
# Assorted minor tweaks.
#
# Revision 1.72  2003/02/24  18:02:41  lw2j
# Added experimental method for normal-truncated-mixture estimation.
# This path (currently normal_truncated_em2) differs mostly in how
# it initializes; in addition, it lacks randomly-done passes.
#
# Revision 1.71  2003/02/19  13:58:20  lw2j
# Tweaked handling of out-of-bounds values for ppfs of mixtures,
# discrete distributions.
#
# Revision 1.70  2003/02/18  13:38:39  lw2j
# Now has a method for locating the distribution tuple via estimator
# function reference.  In addition, there's a list of the weighted
# versions of distributions.
#
# Revision 1.69  2003/02/14  15:11:13  lw2j
# Unified EM path (truncated and normal have been merged).  API changed...
#
# Revision 1.68  2003/02/13  15:01:40  lw2j
# Tweaked how mixtures generate PPFs at the left extreme when
# transformations give illegal values for box_cox.
#
# Revision 1.67  2003/02/11  16:34:20  lw2j
# Tweaked discrete PPF generation to deal with an approximation-
# introduced error.
#
# Revision 1.66  2003/02/07  16:39:31  lw2j
# Log-likelihood prob floor fixed.
#
# Revision 1.65  2003/02/06  15:33:27  lw2j
# More checking for undefs returned from ppfs.
#
# Revision 1.64  2003/02/06  12:25:07  lw2j
# Tweaked gamma, Gaussian, mixture ppf code to catch situations where
# algorithmic approximations / numerical precision issues mean that
# it otherwise wouldn't necessarily terminate.  Such situations may
# arise at the extreme percentiles.
#
# Revision 1.63  2003/01/17  10:14:33  lw2j
# Tweaked gamma_std_ppf() so it steps very slowly once percentiles
# go outside [0.2,0.8] rather than [0.05,0.95].
#
# Revision 1.62  2003/01/10  16:23:49  lw2j
# Added a command for finding a distribution by (exact) name.
#
# Revision 1.61  2003/01/06  16:02:22  lw2j
# Removed gamma debugging printf.
#
# Revision 1.60  2002/12/18  16:38:01  lw2j
# Fixed typo in mixture termination (checking to see whether the mixture
# retained all components, specifically) that was causing EM to stop at
# two components only.
#
# Revision 1.59  2002/12/18  14:40:19  lw2j
# Chi-square back in town for Gamma.
#
# Revision 1.58  2002/12/17  16:14:55  lw2j
# Added an undef check to DGX_prime estimation.
#
# Revision 1.57  2002/12/17  15:25:54  lw2j
# Reverted gamma code to use numerical integration again.
#
# Revision 1.56  2002/12/04  17:18:54  lw2j
# Started to change some 'die' results to 'return undef',
# as Simplex will trigger some.
#
# Revision 1.55  2002/12/03  15:53:05  lw2j
# Discretizers now can take transformations, inverse
# transformations.
#
# Revision 1.54  2002/12/02  17:05:26  lw2j
# Nelder-Mead parameters greatly tweaked in discrete-distribution
# estimator
#
# Revision 1.53  2002/12/02  16:33:10  lw2j
# Work done on the distribution discretizer.  It's now separated
# into multiple parts.
#
# Revision 1.52  2002/12/01  16:57:43  lw2j
# Truncated normal estimators will return undef instead of trying
# to divide by zero when too few points are provided (fewer than
# three).
#
# The LM parameters have been tweaked to boost processing speed.
#
# There are more "warning" comments regarding truncated normals,
# esp. mixes thereof.
#
# Revision 1.51  2002/11/30  15:42:48  lw2j
# Fixed (commented out) a k=$k, pass=$pass debugging message.
#
# Revision 1.50  2002/11/29  16:15:49  lw2j
# Fixed bad PDF definition of truncated normals.
#
# Revision 1.49  2002/11/26  17:06:22  lw2j
# Added the fix to the weighted normal truncated estimator.
#
# Revision 1.48  2002/11/26  16:58:51  lw2j
# Fixed a SIGNIFICANT bug in truncated-normal estimation -- t'was
# generating a function that returned a function that returned a
# function, instead of a function that returned a function that
# returned a scalar.
#
# Revision 1.47  2002/11/19  13:57:14  lw2j
# Fixed a bug in flatten() -- it stopped if there was an undefined
# item in the lists.
#
# Revision 1.46  2002/11/19  13:31:49  lw2j
# Added the missing 'max_lambda' setting for the /weighted/ truncated
# normal estimator.
#
# Revision 1.45  2002/11/19  13:03:35  lw2j
# Getting closer to a working truncated normal estimator, esp. for
# mixtures.
#
# Revision 1.43  2002/11/13  13:56:00  lw2j
# Put in some checks against (unlikely) division-by-zeros.
#
# Revision 1.42  2002/11/11  16:47:48  lw2j
# Assorted changes made to bug-fix mixtures of truncated normals.
#
# Revision 1.41  2002/11/08  18:18:39  lw2j
# Working on adding support for mixtures of truncated normals.
# Switched from integration-based to Levenberg-Marquardt-based
# estimation of truncated normals; it's slower, but the results
# are far better at least on some tests.
#
# Revision 1.40  2002/10/25  17:37:46  lw2j
# Much more exhaustive EM searching (more passes).
#
# Revision 1.39  2002/10/17  14:46:30  lw2j
# Changed mixture ppf generation on how it finds upper and
# lower bounds.  It can fail in fairly odd cases, perhaps when
# a spike is at either end due to the way that the entire spike
# gets one CDF value, but PPF needs to match it over a range of
# percentiles.  I'll need to figure out a way to fix this...
# basically, need to find the full CDF range associated with the
# spike.
#
# Revision 1.38  2002/10/11  13:06:34  lw2j
# Tweaks how the search bounds are identified when using mixture ppfs.
#
# Revision 1.37  2002/10/10  14:27:31  lw2j
# EM has more testing for cases that shouldn't happen in theory,
# but do in practice due to finite-precision arithmetic.
#
# Revision 1.36  2002/10/08  17:33:55  lw2j
# Density estimation moved to here.
#
# Revision 1.35  2002/10/08  12:52:38  lw2j
# Added a very basic density estimator.
#
# Revision 1.34  2002/10/07  11:46:07  lw2j
# Some range checks added:  force [0,1] for CDFs, [0,inf) for pdfs.
#
# Revision 1.33  2002/10/05  13:13:22  lw2j
# Fixed gamma pdf -- it wasn't using the chi-square base code, as
# were the cdf and ppf.
#
# Better determination of valid/invalid values for gld parameters.
#
# Revision 1.32  2002/09/27  18:13:06  lw2j
# Fixed inconsistency (pareto vs Pareto) in function names.
#
# Revision 1.31  2002/09/27  12:00:33  lw2j
# Fixed gld estimator handling of (lambda2 < 0); changing p to 1-p
# is probably a better way to handle things here.
#
# gld_gen_pdf's pdfs should now handle out-of-range calls.
#
# Revision 1.30  2002/09/26  17:45:03  lw2j
# Added (purely table-based) support for the generalized lambda
# distribution.
#
# Revision 1.29  2002/09/24  16:18:20  lw2j
# Now supports chi-square random variables (well, cdf/pdf/ppf.  No
# estimator yet.)
#
# Replaced estimator for cumulative distribution function of the
# normal distribution -- it should be more accurate now.
#
# Gamma distribution now uses the chi-square code rather than
# numerical integration (the old code is still there just in case
# this needs to be rolled back).  As a consequence, it /should/
# handle more extreme values now e.g. percentiles of 0.99999
# that would previously have caused an infinite loop.
#
# Revision 1.28  2002/09/19  10:49:01  lw2j
# Now allows goodness-of-fit testing via the correlation of the quantiles
# of the data and of the ppf.
#
# Revision 1.27  2002/09/06  13:49:26  lw2j
# Typo fixed in truncated lognormal ppf generation.
#
# Revision 1.26  2002/09/04  17:40:37  lw2j
# EM methods tweaked; bugs fixed in min/max/initial means setting.
#
# Revision 1.25  2002/09/04  14:11:31  lw2j
# Made the normal_em code more flexible -- the $means list ref
# provides a starting point, but it can add more components
# up to $k_max.
#
# Revision 1.24  2002/09/03  15:48:36  lw2j
# _flatten renamed to flatten(), since it's public.
# Mixture estimators now take two additional optional parameters:
# a minimum number of components, and a set of means to use.
#
# Revision 1.23  2002/08/21  13:33:01  lw2j
# Fixed number of parameters in Simpsons_rule invocation for
# computing I0 in truncated normal fitting.  This speeds it up
# a /lot/ as far as I can tell.
#
# Revision 1.22  2002/08/16  16:42:09  lw2j
# pdf/cdf/ppf now have better handling of bogus values (generally
# returning undef).
#
# Truncated estimators (normal, lognormal) now adjust the truncation
# points if they exclude some extreme data due to precision/estimation
# problems.
#
# DIST_LIST array includes cdf, ppf, pdf generators.
#
# $above_zero => $__ABOVE_ZERO.
#
# flatten is now exported.
#
# discretize_dist provides an (untested) way to discretize a
# continuous distribution given an estimator and the generator
# functions for cdf, ppf, pdf.
#
# Revision 1.21  2002/08/13  10:35:33  lw2j
# Commented out prints (fitting, chi-square testing) in the matcher.
#
# Revision 1.20  2002/08/12  17:54:42  lw2j
# Fixed issue with gamma_std_cdf.
#
# Revision 1.19  2002/08/06  14:09:14  lw2j
# Now supports a modified DGX distribution (dgx_prime).
#
# Revision 1.18  2002/07/18  15:56:40  lw2j
# Indentified.
#
# Revision 1.17  2002/07/15  15:45:21  lw2j
# Indented/untabified.
#
# Revision 1.16  2002/07/15  15:44:02  lw2j
# Minor tweaks and fixes.
#
# Revision 1.15  2002/07/12  10:58:22  lw2j
# Fixed integration of Gamma that shouldn't have started at zero
# (use $above_zero instead).
#
# Now includes mention of Romberg integration in case it seems
# useful.
#
# Revision 1.14  2002/07/08  17:13:44  lw2j
# Fixed typo in gamma pdf generation.
#
# Revision 1.13  2002/07/08  17:06:33  lw2j
# Fixed @_ vs 'shift' issue in fit_report.
#
# Revision 1.12  2002/07/08  16:45:36  lw2j
# Commented out debugging print.
#
# Revision 1.11  2002/07/08  16:44:46  lw2j
# Reworked the 'matching' functions.  Provided an AIC wrapper.
# Provided export of index arrays, distribution table.
#
# Revision 1.10  2002/06/28  16:35:32  lw2j
# Untabified.
#
# Revision 1.9  2002/06/28  14:45:01  lw2j
# Handles a limited form of truncated lognormals (very simple
# guessing method for the location parameter).
#
# Revision 1.8  2002/06/28  13:11:43  lw2j
# Now has support for estimating truncated normal distributions.
# It's a bit slow and uses a lot of approximations, and even at
# that it's darn ugly.
#
# Revision 1.7  2002/06/27  14:32:46  lw2j
# Mixture of lognormals is now supported in a _basic_ form -- it
# assumes that all the lognormal distributes share the same
# location parameter.
#
# Some untidiness from previous iterations has been cleaned up.
#
# There is now a function for generating ppfs for mixtures, given
# mixture CDF and a list of parameters (one list per component -
# x value, a sigma for jumping around, mixture cdf at x value,
# density, probability of mixture).  Basically, the function
# provides a way for estimating upper and lower bounds of x to
# "bracket" the y value; these then get passed to the numerical
# function inverter.
#
# Revision 1.6  2002/06/26  19:15:42  lw2j
# BIC_wrap modified to "flatten" parameter list before computing BIC.
#
# Revision 1.5  2002/06/26  18:00:10  lw2j
# BIC_wrap now provides a convenient way to use an estimator and
# simultaneously get --
#   cumulative distribution function
#   estimated parameters of distribution
#   percentage-point function (inverse CDF -- usually numeric)
#   probability density function
#   log-likelihood (log base 2)
#   Bayesian information criterion (log base 2)
# Jeepers.
#
# Revision 1.4  2002/06/26  17:40:34  lw2j
# Gaussian mix now takes BIC from the EM code.
#
# Revision 1.3  2002/06/26  16:50:24  lw2j
# Added 'normal_mix_est' along with cdf,pdf,ppf generators.
#
# Revision 1.2  2002/06/26  15:07:17  lw2j
# Added support for EM fitting of mixture of Gaussians.
# Uses BIC to judge.
#
##########################################################################
#
# Package Header
#
##########################################################################

package UniRand;
require Exporter;
require ChiSquare;
require POSIX;

use strict;
use lib '/usr/lw2j/private/Stat/Perl';

require AndersonDarling;
require ArrayFoo;
require Numerical;
require util;


# Prototypes.
sub normal_est($);
sub normal_mix_est($;$$$);
sub normal_truncated_est($);
sub uniform_est($);
sub exponential_est($);
sub lognormal_est($);
sub lognormal_mix_est($;$$$);
sub lognormal_truncated_est($);
sub gamma_est($);
sub Pareto_est($);
sub fit_report(@);
sub _normal_truncated_mix_fix_helper($$$);

##########################################################################
# This package depends heavily on the numerical algorithms in the
# Numerical package -- code here requires integration, differentiaton,
# inverse function solving, loads of stuff like that.
##########################################################################

BEGIN {
  use Exporter ();
  use Symbol;
  use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

  $VERSION     = 1.0;
  @ISA         = qw(Exporter);
  @EXPORT      = ();
  %EXPORT_TAGS = ();

  ##########################################################################
  # Common terms:
  #   est = estimator
  #         Takes reference to data as array of scalars.
  #         Returns tuple of
  #            cumulative distribution function
  #            list reference to estimated parameters
  #            percentage-point function
  #            probability distance function
  #         OR tuple of undefs if fitter does not apply.
  #
  #   wtd = Weighted version - takes additional list ref.
  #         referring to weights.
  #   std = Standard form, e.g N(0,1).
  #   gen = Function generator.  Takes ref to list of parameters,
  #         returns function.
  #   cdf = cumulative distribution function.  Maps from data to
  #         [0,1] (or (0,1) or so forth).
  #   ppf = percentage-point function (inverse cdf).  In almost
  #         all cases this is done through an iterative search.
  #   pdf = probability density function.
  #
  #   em  = expectation-maximation (fuzzy k-means)
  #   mix = mixture (e.g. normal_mix_est fits a mixture of
  #         normals)
  #   truncated = truncated distribution
  #   ll  = log-likelihood (base 2), high is good
  #   BIC = Bayesian Information Criterion, namely
  #         BIC = (-2 * ll) + (q * log(n))
  #
  #         q = number of parameters
  #         n = sample size
  #
  #         BIC (low is good) rewards fit but punishes
  #         model complexity.
  #   AIC = Akaike Information Criterion, namely
  #         AIC = (-2 * ll) + (2 * q)
  #         Similar to BIC, but reduced penalty for model
  #         complexity
  #
  #
  ##########################################################################
  # Distribution-specific notes:
  ##########################################################################
  # normal = Univariate normal.
  #          Two-parameter form:  mean and standard deviation.
  #          Math is based on approximation via Q-function; see
  #          Leon-Garcia, "Probability and Random Processes for
  #          Electrical Engineering".  ppf requires iterative
  #          search.
  #
  #          Mixture fitting is slow, but decent.  EM criterion
  #          is BIC; k stops increasing when BIC increases.
  #
  #          Truncated fitting is complicated, very slow, has
  #          nontrivial variance.  Based on A. C. Cohen Jr's
  #          "Estimating the Mean and Variance of Normal
  #          Populations from Singly Truncated and Doubly
  #          Truncated Samples".
  #
  # uniform = Univariate.  Included here as a formality.  Two
  #          parameters -- high and low.
  #
  # exponential = Technically superfluous due to presence of
  #          gamma, but requires signficantly simpler, fewer
  #          computations to perform.  Two parameters --
  #          location and lambda.
  #
  # lognormal = Three-parameter form (mean, standard
  #           deviation, location).  Location is inferred
  #           very simply (NOT via full MLE).
  #
  # gamma   = Three parameters -- location, shape, scale.
  #           Very complicated.  CDF known to be inaccurate
  #           at extrema.
  #
  # Pareto  = Two parameters, location and shape.
  #

  @EXPORT_OK   = qw(
    %EST_INDICES
    @DIST_LIST
    @DIST_LIST_PLAIN

    find_by_name
    find_by_est

    normal_est
    normal_est_wtd
    normal_std_cdf
    normal_std_ppf
    normal_std_pdf
    normal_gen_cdf
    normal_gen_ppf
    normal_gen_pdf

    normal_em
    normal_mix_est
    normal_mix_gen_cdf
    normal_mix_gen_ppf
    normal_mix_gen_pdf

    normal_truncated_est
    normal_truncated_est_wtd
    normal_truncated_gen_cdf
    normal_truncated_gen_ppf
    normal_truncated_gen_pdf

    normal_truncated_em
    normal_truncated_mix_est
    normal_truncated_mix_gen_cdf
    normal_truncated_mix_gen_ppf
    normal_truncated_mix_gen_pdf
    normal_truncated_mix_fix

    window_est

    uniform_est
    uniform_est_wtd
    uniform_gen_cdf
    uniform_gen_ppf
    uniform_gen_pdf

    exponential_est
    exponential_est_wtd
    exponential_gen_cdf
    exponential_gen_ppf
    exponential_gen_pdf

    lognormal_est
    lognormal_est_wtd
    lognormal_gen_cdf
    lognormal_gen_ppf
    lognormal_gen_pdf

    lognormal_mix_est
    lognormal_mix_gen_cdf
    lognormal_mix_gen_ppf
    lognormal_mix_gen_pdf

    lognormal_truncated_est
    lognormal_truncated_gen_cdf
    lognormal_truncated_gen_ppf
    lognormal_truncated_gen_pdf

    chi_square_gen_cdf
    chi_square_gen_pdf
    chi_square_gen_ppf

    gamma_est
    gamma_est_wtd
    gamma_gen_std_cdf
    gamma_gen_std_pdf
    gamma_gen_std_ppf
    gamma_gen_cdf
    gamma_gen_ppf
    gamma_gen_pdf
    gamma_gen_cdf_via_chi
    gamma_gen_ppf_via_chi
    gamma_gen_pdf_via_chi

    Pareto_est
    Pareto_est_wtd
    Pareto_gen_cdf
    Pareto_gen_ppf
    Pareto_gen_pdf

    dgx_prime_est
    dgx_prime_gen_cdf
    dgx_prime_gen_ppf
    dgx_prime_gen_pdf

    gld_est
    gld_gen_cdf
    gld_gen_ppf
    gld_gen_pdf

    discretize_dist
    discretize_est
    discretize_cdf
    discretize_ppf

    density_est

    %MATCH_INDICES
    all_match
    best_match
    fit_report
    BIC_wrap
    qq_correlate
    flatten
    _mix_gen_ppf
  );
}

our @EXPORT_OK;

# For convenience:
our %MATCH_INDICES = ();
our %EST_INDICES   = ();
our @DIST_LIST     = ();
our @DIST_LIST_WTD = ();
our @DIST_LIST_PLAIN = ();

my $__UNIRAND_DEBUG_PRINTF = 0;
my $__match_idx = 0;

map {
  $MATCH_INDICES{$_} = $__match_idx++
}
(
  "name", "param_names", "ll", "k",
  "BIC", "AIC", "chi", "cdf", "params",
  "ppf", "pdf", "est"
);

# Constants, for reference and ease of use.  These give
# the orders of the values returned by parameters.
%EST_INDICES = (
  "cdf"    => 0,
  "params" => 1,
  "ppf"    => 2,
  "pdf"    => 3
);


# List of distributions as name/estimator/param names tuples.
# The parameter list is mostly for reference.  For the mixtures,
# well, they're complicated; see the comments preceding the
# appropriate estimators.
#
# This list may grow (for instance, I just added the cdf/ppf/pdf
# function references to each item) but the fields' ordering
# should be preserved.
@DIST_LIST = (
  +[ 'normal', \&normal_est,
  +[ "mean", "standard deviation"],
  \&normal_gen_cdf,
  \&normal_gen_ppf,
  \&normal_gen_pdf],
  +[ 'uniform', \&uniform_est,
  +[ "low", "high"],
  \&uniform_gen_cdf,
  \&uniform_gen_ppf,
  \&uniform_gen_pdf],
  +[ 'exponential', \&exponential_est,
  +[ "location", "shape" ],
  \&exponential_gen_cdf,
  \&exponential_gen_ppf,
  \&exponential_gen_pdf],
  +[ 'lognormal',   \&lognormal_est,
  +[ "mean (N)", "standard deviation (N)", "location"],
  \&lognormal_gen_cdf,
  \&lognormal_gen_ppf,
  \&lognormal_gen_pdf],
  +[ 'gamma',  \&gamma_est,
  +[ "location", "shape", "scale" ],
  \&gamma_gen_cdf,
  \&gamma_gen_ppf,
  \&gamma_gen_pdf],
  +[ 'Pareto',      \&Pareto_est,
  +[ "location", "shape"],
  \&Pareto_gen_cdf,
  \&Pareto_gen_ppf,
  \&Pareto_gen_pdf],
  +[ "DGX (modified)", \&dgx_prime_est,
  +[ "mean (N)", "standard deviation (N)"],
  \&dgx_prime_gen_cdf,
  \&dgx_prime_gen_ppf,
  \&dgx_prime_gen_pdf],
  +[ "generalized lambda", \&gld_est,
  +[ "lambda1", "lambda2", "lambda3", "lambda4" ],
  \&gld_gen_cdf,
  \&gld_gen_ppf,
  \&gld_gen_pdf],
  +[ 'truncated normal', \&normal_truncated_est,
  +[ "mean", "standard deviation", "left trunc",
  "right trunc"],
  \&normal_truncated_gen_cdf,
  \&normal_truncated_gen_ppf,
  \&normal_truncated_gen_pdf],
  +[ 'truncated lognormal', \&lognormal_truncated_est,
  +[ "mean (N)", "standard deviation (N)",
  "left trunc (N)", "right trunc (N)",
  "location"],
  \&lognormal_truncated_gen_cdf,
  \&lognormal_truncated_gen_ppf,
  \&lognormal_truncated_gen_pdf],
  +[ 'mixture of normals', \&normal_mix_est,
  +[ 'mixing probability', 'mean', 'standard deviation'],
  \&normal_mix_gen_cdf,
  \&normal_mix_gen_ppf,
  \&normal_mix_gen_pdf],
  +[ 'mixture of truncated normals', \&normal_truncated_mix_est,
  +[ 'mixing probability', 'mean', 'standard deviation', 'left trunc',
  'right trunc'],
  \&normal_truncated_mix_gen_cdf,
  \&normal_truncated_mix_gen_ppf,
  \&normal_truncated_mix_gen_pdf],
  +[ 'mixture of lognormals', \&lognormal_mix_est,
  +[ 'mixing probability', 'mean (N)', 'standard deviation (N)',
  'location'],
  \&lognormal_mix_gen_cdf,
  \&lognormal_mix_gen_ppf,
  \&lognormal_mix_gen_pdf]
);


@DIST_LIST_WTD = (
  +[ 'normal (weighted)', \&normal_est_wtd,
  +[ "mean", "standard deviation"],
  \&normal_gen_cdf,
  \&normal_gen_ppf,
  \&normal_gen_pdf],
  +[ 'uniform (weighted)', \&uniform_est_wtd,
  +[ "low", "high"],
  \&uniform_gen_cdf,
  \&uniform_gen_ppf,
  \&uniform_gen_pdf],
  +[ 'exponential (weighted)', \&exponential_est_wtd,
  +[ "location", "shape" ],
  \&exponential_gen_cdf,
  \&exponential_gen_ppf,
  \&exponential_gen_pdf],
  +[ 'lognormal (weighted)',   \&lognormal_est_wtd,
  +[ "mean (N)", "standard deviation (N)", "location"],
  \&lognormal_gen_cdf,
  \&lognormal_gen_ppf,
  \&lognormal_gen_pdf],
  +[ 'gamma (weighted)',  \&gamma_est_wtd,
  +[ "location", "shape", "scale" ],
  \&gamma_gen_cdf,
  \&gamma_gen_ppf,
  \&gamma_gen_pdf],
  +[ 'Pareto (weighted)',      \&Pareto_est_wtd,
  +[ "location", "shape"],
  \&Pareto_gen_cdf,
  \&Pareto_gen_ppf,
  \&Pareto_gen_pdf],
  +[ 'truncated normal (weighted)', \&normal_truncated_est_wtd,
  +[ "mean", "standard deviation", "left trunc",
  "right trunc"],
  \&normal_truncated_gen_cdf,
  \&normal_truncated_gen_ppf,
  \&normal_truncated_gen_pdf]
);

# @DIST_LIST_PLAIN has only 'simple' distributions -- non-truncated,
# non-mixture.
@DIST_LIST_PLAIN =  grep {
  (!($_->[0] =~ /truncated/)) &&
  (!($_->[0] =~ /mixture/))
} @DIST_LIST;


# Arbitrary.  When data must be shifted so it's all positive for
# picky functions, like log, this is treated as a minimum.
my $__ABOVE_ZERO = 1e-6;


# This is used for telling Nelder-Mead simplex to avoid the choice
# of parameters that caused this.  For instance, if a parameter was
# out of bounds (e.g. standard deviation < 0) and the PDF generator
# returns undef (no such function possible), then log-likelihood
# cannot be computed.  In that case, return $__VERY_HIGH, which
# penalizes that sequence rather severely.
my $__VERY_HIGH  = 1e50;


# When computing log-likelihoods, apply this as a floor to
# probabilities.
my $__LL_PROB_FLOOR = 1e-6;


sub new($@) {
  my $class = shift;
  my $self  = +{};

  bless $self, $class;

  return $self;
}


# Useful constants.
my $__M_PI      = 4*atan2(1,1);
my $__SQRT2PI   = sqrt(2*$__M_PI);

# Divide the PPF domain into eight equiwidth Gauss_intervals.
my @Gauss_intervals = ( +[ -3, -1], [-1.5, -0.5], [-1, -0.25], [-0.5, 0],
[ 0, 0.5], [0.25, 1], [0.5, 1.5], [1, 3] );



# Give it a name (possibly including the tag "[DISCRETE]"),
# a transformation (optional; only meaningful for discretized
# distros), and an inverse (ditto).
#
# It'll try to match the name with one in the lists.  If so, it'll
# return
#
#   the estimator
#   the cdf generator
#   the ppf generator
#   the pdf generator
#
# unless there's no match at all, in which case undef is returned.
sub find_by_name($;$$) {
  my $name       = shift;
  my $xf         = shift;
  my $xf_inverse = shift;
  my $discrete  = 0;

  if (($name =~ s/\[discrete\]//gi) ||
  ($name =~ s/\(discrete\)//gi)) {
    $discrete = 1;
  }

  $name =~ s/^\s//;
  $name =~ s/\s$//;

  my $est     = undef;
  my $cdf_gen = undef;
  my $ppf_gen = undef;
  my $pdf_gen = undef;
  my $cdf     = undef;
  my $ppf     = undef;
  my $pdf     = undef;

  dist_loop:  foreach (@DIST_LIST, @DIST_LIST_WTD) {
    my @dist_tuple = @{$_};
    my $d_name     = $dist_tuple[0];

    if ($d_name =~ /^$name$/i) {
      # case-insensitive match
      ($est, $cdf_gen, $ppf_gen, $pdf_gen) = @dist_tuple[1,3,4,5];
      last dist_loop;
    }
  }

  defined($est) || return undef;

  if ($discrete) {
    my $d_est = discretize_est($est, $cdf_gen, $ppf_gen, $pdf_gen,
    $xf, $xf_inverse);
    defined($d_est) || return undef;

    my $d_cdf_gen = sub {
      my $param_ref = shift @_;
      my $cdf    = &$cdf_gen($param_ref);
      my $d_cdf  = discretize_cdf($cdf, $xf, $xf_inverse);
      return ($d_cdf);
    };

    my $d_ppf_gen = sub {
      my $param_ref = shift @_;
      my $ppf    = &$ppf_gen($param_ref);
      my $d_ppf  = discretize_ppf($ppf, $xf, $xf_inverse);
      return ($d_ppf);
    };

    my $d_pdf_gen = sub {
      my $param_ref = shift @_;
      my $cdf    = &$cdf_gen($param_ref);
      my ($d_cdf, $d_pdf)  = discretize_cdf($cdf, $xf, $xf_inverse);
      return ($d_pdf);
    };

    return ($d_est, $d_cdf_gen, $d_ppf_gen, $d_pdf_gen);
  } else {
    return ($est, $cdf_gen, $ppf_gen, $pdf_gen);
  }
}



# Given a reference to one of the estimator functions, identify the
# tuple and return a reference to it.
#
# This tuple is --
#  ( name of distribution,
#    ref to estimator,
#    lref of parameter names,
#    function ref to cdf generator,
#    function ref to ppf generator,
#    function ref to pdf generator )
#
# or undef if no match.
sub find_by_est($) {
  my $est = shift;

  defined($est) || die;

  my @matches = grep { $_->[1] == $est } (@DIST_LIST, @DIST_LIST_WTD);
  my $ct = scalar @matches;

  if (!$ct) {
    return undef;
  } elsif ($ct > 1) {
    die;  # Eh?  Two identical estimators?
  } else {
    my $tuple = shift @matches;
    return $tuple;
  }
}

# Fit a normal distribution to the data.  The parameter list contains
# the mean and standard deviation.
#
# If no inputs, return cdf/ppf/pdf generators.
#
# Parameters:  ($mu [mean], $sigma [standard deviation])
#
# Standard form:
#
# F($x)   =~ 1-Q(x) (for half)
# f($x)   = exp(-($x^2)/2) / sqrt(2*$__M_PI)
# f-1($x) = numeric only

# Otherwise:
# F($x)   =~ 1-Q((x-$mu)/$sigma)
# f($x)   = exp(-(($x-$mu)^2)/(2*($sigma^2))) / (sqrt(2*$__M_PI)*$sigma)
# f-1($x) = numeric only

sub normal_est($) {
  my $data_ref = shift;

  if ((!defined($data_ref)) || (ref($data_ref) ne 'ARRAY') ||
  (!scalar(@$data_ref))) {
    return undef;
  }

  my $mu     = ArrayFoo::arrMean($data_ref);
  my $sigma     = ArrayFoo::arrDev($data_ref);
  my $param_ref = +[ $mu, $sigma ];
  my $CDF_sub  = normal_gen_cdf($param_ref);
  my $PPF_sub  = normal_gen_ppf($param_ref);
  my $PDF_sub  = normal_gen_pdf($param_ref);

  return ($CDF_sub, $param_ref, $PPF_sub, $PDF_sub);
}

# Weighted version -- pass in two parallel arrays.  This version
# exists for doing EM.
sub normal_est_wtd($$) {
  my $data_ref   = shift;

  if ((!defined($data_ref)) || (ref($data_ref) ne 'ARRAY') ||
  (!scalar(@$data_ref))) {
    return undef;
  }

  my $wtdRef    = shift;
  my $mu        = ArrayFoo::arrMeanWtd($data_ref, $wtdRef);
  my $sigma     = ArrayFoo::arrDevWtd($data_ref, $wtdRef);
  my $param_ref = +[ $mu, $sigma ];
  my $CDF_sub   = normal_gen_cdf($param_ref);
  my $PPF_sub   = normal_gen_ppf($param_ref);
  my $PDF_sub   = normal_gen_pdf($param_ref);

  return ($CDF_sub, $param_ref, $PPF_sub, $PDF_sub);
}



# The CDF of the standard form of the normal distribution --
# mean 0, standard deviation 1.
sub normal_std_cdf($) {
  my $x    = shift @_;

  if (0) {
    # Old way.
    # The Q function serves as an approximator for half of the
    # CDF curve.  Method found in "Probability and Random Processes
    # for Electrical Engineering", by Albert Leon-Garcia.

    my $absx = abs($x);
    my $a = 1/$__M_PI;
    my $b = 2*$__M_PI;
    my $Q = (1/(((1-$a)*$absx) + $a*(sqrt(($absx*$absx)+$b)))) *
    (1/sqrt(2*$__M_PI)) * (exp(-$absx*$absx/2));

    my $cdf = 1 - $Q;

    # The other half.
    if ($x < 0) {
      $cdf = 0.5 - ($cdf - 0.5);
    }

    $cdf = ($cdf < 0) ? 0 : $cdf;
    $cdf = ($cdf > 1) ? 1 : $cdf;

    return $cdf;
  } else {
    if (abs($x) <= 3) {
      # Johnson and Kotz, _Continuous Univariate Distributions_, c13,
      # Burr's approximations.
      my $alpha = 0.644693;
      my $beta  = 0.161984;
      my $c     = 4.874;

      # I flipped the sign, as it /really/ seems that there's a typo
      # in J&K here.
      my $k       = 6.158;
      my $Gx      = 1 - ((1+(($alpha + ($beta * $x))**$c)) ** (-$k));
      my $Gx_sym  = 1 - ((1+(($alpha + ($beta * (-$x)))**$c)) ** (-$k));

      my $Hx      = 0.5*($Gx + 1 - $Gx_sym);

      defined($Hx) || die;

      $Hx = ($Hx >= 0) ? $Hx : 0;
      $Hx = ($Hx <= 1) ? $Hx : 1;
      return $Hx;
    } else {
      # With higher-magnitude values, the above runs into NANs.
      # This code uses a method by Shucany and Gray, same J&K book.

      my $flip = ($x < 0);

      $x = abs($x);

      my $x2 = $x ** 2;
      my $x4 = $x2 * $x2;
      my $x6 = $x4 * $x2;

      my $temp = 1/(($x2+2)*sqrt(2*$__M_PI));

      $temp *= $x;
      $temp *= exp(-$x2/2);
      $temp *= ($x6 + (6*$x4) + (14*$x2) - 28);
      $temp /= ($x6 + (5*$x4) - (20*$x2) - 4);

      my $phi = $flip ? $temp : (1-$temp);

      defined($phi) || die;
      $phi = ($phi >= 0) ? $phi : 0;
      $phi = ($phi <= 1) ? $phi : 1;

      return $phi;
    }

  }
};


# Likewise.
sub normal_std_ppf($;$) {
  my $y          = shift @_;
  my $err_thresh = shift @_;
  my $low        = undef;
  my $high       = undef;
  my $iter       = 0;
  my $converge   = 1e-6;

  # Don't be silly.
  (return undef) unless (($y > 0) && ($y < 1));

  if ($y == 0.5) {
    return 0;
  }

  if (!defined($err_thresh)) {
    $err_thresh = 0.0005;
  }

  # Horizontal partitioning.  At the tails, we may need to extend
  # by additional sdev's.
  my $int_count = scalar @Gauss_intervals;
  my $int_ref = $Gauss_intervals[int($y*$int_count)];
  my ($lo, $hi)     = @$int_ref;
  my $loval = undef;
  my $hival = undef;
  my $old_loval = undef;
  my $old_hival = undef;

  # Extend the extremes if need be.
  while ((($loval=normal_std_cdf($lo)) > $y) &&
  ((!defined($old_loval)) ||
  (($old_loval - $loval) >= $converge))) {
    $lo--;
    $old_loval = $loval;
  }

  while ((($hival=normal_std_cdf($hi)) < $y) &&
  ((!defined($old_hival) ||
  (($hival - $old_hival) >= $converge)))) {
    $hi++;
    $old_hival = $hival;
  }

  my $x = Numerical::inverse_search(\&normal_std_cdf, $y,
  $lo, $hi);

  return $x;
}


# The PDF of the standard form of the normal distribution --
# mean 0, standard deviation 1.
sub normal_std_pdf($) {
  my $x    = shift @_;

  my $density = exp(-($x ** 2)) / $__SQRT2PI;

  return $density;
};


# Given the parameter list reference, generate a CDF.
sub normal_gen_cdf($) {
  my $param_ref = shift @_;
  my $mu      = $param_ref->[0];
  my $sigma      = $param_ref->[1];

  if ($sigma < 0) {
    return undef;
  }

  return sub {
    my $x = shift @_;

    # normalize
    if ($sigma > 0) {
      $x = ($x-$mu) / $sigma;
    } else {
      $x = 0;
    }

    return normal_std_cdf($x);
  };
}


# Generate the percentage-point function, or inverse CDF.
sub normal_gen_ppf($) {
  my $param_ref = shift @_;
  my $mu        = $param_ref->[0];
  my $sigma     = $param_ref->[1];

  # Takes an optional errthresh parameter.  0.001 means "get it right to
  # within a tenth of a percent" absolute scale, that is.
  my $PPF_sub = sub {
    my $y = shift @_;

    (($y > 0) && ($y < 1)) || (return undef);
    my $x = normal_std_ppf($y);

    defined($x) || (return undef);
    return (($x * $sigma) + $mu);
  };

  return $PPF_sub;
}


# Given the parameter list reference, generate a probability density
# function.
sub normal_gen_pdf($) {
  my $param_ref = shift @_;
  my $mu      = $param_ref->[0];
  my $sigma      = $param_ref->[1];

  my $denom     = $__SQRT2PI * $sigma;
  my $variance  = $sigma * $sigma;

  if ($sigma == 0) {
    #!
    return sub {
      my $x = shift @_;

      # Should be infinite.
      return (($x == $mu) ? 1 : 0);
    }
  }

  return sub {
    my $x = shift @_;

    my $density = exp(-(($x-$mu) ** 2) / (2*$variance)) /
    $denom;

    return $density;
  };
}



# Given a reference to the data, use Numerical::em_converge to
# fit a mixture of standard (non-truncated) univariate
# Gaussians.  You can also give it an optional maximum k.
#
# Criterion for stopping one iteration:
#    Log-likelihood appearing to converge.
# Criterion for stopping testing with one particular $k:
#    Just because -- it's arbitrary.  4*$k for now -- we
#    run more passes when the model is more complex.
# Criterion for stopping testing (final $k reached):
#    BIC increases.
#
# Returns:  3 items.
#   a) List (ref) of tuples
#      Each consists of <mixing prob, estimator, pdf, parameter ref list>
#   b) Log-likelihood.
#   c) BIC.
#
# Estimate initialization of mixing probability:
#   All passes:  1/$k
#
# Estimate initialization of means:
#   Pass #1:     Uniform wrt density.
#   Pass #2:     Uniform ignoring density.
#   Pass #3-x:  Random initialization.
#
# Estimate initialization of standard deviations:
#   All passes:  Sample standard deviation.
#
# BIC = -2 log_likelihood + (q * log(n))
#
# where n is sample size and q is parameter count (thus
# dependent on $k).
#

sub normal_em($;$$$) {
  my $data_ref = shift;
  my $k_max    = shift;
  my $k_min    = shift;
  my $means    = shift;  # list of means
  my $k        = defined($k_min) ? $k_min : 2;
  my $n        = scalar @$data_ref;
  my $lo       = ArrayFoo::arrSelect($data_ref, 0);
  my $hi       = ArrayFoo::arrSelect($data_ref, $n-1);

  # Standard deviation.
  my $sigma_global = ArrayFoo::arrDev($data_ref);

  # List, LL, BIC
  my @global_best = (undef, undef, undef);

  if (defined($means)) {
    $k_min = scalar @$means;
    $k = $k_min;
  }

  test_k:  {
    my @k_best = (undef, undef, undef);
    my $passes = 20;

    for (my $pass=0; $pass < $passes; $pass++) {
      my @current = (+[], undef, undef);


      # Initialization.
      for (my $k_idx=0; $k_idx < $k; $k_idx++) {
        my $mean     = undef;
        my $sigma    = $sigma_global;

        if ($pass == 0) {
          if ((defined($means)) && ($k_idx < (scalar @$means))) {
            $mean = $means->[$k_idx];
          } else {
            # Equally spaced in terms of density.
            my $real_idx = ($k_idx  / $k ) * $n;
            $real_idx = ($real_idx == $n) ? ($n-1) : $real_idx;


            if ($real_idx == int($real_idx)) {
              $mean = ArrayFoo::arrSelect($data_ref, $real_idx);
            } else {
              my $lo_idx = int($real_idx);
              my $hi_idx = $lo_idx+1;

              my $lo_mean = ArrayFoo::arrSelect($data_ref, $lo_idx);
              my $hi_mean = ArrayFoo::arrSelect($data_ref, $hi_idx);

              $mean = $lo_mean + (($hi_mean - $lo_mean) *
              ($real_idx - $lo_idx));
            }
          }
        } elsif (defined($means) && ($k_idx < scalar(@$means))) {
          $mean = $means->[$k_idx];
        } else {
          # Random selection, should reflect density.
          $mean = $data_ref->[int(rand($n))];
        }

        my $param_ref = +[ $mean, $sigma ];
        my $pdf       = normal_gen_pdf($param_ref);

        $current[0]->[$k_idx] = [ 1/$k, \&normal_est_wtd, $pdf ];
      }
      #      print STDERR "k=$k, pass=$pass\n";

      # Initialization complete.  Run EM to convergence.
      my ($final_ref, $ll, $BIC) = Numerical::em_converge($data_ref,
      $current[0], 0);
      if (!defined($final_ref)) {
        return undef;
      }

      @current = ($final_ref, $ll, $BIC);

      if ((!defined($k_best[2])) || ($k_best[2] > $BIC)) {
        # This is the new winner for this particular 'k'.
        @k_best = @current;
      }
    }

    if ((!defined($global_best[2])) || ($global_best[2] > $k_best[2])) {
      # This increased k yielded the new winner under BIC.

      @global_best = @k_best;

      if ((!defined($k_max)) || ($k < $k_max)) {
        if ((scalar (@{$k_best[0]})) == $k) {
          $k++;
          redo test_k;
        }
      }
    }

    return @global_best;
  }
}




# Given a reference to the data, use Numerical::em_converge to
# fit a mixture of univariate Gaussians.  You can also give it an
# optional maximum k.
#
# Criterion for stopping one iteration:
#    Log-likelihood appearing to converge.
# Criterion for stopping testing with one particular $k:
#    Just because -- it's arbitrary.  4*$k for now -- we
#    run more passes when the model is more complex.
# Criterion for stopping testing (final $k reached):
#    BIC increases.
#
# Returns:  3 items.
#   a) List (ref) of tuples
#      Each consists of <mixing prob, estimator, pdf, parameter ref list>
#   b) Log-likelihood.
#   c) BIC.
#
# BIC = -2 log_likelihood + (q * log(n))
#
# where n is sample size and q is parameter count (thus
# dependent on $k).
sub normal_em2($;$$$) {
  my $data_ref = shift;
  my $k_max    = shift;
  my $k_min    = shift;
  my $means    = shift;  # list of means
  my $k        = defined($k_min) ? $k_min : 2;
  my $iter        = 0;
  my $iter_auto   = 5;
  my $n        = scalar @$data_ref;
  my $lo       = ArrayFoo::arrSelect($data_ref, 0);
  my $hi       = ArrayFoo::arrSelect($data_ref, $n-1);
  my $range    = ($hi-$lo);

  # Standard deviation.
  my $sigma_global = ArrayFoo::arrDev($data_ref);

  # List, LL, BIC
  my @global_best = (undef, undef, undef);

  if (defined($means)) {
    $k_min = scalar @$means;
    $k = $k_min;
  }

  my @current = (+[], undef, undef);

  # Initialization.

  if ($k > 1) {
    for (my $k_idx=0; $k_idx < $k; $k_idx++) {
      my $mean     = undef;
      my $real_idx = undef;

      $real_idx = ($k_idx / ($k - 1)) * $n;
      $real_idx = ($real_idx == $n) ? ($n-1) : $real_idx;

      if ($real_idx == int($real_idx)) {
        $mean = ArrayFoo::arrSelect($data_ref, $real_idx);
      } else {
        my $lo_idx = int($real_idx);
        my $hi_idx = $lo_idx+1;

        my $lo_mean = ArrayFoo::arrSelect($data_ref, $lo_idx);
        my $hi_mean = ArrayFoo::arrSelect($data_ref, $hi_idx);

        $mean = $lo_mean + (($hi_mean - $lo_mean) *
        ($real_idx - $lo_idx));
      }

      my $sigma_l = abs(($lo - $mean)/3);
      my $sigma_r = abs(($hi - $mean)/3);
      my $sigma = ($sigma_l > $sigma_r) ? $sigma_l : $sigma_r;
      my $param_ref = +[ $mean, $sigma ];

      $__UNIRAND_DEBUG_PRINTF && print "Component init [$mean, $sigma]\n";
      my $pdf       = normal_gen_pdf($param_ref);

      $current[0]->[$k_idx] = +[ 1/$k, \&normal_est_wtd, $pdf ];
    }
  } else {
    my ($cdf, $param_ref, $ppf, $pdf) = normal_est($data_ref);
    defined($cdf) || return undef;


    $current[0]->[0] = +[ 1, \&normal_est_wtd, $pdf,
    $param_ref ];

    my $ll = Numerical::compute_log_likelihood($data_ref, $pdf,
    $__LL_PROB_FLOOR);
    defined($ll) || return undef;

    # prob + 4 base params of normal-truncated
    my $q = 5;
    my $BIC = (-2*$ll) + ($q * log($n)/log(2));


    $__UNIRAND_DEBUG_PRINTF && print "Component init [",
    join(", ", @$param_ref), "]\n";
    $__UNIRAND_DEBUG_PRINTF && print "ll=$ll, BIC=$BIC\n";

    $current[1] = $ll;
    $current[2] = $BIC;
  }

  $__UNIRAND_DEBUG_PRINTF && print "k=$k\n";

  em_loop:  {
    my ($final_ref, $ll, $BIC) = (undef, undef, undef);
    my $again = 0;

    if ($k > 1) {
      ($final_ref, $ll, $BIC) = Numerical::em_converge($data_ref,
      $current[0], 0);
    } else {
      ($final_ref, $ll, $BIC) = @current;
    }

    if (!defined($final_ref)) {
      return @global_best;
    }

    defined($ll) || die;
    defined($BIC) || die;

    $k = scalar @$final_ref;
    @current = (util::deep_copy($final_ref), $ll, $BIC);

    if ((!defined($global_best[2])) || ($global_best[2] > $current[2])) {
      # This increased k yielded the new winner under BIC.
      @global_best = (util::deep_copy($final_ref), $ll, $BIC);
      $again = 1;
    }

    if ($iter < $iter_auto) {
      $again = 1;
    }

    if ($again && ((!defined($k_max)) || ($k < $k_max))) {
      $k++;
      $iter++;

      # Identify area of least weighted likelihood, and put a new
      # component there.

      my $glob_param_ref = +[];

      foreach (@{$current[0]}) {
        my ($prob, $base_params) = @{$_}[0,3];
        push @$glob_param_ref, +[ $prob, @$base_params ];
      }

      my $global_pdf =  normal_mix_gen_pdf($glob_param_ref);
      my @density_pairs = ();

      defined($global_pdf) || die;
      foreach (@$data_ref) {
        my $datum      = $_;
        my $likelihood = &$global_pdf($datum);

        # Consistency check.
        defined($likelihood) || die;
        ($likelihood > 0) || die;
        push @density_pairs, +[ $datum, $likelihood ];
      }

      @density_pairs = sort { $a->[1] <=> $b->[1] } @density_pairs;

      # Keep just the least-likely 1%.  1% is arbitrary.
      my $pts = int(100*$n);

      # But always at least 1...
      $pts = ($pts > 0) ? $pts : 1;

      splice @density_pairs, $pts;

      my $wt_sum = 0;
      my $sum    = 0;

      foreach (@density_pairs) {
        my ($itm, $lh) = @{$_};

        $wt_sum += 1/$lh;
        $sum    += $itm/$lh;
      }

      my $mean    = $sum / $wt_sum;
      my $sigma_l = abs(($lo - $mean)/3);
      my $sigma_r = abs(($hi - $mean)/3);
      my $sigma = ($sigma_l > $sigma_r) ? $sigma_l : $sigma_r;
      my $param_ref = +[ $mean, $sigma ];

      $__UNIRAND_DEBUG_PRINTF && print "Component init [$mean, $sigma]\n";
      my $comp_pdf       = normal_gen_pdf($param_ref);


      ($k > 1) || die;
      # Reduce probabilities accordingly.
      map { $_->[0] *= (($k-1)/$k) } @{$current[0]};

      push @{$current[0]}, +[ 1/$k, \&normal_est_wtd,
      $comp_pdf, $param_ref ];
      redo em_loop;
    }
  }

  return @global_best;
}


# Fit a mixture of Gaussians to the data.  The number of Gaussians,
# $k, does not need to be specified -- it will be inferred via the
# Bayesian information criterion
#
#   BIC = -2*log(l) + q*log(n)      { low is good }
#
#  l = likelihood
#  q = number of parameters = 3k
#  n = number of samples
#
# Logs are in base 2, not that it matters much.
#
# Parameters:
#  A list of { probability, mean, deviation } trios.
#
# F($x)   = sum of (mix * Fgauss($x))
# f($x)   = sum of (mix * fgauss($x))
# F-1($y) = not pleasant.
#
# If you like, give it a maximum k.
sub normal_mix_est($;$$$) {
  my $data_ref = shift;
  my $k_max    = shift;
  my $k_min    = shift;
  my $means    = shift;

  if ((!defined($data_ref)) || (ref($data_ref) ne 'ARRAY') ||
  (!scalar(@$data_ref))) {
    return undef;
  }

  if (defined($means)) {
    $k_min = scalar @$means;
  }

  my ($mixture_ref, $ll, $BIC) = normal_em2($data_ref, $k_max,
  $k_min, $means);

  if (!defined($mixture_ref)) {
    return undef;
  }

  if (!scalar(@$mixture_ref)) {
    return undef;
  }

  # mixture_ref is a reference to a list of tuples, each of
  # which has mixing probability, estimator, pdf, and parameter
  # reference list.

  my $param_ref = +[];

  foreach (@$mixture_ref) {
    my ($mix_prob, $estimator, $pdf, $params) = @{$_};
    if (!defined($_)) {
      return undef;
    }

    if (!defined($params)) {
      return undef;
    }

    my ($mu, $sigma) = @$params;

    push @$param_ref, +[ $mix_prob, $mu, $sigma ];
  }


  my $CDF_sub = normal_mix_gen_cdf($param_ref);
  my $PPF_sub = normal_mix_gen_ppf($param_ref);
  my $PDF_sub = normal_mix_gen_pdf($param_ref);

  return ($CDF_sub, $param_ref, $PPF_sub, $PDF_sub);
}


# Generate the CDF for a mixture of normals.  The parameters
# are expected as a list of (probability, mean, standard
# deviation) triples, as outputted by the estimator.
sub normal_mix_gen_cdf($) {
  my $param_ref = shift;
  my @pairs     = ();  # prob, cdf

  if (!(ref($param_ref->[0]))) {
    # In flat form.  We'll fudge it into the expected form.
    my @param_flat = @$param_ref;
    my $ct         = (scalar @$param_ref)/3;

    $param_ref = +[];
    ($ct == int($ct)) || die;
    for (my $i=0; $i < $ct; $i++) {
      my ($p, $mean, $dev) = splice @param_flat, 0, 3;
      $param_ref->[$i] = +[ $p, $mean, $dev];
    }
  }

  my $prob_sum = 0;
  foreach (@$param_ref) {
    my ($prob, $mu, $sigma) = @{$_};
    my $cdf = undef;

    if ($prob > 0) {
      $cdf = normal_gen_cdf(+[ $mu, $sigma ]);
      push @pairs, +[ $prob, $cdf ];
      $prob_sum += $prob;
    } elsif ($prob < 0) {
      die;
    }
  }

  ($prob_sum > 0) || return undef;
  map { $_->[0] /= $prob_sum } @pairs;


  return sub {
    my $x   = shift @_;
    my $sum = 0;

    foreach (@pairs) {
      my ($prob, $cdf) = @{$_};
      $sum += $prob * &$cdf($x);
    }

    return $sum;
  };
}


# Generate the PDF for a mixture of normals.
sub normal_mix_gen_pdf($) {
  my $param_ref = shift;
  my @pairs     = ();  # prob, pdf

  if (!(ref($param_ref->[0]))) {
    # In flat form.  We'll fudge it into the expected form.
    my @param_flat = @$param_ref;
    my $ct         = (scalar @$param_ref)/3;

    $param_ref = +[];
    ($ct == int($ct)) || die;
    for (my $i=0; $i < $ct; $i++) {
      my ($p, $mean, $dev) = splice @param_flat, 0, 3;
      $param_ref->[$i] = +[ $p, $mean, $dev];
    }
  }

  my $prob_sum = 0;
  foreach (@$param_ref) {
    my ($prob, $mu, $sigma) = @{$_};
    my $pdf = undef;

    if ($prob > 0) {
      $pdf = normal_gen_pdf(+[ $mu, $sigma ]);
      push @pairs, +[ $prob, $pdf ];
      $prob_sum += $prob;
    } elsif ($prob < 0) {
      die;
    }
  }

  ($prob_sum > 0) || return undef;
  map { $_->[0] /= $prob_sum } @pairs;

  return sub {
    my $x   = shift @_;
    my $sum = 0;

    foreach (@pairs) {
      my ($prob, $pdf) = @{$_};
      $sum += $prob * &$pdf($x);
    }

    return $sum;
  };
}



# Generate the PPF for a mixture of normals.  The unmixing,
# in the general case, is probably quite nasty.
sub normal_mix_gen_ppf($) {
  my $param_ref  = shift;
  my $mix_cdf    = normal_mix_gen_cdf($param_ref);
  my @param_copy = flatten(@$param_ref);
  my @guides     = ();

  defined($mix_cdf) || return undef;

  if (!(ref($param_ref->[0]))) {
    # In flat form.  We'll fudge it into the expected form.
    my @param_flat = @$param_ref;
    my $ct         = (scalar @$param_ref)/3;

    $param_ref = +[];
    ($ct == int($ct)) || die;
    for (my $i=0; $i < $ct; $i++) {
      my ($p, $mean, $dev) = splice @param_flat, 0, 3;
      $param_ref->[$i] = +[ $p, $mean, $dev];
    }
  }

  my $prob_sum = 0;
  while (scalar(@param_copy) > 0) {
    my ($prob, $mean, $sigma) = splice(@param_copy, 0, 3);

    ($prob < 0) && die;
    ($prob > 0) || next;
    $prob_sum += $prob;
    my $cdf_val = &$mix_cdf($mean);
    my $pdf_fun = normal_gen_pdf(+[ $mean, $sigma ]);

    # We do this to avoid changing $param_ref.
    push @guides, +[ $prob, $mean, $sigma, $cdf_val, $pdf_fun];
  }

  ($prob_sum > 0) || return undef;
  map { $_->[0] /= $prob_sum } @guides;

  return _mix_gen_ppf($mix_cdf, @guides);
}




# Give it data, and this tries to fit a doubly-truncated normal
# distribution.  This involves assorted transformations and
# ugliness.  The truncation points are inferred about as simply
# as possible -- namely, the minimum and maximum data values.
#
# Method:  maximum likelihood.
#
# This is Case I from
#   A. C. Cohen, Jr.  Annals of Mathematical Statistics. Vol 21,
#   Issue 4 (Dec., 1950), pp. 557-569.
#
# The final parameters are:
#    [ $mu, $sigma, $eta_prime, $eta_prime_prime ]
#
# where $eta_prime and $eta_prime_prime are the left and right
# truncation points, expressed in terms of standard deviations
# from the mean (negative = less, positive = more).
#
# CHANGE:
#   Trying out the following approach:
#   - Sort the data.
#   - Identify minimum, maximum.  These define truncation points.
#   - If it exceeds $max_pts points, reduce it.
#   - x=percentile, y=data.  Apply Levenberg-Marquardt to search the
#   - space of mean, deviation.
#
#  This is considerably slower (hence, the $max_pts-point rule),
#  but it gives far better results in many cases.
#
# Ayup.  It's VERY computationally expensive -- it is, after all,
# numerical search -- but empirical tests suggests it handles even
# cases where the mean has been truncated away (e.g. ep, epp both
# positive).
#
# The Cohen method is still here, just in case you prefer to use
# it.

sub normal_truncated_est($) {
  my $data_ref = shift;

  if ((!defined($data_ref)) || (ref($data_ref) ne 'ARRAY') ||
  (!scalar(@$data_ref))) {
    return undef;
  }

  if (0) {
    # The Cohen method.

    # Number of observations.
    my $n0       = scalar @$data_ref;

    # Left truncation point.
    my $x0_prime = ArrayFoo::arrSelect($data_ref, 0);

    # Sample range, in absolute terms (not devs).
    my $R = ArrayFoo::arrSelect($data_ref, $n0-1) - $x0_prime;

    # Translate the origin:  $x <= $x_prime - $x0_prime
    my @xf_data = map { $_ - $x0_prime } @$data_ref;

    # We will solve two simultaneous equations for
    # $eta_prime and $sigma.  $eta_prime is the left
    # truncation point, expressed in standard units where
    # the mean is 0:
    #
    #  $mu = $x0_prime - ($sigma * $eta_prime)
    #

    # First approximations:

    my $v1              = ArrayFoo::arrMean(\@xf_data);
    my $v2              = ArrayFoo::arrMoment(\@xf_data, 2);
    my $sx              = ArrayFoo::arrDev(\@xf_data);
    my $sigma_hat       = $sx;
    my $eta_prime_hat   = -$v1 / $sx;


    die unless defined($__M_PI);
    die unless defined($__SQRT2PI);
    my $rho = sub {
      my $x = shift;

      return (exp(-($x**2)/2) / $__SQRT2PI);
    };

    # For computing I functions.
    my $et2_2 = sub {
      my $t = shift @_;
      return (exp(-($t**2)/2));
    };

    # For computing I functions.
    my $et2_2_4th = sub {
      my $t = shift @_;

      return (exp(-($t**2)/2) * (3 - (6*$t*$t) + ($t ** 4)));
    };

    my $I0 = sub {
      my $x  =  shift @_;

      return (Numerical::Simpsons_rule($et2_2, $et2_2_4th, $x,
      undef, undef, 1e-4, 1e-5));

      #return (Numerical::Romberg_indefinite($et2_2, $x, undef, undef, 0.0001,
      #                                      1e-5));
    };

    # Two simultaneous equations.
    my $f = sub {
      my $eta_prime = shift @_;
      my $sigma     = shift @_;
      my $eta_prime_prime = $eta_prime + ($R/$sigma);
      my $rho_prime       = &$rho($eta_prime);
      my $rho_prime_prime = &$rho($eta_prime_prime);
      my $I0_prime        = &$I0($eta_prime);
      my $I0_prime_prime  = &$I0($eta_prime_prime);

      if ($__UNIRAND_DEBUG_PRINTF) {
        print "e_p = $eta_prime, e_p_p = $eta_prime_prime\n";
        print "I0_p = $I0_prime, I0_p_p = $I0_prime_prime\n";
      }

      my $Z_denom = $I0_prime - $I0_prime_prime;

      # Failure mode!
      if ($Z_denom == 0) {
        return undef;
      }

      my $Z1 = $rho_prime / $Z_denom;
      my $Z2 = $rho_prime_prime / $Z_denom;

      return (($sigma * ($Z1 - $Z2 - $eta_prime)) - $v1);
    };


    my $g = sub {
      my $eta_prime = shift @_;
      my $sigma     = shift @_;
      my $eta_prime_prime = $eta_prime + ($R/$sigma);
      my $rho_prime       = &$rho($eta_prime);
      my $rho_prime_prime = &$rho($eta_prime_prime);
      my $I0_prime        = &$I0($eta_prime);
      my $I0_prime_prime  = &$I0($eta_prime_prime);

      my $Z_denom = $I0_prime - $I0_prime_prime;

      if ($Z_denom == 0) {
        return undef;
      }

      my $Z1 = $rho_prime / $Z_denom;
      my $Z2 = $rho_prime_prime / $Z_denom;

      my $variance = $sigma ** 2;
      my $A        = $eta_prime * ($Z1 - $Z2 - $eta_prime);
      my $B        = $Z2 * $R / $sigma;

      return (($variance * (1 - $A - $B)) - $v2);
    };

    # Newton-Rhapson for two simultaneous equations in two unknowns
    # also needs their four partial derivatives.  Oh joy, oh raptuous
    # joy.

    # Approximate partial derivative of f with respect to
    # eta_prime.
    my $f_eta_prime = sub {
      my $eta_prime = shift @_;
      my $sigma     = shift @_;

      my $f_constant_sigma = sub {
        my $ep = shift @_;
        my $fval = &$f($ep, $sigma);
        return $fval;
      };

      return Numerical::leapfrog_diff($f_constant_sigma, $eta_prime);
    };

    # Approximate partial derivative of f with respect to
    # sigma.
    my $f_sigma = sub {
      my $eta_prime = shift @_;
      my $sigma     = shift @_;

      my $f_constant_eta = sub {
        my $sigma = shift @_;

        return (&$f($eta_prime, $sigma));
      };

      return Numerical::leapfrog_diff($f_constant_eta, $sigma);
    };


    # Approximate partial derivative of g with respect to
    # eta_prime.
    my $g_eta_prime = sub {
      my $eta_prime = shift @_;
      my $sigma     = shift @_;

      my $g_constant_sigma = sub {
        my $ep = shift @_;

        return (&$g($eta_prime, $sigma));
      };

      return Numerical::leapfrog_diff($g_constant_sigma, $eta_prime);
    };

    # Approximate partial derivative of g with respect to
    # sigma.
    my $g_sigma = sub {
      my $eta_prime = shift @_;
      my $sigma     = shift @_;

      my $g_constant_eta = sub {
        my $sigma = shift @_;

        return (&$g($eta_prime, $sigma));
      };

      return Numerical::leapfrog_diff($g_constant_eta, $sigma);
    };

    my ($eta_prime, $sigma) =
    Numerical::Newton2($f, $f_eta_prime, $f_sigma,
    $g, $g_eta_prime, $g_sigma,
    $eta_prime_hat, $sigma_hat);

    if (!defined($eta_prime)) {
      return undef;
    }

    my $mu = $x0_prime - ($sigma * $eta_prime);
    my $eta_prime_prime = $eta_prime + ($R / $sigma);


    my $min = ArrayFoo::arrSelect($data_ref, 0);
    my $max = ArrayFoo::arrSelect($data_ref, -1);

    my $min_scale = ($min - $mu)/$sigma;
    my $max_scale = ($max - $mu)/$sigma;

    $min_scale *= ($n0 + 2) / $n0;
    $max_scale *= ($n0 + 2) / $n0;

    # Correct the truncation points if need be due to numerical issues.
    if ($min_scale < $eta_prime) {
      $eta_prime = $min_scale;
    }

    if ($max_scale > $eta_prime_prime) {
      $eta_prime_prime = $max_scale;
    }
    my $param_ref = +[ $mu, $sigma, $eta_prime, $eta_prime_prime ];

    my $CDF = normal_truncated_gen_cdf($param_ref);
    my $PPF = normal_truncated_gen_ppf($param_ref);
    my $PDF = normal_truncated_gen_pdf($param_ref);

    return ($CDF, $param_ref, $PPF, $PDF);
    # end of Cohen method
  } else {
    # another method; based on Levenberg-Marquardt search
    my @data = sort { $a <=> $b } @$data_ref;
    my $min  = $data[0];
    my $max  = $data[-1];

    my $ct   = scalar @data;
    my $max_pts = 500;

    if ($ct < 3) {
      # Too few points!
      return undef;
    }

    my @percentiles = ();
    my @vals        = ();

    if ($ct > $max_pts) {
      for (my $i=0; $i < $max_pts; $i++) {
        my $pct = ($i+1) / ($max_pts+1);
        push @percentiles, $pct;

        my $idx = $ct * ($i / ($max_pts-1));
        if ($idx > ($ct-1)) {
          $idx = $ct-1;
        }

        if ($idx == int($idx)) {
          push @vals, $data[$idx];
        } else {
          my $lo_val = $data[int($idx)];
          my $hi_val = $data[int($idx)+1];

          push @vals, ($lo_val + (($hi_val - $lo_val) *
          ($idx - int($idx))));
        }
      }
    } else {
      @vals = @data;

      for (my $i=0; $i < $ct; $i++) {
        push @percentiles, (($i+1) / ($ct+1));
      }
    }

    my $pts_used = scalar @vals;

    # Levenberg-Marquardt least-squares fitting needs a function...
    # We'll fit over mean and deviation space, while deriving ep
    # and epp (the truncation points will be based on min and
    # maximum observations).

    my $f_gen = sub {
      my $mean = shift @_;
      my $dev  = shift @_;

      if ($dev <= 0) {
        return sub {
          # For impossible values, return a BAD fit.
          return ($max + 1e100);
        };
      }

      # It would seem that we could set the truncation points to
      # be the extrema.
      #
      # Well... not quite.  We have a problem here.
      #
      # If we set the truncation points to the EXACT minimum and
      # maximum, the PDF at those points will be 0.  This means
      # that the log-likelihood will be undefined.
      #
      # Suppose we have, say, 20 points.  /Assuming/, with all
      # the dangers that presents, that they represent quantiles
      # that do NOT include the extrema, we might believe they're
      # from 5%-95%.
      #
      # We have two estimated parameters (mean, dev); two points
      # ((min, 1/pts), (max, 1-(1/pts)); and two
      # equations.
      #
      #            N_{mean,dev}(x) - N_{0,1}(ep)
      # cdf =    --------------------------------
      #            N_{0,1}(epp) - N_{0,1}(ep)
      #
      # N_{0,1} is a function that I can (numerical approximation)
      # invert.

      my $Fnt     = normal_gen_cdf(+[$mean, $dev]);
      my $Fnt_min = &$Fnt($min);
      my $Fnt_max = &$Fnt($max);

      # From solving the system.
      my $ep_pct =
      (
        ((($ct ** 2) - $ct) * $Fnt_min) -
        ($ct * $Fnt_max)
      ) / (($ct) * ($ct-2));
      my $epp_pct = $ep_pct + ($ct * ($Fnt_min - $ep_pct));

      ($ep_pct <= 1) || return sub { return ($max+(1e+100));};
      ($epp_pct > 0) || return sub { return ($max+(1e+100));};
      ($epp_pct > $ep_pct) || return sub { return ($max+(1e+100));};

      my $ep   = ($ep_pct > 1e-3) ? normal_std_ppf($ep_pct) : undef;
      my $epp  = ($epp_pct < (1-(1e-3))) ? normal_std_ppf($epp_pct) : undef;

      my $f = normal_truncated_gen_ppf(+[ $mean, $dev, $ep, $epp ]);

      return sub {
        my $y = shift @_;
        my $x = &$f($y);

        return (defined($x) ? $x : ($max + 1e100));
      }
    };

    my @initial_guess = ( ArrayFoo::arrMean(\@vals),
    ArrayFoo::arrDev(\@vals) );

    # Tuning these parameters may give greater speed in exchange for
    # (possible) accuracy loss.  Consider changing the number of runs
    # to 1 (no perturbation!), or increasing the min_rel or
    # tol factors.
    my $param_ref = Numerical::LM_perturb(
      "f_gen" => $f_gen,
      "x_ref" => \@percentiles,
      "y_ref" => \@vals,
      "a_ref" => \@initial_guess,
      "min_rel" => 0.05,
      "max_lambda" => 729,
      "tol"   => 1e-3,
      "max_runs" => 2,
      "rel_perturb" => 0.4,
      "p_best" => 0.7
    );

    my ($mean, $dev) = @$param_ref;
    if ((!defined($dev)) || ($dev <= 0)) {
      return (undef, undef, undef, undef);
    } else {

      my $Fnt     = normal_gen_cdf(+[$mean, $dev]);
      my $Fnt_min = &$Fnt($min);
      my $Fnt_max = &$Fnt($max);

      # From solving the system.
      my $ep_pct =
      (
        ((($ct ** 2) - $ct) * $Fnt_min) -
        ($ct * $Fnt_max)
      ) / (($ct) * ($ct-2));
      my $epp_pct = $ep_pct + ($ct * ($Fnt_min - $ep_pct));

      if (($ep_pct >= 1) || ($epp_pct <= 0) ||
      ($epp_pct <= $ep_pct)) {
        return sub {
          return ($max + 1e100);
        };
      }
      my $ep   = ($ep_pct > 0) ? normal_std_ppf($ep_pct) : undef;
      my $epp  = ($epp_pct < 1) ? normal_std_ppf($epp_pct) : undef;

      push @$param_ref, $ep, $epp;

      my $CDF = normal_truncated_gen_cdf($param_ref);
      my $PPF = normal_truncated_gen_ppf($param_ref);
      my $PDF = normal_truncated_gen_pdf($param_ref);

      return ($CDF, $param_ref, $PPF, $PDF);
    }
  }
}




# This version is an attempt at fitting a truncated normal with
# varying weights per point.  The main use of this function is
# for estimating mixtures of truncated normals, a very expensive
# mix -- but sometimes, it's the only one that applies.
sub normal_truncated_est_wtd($$) {
  my $data_ref = shift;
  my $wtd_ref  = shift;

  if ((!defined($data_ref)) || (ref($data_ref) ne 'ARRAY') ||
  (!scalar(@$data_ref))) {
    return undef;
  }

  my @nonzero  = ();
  my @nonzero_wtd  = ();

  # print "ntew begin\n";

  if (grep { $_ <= 0 } @$wtd_ref) {
    my $i=0;
    my $ct = scalar @$wtd_ref;

    for ($i=0; $i < $ct; $i++) {
      if ($wtd_ref->[$i] > 0) {
        push @nonzero, $data_ref->[$i];
        push @nonzero_wtd, $wtd_ref->[$i];
      }
    }

    $data_ref = \@nonzero;
    $wtd_ref  = \@nonzero_wtd;
  }

  # Levenberg-Marquardt over up to $max_pts points, which should allow a
  # pretty good fit.

  $__UNIRAND_DEBUG_PRINTF && print "NTEW fitting on ", scalar(@$data_ref), " points.\n";
  {
    my @pairs = ();
    my $ct = scalar @$data_ref;

    if ($ct < 3) {
      # Too few points.
      return undef;
    }

    for (my $i=0; $i < $ct; $i++) {
      push @pairs, +[
      $data_ref->[$i],
      $wtd_ref->[$i]
      ];
    }
    @pairs = sort { $a->[0] <=> $b->[0] } @pairs;

    $data_ref = +[ map { $_->[0] } @pairs ];
    $wtd_ref  = +[ map { $_->[1] } @pairs ];
  }

  my $min  = $data_ref->[0];
  my $max  = $data_ref->[-1];

  my $ct   = scalar @$data_ref;
  my $max_pts = 1000;

  my @percentiles = ();
  my @vals        = ();
  my @weights     = ();

  my $wt_sum = 0;

  map { $wt_sum += $_ } @$wtd_ref;

  if ($ct > $max_pts) {
    my $cwt = 0;
    my $idx = 0;

    for (my $i=0; $i < $max_pts; $i++) {
      my $pct_min = $i / $max_pts;

      reach_min:  {
        $cwt += $wtd_ref->[$idx];
        if (($cwt / $wt_sum) < $pct_min) {
          $idx++;
          redo reach_min;
        }
      }

      push @percentiles, ($cwt/$wt_sum);
      push @vals, $data_ref->[$idx];
    }

    for (my $i=0; $i < $max_pts; $i++) {
      my $wt = 0;

      if ($i > 0) {
        $wt += ($percentiles[$i] - $percentiles[$i-1]) / 2;
      }

      if ($i < ($max_pts-1)) {
        $wt += ($percentiles[$i+1] - $percentiles[$i]) / 2;
      }

      push @weights, $wt;
    }

  } else {
    @vals    = @$data_ref;
    @weights = @$wtd_ref;

    my $cwt = 0;

    for (my $i=0; $i < $ct; $i++) {
      my $pct = $cwt / $wt_sum;
      $cwt += $wtd_ref->[$i];
      push @percentiles, $pct;
    }
  }

  # Levenberg-Marquardt least-squares fitting needs a function...
  # We'll fit over mean and deviation space, while deriving ep
  # and epp (the truncation points will be based on min and
  # maximum observations).

  my $f_gen = sub {
    my $mean = shift;
    my $dev  = shift;

    if ($dev <= 0) {
      return sub {
        # For impossible values, return a BAD fit.
        return ($max + 1e100);
      };
    }

    my $Fnt     = normal_gen_cdf(+[$mean, $dev]);
    my $Fnt_min = &$Fnt($min);
    my $Fnt_max = &$Fnt($max);

    # From solving the system.
    my $ep_pct =
    (
      ((($ct ** 2) - $ct) * $Fnt_min) -
      ($ct * $Fnt_max)
    ) / (($ct) * ($ct-2));
    my $epp_pct = $ep_pct + ($ct * ($Fnt_min - $ep_pct));

    if (($ep_pct >= 1) || ($epp_pct <= 0) ||
    ($epp_pct <= $ep_pct)) {
      return sub {
        return ($max + 1e100);
      };
    }

    my $ep   = ($ep_pct > 0) ? normal_std_ppf($ep_pct) : undef;
    my $epp  = ($epp_pct < 1) ? normal_std_ppf($epp_pct) : undef;
    my $f = normal_truncated_gen_ppf(+[ $mean, $dev, $ep, $epp ]);

    return sub {
      my $y = shift @_;
      my $x = &$f($y);

      return (defined($x) ? $x : ($max + 1e100));
    };
  };

  my @initial_guess = ( ArrayFoo::arrMeanWtd(\@vals, \@weights),
  ArrayFoo::arrDevWtd(\@vals, \@weights) );

  ((scalar @percentiles) == (scalar @vals)) || die;
  ((scalar @percentiles) == (scalar @weights)) || die;


  my $param_ref = Numerical::LM_perturb(
    "f_gen" => $f_gen,
    "x_ref" => \@percentiles,
    "y_ref" => \@vals,
    "a_ref" => \@initial_guess,
    "min_rel" => 0.05,
    "max_lambda" => 729,
    "tol"   => 1e-3,
    "max_runs" => 10,
    "p_best" => 0.8,
    "rel_perturb" => 0.1,
    "wtd_ref" => \@weights
  );

  # print "ntew end\n";
  my ($mean, $dev) = @$param_ref;
  if ((!defined($dev)) || ($dev <= 0)) {
    return (undef, undef, undef, undef);
  } else {
    my $Fnt     = normal_gen_cdf(+[$mean, $dev]);
    my $Fnt_min = &$Fnt($min);
    my $Fnt_max = &$Fnt($max);

    # From solving the system.
    my $ep_pct =
    (
      ((($ct ** 2) - $ct) * $Fnt_min) -
      ($ct * $Fnt_max)
    ) / (($ct) * ($ct-2));
    my $epp_pct = $ep_pct + ($ct * ($Fnt_min - $ep_pct));

    ($ep_pct <= 1) || return undef;
    ($epp_pct > 0) || return undef;
    ($epp_pct > $ep_pct) || return undef;

    my $ep   = ($ep_pct > 0) ? normal_std_ppf($ep_pct) : undef;
    my $epp  = ($epp_pct < 1) ? normal_std_ppf($epp_pct) : undef;

    push @$param_ref, $ep, $epp;

    my $CDF = normal_truncated_gen_cdf($param_ref);
    my $PPF = normal_truncated_gen_ppf($param_ref);
    my $PDF = normal_truncated_gen_pdf($param_ref);

    return ($CDF, $param_ref, $PPF, $PDF);
  }
}



# The CDF of a truncated normal is relatively simple to compute
# given a standard N(0,1) normal.
#
sub normal_truncated_gen_cdf($) {
  my $param_ref = shift;
  my ($mu, $sigma, $eta_prime, $eta_prime_prime) = @$param_ref;

  my $normal_cdf  = normal_gen_cdf(+[$mu, $sigma]);

  my $trunc_less = defined($eta_prime) ? &$normal_cdf($mu + ($sigma * $eta_prime)) : 0;
  my $trunc_more = defined($eta_prime_prime) ?
  &$normal_cdf($mu + ($sigma * $eta_prime_prime)) : 1;
  my $bulk       = $trunc_more - $trunc_less;

  if ($sigma < 0) {
    return undef;
  }

  if ((defined($eta_prime)) && (defined($eta_prime_prime)) &&
  ($eta_prime > $eta_prime_prime)) {
    return undef;
  }

  return sub {
    my $x = shift @_;

    defined($x) || die;
    defined($mu) || die;
    defined($sigma) || die;
    # Normalize.
    $x = ($x - $mu ) / $sigma;

    if (defined($eta_prime) && ($x <= $eta_prime)) {
      return 0;
    }

    if (defined($eta_prime_prime) && ($x >= $eta_prime_prime)) {
      return 1;
    }

    # Normally, the CDF _would_ be...
    my $cdf_val_non_trunc = normal_std_cdf($x);

    # But its truncated, so it isn't.
    my $cdf_val_trunc = ($cdf_val_non_trunc - $trunc_less) / $bulk;

    return $cdf_val_trunc;
  };
}


# The PDF of a truncated normal is scaled up so the area under
# the curve is still 1.
sub normal_truncated_gen_pdf($) {
  my $param_ref = shift;
  my ($mu, $sigma, $eta_prime, $eta_prime_prime) = @$param_ref;

  defined($mu) || die;
  defined($sigma) || die;

  ($sigma > 0) || return undef;

  my $normal_cdf = normal_gen_cdf(+[$mu, $sigma]);

  my $trunc_less = defined($eta_prime) ? &$normal_cdf($mu +($sigma * $eta_prime)) : 0;
  my $trunc_more = defined($eta_prime_prime) ?
  &$normal_cdf($mu + ($sigma*$eta_prime_prime)) : 1;

  my $bulk       = $trunc_more - $trunc_less;
  my $denom     = $__SQRT2PI * $sigma;

  if ((defined($eta_prime)) && (defined($eta_prime_prime)) &&
  ($eta_prime > $eta_prime_prime)) {
    return undef;
  }


  return sub {
    my $x = shift @_;

    defined($x) || die;

    # Normalize.
    my $x_norm = ($x - $mu ) / $sigma;

    if (defined($eta_prime) && ($x_norm <= $eta_prime)) {
      return 0;
    }

    if (defined($eta_prime_prime) && ($x_norm >= $eta_prime_prime)) {
      return 0;
    }

    # Normally, the PDF _would_ be...
    my $pdf_val_non_trunc = exp(-(($x-$mu) ** 2) / (2*$sigma*$sigma)) /
    $denom;

    # But its truncated, so it isn't.  If we truncated half
    # the normal, then we must double the PDF values to fix
    # the area, and so forth.
    my $pdf_val_trunc = $pdf_val_non_trunc / $bulk;

    return $pdf_val_trunc;
  };
}



# The PPF of a truncated normal... Hm.
#
# If x% was truncated off the left (percentage of a full
# normal), and y% off the right, then z% in the new graph
# should be something like...
#
#      x + (z * (1-x-y))
#
# in the original.  So we can transform and chain into
# that PPF, being sure to retransform to normal space.
#
sub normal_truncated_gen_ppf($) {
  my $param_ref = shift;
  my ($mu, $sigma, $eta_prime, $eta_prime_prime) = @$param_ref;

  my $normal_cdf = normal_gen_cdf(+[$mu, $sigma]);

  my $trunc_less = defined($eta_prime) ? &$normal_cdf($mu + ($sigma*$eta_prime)) : 0;
  my $trunc_more = defined($eta_prime_prime) ?
  &$normal_cdf($mu + ($sigma * $eta_prime_prime)) : 1;
  my $bulk       = $trunc_more - $trunc_less;

  if ((defined($eta_prime)) && (defined($eta_prime_prime)) &&
  ($eta_prime > $eta_prime_prime)) {
    return undef;
  }

  return sub {
    my $y_trunc = shift @_;
    my $x = undef;

    if ($y_trunc <= 0) {
      defined($eta_prime) || return undef;
      $x = $mu + ($sigma * $eta_prime);
    } elsif ($y_trunc >= 1) {
      defined($eta_prime_prime) || return undef;
      $x = $mu + ($sigma * $eta_prime_prime);
    } else {
      my $y_non_trunc = $trunc_less + ($y_trunc * $bulk);
      my $x_standard  = normal_std_ppf($y_non_trunc);
      defined($x_standard) || return undef;
      $x           = ($x_standard * $sigma) + $mu;
    }

    return $x;
  };
}




# Given a reference to the data, use Numerical::em_converge to
# fit a mixture of TRUNCATED univariate
# Gaussians.  You can also give it an optional maximum k.
#
# Criterion for stopping one iteration:
#    Log-likelihood appearing to converge.
# Criterion for stopping testing with one particular $k:
#    Just because -- it's arbitrary.  4*$k for now -- we
#    run more passes when the model is more complex.
# Criterion for stopping testing (final $k reached):
#    BIC increases.
#
# Returns:  3 items.
#   a) List (ref) of tuples
#      Each consists of <mixing prob, estimator, pdf, parameter ref list>
#   b) Log-likelihood.
#   c) BIC.
#
# Estimate initialization of mixing probability:
#   All passes:  1/$k
#
# Estimate initialization of means:
#   Pass #1:     Uniform wrt density.
#   Pass #2:     Uniform ignoring density.
#   Pass #3-x:  Random initialization.
#
# Estimate initialization of standard deviations:
#   All passes:  Sample standard deviation.
#
# BIC = -2 log_likelihood + (q * log(n))
#
# where n is sample size and q is parameter count (thus
# dependent on $k).
#
#
# NOTE ON TRUNCATION:
#
# It is very unclear to me to decide when to truncate.  The global
# extrema provide obvious truncation points for the left and right,
# and this will be factored in.  However, by default a normal has
# responsbility for EVERY point, barring numerical precision limits,
# ergo a normal would never be truncated except at the global limits.
#
# To stimulate truncation, I must tamper with the standard EM
# algorithm and adjust some responsibilities to 0.  The obvious
# approach is, for each mean, to identify the leftmost and rightmost
# area where it still has "some" substantial responsibility, and
# truncate other responsibilities to 0.  Then, the responsibilities
# must be renormalized.
#
# This, however, is not handled here, but in Numerical's em routines.
# create a separate em 'path' for this sort.
sub normal_truncated_em($;$$$) {
  my $data_ref = shift;
  my $k_max    = shift;
  my $k_min    = shift;
  my $means    = shift;  # list of means
  my $k        = defined($k_min) ? $k_min : 2;
  my $n        = scalar @$data_ref;
  my $lo       = ArrayFoo::arrSelect($data_ref, 0);
  my $hi       = ArrayFoo::arrSelect($data_ref, $n-1);
  my $range    = ($hi-$lo);

  my $lo_ext   = $lo - (0.0001 * $range);
  my $hi_ext   = $hi + (0.0001 * $range);

  # Standard deviation.
  my $sigma_global = ArrayFoo::arrDev($data_ref);

  # List, LL, BIC
  my @global_best = (undef, undef, undef);

  if (defined($means)) {
    $k_min = scalar @$means;
    $k = $k_min;
  }

  test_k:  {
    my @k_best = (undef, undef, undef);
    my $passes = 20;

    pass:  for (my $pass=0; $pass < $passes; $pass++) {
      my @current = (+[], undef, undef);


      # Initialization.
      for (my $k_idx=0; $k_idx < $k; $k_idx++) {
        my $mean     = undef;

        if ($pass == 0) {
          if ((defined($means)) && ($k_idx < (scalar @$means))) {
            $mean = $means->[$k_idx];
          } else {
            # Equally spaced in terms of density, including extrema.
            my $real_idx = ($k_idx / ($k - 1)) * $n;
            $real_idx = ($real_idx == $n) ? ($n-1) : $real_idx;

            if ($real_idx == int($real_idx)) {
              $mean = ArrayFoo::arrSelect($data_ref, $real_idx);
            } else {
              my $lo_idx = int($real_idx);
              my $hi_idx = $lo_idx+1;

              my $lo_mean = ArrayFoo::arrSelect($data_ref, $lo_idx);
              my $hi_mean = ArrayFoo::arrSelect($data_ref, $hi_idx);

              $mean = $lo_mean + (($hi_mean - $lo_mean) *
              ($real_idx - $lo_idx));
            }
          }
        } elsif (defined($means) && ($k_idx < scalar(@$means))) {
          $mean = $means->[$k_idx];
        } else {
          if ($pass % 2) {
            $mean = $data_ref->[int(rand($n))];
          } else {
            $mean = (rand() * ($hi-$lo)) + $lo;
          }
        }

        defined($mean) || die;
        defined($lo_ext) || die;
        defined($hi_ext) || die;

        my $sigma_l = abs(($lo_ext - $mean)/2);
        my $sigma_r = abs(($hi_ext - $mean)/2);

        my $sigma = ($sigma_l > $sigma_r) ? $sigma_l : $sigma_r;

        my $ltrunc = ($lo_ext-$mean)/$sigma;
        my $rtrunc = ($hi_ext-$mean)/$sigma;
        my $param_ref = +[ $mean, $sigma, $ltrunc, $rtrunc ];

        $__UNIRAND_DEBUG_PRINTF && print "Component init [$mean, $sigma, $ltrunc, $rtrunc]\n";
        my $pdf       = normal_truncated_gen_pdf($param_ref);

        $current[0]->[$k_idx] = +[ 1/$k, \&normal_truncated_est_wtd, $pdf ];
      }
      $__UNIRAND_DEBUG_PRINTF && print "k=$k, pass=$pass\n";

      # Initialization complete.  Run EM to convergence.
      my ($final_ref, $ll, $BIC) = Numerical::em_converge($data_ref,
      $current[0], 1);

      if (!defined($final_ref)) {
        next pass;
      }

      @current = ($final_ref, $ll, $BIC);

      if ((!defined($k_best[2])) || ($k_best[2] > $BIC)) {
        # This is the new winner for this particular 'k'.
        @k_best = @current;
      }
    }

    if ((!defined($global_best[2])) || ($global_best[2] > $k_best[2])) {
      # This increased k yielded the new winner under BIC.

      @global_best = @k_best;

      if ((!defined($k_max)) || ($k < $k_max)) {
        if ((scalar (@{$k_best[0]})) == $k) {
          $k++;
          redo test_k;
        }
      }
    }

    return @global_best;
  }
}




# Given a reference to the data, use Numerical::em_converge to
# fit a mixture of TRUNCATED univariate
# Gaussians.  You can also give it an optional maximum k.
#
# Criterion for stopping one iteration:
#    Log-likelihood appearing to converge.
# Criterion for stopping testing with one particular $k:
#    Just because -- it's arbitrary.  4*$k for now -- we
#    run more passes when the model is more complex.
# Criterion for stopping testing (final $k reached):
#    BIC increases.
#
# Returns:  3 items.
#   a) List (ref) of tuples
#      Each consists of <mixing prob, estimator, pdf, parameter ref list>
#   b) Log-likelihood.
#   c) BIC.
#
# BIC = -2 log_likelihood + (q * log(n))
#
# where n is sample size and q is parameter count (thus
# dependent on $k).
#
#
# NOTE ON TRUNCATION:
#
# It is very unclear to me to decide when to truncate.  The global
# extrema provide obvious truncation points for the left and right,
# and this will be factored in.  However, by default a normal has
# responsbility for EVERY point, barring numerical precision limits,
# ergo a normal would never be truncated except at the global limits.
#
# To stimulate truncation, I must tamper with the standard EM
# algorithm and adjust some responsibilities to 0.  The obvious
# approach is, for each mean, to identify the leftmost and rightmost
# area where it still has "some" substantial responsibility, and
# truncate other responsibilities to 0.  Then, the responsibilities
# must be renormalized.
#
# This, however, is not handled here, but in Numerical's em routines.
# create a separate em 'path' for this sort.
#

# I'm currently experimenting with this version, which differs from
# the previous in that
#
# - It prefers to start from one component, initialized by using the
#   standard unicomponent estimator.
# - It adds new components without discarding the end results of the
#   previous set of components, instead of simply reinitializing the
#   k components without regards to the k-1.
#
#   This initialization is done in such a way as to try to put new
#   components where they're needed, according to a weighted average
#   of the least-likely 1% of points.
sub normal_truncated_em2($;$$$) {
  my $data_ref = shift;
  my $k_max    = shift;
  my $k_min    = shift;
  my $means    = shift;  # list of means
  my $k        = defined($k_min) ? $k_min : 1;
  my $iter        = 0;
  my $iter_auto   = 5;
  my $n        = scalar @$data_ref;
  my $lo       = ArrayFoo::arrSelect($data_ref, 0);
  my $hi       = ArrayFoo::arrSelect($data_ref, $n-1);
  my $range    = ($hi-$lo);

  my $lo_ext   = $lo - (0.0001 * $range);
  my $hi_ext   = $hi + (0.0001 * $range);

  # Standard deviation.
  my $sigma_global = ArrayFoo::arrDev($data_ref);

  # List, LL, BIC
  my @global_best = (undef, undef, undef);

  if (defined($means)) {
    $k_min = scalar @$means;
    $k = $k_min;
  }

  my @current = (+[], undef, undef);

  # Initialization.

  if ($k > 1) {
    for (my $k_idx=0; $k_idx < $k; $k_idx++) {
      my $mean     = undef;
      my $real_idx = undef;

      $real_idx = ($k_idx / ($k - 1)) * $n;
      $real_idx = ($real_idx == $n) ? ($n-1) : $real_idx;

      if ($real_idx == int($real_idx)) {
        $mean = ArrayFoo::arrSelect($data_ref, $real_idx);
      } else {
        my $lo_idx = int($real_idx);
        my $hi_idx = $lo_idx+1;

        my $lo_mean = ArrayFoo::arrSelect($data_ref, $lo_idx);
        my $hi_mean = ArrayFoo::arrSelect($data_ref, $hi_idx);

        $mean = $lo_mean + (($hi_mean - $lo_mean) *
        ($real_idx - $lo_idx));
      }

      my $sigma_l = abs(($lo_ext - $mean)/3);
      my $sigma_r = abs(($hi_ext - $mean)/3);
      my $sigma = ($sigma_l > $sigma_r) ? $sigma_l : $sigma_r;
      my $ltrunc = ($lo_ext-$mean)/$sigma;
      my $rtrunc = ($hi_ext-$mean)/$sigma;
      my $param_ref = +[ $mean, $sigma, $ltrunc, $rtrunc ];

      $__UNIRAND_DEBUG_PRINTF && print "Component init [$mean, $sigma, $ltrunc, $rtrunc]\n";
      my $pdf       = normal_truncated_gen_pdf($param_ref);

      $current[0]->[$k_idx] = +[ 1/$k, \&normal_truncated_est_wtd, $pdf ];
    }
  } else {
    my ($cdf, $param_ref, $ppf, $pdf) = normal_truncated_est($data_ref);
    defined($cdf) || return undef;


    $current[0]->[0] = +[ 1, \&normal_truncated_est_wtd, $pdf,
    $param_ref ];

    my $ll = Numerical::compute_log_likelihood($data_ref, $pdf,
    $__LL_PROB_FLOOR);
    defined($ll) || return undef;

    # prob + 4 base params of normal-truncated
    my $q = 5;
    my $BIC = (-2*$ll) + ($q * log($n)/log(2));


    $__UNIRAND_DEBUG_PRINTF && print "Component init [",
    join(", ", @$param_ref), "]\n";
    $__UNIRAND_DEBUG_PRINTF && print "ll=$ll, BIC=$BIC\n";

    $current[1] = $ll;
    $current[2] = $BIC;
  }

  $__UNIRAND_DEBUG_PRINTF && print "k=$k\n";

  em_loop:  {
    my ($final_ref, $ll, $BIC) = (undef, undef, undef);
    my $again = 0;

    if ($k > 1) {
      ($final_ref, $ll, $BIC) = Numerical::em_converge($data_ref,
      $current[0], 1);
    } else {
      ($final_ref, $ll, $BIC) = @current;
    }

    if (!defined($final_ref)) {
      return @global_best;
    }

    defined($ll) || die;
    defined($BIC) || die;

    $k = scalar @$final_ref;
    @current = (util::deep_copy($final_ref), $ll, $BIC);

    if ((!defined($global_best[2])) || ($global_best[2] > $current[2])) {
      # This increased k yielded the new winner under BIC.
      @global_best = (util::deep_copy($final_ref), $ll, $BIC);
      $again = 1;
    }

    if ($iter < $iter_auto) {
      $again = 1;
    }

    if ($again && ((!defined($k_max)) || ($k < $k_max))) {
      $k++;
      $iter++;

      # Identify area of least weighted likelihood, and put a new
      # component there.

      my $glob_param_ref = +[];

      foreach (@{$current[0]}) {
        my ($prob, $base_params) = @{$_}[0,3];
        push @$glob_param_ref, +[ $prob, @$base_params ];
      }

      my $global_pdf =  normal_truncated_mix_gen_pdf($glob_param_ref);
      my @density_pairs = ();

      defined($global_pdf) || die;
      foreach (@$data_ref) {
        my $datum      = $_;
        my $likelihood = &$global_pdf($datum);

        # Consistency check.
        defined($likelihood) || die;
        ($likelihood > 0) || die;
        push @density_pairs, +[ $datum, $likelihood ];
      }

      @density_pairs = sort { $a->[1] <=> $b->[1] } @density_pairs;

      # Keep just the least-likely 1%.  1% is arbitrary.
      my $pts = int(100*$n);

      # But always at least 1...
      $pts = ($pts > 0) ? $pts : 1;

      splice @density_pairs, $pts;

      my $wt_sum = 0;
      my $sum    = 0;

      foreach (@density_pairs) {
        my ($itm, $lh) = @{$_};

        $wt_sum += 1/$lh;
        $sum    += $itm/$lh;
      }

      my $mean    = $sum / $wt_sum;
      my $sigma_l = abs(($lo_ext - $mean)/3);
      my $sigma_r = abs(($hi_ext - $mean)/3);
      my $sigma = ($sigma_l > $sigma_r) ? $sigma_l : $sigma_r;
      my $ltrunc = ($lo_ext-$mean)/$sigma;
      my $rtrunc = ($hi_ext-$mean)/$sigma;
      my $param_ref = +[ $mean, $sigma, $ltrunc, $rtrunc ];

      $__UNIRAND_DEBUG_PRINTF && print "Component init [$mean, $sigma, $ltrunc, $rtrunc]\n";
      my $comp_pdf       = normal_truncated_gen_pdf($param_ref);


      ($k > 1) || die;
      # Reduce probabilities accordingly.
      map { $_->[0] *= (($k-1)/$k) } @{$current[0]};

      push @{$current[0]}, +[ 1/$k, \&normal_truncated_est_wtd,
      $comp_pdf, $param_ref ];
      redo em_loop;
    }
  }

  return @global_best;
}



# Fit a mixture of truncated norms.  Be warned that this is a VERY
# expensive operation.
#
# (1) Normal EM is very expensive.
# (2) Estimating a normal is fast -- it just requires some moment
#     computations.
# (3) Estimating a TRUNCATED normal requires either numerical search
#     or numerical integration, and thus is far more expensive than
#     a standard normal.
#
# The result is something that shouldn't be used unless you really
# think it applies.  When it does, however, it's quite possibly the
# only game in town when it comes to applicable distributions.
#
#
# CHANGE:
#   I've added a 'windowed' search technique.  And, since you should
#   not be using this extraordinarily expensive method anyway unless
#   you really need a decent fit to something odd, it'll try BOTH
#   methods and choose the better one according to A2.

sub normal_truncated_mix_est($;$$$) {
  my $data_ref = shift;
  my $k_max    = shift;
  my $k_min    = shift;
  my $means    = shift;

  if (defined($means)) {
    $k_min = scalar @$means;
  }

  if ((!defined($data_ref)) || (ref($data_ref) ne 'ARRAY') ||
  (!scalar(@$data_ref))) {
    return undef;
  }

  my $EM_param_ref = undef;  # EM
  my $WD_param_ref = undef;  # windowed

  if (0) {
    # The EM way.
    my ($mixture_ref, $ll, $BIC) = normal_truncated_em2($data_ref, $k_max,
    $k_min, $means);

    if (defined($mixture_ref) && (scalar(@$mixture_ref))) {
      loop:  foreach (@$mixture_ref) {
        my ($mix_prob, $estimator, $pdf, $params) = @{$_};

        if (!defined($params)) {
          $EM_param_ref = undef;
          last loop;
        }

        my ($mu, $sigma, $ep, $epp) = @$params;

        push @$EM_param_ref, +[ $mix_prob, $mu, $sigma, $ep, $epp ];
      }
    }
  }

  if (1) {
    # The windowed way.
    $WD_param_ref = window_est($data_ref, \&normal_truncated_est);
  }


  my $EM_CDF  = defined($EM_param_ref) ?
  normal_truncated_mix_gen_cdf($EM_param_ref) : undef;
  my $WD_CDF  = defined($WD_param_ref) ?
  normal_truncated_mix_gen_cdf($WD_param_ref) : undef;
  my $EM_A2   = defined($EM_CDF) ?
  AndersonDarling::AndersonDarling($data_ref, $EM_CDF) : undef;
  my $WD_A2   = defined($WD_CDF) ?
  AndersonDarling::AndersonDarling($data_ref, $WD_CDF) : undef;

  (defined($EM_A2) || defined($WD_A2)) || return undef;

  my $param_ref = undef;

  if (!defined($EM_A2)) {
    $param_ref = $WD_param_ref;
  } elsif (!defined($WD_A2)) {
    $param_ref = $EM_param_ref;
  } elsif (($EM_A2 > $WD_A2) ||
  (scalar(@$EM_param_ref) > scalar(@$WD_param_ref))) {
    $param_ref = $WD_param_ref;
  } else {
    $param_ref = $EM_param_ref;
  }

  my $CDF_sub = normal_truncated_mix_gen_cdf($param_ref);
  my $PPF_sub = normal_truncated_mix_gen_ppf($param_ref);
  my $PDF_sub = normal_truncated_mix_gen_pdf($param_ref);

  return ($CDF_sub, $param_ref, $PPF_sub, $PDF_sub);
}



# The "windowed" estimator for the mixture of truncated normals
# should work fine if given the original data.  However, if you
# give it quantiles or any other sample, it is quite possible
# that some points not represented in the sample will be "left
# out" -- that is, in regions with zero density according to
# distribution -- since the componentss will not necessarily join
# to cover the entire range.
#
# This function is designed to modify the distribution parameters
# to ensure that all points are covered.  Give it a ref to the
# parameter vector and a ref to the (full) data vector.
#
# There's a third parameter as well.  If set to 1 (default), it'll
# just do a linear scan through the data vector tweaking parameters
# as need be -- fast, but this will be order-dependent.  If 0, it'll
# use a far slower method that should be far less order-dependent.
#
# We return a ref to the new parameter vector.  If wantarray(),
# we'll actually return ($CDF, $param_ref_new, $PPF_sub, $PDF_sub)
# just as the estimator.
#
# How we choose which component to modify:
#
# For each component, the proposed modification is to extend the
# nearer truncation point so that the datum in question is either
# at cdf(datum)=p, or cdf(datum)=1-p, where p=1/ceil(N*Pdist).
#
# The badness of the modification is the probability of the
# component multiplied by the increase in area covered.

sub normal_truncated_mix_fix($$;$) {
  my $param_ref = shift;
  my $data_ref  = shift;
  my $linear    = (scalar(@_)) ? shift : 1;

  defined($param_ref) || return undef;
  defined($data_ref)  || return undef;
  defined($linear)    || ($linear = 1);

  defined($param_ref->[0]) || return undef;

  # Guarantee param_ref is in hierarchical form.
  if (!ref($param_ref->[0])) {
    my @param_copy = @$param_ref;

    $param_ref = +[];

    while (scalar(@param_copy)) {
      # prob, mu, sigma, ep, epp
      push @$param_ref, +[ splice @param_copy, 0, 5 ];
    }
  }

  my $pdf = normal_truncated_mix_gen_pdf($param_ref);
  my $N   = scalar @$data_ref;

  if ($linear) {
    linear_scan:  foreach (@$data_ref) {
      my $x = $_;

      if (&$pdf($x) > 0) {
        next linear_scan;
      }

      my ($best_param_ref, $badness) =
      _normal_truncated_mix_fix_helper($param_ref, scalar(@$data_ref), $x);
      defined($best_param_ref) || die "Can't fix for $x!";

      $param_ref = $best_param_ref;
      $pdf = normal_truncated_mix_gen_pdf($param_ref);

      (&$pdf($x) > 0) || die;
    }
  } else {
    my @exceptions = grep { &$pdf($_) <= 1e-6 } @$data_ref;

    while (scalar(@exceptions)) {
      my $exception_count = scalar @exceptions;
      my $best_bad        = undef;
      my $best_fix        = undef;

      $__UNIRAND_DEBUG_PRINTF && print "mtn_fix:  $exception_count remaining\n";
      for (my $i=0; $i < $exception_count; $i++) {
        my ($fix_ref, $fix_bad) =
        _normal_truncated_mix_fix_helper($param_ref, scalar(@$data_ref),
        $exceptions[$i]);
        defined($fix_ref) || next;
        if ((!defined($best_bad)) || ($best_bad > $fix_bad)) {
          $best_bad = $fix_bad;
          $best_fix = $fix_ref;
        }
      }

      defined($best_fix) || die;

      $param_ref = $best_fix;
      $pdf       = normal_truncated_mix_gen_pdf($param_ref);
      @exceptions = grep { &$pdf($_) <= 1e-6 } @$data_ref;
    }
  }

  if (wantarray()) {
    my $cdf = normal_truncated_mix_gen_cdf($param_ref);
    my $ppf = normal_truncated_mix_gen_ppf($param_ref);

    return ($cdf, $param_ref, $ppf, $pdf);
  } else {
    return $param_ref;
  }
}


# Given the current parameter vector, total count, and a single datum,
# return a new parameter vector and the 'badness' of the change.
sub _normal_truncated_mix_fix_helper($$$) {
  my $param_ref = shift;
  my $N         = shift;
  my $datum     = shift;

  defined($param_ref) || die;
  defined($N)         || die;
  defined($datum)     || die;

  my @queue = ();
  my $comp_idx = 0;
  my $comp_ct  = scalar @$param_ref;

  for (my $param_idx=0; $param_idx < $comp_ct; $param_idx++) {
    my ($prob, $mu, $sigma, $ep, $epp) = $param_ref->[$param_idx];
    my $ltrunc = defined($ep) ? ($mu + ($sigma * $ep)) : undef;
    my $rtrunc = defined($epp) ? ($mu + ($sigma * $epp)) : undef;
    my $component_ppf = normal_truncated_gen_ppf(+[ $mu, $sigma, $ep, $epp ]);

    my $expected_coverage = int(($prob * $N) + 0.5);
    my $pct_left  = 1/($expected_coverage+1);
    my $pct_right = 1 - $pct_left;
    my $param_copy = util::deep_copy($param_ref);
    my $min        = undef;
    my $max        = undef;

    if ((defined($ltrunc)) && ($ltrunc >= $datum)) {
      # off left edge
      $min = $datum;
      $max = &$component_ppf($pct_right);
    } elsif ((defined($rtrunc)) && ($rtrunc <= $datum)) {
      # off right edge
      $max = $datum;
      $min = &$component_ppf($pct_left);
    } else {
      # already covered
      return ($param_copy, 0);
    }

    my $Fnt     = normal_gen_cdf(+[$mu, $sigma]);
    my $Fnt_min = &$Fnt($min);
    my $Fnt_max = &$Fnt($max);

    # From solving the system.
    my $ep_pct =
    (
      ((($N ** 2) - $N) * $Fnt_min) -
      ($N * $Fnt_max)
    ) / (($N) * ($N-2));
    my $epp_pct = $ep_pct + ($N * ($Fnt_min - $ep_pct));

    $ep_pct   = ($ep_pct <= 0) ? 0 : $ep_pct;
    $epp_pct  = ($epp_pct >=1) ? 1 : $epp_pct;

    my $ep_new  = ($ep_pct > 0)  ? normal_std_ppf($ep_pct) : undef;
    my $epp_new = ($epp_pct < 1) ? normal_std_ppf($epp_pct) : undef;

    my $ltrunc_new = defined($ep_new) ? ($mu + ($sigma * $ep_new)) : undef;
    my $rtrunc_new = defined($epp_new) ? ($mu + ($sigma * $epp_new)) : undef;

    if (defined($ltrunc) && ($ltrunc >= $min)) {
      # It was off the left edge.  Is it still?
      ((!defined($ltrunc_new)) || ($ltrunc_new < $min)) || next;

      # It's included.  Consider modified $ep.
      $param_copy->[$param_idx] = +[ $prob, $mu, $sigma, $ep_new, $epp ];

      my $ep_old_pct = normal_std_cdf($ep);
      my $area_added = $ep_old_pct - $ep_pct;
      my $badness    = $prob * $area_added;
      push @queue, +[ $param_copy, $badness ];
    } elsif (defined($rtrunc) && ($rtrunc <= $min)) {
      # It was off the right edge.  Is it still?
      ((!defined($rtrunc_new)) || ($rtrunc_new > $min)) || next;

      # It's included.  Consider modified $ep.
      $param_copy->[$param_idx] = +[ $prob, $mu, $sigma, $ep, $epp_new ];

      my $epp_old_pct = normal_std_cdf($epp);
      my $area_added  = $epp_pct - $epp_old_pct;
      my $badness     = $prob * $area_added;

      push @queue, +[ $param_copy, $badness ];
    } else {
      # Inconsistency error.
      die;
    }
  }

  if (!scalar(@queue)) {
    # Can't do it ?!
    return undef;
  }

  # sort by ascending badness
  @queue = sort { $a->[1] <=> $b->[1] } @queue;

  my $param_final   = $queue[0]->[0];
  my $badness_final = $queue[0]->[1];

  return ($param_final, $badness_final);
}

# Generate the CDF for a mixture of normals.  The parameters
# are expected as a list of (probability, mean, standard
# deviation, ltrunc, rtrunc) quintuples, as outputted by the
# estimator.
sub normal_truncated_mix_gen_cdf($) {
  my $param_ref = shift;
  my @pairs     = ();  # prob, cdf

  if (!(ref($param_ref->[0]))) {
    # In flat form.  We'll fudge it into the expected form.
    my @param_flat = @$param_ref;
    my $ct         = (scalar @$param_ref)/5;

    $param_ref = +[];
    ($ct == int($ct)) || die;
    for (my $i=0; $i < $ct; $i++) {
      my ($p, $mean, $dev, $ep, $epp) = splice @param_flat, 0, 5;
      $param_ref->[$i] = +[ $p, $mean, $dev, $ep, $epp];
    }
  }

  my $prob_sum = 0;
  foreach (@$param_ref) {
    my ($prob, $mu, $sigma, $ep, $epp) = @{$_};
    my $cdf = undef;

    ($prob < 0) && return undef;
    if ($prob > 0) {
      $cdf = normal_truncated_gen_cdf(+[ $mu, $sigma, $ep, $epp ]);
      push @pairs, +[ $prob, $cdf ];
      $prob_sum += $prob;
    }
  }

  ($prob_sum > 0) || return undef;
  map { $_->[0] /= $prob_sum } @pairs;

  return sub {
    my $x   = shift @_;
    my $sum = 0;

    foreach (@pairs) {
      my ($prob, $cdf) = @{$_};
      defined($x) || die;
      $sum += $prob * &$cdf($x);
    }

    return $sum;
  };
}


# Generate the PDF for a mixture of normals.
sub normal_truncated_mix_gen_pdf($) {
  my $param_ref = shift;
  my @pairs     = ();  # prob, pdf

  if (!(ref($param_ref->[0]))) {
    # In flat form.  We'll fudge it into the expected form.
    my @param_flat = @$param_ref;
    my $ct         = (scalar @$param_ref)/5;

    $param_ref = +[];
    ($ct == int($ct)) || die;
    for (my $i=0; $i < $ct; $i++) {
      my ($p, $mean, $dev, $ep, $epp) = splice @param_flat, 0, 5;
      $param_ref->[$i] = +[ $p, $mean, $dev, $ep, $epp ];
    }
  }

  my $prob_sum = 0;
  foreach (@$param_ref) {
    my ($prob, $mu, $sigma, $ep, $epp) = @{$_};
    my $pdf = undef;

    ($prob < 0) && return undef;
    if ($prob > 0) {
      $pdf = normal_truncated_gen_pdf(+[ $mu, $sigma, $ep, $epp ]);
      push @pairs, +[ $prob, $pdf ];
      $prob_sum += $prob;
    }
  }

  ($prob_sum > 0) || return undef;
  map { $_->[0] /= $prob_sum } @pairs;

  return sub {
    my $x   = shift @_;
    my $sum = 0;

    foreach (@pairs) {
      my ($prob, $pdf) = @{$_};
      $sum += $prob * &$pdf($x);
    }

    return $sum;
  };
}



# Generate the PPF for a mixture of normals.  The unmixing,
# in the general case, is probably quite nasty.
sub normal_truncated_mix_gen_ppf($) {
  my $param_ref  = shift;
  my $mix_cdf    = normal_truncated_mix_gen_cdf($param_ref);
  my @param_copy = flatten(@$param_ref);
  my @guides     = ();

  my $min = undef;
  my $max = undef;
  my $no_min = 0;
  my $no_max = 0;
  my $prob_sum = 0;

  defined($mix_cdf) || return undef;

  while (scalar(@param_copy) > 0) {
    my ($prob, $mean, $sigma, $ep, $epp) = splice(@param_copy, 0, 5);
    defined($mean) || die;


    ($prob < 0) && return undef;
    ($prob > 0) || next;
    $prob_sum += $prob;
    my $cdf_val = &$mix_cdf($mean);
    my $pdf_fun = normal_truncated_gen_pdf(+[ $mean, $sigma, $ep, $epp ]);

    if (defined($ep) && (!$no_min)) {
      if ((!defined($min)) || ($min > ($mean + ($ep * $sigma)))) {
        $min = $mean + ($ep * $sigma);
      }
    } else {
      $no_min = 1;
    }

    if (defined($epp) && (!$no_max)) {
      if ((!defined($max)) || ($max < ($mean + ($epp * $sigma)))) {
        $max = $mean + ($epp * $sigma);
      }
    } else {
      $no_max = 1;
    }

    # We do this to avoid changing $param_ref.
    push @guides, +[ $prob, $mean, $sigma, $cdf_val, $pdf_fun];
  }

  ($prob_sum > 0) || return undef;
  map { $_->[0] /= $prob_sum } @guides;

  my $base_ppf = _mix_gen_ppf($mix_cdf, @guides);

  $min = $no_min ? undef : $min;
  $max = $no_max ? undef : $max;

  my $min_pct = $no_min ? 0 : &$mix_cdf($min);
  my $max_pct = $no_max ? 1 : &$mix_cdf($max);

  return sub {
    my $x = shift;

    if ($x <= $min_pct) {
      return $min;
    } elsif ($x >= $max_pct) {
      return $max;
    } else {
      return &$base_ppf($x);
    }
  }
}




# This approach is something quite different from EM, but has a
# similar purpose of finding appropriate mixture models.
#
# Inputs:
#   A reference to the univariate data array itself.
#   A reference to the single-component estimator.
#
# Outputs:
#   undef, on failure.
#   A reference to a list of mixture parameters in the usual form --
#   one tuple per component, consisting of a probability followed by
#   all the usual parameters for that component type.
#
# Warning:  This might scale with the SQUARE of the number of
# 'breakpoints', and linearly with the size of the data.  It's
# expensive.  However, it may work even in situations where my EM
# initialization / component location schemes go horribly wrong.

sub window_est($$) {
  my $data_ref = shift;
  my $comp_est = shift;

  defined($data_ref) || die;
  defined($comp_est) || die;

  (ref($data_ref) eq 'ARRAY') || die;
  my $N = scalar @$data_ref;
  ($N > 1) || return undef;

  my @breakpoints = ();

  $data_ref = +[ sort { $a <=> $b } @$data_ref ];
  my ($bp_ref, $x_dense) = Numerical::rls_logparabolic_breakpoints(data => $data_ref, sorted => 1);

  if (defined($bp_ref)) {
    push @breakpoints, @$bp_ref;
  }

  $bp_ref = Numerical::density_breakpoints(data => $data_ref, sorted => 1,
  radius => 2, x_dense => $x_dense);

  if (defined($bp_ref)) {
    push @breakpoints, @$bp_ref;
  }

  {
    # uniqueness
    my %table = ();
    foreach (@breakpoints) {
      $table{$_} = 1;
    }

    foreach (@$bp_ref) {
      $table{$_} = 1;
    }

    @breakpoints = sort {$a <=> $b } (keys %table);
  }

  scalar(@breakpoints) || (return undef);

  # Force the minimum to be a breakpoint.
  unshift @breakpoints, $data_ref->[0];

  # At any one time, we visualize the data on which we haven't yet found
  # a satisfactory component into two parts.
  #
  #   left <== data we're trying to fit
  #    right ==> data we've postponed trying to fit
  #
  # It's a greedy method.  We try to fit over the entire data first,
  # and push parts of the data from the 'left' to the 'right' until
  # we get a reasonable fit.  Then we discard everything we just fit,
  # and push all the rest back to the left again.
  #
  # Now, pushing ONE datum at a time would be too expensive.  Therefore,
  # we predetermined 'breakpoints' -- boundaries, basically.  If we push
  # a breakpoint from the left to the right, we also push all the data
  # that at is as large as the breakpoint, as well.  Derivation of the
  # breakpoints is probably the more complicated, thornier part of this
  # problem.

  my @buf_left    = @$data_ref;
  my @brk_left    = @breakpoints;
  my @buf_right   = ();
  my @brk_right   = ();

  # Arbitrary.  Increasing this value will speed things up, at the cost
  # of accepting worse fits.
  my $max_A2      = 3;
  my @mix_params = ();


  # clear the reference
  $data_ref       = undef;

  # The following two track the best fit (in terms of Anderson-Darling
  # criterion) we've seen that includes the leftmost segment.  I'm
  # favoring Anderson-Darling here because it actually takes into
  # account /shape/ of the distribution, in terms of correlation with
  # the expected cumulative distribution function, rather than mere
  # log-likelihood.
  #
  # Log-likelihood likes it when all the "normal" data is piled onto
  # the mean.  I don't, unless the estimated standard deviation is
  # REALLY tiny.

  my $min_A2_seen = undef;
  my @best_fit    = ();

  evaluate:
  if (!scalar(@buf_left)) {
    # Are we done yet?
    goto done;
  }

  my ($cdf, $param_ref, $ppf, $pdf) = &$comp_est(\@buf_left);

  if (defined($cdf)) {
    my $A2 = AndersonDarling::AndersonDarling(\@buf_left, $cdf, 1);

    if (!defined($A2)) {
      goto reject;
    }

    if ((!defined($min_A2_seen)) || ($min_A2_seen > $A2)) {
      # We found _a_ fit.  If we find no _good_ fit, we may
      # resort to the best we've seen.
      $min_A2_seen = $A2;
      my $comp_prob = scalar(@buf_left) / $N;
      @best_fit = ( +[ $comp_prob, @$param_ref], +[ @buf_right ], +[ @brk_right ] );
    }

    if ($A2 <= $max_A2) {
      my $comp_prob = scalar(@buf_left) / $N;
      push @mix_params, +[ $comp_prob, @$param_ref ];

      if (!scalar(@buf_right)) {
        goto done;
      }

      $min_A2_seen = undef;
      @best_fit    = ();

      @buf_left = @buf_right;
      @brk_left = @brk_right;
      @buf_right = ();
      @brk_right = ();

      # Next segment
      goto evaluate;
    }
  }

  reject:
  # Consistency check.  I put an automatic breakpoint at the beginning
  # of the data, and that breakpoint should never be removed since to
  # shift it to the right would bring ALL of the data along with it.
  (scalar(@brk_left) > 0) || die;

  if ($brk_left[-1] <= $buf_left[0]) {
    if (defined($min_A2_seen)) {
      # We found at least one fit, although a bad one, that includes
      # the leftmost segment.  Use it.
      push @mix_params, +[ @{$best_fit[0]} ];
      @buf_left = @{$best_fit[1]};
      @brk_left = @{$best_fit[2]};
      @buf_right = ();
      @brk_right = ();

      # Clear best-fit information.
      $min_A2_seen = undef;
      @best_fit = ();

      # Start iterating
      goto evaluate;
    } else {
      # We didn't find ANY fit that includes this leftmost segment.
      return undef;
    }
  }

  # If we're here,
  #  1.  We rejected the fit on this segment.
  #  2.  We have at least one more breakpoint we can 'push' to
  #      divide the data.
  #
  # Push now.

  my $brk      = pop @brk_left;
  unshift @brk_right, $brk;

  while ($buf_left[-1] >= $brk) {
    unshift @buf_right, (pop @buf_left);
  }

  goto evaluate;

  done:
  {
    # At this point, we have a solution... but maybe an inefficient one.
    # It's even possible to have multiple near-identical components that
    # differ only slightly in the mean and standard deviation.  The
    # easiest and most obvious case would be when:
    #
    #   means are very close
    #   standard deviations are very close
    #   truncation points are very close (possible?)
    #
    # and the next easiest is when the truncation points are
    # consecutive.
    #
    # Of course, the distribution may not be truncated at all.  *shrug*
    my $cct     = scalar @mix_params;
    my $istrunc = (scalar @{$mix_params[0]}) > 3;

    # Tolerate this much difference, relative to 1.
    my $matchtol = 0.05;

    for (my $i=0; $i < ($cct-1); $i++) {
      my $tuple_i  = $mix_params[$i];
      my $prob_i   = $tuple_i->[0];
      my $mu_i     = $tuple_i->[1];
      my $sigma_i  = $tuple_i->[2];
      my $ep_i     = $istrunc ? $tuple_i->[3] : undef;
      my $epp_i    = $istrunc ? $tuple_i->[4] : undef;

      # m for modified, and matching.  Undef is pretty close to
      # 3.5 devs out.
      my $epm_i  = defined($ep_i)  ? $ep_i  : -3.5;
      my $eppm_i = defined($epp_i) ? $epp_i : 3.5;

      for (my $j=$i+1; $j < $cct; $j++) {
        my $tuple_j  = $mix_params[$j];
        my $prob_j   = $tuple_j->[0];
        my $mu_j     = $tuple_j->[1];
        my $sigma_j  = $tuple_j->[2];
        my $ep_j     = $istrunc ? $tuple_j->[3] : undef;
        my $epp_j    = $istrunc ? $tuple_j->[4] : undef;


        my $epm_j  = defined($ep_j)  ? $ep_j  : -3.5;
        my $eppm_j = defined($epp_j) ? $epp_j : 3.5;

        my $mu_diff = abs($mu_i - $mu_j);
        my $mu_comp = (abs($mu_i) > abs($mu_j)) ? abs($mu_i) : abs($mu_j);

        if ($mu_diff > ($matchtol * $mu_comp)) {
          next;
        }

        my $sigma_diff = abs($sigma_i - $sigma_j);
        my $sigma_comp = (abs($sigma_i) > abs($sigma_j)) ? abs($sigma_i) :
        abs($sigma_j);

        if ($sigma_diff > ($matchtol * $sigma_comp)) {
          next;
        }

        # Hooooookay.  Both mean and sigma differences are within
        # tolerances.  We're already done testing IF the distributions
        # aren't truncated.  Otherwise...

        if ($istrunc) {
          # Check case 1:  Are they basically the same?  Here, undef
          # will match with a magnitude of 3.5, as only ~ 0.04% of the
          # data should be further away than that.

          my $match = 0;

          # case 1:
          {
            my $ep_diff = abs($epm_i - $epm_j);
            my $ep_comp = (abs($epm_i) > abs($epm_j)) ? $epm_i : $epm_j;
            my $epp_diff = abs($eppm_i - $eppm_j);
            my $epp_comp = (abs($eppm_i) > abs($eppm_j)) ? $eppm_i : $eppm_j;

            if (($ep_diff <= ($ep_comp * $matchtol)) &&
            ($epp_diff <= ($epp_comp * $matchtol))) {
              $match = 1;
            }
          }

          if (!$match) {
            # Case 2a:  i is left before j
            my $ep_diff = abs($eppm_i - $epm_j);
            my $ep_comp = (abs($eppm_i) > abs($epm_j)) ? $eppm_i : $epm_j;

            if ($ep_diff <= ($ep_comp * $matchtol)) {
              $match = 1;
            }
          }


          if (!$match) {
            # Case 2b:  j is left before i
            my $ep_diff = abs($epm_i - $eppm_j);
            my $ep_comp = (abs($epm_i) > abs($eppm_j)) ? $epm_i : $eppm_j;

            if ($ep_diff <= ($ep_comp * $matchtol)) {
              $match = 1;
            }
          }

          if ($match) {
            $tuple_i->[0] += $tuple_j->[0];
            $tuple_i->[1]  = ($tuple_i->[1] + $tuple_j->[1]) / 2;
            $tuple_i->[2]  = ($tuple_i->[2] + $tuple_j->[2]) / 2;

            if (defined($ep_i) &&  defined($ep_j)) {
              my $lt_i = $mu_i + (2*$ep_i);
              my $lt_j = $mu_j + (2*$ep_j);
              my $lt_new = ($lt_i < $lt_j) ? $lt_i : $lt_j;

              $tuple_i->[3] = ($lt_new - $tuple_i->[1]) / ($tuple_i->[2]);
            } else {
              $tuple_i->[3] = undef;
            }

            if (defined($epp_i) &&  defined($epp_j)) {
              my $rt_i = $mu_i + (2*$epp_i);
              my $rt_j = $mu_j + (2*$epp_j);
              my $rt_new = ($rt_i > $rt_j) ? $rt_i : $rt_j;

              $tuple_i->[4] = ($rt_new - $tuple_i->[1]) / ($tuple_i->[2]);
            } else {
              $tuple_i->[4] = undef;
            }

            splice @mix_params, $j, 1;
            goto done;
          }
        } else {
          # Truncation not an issue.  In that case, we can just merge now.
          $tuple_i->[0] += $tuple_j->[0];
          $tuple_i->[1]  = ($tuple_i->[1] + $tuple_j->[1]) / 2;
          $tuple_i->[2]  = ($tuple_i->[2] + $tuple_j->[2]) / 2;
          splice @mix_params, $j, 1;
          goto done;
        }
      }
    }
  }


  return \@mix_params;
}


# Fit a uniform function to the data.  For boundaries, we'll use
# the extremes of the data.  We return the low and the high as
# the parameters.
#
# Parameters:  ($low [minimum], $high [maximum])
#
# F($x)   = ($x-$low)/($high-$low)
# f($x)   = 1/($high-$low)
# F-1($y) = $low + ($y * ($high-$low))
#
# $x must be within [$low,$high].
sub uniform_est($) {
  my $data_ref = shift;

  if ((!defined($data_ref)) || (ref($data_ref) ne 'ARRAY') ||
  (!scalar(@$data_ref))) {
    return undef;
  }

  my $ct      = scalar @$data_ref;
  my $low     = ArrayFoo::arrSelect($data_ref, 0);
  my $high    = ArrayFoo::arrSelect($data_ref, $ct-1);

  my $step    = ($ct > 1) ? ($high-$low)/($ct-1) : 0;

  my $param_ref = +[ $low-$step, $high+$step];
  my $CDF_sub = uniform_gen_cdf($param_ref);
  my $PPF_sub = uniform_gen_ppf($param_ref);
  my $PDF_sub = uniform_gen_pdf($param_ref);

  return ($CDF_sub, $param_ref, $PPF_sub, $PDF_sub);
}




# Weighted version, for EM.  For now, it's the same as the regular
# version.

sub uniform_est_wtd($$) {
  my $data_ref = shift;

  if ((!defined($data_ref)) || (ref($data_ref) ne 'ARRAY') ||
  (!scalar(@$data_ref))) {
    return undef;
  }

  return uniform_est($data_ref);
}



# Given the appropriate parameters, generate the CDF.
sub uniform_gen_cdf($) {
  my $param_ref = shift @_;

  my $low = $param_ref->[0];
  my $high = $param_ref->[1];

  return sub {
    my $x = shift @_;

    if ($x <= $low) {
      return 0;
    } elsif ($x >= $high) {
      return 1;
    } else {
      return (($x-$low)/($high-$low));
    }
  };
}


# Likewise, for the percentage-point function.
sub uniform_gen_ppf($) {
  my $param_ref = shift @_;
  my $low = $param_ref->[0];
  my $high = $param_ref->[1];

  return sub {
    my $y = shift @_;

    die unless (($y >= 0) && ($y <= 1));

    return ($low + (($high - $low) * $y));
  };
}

# Likewise, for the percentage-point function.
sub uniform_gen_pdf($) {
  my $param_ref = shift @_;
  my $low = $param_ref->[0];
  my $high = $param_ref->[1];
  my $diff = $high-$low;

  if ($diff > 0) {
    return sub {
      my $x = shift;

      if (($x < $low) || ($x > $high)) {
        return 0;
      } else {
        return 1/$diff;
      }
    };
  } else {
    return sub {
      my $x = shift;

      if (($x < $low) || ($x > $high)) {
        return 0;
      } else {
        return 1;
      }
    };
  }
}


# Exponential fitting.
#
# Parameters:  ($loc [location], $lambda [shape])
#
# F($x)   =  1 - exp(-$lambda * ($x-$loc));
# f($x)   =  $lambda * exp(-$lambda * ($x-$loc));
# F-1($y) =  $loc - (log(1-$y)/$lambda)
#
# $x > $loc, $lambda > 0.
sub exponential_est($) {
  my $data_ref  = shift;

  if ((!defined($data_ref)) || (ref($data_ref) ne 'ARRAY') ||
  (!scalar(@$data_ref))) {
    return undef;
  }

  my $min      = ArrayFoo::arrSelect($data_ref, 0);
  my $loc      = ($min >= $__ABOVE_ZERO) ? 0 : ($min-$__ABOVE_ZERO);
  my $mean     = ArrayFoo::arrMean($data_ref);

  if ($mean == $loc) {
    return undef;
  }

  my $lambda   = 1/(ArrayFoo::arrMean($data_ref) - $loc);
  my $param_ref = +[ $loc, $lambda ];

  my $CDF_sub  = exponential_gen_cdf($param_ref);
  my $PPF_sub  = exponential_gen_ppf($param_ref);
  my $PDF_sub  = exponential_gen_pdf($param_ref);

  return ($CDF_sub, $param_ref, $PPF_sub, $PDF_sub);
}



# Weighted version for EM.
sub exponential_est_wtd($$) {
  my $data_ref  = shift;

  if ((!defined($data_ref)) || (ref($data_ref) ne 'ARRAY') ||
  (!scalar(@$data_ref))) {
    return undef;
  }

  my $wtdRef   = shift;
  my $min      = ArrayFoo::arrSelect($data_ref, 0);
  my $loc       = ($min >= $__ABOVE_ZERO) ? 0 : ($min-$__ABOVE_ZERO);
  my $mean     = ArrayFoo::arrMean($data_ref);

  if ($mean == $loc) {
    return undef;
  }

  my $lambda   = 1/(ArrayFoo::arrMeanWtd($data_ref, $wtdRef) - $loc);
  my $param_ref = +[ $loc, $lambda ];

  my $CDF_sub  = exponential_gen_cdf($param_ref);
  my $PPF_sub  = exponential_gen_ppf($param_ref);
  my $PDF_sub  = exponential_gen_pdf($param_ref);

  return ($CDF_sub, $param_ref, $PPF_sub, $PDF_sub);
}




sub exponential_gen_cdf($) {
  my $param_ref = shift @_;
  my $loc        = $param_ref->[0];
  my $lambda    = $param_ref->[1];

  return sub {
    my $x = shift @_;

    $x = $x - $loc;
    if ($x < 0) {
      return 0;
    }

    return (1 - exp(-$lambda * $x));
  };
}


sub exponential_gen_ppf($) {
  my $param_ref = shift @_;
  my $loc        = $param_ref->[0];
  my $lambda    = $param_ref->[1];


  return sub {
    my $y = shift @_;

    if ($y == 1) {
      return undef;
    }

    (($y >= 0) && ($y < 1)) || (return undef);

    return ( $loc - (log(1-$y)/$lambda) );
  };
}


sub exponential_gen_pdf($) {
  my $param_ref = shift @_;
  my $loc        = $param_ref->[0];
  my $lambda    = $param_ref->[1];

  return sub {
    my $x = shift @_;

    if ($x <= $loc) {
      return 0;
    }

    return ($lambda * exp(-$lambda * ($x - $loc)));
  };
}



# Three-parameter lognormal, estimated rather simply.
# Location, for instance, is only nonzero if a shift is
# needed to insure that all data exceeds zero by at
# least a small epsilon.
#
# Parameters:  ([$mu [mean of normal], $sigma [standard
#                deviation of normal], $loc [location])
#
# F($x)   = Fgauss(log($x - $loc))
# f($x)   = (1/($x-$loc)) * fgauss(log($x-$loc));
# F-1($y) = $loc + exp(F-1gauss($y))
#
# $x > $loc
sub lognormal_est($) {
  my $data_ref  = shift;

  if ((!defined($data_ref)) || (ref($data_ref) ne 'ARRAY') ||
  (!scalar(@$data_ref))) {
    return undef;
  }

  my $itm      = undef;
  my $min      = ArrayFoo::arrSelect($data_ref, 0);
  my $loc       = ($min >= $__ABOVE_ZERO) ? 0 : ($min-$__ABOVE_ZERO);

  my @arrLog = map { log($_ - $loc) } @$data_ref;

  my ($CDF_norm, $param_ref, $PPF_norm) = normal_est(\@arrLog);

  push @$param_ref, $loc;
  my $CDF_sub  = lognormal_gen_cdf($param_ref);
  my $PPF_sub  = lognormal_gen_ppf($param_ref);
  my $PDF_sub  = lognormal_gen_pdf($param_ref);

  return ($CDF_sub, $param_ref, $PPF_sub, $PDF_sub);
}


# Weighted version for EM.
sub lognormal_est_wtd($$) {
  my $data_ref  = shift;

  if ((!defined($data_ref)) || (ref($data_ref) ne 'ARRAY') ||
  (!scalar(@$data_ref))) {
    return undef;
  }

  my $wtdRef   = shift;
  my $itm      = undef;
  my $min      = ArrayFoo::arrSelect($data_ref, 0);
  my $loc       = ($min >= $__ABOVE_ZERO) ? 0 : ($min-$__ABOVE_ZERO);

  my @arrLog = map { log($_ - $loc) } @$data_ref;

  my ($CDF_norm, $param_ref, $PPF_norm) = normal_est_wtd(\@arrLog,
  $wtdRef);

  push @$param_ref, $loc;
  my $CDF_sub  = lognormal_gen_cdf($param_ref);
  my $PPF_sub  = lognormal_gen_ppf($param_ref);
  my $PDF_sub  = lognormal_gen_pdf($param_ref);

  return ($CDF_sub, $param_ref, $PPF_sub, $PDF_sub);
}



sub lognormal_gen_cdf($) {
  my $param_ref    = shift @_;
  my @gauss_params = @$param_ref;
  my $loc        = pop @gauss_params;
  my $normal_cdf   = normal_gen_cdf(\@gauss_params);

  if (!defined($normal_cdf)) {
    return undef;
  }

  return  sub {
    my $x = shift @_;

    $x -= $loc;
    if ($x <= 0) {
      return 0;
    } else {
      return &$normal_cdf(log($x));
    }
  };
}


sub lognormal_gen_ppf($) {
  my $param_ref    = shift @_;
  my @gauss_params = @$param_ref;
  my $loc        = pop @gauss_params;

  my $normal_ppf = normal_gen_ppf(\@gauss_params);

  if (!defined($normal_ppf)) {
    return undef;
  }

  return sub {
    my $y = shift @_;

    die unless (($y >= 0) && ($y < 1));

    return (exp(&$normal_ppf($y)) + $loc);
  };
}

# f($x)   = (1/($x-$loc)) * fgauss(log($x-$loc));

sub lognormal_gen_pdf($) {
  my $param_ref    = shift @_;
  my @gauss_params = @$param_ref;
  my $loc        = pop @gauss_params;

  my $normal_pdf = normal_gen_pdf(\@gauss_params);

  if (!defined($normal_pdf)) {
    return undef;
  }

  return sub {
    my $x = shift @_;

    if ($x > $loc) {
      return ((1/($x-$loc)) * &$normal_pdf(log($x-$loc)));
    } else {
      return 0;
    }
  };
}




# Estimate a mixture of lognormals.  This is done in a BASIC
# fashion -- we assume that the 'location' parameter is the
# same for all the mixture components.
#
# This means that we can perform the
#
#   $x <= log($x-$loc)
#
# transformation and then compute a mixture of lognormals.
# The set of fitted parameters then contains:
#
#   - location parameter
#   - tuples, each of which contains
#     - mixing probablity
#     - mean
#     - standard deviation
#
# Hence, 3k+1 parameters.
#
# The CDF and PDF, again, are simple to compose as weighted
# sums of lognormal CDFs and PDFs (note -- must not forget
# loc parameters there).
#
# If you like, give it a maximum $k.
sub lognormal_mix_est($;$$$) {
  my $data_ref = shift;

  if ((!defined($data_ref)) || (ref($data_ref) ne 'ARRAY') ||
  (!scalar(@$data_ref))) {
    return undef;
  }

  my $k_max    = shift;
  my $k_min    = shift;
  my $means    = shift;
  my $itm      = undef;
  my $min      = ArrayFoo::arrSelect($data_ref, 0);
  my $loc       = ($min >= $__ABOVE_ZERO) ? 0 : ($min-$__ABOVE_ZERO);

  my @arrLog = map { log($_ - $loc) } @$data_ref;

  my ($CDF_norm, $param_mix, $PPF_norm, $PDF_norm) =
  normal_mix_est(\@arrLog, $k_max, $k_min, $means);

  if (!defined($CDF_norm)) {
    return undef;
  }

  my $param_ref = +[ $loc, @$param_mix ];

  my $CDF = lognormal_mix_gen_cdf($param_ref);
  my $PPF = lognormal_mix_gen_ppf($param_ref);
  my $PDF = lognormal_mix_gen_pdf($param_ref);

  return ($CDF, $param_ref, $PPF, $PDF);
}



# The CDF of a mixture of lognormals is merely a weighted sum
# of the CDFs of the individual lognormals.
sub lognormal_mix_gen_cdf($) {
  my $param_ref  = shift;
  my @param_flat = flatten(@$param_ref);
  my $loc        = shift @param_flat;

  my @prob_cdf_pairs = ();

  while (scalar(@param_flat) > 0) {
    my ($prob, $mu, $sigma) = splice (@param_flat,0,3);

    my $param_list = +[$mu, $sigma, $loc ];
    my $cdf_single = lognormal_gen_cdf($param_list);

    push @prob_cdf_pairs, +[ $prob, $cdf_single ];
  }

  return sub {
    my $x   = shift @_;
    my $sum = 0;

    foreach (@prob_cdf_pairs) {
      my ($wt, $cdf) = @{$_};

      $sum += $wt * &$cdf($x);
    }

    return $sum;
  };
}



# The PDF of a mixture of lognormals is merely a weighted sum
# of the PDFs of the individual lognormals.
sub lognormal_mix_gen_pdf($) {
  my $param_ref  = shift;
  my @param_flat = flatten(@$param_ref);
  my $loc        = shift @param_flat;

  my @prob_pdf_pairs = ();

  while (scalar(@param_flat) > 0) {
    my ($prob, $mu, $sigma) = splice (@param_flat,0,3);

    my $param_list = +[$mu, $sigma, $loc ];
    my $pdf_single = lognormal_gen_pdf($param_list);

    push @prob_pdf_pairs, +[ $prob, $pdf_single ];
  }

  return sub {
    my $x   = shift @_;
    my $sum = 0;

    foreach (@prob_pdf_pairs) {
      my ($wt, $pdf) = @{$_};

      $sum += $wt * &$pdf($x);
    }

    return $sum;
  };
}



# Generate the PPF for a mixture of lognormals.  The unmixing,
# in the general case, is probably quite nasty.
#
# Incoming parameters are expected in the form:
#   location (global)
#  and a bunch of <probability, mean, standard deviation>
#   tuples.

sub lognormal_mix_gen_ppf($) {
  my $param_ref  = shift;
  my $mix_cdf    = lognormal_mix_gen_cdf($param_ref);
  my @param_copy = flatten(@$param_ref);
  my $loc        = shift @param_copy;
  my @guides     = ();

  while (scalar(@param_copy) > 0) {
    my ($prob, $mean, $sigma) = splice(@param_copy, 0, 3);

    my $cdf_val = &$mix_cdf($mean);
    my $pdf_fun = lognormal_gen_pdf(+[ $mean, $sigma, $loc ]);

    # We do this to avoid changing $param_ref.
    push @guides, +[ $prob, $mean, $sigma, $cdf_val, $pdf_fun];
  }

  return _mix_gen_ppf($mix_cdf, @guides);
}



# Fit a truncated lognormal.
#
# This is a very ad-hoc version, because the location parameter
# is inferred merely as either 0 (if the data is all positive)
# or sufficiently negative to make them all positive.  So we
# shift the data, then log it, then use the MLE estimator for a
# truncated normal.
#
# The final parameters are:
#    [ $mu, $sigma, $eta_prime, $eta_prime_prime, $loc ]
sub lognormal_truncated_est($) {
  my $data_ref  = shift;

  if ((!defined($data_ref)) || (ref($data_ref) ne 'ARRAY') ||
  (!scalar(@$data_ref))) {
    return undef;
  }

  my $itm       = undef;
  my $min       = ArrayFoo::arrSelect($data_ref, 0);
  my $loc       = ($min >= $__ABOVE_ZERO) ? 0 : ($min-$__ABOVE_ZERO);

  my @arrLog = map { log($_ - $loc) } @$data_ref;

  my ($CDF_norm, $param_ref, $PPF_norm) = normal_truncated_est(\@arrLog);

  push @$param_ref, $loc;
  my $CDF_sub  = lognormal_truncated_gen_cdf($param_ref);
  my $PPF_sub  = lognormal_truncated_gen_ppf($param_ref);
  my $PDF_sub  = lognormal_truncated_gen_pdf($param_ref);

  return ($CDF_sub, $param_ref, $PPF_sub, $PDF_sub);
}


# Generate a CDF for truncated lognormals.
sub lognormal_truncated_gen_cdf($) {
  my $param_ref = shift;
  my ($mu, $sigma, $eta_prime, $eta_prime_prime, $loc) = @$param_ref;
  my $full_cdf   = lognormal_gen_cdf(+[ $mu, $sigma, $loc ]);


  if ((defined($eta_prime)) && (defined($eta_prime_prime)) &&
  ($eta_prime > $eta_prime_prime)) {
    return undef;
  }

  # Truncation points.
  my $trunc_min  = defined($eta_prime) ?
  ($loc + exp($mu + ($sigma * $eta_prime))) : undef;
  my $trunc_max  = defined($eta_prime_prime) ?
  ($loc + exp($mu + ($sigma * $eta_prime_prime))) : undef;

  # CDF at truncated points.
  my $trunc_min_cdf = defined($trunc_min) ? &$full_cdf($trunc_min) : 0;
  my $trunc_max_cdf = defined($trunc_max) ? &$full_cdf($trunc_max) : 1;
  my $remaining     = $trunc_max_cdf - $trunc_min_cdf;

  return sub {
    my $x = shift @_;
    my $basic = &$full_cdf($x);
    my $truncated = ($basic - $trunc_min_cdf) / $remaining;

    return $truncated;
  }
}


# Generate a PDF for truncated lognormals.  It's just a
# matter of scaling so the some of the integration is still
# 1.
sub lognormal_truncated_gen_pdf($) {
  my $param_ref = shift;
  my ($mu, $sigma, $eta_prime, $eta_prime_prime, $loc) = @$param_ref;


  if ((defined($eta_prime)) && (defined($eta_prime_prime)) &&
  ($eta_prime > $eta_prime_prime)) {
    return undef;
  }

  my $full_cdf   = lognormal_gen_cdf(+[ $mu, $sigma, $loc ]);
  my $full_pdf   = lognormal_gen_pdf(+[ $mu, $sigma, $loc ]);

  # Truncation points.
  my $trunc_min  = defined($eta_prime) ?
  ($loc + exp($mu + ($sigma * $eta_prime))) : undef;
  my $trunc_max  = defined($eta_prime_prime) ?
  ($loc + exp($mu + ($sigma * $eta_prime_prime))) : undef;

  # CDF at truncated points.
  my $trunc_min_cdf = defined($trunc_min) ? &$full_cdf($trunc_min) : 0;
  my $trunc_max_cdf = defined($trunc_max) ? &$full_cdf($trunc_max) : 1;
  my $remaining     = $trunc_max_cdf - $trunc_min_cdf;

  return sub {
    my $x = shift @_;
    my $basic = &$full_pdf($x);
    my $truncated = $basic / $remaining;

    return $truncated;
  }
}



# The PPF of a truncated lognormal... Hm.
#
# If x% was truncated off the left (percentage of a full
# normal), and y% off the right, then z% in the new graph
# should be something like...
#
#      x + (z * (1-x-y))
#
# in the original.  So we can transform and chain into
# that PPF, being sure to retransform to normal space.
#
sub lognormal_truncated_gen_ppf($) {
  my $param_ref = shift;
  my ($mu, $sigma, $eta_prime, $eta_prime_prime, $loc) = @$param_ref;


  if ((defined($eta_prime)) && (defined($eta_prime_prime)) &&
  ($eta_prime > $eta_prime_prime)) {
    return undef;
  }

  # Truncation points.
  my $trunc_min  = defined($eta_prime) ?
  ($loc + exp($mu + ($sigma * $eta_prime))) : undef;
  my $trunc_max  = defined($eta_prime_prime) ?
  ($loc + exp($mu + ($sigma * $eta_prime_prime))) : undef;

  my $full_cdf   = lognormal_gen_cdf(+[ $mu, $sigma, $loc ]);
  my $full_pdf   = lognormal_gen_pdf(+[ $mu, $sigma, $loc ]);

  # CDF at truncated points.
  my $trunc_min_cdf = defined($trunc_min) ? &$full_cdf($trunc_min) : 0;
  my $trunc_max_cdf = defined($trunc_max) ? &$full_cdf($trunc_max) : 1;
  my $remaining     = $trunc_max_cdf - $trunc_min_cdf;

  return sub {
    my $y_trunc = shift @_;
    my $y_non_trunc = $trunc_min_cdf + ($y_trunc * $remaining);

    if ($y_trunc <= 0) {
      return $trunc_min;
    } elsif ($y_trunc >= 1) {
      return $trunc_max;
    }

    my $x_standard  = normal_std_ppf($y_non_trunc);
    defined($x_standard) || return undef;
    my $log_x       = ($x_standard * $sigma) + $mu;

    # The lognormal transformaiton.
    my $x = $loc + exp($log_x);

    return $x;
  };
}




# Return a function that uses the Wilson-Hilferty method to generate
# a chi-square random variable (see the section on gamma random
# variables in Kotz and Johnson's _Continuous Univariate
# Distributions_; it's Chapter 17, Section 5.  This, in turn, cites
# Wilson and Hilferty in _Proceedings of the National Academy
# of Sciences_, vol. 17, pp 684-688, 1931.

# We return the random variable in CDF form.

# I don't have an estimator for a pure chi-square distribution yet,
# as it's mostly here
#
# (a) to support gamma, and
# (b) to support chi-square significance testing.
sub chi_square_gen_cdf($;$) {
  my $self = shift;
  my $v  = shift;

  # Allow for invoking either as an object method or "normally".
  if ((!defined($v)) && ((ref $self) ne 'ChiSquare')) {
    $v = $self;
  }

  # Allow +[ param_list ] syntax
  if ((ref $v) eq 'ARRAY') {
    $v = $v->[0];
  }

  (defined($v) && ($v > 0)) || return undef;

  my $cdf = sub {
    my $x = shift;

    ($x >= 0) || return 0;

    my $temp    = ($x/$v) ** (1.0/3);
    $temp -= 1;
    $temp += 2.0 / (9*$v);

    $temp *= sqrt(4.5*$v);
    my $phi_val = normal_std_cdf($temp);

    return $phi_val;
  };

  return $cdf;
}



# We return the random variable in PDF form.
sub chi_square_gen_pdf($;$) {
  my $self = shift;
  my $v  = shift;

  # Allow for invoking either as an object method or "normally".
  if ((!defined($v)) && ((ref $self) ne 'ChiSquare')) {
    $v = $self;
  }

  # Allow +[ param_list ] syntax
  if ((ref $v) eq 'ARRAY') {
    $v = $v->[0];
  }

  (defined($v) && ($v > 0)) || return undef;
  my $pdf = sub {
    my $x = shift;

    ($x >= 0) || return 0;

    my $temp = ($x/$v) ** (1.0/3);
    $temp -= 1;
    $temp += 2.0 / (9*$v);
    my $phi_val = normal_std_pdf($temp);

    my $y = $phi_val * sqrt(4.5*$v);

    # Chain rule.
    $y *= ($x ** (-2.0/3)) / (3 * ($v ** (1.0/3)));

    # Just in case of numerical problems (?).
    $y = ($y >= 0) ? $y : 0;
    return $y;
  };

  return $pdf;
}


# We return the random variable in PPF form.
sub chi_square_gen_ppf($;$) {
  my $self = shift;
  my $v    = shift;


  # Allow for invoking either as an object method or "normally".
  if ((!defined($v)) && ((ref $self) ne 'ChiSquare')) {
    $v = $self;
  }

  # Allow +[ param_list ] syntax
  if ((ref $v) eq 'ARRAY') {
    $v = $v->[0];
  }


  (defined($v) && ($v > 0)) || return undef;

  my $ppf = undef;

  if ($v < 60) {
    $ppf = sub {
      # Wilson-Hilferty.
      my $epsilon  = shift @_;
      my $Ue       = normal_std_ppf($epsilon);
      defined($Ue) || return undef;
      my $temp = ($Ue * sqrt(1/(4.5*$v))) + 1 - (1/(4.5*$v));
      $temp = $temp ** 3;
      my $x = $temp * $v;
      return $x;
    };
  } else {
    $ppf = sub {
      my $epsilon = shift @_;

      # Cornish-Fisher expansion (Eq. 29, c17, J&K, _CUD_).
      #
      # The ($v < 60) cutoff was used because this formula prefers
      # high-$v to minimize error.
      my $Ue      = normal_std_ppf($epsilon);
      defined($Ue) || return undef;
      my $Ue2     = $Ue ** 2;
      my $Ue3     = $Ue ** 3;
      my $Ue4     = $Ue ** 4;
      my $Ue5     = $Ue ** 5;

      my $temp = $v + (sqrt(2*$v) * $Ue) + (($Ue2 - 1) * (2.0/3));

      $temp += ($Ue3 - (7*$Ue)) / (9*sqrt($v * 2));
      $temp -= ((6*$Ue4) + (14*$Ue2) - 433) / (405*$v);
      $temp += ((9*$Ue5) + (256*$Ue3) - (433*$Ue)) /
      (4860*sqrt(2)*($v**(1.5)));

      return $temp;
    };
  }



  return $ppf;
}



# Returns Gamma($a).
#
# We need to use Numerical::Simpson's Rule, as I don't yet
# know of any closed-form approximators for the Gamma function.
sub gamma_function($) {
  my $a = shift @_;

  if ($a <= 0) {
    return undef;
  }

  if ($a == 0.5) {
    # special case
    return sqrt($__M_PI);
  } elsif ($a == int($a)) {
    # Another special case.
    my $fact = 1;

    while ($a > 1) {
      $fact *= $a;
      $a--;
    }
    return $fact;
  } else {
    my $gamma_integrator_function = sub {
      my $t = shift @_;

      if ($t <= 0) {
        return undef;
      }

      return (exp((($a-1) * log($t)) - $t));
    };


    # Fourth derivative.
    my $a_prod_1 = ($a-1);
    my $a_prod_2 = ($a_prod_1) * ($a - 2);
    my $a_prod_3 = ($a_prod_2) * ($a - 3);
    my $a_prod_4 = ($a_prod_3) * ($a - 4);

    my $gamma_integrator_function_4 = sub {
      my $t = shift @_;

      if ($t <= 0) {
        return undef;
      }
      my $log_t = log($t);
      my $e_t   = exp(-$t);

      my $sum = $a_prod_4     * exp(($a-5) * $log_t);
      $sum   -= 4 * $a_prod_3 * exp(($a-4) * $log_t);
      $sum   += 6 * $a_prod_2 * exp(($a-3) * $log_t);
      $sum   -= 4 * $a_prod_1 * exp(($a-2) * $log_t);
      $sum   += exp(($a-2) * $log_t);
      $sum   *= $e_t;

      return $sum;
    };


    # Numerical::Simpson's Rule.
    # Start integrating at 0, and continue until the intervals
    # don't matter much anymore.

    #return Numerical::Simpsons_rule($gamma_integrator_function,
    #$gamma_integrator_function_4,
    #$__ABOVE_ZERO, undef, undef);
    return Numerical::Romberg_indefinite($gamma_integrator_function,
    $__ABOVE_ZERO, undef, undef, 1);
  }
}




# Returns Gamma($a, $x).
#
# $x is limit, $a is 'a' in the integral.
#
# Could also express this in terms of gamma_function, but that would
# involve redundant computation.
sub gamma_incomplete_function($$) {
  my $a = shift @_;
  my $x = shift @_;

  my $gamma_integrator_function = sub {
    my $t = shift @_;

    if ($t <= 0) {
      return undef;
    }

    return (exp((($a-1) * log($t)) - $t));
  };


  # Fourth derivative.
  my $a_prod_1 = ($a-1);
  my $a_prod_2 = ($a_prod_1) * ($a - 2);
  my $a_prod_3 = ($a_prod_2) * ($a - 3);
  my $a_prod_4 = ($a_prod_3) * ($a - 4);

  my $gamma_integrator_function_4 = sub {
    my $t = shift @_;

    if ($t <= 0) {
      return undef;
    }
    my $log_t = log($t);
    my $e_t   = exp(-$t);

    my $sum = $a_prod_4     * exp(($a-5) * $log_t);
    $sum   -= 4 * $a_prod_3 * exp(($a-4) * $log_t);
    $sum   += 6 * $a_prod_2 * exp(($a-3) * $log_t);
    $sum   -= 4 * $a_prod_1 * exp(($a-2) * $log_t);
    $sum   += exp(($a-2) * $log_t);
    $sum   *= $e_t;

    return $sum;
  };


  # Numerical::Simpson's Rule.
  # Start integrating at 0, and continue up to $x.
  #return Numerical::Simpsons_rule($gamma_integrator_function,
  # $gamma_integrator_function_4,
  #0, $x, undef, undef, 0.001, 0.001);
  return Numerical::Romberg_definite($gamma_integrator_function, $__ABOVE_ZERO, $x);
}



# Three-parameter Gamma function.  Location is estimated the
# same way as it was for lognormals -- 0, or a minimum shift
# needed to insure positive-ness if that proves necessary.
# The other two are estimated via a method listed in Johnson
# and Kotz.
#
# Parameters:  ($loc [location], $gamma [shape], $beta [scale])
#
# Standard form ($loc=0, $beta=1, $gamma=?)
#
# F($x)   = gamma_incomplete($gamma, $x) / gamma($gamma)
# f($x)   = ($x^{$gamma - 1}e^{-$x}) / gamma($gamma)
# F-1($x) = numeric only
#
# General form:
# $x <= ($x-$loc)/$beta
# f($x) = just use chain rule
#
# $x > $loc

sub gamma_est($) {
  my $data_ref = shift @_;

  if ((!defined($data_ref)) || (ref($data_ref) ne 'ARRAY') ||
  (!scalar(@$data_ref))) {
    return undef;
  }

  my $min      = ArrayFoo::arrSelect($data_ref, 0);
  my $mu       = ArrayFoo::arrMean($data_ref);
  my $sigma    = ArrayFoo::arrDev($data_ref);


  # Blind assumption.  We'll make sure it's less than the minimum, at
  # least by a smidgen.

  my $loc       = ($min >= $__ABOVE_ZERO) ? 0 : ($min - $__ABOVE_ZERO);
  my $gamma    = undef;
  my $beta     = undef;

  # Thomson estimator.  See Johnson, Kotz.
  {
    # Y = log ((arithmetic mean - mu) / (geometric(data-mu)));
    my $geolog = 0;
    my $x      = undef;

    foreach $x (@$data_ref) {
      $geolog += log($x - $loc);
    }

    $geolog /= scalar(@$data_ref);
    my $Y = log($mu - $loc) - $geolog;

    if ($Y == 0) {
      return undef;
    }

    # Black magic.
    $gamma = (1/(4*$Y)) * (1+sqrt(1+(4*$Y)/3));

    if ($gamma > 0.9) {
      # Correction factor, namely more black magic.
      $gamma += (($gamma-1) / (24 - 96*$gamma)) + 0.0092;
    }

    # Scale parameter estimation.
    $beta = ($mu - $loc) / $gamma;
  }


  # print "$loc, $gamma, $beta\n";
  if ($gamma <= 0) {
    return undef;
  }

  ($beta > 0) || return undef;

  #  my $gamma_gamma = gamma_function($gamma);
  my $param_ref   = +[ $loc, $gamma, $beta ];

  my $CDF_sub = gamma_gen_cdf($param_ref);
  my $PPF_sub = gamma_gen_ppf($param_ref);
  my $PDF_sub = gamma_gen_pdf($param_ref);

  return ($CDF_sub, $param_ref, $PPF_sub, $PDF_sub);
}




# Weighted version for EM, in case anybody's sufficiently
# insane to contemplate such a nasty idea.
sub gamma_est_wtd($$) {
  my $data_ref = shift;

  if ((!defined($data_ref)) || (ref($data_ref) ne 'ARRAY') ||
  (!scalar(@$data_ref))) {
    return undef;
  }

  my $wtd_ref  = shift;
  my $min      = ArrayFoo::arrSelect($data_ref, 0);
  my $mu       = ArrayFoo::arrMeanWtd($data_ref, $wtd_ref);
  my $sigma    = ArrayFoo::arrDevWtd($data_ref, $wtd_ref);


  # Blind assumption.  We'll make sure it's less than the minimum, at
  # least by a smidgen.

  my $loc       = $min - $__ABOVE_ZERO;
  my $gamma    = undef;
  my $beta     = undef;

  # Modified Thomson estimator.  See Johnson, Kotz.
  {
    # Y = log ((arithmetic mean - mu) / (geometric(data-mu)));
    my $geolog = 0;
    my $x      = undef;

    my $wtsum  = 0;

    my $ct = scalar @$data_ref;
    for (my $i=0; $i < $ct; $i++) {
      my $x  = $data_ref->[$i];
      my $wt = $wtd_ref->[$i];

      if ($wt > 0) {
        $wtsum += $wt;
        $geolog += log($x - $loc) * $wt;
      }
    }

    $geolog /= $wtsum;
    my $Y = log($mu - $loc) - $geolog;

    # Black magic.
    $gamma = (1/(4*$Y)) * (1+sqrt(1+(4*$Y)/3));

    if ($gamma > 0.9) {
      # Correction factor, namely more black magic.
      $gamma += (($gamma-1) / (24 - 96*$gamma)) + 0.0092;
    }

    # Scale parameter estimation.
    $beta = ($mu - $loc) / $gamma;
  }

  my $param_ref   = +[ $loc, $gamma, $beta ];

  my $CDF_sub = gamma_gen_cdf($param_ref);
  my $PPF_sub = gamma_gen_ppf($param_ref);
  my $PDF_sub = gamma_gen_pdf($param_ref);

  return ($CDF_sub, $param_ref, $PPF_sub, $PDF_sub);
}



# The standard form:  gamma is the only parameter (shape).
sub gamma_gen_std_cdf($) {
  my $param_ref   = shift @_;

  if (0) {
    # Numerical integration.  This is expensive, and -- much
    # more importantly -- fails on the tails.
    my $gamma       = $param_ref->[0];
    my $gamma_gamma = gamma_function($gamma);
    if (!defined($gamma_gamma)) {
      return undef;
    }

    return sub {
      my $x = shift @_;
      if ($x <= 0) {
        return 0;
      } else {
        my $gi = gamma_incomplete_function($gamma, $x);
        return ($gi / $gamma_gamma);
      }
    };
  } else {
    # Use the closed-form approximator for chi-square.
    my $gamma   = $param_ref->[0];
    my $v       = $gamma * 2;
    my $chi_cdf = chi_square_gen_cdf(+[$v]);
    my $gamma_cdf = sub {
      my $x = shift @_;

      $x *= 2;
      return &$chi_cdf($x);
    };

    return $gamma_cdf;
  }
}


# x <= scale(x-loc)
sub gamma_gen_cdf($) {
  my $param_ref   = shift;
  my $loc    = $param_ref->[0];
  my $gamma = $param_ref->[1];
  my $beta  = $param_ref->[2];
  my $gamma_std_cdf = gamma_gen_std_cdf(+[$gamma]);

  if (!defined($gamma_std_cdf)) {
    return undef;
  }

  return sub {
    my $x = shift @_;

    $x = ($x-$loc)/$beta;
    if ($x <= 0) {
      return 0;
    }
    return (&$gamma_std_cdf($x));
  };
}



# Generating the PPF for the general Gamma function.
sub gamma_gen_std_ppf($) {
  my $param_ref = shift;
  my $gamma  = $param_ref->[0];
  my $gamma_std_cdf = gamma_gen_std_cdf(+[$gamma]);
  my $converge   = 1e-6;

  if (!defined($gamma_std_cdf)) {
    return undef;
  }

  return sub {
    my $y          = shift @_;
    my $err_thresh = shift @_;
    my $guess      = $gamma + (($y - 0.5) * sqrt($gamma));
    my $lo         = $guess - $gamma;
    my $hi         = $guess + $gamma;

    if ($lo <= $gamma) {
      $lo = $gamma;
    }

    # Don't be silly.
    (return undef) unless (($y >= 0) && ($y <= 1));
    if (!defined($err_thresh)) {
      $err_thresh = 0.00001;
    }

    my $loval = undef;
    my $hival = undef;
    my $old_loval = undef;
    my $old_hival = undef;

    # Extend the extremes if need be.  Take smaller steps at extremes
    # due to overflow issues.
    while ((($loval=&$gamma_std_cdf($lo)) > $y) &&
    ((!defined($old_loval)) ||
    (($old_loval - $loval) > $converge))) {
      if ($loval > 0.2) {
        $lo *= 0.5;
      } else {
        $lo -= 0.05;
      }
      $old_loval = $loval;
    }

    while ((($hival=&$gamma_std_cdf($hi)) < $y) &&
    ((!defined($old_hival)) ||
    (($hival - $old_hival) > $converge))) {
      if ($hival < 0.8) {
        $hi++;
      } else {
        $hi += 0.05;
      }
      $old_hival = $hival;
    }

    # There are difficulties estimating around this region.
    if (($y == 0) || ($hi <= 0)) {
      return 0;
    }

    my $x = Numerical::inverse_search($gamma_std_cdf, $y,
    $lo, $hi, $err_thresh);
    defined($x) || (return undef);
    return $x;
  };
}




# Generating the PPF for the general Gamma function.
sub gamma_gen_ppf($) {
  my $param_ref = shift;
  my $loc    = $param_ref->[0];
  my $gamma = $param_ref->[1];
  my $beta  = $param_ref->[2];

  my $gamma_std_ppf = gamma_gen_std_ppf(+[$gamma]);

  if (!defined($gamma_std_ppf)) {
    return undef;
  }

  return sub {
    my $y          = shift @_;

    my $x = &$gamma_std_ppf($y);
    defined($x) || return undef;
    $x = $loc + ($x*$beta);

    return $x;
  };
}



# The standard form:  gamma is the only parameter (shape).
sub gamma_gen_std_pdf($) {
  my $param_ref   = shift;
  my $gamma       = $param_ref->[0];

  if (0) {
    # Old way -- numerical integration.
    my $gamma_gamma = gamma_function($gamma);

    if (!defined($gamma_gamma)) {
      return undef;
    }

    return sub {
      my $x = shift @_;
      if ($x <= 0) {
        return 0;
      } else {
        # Warning:  if it's _really_ not a gamma, and x is large
        # after normalization, this can return a value that should
        # be positive (but tiny) but instead is 0.
        my $nom = ($x ** ($gamma-1)) * exp(-$x);
        return ($nom / $gamma_gamma);
      }
    };
  } else {
    # New way -- based on chi-square.
    my $v       = $gamma * 2;
    my $chi_pdf = chi_square_gen_pdf(+[$v]);

    my $gamma_pdf = sub {
      my $x = shift @_;
      $x *= 2;
      return &$chi_pdf($x);
    };

    return $gamma_pdf;
  }
}


# x <= scale(x-loc)

sub gamma_gen_pdf($) {
  my $param_ref   = shift;
  my $loc    = $param_ref->[0];
  my $gamma = $param_ref->[1];
  my $beta  = $param_ref->[2];
  my $gamma_std_pdf = gamma_gen_std_pdf(+[$gamma]);

  if (!defined($gamma_std_pdf)) {
    return undef;
  }

  return sub {
    my $x = shift @_;

    $x = ($x-$loc)/$beta;
    if ($x <= 0) {
      return 0;
    }

    # Chain rule.
    return (&$gamma_std_pdf($x)/$beta);
  };
}



# Pareto distribution, two-parameter form.  No shift -- data
# must be positive.
#
# Parameters:  (loc [location], shape [shape] )
#
# $x >= location >= 0
#
# F(x$)   = 1 - ($loc / $x)^{$shape}
#         = 1 - (($loc^$shape)*($x^{-$shape}))
# f($x)   = $shape * ($loc^$shape) * ($x^{-$shape-1})
# F-1($y) = $loc / ((1-$y)^{1/$shape})

sub Pareto_est($) {
  my $data_ref  = shift;

  if ((!defined($data_ref)) || (ref($data_ref) ne 'ARRAY') ||
  (!scalar(@$data_ref))) {
    return undef;
  }

  my $shape     = undef;
  my $loc       = ArrayFoo::arrSelect($data_ref, 0);
  my $N = scalar @$data_ref;

  if ($loc <= 0) {
    return undef;
  }

  {
    my $log_sum = 0;
    my $val     = 0;

    foreach $val (@$data_ref) {
      $log_sum += log($val);
    }

    $log_sum -= $N * log($loc);

    if ($log_sum == 0) {
      return undef;
    }

    $shape = $N / $log_sum;
  }
  my $param_ref = +[ $loc, $shape ];

  my $CDF_sub   = Pareto_gen_cdf($param_ref);
  my $PPF_sub   = Pareto_gen_ppf($param_ref);
  my $PDF_sub   = Pareto_gen_pdf($param_ref);

  return ($CDF_sub, $param_ref, $PPF_sub, $PDF_sub);
}



# Weighted version for EM.
sub Pareto_est_wtd($$) {
  my $data_ref  = shift;

  if ((!defined($data_ref)) || (ref($data_ref) ne 'ARRAY') ||
  (!scalar(@$data_ref))) {
    return undef;
  }

  my $wtd_ref   = shift;
  my $shape     = undef;
  my $loc       = ArrayFoo::arrSelect($data_ref, 0);
  my $N         = scalar @$data_ref;

  if ($loc <= 0) {
    return undef;
  }

  {
    my $log_sum = 0;
    my $val     = 0;
    my $wts     = 0;
    my $ct      = scalar @$data_ref;

    for (my $i=0; $i < $ct; $i++) {
      my $val = $data_ref->[$i];
      my $wt  = $wtd_ref->[$i];

      if ($wt > 0) {
        $wts += $wt;
        $log_sum += log($val)*$wt;
      }
    }

    $log_sum -= $wts * log($loc);

    $shape = $wts / $log_sum;
  }
  my $param_ref = +[ $loc, $shape ];

  my $CDF_sub   = Pareto_gen_cdf($param_ref);
  my $PPF_sub   = Pareto_gen_ppf($param_ref);
  my $PDF_sub   = Pareto_gen_pdf($param_ref);

  return ($CDF_sub, $param_ref, $PPF_sub, $PDF_sub);
}




sub Pareto_gen_cdf($) {
  my $param_ref = shift;
  my $loc = $param_ref->[0];
  my $shape = $param_ref->[1];

  if ($loc <= 0) {
    return undef;
  }

  return sub {
    my $x = shift @_;

    if ($x < $loc) {
      return 0;
    }

    return (1-exp($shape * log($loc / $x)));
  };
}



sub Pareto_gen_ppf($) {
  my $param_ref = shift;
  my $loc = $param_ref->[0];
  my $shape = $param_ref->[1];

  return sub {
    my $y = shift @_;

    # CDF==1 => $x is undefined
    if ($y == 1) {
      return undef;
    }

    die unless (($y >= 0) && ($y < 1));

    return ($loc / ((1-$y) ** (1/$shape)));
  };
}

# f($x)  = $shape * ($loc^$shape) * ($x^{-$shape-1})
sub Pareto_gen_pdf($) {
  my $param_ref = shift;
  my $loc       = $param_ref->[0];
  my $shape     = $param_ref->[1];

  if ($loc <= 0) {
    return undef;
  }

  return sub {
    my $x = shift;

    if ($x < $loc) {
      return undef;
    }
    my $density = ($shape * ($loc ** $shape) * ($x ** (-$shape-1)));

    $density = ($density >= 0) ? $density : 0;
    return $density;
  };
}




#####################################################################
# DGX_prime is based off of the original DGX distribution as
# described in
#
#   Bi, Zhiqiang; Christos Faloutsos; and Flip Korn.  The "DGX"
#   distribution for mining massive skewed data.
#
# However, it has been modified to better reflect one particular
# generative process:
#
#   1.  Take a lognormal.  We'll pretend that the location
#       parameter is always 0; for the rank/frequency data on
#       which DGX was intended to be used, this should hold.
#   2.  Truncate it, at least at the lower end (always >= 1).
#   3.  Discretize it by truncating all the numbers to the
#       integer portion.
#
# Hence, the designation 'DGX_prime' to distinguish it from the
# original DGX, which has a different PDF.
#
# This version should therefore be MUCH faster to compute.  For
# instance, estimating the area under the curve simply means
# estimating the CDF at the left truncation point (1), whereas
# the original DGX requires the normalization constant 'A',
# which requires a summation of often quite large size, plus
# the use of BigFloat which appears to slow things down
# considerably.
#
# Note that using the lognormal code introduces some approximations
# of its own.  For instance, the lognormal estimation of location
# parameter is, in the general case, bogus (it assumes that it's 0
# unless the values aren't all positive).  In addition, the CDF
# computation uses a closed-form approximation rather than
# numerical indefinite integration, and its inverse goes through a
# numerical unidimensional binary/linear search routine.

sub dgx_prime_est($) {
  my $data_ref = shift;

  if ((!defined($data_ref)) || (ref($data_ref) ne 'ARRAY') ||
  (!scalar(@$data_ref))) {
    return undef;
  }

  my $n   = scalar @$data_ref;

  # DGX_prime is /based/ on a lognormal -- a truncated one,
  # actually.  Let's locate the initial estimates based on
  # that.
  my ($ln_cdf, $ln_param_ref, $ln_ppf, $ln_pdf) =
  lognormal_est($data_ref);

  my $minimize_this = sub {
    my $mu_hat    = shift @_;
    my $sigma_hat = shift @_;

    if (!defined($sigma_hat)) {
      return $__VERY_HIGH;
    }

    if ($sigma_hat <= 0) {
      # This bogosity tells Simplex not to take this route.
      return $__VERY_HIGH;
    }

    my $pdf = dgx_prime_gen_pdf(+[$mu_hat, $sigma_hat]);
    my $ll  = Numerical::compute_log_likelihood($data_ref,
    $pdf, $__LL_PROB_FLOOR);

    if (!defined($ll)) {
      return $__VERY_HIGH;
    }

    return (-$ll);
  };

  my $mu0    = $ln_param_ref->[0];
  my $sigma0 = $ln_param_ref->[1];

  my @initial_simplex =
  Numerical::simplex_wrap(+[ $mu0, $sigma0 ], 1);

  # Bound it.
  foreach (@initial_simplex) {
    my $r = $_;

    if ($r->[1] < 0.5) {
      $r->[1] = 0.5;
    }
  }

  # 1 = tolerance
  #
  # We're computing log-likelihoods, so a difference of 1 will
  # normally be trivial on any set of meaningful size.

  my ($param_ref, $neg_ll) =
  Numerical::Nelder_Mead_simplex($minimize_this, \@initial_simplex,
  undef, undef, undef, 1);

  my $cdf = dgx_prime_gen_cdf($param_ref);
  my $ppf = dgx_prime_gen_ppf($param_ref);
  my $pdf = dgx_prime_gen_pdf($param_ref);

  return ($cdf, $param_ref, $ppf, $pdf);
}


# Compute a DGX_prime CDF.
#
# It's identical to the lognormal CDF, except that we need to scale
# it to deal with the left truncation point, and that we need to
# bump up the $k value to ceil($k) because everything up to that
# point would have been truncated to $k anyway.
#
sub dgx_prime_gen_cdf($) {
  my $param_ref = shift;
  my $mu        = $param_ref->[0];
  my $sigma     = $param_ref->[1];

  my $ln_cdf_ref  = lognormal_gen_cdf(+[$mu, $sigma, 0 ]);

  if (!defined($ln_cdf_ref)) {
    return undef;
  }

  my $truncated   = &$ln_cdf_ref(1);


  return sub {
    my $k   = shift @_;

    if ($k <= 1) {
      return 0;
    }

    my $next_integer = (($k == int($k)) ? $k : (int($k)+1));

    my $ln_cdf = &$ln_cdf_ref($next_integer);

    return (($ln_cdf-$truncated) / (1 - $truncated));
  }
}




# Compute a DGX_prime PPF.
#
# Again, we exploit the lognormal functions.   The PPF of the DGX_prime
# distribution can be found via
#
#  - Take the percentage value and scale it based on the truncation.
#    For instance, say 20% was truncated off the left due to insisting
#    that all values be at least 1.  Then, we take the fraction $X and
#    turn it into
#
#    0.2 + ($x * 0.8)
#
#  - Send this to the standard non-truncated lognormal PPF.
#  - Take the value returned, and truncate it (floor).  This matches
#    the DGX_prime discretization.
#  - Return this value.

sub dgx_prime_gen_ppf($) {
  my $param_ref = shift;
  my $mu        = $param_ref->[0];
  my $sigma     = $param_ref->[1];

  my $ln_cdf_ref  = lognormal_gen_cdf(+[$mu, $sigma, 0 ]);
  my $ln_ppf_ref  = lognormal_gen_ppf(+[$mu, $sigma, 0 ]);
  my $truncated   = &$ln_cdf_ref(1);

  return sub {
    my $pct   = shift @_;

    (($pct >= 0) && ($pct <= 1)) || return undef;

    my $pct_scale = $truncated + ($pct * (1-$truncated));
    my $ln_ppf = &$ln_ppf_ref($pct_scale);
    my $k      = int($ln_ppf);

    # Consistency check.
    ($k >= 1) || die;

    return $k;
  }
}



# Compute a DGX_prime PDF.   We again look to the lognormal for
# guidance.
#
# The PDF should, in theory, be the derivative of the CDF.  However,
# the CDF is a step function, because the PDF is 0 everywhere except
# for positive integers.
#
# It's a bit odd that way.  We do, however, need /something/ here
# because we need it for computing log-likelihoods.  I'll use the
# deltas of the CDF:
#
# PDF(x) =   0              if x != int(x)
#            0              if x < 1
#            F(x+1) - F(x)  otherwise
#
# The summation of these values, then, will be 1 as x goes from 1
# to infinity.

sub dgx_prime_gen_pdf($) {
  my $param_ref = shift;
  my ($mu, $sigma) = @$param_ref;

  # Use a standard deviation that's positive, please.
  if ($sigma <= 0) {
    return sub {
      return 0;
    }
  }

  my $cdf_ref = dgx_prime_gen_cdf($param_ref);

  return sub {
    my $k = shift @_;

    if (($k < 1) || ($k != int($k))) {
      return 0;
    }

    my $cdf_k          = &$cdf_ref($k);
    my $cdf_k_plus_one = &$cdf_ref($k+1);

    return ($cdf_k_plus_one - $cdf_k);
  }
}


#############################################
# Generalized lambda distribution tables.
#############################################
#
# Table 1 from:
#  Karian, Zaven A., Edward J. Dudewicz, Patrick McDonald.
#  "The Extended Generalized Lambda Distribution System for Fitting
#  Distributions to Data:  History, Completion of Theory, Tables,
#  Applications, The `Final Word' on Moment Fits", in _Comm. Sat.:
#  Simulation and Computation_, 25(3):611-642, 1996.

#
# "__GLD_TABLE_val" is the table for skew equal to 'val'.  Each row
# contains a kurtosis value, and standardized estimates for lambda1
# through lambda4.

my @__GLD_TABLE_015 =
(
  +[ 1.9,  -1.5589,  0.2826,    0.007231,  0.8107    ],
  +[ 2.0,  -1.3231,  0.2934,    0.03145,   0.7203    ],
  +[ 2.1,  -1.1221,  0.3010,    0.05487,   0.6387    ],
  +[ 2.2,  -0.9405,  0.3056,    0.07814,   0.5622    ],
  +[ 2.3,  -0.7772,  0.3066,    0.1009,    0.4891    ],

  +[ 2.4,  -0.6175,  0.3031,    0.1215,    0.4194    ],
  +[ 2.5,  -0.4830,  0.2939,    0.1366,    0.3553    ],
  +[ 2.6,  -0.3760,  0.2791,    0.1435,    0.2994    ],
  +[ 2.7,  -0.2984,  0.2604,    0.1421,    0.2533    ],
  +[ 2.8,  -0.2436,  0.2398,    0.1350,    0.2156    ],

  +[ 2.9,  -0.2049,  0.2187,    0.1249,    0.1846    ],
  +[ 3.0,  -0.1766,  0.1980,    0.1135,    0.1586    ],
  +[ 3.1,  -0.1550,  0.1778,    0.1017,    0.1362    ],
  +[ 3.2,  -0.1384,  0.1584,    0.09006,   0.1167    ],
  +[ 3.3,  -0.1251,  0.1397,    0.07885,   0.09558   ],

  +[ 3.4,  -0.1143,  0.1219,    0.06815,   0.08428   ],
  +[ 3.5,  -0.1053,  0.1048,    0.05803,   0.07052   ],
  +[ 3.6,  -0.09772, 0.08838,   0.04848,   0.05806   ],
  +[ 3.7,  -0.09124, 0.07268,   0.03947,   0.04669   ],
  +[ 3.8,  -0.08564, 0.05764,   0.03100,   0.03627   ],

  +[ 3.9,  -0.08082, 0.04320,   0.02301,   0.02667   ],
  +[ 4.0,  -0.07649, 0.02935,   0.01548,   0.01780   ],
  +[ 4.1,  -0.07268, 0.01603,   0.008378,  0.009654  ],
  +[ 4.2,  -0.06927, 0.003218,  0.001667,  0.001891  ],
  +[ 4.3,  -0.06618, -0.009116, -0.004682, -0.005279 ],

  +[ 4.4,  -0.06344, -0.02099,  -0.01069,  -0.01199  ],
  +[ 4.5,  -0.06093, -0.03245,  -0.01639,  -0.01830  ],
  +[ 4.6,  -0.05863, -0.04350,  -0.02180,  -0.02424  ],
  +[ 4.7,  -0.05653, -0.05417,  -0.02694,  -0.02984  ],
  +[ 4.8,  -0.05460, -0.06448,  -0.03184,  -0.03513  ],

  +[ 5.0,  -0.05116, -0.08410,  -0.04095,  -0.04489  ],
  +[ 5.2,  -0.04820, -0.1025,   -0.04927,  -0.05371  ],
  +[ 5.4,  -0.04562, -0.1198,   -0.05668,  -0.06171  ],
  +[ 5.6,  -0.04336, -0.1361,   -0.06388,  -0.06902  ],
  +[ 5.8,  -0.04135, -0.1514,   -0.07033,  -0.07571  ],

  +[ 6.0,  -0.03955, -0.1660,   -0.07631,  -0.08188  ],
  +[ 6.2,  -0.03794, -0.1798,   -0.08186,  -0.08758  ],
  +[ 6.4,  -0.03649, -0.1928,   -0.08704,  -0.09286  ],
  +[ 6.6,  -0.03517, -0.2053,   -0.09187,  -0.09778  ],
  +[ 6.8,  -0.03396, -0.2171,   -0.09639,  -0.1024   ],

  +[ 7.1,  -0.03234, -0.2339,   -0.1027,   -0.1087   ],
  +[ 7.4,  -0.03090, -0.2495,   -0.1084,   -0.1145   ],
  +[ 7.7,  -0.02962, -0.2641,   -0.1136,   -0.1197   ],
  +[ 8.0,  -0.02848, -0.2779,   -0.1185,   -0.1246   ],
  +[ 8.3,  -0.02744, -0.2908,   -0.1229,   -0.1290   ],

  +[ 8.8,  -0.02593, -0.3108,   -0.1297,   -0.1357   ],
  +[ 9.3,  -0.02463, -0.3290,   -0.1357,   -0.1417   ],
  +[ 9.8,  -0.02349, -0.3457,   -0.1411,   -0.1470   ],
  +[ 10.3, -0.02249, -0.3611,   -0.1459,   -0.1518   ],
  +[ 10.8, -0.02161, -0.3753,   -0.1503,   -0.1561   ],

  +[ 11.6, -0.02039, -0.3959,   -0.1565,   -0.1622   ],
  +[ 12.4, -0.01936, -0.4144,   -0.1620,   -0.1676   ],
  +[ 13.2, -0.01847, -0.4310,   -0.1668,   -0.1722   ],
  +[ 14.0, -0.01770, -0.4460,   -0.1710,   -0.1764   ],
  +[ 14.8, -0.01703, -0.4597,   -0.1748,   -0.1801   ]
);


my @__GLD_TABLE_035 =
(
  +[ 2.1,  -1.4414,  0.2630,    0.007342,  0.6297    ],
  +[ 2.2,  -1.2523,  0.2667,    0.02556,   0.5599    ],
  +[ 2.3,  -1.0934,  0.2674,    0.04168,   0.4979    ],
  +[ 2.4,  -0.9548,  0.2653,    0.05590,   0.4415    ],
  +[ 2.5,  -0.8324,  0.2604,    0.06797,   0.3897    ],

  +[ 2.6,  -0.7245,  0.2527,    0.07745,   0.3423    ],
  +[ 2.7,  -0.6307,  0.2424,    0.08394,   0.2992    ],
  +[ 2.8,  -0.5504,  0.2297,    0.08727,   0.2606    ],
  +[ 2.9,  -0.4830,  0.2153,    0.08761,   0.2263    ],
  +[ 3.0,  -0.4271,  0.1996,    0.08543,   0.1961    ],

  +[ 3.1,  -0.3809,  0.1832,    0.08131,   0.1695    ],
  +[ 3.2,  -0.3428,  0.1665,    0.07581,   0.1462    ],
  +[ 3.3,  -0.3112,  0.1498,    0.06944,   0.1255    ],
  +[ 3.4,  -0.2847,  0.1333,    0.06254,   0.1072    ],
  +[ 3.5,  -0.2623,  0.1171,    0.05540,   0.09077   ],

  +[ 3.6,  -0.2432,  0.1014,    0.04819,   0.07602   ],
  +[ 3.7,  -0.2268,  0.08619,   0.04104,   0.06267   ],
  +[ 3.8,  -0.2126,  0.07144,   0.03403,   0.05063   ],
  +[ 3.9,  -0.2001,  0.05718,   0.02722,   0.03943   ],
  +[ 4.0,  -0.1892,  0.04341,   0.02062,   0.02925   ],

  +[ 4.1,  -0.1794,  0.03011,   0.01427,   0.01986   ],
  +[ 4.2,  -0.1707,  0.01727,   0.008158,  0.01116   ],
  +[ 4.3,  -0.1629,  0.004870,  0.002292,  0.003090  ],
  +[ 4.4,  -0.1559,  -0.007105, -0.003332, -0.004431 ],
  +[ 4.5,  -0.1495,  -0.01868,  -0.008725, -0.01146  ],

  +[ 4.6,  -0.1437,  -0.02986,  -0.01389,  -0.01804  ],
  +[ 4.7,  -0.1383,  -0.04067,  -0.01884,  -0.02423  ],
  +[ 4.8,  -0.1334,  -0.05113,  -0.02358,  -0.03004  ],
  +[ 4.9,  -0.1289,  -0.06125,  -0.02813,  -0.03553  ],
  +[ 5.0,  -0.1247,  -0.07106,  -0.03249,  -0.04072  ],

  +[ 5.2,  -0.1173,  -0.08977,  -0.04070,  -0.05029  ],
  +[ 5.4,  -0.1108,  -0.1074,   -0.04827,  -0.05893  ],
  +[ 5.6,  -0.1051,  -0.1240,   -0.05528,  -0.06677  ],
  +[ 5.8,  -0.1001,  -0.1396,   -0.06178,  -0.07392  ],
  +[ 6.0,  -0.09563, -0.1545,   -0.06782,  -0.08048  ],

  +[ 6.2,  -0.09162, -0.1685,   -0.07345,  -0.08652  ],
  +[ 6.4,  -0.08801, -0.1818,   -0.07871,  -0.09210  ],
  +[ 6.6,  -0.08473, -0.1945,   -0.08364,  -0.09727  ],
  +[ 6.8,  -0.08175, -0.2066,   -0.08826,  -0.1021   ],
  +[ 7.0,  -0.07902, -0.2181,   -0.09261,  -0.1066   ],

  +[ 7.3,  -0.07534, -0.2344,   -0.09866,  -0.1128   ],
  +[ 7.6,  -0.07207, -0.2497,   -0.1042,   -0.1184   ],
  +[ 7.9,  -0.06916, -0.2640,   -0.1093,   -0.1236   ],
  +[ 8.2,  -0.06654, -0.2774,   -0.1141,   -0.1283   ],
  +[ 8.5,  -0.06417, -0.2901,   -0.1185,   -0.1327   ],

  +[ 9.0,  -0.06088, -0.3097,   -0.1251,   -0.1393   ],
  +[ 9.5,  -0.05768, -0.3286,   -0.1310,   -0.1451   ],
  +[ 10.0, -0.05506, -0.3440,   -0.1364,   -0.1503   ],
  +[ 10.5, -0.05276, -0.3592,   -0.1412,   -0.1550   ],
  +[ 11.0, -0.05072, -0.3732,   -0.1456,   -0.1592   ],

  +[ 11.8, -0.04789, -0.3935,   -0.1518,   -0.1652   ],
  +[ 12.6, -0.04549, -0.4117,   -0.1573,   -0.1704   ],
  +[ 13.4, -0.04343, -0.4282,   -0.1621,   -0.1750   ],
  +[ 14.2, -0.04164, -0.4430,   -0.1664,   -0.1790   ],
  +[ 15.0, -0.04007, -0.4566,   -0.1703,   -0.1827   ]
);

my @__GLD_TABLE_070 =
(
  +[ 2.7,  -1.3282,  0.2170,    0.002251,  0.4090    ],
  +[ 2.8,  -1.1943,  0.2132,    0.01296,   0.3651    ],
  +[ 2.9,  -1.0829,  0.2077,    0.02165,   0.3264    ],
  +[ 3.0,  -0.9867,  0.2008,    0.02858,   0.2918    ],
  +[ 3.1,  -0.9025,  0.1926,    0.03393,   0.2604    ],

  +[ 3.2,  -0.8283,  0.1833,    0.03781,   0.2319    ],
  +[ 3.3,  -0.7625,  0.1731,    0.04033,   0.2059    ],
  +[ 3.4,  -0.7041,  0.1621,    0.04159,   0.1821    ],
  +[ 3.5,  -0.6523,  0.1505,    0.04173,   0.1604    ],
  +[ 3.6,  -0.6062,  0.1386,    0.04091,   0.1406    ],

  +[ 3.7,  -0.5653,  0.1263,    0.03925,   0.1225    ],
  +[ 3.8,  -0.5288,  0.1138,    0.03691,   0.1060    ],
  +[ 3.9,  -0.4963,  0.1014,    0.03403,   0.09077   ],
  +[ 4.0,  -0.4673,  0.08889,   0.03072,   0.07683   ],
  +[ 4.1,  -0.4413,  0.07654,   0.02710,   0.06401   ],

  +[ 4.2,  -0.4179,  0.06434,   0.02324,   0.05219   ],
  +[ 4.3,  -0.3969,  0.05234,   0.01922,   0.04126   ],
  +[ 4.4,  -0.3779,  0.04058,   0.01511,   0.03116   ],
  +[ 4.5,  -0.3607,  0.02905,   0.01094,   0.02177   ],
  +[ 4.6,  -0.3450,  0.01779,   0.006767,  0.01303   ],

  +[ 4.7,  -0.3306,  0.006799,  0.002607,  0.004872  ],
  +[ 4.8,  -0.3175,  -0.003918, -0.001512, -0.002751 ],
  +[ 4.9,  -0.3055,  -0.01436,  -0.005574, -0.009894 ],
  +[ 5.0,  -0.2944,  -0.02453,  -0.009565, -0.01660  ],
  +[ 5.1,  -0.2842,  -0.03443,  -0.01348,  -0.02291  ],

  +[ 5.2,  -0.2747,  -0.04407,  -0.01730,  -0.02885  ],
  +[ 5.3,  -0.2659,  -0.05345,  -0.02103,  -0.03447  ],
  +[ 5.4,  -0.2577,  -0.06258,  -0.02467,  -0.03978  ],
  +[ 5.5,  -0.2501,  -0.07147,  -0.02822,  -0.04481  ],
  +[ 5.6,  -0.2429,  -0.08012,  -0.03167,  -0.04958  ],

  +[ 5.8,  -0.2300,  -0.09675,  -0.03828,  -0.05843  ],
  +[ 6.0,  -0.2186,  -0.1125,   -0.04453,  -0.06647  ],
  +[ 6.2,  -0.2084,  -0.1275,   -0.05043,  -0.07380  ],
  +[ 6.4,  -0.1993,  -0.1417,   -0.05599,  -0.08051  ],
  +[ 6.6,  -0.1911,  -0.0812,   -0.03167,  -0.08669  ],

  +[ 6.8,  -0.1837,  -0.1682,   -0.06622,  -0.09240  ],
  +[ 7.0,  -0.1769,  -0.1805,   -0.07091,  -0.09769  ],
  +[ 7.2,  -0.1708,  -0.1923,   -0.07536,  -0.1026   ],
  +[ 7.4,  -0.1651,  -0.2035,   -0.07957,  -0.1072   ],
  +[ 7.6,  -0.1599,  -0.2143,   -0.8357,   -0.1115   ],

  +[ 7.9,  -0.1529,  -0.2296,   -0.08919,  -0.1174   ],
  +[ 8.2,  -0.1466,  -0.2440,   -0.09440,  -0.1228   ],
  +[ 8.5,  -0.1409,  -0.2575,   -0.09925,  -0.1278   ],
  +[ 8.8,  -0.1358,  -0.2703,   -0.1038,   -0.1323   ],
  +[ 9.1,  -0.1312,  -0.2824,   -0.1080,   -0.1365   ],

  +[ 9.6,  -0.1243,  -0.3011,   -0.1145,   -0.1429   ],
  +[ 10.1, -0.1184,  -0.3183,   -0.1203,   -0.1485   ],
  +[ 10.6, -0.1132,  -0.3341,   -0.1255,   -0.1535   ],
  +[ 11.1, -0.1086,  -0.3487,   -0.1303,   -0.1581   ],
  +[ 11.6, -0.1045,  -0.3623,   -0.1347,   -0.1622   ],

  +[ 12.4, -0.09883, -0.3821,   -0.1410,   -0.1680   ],
  +[ 13.2, -0.09399, -0.3999,   -0.1466,   -0.1731   ],
  +[ 14.0, -0.08981, -0.4159,   -0.1515,   -0.1775   ],
  +[ 14.8, -0.08618, -0.4305,   -0.1559,   -0.1815   ],
  +[ 15.6, -0.08299, -0.4438,   -0.1598,   -0.1850   ]
);



# Estimate the four parameters for the generalized lambda
# disribution, via the method of moments.  This is /purely/
# table-based; there is *** NO *** interpolation being done
# right now.
#
# In other words, accuracy is strictly limited by the table,
# and if you have "out of range" data, it'll be way off.
#
# The generalized lambda distribution (aka generalized Tukey
# distribution) is defined as:
#
#   F^{-1}(y) = \lambda_{1} +
#     \frac{y^{\lambda_{3} - (1-y)^{\lambda_{4}}}{\lambda_{2}}
#
# I make an adjustment:  lambda_2 will be non-negative.  That
# way, F^{-1}(0) <= F^{-1}(1) and densities will be positive.
#
sub gld_est($) {
  my $data_ref = shift;

  if ((!defined($data_ref)) || (ref($data_ref) ne 'ARRAY') ||
  (!scalar(@$data_ref))) {
    return undef;
  }

  my $mean     = ArrayFoo::arrMean($data_ref);
  my $var      = 0;
  my $skew     = 0;
  my $kurtosis = 0;

  my $n = scalar @$data_ref;

  foreach (@$data_ref) {
    my $diff = ($_ - $mean);

    $var      += ($diff ** 2);
    $skew     += ($diff ** 3);
    $kurtosis += ($diff ** 4);
  }

  $var /= $n;

  my $dev = sqrt($var);

  if (($dev == 0) || ($n == 0)) {
    return undef;
  }

  $skew     /= ($n * ($dev ** 3));
  $kurtosis /= ($n * ($dev ** 4));

  my $table_ref = undef;

  if (abs($skew) < 0.25) {
    $table_ref = \@__GLD_TABLE_015;
  } elsif (abs($skew) < 0.525) {
    $table_ref = \@__GLD_TABLE_035;
  } else {
    $table_ref = \@__GLD_TABLE_070;
  }

  # print "mean=$mean, dev=$dev, skew=$skew, k=$kurtosis\n";
  my $min_kurtosis = $table_ref->[0]->[0];
  my $max_kurtosis = $table_ref->[-1]->[0];

  my $tuple = undef;

  if ($kurtosis <= $min_kurtosis) {
    $tuple = $table_ref->[0];
  } elsif ($kurtosis >= $max_kurtosis) {
    $tuple = $table_ref->[-1];
  } else {
    # Search.

    my $lo_idx = 0;
    my $hi_idx = (scalar @$table_ref) - 1;

    # Binary search.  Implicit assumption:  table is sorted.
    search: {
      my $mid_idx = int(($lo_idx + $hi_idx)/2);
      my $mid_val = $table_ref->[$mid_idx]->[0];

      if ($kurtosis < $mid_val) {
        $hi_idx = $mid_idx;
      } elsif ($kurtosis > $mid_val) {
        $lo_idx = $mid_idx;
      } else {
        # equal
        $lo_idx = $hi_idx = $mid_idx;
      }

      if ($hi_idx <= ($lo_idx + 1)) {
        # converged
        last search;
      } else {
        redo search;
      }
    }

    if ($lo_idx == $hi_idx) {
      $tuple = $table_ref->[$lo_idx];
    } else {
      my $lo_k = $table_ref->[$lo_idx]->[0];
      my $hi_k = $table_ref->[$hi_idx]->[0];

      if (($kurtosis - $lo_k) < ($hi_k - $kurtosis)) {
        $tuple = $table_ref->[$lo_idx];
      } else {
        $tuple = $table_ref->[$hi_idx];
      }
    }
  }

  defined($tuple) || die;

  my ($k_table, $lambda1, $lambda2, $lambda3, $lambda4) = @$tuple;

  if ($skew < 0) {
    # Exchange lambda3, lambda4.  Change sign of lambda1.

    my $temp = $lambda3;
    $lambda3 = $lambda4;
    $lambda4 = $temp;

    $lambda1 *= -1;
  }

  # De-standardize lambda1, lambda2.
  $lambda1 = ($lambda1 * $dev) + $mean;
  $lambda2 = $lambda2 / $dev;


  if (($lambda3 <= 0) && ($lambda4 >= 0) && ($lambda4 <= 1)) {
    # Invalid:  region V1
    return undef;
  } elsif (($lambda3 >= 0) && ($lambda3 <= 1) && ($lambda4 <= 0)) {
    # Invalid:  region V2
    return undef;
  } elsif (($lambda3 > -1) && ($lambda3 < 0) && ($lambda4 > 1)) {
    # Grey area, second quadrant.
    my $lhs_num = (1-$lambda3) ** (1-$lambda3);
    my $lhs_den = ($lambda4 - $lambda3) ** ($lambda4 - $lambda3);
    my $lhs_mul = ($lambda4 - $lambda1) ** ($lambda4 - $lambda1);
    my $lhs     = ($lhs_num / $lhs_den) * $lhs_mul;
    my $rhs     = -$lambda3/$lambda4;

    if ($lhs < $rhs) {
      # Invalid.
      return undef;
    }
  } elsif (($lambda3 > -1) && ($lambda3 < 0) && ($lambda4 > 1)) {
    # Grey area, fourth quadrant.
    my $lhs_num = (1-$lambda4) ** (1-$lambda4);
    my $lhs_den = ($lambda3 - $lambda4) ** ($lambda3 - $lambda4);
    my $lhs_mul = ($lambda3 - $lambda1) ** ($lambda3 - $lambda1);
    my $lhs     = ($lhs_num / $lhs_den) * $lhs_mul;
    my $rhs     = -$lambda4/$lambda3;

    if ($lhs < $rhs) {
      # Invalid.
      return undef;
    }
  }

  {
    # Does $lambda2 disqualify it?  Evaluate eqn (11) from
    # Karian, Dudewicz and McDonald
    my $sign = undef;

    for (my $y=0.01; $y < 1; $y += 0.01) {
      my $val = ($lambda3 * ($y ** ($lambda3 - 1))) +
      ($lambda4 * ((1-$y) ** ($lambda4 -1 )));

      if ($val == 0) {
        return undef;
      }

      my $this_sign = ($val > 0);

      if (!defined($sign)) {
        $sign = $this_sign;
      } else {
        ($sign == $this_sign) || return undef;
      }
    }
  }

  my $param_ref = +[ $lambda1, $lambda2, $lambda3, $lambda4 ];

  my $cdf = gld_gen_cdf($param_ref);
  my $ppf = gld_gen_ppf($param_ref);
  my $pdf = gld_gen_pdf($param_ref);

  return ($cdf, $param_ref, $ppf, $pdf);
}




# The generalized lambda distribution cdf must be solved for
# numerically.
sub gld_gen_cdf($) {
  my $param_ref = shift;

  my $ppf = gld_gen_ppf($param_ref);
  defined($ppf) || return undef;

  my $min = &$ppf(0);
  my $max = &$ppf(1);


  return sub {
    my $x = shift @_;

    if ($x <= $min) {
      return 0;
    } elsif ($x >= $max) {
      return 1;
    }

    my $y = Numerical::inverse_search(\&normal_std_cdf, $x, 0, 1);
    defined($y) || return undef;

    $y = ($y >= 0 ) ? $y : 0;
    $y = ($y <= 1 ) ? $y : 1;
    return $y;
  };
}



# Since the generalized lambda distribution is /defined/ in terms of
# the percentage-point function, it's easy to compose.
sub gld_gen_ppf($) {
  my $param_ref = shift;

  my ($lambda1, $lambda2, $lambda3, $lambda4) = @$param_ref;

  return sub {
    my $y = shift @_;

    if (($y < 0) || ($y > 1)) {
      # Don't be silly.
      return undef;
    }

    if ($lambda2 < 0) {
      # To preserve the idea that 0=min, 1=max
      $y = 1 - $y;
    }

    if ($y == 0) {
      return ($lambda1 - (1/$lambda2));
    } elsif ($y == 1) {
      return ($lambda1 + (1/$lambda2));
    }

    my $val = $lambda1 + ((($y **$lambda3) - ((1-$y) ** $lambda4)) /
    $lambda2);

    return $val;
  };
}


# PDF.  Once one figures out the actual percentage point (via the CDF)
# this isn't hard to compute.

sub gld_gen_pdf($) {
  my $param_ref = shift;

  my ($lambda1, $lambda2, $lambda3, $lambda4) = @$param_ref;

  my $cdf = gld_gen_cdf($param_ref);
  my $ppf = gld_gen_ppf($param_ref);

  defined($cdf) || return undef;
  defined($ppf) || return undef;

  my $min = &$ppf(0);
  my $max = &$ppf(1);

  if ($min > $max) {
    # Sanity check.  This should be impossible if the validity tests
    # pass, and the $y <= 1-$y transformation works.
    print "gld pdf: [$lambda1, $lambda2, $lambda3, $lambda4]\n";

    for (my $i=0; $i <= 1; $i += 0.05) {
      my $v = &$ppf($i);

      print "$i $v\n";
    }
    die;
  }


  return sub {
    my $x = shift @_;
    my $p = &$cdf($x);

    defined($p) || return undef;
    ($x >= $min) || return 0;
    ($x <= $max) || return 0;

    if ($lambda2 < 0) {
      $p = 1-$p;
    }


    defined($p) || return undef;

    my $temp = $lambda3 * ($p ** ($lambda3 - 1));
    $temp += $lambda4 * ((1-$p) ** ($lambda4 -1 ));

    ($temp != 0) || return undef;
    $temp = 1/$temp;
    $temp *= $lambda2;

    $temp = ($temp >= 0) ? $temp : 0;
    return $temp;
  };
}





# For any continuous probability density function, we can derive a
# discrete form through the following:
#
# Let F(x) and f(x) be the original CDF and pdf, respectively.
# Then, the discrete forms D(x) and d(x) would be:
#
#  D(x) <= F(ceil(x))
#  d(x) <= F(x+1) - F(x)   x an integer
#       <= 0               otherwise
#
# And the inverse CDF becomes a floor()'d version of the original
# inverse CDF.
#
# This function can be used to produce such discretized versions.
# Note that
#  (a) It will refuse if the data contains any non-integers.
#  (b) It does not do truncation by itself.  For instance, DGX
#      isn't quite the composition of this and lognormal; it's
#      more like the composition of this and a left-truncated
#      lognormal.
#
# This function will use the original estimator to make the
# initial guess, then use Nelder-Mead simplex to optimize.
#
# Give it a reference to the data array and a reference to the
# distribution descriptor (the tuple of the form
#
#   $tuple->[0] = "Name";
#   $tuple->[1] = foo_est();
#   $tuple->[2] = +[ "param0_name", "param1_name"... ];
#   $tuple->[3] = foo_gen_cdf();
#   $tuple->[4] = foo_gen_ppf();
#   $tuple->[5] = foo_gen_pdf();
# ).
#
# Such tuples can be found in @UniRand::DIST_LIST.
sub discretize_dist($$;$$@) {
  my $data_ref = shift;
  my $tuple    = shift;

  # For the following two parameters, see 'discretize_est'.
  my $xf         = shift;
  my $xf_inverse = shift;
  my @extras     = @_;        # These get passed to the estimator.

  my ($dist_name,          # not used
  $dist_est,
  $dist_param_name,    # not used
  $dist_gen_cdf,
  $dist_gen_ppf,
  $dist_gen_pdf) = @$tuple;

  my ($estimator, @results) = discretize_est($dist_est,
  $dist_gen_cdf,
  $dist_gen_ppf,
  $dist_gen_pdf,
  $data_ref,
  @extras);


  return @results;
}



# Give it an estimator and the three generative functions (cdf,
# ppf, pdf) of the continuous distribution.
#
# Optional Argument #1:  xf
#    Take an integer data set.  Transform it using some function,
#    such as a logarithm.  It quite possibly won't be an integer
#    set anymore..
#
#    If you used such a transformation, define xf.  Otherwise,
#    identity is assumed.
#
# Optional Argument #2:  xf_inverse
#    Inverse of 'xf'.   If neither xf or xv_inverse is defined,
#    identity is used.  If one of them is defined, so must the
#    the other be.
#
# Optional Argument #3:  data_ref
#    If you don't give it a ref to the data, you get a discretized
#    version of the estimator which will produce discretized versions
#    of the CDF, PPF and PDF.
#
# If you DO give it a ref to the data,
sub discretize_est($$$$;$$$@) {
  my $dist_est       = shift;
  my $dist_gen_cdf   = shift;
  my $dist_gen_ppf   = shift;
  my $dist_gen_pdf   = shift;
  my $xf             = shift;
  my $xf_inverse     = shift;
  my $data           = shift;

  if (!defined($xf)) {
    if (defined($xf_inverse)) {
      return undef;
    }
  } elsif (!defined($xf_inverse)) {
    return undef;
  }


  my $discrete_estimator = sub {
    my $data_ref = shift @_;
    my @additional = @_;  # passed to the estimator
    my $ct       = scalar @$data_ref;
    my $tol      = (1e-2) * $ct;
    my $max_iter = 1000;
    my $reassemble = undef;

    # Make sure that the data is discrete.
    foreach (@$data_ref) {
      my $itm = $_;
      my $original = defined($xf_inverse) ?
      &$xf_inverse($itm) : $itm;

      # Allow for round-off error, but not much else.
      (abs($original - int($original)) < 1e-10) ||
      (abs($original - int(1+$original)) < 1e-10) ||
      return undef;
    }

    # Generate initial estimates.
    my ($init_cdf, $init_params, $init_ppf, $init_pdf) =
    &$dist_est($data_ref, @additional);

    if (!defined($init_cdf)) {
      return undef;
    }

    if (ref($init_params->[0]) eq 'ARRAY') {
      my $size = undef;

      # The simplex code does not handle hierarchical parameter
      # structures, yet.  However, wrt my own code, only the
      # mixtures work that way, and it's not difficult to make
      # the mixture CDF/PPF/PDF generators know about flat lists.
      #
      # If all the items are refs to lists of the same length,
      # we'll save that length and restore the structure once
      # done.
      foreach (@{$init_params}) {
        if (!ref($_)) {
          $reassemble = undef;
          last;
        }

        my $list_length = scalar @{$_};

        if ((!defined($size)) || ($size == $list_length)) {
          $size = $list_length;
          $reassemble = $size;
        } else {
          $reassemble = undef;
          last;
        }
      }

      $init_params = +[ flatten(@$init_params) ];
    }

    # Simplex of arbitrary size.
    my $initial_simplex = Numerical::simplex_wrap($init_params, 1);


    my $minimize_this = sub {
      my @params = @_;

      my $pdf = &$dist_gen_pdf(\@params);
      my $cdf = &$dist_gen_cdf(\@params);

      if ((!defined($pdf)) || (!defined($cdf))) {
        # Illegal combination of parameters.  For instance, you can't
        # have a normal distribution with /negative/ standard deviation.
        return $__VERY_HIGH;
      }

      my ($discrete_cdf, $discrete_pdf) =
      discretize_cdf($cdf, $xf, $xf_inverse);

      if ((!defined($discrete_cdf)) || (!defined($discrete_pdf))) {
        return $__VERY_HIGH;
      }

      my $ll = Numerical::compute_log_likelihood($data_ref,
      $discrete_pdf, $__LL_PROB_FLOOR);

      if (!defined($ll)) {
        return $__VERY_HIGH;
      } else {
        return -$ll;
      }
    };

    my ($param_ref, $badness) =
    Numerical::Nelder_Mead_simplex($minimize_this, $initial_simplex,
    undef, undef, undef, $tol, $max_iter);

    defined($param_ref) || return undef;

    if (defined($reassemble)) {
      my @hierarchical_params = ();
      my $tuples = (scalar @$param_ref) / $reassemble;

      ($tuples == int($tuples)) || die;
      while (scalar(@$param_ref)) {
        my @tuple = splice @$param_ref, 0, $reassemble;
        push @hierarchical_params, \@tuple;
      }
      $param_ref = \@hierarchical_params;

      # If it's my code, again it's a mixture.  Simplex will treat the
      # probabilities as unconstrained weights, so renormalize them.
      my $prob_sum = 0;

      map { $prob_sum += $_->[0] } @$param_ref;
      ($prob_sum > 0) || return undef;
      map { $_->[0] /= $prob_sum } @$param_ref;
    }


    my $continuous_cdf  = &$dist_gen_cdf($param_ref);
    my $continuous_ppf  = &$dist_gen_ppf($param_ref);
    my $continuous_pdf  = &$dist_gen_pdf($param_ref);

    (defined($continuous_cdf) &&
    defined($continuous_ppf) &&
    defined($continuous_pdf)) || return undef;

    my ($discrete_cdf, $discrete_pdf) =
    discretize_cdf($continuous_cdf, $xf, $xf_inverse);
    my $discrete_ppf =
    discretize_ppf($continuous_ppf, $xf, $xf_inverse);

    return ($discrete_cdf, $param_ref, $discrete_ppf, $discrete_pdf);
  };


  if (defined($data)) {
    return ($discrete_estimator, &$discrete_estimator($data));
  } else {
    return $discrete_estimator;
  }
}



# Given a continuous CDF, discretize it under the assumption that
# floats get FLOOR'd.  Also return the discrete PDF, which is most
# readily generated from the CDF.
sub discretize_cdf($;$$) {
  my $continuous_cdf = shift;
  my $xf             = shift;
  my $xf_inverse     = shift;

  (defined($xf) && defined($xf_inverse)) ||
  ((!defined($xf)) && (!defined($xf_inverse))) ||
  return undef;

  my $discrete_cdf =
  sub {
    my $x = shift @_;
    my $orig_x = defined($xf_inverse) ? &$xf_inverse($x) : $x;
    my $orig_x_ceil = POSIX::ceil($orig_x);
    my $x_ceil = defined($xf) ? &$xf($orig_x_ceil) : $orig_x_ceil;
    return (&$continuous_cdf($x_ceil));
  };

  my $discrete_pdf =
  sub {
    my $x = shift @_;

    my $orig_x = defined($xf_inverse) ? &$xf_inverse($x) : $x;

    if ((abs($orig_x - int($orig_x)) > 1e-10) &&
    (abs($orig_x - int($orig_x+1)) > 1e-10)) {
      # Not an integer.
      return 0;
    }

    my $orig_x_plus = $orig_x + 1;
    my $x_plus      = defined($xf) ? &$xf($orig_x_plus) : $orig_x_plus;

    defined($x_plus) || return undef;

    my $x_cdf = &$continuous_cdf($x);
    my $x_plus_cdf = &$continuous_cdf($x_plus);


    defined($x_cdf)      || return undef;
    defined($x_plus_cdf) || return undef;
    return ($x_plus_cdf-$x_cdf);
  };

  return (wantarray() ? ($discrete_cdf, $discrete_pdf) :
  $discrete_cdf);
}


# Again, assume that values are discretized by a floor() operation.
# This time, the input should be the percentage-point function (PPF),
# not the CDF.
sub discretize_ppf($;$$) {
  my $ppf = shift;
  my $xf  = shift;
  my $xf_inverse = shift;


  (defined($xf) && defined($xf_inverse)) ||
  ((!defined($xf)) && (!defined($xf_inverse))) ||
  die;

  return sub {
    my $pct = shift @_;
    my $x   = &$ppf($pct);

    defined($x) || return undef;

    my $orig_x = defined($xf_inverse) ? &$xf_inverse($x) : $x;

    defined($orig_x) || return undef;

    if (!($orig_x =~ /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/)) {
      if ($__UNIRAND_DEBUG_PRINTF) {
        print "Inverse failed for $x\n";
      }
      return undef;
    }

    my $orig_x_floor = POSIX::floor($orig_x);

    # ...unless the value is barely less than an integer.  We do this
    # because approximations are common and might result in some Very
    # Bad Things when floor'd() such as impossible values going
    # through a Box-Cox function.
    if (($orig_x - $orig_x_floor) >= (1-(1e-10))) {
      $orig_x_floor = POSIX::ceil($orig_x);
    }

    my $x_floor = defined($xf) ? &$xf($orig_x_floor) : $orig_x_floor;

    my $factor = 1;
    loop: {
      # Deal with infinitely long tail causing problems w/ bc.
      if ((!defined($x_floor)) && defined($xf)) {

        if ($pct < 0.5) {
          $orig_x_floor += $factor;
        } else {
          $orig_x_floor -= $factor;
        }
        $factor *= 1.2;
        $x_floor = &$xf($orig_x_floor);
        redo loop;
      }
    }

    if (!defined($x_floor)) {
      if ($__UNIRAND_DEBUG_PRINTF) {
        print " ox = $orig_x\n";
        print "oxf = $orig_x_floor\n";
        print " xf = undef\n";
      }
      return undef;
    }

    return $x_floor;
  };
}



# Crude density estimation using a truncated normal as the kernel.
#
# The first optional parameter, if defined and non-zero, skips the
# sort that would normally be used.  Don't do this unless the data
# is already sorted...
#
# The last three optional parameters define the standard deviation
# of the kernel (in absolute terms; default is 0.4*data dev), and
# the truncation points.
#
# Returns:  an array of [$x, $density] tuples.
#
# This won't really approximate a PDF, since it is unnormalized.
sub density_est($;$$$$) {
  my $data_ref        = shift;
  my $already_sorted  = shift;
  my $dense_est_dev   = shift;
  my $dense_est_left  = shift;
  my $dense_est_right = shift;

  my $min   = ArrayFoo::arrSelect($data_ref, 0);
  my $max   = ArrayFoo::arrSelect($data_ref, -1);
  my $range = $max - $min;
  my $ct    = scalar @$data_ref;

  if ($range == 0) {
    # Eh?  It's a single spike.
    return (+[$min, 1]);
  }

  my $min20 = ArrayFoo::arrSelect($data_ref, int(0.2*$ct));
  my $min80 = ArrayFoo::arrSelect($data_ref, int(0.8*$ct));

  # Get a somewhat robust estimate of 1% of the range.  I don't
  # use min/max directly here because of the outliers.
  $dense_est_dev = defined($dense_est_dev) ?
  $dense_est_dev : (($min80 - $min20) / 60);
  $dense_est_left  = defined($dense_est_left) ?
  $dense_est_left : -2;
  $dense_est_right = defined($dense_est_right) ?
  $dense_est_right : 2;

  # Create a truncated normal PDF.
  my $normal_pdf =
  UniRand::normal_truncated_gen_pdf(+[ 0, $dense_est_dev,
  $dense_est_left,
  $dense_est_right ]);

  defined($normal_pdf) || return undef;

  my $trunc_min = $dense_est_dev * $dense_est_left;
  my $trunc_max = $dense_est_dev * $dense_est_right;

  if ($trunc_min < $trunc_max) {
    return undef;
  }

  my @window     = ();
  my $window_right = 0;
  my $window_left  = -1;

  # Need to access them in order, so an (n log n) sort is better than
  # n O(n) selects.

  my $sorted_ref = $data_ref;

  if ((!defined($already_sorted)) || (!$already_sorted)) {
    my @sorted =  sort { $a <=> $b } @$data_ref;
    $sorted_ref = \@sorted;
  }

  my @density    = (0) x $ct;
  my @means      = (0) x $ct;
  my $base_area  = UniRand::normal_std_cdf($dense_est_right) -
  UniRand::normal_std_cdf($dense_est_left);

  for (my $i=0; $i < $ct; $i++) {
    my $val   = $sorted_ref->[$i];
    my $w_min = $val + $trunc_min;
    my $w_max = $val + $trunc_max;
    my $area  = $base_area;

    if ($w_min < $min) {
      $area -= UniRand::normal_std_cdf(($min - $val) / $dense_est_dev) -
      UniRand::normal_std_cdf($dense_est_left);
    }

    die unless ($area > 0);

    if ($w_max > $max) {
      $area -= UniRand::normal_std_cdf($dense_est_right) -
      UniRand::normal_std_cdf(($max - $val) / $dense_est_dev);
    }


    # Trim the left part of the window.
    while ((scalar @window) && ($window[0] < $w_min)) {
      shift @window;
      $window_left++;
    }

    # Push the right edge of the window.
    while (($window_right < $ct) && ($sorted_ref->[$window_right] <= $w_max)) {
      my $candidate = $sorted_ref->[$window_right];

      if ($candidate >= $w_min) {
        push @window, $candidate;
      }
      $window_right++;
    }

    # Compute density for $val.
    foreach (@window) {
      my $peer = $_;
      my $relative = $peer - $val;
      my $weight   = &$normal_pdf($relative);

      $density[$i] += $weight * $base_area / $area;
    }
  }

  # Turn them into tuples.
  for (my $i=0; $i < $ct; $i++) {
    $density[$i] = +[ $sorted_ref->[$i], $density[$i] ];
  }

  # Make 'em unique in x-value.
  for (my $i=$ct-1; $i >= 1; $i--) {
    if ($density[$i]->[0] == $density[$i-1]->[0]) {
      splice @density, $i, 1;
      $ct--;
    }
  }

  # Normalize them.
  {
    my $trap_area = 0;
    my $lim       = $ct - 1;

    for (my $i=0; $i < $lim; $i++) {
      $trap_area += ($density[$i+1]->[0] - $density[$i]->[0]) *
      ($density[$i+1]->[1] + $density[$i]->[1]);
    }
    $trap_area /= 2;
    map { $_->[1] /= $trap_area } @density;
  }

  return @density;
}


# Match all listed distributions against the data.  Give it a
# reference to the data, and a bucket count for the Chi-Square if
# you like (default 64).  If you like, you can specify exactly
# what distributions get checked -- just add on tuples of the form
#   <name {scalar}, estimator {function reference}, list reference
#    to name of parameters {for human use only}>
#
# You can also give it a maximum number of mixtures to use.
#
# The end result is a list of tuples, where each tuple corresponds
# to a distribution where fitting was possible.  These tuples contain:
#
#   Name           (scalar; see below)
#   log-likelihood (scalar)
#   param count    (scalar)
#   BIC            (scalar)
#   AIC            (scalar)
#   Chi-Square sig (scalar; may be undef, if off the table)
#   CDF            (function ref)
#   parameters     (list ref)
#   PPF            (function ref)
#   PDF            (function ref)
#   estimator      (function ref)
#
#
# By default, this uses @DIST_LIST_PLAIN instead of @DIST_LIST,
# since trying to fit mixtures to non-mixture data is dodgy at
# best, and potentially terribly expensive.


sub all_match($;$$@) {
  my $data_ref     = shift;
  my $bucket_count = shift;
  my $k_max        = shift;

  my @dists        = @_;
  # This list is manually edited, not autogenerated.  If you add
  # another estimator to the package, don't forget to put it here
  # if you want it automatically used.

  my $Chi           = new ChiSquare;
  my @results       = ();
  my $dist_list_ref = \@DIST_LIST_PLAIN;

  if (!defined($bucket_count)) {
    $bucket_count = 64;
  }

  if (scalar(@dists) > 0) {
    $dist_list_ref = \@dists;
  }

  if ((scalar(@$data_ref) / $bucket_count) < 5) {
    $bucket_count = int(scalar(@$data_ref) / 5);
  }

  my $distref = undef;

  foreach $distref (@{$dist_list_ref}) {
    my ($name, $estimator, $param_names) = @$distref;
    my @params = ($data_ref, $estimator);

    if ($name =~ /mixture/) {
      push @params, $k_max;
    }

    # print "Fitting $name...\n";
    my ($cdf_ref, $param_ref, $ppf_ref, $pdf_ref,
    $ll, $BIC) = BIC_wrap(@params);

    if (!defined($cdf_ref)) {
      next;
    }

    my @flat = flatten(@$param_ref);
    my $k = scalar @flat;

    # May be undef.  Otherwise, it'll be the highest known
    # significance level at which it passes.
    # print "Chi-square testing $name...\n";
    my $sig = $Chi->chi_square($data_ref, $bucket_count,
    $cdf_ref, scalar @$param_ref);
    my $AIC = (-2*$ll) + (2*$k);

    # print "$name ll=$ll BIC=$BIC\n";
    my $tuple = +[];

    $tuple->[$MATCH_INDICES{"name"}]          = $name;
    $tuple->[$MATCH_INDICES{"param_names"}]   = $param_names;
    $tuple->[$MATCH_INDICES{"ll"}]            = $ll;
    $tuple->[$MATCH_INDICES{"k"}]             = $k;
    $tuple->[$MATCH_INDICES{"BIC"}]           = $BIC;
    $tuple->[$MATCH_INDICES{"AIC"}]           = $AIC;
    $tuple->[$MATCH_INDICES{"chi"}]           = $sig;
    $tuple->[$MATCH_INDICES{"cdf"}]           = $cdf_ref;
    $tuple->[$MATCH_INDICES{"params"}]        = $param_ref;
    $tuple->[$MATCH_INDICES{"ppf"}]           = $ppf_ref;
    $tuple->[$MATCH_INDICES{"pdf"}]           = $pdf_ref;
    $tuple->[$MATCH_INDICES{"est"}]           = $estimator;

    push @results, $tuple;
  }

  return @results;
}



# Sic all_match on data.  Return the best matches, according to
# ChiSquare or BIC.  You can also give it a threshold (fraction
# of BIC to use to determine qualifying range), number of
# buckets (passed to all_match, used for determining chi-square),
# and a list of distributions (tuples of name (scalar),
# estimator (function ref), and parameter names (tuple ref)),
# if you like.
#
# Criteria:
#  1- If anybody's a good-enough match according to ChiSquare.pm,
#     return all of those tuples that have the best (highest)
#     significance level.  Otherwise...
#  2- Sort by BIC.  Find the best (lowest) value.  Calculate 10%
#     (or other; provide as a fraction) of the magnitude.  This
#     defines the accepted range.
#
#     Change:  tolerance will be based on the range of BICs, which
#     should make more sense than merely magnitude of best value.
#
# The return format is identical to that of all_match.
#
#
# Example of how you might use the 'dists' parameter:
#
# my $data_ref   = +[ data ];
# my $sample_ref = +[ {-compute sample of data} ];
#
# 0.2 = generous threshold?  Arbitrary.
#
# my @initial_fits = UniRand::best_match($data_ref, 0.2);
#
# my $name_idx   = UniRand::$MATCH_INDICES{"name"};
# my $est_idx    = UniRand::$MATCH_INDICES{"est"};
# my $pnames_idx = UniRand::$MATCH_INDICES{"param_names"};
#
# my @dists_used = map { +[ $_->[$name_idx],
#                           $_->[$est_idx],
#                           $_->[$pnames_idx] ] } @initial_fits;
#
# # Fit on the full data, using only the distributions that seemed
# # to give good fits.
# my @final_fits = UniRand::best_match($data_ref,
#                              undef,    # relative threshold for BIC
#                              undef,    # number of chi-square buckets
#                              undef,    # maximum number of components
#                              @dists_used);

sub best_match($;$$$@) {
  my $data_ref  = shift;
  my $thresh    = shift;
  my $buckets   = shift;
  my $k_max     = shift;
  my @dists     = @_;

  my @results   = all_match($data_ref, $buckets, $k_max, @dists);
  my $chi_index = $MATCH_INDICES{"chi"};
  my $bic_index = $MATCH_INDICES{"BIC"};
  my @chi_sig   = grep { defined($_->[$chi_index]) } @results;

  if (!defined($thresh)) {
    $thresh = 0.1;
  }

  if (scalar(@results) == 0) {
    return ();
  }

  if (scalar(@chi_sig) > 0) {
    # Somebody's significant enough to match via chi-square.
    # Extract all the chi-square values and sort them in
    # ascending order.  Then, find the maximum.
    my @chis = sort { $a <=> $b } (map { $_->[$chi_index] } @chi_sig);
    my $max  = $chis[-1];
    my @max_tuples = grep { $_->[$chi_index] == $max } @chi_sig;

    return @max_tuples;
  } else {
    # Fine.  We do it the BIC way (alternatives would be raw
    # log-likelihood and AIC).  Lower = better.
    my @BICs = sort { $a <=> $b } (map { $_->[$bic_index] } @results);
    my $min_BIC  = $BICs[0];
    my $max_BIC  = $BICs[-1];
    my $tolerance = $thresh * ($max_BIC - $min_BIC);
    my $max  = $min_BIC + $tolerance;

    my @ok_tuples = grep { ($_->[$bic_index] <= $max ) } @results;

    return @ok_tuples;
  }
}


# Give it the tuples from the _match functions.  This is one way
# to generate simple text-based reports.
sub fit_report(@) {
  my @final_fits = @_;
  my $name_idx  = $MATCH_INDICES{"name"};
  my $pname_idx = $MATCH_INDICES{"param_names"};
  my $ll_idx    = $MATCH_INDICES{"ll"};
  my $BIC_idx   = $MATCH_INDICES{"BIC"};
  my $param_idx = $MATCH_INDICES{"params"};
  my $k_idx     = $MATCH_INDICES{"k"};
  my $chi_idx   = $MATCH_INDICES{"chi"};

  foreach (@final_fits) {
    my @tuple = @{$_};
    my $name      = $tuple[$name_idx];
    my $pname_ref = $tuple[$pname_idx];
    my $ll        = $tuple[$ll_idx];
    my $BIC       = $tuple[$BIC_idx];
    my $param_ref = $tuple[$param_idx];
    my $k         = $tuple[$k_idx];
    my $chi       = $tuple[$chi_idx];
    my @params    = flatten(@$param_ref);

    print "Distribution type:  '$name'\n";
    print "Log-likelihood:      $ll\n";
    print "BIC:                 $BIC\n";
    print "Parameter count:     $k\n";
    print "Chi-Square:          ",
    defined($chi) ? $chi : "None",
    "\n";


    my $col_width1 = 30;
    my $col_width2 = 10;
    my $gap        = 8;
    my $fmt1       = "%" . $col_width1 . "s" . (" " x $gap) .
    "%" . $col_width2 . "s\n";
    my $fmt2       = "%" . $col_width1 . "s" . (" " x $gap) .
    "%" . $col_width2 . "f\n";

    {
      printf($fmt1, "Parameter", "Value");
      print "=" x $col_width1, " " x $gap, "=" x $col_width2, "\n";

      if (!($name =~ /mixture/)) {
        my $i=0;

        for ($i=0; $i < $k; $i++) {
          my $pname = $pname_ref->[$i];
          my $val   = $param_ref->[$i];

          printf($fmt2, $pname, $val);
        }
      } else {
        # Mixtures.
        if ($name =~ /lognormal/) {
          my $loc = shift @params;

          printf($fmt2, "Location", $loc);
        }

        # Remaining parameters are for each Gaussian:
        # mixing prob, mean, sdev

        while (scalar(@params)) {
          my $mixprob  = shift @params;
          my $mean     = shift @params;
          my $sdev     = shift @params;

          printf($fmt2, "Mixing probability", $mixprob);
          printf($fmt2, "Mean", $mean);
          printf($fmt2, "Standard deviation", $sdev);
        }
      }
    }

    print "\n\n\n";
  }
}


# This is merely a simple wrapper for any of the '_est' functions --
# the distribution estimators.
#
# Inputs:
#    Reference to data (of form suitable for sending to estimator)
#    Reference to estimator function
# Outputs:
#    cdf
#    parameters
#    ppf
#    pdf
#    log likelihood
#    Bayesian information criterion
#
#    if it succeeds, and
#      undef x 6
#    if the estimator complains.
#
# BIC numbers might not be the easiest things to interpret (beyond
# 'lower is better'), but they can be computed much more easily
# than Chi-Square, which gives nullo results unless it's significant
# enough to be in the table.
#
# BIC = Bayesian Information Criterion
#
# Extra parameters get sent to the estimator.
sub BIC_wrap($$;@){
  my $data_ref  = shift;
  my $estimator = shift;
  my @extra     = @_;
  my ($cdf, $parameters, $ppf, $pdf) = &$estimator($data_ref, @extra);

  if (!defined($cdf)) {
    # Undefined => estimator produced no fit.  Can happen with
    # picky fitters that won't shift when needed.
    return ((undef) x 6);
  } else {
    # print "Computing log-likelihood.\n";
    my $ll = Numerical::compute_log_likelihood($data_ref, $pdf,
    $__LL_PROB_FLOOR);

    if (!defined($ll)) {
      # fit _very_ weak, precision problem, or logic bug
      return ((undef) x 6);
    }

    my $n  = scalar @$data_ref;

    my @params = flatten(@$parameters);
    my $q  = scalar @params;

    my $BIC = (-2*$ll) + ($q * log($n)/log(2));

    return ($cdf, $parameters, $ppf, $pdf, $ll, $BIC);
  }
}



# This is merely a simple wrapper for any of the '_est' functions --
# the distribution estimators.
#
# Inputs:
#    Reference to data (of form suitable for sending to estimator)
#    Reference to estimator function
# Outputs:
#    cdf
#    parameters
#    ppf
#    pdf
#    log likelihood
#    Bayesian information criterion
#
#    if it succeeds, and
#      undef x 6
#    if the estimator complains.
#
# AIC numbers might not be the easiest things to interpret (beyond
# 'lower is better'), but they can be computed much more easily
# than Chi-Square, which gives nullo results unless it's significant
# enough to be in the table.
#
# AIC = Akaike Information Criterion
sub AIC_wrap($$){
  my $data_ref  = shift;
  my $estimator = shift;
  my @extra     = @_;
  my ($cdf, $parameters, $ppf, $pdf) = &$estimator($data_ref, @extra);

  if (!defined($cdf)) {
    # Undefined => estimator produced no fit.  Can happen with
    # picky fitters that won't shift when needed.
    return ((undef) x 6);
  } else {
    my $ll = Numerical::compute_log_likelihood($data_ref, $pdf,
    $__LL_PROB_FLOOR);
    my $n  = scalar @$data_ref;

    my @params = flatten(@$parameters);
    my $q  = scalar @params;

    my $AIC = (-2*$ll) + (2*$q);

    return ($cdf, $parameters, $ppf, $pdf, $ll, $AIC);
  }
}


# Used for flattening a nested list of arrays.
sub flatten(@) {
  my @args      = @_;
  my @flat_list = ();
  my $item      = undef;

  while (scalar(@args)) {
    $item = shift @args;

    if ((ref $item) eq "ARRAY") {
      my @flat_part = @{$item};
      unshift @args, @flat_part;
    } else {
      push @flat_list, $item;
    }
  }
  return @flat_list;
};





# Compute the correlation of the quantile-quantile plot, with given
# data, estimated PPF (from a postulated distribution), quantile
# range, and step size.
#
# Due to the number and variety of arguments, parameters should be
# in the form of a hash table.
#
# If wantarray, we return not only the correlation, but two array
# references:  data quantiles, and estimated quantiles (from ppf).
# If the data quantiles were given to us, we return the exact same
# reference.
sub qq_correlate(@) {
  my %params = @_;

  ############ DATA SOURCE
  # Define exactly one of the following (data source).  They will
  # be checked in this order.
  #
  # $data_qnt:  ref to data in the ultimate form we want it, which is
  # an array of the values we'll use, in order.
  #
  # $data_GKM:  ref to an object that allows us to extract quantiles
  # from the data.  It should support the GKQuantile interface --
  # namely, $data_GKM->quantize(something between 0 and 1).
  #
  # $data_raw:  raw data.  We'll quantize it /exactly/, using a standard
  # sort.

  my $data_qnt = $params{"data_qnt"};
  my $data_GKQ = $params{"data_GKQ"};
  my $data_raw = $params{"data_raw"};

  defined($data_qnt) || defined($data_GKQ) || defined($data_raw) || die;


  ############# ESTIMATED PERCENTAGE-POINT-FUNCTION (inverse CDF)
  # This MUST be defined.  It should take values within the percentile
  # range, and return what /would/ be the correct quantile if the ppf
  # were a perfect match.

  my $est_ppf = $params{"est_ppf"};

  defined($est_ppf) || die;


  ############# PERCENTAGE RANGE PARAMETERS
  # The first two may be defined.  If no maximum is given, 0.999 is used;
  # if no minimum, 0.001.
  #
  # Exactly one of the second two must be defined -- either count or
  # step size.
  my $pct_min = $params{"pct_min"};
  my $pct_max = $params{"pct_max"};

  # We don't use defaults of 0 or 1, because open-ended distributions
  # would obviously have trouble with that.
  $pct_min = defined($pct_min) ? $pct_min : 0.001;
  $pct_max = defined($pct_max) ? $pct_max : 0.999;

  ($pct_min >= 0) || die;
  ($pct_max <= 1) || die;
  ($pct_min < $pct_max) || die;

  my $pct_ct    = $params{"pct_ct"};
  my $pct_step  = $params{"pct_step"};

  defined($pct_ct) || defined($pct_step) || die;
  (defined($pct_ct) && (!int($pct_ct)))  && die;
  (defined($pct_ct) && ($pct_ct < 2))    && die;  # at least two points
  (defined($pct_step) && ($pct_step <= 0)) && die;

  # Both defined?  Then they'd better click.
  if (defined($pct_ct) && defined($pct_step)) {
    ($pct_ct > 1) || die;

    my $req_ct = 1 + int(($pct_max - $pct_min) / $pct_step);

    ($req_ct == $pct_ct) || die;
  }

  if (!defined($pct_ct)) {
    $pct_ct   = 1 + int(($pct_max - $pct_min) / $pct_step);
  }

  ($pct_ct > 1) || die;

  #############################################################
  # First order of business:  compute the percentages to use.

  my @pcts = ($pct_min);

  for (my $i=1; $i < ($pct_ct-1); $i++) {
    # Computing the $pct each time, instead of adding a step
    # size, should reduce error by not allowing it to accumulate
    # addition after addition.
    my $pct = $pct_min + (($pct_max - $pct_min) * $i / ($pct_ct-1));
    push @pcts, $pct;
  }
  push @pcts, $pct_max;



  #############################################################
  # Second:  complete $data_qnt, if not given to us.
  if (defined($data_qnt)) {
    # Given.  Skip this step.
  } elsif (defined($data_GKQ)) {
    # Implicit:  extract from quantile object.
    $data_qnt = +[];

    foreach (@pcts) {
      my $pct = $_;
      my $x   = $data_GKQ->quantile($pct);
      defined($x) || die;
      push @$data_qnt, $x;
    }
  } else {
    # Data explicitly given.  Compute exact quantiles.
    my @sorted = sort { $a <=> $b } @$data_raw;
    my $ct     = scalar @sorted;
    $data_qnt = +[];

    foreach (@pcts) {
      my $pct   = $_;
      my $x_idx = $pct * ($ct-1);

      if ($x_idx == int($x_idx)) {
        # Really?!
        push @$data_qnt, $sorted[$x_idx];
      } else {
        my $x_lo = POSIX::floor($x_idx);
        my $x_hi = $x_lo+1;
        my $frac = $x_idx - $x_lo;

        my $y_lo = $sorted[$x_lo];
        my $y_hi = $sorted[$x_hi];
        my $est  = $y_lo + (($y_hi - $y_lo) * $frac);

        push @$data_qnt, $est;
      }
    }
  }


  #############################################################
  # Third:  compute quantiles from the PPF function.
  my @ppf_qnt_arr = ();

  foreach (@pcts) {
    my $pct = $_;
    my $x   = &$est_ppf($pct);

    if (!defined($x)) {
      # print "PPF out of bounds:  $pct\n";
      return undef;
    }

    push @ppf_qnt_arr, $x;
  }


  # Sanity check.
  ((scalar @$data_qnt) == (scalar @ppf_qnt_arr)) || die;

  my $corr = ArrayFoo::arrCorrelate($data_qnt, \@ppf_qnt_arr);

  if (wantarray()) {
    return ($corr, $data_qnt, \@ppf_qnt_arr);
  } else {
    return $corr;
  }
}


# This is a function for generating percentage-point functions
# (inverse CDFs) from mixtures.  The inputs consist of --
#
#  - A mixture CDF, e.g. from bleah_mix_gen_cdf().
#  - An array of tuples, each corresponding one mixing component
#    and containing...
#
#    - Probability of this component.
#    - Mean (x-value).  It doesn't have to be a true mean, just
#      a ballpark X value.
#    - Sigma.  (x-delta).  It doesn't have to be a true dev,
#      it's just a measure that's used to decide how far to
#      adjust x when finding upper and lower bounds.
#    - CDF value.  This is the MIXTURE's cdf evaluated at the
#      provided mean value.
#    - PDF function for this component.  This is used to determine
#      how much we should pay attention to this component when
#      deciding how far to jump.
#
# The CDF must be continuous.

sub _mix_gen_ppf($@) {
  my $mix_cdf = shift;
  my @guides  = @_;
  my $max_err = 1e-6;
  my $absurd  = 6;

  # Ignore components with zero probability.
  @guides = grep { $_->[0] > 0 } @guides;

  # Sort in terms of increasing means.
  @guides = sort { $a->[1] <=> $b->[1] } @guides;

  my $ct = scalar @guides;
  my $avg_dev = 0;

  foreach (@guides) {
    $avg_dev += $_->[0] * $_->[2];
  }

  # Guarantee that it moves.
  $avg_dev += 1;

  defined($avg_dev) || die;

  return sub {
    my $y    = shift @_;
    defined($y) || die;
    my $x_lo = $guides[0]->[1];
    my $x_hi = $guides[-1]->[1];

    foreach (@guides) {
      my ($mu, $sigma) = @{$_};
      my $mu_lo = $mu - ($absurd * $sigma);
      my $mu_hi = $mu + ($absurd * $sigma);

      if ($x_lo > $mu_lo) {
        $x_lo = $mu_lo;
      }

      if ($x_hi < $mu_hi) {
        $x_hi = $mu_hi;
      }
    }

    defined($x_lo) || die;
    defined($x_hi) || die;

    my $y_lo = &$mix_cdf($x_lo);
    my $y_hi = &$mix_cdf($x_hi);

    defined($y_lo) || die;
    defined($y_hi) || die;

    my $x_lo_bad = $x_lo - ($absurd * $avg_dev);
    my $x_hi_bad = $x_hi + ($absurd * $avg_dev);

    my $factor = 1;
    my $y_lo_old = 1.1;
    my $tie_lo   = 0;

    while (($y_lo > ($y+$max_err)) && ($x_lo > $x_lo_bad)) {
      $x_lo -= $avg_dev * $factor;
      $factor *= 1.5;
      $y_lo  = &$mix_cdf($x_lo);

      if ($y_lo > $y_lo_old) {
        return undef;
      } elsif ($y_lo == $y_lo_old) {
        if (++$tie_lo > 100) {
          return undef;
        }
      } else {
        $tie_lo = 0;
        $y_lo_old = $y_lo;
      }
    }

    if ($y_lo > ($y+$max_err)) {
      if ($__UNIRAND_DEBUG_PRINTF) {
        print "_mix_gen_ppf():  y_lo=$y_lo, y=$y\n";
      }

      return undef;
    }


    $factor = 1;
    my $tie_hi   = 0;
    my $y_hi_old = -0.1;

    while (($y_hi < ($y-$max_err)) && ($x_hi < $x_hi_bad)) {
      $x_hi += $avg_dev * $factor;
      $y_hi  = &$mix_cdf($x_hi);
      $factor *= 1.5;
      defined($y_hi) || die;

      if ($y_hi < $y_hi_old) {
        return undef;
      } elsif ($y_hi == $y_hi_old) {
        if (++$tie_hi == 100) {
          return undef;
        }
      } else {
        $tie_hi = 0;
        $y_hi_old = $y_hi;
      }
    }

    if ($y_hi < ($y-$max_err)) {
      if ($__UNIRAND_DEBUG_PRINTF) {
        print "_mix_gen_ppf():  y_hi=$y_hi, y=$y\n";
      }
      return undef;
    }

    # Now, we should have bracketed the value, and since
    # mixtures of Gaussians have continuous CDFs, that's
    # all we need to start the search.

    return Numerical::inverse_search($mix_cdf, $y, $x_lo,
    $x_hi);
  }
}

return 1;
