#!/usr/local/bin/perl5 -w
#
##########################################################################
# $Id: SEM.pm,v 1.16 2004/07/09 14:58:24 lw2j Exp $
##########################################################################
# $Log:	SEM.pm,v $
# Revision 1.16  2004/07/09  14:58:24  lw2j
# When it says "Leaving secondary summarization", actually do so.
# 
# Revision 1.8  2004/02/26  18:29:18  lw2j
# Now gives you a mean and null covariance matrix if asked to estimate
# a Gaussian for a single point.
#
# Revision 1.3  2004/01/13  18:32:37  lw2j
# Assorted bug fixes.  Attempts to squash bug related to near-zero
# determinants for covariance matrices, possibly related to numerical
# precision issues?
#
# _ExEM now counts iterations as significant only if nontrivial
# progress is made, even when a new 'best' is reached -- new might
# be only marginally better.
#
# If and when estimated parameters collapse to the point where no PDF
# can be generated -- probably due to aforementioned issues with
# determinants -- k-Means is automatically run with random initialization
# to find a better position.  If during the loop, this counts as an
# iteration spent wandering in the wasteland.
#
# Revision 1.2  2004/01/08  14:13:49  lw2j
# Reworked estimation of mean, covariance matrix from sum,
# product-sum matrices.  Seems functional in early testing.
#
# Revision 1.1  2004/01/06  19:27:05  lw2j
# Initial revision
#
# ------------------------------------------------------------------------
# Logs below refer to a previous version that did not handle multivariate
# normals.
# ------------------------------------------------------------------------
# Revision 1.5  2003/05/30  17:04:33  lw2j
# Disabled debugging information.  It's now stable enough that it
# solved a well-separated six-component univariate normal.
#
# Revision 1.4  2003/05/30  16:13:21  lw2j
# More bug fixes and tweaks.  ExEM will now automatically terminate
# after not finding local minima for a while.  $B and $C are now
# saved as they should be.
#
# Revision 1.3  2003/05/29  14:49:00  lw2j
# Fixed assorted bugs, including a few related to apparent typographical
# errors.
#
# Revision 1.2  2003/05/27  16:36:43  lw2j
# Fixed a variety of bugs.  Testing... debugging messages still
# enabled by default.
#
# Revision 1.1  2003/05/27  14:33:53  lw2j
# Initial revision
#
##########################################################################
#
# Purpose:
#   To implement the Scalable EM algorithm proposed in
#
#   Bradley, Paul S; Fayyad, Usama M.  Reina, Cory A.  Scaling EM
#   (Expectation-Maximization) Clustering to Large Databases.
#   Technical Report MSR-TR-98-35, Microsoft Research, orig 1998
#   revised 1999.
#
# This code was written without assistance from the authors of the
# aforementioned paper, and deliberately deviates from their
# algorithm in various places (noted).  Implementation errors here
# are my fault, not theirs.
#
#
##########################################################################
#
# Package Header
#
##########################################################################

package SEM;
require Exporter;

use strict;
use lib '/usr/lw2j/private/Stat/Perl';

require ArrayFoo;
require Heap;
require kMeans;
require Numerical;
require UniRand;
require util;



##########################################################################

BEGIN {
  use Exporter ();
  use Symbol;
  use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

  $VERSION     = 1.0;
  @ISA         = qw(Exporter);
  @EXPORT      = qw();
  %EXPORT_TAGS = qw();
  @EXPORT_OK   = qw(
    new

    cluster

    set_k
    set_k_prime
    set_epsilon
    set_p
    set_beta
    set_beta_relative
    set_drop_size
    set_merge_close
    set_drop_time
    set_d

    get_k
    get_k_prime
    get_epsilon
    get_p
    get_beta
    get_beta_relative
    get_drop_size
    get_merge_close
    get_drop_time
    get_d

    clear_params
    clear_data
    set_default_params

    set_estimates
    get_estimates

    insert_data
    normal_est
    normal_gen_pdf
    normal_gen_rand
  );

  return 1;
}


sub _compute_ll_buf($$$);
sub _ExEM($);
sub _primary_summarization($);
sub _secondary_summarization($);

sub _debug_print(@);

my $__M_PI      = 4*atan2(1,1);


# Create a new object.  You MUST set parameters before it can be used.
sub new($@) {
  my $class = shift;
  my $self  = +{};

  bless $self, $class;
  $self->clear_params();
  $self->clear_data();
  $self->set_default_params();


  return $self;
}


# Used to enable/disable debugging in just one place.
sub _debug_print(@) {
  if (0) {
    print @_;
  }
  return;
}



# Run the whole clustering process with default settings and
# kMeans-based initialization.
#
# Required parameters:
#  data:  ref to array of data.  May be unidimensional, may be
#         array of equal-length tuples.
#  k:     how many clusters.
#
# Optional:
#  self:  feel free to specify SEM::cluster() instead of first
#         creating an SEM object.
#  boot_ct:   How much data should we give to kMeans for bootstrapping?
#             Measured in tuples/items.  Defaults to all.
#  bat_ct:    How much data should we use per subsequent update?
#             Defaults to everything not in boot_ct.
#
# Return value:
#  (scalar context) Returns ref to the 'phi' parameter vector.
#  Vector contains one tuple per estimated Gaussian.  Each of
#  these tuples contains a scalar mixing probability, a mean
#  (scalar for scalar data, vector for vector data), and either
#  variance (for scalars) or a covariance matrix (for vector
#  data; covariance matrix is in row-major form).
#
#  (array context) Returns the $self pointer, followed by the
#   above ref.
sub cluster($$;$$) {
  my $self = undef;
  my $data = shift;

  if (ref($data) eq 'SEM') {
    $self = $data;
    $data = shift;
  }

  my $n   = scalar(@$data);
  my $k   = shift;
  my $boot_ct = (scalar(@_)) ? shift : $n;

  $boot_ct = ($boot_ct <= $n) ? $boot_ct : $n;

  my $bat_ct  = (scalar(@_)) ? shift : ($n - $boot_ct);

  if (!defined($self)) {
    $self = SEM::new SEM();
  }

  $self->clear_data();
  $self->clear_params();
  $self->set_default_params();
  $self->set_k($k);

  my $idx         = 0;
  my @first_batch = map { $data->[$_] }
  (0..($boot_ct-1));

  # Does bootstrapping.
  $self->insert_data(@first_batch);

  $idx = $boot_ct;

  while ($idx < $n) {
    my $batch_end = $idx + $bat_ct - 1;

    if ($batch_end >= $n) {
      $batch_end = $n-1;
    }

    my @batch = map { $data->[$_] } ($idx .. $batch_end);

    $self->insert_data(@batch);

    $idx = $batch_end+1;
  }
  my $phi_copy = util::deep_copy($self->{'phi'});

  if (wantarray()) {
    return ($self, $phi_copy);
  } else {
    return $phi_copy;
  }
}


# Reset characteristics that specify how the system behaves.
#
# Semantics:
#   k              number of Gaussians to fit [required, no default]
#   k_prime        number of sub-clusters to try via K-Means during
#                  secondary data summarization [defaults to 2k]
#   epsilon        Convergence criterion for both the main "loop" and
#                  the Extended EM part. [defaults to 0.001]
#   p              Fraction of points that are summarized as part of
#                  the Primary Data Summarization part.  0.005 = half a
#                  percent, 1=entire data, etc. [defaults to 0.5]
#
#                  Note that in the SEM paper, while p is normally
#                  used in "p%" implying 0..100, section 4.2.1 in
#                  the experiments section implies that p is a
#                  fraction from 0..1, and that '1' means that most
#                  of the data enters primary summaries, and that
#                  the value of '0.5' used there would therefore
#                  mean that half the data should be summarized
#                  each primary-summarization phase.
#
#   beta           Parameter stipulating how tight the sub-clusters
#                  need to be in order to qualify for membership in
#                  C, the set associated with secondary data.
#
#                  The exact formulation of the termination criterion
#                  has been changed by me; while the values compared
#                  versus $beta in the paper appear to be variances
#                  except for being divided by dimensionality instead
#                  of count, I've made them true variances as that
#                  should avoid unduly penalizing large potential
#                  clusters.
#
#                  This is a difficult parameter to set in that it is
#                  in /absolute/ terms and thus is quite
#                  data-dependent.
#
#   beta_relative  This parameter normally is 0.05 if you set beta,
#                  you should explicitly set beta_relative to undefined
#                  because this parameter overrides beta.  Basically,
#                  if set then beta is always
#                        beta_relative * max_global_variance
#
#
#
#                  summarization. [defaults to 0.5]
#   drop_size      Maximum fraction of membership to be allowed in a
#                  component to be considered empty enough to
#                  reseed elsewhere.  If 0, only empty sets qualify.
#                  [ defaults to 0 ]
#   merge_close    Maximum fraction of difference between parameters,
#                  relative to the higher of the absolute values of
#                  each parameter. [ defaults to 0.005 ]
#   drop_time      Minimum number of points to see before dropping and
#                  reseeding of nearly-empty components is permitted.
#                  If undefined, this will NEVER occur.
#                  [defaults to 100]
sub clear_params($) {
  my $self = shift;

  $self->{"k"}            = undef;
  $self->{"k_prime"}      = undef;
  $self->{"epsilon"}      = undef;
  $self->{"p"}            = undef;
  $self->{"beta"}         = undef;
  $self->{"drop_size"}    = undef;
  $self->{"merge_close"}  = undef;
  $self->{"drop_time"}    = undef;

  return $self;
}



# Reset fields that track things specific to the data.
#
# Semantics:
#   R             The buffer of vectors NOT summarized; that is, stored
#                 exactly.  Format:  list reference of list references,
#                 each of which represents a tuple.
#   B             Primary summarization buffer; sufficient statistics
#                 on a per-component basis.
#   C             Secondary summarization buffer; dense regions of
#                 outliers.
#   phi           Array of current model estimates.  This should be a
#                 reference to a list of references to lists -- one
#                 tuple per component, each containing a weight (as
#                 fraction), mean vector, and covariance matrix.
#                 [ Initial estimate required! ]
#
#                 Yes, that's COVARIANCE instead of standard deviation,
#                 in order to be consistent with the paper.
#
#   m             Number of data seen so far.
#   d             Dimensionality of data.  Must be undefined or a scalar;
#                 if scalar, all subsequent entries must match.
sub clear_data($) {
  my $self = shift;

  $self->{"R"}            = +[];
  $self->{"B"}            = +[];
  $self->{"C"}            = +[];
  $self->{"phi"}          = +[];
  $self->{"m"}            = 0;
  $self->{"d"}            = undef;

  $self->{"sums"}         = undef;
  $self->{"sum2s"}        = undef;

  return $self;
}




# Set default values for some of the parameters.  The number of
# components, and the number of sub-clusters are up to the user.
sub set_default_params($) {
  my $self = shift;

  $self->{"epsilon"}       = 0.001;
  $self->{"p"}             = 0.5;
  $self->{"beta"}          = 0.5;
  $self->{"beta_relative"} = 0.05;
  $self->{"drop_size"}     = 0.001;
  $self->{"merge_close"}   = 0.05;
  $self->{"drop_time"}     = 100;

  return $self;
}



# Parameter mutators / accessors follow.

# k_prime will be set to k*2, if not already set.
sub set_k($$) {
  my $self = shift;
  my $k    = shift;

  $self->{"k"} = $k;

  if (!defined($self->{"k_prime"})) {
    $self->{"k_prime"} = $k * 2;
  }
}

sub  set_k_prime($$) {
  my $self = shift;
  $self->{"k_prime"} = shift;
  return $self;
}

sub set_epsilon($$) {
  my $self = shift;
  $self->{"epsilon"} = shift;
  return $self;
}

sub set_p($$) {
  my $self = shift;
  $self->{"p"} = shift;
  return $self;
}

sub set_beta($$) {
  my $self = shift;
  $self->{"beta"} = shift;
  return $self;
}

sub set_beta_relative($$) {
  my $self = shift;
  $self->{"beta_relative"} = shift;
  return $self;
}

sub set_drop_size($$) {
  my $self = shift;
  $self->{"drop_size"} = shift;
  return $self;
}

sub set_merge_close($$) {
  my $self = shift;
  $self->{"merge_close"} = shift;
  return $self;
}

sub set_drop_time($$) {
  my $self = shift;
  $self->{"drop_time"} = shift;
  return $self;
}

sub set_d($$) {
  my $self = shift;
  $self->{"d"} = shift;
  return $self;
}


sub get_k($$) {
  my $self = shift;

  return $self->{"k"};
}

sub  get_k_prime($$) {
  my $self = shift;

  return $self->{"k_prime"};
}

sub get_epsilon($$) {
  my $self = shift;
  return $self->{"epsilon"};
}

sub get_p($$) {
  my $self = shift;
  return $self->{"p"};
}

sub get_beta($$) {
  my $self = shift;
  return $self->{"beta"};
}

sub get_beta_relative($$) {
  my $self = shift;
  return $self->{"beta_relative"};
}

sub get_drop_size($$) {
  my $self = shift;
  return $self->{"drop_size"};
}

sub get_merge_close($$) {
  my $self = shift;
  return $self->{"merge_close"};
}

sub get_drop_time($$) {
  my $self = shift;
  return $self->{"drop_time"};
}

sub get_d($$) {
  my $self = shift;
  return $self->{"d"};
}

# end of accessors / mutators



# Format:
#  The second argument should be a list ref, where the list contains
#  one tuple per component.  Each tuple should contain the following:
#  a probability prior / weight, mean vector (scalar permitted if
#  dimensionality is to be 1), covariance matrix in row-major form
#  (scalar permitted if dimensionality is to be 1).
sub set_estimates($$) {
  my $self = shift;
  my $lref = shift;

  # Make a copy, just to be safe.
  $self->{"phi"} = util::deep_copy($lref);

  my $dref = \$self->{"d"};

  foreach (@$lref) {
    my ($prob, $mean, $cov) = @{$_};

    # Is the mean vector/scalar of a dimension consistent with prior
    # settings of 'd'?
    my $d_mean      = undef;
    my $d_covar_row = undef;
    my $d_covar_col = undef;

    if (ref($mean) eq 'ARRAY') {
      $d_mean = scalar(@$mean);
    } elsif (!ref($mean)) {
      $d_mean = 1;
    } else {
      die "Initial estimate of mean needs to be a vector or a scalar.";
    }

    if (ref($cov) eq 'ARRAY') {
      $d_covar_row = scalar @$cov;

      if ($d_covar_row == 0) {
        die "Initial estimate of covariance matrix cannot be 0x0!";
      }

      my $first_row = $cov->[0];

      if (ref($first_row) eq 'ARRAY') {
        $d_covar_col = scalar @$first_row;
      } elsif ((!ref($first_row)) && ($d_covar_row == 1)) {
        # If it's a 1-row list, we'll allow a single scalar there
        $d_covar_col = 1;
      } else {
        die "Covariance matrix strangely made.";
      }
    } elsif (!ref($cov)) {
      $d_covar_row = 1;
      $d_covar_col = 1;
    } else {
      die "Malformed covariance matrix; should be matrix or scalar.";
    }

    # Dimensionality is 'd'.
    # 'd' should match dimensionality of mean vector (or be 1 if
    # mean is scalar), and each dimension of covariance matrix.
    if (!defined($$dref)) {
      $$dref = $d_mean;
    }

    ($$dref == $d_mean) || die "Dimensionality of mean does not match.";
    ($$dref == $d_covar_row) || die "No. rows of covariance matrix does not match.";
    ($$dref == $d_covar_col) || die "No. rows of covariance matrix does not match.";
  }

  my $B = +[];
  $self->{"B"}   = $B;

  foreach (@$lref) {
    # Vector of sums, matrix of products, count.

    if ($$dref == 1) {
      # Unidimensional.
      push @$B, +[ 0, 0, 0];
    } else {
      # Multidimensional.
      my $new_sum_vector = Numerical::vector_new($$dref);
      my $new_prod_matrix = Numerical::matrix_new($$dref, $$dref);

      push @$B, +[ $new_sum_vector, $new_prod_matrix, 0];
    }
  }
  scalar (@$B == scalar(@$lref)) || die;

  return $self;
}


# This does not RE-estimate, it only retrieves.
sub get_estimates($) {
  my $self = shift;
  my $phi  = util::deep_copy($self->{"phi"});

  return $phi;
}



# Use k-Means to generate the initial estimates.
# Data must have already been loaded, k must have already been
# chosen.
#
# (1) Use k-Means with random initialization to generate
#     cluster means and covariance matrices.
# (2) From the assignments, determine the initial probabilities.
sub _bootstrap($$$) {
  my $self = shift;
  my $data = shift;
  my $k    = shift;

  (defined($data)) || die;
  (defined($k)) || die;

  my $n       = scalar @$data;

  my ($means, $assignments, $covariances) = kMeans::cluster($data, $k);
  my @counts = (0) x $k;
  my @phi    = ();

  if (!defined($means)) {
    return undef;
  }

  foreach (@$assignments) {
    $counts[$_]++;
  }

  my @probs = @counts;
  map { $_ /= $n } @probs;

  for (my $i=0; $i < $k; $i++) {
    push @phi, +[ $probs[$i], $means->[$i], $covariances->[$i] ];
  }

  $self->{'phi'} = \@phi;
  return $self;
}


######## The meat of the algorithm starts here.

# All required parameters must have already been specified.  Supply
# the object reference and the next sample subset.  We'll return a
# value that will be 1 if it /recommends/ termination according to
# the stopping criterion of section 2.  This is completely non-
# binding, of course.
#
# Alteration:  if mean/[co]variances have not been estimated yet,
# automatically call the _bootstrap() routine to do so on the first
# batch.  This will invoke kMeans clustering.
sub insert_data($@) {
  my $self   = shift;

  if (!scalar(@_)) {
    # NOP.
    return 1;
  }

  my @subset = util::deep_copy(@_);

  # shallow copies, all
  my $k       = $self->{'k'};
  my $R       = $self->{'R'};
  my $B       = $self->{"B"};
  my $C       = $self->{"C"};
  my $phi     = $self->{"phi"};
  my $m_old   = $self->{"m"};
  my $epsilon = $self->{"epsilon"};
  my $d       = $self->{"d"};

  my $sums    = $self->{"sums"};
  my $sum2s   = $self->{"sum2s"};
  my $rechecked = 0;

  if (!defined($d)) {
    if (ref($subset[0])) {
      $d = scalar @{$subset[0]};
    } else {
      $d = 1;
    }
    $self->{'d'} = $d;
  }


  if (!scalar(@$B)) {
    if ($d == 1) {
      foreach  (1..$k) {
        push @$B, +[ 0, 0, 0];
      }
    } else {
      foreach  (1..$k) {
        push @$B, +[+[ (0) x $d ], Numerical::matrix_new($d, $d), 0 ];
      }
    }
  }

  if (!defined($sums)) {
    if ($d == 1) {
      $sums = 0;
    } else {
      $sum2s = +[ (0) x $d ];
    }
  }

  {
    # Verify consistent dimensionality.

    # Already checked that subset was non-empty.
    my $first = $subset[0];

    if (ref($first) eq 'ARRAY') {
      # Expected format:  list of vectors.  Confirm consistent dimensions.
      foreach (@subset) {
        my $item = $_;

        (ref($item) eq 'ARRAY') || die "Got scalar instead of vector.";
        if (scalar(@$item) != $d) {
          die "Data has inconsistent dimensionality.";
        }
      }
    } elsif (((scalar @subset) % $d) == 0) {
      # Note:  If $d=1, scalars.  Otherwise, reform as vectors.
      if ($d > 1) {
        my @temp = @subset;

        @subset = ();

        # Necessary step for consistent computation of $m.
        while (scalar(@temp)) {
          push @subset, +[ (splice @temp, 0, $d) ];
        }
      }
    } else {
      die "Data is of unknown format.";
    }
  }

  my $m       = $m_old + (scalar @subset);
  push @$R, @subset;



  if ((!defined($phi)) || (!scalar(@$phi))) {
    $self->{'m'} = $m;
    if (!defined($self->_bootstrap(\@subset, $k))) {
      return undef;
    }

    $phi = $self->{"phi"};

    my $c_ref = undef;

    recheck:

    foreach $c_ref (@$phi) {
      my ($prob, $mean, $cov) = @$c_ref;

      if (!defined(normal_gen_pdf($mean, $cov))) {
        if ($d == 1) {
          die "This should be impossible!";
        } elsif (($rechecked++) < 10) {
          # Should be impossible, but is possible due to numerical
          # issues.
          if (rand() < 0.5) {
            foreach (0..($d-1)) {
              $cov->[$_]->[$_] *= 1.001;
            }
            goto recheck;
          } else {
            $self->_bootstrap(+[ @$R, @subset ], $k);
            $phi = $self->{"phi"};
            goto recheck;
          }
        } else {
          # Failure to initialize.
          return undef;
        }
      } else {
        $rechecked = 0;
      }
    }
  }

  my $phi_old = util::deep_copy($phi);
  my $ll_avg_old = undef;

  if ($m_old > 0) {
    my $ll_old     = _compute_ll_buf($phi, $R, +[ @$B, @$C ]);
  }


  # Allow the garbage collector to work a touch more efficiently.
  $R = undef;
  $B = undef;
  $C = undef;

  # Update count.
  $self->{"m"} = $m;

  # Update sums.
  if ($d == 1) {
    foreach (@subset) {
      $sums  += $_;
      $sum2s += ($_ ** 2);
    }
  } else {
    my $tuple = undef;

    foreach $tuple (@subset) {
      for (0..($d-1)) {
        $sums->[$_] += $tuple->[$_];
        $sum2s->[$_] += ($tuple->[$_] ** 2);
      }
    }
  }

  $self->{"sums"} = $sums;
  $self->{"sum2s"} = $sum2s;


  $self->_ExEM();
  $self->_primary_summarization();
  $self->_secondary_summarization();

  $R       = $self->{'R'};
  $B       = $self->{"B"};
  $C       = $self->{"C"};

  my $B_ct = 0;
  my $C_ct = 0;

  # Remember:  mean vector/scalar, covariance matrix/variance, count.
  map { $B_ct += $_->[2] } @$B;
  map { $C_ct += $_->[2] } @$C;

  _debug_print "*DEBUG*   Size of R:  ", scalar(@$R), "\n";
  _debug_print "*DEBUG*   Size of B:  ", scalar(@$B), " [$B_ct]\n";
  _debug_print "*DEBUG*   Size of C:  ", scalar(@$C), " [$C_ct]\n";


  if (!defined($ll_avg_old)) {
    # Don't terminate yet!
    return 0;
  }

  $phi     = $self->{"phi"};

  if (!defined($phi)) {
    return undef;
  }

  my $ll_avg = _compute_ll_buf($phi, $R, +[@$B, @$C ] ) / $m;

  if (!defined($ll_avg)) {
    return undef;
  }

  return ((abs($ll_avg - $ll_avg_old) <= $epsilon) ? 1 : 0);
}





# Multivariate form.  Give it a ref to an array of data.  Each
# datum should be either a scalar or a tuple ref.  All data
# should have identical dimensionality of course.
#
# Returns list of either (mean, variance) or
# (mean vector, covariance matrix).
sub normal_est($) {
  my $data = shift;

  if ((!defined($data)) || (ref($data) ne 'ARRAY')) {
    return undef;
  }

  my $n = scalar @$data;

  # Don't be absurd.
  if ($n < 1) {
    return undef;
  } elsif ($n == 1) {
    # Single point.
    my $mean = util::deep_copy($data->[0]);
    my $cov  = undef;

    my $d = scalar @{$data->[0]};

    if ($d == 1) {
      $cov = 0;
    } else {
      $cov = Numerical::matrix_new($d, $d);
    }

    return ($mean, $cov);
  }

  if (!ref($data->[0])) {
    # Unidimensional.  Method of moments.
    my $sum  = 0;
    my $sum2 = 0;

    foreach (@$data) {
      my $datum = $_;

      $sum += $datum;
      $sum2 += ($datum ** 2);
    }

    my $E_x   = $sum/$n;
    my $E_xx  = $sum2/$n;



    return ($E_x, ($E_xx - ($E_x * $E_x)));

  } else {
    my $d = scalar @{$data->[0]};
    my @sum  = (0) x $d;
    my $prod_sum = Numerical::matrix_new($d, $d);
    my $tuple = undef;

    foreach $tuple (@$data) {
      for (0..($d-1)) {
        $sum[$_] += $tuple->[$_];
      }

      for (my $i=0; $i < $d; $i++) {
        my $val_i = $tuple->[$i];

        $prod_sum->[$i]->[$i] += $val_i * $val_i;

        for (my $j=$i+1; $j < $d; $j++) {
          my $val_j = $tuple->[$j];
          my $prod  = $val_i * $val_j;

          $prod_sum->[$i]->[$j] += $prod;
        }
      }
    }

    my @mean = map { $_ / $n } @sum;

    my $covariance = Numerical::matrix_new($d, $d);

    for (my $i=0; $i < $d; $i++) {
      my $E_x   = $mean[$i];
      my $E_xx  = $prod_sum->[$i]->[$i]/$n;
      my $var   = $E_xx - ($E_x * $E_x);
      $covariance->[$i]->[$i] = $var;

      for (my $j=$i+1; $j < $d; $j++) {
        my $E_y   = $mean[$j];
        my $E_xy  = $prod_sum->[$i]->[$j]/$n;
        my $cov_xy   = $E_xy - ($E_x * $E_y);
        $covariance->[$i]->[$j] = $cov_xy;
        $covariance->[$j]->[$i] = $cov_xy;
      }
    }

    return (\@mean, $covariance);
  }
}

# It's a multivariate-capable version of what's in UniRand.
#
# Exception:
#   Give it the covariance/variance instead of standard
#   deviation.

sub normal_gen_pdf($$) {
  my $mean = shift;
  my $cov  = shift;

  if ((!defined($cov)) && (ref($mean) eq 'ARRAY') &&
  (scalar(@$mean)==2)) {
    # +[ $mean, $covariance ] form.
    my $temp = $mean;

    $mean = $temp->[0];
    $cov  = $temp->[1];
  }

  defined($mean) || die;
  defined($cov) || die;

  my $d          = undef;

  if (!ref($mean)) {
    # Mean is scalar.  Covariance should be plain variance.
    $d = 1;
  } elsif (ref($mean) eq 'ARRAY') {
    # Vector of means.
    $d = scalar(@$mean);

    ($d > 0) || die "Zero-dimensional Gaussian?";

    if ($d == 1) {
      # Make it into a scalar.
      $mean = $mean->[0];
    }
  } else {
    die "Expected either scalar or vector for mean.";
  }

  if (!ref($cov)) {
    ($d == 1) || die "Vector mean, scalar covariance.";
  } elsif (ref($cov) eq 'ARRAY') {
    ($d == scalar(@$cov)) || die "Mean, covariance mismatch.";
    ($d == scalar(@{$cov->[0]})) || die "Mean, covariance mismatch.";

    if ($d == 1) {
      # Convert to scalar.
      $cov = $cov->[0]->[0];
    }
  } elsif (ref($cov)) {
    die "Expected either scalar or vector for covariance.";
  }

  if ($d == 1) {
    my $denominator = sqrt(2*$__M_PI*$cov);
    ($denominator != 0) || die;
    my $denominator_inverse = 1/$denominator;
    my $power_factor = 0.5 / $cov;

    my $fun = sub {
      my $x = shift @_;

      # Convert from matrix to vector to scalar if need be.
      while (ref($x) eq 'ARRAY') {
        $x = $x->[0];
      }

      my $power = -(($x-$mean)**2) * $power_factor;
      my $val = exp($power) * $denominator_inverse;

      # 0 shouldn't happen, actually, but due to numerical
      # approximations it could.
      ($val >= 0) || die;
      return $val;
    };

    return $fun;
  } else {
    # f(x) = e^{-0.5(x-m)^T * cov^{-1} * (x-m)} /
    #        ((2*PI)^{d/2} * (det(cov))^0.5)

    # The Numerical package currently has three matrix inversion
    # methods -- SVD-based pseudoinverse, Gaussian elimination,
    # and QR factorization.

    my $cov_inverse = Numerical::matrix_inverse($cov);

    if (!defined($cov_inverse)) {
      return undef;
    }

    # Need to compute determinant.
    my $det_cov = Numerical::matrix_determinant($cov);

    if (!defined($det_cov)) {
      # Singular or bogus.
      # print "Bogosity -- det=$det_cov:\n";
      # Numerical::matrix_print($cov);
      return undef;
    } elsif (abs($det_cov) <= 1e-25) {
      # Numerical issues.  Very dangerous value to play with,
      # potentially; can make difference between NAN/INF and
      # normal numbers.
      $det_cov = 1e-25;
    } elsif ($det_cov < 0) {
      # Should be impossible.
      return undef;
    }

    if (0) {
      print "Covariance matrix:\n";
      Numerical::matrix_print($cov);
      print "Det = $det_cov\n";
    }

    my $denominator = ((2*$__M_PI) ** ($d/2)) * (sqrt($det_cov));

    if (!($denominator >= 0)) {
      return undef;
    }

    if ($denominator =~ /^nan$/i) {
      return undef;
    }

    if ($denominator =~ /^inf$/i) {
      return undef;
    }

    my $denominator_inv = 1/$denominator;

    # Takes input as a list reference.
    my $fun = sub {
      my @items      = Numerical::vector_preprocess(@_);
      my $row_matrix = +[ \@items ];

      for (0..($d-1)) {
        $row_matrix->[0]->[$_] -= $mean->[$_];
      }

      my $col_matrix = Numerical::matrix_transpose($row_matrix);

      my $power_matrix = Numerical::matrix_multiply($row_matrix,
      $cov_inverse);
      $power_matrix = Numerical::matrix_multiply($power_matrix, $col_matrix);

      # Should be 1x1.
      #
      # (1xd) * (dxd) * (dx1)
      #     \     /
      #      (1xd)    * (dx1)
      #         \        /
      #           (1x1)
      (scalar(@$power_matrix) == 1) || die;
      (scalar(@{$power_matrix->[0]}) == 1) || die;

      my $power = -0.5 * ($power_matrix->[0]->[0]);
      my $numerator = exp($power);

      my $val = $numerator * $denominator_inv;

      # Numerical issues.
      if (($val < 1e-30) && ($val >= -1e-30)) {
        return 1e-30;
      }

      if ($val =~ /^nan$/i) {
        return undef;
      }

      if ($val =~ /^inf$/i) {
        return undef;
      }

      return $val;
    };

    return $fun;
  }
}


# Function for generating random multivariate Gaussians.
#
# Input format:
#    Ref to list containing mean, covariance vectors.
#    May be supplied as separate arguments.
#
# Function returns either a scalar (univariate case)
# or an array ref (multivariate).

sub normal_gen_rand($;$) {
  my $mean = shift;
  my $cov  = shift;

  if ((!defined($cov)) && (ref($mean) eq 'ARRAY') &&
  (scalar(@$mean)==2)) {
    # +[ $mean, $covariance ] form.
    my $temp = $mean;

    $mean = $temp->[0];
    $cov  = $temp->[1];
  }

  defined($mean) || die;
  defined($cov) || die;

  my $d          = undef;

  if (!ref($mean)) {
    # Mean is scalar.  Covariance should be plain variance.
    $d = 1;
  } elsif (ref($mean) eq 'ARRAY') {
    # Vector of means.
    $d = scalar(@$mean);

    ($d > 0) || die "Zero-dimensional Gaussian?";

    if ($d == 1) {
      # Make it into a scalar.
      $mean = $mean->[0];
    }
  } else {
    die "Expected either scalar or vector for mean.";
  }

  if (!ref($cov)) {
    ($d == 1) || die "Vector mean, scalar covariance.";
  } elsif (ref($cov) eq 'ARRAY') {
    ($d == scalar(@$cov)) || die "Mean, covariance mismatch.";
    ($d == scalar(@{$cov->[0]})) || die "Mean, covariance mismatch.";

    if ($d == 1) {
      # Convert to scalar.
      $cov = $cov->[0]->[0];
    }
  } elsif (ref($cov)) {
    die "Expected either scalar or vector for covariance.";
  }


  if ($d == 1) {
    # Univariate Gaussian.  Use UniRand's code.
    # UniRand format is mean/deviation, not mean/variance.
    my $uni_ppf = UniRand::normal_gen_ppf(+[ $mean, sqrt($cov) ]);

    my $fun = sub {
      my $val  = undef;

      loop:  {
        my $roll = rand();
        my $val = &$uni_ppf($roll);
        defined($val) || redo loop;
      }

      return $val;
    };
    return $fun;
  } else {
    # Multivariate Gaussian.
    my ($L, $D, $U) = Numerical::matrix_LDU($cov);

    defined($L) || die;

    # Given that the covariance matrix is supposed to be non-
    # negative definite, symmetric...
    # $U=$L^{$T}

    map {
      $D->[$_]->[$_] = sqrt($D->[$_]->[$_])
    } (0..($d-1));

    # $cov = AA^{T} now.
    my $A = Numerical::matrix_multiply($L, $D);

    my $fun = sub {
      my @X = ();

      for (1..$d) {
        my $val = undef;

        loop:
        {
          my $roll = rand();
          $val  = UniRand::normal_std_ppf($roll);

          if (!defined($val)) {
            redo loop;
          }
        }

        push @X, $val;
      }

      # Matrix result.
      my $result = Numerical::matrix_multiply($A, \@X);

      # Convert to vector.
      my @vector = Numerical::vector_preprocess($result);

      for (0..($d-1)) {
        $vector[$_] += $mean->[$_];
      }
      return \@vector;
    };

    return $fun;
  }
}

##################################################################
# Private methods only beyond this point.
##################################################################






# Compute L_hat (Sec. 2.2, equation 7); it's log-likelihood, but on
# not only data points but summarized data points.
sub _compute_ll_buf($$$) {
  # phi:  Parameter array.  Specifically, a ref to a list whose
  #       members are each references to lists.  Each of THESE lists
  #       must contain a weight (weights must sum to 1), a mean, and
  #       a variance (not deviation!).
  #
  # R:    Ref to list of data.
  # BC:   Ref to list of data summaries; each summary = lref of
  #       sum, sum-squared, count.  Named BC because in general,
  #       it'll be the union of the B and C sets.

  my $phi = shift;
  my $R   = shift;
  my $BC  = shift;

  # $sum == l_hat.

  my $sum = 0;

  if (!scalar($R)) {
    return $sum;
  }

  # Dimensionality.
  my $d = (ref($R->[0])) ? scalar(@{$R->[0]}) : 1;

  my $improbable = 1e-40;  # Numerical-error time?
  my @pdf_wts = ();

  # _debug_print "*DEBUG*   In _compute_ll_buf\n";

  ##################################################################
  # Step 0:  Computing initial PDFs
  ##################################################################
  {
    my $comp = undef;

    foreach $comp (@$phi) {
      defined($comp) || die;
      my ($weight, $mean, $variance) = @$comp;

      ($weight > 0) || next;

      # SEM's version of normal_gen_pdf accepts multivariate normals,
      # and far less importantly uses variance instead of standard
      # deviation for the parameter.

      my $pdf = normal_gen_pdf( $mean, $variance );

      if (!defined($pdf)) {
        return undef;
      }
      push @pdf_wts, +[ $pdf, $weight];
    }
  }


  ##################################################################
  # Step 1:  Computing likelihood for individual data
  ##################################################################

  STEP1:  {
    my $datum = undef;

    if (!defined($R)) {
      last STEP1;
    }

    foreach $datum (@$R) {
      my $pdf_wt  = undef;
      my $lh      = 0;

      foreach $pdf_wt (@pdf_wts) {
        my ($pdf, $weight) = @$pdf_wt;

        ($weight > 0) || next;
        my $prob = &$pdf($datum);

        if (!($prob)) {
          return undef;
        }

        ($prob >= 0) || die;
        $lh += $prob * $weight;
      }
      if ($lh > $improbable) {
        $sum += log($lh);
      } else {
        $sum += log($improbable);
      }
    }
  }


  ##################################################################
  # Step 2:  Computing likelihood for summaries
  ##################################################################

  STEP2:  {
    my $summary = undef;

    if (!defined($BC)) {
      last STEP2;
    }


    # Recall:
    #   Vector of sums, matrix of products, count.
    foreach $summary (@$BC) {
      my ($c_sum, $c_sum2, $c_N) = @$summary;
      my $lh = 0;

      ($c_N > 0) || next;

      my $pdf_wt = undef;
      my $c_mean   = 0;

      if ($d == 1) {
        # Univariate.
        $c_mean = $c_sum / $c_N;
      } else {
        # Multivariate.
        $c_mean = util::deep_copy($c_sum);
        map { $_ /= $c_N } @$c_mean;
      }

      foreach $pdf_wt (@pdf_wts) {
        my ($pdf, $weight) = @$pdf_wt;

        my $prob = &$pdf($c_mean);
        if (!$prob) {
          return undef;
        }
        $lh += $prob * $weight;
      }

      if ($lh > $improbable) {
        $sum += $c_N * log($lh);
      } else {
        $sum += $c_N * log($improbable);
      }
    }
  }

  return $sum;
}




# Extended EM -- See Algorithm 3 in section 2.2.
sub _ExEM($) {
  my $self = shift;
  my $R   = $self->{'R'};
  my $B   = $self->{"B"};
  my $C   = $self->{"C"};
  my $phi = $self->{"phi"};
  my $d   = $self->{"d"};
  my $epsilon = $self->{"epsilon"};
  my $m   = $self->{"m"};

  # And EM went forth into the desert for $max_iter EM cycles and $max_iter
  # prune/merge/reseed cycles without finding a better way, and
  # then its quest was declared over.
  my $max_iter = 40;
  my $iter     = 0;

  my $total_iter = 0;
  my $max_total_iter = 200;

  # When the covariance matrix determinant does not compute well, multiply
  # the variances by just this much.
  my $var_hack_factor = 1.01;

  my $R_ct      = scalar @$R;
  my $BC        = +[ @$B, @$C ];
  my $BC_ct     = scalar @$BC;

  defined($R) || die;
  defined($B) || die;
  defined($C) || die;
  defined($phi) || die;
  defined($epsilon) || die;

  my $k = $self->{"k"};
  ($k ==  (scalar @$phi)) || die;

  _debug_print "*DEBUG*   In _ExEM\n";

  my $ll_old      = undef;
  my $reseed      = 0;

  while ((!defined($ll_old)) && ($reseed++ < 10)) {
    $ll_old = _compute_ll_buf($phi, $R, $BC);

    if (!defined($ll_old)) {
      @$B = grep { $_->[2] > 0 } @$B;
      push @$C, @$B;

      if ($d == 1) {
        @$B = (+[0,0,0]) x $k;
      } else {
        @$B = ();

        for (1..$k) {
          push @$B, +[ +[(0)x$d], Numerical::matrix_new($d, $d), 0];
        }
      }

      if (rand() < 0.5) {
        @$BC = ( @$B, @$C );
        $self->_bootstrap($R, $k);
        $phi = util::deep_copy($self->{'phi'});

        _debug_print "*DEBUG*   In _ExEM - reseeding (initial-1)\n";
      } else {
        # Increase variances, marginally
        if ($d == 1) {
          map { $_->[2] *= $var_hack_factor } @$phi;
        } else {
          for (my $i=0; $i < $k; $i++) {
            for (my $j=0; $j < $d; $j++) {
              $phi->[$i]->[2]->[$j]->[$j] *= $var_hack_factor;
            }
          }
        }
        $var_hack_factor *= 1.1;
        $self->{'phi'} = util::deep_copy($phi);
        _debug_print "*DEBUG*   In _ExEM - reseeding (initial-2)\n";
      }
    }
  }

  if (!defined($ll_old)) {
    return;
  }

  my $ll_best     = $ll_old;

  my $phi_best    = util::deep_copy($phi);
  my $B_best      = util::deep_copy($B);
  my $C_best      = util::deep_copy($C);

  loop:  {

    ##################################################################
    # Step 0:  Computing initial PDFs
    ##################################################################
    # _debug_print "*DEBUG*   In _ExEM main loop, step 0\n";

    my @pdf_wts = ();
    # Compute PDFs first so we don't need to recompute for every datum.
    {
      my $comp = undef;

      foreach $comp (@$phi) {
        defined($comp) || die;
        my ($weight, $mean, $variance) = @$comp;

        if ($weight == 0) {
          push @pdf_wts, +[ undef, 0 ];
          next;
        }

        my $pdf = normal_gen_pdf($mean, $variance);
        defined($pdf) || die;
        push @pdf_wts, +[ $pdf, $weight];
      }
    }



    ##################################################################
    # Step 1:  Compute membership for singleton records in R.
    ##################################################################
    #
    # For each item in R, we need to compute the probability (fuzzy
    # membership) for each cluster in phi.
    my @weights_R  = ();

    {
      my $datum = undef;

      # _debug_print "*DEBUG*   In _ExEM main loop, step 1\n";
      foreach $datum (@$R) {
        defined($datum) || die;

        my $sum_prob  = 0;
        my @wtd_probs = ();
        my $pdf_wt    = undef;

        foreach $pdf_wt (@pdf_wts) {
          my ($pdf, $weight) = @$pdf_wt;

          if ($weight == 0) {
            push @wtd_probs, 0;
            next;
          }

          my $prob   = &$pdf($datum);
          defined($prob) || die;

          my $product = $weight*$prob;

          push @wtd_probs, $product;
          $sum_prob += $product;
        }

        if ($sum_prob <= 1e-6) {
          $sum_prob = 1e-6;
        }

        # Normalize the weights.
        map { $_ /= $sum_prob } @wtd_probs;

        push @weights_R, \@wtd_probs;
      }
    }



    ##################################################################
    # Step 2:  Computing membership for sufficient statistics in B, C
    ##################################################################
    my @weights_BC = ();

    {
      my $tuple = undef;

      # _debug_print "*DEBUG*   In _ExEM main loop, step 2\n";
      foreach $tuple (@$BC) {
        defined($tuple) || die;

        my ($sum, $sum2, $n) = @$tuple;

        defined($sum) || die;
        defined($sum2) || die;
        defined($n)   || die;

        if ($n == 0) {
          push @weights_BC, undef;
          next;
        }

        my $mean      = 0;
        my $sum_prob  = 0;
        my @wtd_probs = ();
        my $pdf_wt    = undef;

        if ($d == 1) {
          # Univariate.
          $mean = $sum/$n;
        } else {
          # Multivariate.
          $mean = util::deep_copy($sum);
          map { $_ /= $n } @$mean;
        }

        foreach $pdf_wt (@pdf_wts) {
          my ($pdf, $weight) = @$pdf_wt;

          if ($weight == 0) {
            push @wtd_probs, 0;
            next;
          }

          my $prob  = &$pdf($mean);

          defined($prob) || die;

          # N.B.  This does NOT get multiplied by $n.
          my $product = ($prob * $weight);

          push @wtd_probs, $product;
          $sum_prob += $product;
        }

        ($sum_prob > 0) || die;
        map { $_ /= $sum_prob } @wtd_probs;
        push @weights_BC, \@wtd_probs;
      }
    }


    ##################################################################
    # Step 3:  Updating cluster parameters
    ##################################################################
    my $phi_new = +[];


    {
      # _debug_print "*DEBUG*   In _ExEM main loop, step 3\n";
      # Straight sums, weighted appropriately according to
      # datum/component responsibility.
      if ($d == 1) {
        # Unidimensional.
        for (my $j=0; $j < $k; $j++) {
          my $sum  = 0;
          my $sum2 = 0;
          my $Nh   = 0;

          for (my $i=0; $i < $R_ct; $i++) {
            my $likely    = $weights_R[$i];
            my $datum     = $R->[$i];
            my $weight_ij = $likely->[$j];

            defined($weight_ij) || die;
            $Nh   += $weight_ij;
            $sum  += $datum * $weight_ij;
            $sum2 += $datum * $datum * $weight_ij;
          }

          for (my $i=0; $i < $BC_ct; $i++) {
            my $likely = $weights_BC[$i];
            my ($c_sum, $c_sum2, $c_ct) = @{$BC->[$i]};
            my $weight_ij = $likely->[$j];

            if (!($c_ct)) {
              next;
            }

            defined($weight_ij) || die;

            $Nh   += $weight_ij * $c_ct;
            $sum  += $c_sum  * $weight_ij;
            $sum2 += $c_sum2 * $weight_ij;
          }


          my $prob = $Nh/$m;
          my $mean = $sum/$Nh;
          my $E_xx = $sum2/$Nh;
          my $var  = $E_xx - ($mean * $mean);

          $phi_new->[$j] = +[ $prob, $mean, $var ];
        }
      } else {
        # Multidimensional.
        for (my $j=0; $j < $k; $j++) {
          my $Nh   = 0;
          my @sum  = (0) x $d;
          my $sum2 = Numerical::matrix_new($d, $d);

          for (my $i=0; $i < $R_ct; $i++) {
            my $likely = $weights_R[$i];
            my $datum  = $R->[$i];
            my $weight_ij = $likely->[$j];

            defined($weight_ij) || die;
            $Nh     += $weight_ij;

            for (0..($d-1)) {
              $sum[$_] += $datum->[$_] * $weight_ij;
            }

            for (my $row=0; $row < $d; $row++) {
              for (my $col=$row; $col < $d; $col++) {
                my $prod = $weight_ij * $datum->[$row] * $datum->[$col];
                $sum2->[$row]->[$col] += $prod;
              }
            }
          }

          for (my $i=0; $i < $BC_ct; $i++) {
            my $likely = $weights_BC[$i];
            my ($c_sum, $c_sum2, $c_ct) = @{$BC->[$i]};
            my $weight_ij = $likely->[$j];

            if (!($c_ct)) {
              next;
            }

            defined($weight_ij) || die;
            $Nh     += $weight_ij * $c_ct;

            for (0..($d-1)) {
              $sum[$_] += $c_sum->[$_] * $weight_ij;
            }

            for (my $row=0; $row < $d; $row++) {
              for (my $col=$row; $col < $d; $col++) {
                my $prod = $weight_ij * $c_sum2->[$row]->[$col];
                $sum2->[$row]->[$col] += $prod;
              }
            }
          }

          my $prob = $Nh / $m;
          my @mean = map { $_ / $Nh } @sum;
          my $cov  = Numerical::matrix_new($d, $d);

          for (my $row=0; $row < $d; $row++) {
            for (my $col=$row; $col < $d; $col++) {
              my $e_xy = $sum2->[$row]->[$col] / $Nh;
              my $e_x  = $sum[$row] / $Nh;
              my $e_y  = $sum[$col] / $Nh;
              my $cov_xy = $e_xy - ($e_x * $e_y);

              $cov->[$row]->[$col] = $cov_xy;
              $cov->[$col]->[$row] = $cov_xy;
            }
          }

          $phi_new->[$j] = +[ $prob, \@mean, $cov ];
        }
      }
    }

    _debug_print "*DEBUG*   In _ExEM, testing goodness of new values\n";


    my $ll_new = undef;

    while (!defined($ll_new)) {
      $ll_new = _compute_ll_buf($phi_new, $R, $BC);

      if (!defined($ll_new)) {
        # Don't do this forever.
        $iter++;
        $total_iter++;

        if (($iter >= $max_iter) || ($total_iter >= $max_total_iter)) {
          $phi           = util::deep_copy($phi_best);
          $B             = util::deep_copy($B_best);
          $C             = util::deep_copy($C_best);
          $ll_old        = $ll_best;
          $BC            = +[ @$B, @$C ];
          $BC_ct         = scalar @$BC;
          last loop;
        }

        if (rand() < 0.5) {
          @$BC = ( @$B, @$C );
          $self->_bootstrap($R, $k);
          $phi_new = util::deep_copy($self->{'phi'});

          _debug_print "*DEBUG*   In _ExEM - reseeding (late-1)\n";
        } else {
          # Increase variances, marginally
          if ($d == 1) {
            map { $_->[2] *= $var_hack_factor } @$phi_new;
          } else {
            for (my $i=0; $i < $k; $i++) {
              for (my $j=0; $j < $d; $j++) {
                $phi_new->[$i]->[2]->[$j]->[$j] *= $var_hack_factor;
              }
            }
          }
          _debug_print "*DEBUG*   In _ExEM - reseeding (late-2)\n";
        }
      }
    }

    my $diff   =  ($ll_new - $ll_old);

    _debug_print "*DEBUG*   In _ExEM, ll=$ll_new [diff=$diff]\n";
    if ((!defined($ll_best)) || ($ll_best < $ll_new)) {
      # Is this the best we've seen so far?
      my $ll_best_old = $ll_best;

      $ll_best = $ll_new;
      $phi_best = util::deep_copy($phi_new);
      $B_best   = util::deep_copy($B);
      $C_best   = util::deep_copy($C);

      # Reset "iterations spent wandering in the desert" counter
      # IF difference is significant.
      if ((defined($ll_best_old)) &&
      (($ll_best - $ll_best_old)/$m) > $epsilon) {
        $iter = 0;
      } else {
        $iter++;
      }
    } else {
      $iter++;
    }

    if ((abs($diff/$m) < $epsilon) || ($iter >= $max_iter)) {
      # Either it converged, or we've gone too long without
      # setting a new "best" parameter set.  Revert to best-seen
      # configuration.
      $phi           = util::deep_copy($phi_best);
      $B             = util::deep_copy($B_best);
      $C             = util::deep_copy($C_best);
      $ll_old        = $ll_best;
      $BC            = +[ @$B, @$C ];
      $BC_ct         = scalar @$BC;
      last loop;
    } else {
      $ll_old = $ll_new;
      $phi = util::deep_copy($phi_new);
      redo loop;
    }
  }


  ##################################################################
  # MAIN LOOP ENDED
  ##################################################################

  _debug_print "*DEBUG*   In _ExEM, exited main loop\n";

  {
    # Just to be sure.  It shouldn't be necessary, 'tho.  We don't
    # draw upon @$B for reseeding because those are already
    # close to existing cluster means on a 1:1 basis.

    my $active = scalar @{$C};
    if ($active < ($k-1)) {
      # We want at least k-1 there, as theoretically we could
      # find ourselves replacing that many nodes.
      goto end;
    }
  }

  ##################################################################
  # Step 4:  Removal of tiny components
  ##################################################################
  drop_tiny:  {
    _debug_print "*DEBUG*   In _ExEM, detecting tiny components\n";

    my $drop_time = $self->{"drop_time"};
    my $drop_size = $self->{"drop_size"};
    my $m         = $self->{"m"};

    if ((!defined($drop_time)) || (!defined($drop_size)) ||
    ($m < $drop_time)) {
      # Behavior disabled (unspecified parameters) or it's too
      # early (not enough data seen yet).
      _debug_print "*DEBUG*   In _ExEM, skipping tiny detection\n";
      last drop_tiny;
    }

    # ($weight, $mean, $variance)
    my $ct = scalar @$phi;

    loop:  for (my $i=0; $i < $ct; $i++) {
      my ($weight, $mean, $variance) = @{$phi->[$i]};

      if ($weight <= $drop_size) {
        splice @$phi, $i, 1;
        $ct--;

        # The entries in B are supposed to be parallel to the
        # components in phi.
        _debug_print "*DEBUG*   In _ExEM, dropping tiny component $i\n";
        splice @$B, $i, 1;

        if ($i < $ct) {
          redo loop;
        } else {
          last loop;
        }
      }
    }
  }



  ##################################################################
  # Step 5:  Merging of coincident components
  ##################################################################
  merge_too_close: {
    _debug_print "*DEBUG*   In _ExEM Merging phase\n";

    my $ct = scalar @$phi;
    my $merge_close = $self->{"merge_close"};
    defined($merge_close) || last merge_too_close;
    # $ct may be less than $k now

    outer_loop:
    for (my $i=0; $i < ($ct-1); $i++) {
      my ($weight_i, $mean_i, $var_i) = @{$phi->[$i]};

      inner_loop:
      for (my $j=$i+1; $j < $ct; $j++) {
        my ($weight_j, $mean_j, $var_j) = @{$phi->[$j]};

        if ($d == 1) {
          # Unidimensional.
          my $mag_mean = (abs($mean_i) >= abs($mean_j)) ? abs($mean_i) :
          abs($mean_j);
          my $mag_var = (abs($var_i) >= abs($var_j)) ? abs($var_i) :
          abs($var_j);

          my $diff_mean = abs($mean_i - $mean_j);
          my $diff_var  = abs($var_i - $var_j);

          if ($diff_mean > ($merge_close * $mag_mean)) {
            next inner_loop;
          }

          if ($diff_var > ($merge_close * $mag_var)) {
            next inner_loop;
          }
        } else {
          # Multidimensional.

          # Check mean.
          for (my $d_idx=0; $d_idx < $d; $d_idx++) {
            my $mag = (abs($mean_i->[$d_idx]) >= abs($mean_j->[$d_idx])) ?
            abs($mean_i->[$d_idx]) : abs($mean_j->[$d_idx]);
            my $diff = abs($mean_i->[$d_idx] - $mean_j->[$d_idx]);

            if ($diff > ($merge_close * $mag)) {
              next inner_loop;
            }
          }

          # Mean vectors close enough.  Check covariance matrices.

          for (my $row=0; $row < $d; $row++) {
            for (my $col=0; $col < $d; $col++) {
              my $var_i_rc = $var_i->[$row]->[$col];
              my $var_j_rc = $var_j->[$row]->[$col];

              my $mag = (abs($var_i_rc) >= abs($var_j_rc)) ?
              abs($var_i_rc) : abs($var_j_rc);
              my $diff = abs($var_i_rc - $var_j_rc);

              if ($diff > ($merge_close * $mag)) {
                next inner_loop;
              }
            }
          }
        }


        # Components sufficiently close.  Merge them.
        $phi->[$i]->[0] += $weight_j;

        # Recall tuple format:
        # (d=1)
        #    sum, sum of squared values, count
        # (d>1)
        #    sum (vector), sum of vector/transpose product (matrix), count

        $B->[$i]->[2] += $B->[$j]->[2];
        if ($d == 1) {
          $B->[$i]->[0] += $B->[$j]->[0];
          $B->[$i]->[1] += $B->[$j]->[1];
        } else {
          for (0..($d-1)) {
            $B->[$i]->[0]->[$_] += $B->[$j]->[0]->[$_];
          }

          for (my $row=0; $row < $d; $row++) {
            for (my $col=0; $col < $d; $col++) {
              $B->[$i]->[1]->[$row]->[$col] +=
              $B->[$j]->[1]->[$row]->[$col];
            }
          }
        }

        _debug_print "*DEBUG*   In _ExEM, merging $i, $j\n";
        splice @$phi, $j, 1;
        splice @$B, $j, 1;

        $ct--;

        if ($j == $ct) {
          # That was the last one.
          last inner_loop;
        } else {
          # At least one more. Use 'redo' to avoid incrementing
          # $j again.
          redo inner_loop;
        }
      }
    }
  }


  ##################################################################
  # Step 6:  Reseeding components and restarting ExEM, if need be
  ##################################################################

  {
    my $dropped_components = $k - scalar(@$phi);
    _debug_print "*DEBUG*   In _ExEM Reseeding stage\n";

    if ($dropped_components == 0) {
      # Nothing was dropped or merged away, so we can terminate
      # Extended EM now.
      goto end;
    }

    my $total_weight = 0;

    # How much weight do the surviving components have?
    map { $total_weight += $_->[0] } @$phi;

    # Sanity check.
    die unless ($total_weight > 0);

    # Normalize them so that they add up to
    # ($k-$dropped_components)/$k.

    my $factor = ($k-$dropped_components) / ($k * $total_weight);
    map { $_->[0] *= $factor } @$phi;


    # Each.
    my $weight_new = 1/$k;
    my $C_ct  = scalar @$C;

    if ($C_ct < $dropped_components) {
      goto end;
    }

    for (my $i=0; $i < $dropped_components; $i++) {
      # Mirror, mirror on the wall,
      # Which summarized group is the most unlikely of them all?
      my $min_ll_avg_idx = undef;
      my $min_ll_avg_val = undef;

      for (my $j=0; $j < $C_ct; $j++) {
        my $n = $C->[$j]->[2];

        ($n > 0) || next;

        # Modification:  Go by average log-likelihood, so between an
        # unlikely small cluster and an unlikely large cluster we
        # don't necessarily favor the smaller one.
        my $ll_avg = _compute_ll_buf($phi, undef, +[ $C->[$j] ]) / $n;

        if ((!defined($min_ll_avg_val)) || ($min_ll_avg_val > $ll_avg)) {
          $min_ll_avg_idx = $j;
          $min_ll_avg_val = $ll_avg;
        }
      }

      (defined($min_ll_avg_idx)) || die;

      my @tuple            = @{$C->[$min_ll_avg_idx]};
      my $C_sum  = $tuple[0];
      my $C_sum2 = $tuple[1];
      my $C_n    = $tuple[2];

      ($C_n > 0) || next;

      my $mean = undef;
      my $variance = undef;

      if ($d == 1) {
        # Univariate reseed.
        my $E_x  = $C_sum/$C_n;
        my $E_xx = $C_sum2/$C_n;

        $mean = $E_x;
        $variance  = $E_xx - ($E_x * $E_x);

        if ($variance < 0.001) {
          $variance = 0.001;
        }
      } else {
        $mean = +[];
        $variance = Numerical::matrix_new($d, $d);

        for (0..($d-1)) {
          $mean->[$_] = $C_sum->[$_] / $C_n;
        }


        for (my $row=0; $row < $d; $row++) {
          my $E_x  = $C_sum->[$row] / $C_n;

          for (my $col=$row; $col < $d; $col++) {
            my $E_xy = $C_sum2->[$row]->[$col] / $C_n;
            my $E_y  = $C_sum->[$col] / $C_n;

            $variance->[$row]->[$col] = $E_xy - ($E_x * $E_y);
            $variance->[$col]->[$row] = $E_xy - ($E_x * $E_y);
          }
        }


        # Sanity check.
        {
          my $bad = 0;
          for (0..($d-1)) {
            if ($variance->[$_]->[$_] < 0) {
              $bad = 1;
              last;
            }
          }

          $bad = $bad || (!defined(normal_gen_pdf($mean, $variance)));

          if ($bad) {
            return undef;

            # Or do the following:
            print "n:  $C_n\n";
            print "Estimated variance:\n";
            Numerical::matrix_print($variance);
            print "sum2:\n";
            Numerical::matrix_print($C_sum2);
            die;
          }
        }
      }

      push @$B, +[ $C_sum, $C_sum2, $C_n ];
      splice @$C, $min_ll_avg_idx, 1;
      push @$phi, +[ $weight_new, $mean, $variance ];


      _debug_print "*DEBUG*   In _ExEM, reseeded a component $i/$dropped_components/$k\n";
      # _debug_print "*DEBUG*   In _ExEM, reseed is [$weight_new, $mean, $variance]\n";
      $C_ct--;

      ((scalar @$B) == scalar (@$phi)) || die;
    }

    $BC = +[ @$B, @$C ];
    $BC_ct = scalar @$BC;


    _debug_print "*DEBUG*   Estimates after reseeding:\n";
    foreach (@$phi) {
      _debug_print "\t[", join(", ", @{$_}), "]\n";
    }
    _debug_print "\n";

    $ll_old = _compute_ll_buf($phi, $R, $BC);
    if (!defined($ll_old)) {
      last;
    }

    if ($ll_best < $ll_old) {
      if ((($ll_old-$ll_best)/$m) > $epsilon) {
        $iter = 0;
      }
      $ll_best = $ll_old;
      $phi_best = util::deep_copy($phi);
      $B_best   = util::deep_copy($B);
      $C_best   = util::deep_copy($C);
    } else {
      last;
    }

    _debug_print "*DEBUG*   In _ExEM, restarting EM loop\n";
    goto loop;
  }


  end:
  # These are were deep-copied before, so we don't need to again.
  $self->{"phi"} = $phi_best;
  $self->{"B"}   = $B_best;
  $self->{"C"}   = $C_best;
}





# Primary summarization, as in 2.3.1.
#
# Difference:  Do not add a point to one component's summary if
# it would be better attached to an other component based on
# PDFs.
sub _primary_summarization($) {
  my $self = shift;

  my $R   = $self->{'R'};
  my $B   = $self->{"B"};
  my $phi = $self->{"phi"};
  my $p   = $self->{"p"};
  my $k   = $self->{"k"};
  my $d   = $self->{"d"};
  _debug_print "*DEBUG*   In primary summarization\n";

  if (!defined($p)) {
    _debug_print "*DEBUG*   Leaving primary summarization\n";
    return undef;
  }

  # How many do we allocate to each component?
  my $ct = int(($p * (scalar @$R)) / $k);

  if (!$ct) {
    # Too few to summarize.
    _debug_print "*DEBUG*   No prime-sum, as ct would be only $ct\n";
    return;
  }

  # Compute ALL Mahalanobis distances (well, squared distances;
  # squaring preserves order).  Store them in <datum, sq-distance>
  # heaps  according to which component they'd
  # currently be assigned.
  my @distance_funs   = ();
  my @distance_heaps  = ();
  my @pdfs            = ();

  for (my $i=0; $i < $k; $i++) {
    my $comp_prob = $phi->[$i]->[0];
    my $comp_mean = $phi->[$i]->[1];
    my $comp_var  = $phi->[$i]->[2];

    _debug_print "*DEBUG*   Computing Mahalanobis function for $i/$k\n";
    $distance_heaps[$i] = Heap::new Heap();


    my $pdf = normal_gen_pdf($comp_mean, $comp_var);

    if (!defined($pdf)) {
      return undef;
    }

    push @pdfs, sub {
      my $x         = shift @_;
      my $base_prob = &$pdf($x);

      if (!defined($base_prob)) {
        # Or darn close.
        return 0;
      } else {
        return ($base_prob * $comp_prob);
      }
    };

    if ($d == 1) {
      push @distance_funs, sub {
        my $x    = shift @_;
        my $dist = (($x-$comp_mean)**2)/$comp_var;
        return $dist;
      };
    } else {
      my $var_inverse = Numerical::matrix_inverse($comp_var);
      defined($var_inverse) || die;

      push @distance_funs, sub {
        my $xvec = shift @_;
        my @dvec = map { $xvec->[$_] - $comp_mean->[$_] }
        (0..($d-1));
        my $dist = Numerical::matrix_multiply(+[ \@dvec ],
        Numerical::matrix_multiply($var_inverse, \@dvec ));
        $dist = $dist->[0]->[0];
        return $dist;
      };
    }
  }

  my $datum  = undef;
  my $new_R  = +[];

  _debug_print "*DEBUG*   Computing Mahalanobis distances\n";
  foreach $datum (@$R) {
    my $best_comp = undef;
    my $best_dist = undef;
    my $best_prob = undef;
    my $i         = undef;

    for $i (0..($k-1)) {
      my $pdf  = $pdfs[$i];
      my $prob = &$pdf($datum);
      my $distance_fun = $distance_funs[$i];

      if (defined($prob) && ((!defined($best_prob)) ||
      ($best_prob < $prob))) {
        $best_prob = $prob;
        $best_comp = $i;
        $best_dist = &$distance_fun($datum);
      }
    }

    if (defined($best_prob)) {
      my $heap = $distance_heaps[$best_comp];
      $heap->insert($best_dist, $datum);
    } else {
      # None?!
      push @$new_R, $datum;
    }
  }

  _debug_print "*DEBUG*   Processing heaps\n";
  for (my $i=0; $i < $k; $i++) {
    my $heap        = $distance_heaps[$i];
    my $heap_ct     = $heap->count();
    my $thresh      = undef;

    my $tuple = undef;

    for (my $j=0; $j < $heap_ct; $j++) {
      my ($dist, $datum) = $heap->remove();

      if ((!defined($thresh)) || ($dist <= $thresh)) {
        if ($d == 1) {
          $B->[$i]->[0] += $datum;
          $B->[$i]->[1] += ($datum**2);
          $B->[$i]->[2]++;
        } else {
          for (my $j=0; $j < $d; $j++) {
            $B->[$i]->[0]->[$j] += $datum->[$j];
          }

          for (my $j=0; $j < $d; $j++) {
            for (my $x=0; $x < $d; $x++) {
              $B->[$i]->[1]->[$j]->[$x] += $datum->[$j] *
              $datum->[$x];
            }
          }
          $B->[$i]->[2]++;
        }
        if (($j+1) == $ct) {
          $thresh = $dist;
        }
      } else {
        push @$new_R, $datum;
        last;
      }
    }

    $heap_ct = $heap->count();

    for (my $j=0; $j < $heap_ct; $j++) {
      my ($dist, $datum) = $heap->remove();
      push @$new_R, $datum;
    }
  }

  _debug_print "*DEBUG*   Primary summarization complete\n";
  $self->{'R'} = $new_R;
}




# Secondary summarization, as in 2.3.2; K-Means followed by
# hierarchical agglomerative clustering.
sub _secondary_summarization($) {
  my $self = shift;

  my $R = $self->{'R'};
  my $C = $self->{"C"};
  my $d = $self->{'d'};

  my $k_prime = $self->{"k_prime"};
  my $beta = $self->{"beta"};
  my $beta_relative = $self->{"beta_relative"};

  _debug_print "*DEBUG*   Secondary summarization\n";

  if (defined($beta_relative)) {
    my $sums  = $self->{"sums"};
    my $sum2s = $self->{"sum2s"};
    my $m     = $self->{"m"};

    if (!($m > 1)) {
      _debug_print "*DEBUG*  Leaving secondary summarization, m=$m\n";
      return undef;
    }

    if ($d == 1) {
      my $global_variance = ($sum2s - ($sums * $sums / $m))/$m;

      $beta = $global_variance * $beta_relative;
    } else {
      my $max_variance = undef;

      for (0..($d-1)) {
        my $attr_variance = ($sum2s->[$_] - (($sums->[$_] ** 2)/$m))/$m;

        if ((!defined($max_variance)) || ($max_variance < $attr_variance)) {
          $max_variance = $attr_variance;
        }
      }

      $beta = $max_variance * $beta_relative;
    }
  }

  if (!(defined($k_prime) && defined($beta) &&
  (scalar(@$R) > $k_prime))) {
    _debug_print "*DEBUG*  Leaving secondary summarization\n";
    return undef;
  }


  ##################################################################
  # Step 0:  k-Means
  ##################################################################
  my $km = kMeans::new kMeans;

  _debug_print "*DEBUG* Initializing via kMeans\n";
  my ($means, $assignments) = $km->cluster($R, $k_prime);

  if (!defined($means)) {
    _debug_print "*DEBUG* Initializing via kMeans -- failed\n";
    return undef;
  }

  ##################################################################
  # Step 1:  Creation of new summaries.
  ##################################################################
  {
    my $R_ct = scalar @$R;
    my %valid_clusters = ();

    _debug_print "*DEBUG* Creating new summaries\n";
    if ($d == 1) {
      my @k_sums  = (0) x $k_prime;
      my @k_sum2s = (0) x $k_prime;
      my @k_cts   = (0) x $k_prime;

      for (my $i=0; $i < $R_ct; $i++) {
        my $which = $assignments->[$i];
        my $datum = $R->[$i];

        $k_sums[$which]  += $datum;
        $k_sum2s[$which] += $datum ** 2;
        $k_cts[$which]++;
      }


      _debug_print "*DEBUG*   In secondary summarization\n";
      #_debug_print "*DEBUG*   K-means are:\n";
      for (my $i=0; $i < $k_prime; $i++) {
        my $sum  = $k_sums[$i];
        my $sum2 = $k_sum2s[$i];
        my $ct   = $k_cts[$i];

        # _debug_print "*DEBUG*  <", $k_means[$i], ", $sum, $sum2, $ct>\n";
        if ($ct < 2) {
          next;
        }

        # Similar to Sec 2.3.2, Eq 8, but using 1/ct instead of 1/d,
        # as this makess more sense to me.
        my $variance = ($sum2 - (($sum ** $sum) / $ct)) / $ct;

        if ($variance < 0) {
          $variance = 0;
        }

        if (sqrt($variance) > $beta) {
          next;
        }

        $valid_clusters{$i} = 1;
        push @$C, +[ $sum, $sum2, $ct ];
      }
    } else {
      # Multidimensional.
      my @k_sums = ();
      my @k_sums2s = ();
      my @k_cts = (0) x $k_prime;

      for (1..$k_prime) {
        push @k_sums, +[ (0) x $d ];
        push @k_sums2s, Numerical::matrix_new($d, $d);
      }

      for (my $i=0; $i < $R_ct; $i++) {
        my $which = $assignments->[$i];
        my $datum = $R->[$i];

        for (0..($d-1)) {
          $k_sums[$which]->[$_]  += $datum->[$_];
        }

        for (my $row=0; $row < $d; $row++) {
          my $x = $datum->[$row];

          for (my $col=0; $col < $d; $col++) {
            my $y = $datum->[$col];

            $k_sums2s[$which]->[$row]->[$col] += $x * $y;
          }
        }

        $k_cts[$which]++;
      }

      %valid_clusters = ();

      _debug_print "*DEBUG*   In secondary summarization\n";
      #_debug_print "*DEBUG*   K-means are:\n";

      for (my $i=0; $i < $k_prime; $i++) {
        my $sum  = $k_sums[$i];
        my $sum2 = $k_sums2s[$i];
        my $ct   = $k_cts[$i];

        # _debug_print "*DEBUG*  <", $k_means[$i], ", $sum, $sum2, $ct>\n";
        if ($ct < 2) {
          next;
        }

        my $max_val = undef;

        # Similar to Sec 2.3.2, Eq 8, but using 1/ct instead of 1/d,
        # as this makess more sense to me.
        for (my $t=0; $t < $d; $t++) {
          my $val = ($sum2->[$t]->[$t] - (($sum->[$t] ** 2)/$ct))/$ct;

          $val = ($val >= 0) ? $val : 0;
          $val = sqrt($val);

          if ((!defined($max_val)) || ($val > $max_val)) {
            $max_val = $val;
          }
        }

        if ($max_val > $beta) {
          next;
        }

        $valid_clusters{$i} = 1;
        push @$C, +[ $sum, $sum2, $ct ];
      }

    }

    if (!scalar(keys %valid_clusters)) {
      _debug_print "*DEBUG* Leaving secondary summarization\n";
      return;
    }

    # Trim @$R
    my @new_R = ();

    for (my $i=0; $i < $R_ct; $i++) {
      if (!defined($valid_clusters{$assignments->[$i]})) {
        push @new_R, $R->[$i];
      }
    }
    @$R = @new_R;
  }


  ##################################################################
  # Step 2:  Hierarchical agglomerative clustering.
  #
  # Like most of the rest of the code, this is a LOT easier in the
  # unidimensional case.  Aaaaargh.
  ##################################################################

  {
    # Finding the nearest two clusters will be easier if we sort
    # them by their means.

    _debug_print "*DEBUG* Beginning HAC\n";

    my @C_means   = ();
    my $C_ct      = scalar @$C;
    ($C_ct > 1) || return;

    # This will be a sorted (ascending) list of
    #  < cluster_id, cluster_id, distance >
    # pairs.

    my $distheap = Heap::new Heap;

    {
      @C_means = ();

      # $C format:
      #   (sum, sum of squared, count)
      for (my $i=0; $i < $C_ct; $i++) {
        my $sum = $C->[$i]->[0];
        my $ct    = $C->[$i]->[2];

        ($ct > 0) || next;
        my $mean = util::deep_copy($sum);

        if ($d == 1) {
          $mean /= $ct;
        } else {
          map { $_ /= $ct } @$mean;
        }
        push @C_means, $mean;
      }

      # Determine all pairwise distances.  Ugh.
      for (my $i=0; $i < ($C_ct-1); $i++) {
        my $mean_i = $C_means[$i];

        for (my $j=$i+1; $j < $C_ct; $j++) {
          my $mean_j = $C_means[$j];
          my $dist = 0;

          if ($d == 1) {
            $dist = abs($mean_i - $mean_j);
          } else {
            map {
              $dist += ($mean_i->[$_] - $mean_j->[$_])**2
            } (0..($d-1));
            $dist = sqrt($dist);
          }

          $distheap->insert($dist, +[ $dist, $i, $j ]);
        }
      }
    }

    merge_seek:
    {
      my ($pri, $tuple) = $distheap->remove();

      if (!defined($pri)) {
        last merge_seek;
      }

      my ($dist, $i, $j) = @{$tuple};
      my ($sum_i, $sum2_i, $ct_i) = @{$C->[$i]};
      my ($sum_j, $sum2_j, $ct_j) = @{$C->[$j]};

      my $sum_merged  = undef;
      my $sum2_merged = undef;
      my $ct_merged   = undef;
      my $criterion   = undef;

      if ($d == 1) {
        # Consider unidimensional merge.
        $sum_merged  = $sum_i  + $sum_j;
        $sum2_merged = $sum2_i + $sum2_j;
        $ct_merged   = $ct_i   + $ct_j;

        # var[x] = E[X^2] - (E[X]^2)

        $criterion  = ($sum2_merged - ($sum_merged * $sum_merged /
        $ct_merged)) / $ct_merged;

        $criterion = sqrt($criterion);
      } else {
        $sum_merged  = +[];
        $sum2_merged = Numerical::matrix_new($d, $d);
        $ct_merged   = $ct_i + $ct_j;

        for (0..($d-1)) {
          $sum_merged->[$_] = $sum_i->[$_] + $sum_j->[$_];
        }

        for (my $row=0; $row < $d; $row++) {
          for (my $col=0; $col < $d; $col++) {
            $sum2_merged->[$row]->[$col] = $sum2_i->[$row]->[$col] +
            $sum2_j->[$row]->[$col];
          }
        }

        # Sec. 2.3.2, Equation 8.
        # Modified to use 1/$ct_merged instead of 1/$d.
        for (my $t=0; $t < $d; $t++) {
          my $val = ($sum2_merged->[$t]->[$t] -
          (($sum_merged->[$t] ** 2)/$ct_merged))/$ct_merged;

          $val = ($val >= 0) ? $val : 0;
          $val = sqrt($val);

          if ((!defined($criterion)) || ($val > $criterion)) {
            $criterion = $val;
          }
        }
      }

      # Numerical issues?
      if ($criterion < 0) {
        # Sanity check.
        if ($criterion < -0.1) {
          die;
        }
        $criterion = 0;
      }


      if ($criterion > $beta) {
        redo merge_seek;
      }

      # Merge them.
      my $new_id = scalar @$C;

      push @$C, +[ $sum_merged, $sum2_merged, $ct_merged ];

      # Undef the original clusters.
      $C->[$i] = undef;
      $C->[$j] = undef;

      # Remove now-bogus distances from the heap.
      my $purge_fun = sub {
        my ($pri, $tuple) = @_;
        my ($q_dist, $q_i, $q_j) = @$tuple;

        if (($q_i == $i) || ($q_i == $j) ||
        ($q_j == $i) || ($q_j == $j)) {
          return 1;
        }
        return 0;
      };

      $distheap->purge($purge_fun);


      # Now we need to add new distances to the queue.
      # First, determine the mean of the merged cluster.
      my $mean_merged = undef;

      if ($d == 1) {
        $mean_merged = $sum_merged/$ct_merged;
      } else {
        $mean_merged = +[];

        for (0..($d-1)) {
          $mean_merged->[$_] = $sum_merged->[$_] / $ct_merged;
        }
      }

      $C_means[$new_id] = $mean_merged;


      # Then iterate through the other clusters.
      for (my $c_id=0; $c_id < $new_id; $c_id++) {
        defined($C->[$c_id]) || next;

        my $mean_k = $C_means[$c_id];
        my $dist = 0;

        if ($d == 1) {
          $dist = abs($mean_merged - $mean_k);
        } else {
          map {
            $dist += ($mean_merged->[$_] - $mean_k->[$_])**2
          } (0..($d-1));
          $dist = sqrt($dist);
        }

        $distheap->insert($dist, +[ $dist, $c_id, $new_id ]);
      }

      redo merge_seek;
    }
  }

  ##################################################################
  # Finally:
  ##################################################################
  @$C = grep {
    defined($_) && ($_->[2] > 0)
  } @$C;

  _debug_print "*DEBUG* Finished secondary summarization HAC\n";

}

return 1;
