#!/usr/local/bin/perl5 -w
#
# Purpose:
#   To implement a space-efficient PAC stream-capable quantile
#   estimator.
#
#   This is based on:
#
#   Greenwald, Michael, and Sanjeev Khanna.  "Space-Efficient
#   Online Computation of Quantile Summaries", SIGMOD 2001.
#
#  Basic usage:
#
#    my $GKQ = GKQuantile::new GKQuantile();
#    $GKQ->set_epsilon(0.0001);             # or whatever, default is 0.001
#
#    while (my $data = <$data_fh>) {
#      $GKQ->insert($data);
#    }
#
#    for (my $pct=0; $pct <= 1; $pct += 0.1) {
#      if (we_absolutely_care_about_epsilon) {
#        my ($quantile, $err) = $GKQ->quantile($pct);
#      } elsif (give_me_the_best_value_if_none_meets_bound) {
#        my ($quantile, $err) = $GKQ->quantile_lax($pct);
#      }
#    }
#
# Mind you, quantile() should practically /always/ give you an
# answer unless you've seen very, very, very little data and
# are asking for an extreme.
#
##########################################################################
# $Id: GKQuantile.pm,v 1.15 2004/02/26 17:12:11 lw2j Exp $
# $Log:	GKQuantile.pm,v $
# Revision 1.15  2004/02/26  17:12:11  lw2j
# Altered compression routine to deal with possible deadlock and
# out-of-bounds array issue.
# 
# Revision 1.14  2002/11/05  18:27:23  lw2j
# Indented.
#
# Revision 1.13  2002/11/04  16:13:32  lw2j
# Updated comment.
#
# Revision 1.12  2002/10/30  10:17:43  lw2j
# Removed default setting for compress_min; it's now undef (
# original GK-specified behavior).
#
# Revision 1.11  2002/10/29  10:07:36  lw2j
# Added support for lazy-sorting (on by default) and not compressing
# unless the number of samples exceeds a given threshold (5000 by
# default, may be undef).  These should improve performance by
# (a) avoiding a de-facto insertion sort, and (b) avoiding compression
# when the sample size is small enough that memory is not an issue.
#
# Revision 1.10  2002/10/25  17:32:57  lw2j
# Removed extra "subtract 1" in quantize().
#
# Revision 1.9  2002/10/07  12:24:34  lw2j
# Fixed range bug (iteration in compression should arguably go down to 1,
# not 0, as otherwise v_0 might vanish) which caused lower extreme to
# sometimes be dropped.
#
# Revision 1.8  2002/10/05  13:13:22  lw2j
# In subtree computation, replaced explicit recursion with an internal
# stack to reduce function overhead and Perl's complaints about deep
# recursion.
#
# Revision 1.7  2002/09/20  15:57:54  lw2j
# Minor tweaks.
#
# Revision 1.6  2002/09/20  14:02:16  lw2j
# Now saves state from the previous quantile() query.  If
#  1.  No updates have taken place since then, and
#  2.  The newly queried percentile is after the previous one,
# it doesn't have to do a /full/ linear scan, but instead can start where
# the old scan left off.
#
# Revision 1.5  2002/09/04  14:11:31  lw2j
# Fixed the 'quantize' routine so it computes $pct each time
# via division, rather than accumulating error.
#
# Revision 1.4  2002/08/22  15:41:15  lw2j
# The 'quantile' function now takes a short cut when the minimum
# (phi=0) or maximum (phi=1) is requested.
#
# Revision 1.3  2002/08/13  10:48:52  lw2j
# Fixed inane bug in Quantize.
#
# Revision 1.2  2002/08/12  17:55:06  lw2j
# Added a 'quantize' subroutine for convenience.
#
# Revision 1.1  2002/08/09  15:18:08  lw2j
# Initial revision
#
##########################################################################

##########################################################################
#
# Package Header
#
##########################################################################


package GKQuantile;
require Exporter;

use strict;

##########################################################################

BEGIN {
  use Exporter ();
  use Symbol;
  use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

  $VERSION     = 1.0;
  @ISA         = qw(Exporter);
  @EXPORT      = qw(
    new
    clear
    set_epsilon
    set_lazy_sort
    set_compress_min

    insert
    quantile
    quantile_lax
    get_s
    get_n

    quantize
  );
  %EXPORT_TAGS = ();
  @EXPORT_OK   = ();

  return 1;
}



# Internal functions only.  Do not access from other packages.
sub _insert_helper($$);
sub _band($$);
sub _compute_bands($);
sub _compute_tree($);
sub _compute_subtree($$$$);
sub _compress($);


# Internal constant.
my $__log2 = log(2);



sub new() {
  my $self  = +{};

  bless $self;

  $self->clear();
  $self->set_epsilon(1e-3);

  # Turn lazy sorting on.  As this should not affect anything except
  # performance, there may not be much reason for turning it off.  If
  # off, then insertions are performed in a way that preserves ordering
  # on the data values.  If on, however, they are inserted near one
  # end, preserving only the property that the observed extrema are the
  # sample extrema.  Sorting is performed only when the sample is
  # compressed or a quantile is requested -- in other words, it should
  # get away from the O(s^{2}) insertion-sort model.
  #
  # CORRECTION:
  #   It will affect results, but it should still be within bounds. The
  # reason it will affect results is because the items being sorted are
  # not single numbers, but tuples of three numbers.  The order of the
  # latter two in each tuple is unspecified in the insertion-sort model;
  # hence, the lazy sort algorithm will quite probably not give the very
  # same order.  This, in turn, affects compression.
  #
  #   I'll leave this defaulting to ON, however.  I've seen a 4:1 speed
  # boost on a data set, and no penalty yet (well, unless you don't want
  # the deferred computation), so unless it turns out to violate bounds
  # or there's an inherent, unfixable bug in this method it'll likely
  # stay.
  $self->set_lazy_sort(1);

  # Don't compress if X samples or fewer are stored.  This feature
  # DOES affect results, so if you want to match the results from the
  # original GK quantizer, you may want to turn this off with
  #
  #  $GKobject->set_compress_min(undef);
  #
  # If your system is really overtaxed on memory, you might also
  # consider disabling or lowering this count.  Note that this is NOT
  # a maximum sample count -- that is, exceeding this is necessary but
  # insufficient for triggering sample compression.
  #
  # However, since compression is expensive, you may lose performance,
  # and this code isn't all that fast anyway... to my knowledge,
  # leaving this on should not /worsen/ results.

  # CHANGE:  Currently defaults to off (act as original GK algorithm),
  # due to
  #
  # (a) It does change results.
  #
  # (b) Since it not only affects compression frequency (less often),
  #     but also its cost (greater when compressing a larger array),
  #     the effects on performance may be non-obvious to guess.
  $self->set_compress_min(undef);


  # epsilon of 1e-3 means compression takes place every
  # 1/2e = 500 insertions, and that quantiles should be exact
  # for 1,000 observations or less.

  # epsilon of 1e-5 means compression takes place every
  # 1/2e = 50000 insertions, and that quantiles should be exact
  # for 100,000 observations or less.
  return $self;
}


# Clear all buffers, and make sure that the object knows this
sub clear($) {
  my $self = shift;
  defined($self) || die;

  $self->{"summary"} = +[];  # Vector of tuples (samples)
  $self->{'s'}      = 0;     # Samples stored right now
  $self->{'n'}      = 0;     # Number of observations seen
  $self->{'qstate'} = undef; # Saving state for queries.

  # Each summary tuple is a triple:
  #   v = associated value
  #   g = r_min(v_i) - r_min(v_(i-1)) = weight of v, essentially
  #   d = r_max(v_i) - r_min(v_i)     = range of possible ranks of v
  #
  # r_min(v_i) = minimum rank of v  \___ deterministic bounds, not PAC!
  # r_max(v_i) = maximum rank of v  /
  #
  # Other invariants include:
  #   summary is sorted by v (increasing order)
  #   minimum and maximum observations are always in summary

  return $self;
}



# Let the user specify epsilon.  This is the allowable error bound
# (IOW, how far, relative to N, our rank estimates can be off).  It
# gets used for sundry details such as deciding when to compress.
#
# And, of course, it must be strictly positive (otherwise we signal
# displeasure by returning undef).
#
# WARNING:  This will call 'clear', and erase the summary, if the
# size of the summary is non-zero.

sub set_epsilon($$) {
  my $self    = shift;
  my $epsilon = shift;

  # Epsilon should be a fraction between 0 and 1.  For any quantile()
  # call, the real rank should be no more than (epsilon * N) ranks
  # away from the reported rank.  0 is impossible for this algorithm,
  # and 1 means you don't care at all, so neither makes sense.

  ($epsilon > 0) || return undef;
  ($epsilon < 1) || return undef;

  $self->{'epsilon'}           = $epsilon;

  my $interval = 1/(2*$epsilon);

  if ($interval != int($interval)) {
    $interval = 1+int($interval);
  }

  # Call 'compress' every #$interval insertions.
  $self->{'interval'} = $interval;

  if ($self->{'s'} > 0) {
    $self->clear();
  }

  return $self;
}


# If the parameter is 1, then the summary data will NOT be sorted
# until compression or a quantile request takes place.   Otherwise,
# the insertion routine essentially does insertion sort... ouch.
sub set_lazy_sort($$) {
  my $self = shift;
  my $lazy = shift;

  $self->{'__LAZY_SORT'} = $lazy;
}

# If a threshold is given, do not run the 'compress' routine unless
# the sample count exceeds it, even if epsilon is such that we
# normally would to save space.  This may, in theory, boost
# performance.
sub set_compress_min($$) {
  my $self  = shift;
  my $comp_min = shift;

  $self->{'__COMPRESS_MIN'} = $comp_min;
}

# One of the two 'external' access points, this one is used for
# estimating a quantile.
#
# Given the object reference and the relative rank phi (0~minimum,
# (N-1)/N ~ maximum), return...
#
#   - undef if not enough data is in the sample to meet the epsilon
#     bound (might happen if the user lowers it /after/ data has
#     been read and compressed)
#   - the estimate, otherwise
#     (and the error bound, as a fraction, if wantarray())
#
# This version now saves the state from the last query -- be
# careful to clear it when you insert() or clear().  This should
# somewhat accelerate the 'usual' quantile procedure, e.g.
# invoking 'quantize'.

sub quantile($$) {
  my $self = shift;
  my $phi  = shift;

  my $rank = $phi * $self->{'n'};

  if ($rank != int($rank)) {
    # ceiling
    $rank = 1+int($rank);
  }
  my $n        = $self->{'n'};
  my $epsilon  = $self->{'epsilon'};
  my $s        = $self->{'s'};
  my $r_min_i  = 0;
  my $r_max_i  = 0;


  if (defined($self->{'__OUT_OF_ORDER'})) {
    # Sort and clear the flag.
    my $min = shift @{$self->{"summary"}};
    my $max = pop @{$self->{"summary"}};
    @{$self->{"summary"}} = sort { $a->[0] <=> $b->[0] } @{$self->{"summary"}};
    delete $self->{'__OUT_OF_ORDER'};

    if (defined($min)) {
      unshift @{$self->{"summary"}}, $min;
    }

    if (defined($max)) {
      push @{$self->{"summary"}}, $max;
    }
  }

  if ($s > 0) {
    # Shortcuts.
    if ($phi == 0) {
      my $min = $self->{"summary"}->[0]->[0];
      return (wantarray() ? ($min, 0) : $min);
    } elsif ($phi == 1) {
      my $max = $self->{"summary"}->[-1]->[0];
      return (wantarray() ? ($max, 0) : $max);
    }
  }

  my $start_pos = 0;

  if (defined($self->{'qstate'})) {
    my $qstate       = $self->{'qstate'};
    my $last_query   = $qstate->{'query'};
    my $last_r_min_i = $qstate->{'r_min_i'};
    my $last_r_max_i = $qstate->{'r_max_i'};
    my $last_i       = $qstate->{'i'};
    my $v_i          = $self->{"summary"}->[$last_i]->[0];

    my $diff_lo = $rank - $last_r_min_i;
    my $diff_hi = $last_r_max_i - $rank;
    my $diff    = ($diff_lo > $diff_hi) ? $diff_lo : $diff_hi;

    $diff /= $n;


    if ($last_query == $phi) {
      # Repeat question.
      return (wantarray() ? ($v_i, $diff) : $v_i);
    } elsif ($last_query > $phi) {
      # restart from beginning, ugh; ignore this state
    } else {
      if ($diff <= $epsilon) {
        # Last answer was still good enough.
        return (wantarray() ? ($v_i, $diff) : $v_i);
      } else {
        # Resume starting at the next i.
        $start_pos = $last_i + 1;
        $r_min_i   = $last_r_min_i;
        $r_max_i   = $last_r_max_i;
      }
    }
  }


  # Linear scan, but at least it's O(s), not O(n)
  for (my $i=$start_pos; $i < $s; $i++) {
    my $tuple = $self->{"summary"}->[$i];

    # $v_i = associated value
    # $g_i = r_min(v_i) - r_min(v_i-1)
    # $d_i = r_max(v_i) - r_min(v_i)

    my ($v_i, $g_i, $d_i) = @$tuple;

    $r_min_i += $g_i;
    $r_max_i  = $r_min_i + $d_i;


    my $diff_lo = $rank - $r_min_i;
    my $diff_hi = $r_max_i - $rank;
    my $diff    = ($diff_lo > $diff_hi) ? $diff_lo : $diff_hi;

    # $diff is the maximum error of predicted rank versus
    # expected rank, in number of ranks.  Normalize it wrt
    # number of observations.

    $diff /= $n;

    if ($diff <= $epsilon) {
      $self->{'qstate'} = +{
        query   => $phi,
        r_min_i => $r_min_i,
        r_max_i => $r_max_i,
        i       => $i
      };
      return (wantarray() ? ($v_i, $diff) : $v_i);
    }
  }
  return undef;
}



# This is an alternate version of the above.  It will return an
# estimate for the quantile, *even if the error is not bounded
# by epsilon*.  If you're using this, you will almost certainly
# want to use it in an array context so you find out just how
# off the estimate might be.
#
# If an estimate with an epsilon-bounded error is possible, it
# will return the first one it finds.  Otherwise, it returns
# the best estimate it can find.
#
# Given the object reference and the relative rank phi (0~minimum,
# (N-1)/N ~ maximum), return...
#
#   - undef if the summary is empty
#   - the estimate, otherwise
#     and, if wantarray() is true, we also return the maximum
#     error as a fraction (error in ranks / observations)
#
#
# You shouldn't have to use this very often at all, unless you're
# asking for the 0th quantile after very few observations, or
# you're editing the code to impose a hard limit on the summary
# size.

sub quantile_lax($$) {
  my $self = shift;
  my $phi  = shift;

  my $rank = $phi * $self->{'n'};

  if ($rank != int($rank)) {
    # ceiling
    $rank = 1+int($rank);
  }

  if (defined($self->{'__OUT_OF_ORDER'})) {
    # Sort and clear the flag.
    my $min = shift @{$self->{"summary"}};
    my $max = pop @{$self->{"summary"}};

    @{$self->{"summary"}} = sort { $a->[0] <=> $b->[0] } @{$self->{"summary"}};
    delete $self->{'__OUT_OF_ORDER'};

    if (defined($min)) {
      unshift @{$self->{"summary"}}, $min;
    }

    if (defined($max)) {
      push @{$self->{"summary"}}, $max;
    }
  }

  my $best_v    = undef;
  my $best_diff = undef;

  my $s        = $self->{'s'};
  my $n        = $self->{'n'};
  my $epsilon  = $self->{'epsilon'};
  my $r_min_i  = 0;
  my $r_max_i  = 0;

  if ($s == 0) {
    # NO samples recorded, so we can't give /any/ value
    return undef;
  }

  my $start_pos = 0;

  if (defined($self->{'qstate'})) {
    my $qstate       = $self->{'qstate'};
    my $last_query   = $qstate->{'query'};
    my $last_r_min_i = $qstate->{'r_min_i'};
    my $last_r_max_i = $qstate->{'r_max_i'};
    my $last_i       = $qstate->{'i'};
    my $v_i          = $self->{"summary"}->[$last_i]->[0];

    my $diff_lo = $rank - $last_r_min_i;
    my $diff_hi = $last_r_max_i - $rank;
    my $diff    = ($diff_lo > $diff_hi) ? $diff_lo : $diff_hi;

    $diff /= $n;


    if ($last_query == $phi) {
      # Repeat question.
      return (wantarray() ? ($v_i, $diff) : $v_i);
    } elsif ($last_query > $phi) {
      # restart from beginning, ugh; ignore this state
    } else {
      if ($diff <= $epsilon) {
        # Last answer was still good enough.
        return (wantarray() ? ($v_i, $diff) : $v_i);
      } else {
        # Resume starting at the next i.
        $start_pos = $last_i + 1;
        $r_min_i   = $last_r_min_i;
        $r_max_i   = $last_r_max_i;
      }
    }
  }


  # Linear scan, but at least it's O(s), not O(n)
  scan:  for (my $i=$start_pos; $i < $s; $i++) {
    my $tuple = $self->{"summary"}->[$i];

    # $v_i = associated value
    # $g_i = r_min(v_i) - r_min(v_i-1)
    # $d_i = r_max(v_i) - r_min(v_i)

    my ($v_i, $g_i, $d_i) = @$tuple;

    $r_min_i += $g_i;
    $r_max_i  = $r_min_i + $d_i;

    my $diff_lo = $rank - $r_min_i;
    my $diff_hi = $r_max_i - $rank;
    my $diff    = ($diff_lo > $diff_hi) ? $diff_lo : $diff_hi;

    # $diff is the maximum error of predicted rank versus
    # expected rank, in number of ranks.  Normalize it wrt
    # number of observations.

    $diff /= $n;
    if ((!defined($best_diff)) || ($best_diff > $diff)) {
      $best_v    = $v_i;
      $best_diff = $diff;
    }

    if ($diff <= $epsilon) {
      # Good enough, so stop.  best_diff already reflects this
      # one.
      $self->{'qstate'} = +{
        query   => $phi,
        r_min_i => $r_min_i,
        r_max_i => $r_max_i,
        i       => $i
      };
      last scan;
    }
  }

  if (wantarray()) {
    return ($best_v, $best_diff);
  } else {
    return $best_v;
  }
}




# Information-only:  how large is the summary, in tuples?
sub get_s($) {
  my $self = shift;

  return $self->{'s'};
}



# Information-only:  how many observations have we seen?
sub get_n($) {
  my $self = shift;

  return $self->{'n'};
}


# This is the second main external access point.  Basically, we take
# an observation (we don't care where it came from, so it's simple to
# wrap so you can use an array, a file handle, a black-box
# function...), decides whether or not it needs to compress the sample,
# do so if need be, and then we call _insert_helper.
#
# Changed:  it can now take a list of values.
sub insert($@) {
  my $self = shift;
  my @vals = @_;
  my $x    = undef;

  foreach $x (@vals) {
    if (($self->{'n'} % $self->{'interval'}) == 0) {
      # Check to see whether the user specified a minimum sample size
      # before compression.
      if ((!defined($self->{'__COMPRESS_MIN'})) ||
      ($self->{'s'} > $self->{'__COMPRESS_MIN'})) {
        $self->_compress();
      }
    }

    # _insert_helper takes care of everything else, including
    # updating $n and $s.
    $self->_insert_helper($x);
  }

  # Contents updated, so clear query state.
  $self->{'qstate'} = undef;
}




# Assume that compression has already been done, if necessary.
# Add the value to the summary and return the new summary size.
sub _insert_helper($$) {
  my $self = shift;
  my $x    = shift;
  my $s    = $self->{'s'};
  my $lo   = 0;
  my $hi   = $s-1;

  # Another observation.
  $self->{'n'}++;


  if ($s == 0) {
    # Special case:  this is the first item in the summary.
    $self->{"summary"}->[0] = +[ $x, 1, 0 ];

    $s++;
    $self->{'s'} = $s;
    return $s;
  }


  if ($self->{"summary"}->[0]->[0] >= $x) {
    # New low.
    unshift @{$self->{"summary"}}, +[ $x, 1, 0];
    $s++;
    $self->{'s'} = $s;
    return $s;
  } elsif ($self->{"summary"}->[-1]->[0] <= $x) {
    # New high.
    push @{$self->{"summary"}}, +[ $x, 1, 0];
    $s++;
    $self->{'s'} = $s;
    return $s;
  }

  if ($self->{"__LAZY_SORT"} == 1) {
    # Set a special flag indicating that the sort is out of order.
    $self->{"__OUT_OF_ORDER"} = 1;

    my $delta = int(2*$self->{'epsilon'}*$self->{'n'});

    # And make the new tuple next-to-last.
    splice @{$self->{"summary"}}, ($self->{'s'}-1), 0, +[ $x, 1, $delta];
  }  else {
    # Otherwise, it's strictly within the present range.
    # Search.
    search:  {
      my $mid   = int(($lo+$hi)/2);
      my $v_mid = $self->{"summary"}->[$mid]->[0];
      my $flag  = 0;

      # Invariants:
      #   val($lo) <= $x
      #   val($hi) >= $x
      #   $lo <= $hi

      if ($v_mid <= $x) {
        $flag = $flag || ($lo != $mid);
        $lo = $mid;
      }

      if ($v_mid >= $x) {
        $flag = $flag || ($hi != $mid);
        $hi = $mid;
      }

      if ($flag) {
        redo search;
      }
    }


    # Three cases:
    #     $lo=$hi   and $v_mid==$x
    #   $lo+1=$hi and v(lo) <= $x < v($lo+1)
    #   $lo < $mid < $hi  and they're ALL equal to mid
    # In either case... we can insert at $lo+1.

    my $delta = int(2*$self->{'epsilon'}*$self->{'n'});
    splice(@{$self->{"summary"}}, $lo+1, 0, +[ $x, 1, $delta ]);
  }

  $s++;
  $self->{'s'} = $s;

  return $s;
}



# Given delta (d_i), compute band(t_i, n).  This is used for
# compression.
sub _band($$) {
  my $self = shift;
  my $d_i  = shift;


  my $epsilon = $self->{'epsilon'};
  my $n       = $self->{'n'};

  my $p         = int(2*$epsilon*$n);
  my $alpha_max = (log(2*$epsilon*$n)/$__log2)+1;

  if ($alpha_max != int($alpha_max)) {
    $alpha_max = 1+int($alpha_max);
  }

  my $pow2alpha_less_1 = 1;
  my $pow2alpha        = 2;


  if ($d_i == 0) {
    return $alpha_max+1;
  }

  if ($d_i == $p) {
    # Special case:  a band of their own.
    return 0;
  }

  for (my $alpha=1; $alpha <= $alpha_max; $alpha++) {
    my $lo_bound = $p - $pow2alpha - ($p % $pow2alpha);
    my $hi_bound = $p - $pow2alpha_less_1 - ($p % $pow2alpha_less_1);

    if (($d_i > $lo_bound) && ($d_i <= $hi_bound)) {
      return $alpha;
    }

    $pow2alpha_less_1 = $pow2alpha;
    $pow2alpha       *= 2;
  }

  return undef;
}



# Internal function for computing all the bands of the summary
# tuples.  Returns 'em as an array.
sub _compute_bands($) {
  my $self = shift;
  my $s        = $self->{'s'};
  my @bands    = ();

  for (my $i=0; $i < $s; $i++) {
    my $d_i = $self->{"summary"}->[$i]->[2];
    $bands[$i] = $self->_band($d_i);
    die unless defined($bands[$i]);
  }

  return @bands;
}


# This imposes a tree structure over the tuples.  We return two
# list references:
#
# (1) parent array (undef if no parent == root)
# (2) band values  (because we need them, and so does _compress)
#
# This isn't being used for now, but if I switch back from lazy
# computation of the gstar and parent values it might be.
sub _compute_tree($) {
  my $self     = shift;
  my $s        = $self->{'s'};
  my @bands    = $self->_compute_bands();
  my @gstar    = ();
  my @parents  = (undef) x $s;

  # *frown* This is quadratic.  Perhaps this should be delayed, lazily,
  # so that compress gets the bands() array, but we only compute
  #  1. descendants
  #  2. gstar
  # when necessary.
  for (my $i=0; $i < ($s-1); $i++) {
    my $band_i = $bands[$i];

    inner_loop:
    for (my $j=$i+1; $j < $s; $j++) {
      my $band_j = $bands[$j];

      if ($band_j > $band_i) {
        $parents[$i] = $j;
        last inner_loop;
      }
    }
  }

  return (\@parents, \@bands);
}




# Compute g_star and parents for a subtree, recursively.  We don't
# compute for the entire sample just in case it turns out that we
# don't need to.
sub _compute_subtree($$$$) {
  my $self        = shift;
  my $start       = shift;
  my $g_stars_ref = shift;
  my $parents_ref = shift;

  my @stack = ($start);
  my $i     = undef;

  while (defined($i = pop @stack)) {
    if (defined($g_stars_ref->[$i])) {
      # Already done.
      next;
    }

    my $g_i      = $self->{"summary"}->[$i]->[1];
    defined($g_i) || die;
    $g_stars_ref->[$i] = $g_i;

    my @children = ();

    # This is expensive, but hopefully we won't have to
    # do this very often.
    descendant_scan:  for (my $j=$i-1; $j >= 0; $j--) {
      # Identify parent.nnn
      my $parent = $parents_ref->[$j];

      if (!defined($parent)) {
        # Scan, but limit search to this subtree.
        my $g_j = $self->{"summary"}->[$j]->[1];

        parent_scan:  for (my $p=$j+1; $p <= $i; $p++) {
          my $g_p = $self->{"summary"}->[$p]->[1];
          if ($g_p > $g_j) {
            $parent = $p;
            last parent_scan;
          }
        }
      }

      if ((!defined($parent)) || ($parent > $i)) {
        # Parent not in range => not a descendent of $i
        # And, in fact, there will be no descendants of $i before
        # $j, so the scan can stop.
        last descendant_scan;
      }

      $parents_ref->[$j] = $parent;
      if ($parent == $i) {
        # @children is in decreasing order.
        push @children, $j;
      }
    }

    {
      my $defer_flag = 0;
      my $child = undef;

      my @still_needed = ();

      # g_star is the sum of the g values for i and the children.
      foreach $child (@children) {
        my $g_child     = $g_stars_ref->[$child];

        if (!defined($g_child)) {
          push @still_needed, $child;
          $defer_flag = 1;
        } else {
          $g_stars_ref->[$i] += $g_child;
        }
      }

      # We may need to revisit this value of $i after we iterate
      # through the children.  It's done this way now, instead of
      # the old direct recursive way, because there'd be some
      # pretty deep recursion at times.
      #
      # This should always terminate.  After all, eventually there
      # are nodes with no children... in addition, it shouldn't
      # need to recompute the parents of its possible descendants
      # again, because the results are saved in $parents_ref.
      if ($defer_flag) {
        # Simulate recursion.
        (scalar(@still_needed) > 0) || die;

        # Clear, otherwise we'll skip it the second time around.
        $g_stars_ref->[$i] = undef;

        # Children put in increasing order.
        @still_needed = reverse @still_needed;
        push @stack, $i;
        push @stack, @still_needed;
      }
    }
  }

  return $self;
}



# Call this periodically to collapse entire branches of the tree
# and merge them into siblings.
sub _compress($) {
  my $self = shift;
  my $s = $self->{'s'};
  my $epsilon       = $self->{'epsilon'};
  my $n             = $self->{'n'};
  my $bound         = 2 * $epsilon * $n;

  if (defined($self->{'__OUT_OF_ORDER'})) {
    my $min = shift @{$self->{"summary"}};
    my $max = pop @{$self->{"summary"}};

    @{$self->{"summary"}} = sort { $a->[0] <=> $b->[0] } @{$self->{"summary"}};
    delete $self->{'__OUT_OF_ORDER'};

    if (defined($min)) {
      unshift @{$self->{"summary"}}, $min;
    }

    if (defined($max)) {
      push @{$self->{"summary"}}, $max;
    }
  }

  # my ($parents_ref, $bands_ref) = $self->_compute_tree();
  # my @parents = @$parents_ref;
  # my @bands   = @$bands_ref;

  my @bands   = $self->_compute_bands();
  my @g_stars = (undef) x $s;
  my @parents = (undef) x $s;

  # Compute g_star values.
  # for (my $i=0; $i < $s; $i++) {
  #  my $g_i = $self->{"summary"}->[$i]->[1];
  #
  #  $g_stars[$i] += $g_i;
  #
  #  my $parent = $parents_ref->[$i];
  #
  #  while (defined($parent)) {
  #    $g_stars[$parent] += $g_i;
  #    $parent = $parents_ref->[$parent];
  #  }
  #}

  for (my $i=$s-2; $i >= 0; $i--) {
    my $tuple = $self->{"summary"}->[$i];
    my ($v_i, $g_i, $d_i) = @$tuple;

    ($i <= ($s-2)) || next;

    my ($v_plus, $g_plus, $d_plus) = @{$self->{"summary"}->[$i+1]};

    my $band_i        = $bands[$i];
    my $band_plus     = $bands[$i+1];

    #    if (($band_i <= $band_plus) &&
    #    (($g_stars[$i] + $g_plus + $d_plus) < $bound)) {

    if (($band_i <= $band_plus) && (($g_plus + $d_plus) < $bound)) {
      # Note that $g_star_i, which we haven't computed yet, must be
      # non-negative.  I'm trying to avoid descendant/$g_plus
      # computation unless necessary.

      my $g_star = $g_stars[$i];

      if (!defined($g_star)) {
        $self->_compute_subtree($i, \@g_stars, \@parents);
        $g_star = $g_stars[$i];
      }

      if (($g_star + $g_plus + $d_plus) >= $bound) {
        # It didn't qualify.
        next;
      }

      # Identify descendants of node $i.
      my $g_sum = $g_i;
      my $k     = $i-1;
      descendant_search:
      for ($k=$i-1; $k >= 1; $k--) {
        my $k_parent = $parents[$k];

        if ((!defined($k_parent)) || ($k_parent > $i)) {
          # NOT a descendant of $i
          last descendant_search;
        }
      }


      # Fix g_star values of parents
      {
        my $par = $parents[$i];
        if (defined($par)) {
          parent_loop: {
            if (defined($g_stars[$par])) {
              $g_stars[$par] -= $g_stars[$i];
              $par = $parents[$par];
              defined($par) && redo parent_loop;
            }
          }
        }

        $par = $parents[$i+1];
        if (defined($par)) {
          parent_loop: {
            if (defined($g_stars[$par])) {
              $g_stars[$par] += $g_stars[$i];
              $par = $parents[$par];
              defined($par) && redo parent_loop;
            }
          }
        }
      }

      # g_star is no longer correct for the parents of this subtree,
      # but it doesn't matter since we won't look at them -- since
      # the parents have roots > $i, and we count down.

      defined($k) || die;
      # $k points the closest one which ISN'T a descendant
      $k++;

      my $del_ct = $i-$k+1;

      ($del_ct > 0) || die;

      # Delete/merge all descendents of $t_i and the tuple $t_i itself.
      # We do this by a replacement and then a splice.
      $self->{"summary"}->[$i+1]->[1] += $g_stars[$i];

      splice(@{$self->{"summary"}}, $k, $del_ct);
      splice(@bands, $k, $del_ct);
      splice(@parents, $k, $del_ct);
      splice(@g_stars, $k, $del_ct);

      # We need to renumber the parents, too.
      map { $_ = ((defined($_)) && ($_ >= $k)) ?
      ($_ - $del_ct) : $_ } @parents;

      # Bump down $i to $k so that when the decrement kicks in
      # it goes down to $k-1, which should be the highest-ranked
      # non-deleted value that hasn't been looked at yet..
      $i = ($k+1);

      # Decrement s.
      $s -= $del_ct;
      $self->{'s'} = $s;
    }
  }

  return $self;
}



# Produce an array of $count points where they should correspond
# to evenly-distributed quantiles.  For instance, were $count to
# be 11, they would correspond to the quantiles at:
#
#  (0, 0.1, 0.2, ... 1)
#
# This is merely a helpful macro, because the rest of the package
# lets you do this quite easily.
sub quantize($$) {
  my $self   = shift;
  my $count  = shift;

  if (($self->{'s'} == 0) || ($count < 2) || ($count != int($count))) {
    return undef;
  }

  my $step   = 1/($count-1);
  my @arr    = ();

  for (my $i=0; $i < $count; $i++) {
    my $pct = $i/($count-1);
    my $val = $self->quantile($pct);

    # It should almost never be undefined, but...
    defined($val) || ($val = $self->quantile_lax($pct));

    defined($val) || die "Should be impossible!";
    push @arr, $val;
  }

  die unless ((scalar @arr) == $count);
  return @arr;
}

return 1;
