#!/usr/local/bin/perl5 -w
#: # Find perl...
#eval 'exec perl5 -w -S $0 "$@"'
#if 0;

##############################################################
#
# There is now a way to specify which attribute is what transformed
# version of which original attribute.  Basically, if the data has a
#
#   ;;; ==== BEGIN TRANSFORMATION CHOICE BLOCK ====
#   ;;;  <original_num, transformation, params... >
#   ;;;  <original_num, transformation, params... >
#   ;;;  <original_num, transformation, params... >
#   ;;;  <original_num, transformation, params... >
#   ;;; ==== END TRANSFORMATION CHOICE BLOCK ====
#
# UPDATE:
#   Additional <> tuples in each line, e.g.
#   <1, FooBar, 0, 3> <BIC=-23414> <ChiSquare=0.05>
#                     ^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^
#   will be propagated to the final logs.
#
# Then these tuples are used to map transformed attributes to
# original ones, so we don't spuriously select two from the
# same family.  The tuples must correspond to attributes
# 0..(n-1) in the current mapping, in order.  If there are
# multiple such blocks, all are processed as if they were one
# giant block, in order.
#
# If this block does NOT exist, then the implict assumption is
# that it's a 1:1 mapping of original to current attributes.
#
# This system obsoletes the old $copies variable.
#
# Incidentally, this means that we open the file twice.  The
# first time, we open it normally in order to parse the
# comment blocks.  The second time, we use Wrapper, which
# ignores such things as semicolon-denoted comments.
#
##############################################################

use strict;
use Symbol;
use FileHandle;
use English;
use Benchmark;


# <change_me>
use lib '/usr/lw2j/private/FracDim';
use lib '/usr/lw2j/private/Stat/Perl';
# </change_me>


# These three modules are my own creations.  The DiskFracDim
# module comes from my FracDim package, and requires basically
# a full set (including, preferably, the FDC set and patch;
# the C++ core is significantly faster than the Perl code for
# pair-counting, and appears to give identical results as it
# should).
#
# We now rely on Wrapper completely.  DiskWrapper and
# MemoryWrapper are obsolete.

require DiskFracDim;
require Wrapper;
require ParseArgs;

sub test_set($$$);
sub calc_fd($$);
sub print_usage_and_exit();
sub enqueue($$$);
sub dequeue($$$);
sub delplace($$);
sub delindex($$);

sub process_block($$);

autoflush STDOUT 1;
autoflush STDERR 1;

#####################################################################
# Start the clock.
#####################################################################

my $time_start = new Benchmark;


##############################################################
my $wrapper_obj   = undef;
my $FD_obj        = new DiskFracDim();
my $fh_old        = undef;
my $fh_new        = undef;

my $fd_true       = undef;
my $attribs       = undef;

my $fd_new        = 0;
my $fd_old        = 0;
my %single_fd     = ();  # Track the FD of each individual attribute.
my @fdqueue       = ();  # The FDs and indices of those still eligible.
my @maxqueue      = ();

# Maps current attribute indices (as in the data) to original ones
# (as specified by the TRANSFORMATION CHOICE block, if any).  We
# store the entire <orig, xf, params...> tuple so we can output them
# later.
my @attr_mapping       = ();

# Extra <> blocks get attached here, with the same index.
my @extras             = ();

# The reverse.  Note that values in this table are list references,
# since it's potentially a one-to-many.  The index isn't the entire
# tuple anymore; for these purposes, it's only the 'orig' field.
#
# A hash, since technically we aren't necessarily using all the
# original attributes; 0, 1e6, et all are possible.
my %gnippam_rtta = ();



# Initializing these will be the responsibility of ParseArgs given the
# $argument_data_ref list.

my $min_single;
my $use_memory;
my $max_select;
my $min_gain;
my $max_candidates;
my $min_corr;
my $fn_new;
my $fn_old;
my $fd_whole;
my $exponent;
my $swap_bonus;
my $dynamic_reorder;
my $min_single_is_strict;

my $use_stdin;
my $use_stdout;
my $use_stderr;


my $argument_data_ref =
+[
+[+[ "min_single", "mnsel" ],
"Minimum fractal dimension estimate of an individual attribute to " .
"be considered at all.",
+[ +[ \$min_single, 0.4, "Threshold", 0,
sub {
  my $x = shift @_;
  return (defined($x) && ($x > 0)) ? 1 : 0;
}
]],
undef,
undef,
1],
+[+[ "memory", "mem" ],
"Whether or not to load the data into memory.  Mandatory if data " .
"is coming from a non-seekable source.",
+[ +[ \$use_memory, 0, "Binary flag", 1,
sub {
  my $x = shift @_;
  return (defined($x) && (($x == 0) || ($x == 1))) ? 1 : 0;
}
]],
\$use_memory,
0,
1],
+[+[ "max_select", "mxsel"],
"Maximum number of attributes to retain.",
+[ +[ \$max_select, undef, "Number of attributes", 0,
sub {
  my $x = shift @_;
  return ((!defined($x)) || (($x == int($x)) && ($x > 0))) ?
  1 : 0;
}
]],
undef,
undef,
1],
+[+[ "min_gain", "mngain" ],
"Minimum gain for an addition or exchange to be accepted.",
+[ +[ \$min_gain, 0.1, "Minimum gain", 0,
sub {
  my $x = shift @_;
  return ((!defined($x)) || (($x > 0))) ? 1 : 0;
}
]],
undef,
undef,
1],
+[+[ "max_candidates", "mxcand" ],
"Maximum candidates to consider for each potential addition.",
+[ +[ \$max_candidates, undef, "Maximum number of candidates",
0,
sub {
  my $x = shift @_;
  return ((!defined($x)) || (($x > 0) && ($x == int($x)))) ?
  1 : 0;
}
]],
undef,
undef,
1],
+[+[ "min_corr", "mncorr" ],
"Minimum correlation for a fractal dimension estimate to be " .
"accepted.",
+[ +[ \$min_corr, 0.98, "Threshold",
0,
sub {
  my $x = shift @_;
  return ((!defined($x)) || (($x > 0) && ($x <= 1))) ? 1 : 0;
}
]],
undef,
undef,
1
],
+[+[ "fn_old", "fold", "in" ],
"Name of the input filename, including full path.",
+[ +[ \$fn_old, undef, "Filename", 0,
sub {
  my $x = shift @_;
  $use_stdin = 0;
  return ((-f $x) && (-r $x)) ? 1 : 0;
}
]],
undef,
undef,
1
],
+[+[ "fn_new", "fnew", "out" ],
"Name of the output filename, including full path.",
+[ +[ \$fn_new, undef, "Filename", 0,
sub {
  my $x = shift @_;
  $use_stderr = 0;
  $use_stdout = 0;
  return ((!(-f $x)) || (-w $x)) ? 1 : 0;
}
]],
undef,
undef,
1
],
+[+[ "fd_whole_d", "whole" ],
"Also compute the fractal dimension of the entire set as-is.",
+[ +[ \$fd_whole, 0, "Binary value", 1 ,
sub {
  my $x = shift @_;
  return ((defined($x)) && (($x == 0) || ($x == 1))) ? 1 : 0;
}
]],
\$fd_whole,
0,
1
],
+[+[ "swap_bonus", "swap" ],
"Additive score bonus to swaps when comparing against additions.",
+[ +[ \$swap_bonus, 0, "Additive bonus", 0,
sub {
  my $x = shift @_;
  return ((defined($x)) ? 1 : 0);
}
]],
undef,
undef,
1],
+[+[ "stdin" ],
"Use standard input as the main input source.  Turns on memory buffering.",
+[ +[ \$use_stdin, 0, "Binary value", 1,
sub {
  my $x = shift @_;
  $fn_old = undef;
  $use_memory = 1;
  return ((defined($x) && (($x == 0) || ($x == 1))) ? 1 : 0);
}
]],
\$use_stdin,
0,
1],
+[+[ "stdout" ],
"Use standard output.",
+[ +[ \$use_stdout, 0, "Binary value", 1,
sub {
  my $x = shift @_;
  $fn_new = undef;
  $use_stderr = 0;
  return ((defined($x) && (($x == 0) || ($x == 1))) ? 1 : 0);
}
]],
\$use_stdout,
0,
1],
+[+[ "exponent", "exp" ],
"Exponent used for the fractal dimension computations.",
+[ +[ \$exponent, 1, "Exponent", 1,
sub {
  my $x = shift @_;
  return ((defined($x)) ? 1 : 0);
}
]],
undef,
undef,
1],
+[+[ "min_single_is_strict", "strict"],
"Whether or not attributes whose fractal dimensions are questionable " .
"are rejected in the initial filtering.",
+[ +[ \$min_single_is_strict, 0, "Binary value", 1,
sub {
  my $x = shift @_;
  return ((defined($x) && (($x == 0) || ($x == 1))) ? 1 : 0);
}
]],
\$min_single_is_strict,
0,
1],
+[+[ "dynamic_reorder", "reorder"],
"Whether or to reorder the queue based on later estimates. " .
"Meaningless unless max_candidates is less than the number " .
"of attributes that survive the initial filtering.",
+[ +[ \$dynamic_reorder, 0, "Binary value", 1,
sub {
  my $x = shift @_;
  return ((defined($x) && (($x == 0) || ($x == 1))) ? 1 : 0);
}
]],
\$dynamic_reorder,
0,
1]
];

my $parser = new ParseArgs($argument_data_ref);

$parser->set_preamble("A Perl script for attribute selection based upon " .
"intrinsic dimensionality.");
$parser->set_epilogue("Don't worry too much about most of the knobs and " .
"switches.  The most important ones are probably the " .
"minimums for filtering (min_single) and for " .
"accepting a change (min_gain), and the maximum " .
"numer of attributes (max_select).\n" .
"Oh, and while neither --fn_old or --stdin is itself " .
"mandatory, you must specify one.  Likewise, you must " .
"specify an output source, via --fn_new, --stdout or " .
"--stderr.  This specifies where the new data goes.");
$parser->set_validator(
  sub {
    # Make sure input is defined and meaningful.
    if (defined($fn_old)) {
      (!$use_stdin) || return 0;
      $fh_old = gensym();
      open($fh_old, $fn_old) || return 0;
    } elsif ($use_stdin) {
      ($use_memory) || return 0;
      $fh_old = \*STDIN;
    } else {
      return 0;
    }

    if (defined($fn_new)) {
      (!$use_stdout) || return 0;
      (!$use_stderr) || return 0;
      $fh_new = gensym();
      open($fh_new, ">$fn_new") || return 0;
    } elsif ($use_stdout) {
      (!$use_stderr) || return 0;
      $fh_new = \*STDOUT;
    } elsif ($use_stderr) {
      $fh_new = \*STDERR;
    } else {
      return 0;
    }
    return 1;
  }
);


# Parse the arguments, verify sanity regarding I/O, and set the
# file handles.
$parser->parse_args(@ARGV);


#####################################################################
# Read the comment blocks and parse for blocks we recognize.
# Oh, and if $fh_new == \*STDIN, then we'll buffer the data
# ourselves, and then initialize the wrapper via set_contents().
# If not, then we can fseek() to the beginning, and initialize
# the wrapper normally.
#####################################################################
{
  $wrapper_obj= new Wrapper;
  $wrapper_obj->set_buffer($use_memory ?
  ($wrapper_obj->{'BUFFER_PACK'}) :
  ($wrapper_obj->{'NO_BUFFER'}));
  if ($fh_old == \*STDIN) {
    # Buffer it.
    my @lines = <STDIN>;
    my $line  = undef;
    my $attr_idx = 0;

    foreach $line (@lines) {
      process_block($line, \$attr_idx);
    }

    ($fh_old, $fn_old) = $wrapper_obj->set_contents(@lines);
  } else {
    my $line = undef;
    my $attr_idx = 0;

    while (defined($line = <$fh_old>)) {
      process_block($line, \$attr_idx);
    }

    seek $fh_old, 0, 0;
    $wrapper_obj->load_file($fh_old);
  }

  $attribs = $wrapper_obj->get_dims();

  # If there is no xf choice block, we'll need to manually
  # create the mapping.
  if (!scalar(%gnippam_rtta)) {
    my $i=0;

    for ($i=0; $i < $attribs; $i++) {
      $attr_mapping[$i] = +[ $i, "identity" ];
      $gnippam_rtta{$i} = +[ $i ];
    }
  }
}







#####################################################################
# Summarize parameters.
#####################################################################
{
  my $corr = 0;

  # Non-data output will be prefixed with ';;;' to make it easy
  # to filter in/out.


  print ";;;\n";
  print ";;;       Input:   $fn_old\n";
  print ";;;      Output:   $fn_new\n";
  print ";;;\n";
  print ";;; Minimums:\n";
  print ";;;        Fractal dimension of a single candidate:  ";

  if (defined($min_single)) {
    print $min_single, "\n";
    print ";;;       (Singleton fractal dimension checking is:  ";
    if ($min_single_is_strict) {
      print "strict)\n";
    } else {
      print "lax)\n";
    }
  } else {
    print "none\n";
  }

  print ";;;    Fractal dimension improvement per iteration:  ";
  print (defined($min_gain) ? "$min_gain\n" : "none\n");
  print ";;;      Fractal dimension correlation coefficient:  ";
  print (defined($min_corr) ? "$min_corr\n" : "none\n");
  print ";;;\n";
  print ";;; Maximums:\n";
  print ";;;        Attributes to retain:  ";
  print (defined($max_select) ? "$max_select\n" : "all\n");
  print ";;;    Candidates per iteration:  ";
  print (defined($max_candidates) ? "$max_candidates\n" : "all\n");
  print ";;;\n";
  print ";;; Miscellaneous:\n";
  print ";;;       Swap bonus:  $swap_bonus\n";
  print ";;;   Buffering data:  $use_memory\n";
  print ";;; Queue reordering:  ", $dynamic_reorder ? "on" : "off", "\n";
  print ";;;\n";
  print ";;; Statistics of original data:\n";
  print ";;;                Cardinality:  ", $wrapper_obj->get_count(), "\n";
  print ";;;   Embedding dimensionality:  $attribs\n";

  if ($fd_whole) {
    ($fd_old, $corr) = calc_fd($FD_obj, $wrapper_obj);
    print ";;; Original fractal dimension:  $fd_old\n";
    print ";;;  FD correlation coefficent:  $corr\n";
  }

  print ";;;\n";
}



#####################################################################
# For each of the attributes, compute its individual fractal
# dimension and maintain these in a priority queue.
#####################################################################

{
  my $i    = 0;
  my $fd   = undef;
  my $corr = undef;
  my $passed = 0;

  @fdqueue = ();

  for ($i=0; $i < $attribs; $i++) {

    if ((defined($max_candidates) && ($max_candidates < $attribs)) ||
    (defined($min_single))) {
      ($fd, $corr) = test_set($wrapper_obj, $FD_obj, +[ $i ]);
    } else {
      # If we don't care about limiting the number of candidates,
      # then we don't need to order them.  If we don't care about
      # fd of individual attributes, then we don't need to verify
      # them.
      #
      # If we don't order and we don't verify, then we don't even
      # *need* the real numbers.
      $fd = 1;
      $corr = 1;
    }

    if ((defined($min_corr)) && ($corr < $min_corr)) {
      if ($min_single_is_strict) {
        print ";;; Disqualifying $i (too unreliable:  $corr)\n";
        next;
      } else {
        print ";;; Not disqualifying $i (correlation too low:  $corr)\n";
        enqueue(\@fdqueue, $fd, $i);
        $single_fd{$i} = $fd;
        $passed++;
        next;
      }
    }

    if ((defined($min_single)) && ($min_single > $fd)) {
      print ";;; Disqualifying $i (too low fd:  $fd)\n";
      next;
    }

    enqueue(\@fdqueue, $fd, $i);
    $single_fd{$i} = $fd;
    $passed++;
  }

  print ";;; Single attributes exceeding thresholds:  $passed\n";
}




#####################################################################
# We'll actually maintain *two* queues.  One will be fdqueue; if
# 'max_candidates' is low, then enqueue may hold the bulk of the
# attributes.  The primary queue 'maxqueue' will be one which
# will contain the best ones, up to $max_candidates if defined,
# and which we will need to keep filling up at the same rate it
# is depleted.
#
# Iterations can then iterate over maxqueue, and when removing an
# item, an item can be replaced via fdqueue.  Items that are added
# back -- e.g. after a swap is made -- get put back into the
# fdqueue pool.
#
# Incidentally, when iterating, we'll iterate over the entire
# maxqueue ignoring order.
#####################################################################

{
  if (defined($max_candidates) && ($max_candidates < scalar(@fdqueue))) {
    my $i  = 0;

    @maxqueue = ();
    for ($i=0; $i < $max_candidates; $i++) {
      my $idx = undef;
      my $fd  = undef;

      dequeue(\@fdqueue,  \$fd, \$idx);
      enqueue(\@maxqueue, $fd, $idx);
    }
  } else {
    # No candidate limit, or the limit is at least the size of
    # the entire pool => only one queue.
    @maxqueue = @fdqueue;
    @fdqueue  = ();
  }
}




#####################################################################
# Do the actual additions and exchanges.
#####################################################################
my @current_tuple = ();
my $selected      = 0;

# We don't have any attributes yet, so we cannot have a non-zero
# fractal dimension.
$fd_old = 0;
$fd_new = 0;



#####################################################################
# We will also track something else that is interesting.
#
# Note that we compute at each iteration the fractal dimension
# gain (or loss) caused by adding each candidate to the already-
# selected tuple.
#
# We should be able to derive more information from this.  In
# particular, suppose the utility of an attribute drops sharply
# between two different iterations.  Then, theoretically, the
# selected attributes at that iteration might make the candidate
# redundant, and this is interesting to know.
#
# This is not foolproof.  For instance, if a bad attribute somehow
# gets accepted, it may devalue everybody by making them look like
# noise (by having a much broader range and distribution).
#
# Hence, we store a table which, for each candidate, stores the
# previous fractal dimension gain and the tuple at that time.  When
# it changes in status (was below threshold => is now above, or
# vice versa) we report it.
#
# Obvious and dire limitations:
#   This takes into account only additions, not exchanges.  Perhaps
#   they should be on a parallel track of sorts.
#
#   This is not yet used for pruning/reordering.  I've an idea
#   about how to go about this -- well, two of them.  The pruning
#   will be tricky to do correctly.
#####################################################################

my %previous_delta_fd = ();

selection: {
  # Sanity check.
  (scalar(@maxqueue) > 0) || last selection;

  my $attr              = undef;
  my $best_to_swap_fd   = undef;
  my $best_to_swap_attr = undef;
  my $best_to_drop_fd   = undef;
  my $best_to_drop_attr = undef;

  # If there are at least two items in @current_tuple, we first
  # consider exchanges.

  consider_swap: {
    if (scalar(@current_tuple) > 2) {
      drop_loop:    foreach $attr (@current_tuple) {
        my @mask = grep { $_ != $attr } @current_tuple;
        my ($fd, $corr) = test_set($wrapper_obj, $FD_obj, \@mask);

        if (defined($min_corr) && ($corr < $min_corr)) {
          # Too unreliable an estimate.
          next drop_loop;
        }

        if ((!defined($best_to_drop_fd)) ||
        ($fd > $best_to_drop_fd)) {
          $best_to_drop_fd   = $fd;
          $best_to_drop_attr = $attr;
        }
      }

      defined($best_to_drop_attr) || last consider_swap;

      my @base_mask = grep { $_ != $best_to_drop_attr } @current_tuple;

      # Find the best replacement.
      # Now here's an odd bit...
      #
      # Suppose we drop an attribute.  There may be other "versions" of
      # that attribute which may be preferable.  Hence, we could add
      # those to the queue, if they're not already in.
      #
      # What we do is duplicate the maxqueue() since it should be
      # undisturbed for the add process.  We then identify the other
      # versions of the dropped attribute.  If they're not in the
      # copy, we add them.  We will then have to be sure to use only
      # max_candidates.

      my @queue_copy = @fdqueue;

      {
        # Identify other versions.
        my $original = $attr_mapping[$best_to_drop_attr]->[0];
        my $aliases  = $gnippam_rtta{$original};
        my $other    = undef;

        foreach $other (@$aliases) {
          # Don't consider the one which we'll remove.
          if ($other == $best_to_drop_attr) {
            next;
          }

          # Don't consider those that failed the minimum-fd check.
          if (!exists($single_fd{$other})) {
            next;
          }

          # Don't add if it's already in the queue.
          if (grep { $_->[1] == $other } @queue_copy) {
            next;
          }

          enqueue(\@queue_copy, $single_fd{$other}, $other);
        }
      }


      {
        # Dequeue and consider the candidates.
        my $to_consider = $max_candidates;
        my $i=0;

        if (!defined($max_candidates)) {
          $to_consider = scalar @queue_copy;
        } elsif ($to_consider > scalar@queue_copy) {
          $to_consider = scalar @queue_copy;
        }

        swap_loop:  for ($i=0; $i < $to_consider; $i++) {
          my $fd_single = undef;
          my $idx       = undef;

          dequeue(\@queue_copy, \$fd_single, \$idx);

          my @mask = @base_mask;
          push @mask, $idx;
          my ($fd, $corr)   = test_set($wrapper_obj, $FD_obj, \@mask);

          defined($corr) || next;

          if (defined($min_corr) && ($corr < $min_corr)) {
            next swap_loop;
          }

          if ((!defined($best_to_swap_fd)) || ($fd > $best_to_swap_fd)) {
            $best_to_swap_fd   = $fd;
            $best_to_swap_attr = $idx;
          }
        }
      }
    }
  }




  # At this point, either there were no acceptable exchanges (too few
  # attributes already selected, or those remaining do not have
  # reliable correlations); or out of those accepted (which was limited
  # to the best $max_candidates, including possibly "relatives" of the
  # dropped attribute), we know which one was best.
  #
  # Now we concern ourselves with finding the best one for a straight-
  # out addition.

  my $best_to_add_fd   = undef;
  my $best_to_add_attr = undef;
  my %these_deltas     = ();

  consider_add:  {
    if ((defined $max_select) && ($selected == $max_select)) {
      # We've already selected enough.  At this stage, we permit only
      # exchanges.
      last consider_add;
    }

    my $itmref = undef;

    # invariant:  maxqueue() contains at most max_candidates, and
    # every last one of them is at least as good as every one in
    # the fdqueue().

    add_loop:  foreach $itmref (@maxqueue) {
      my @mask = @current_tuple;
      my $attr = $itmref->[1];
      push @mask, $attr;

      my ($fd, $corr)   = test_set($wrapper_obj, $FD_obj, \@mask);

      if (!defined($fd)) {
        next add_loop;
      }

      if (defined($min_corr) && ($min_corr > $corr)) {
        next add_loop;
      }

      my $fd_delta = $fd - $fd_old;

      # Statistics tracking.
      if (defined($min_gain)) {
        if (defined($previous_delta_fd{$attr})) {
          # Did state (acceptability) change?
          my ($old_delta, $old_tuple) = @{$previous_delta_fd{$attr}};
          if (($old_delta > $min_gain) && (($fd_delta < $min_gain))) {
            print ";;; Info:  Adding $attr to (", join(", ", @{$old_tuple}), ") = $old_delta\n";
            print ";;; Info:  Adding $attr to (", join(", ", @current_tuple), ") = $fd_delta\n";
          } elsif (($old_delta < $min_gain) && (($fd_delta > $min_gain))) {
            print ";;; Info:  Adding $attr to (", join(", ", @{$old_tuple}), ") = $old_delta\n";
            print ";;; Info:  Adding $attr to (", join(", ", @current_tuple), ") = $fd_delta\n";
          }
        }

        $previous_delta_fd{$attr} = +[ $fd_delta, +[ @current_tuple ]];
      }

      if ($dynamic_reorder) {
        $these_deltas{$attr} = $fd_delta;
      }

      if ((!defined($best_to_add_fd)) || ($fd > $best_to_add_fd)) {
        $best_to_add_fd   = $fd;
        $best_to_add_attr = $attr;
      }
    }
  }



  ####################################################################
  # Dynamic reordering.  If enabled (see $dynamic_reorder_d for
  # default), we reorder the queue every iteration based upon the
  # $fd_deltas for addition candidates.  Those that suddenly seem
  # less valuable get sent to the back of the queue (we hope), and
  # so forth.  If we do not enable this, the queue order is fixed
  # for the entire script, based upon the single-attribute fractal
  # dimension.
  ####################################################################

  if ($dynamic_reorder) {
    my $attr     = undef;
    my $fd_delta = undef;
    my $ct       = scalar @maxqueue;
    my @new_maxqueue = ();

    while (($attr, $fd_delta) = each %these_deltas) {
      # Delete and reinsert into the queue.  Slightly tricky
      # because while they start in @maxqueue, perhaps they should
      # not remain there.  We'll delete them from @maxqueue,
      # and replace as need be.

      my $i=0;

      for ($i=0; $i < $ct; $i++) {
        if ($maxqueue[$i]->[1] == $attr) {
          # Found.
          #
          # If the new value is greater than or equal to the
          # greatest value in @fdqueue (the first one), it stays
          # in this queue.  Otherwise, no (and in the latter case,
          # we need to promote from @fdqueue to this one).

          if ((scalar(@fdqueue)) && ($fd_delta < $fdqueue[0]->[0])) {
            # Switches queues.
            # delplace(\@maxqueue, $i);
            enqueue(\@fdqueue, $fd_delta, $attr);

            # Grab a replacement.
            my $fd_replace   = undef;
            my $attr_replace = undef;

            dequeue(\@fdqueue, \$fd_replace, \$attr_replace);
            enqueue(\@new_maxqueue, $fd_replace, $attr_replace);

            # Don't increment the index.  In fact...
            $ct--;
            redo;
          } else {
            # It stays in this queue.  Either there was nobody in
            # @fdqueue, meaning that either there's no max_candidates
            # limit or @maxqueue is small enough that it's under the
            # limit, or there is a limit but the best item in @fdqueue
            # still isn't good enough to qualify even with the
            # change.

            my $old_value = $maxqueue[$i]->[0];

            enqueue(\@new_maxqueue, $fd_delta, $maxqueue[$i]->[1]);
          }

          # Stop the search.
          last;
        } else {
          # Not something we need to reevaluate.  Still need to copy it
          # over, 'tho.  Why are we copying?  Because the reordering, if
          # done completely in @maxqueue, will mangle the order, meaning
          # that our searches will need to be more expensive (frequently
          # searching from the start, that is).  So we insert everything
          # into \@new_maxqueue that should be in the next version of
          # @maxqueue.
          enqueue(\@new_maxqueue, $maxqueue[$i]->[0],
          $maxqueue[$i]->[1]);
        }
      }
    }

    @maxqueue = @new_maxqueue;
  }


  ####################################################################
  # Evaluate our options.  At this point, we should not have changed
  # the state (the queues or the mask), but we should know what, if
  # any, are the somewhat-good exchanges and additions (somewhat-good
  # because the filtering and scoring is heuristic and greedy).
  ####################################################################

  evaluate_options:  {
    my $swap_gain = 0;
    my $add_gain  = 0;

    if (defined($best_to_swap_fd) && ($best_to_swap_fd > $fd_new)) {
      $swap_gain = $best_to_swap_fd - $fd_new;
    }

    if (defined($best_to_add_fd) && ($best_to_add_fd > $fd_new)) {
      $add_gain = $best_to_add_fd - $fd_new;
    }

    if (($add_gain == 0) && ($swap_gain == 0)) {
      # Hard threshold.
      print ";;; No possible gain found.  Stopping selection.\n";
      last selection;
    }

    # So at least one of the operations had a positive gain.
    # Let's threshold them to make sure that they're *sufficient*.
    if (defined($min_gain)) {
      if ($add_gain < $min_gain) {
        $add_gain = 0;
      }

      if ($swap_gain <= 0) {
        $swap_gain = 0;
      }
    }

    if (($add_gain == 0) && ($swap_gain == 0)) {
      print ";;; No gains observed.  Stopping.\n";
      last selection;
    }

    my $attr_drop = undef;
    my $attr_add  = undef;

    if (($add_gain == 0) ||
    (($swap_gain > 0) &&
    ($add_gain <= ($swap_gain + $swap_bonus)))) {
      # Prefer swap.  Well, as long as $swap_bonus >= 0.
      $attr_drop = $best_to_drop_attr;
      $attr_add  = $best_to_swap_attr;
      $fd_new = $best_to_swap_fd;
    } else {
      # Either no swap met criteria, or they failed the biased
      # comparison with an add that did match.
      $attr_add = $best_to_add_attr;
      $fd_new = $best_to_add_fd;
      # The number of attributes currently selected has increased.
      $selected++;
    }

    if (defined($attr_drop)) {
      # We need to
      #   1.  Remove $attr_drop from @current_tuple.
      #   2.  Regenerate a correct pair of queues including $attr_drop
      #       and all its other versions (which should NOT be in the
      #       current tuple.)
      #
      #       They can't be just added to the @fdqueue pool, because they
      #       might be good enough to add to @maxqueue.  Same goes for
      #       just adding to @maxqueue.  So we need to form a unified
      #       pool.

      my $drop_delta = $best_to_drop_fd - $fd_old;

      print ";;; Dropping:  $attr_drop [$drop_delta]\n";
      @current_tuple = grep { $_ != $attr_drop } @current_tuple;

      my $bigref    = undef;
      my $smallref  = undef;

      # We'll use the larger queue for the pool.
      if (scalar(@maxqueue) >= scalar(@fdqueue)) {
        $bigref   = \@maxqueue;
        $smallref = \@fdqueue;
      } else {
        $bigref    = \@fdqueue;
        $smallref  = \@maxqueue;
      }

      merge_pools:  {
        my $pair = shift @{$smallref};

        defined($pair) || last merge_pools;
        enqueue($bigref, $pair->[0], $pair->[1]);

        (scalar @{$smallref}) || last merge_pools;
        redo merge_pools;
      }

      {
        # Add the clones.

        my $original  = $attr_mapping[$best_to_drop_fd]->[0];
        my $aliases   = $gnippam_rtta{$original};
        my $other     = undef;

        foreach $other (@$aliases) {
          if (defined($single_fd{$other})) {
            enqueue($bigref, $single_fd{$other}, $other);
          }
        }
      }

      # The gang's all here.  Now, we have two possibilities.
      # Either the $bigref pool should become the new
      # @maxqueue and @fdqueue should be empty -- because
      # there is no candidate maximum or we're under the
      # limit -- or we should pull off $max_candidates, make
      # THAT maxqueue, and the remainder becomes @fdqueue.

      if ((!defined($max_candidates)) ||
      ($max_candidates >= scalar(@{$bigref}))) {
        @maxqueue = @{$bigref};
        @fdqueue  = ();
      } else {
        my $i=0;

        @fdqueue  = @{$bigref};
        @maxqueue = ();

        for ($i=0; $i < $max_candidates; $i++) {
          my $fd   = undef;
          my $idx  = undef;

          dequeue(\@fdqueue, \$fd, \$idx);
          enqueue(\@maxqueue, $fd, $idx);
        }
      }
    }

    # We got here either through a swap or an addition, because
    # when both fail we should have used a 'last selection' to
    # jump out of the loop.

    defined($attr_add) || die;

    {
      # So we add $attr_add.
      #
      # What this involves is a) deleting $attr_add from the
      # @maxqueue (it MUST have been there), b) deleting all
      # its variations from @maxqueue and @fdqueue as well.
      #
      # Correction:  It's not necessarily in maxqueue.  It
      # might have been a relative of a swapped-out attribute.
      #
      # Every deletion from @maxqueue must be balanced by
      # a subsequent dequeue/enqueue from @fdqueue, as long
      # as @fdqueue is non-empty.

      # (grep { $! == $attr_add } @maxqueue) || die;


      if (defined($attr_drop)) {
        my $add_delta  = $best_to_swap_fd - $best_to_drop_fd;
        my $swap_delta = $best_to_swap_fd - $fd_old;

        print ";;; Replacing with:   $attr_add [$add_delta]\n";
        print ";;; Total swap delta: [$swap_delta]\n";
      } else {
        my $add_delta = $best_to_add_fd - $fd_old;
        print ";;; Adding:  $attr_add [$add_delta]\n";
      }

      # Oh, and add it to the current tuple, as well.
      push @current_tuple, $attr_add;

      {
        # Delete all variations.

        my $i  = 0;
        my $lref = undef;

        defined($attr_add) || die;

        my $original  = $attr_mapping[$attr_add]->[0];
        my $aliases   = $gnippam_rtta{$original};
        print ";;; Deleting all aliases:  ", join(", ", @$aliases), "\n";

        foreach $lref (\@maxqueue, \@fdqueue) {
          my $ct = scalar @{$lref};

          $i=0;
          delloop:  {
            ($i < $ct) || last delloop;
            my $other = $lref->[$i]->[1];

            if (grep { $other == $_} @$aliases) {
              delplace($lref, $i);
              $ct--;
              $i=0;
            } else {
              $i++;
            }
            redo delloop;
          }
        }
      }

      if (defined($max_candidates)) {
        # Refill @maxqueue.

        my $numtoshift = $max_candidates - scalar(@maxqueue);
        my $i=0;

        if ($numtoshift > scalar(@fdqueue)) {
          $numtoshift = scalar(@fdqueue);
        }

        for ($i=0; $i < $numtoshift; $i++) {
          my $fd = undef;
          my $id = undef;

          dequeue(\@fdqueue, \$fd, \$id);
          enqueue(\@maxqueue, $fd, $id);
        }
      }
    }
  }

  $fd_old = $fd_new;

  if (!(scalar(@maxqueue))) {
    print ";;; All attributes selected.  Stopping.\n";
    last selection;
  }

  redo selection;
};  # selection:



print ";;; Final attribute tuple:  [", join(" ", @current_tuple), "]\n";
print ";;; Final attribute count:  $selected\n";

print ";;;\n";

{
  # Print information on all selected attributes.  This really should
  # contain enough information to reconstruct the originals, if need
  # be.
  print ";;; ==== BEGIN ATTRIBUTE INFORMATION BLOCK ====\n";

  my $attr = 0;
  foreach $attr (@current_tuple) {
    my $lref = $attr_mapping[$attr];

    print ";;;  $attr == <" . join(", ", @$lref), ">";

    if (defined($extras[$attr])) {
      my $xlref = $extras[$attr];

      map { print " <$_>" } @$xlref;
    }
    print "\n";
  }

  print ";;; ==== END ATTRIBUTE INFORMATION BLOCK ====\n";

}
print ";;; Final FD:  $fd_new\n";

print ";;; \n";




#####################################################################
# Stop the clock.
#####################################################################

my $time_stop = new Benchmark;

print ";;; Time elapsed:  ", timestr(timediff($time_stop, $time_start)), "\n";
print ";;;\n";


#####################################################################
# Print the new vectors.
#####################################################################

print ";;; Actual vectors follow.\n";
print ";;; ", "-" x 30, "CUT-----HERE", "-"  x 30, "\n";
# write vectors

# NOT using masking... because that does not preserve ordering, and
# at this time rewriting that may confuse other scripts.  Possibly.
# In any event, it'd be a highly-visible API change.

# wrapper_obj->set_mask(@current_tuple);
$wrapper_obj->clear_mask();
$FD_obj->clear();


my $idx = undef;
my $ct  = $wrapper_obj->get_count();


for ($idx=0; $idx < $ct; $idx++) {
  my @vec = $wrapper_obj->get_obj($idx);
  my @new_vec = ();

  # We stopped using masking here, so we need to assemble
  # the vectors in order of attribute selection.  When
  # originally written, the DFD masking system rearranges
  # the dimensions as it sees fit (in numerical order
  # instead of order as given).

  @new_vec = map { $vec[$_] } @current_tuple;

  my $line = (join " ", @new_vec) . "\n";
  print $fh_new $line;
}


if ($fh_old != \*STDIN) {
  close($fh_old) || die "Failed to close '$fn_old':  $!";
}

if (($fh_new != \*STDERR) && ($fh_new != \*STDOUT)) {
  close($fh_new) || die "Failed to close '$fn_new':  $!";
}

exit(0);





##############################################################
#
# Subroutines only beyond this point.
#
##############################################################






##############################################################
# We do lots of temporary dimensionality changes.  This
# function basically saves the old set of attribute indices
# (since we don't transform them ourselves, and we can return
# to the full unmodified set at any time, we only need to
# save the indices), replaces them with a set to check,
# runs the fractal dimension computation, and then resets
# the attributes.
##############################################################

sub test_set($$$) {
  my $wrapper_obj = shift;
  my $FD_obj      = shift;
  my $attribs_ref = shift;
  my $fd          = undef;
  my $corr        = undef;
  my $has_mask    = defined($wrapper_obj->get_mask());
  my @old_mask    = $wrapper_obj->get_mask();

  defined($wrapper_obj) || die;
  defined($FD_obj) || die;
  defined($attribs_ref) || die;

  map { die unless defined($_) } @{$attribs_ref};
  map { die unless ($_ == int($_)) } @{$attribs_ref};

  $wrapper_obj->set_mask(@{$attribs_ref});
  #  print "Computing FD for (", join(", ", @$attribs_ref), ")\n";
  ($fd, $corr) = calc_fd($FD_obj, $wrapper_obj);
  #  print " ==> $fd ($corr)\n";

  if ($has_mask) {
    map { die unless defined($_) } @old_mask;
    $wrapper_obj->set_mask(@old_mask);
  } else {
    $wrapper_obj->clear_mask();
  }

  if (wantarray()) {
    return ($fd, $corr);
  } else {
    # Don't do this.  You REALLY want the $corr value, since it's
    # the only thing telling you whether or not the $fd value is to
    # be trusted.  Well, a human being could eyeball the pair-count
    # plot, but this is a Perl script, not a human being with a
    # pair of eyeballs.
    return $fd;
  }
}



##############################################################
# This function invokes the fractal dimension methods of the
# DiskFracDim package, accessing the data via $wrapper_obj.
# Pay attention to the $corr value.  A decent threshold is
# 0.97 or so; perhaps a bit higher if you want to be quite
# strict regarding self-similarity.
##############################################################

sub calc_fd($$) {
  my $FD_obj      = shift;
  my $wrapper_obj = shift;
  my $fd          = undef;
  my $corr        = undef;

  $FD_obj->clear();
  $FD_obj->set_params(data_o         => $wrapper_obj,
  q              => $exponent,
  two_table      => 1,
  counter_memory => 1,
  speed          => undef,
  data_memory    => $use_memory);

  # The middle value is the y-intercept, which we really don't
  # care about.
  ($fd, $corr) = ($FD_obj->fracdim())[0,2];

  #  print "Computing FD for:  $mask_str:  ";
  #  print (defined($fd) ? "$fd\n" : "undef\n");

  return ($fd, $corr);
}



#####################################################################
# Priority queue management.
#
# Stored are references to two-element lists, with the first one
# being the fractal dimension (the queue key; large is good) and
# the second being the attribute index.
#####################################################################

sub upqueue($$$);
sub dnqueue($$$);


# Add an element to the queue.
sub enqueue($$$) {
  my $qref = shift;
  my $qkey = shift;
  my $lref = shift;
  my $ct   = scalar @{$qref};

  push @{$qref}, +[ $qkey, $lref ];
  return upqueue($qref, $ct, $qkey);
}



# Remove the item with maximum priority.  The priority and data
# are returned via the parameters (call by reference, e.g.
# dequeue(\@queue, \$priority, \$data).
sub dequeue($$$) {
  my $qref     = shift;
  my $qkey_ref = shift;
  my $item_ref = shift;

  if (!(scalar(@{$qref}))) {
    $$qkey_ref = undef;
    $$item_ref = undef;
  }

  $$qkey_ref = $qref->[0]->[0];
  $$item_ref = $qref->[0]->[1];

  # Remove last...
  my $iref = pop @{$qref};

  if (scalar(@{$qref}) > 0) {
    # And replace the first.
    $qref->[0] = $iref;

    return dnqueue($qref, 0, $iref->[0]);
  } else {
    # It WAS the first (and last).
    return $qref;
  }
}



# Delete from the queue the data at a given location in the array.
sub delplace($$) {
  my $qref = shift;
  my $idx  = shift;

  if ((scalar(@{$qref}) <= $idx)) {
    return $qref;
  }

  my $iref = pop @{$qref};

  if (scalar(@{$qref}) > $idx) {
    $qref->[$idx] = $iref;
    dnqueue($qref, $idx, $iref->[0]);

    if ($qref->[$idx] == $iref) {
      upqueue($qref, $idx, $iref->[0]);
    }
    return $qref;
  } else {
    return $qref;
  }
}



# Delete from the queue the item with a specific data value (the
# attribute index).  Requires a full linear scan since, well, it's
# a heap.  We actually could avoid this by using a hash table to
# store data -> slot associations, but the added coding and hashing
# probably won't buy us that much, since usually there aren't THAT
# many attributes (and the time cost probably is more towards
# pair-counting rather than attribute list scanning...).
#
# It'd be something to consider, 'tho, for usage on very, very
# high-dimensional sets.
sub delindex($$) {
  my $qref = shift;
  my $idx  = shift;
  my $i    = 0;
  my $ct   = scalar @{$qref};

  for ($i=0; $i < $ct; $i++) {
    if ($qref->[$i]->[1] == $idx) {
      return delplace($qref, $i);
    }
  }

  return $qref;
}



# Maintain the heap property by swapping upwards as necessary.
# Typically used after insertions.
sub upqueue($$$) {
  my $qref   = shift;
  my $idx    = shift;
  my $itmkey = shift;
  my $par    = int(($idx-1)/2);

  if ($idx == 0) {
    return $qref;
  }

  my $parkey = $qref->[$par]->[0];

  if ($parkey < $itmkey) {
    # Oops.   Need to maintain maxheap property.
    my $swap_ref = $qref->[$par];

    $qref->[$par] = $qref->[$idx];
    $qref->[$idx] = $swap_ref;

    return upqueue($qref, $par, $itmkey);
  } else {
    return $qref;
  }
}


# Maintain the heap property by swapping downwards as necessary.
# Typically used after deletions.
sub dnqueue($$$) {
  my $qref   = shift;
  my $idx    = shift;
  my $itmkey = shift;

  my $ct = scalar @{$qref};
  my $cidx = (2*$idx)+1;

  if ($cidx >= $ct) {
    # No left child.
    return $qref;
  } elsif (($cidx+1) >= $ct) {
    # No right child.
    my $cref = $qref->[$cidx];

    if ($cref->[0] > $itmkey) {
      # Swap and recurse.
      $qref->[$cidx] = $qref->[$idx];
      $qref->[$idx]  = $cref;

      return dnqueue($qref, $cidx, $itmkey);
    }
  } else {
    # Has both children.
    my $cref = $qref->[$cidx];

    if ($cref->[0] < $qref->[$cidx+1]->[0]) {
      $cidx++;
      $cref = $qref->[$cidx];
    }

    if ($cref->[0] > $itmkey) {
      # Swap and recurse.
      $qref->[$cidx] = $qref->[$idx];
      $qref->[$idx]  = $cref;

      return dnqueue($qref, $cidx, $itmkey);
    }
  }


  return $qref;
}



# Block-processing function.
{
  my $block      = undef;

  sub process_block($$) {
    my $line     = shift;
    my $attr_ref = shift;

    ref($attr_ref) || die;

    chomp($line);
    $line =~ s/^\s+//o;
    $line =~ s/\s+$//o;
    ($line =~ /^\;/) || return;

    if (!defined($block)) {
      if ($line =~ /= BEGIN\s+(.*)\s+BLOCK =/o) {
        $block = $1;
      }

      if ($block eq 'TRANSFORMATION CHOICE') {
        print ";;; Reproducing TC block from input file.\n";
        print ";;; ==== BEGIN TRANSFORMATION CHOICE BLOCK ====\n";
      }
    } else {
      if ($line =~ /= END\s+(.*)\s+BLOCK =/o) {
        ($block eq $1) || die "Block begin/end mismatch -- $1, $block";
        if ($block eq 'TRANSFORMATION CHOICE') {
          print ";;; ==== END TRANSFORMATION CHOICE BLOCK ====\n";
        }
        $block = undef;
      } else {
        # Comment in block.
        if ($block eq 'TRANSFORMATION CHOICE') {
          ($line =~ /<([^>]+)>/) || die
          "Malformed TRANSFORMATION CHOICE block.";
          my $item  = $1;

          print "$line\n";
          $item =~ s/\s+/ /go;
          my @list = split /\s*,\s*/, $item;
          scalar(@list) || die;

          $attr_mapping[$$attr_ref] = +[ @list ];

          my @extra_blocks = ();

          while ($line =~ s/^[^<]*(<[^>]+>)\s*(<[^>]+>)/$1 /) {
            push @extra_blocks, $2;
          }

          if (scalar(@extra_blocks)) {
            $extras[$$attr_ref] = +[ @extra_blocks ];
          }

          my $orig = $list[0];
          if (!defined($gnippam_rtta{$orig})) {
            $gnippam_rtta{$orig} = +[];
          }
          push @{$gnippam_rtta{$orig}}, $$attr_ref;
          $$attr_ref++;
        }
      }
    }
  }
}

##############################################################
# "Interpreter, end program."
##############################################################
