#!/usr/local/bin/perl5 -w
#
# This is a quick-and-dirty script to perform SVD-based dimensionality
# reduction.  It may be inefficient and on large matrices it'll likely
# eat up all your RAM and then beg for more, but...

use strict;
use lib '/usr/lw2j/private/Stat/Perl';

require Numerical;

# Retain at least this much of the variance.
my $variance_to_keep = shift @ARGV;

# However, we'll handle it differently if it's an integer.  See below --
# if it's given, we'll treat it as /the/ number of components to keep
# (unless fewer would explain /all/ the variance).
#
# CHANGE:  If it's 1, we treat this as if it were a float -- 1.00 --
# meaning "keep all components with a corresponding non-zero singular
# value.  Set it to LESS than 1 to reduce based on that, or set it
# to 2 or more if you want to specify that many components.
#
# If you want to get exactly 1 component, just specify 2 or whatever
# and then use 'cut' -- the algorithm used isn't incremental, so it'll
# still take the same amount of work.

# Default:  Drop only those components with 0 significance.
$variance_to_keep = defined($variance_to_keep) ? $variance_to_keep : 1;

{
  # Read matrix.  Note that if it's a BIG matrix, we're going to have
  # memory problems...

  my $A = +[];
  my $line = undef;
  my $m = 0;
  my $n = undef;

  while (defined($line = <STDIN>)) {
    # We'll read line-by-line.  Commas are ignored, but octothorpes
    # and semicolons are treated as "everything to end of line is a
    # comment".

    $line =~ tr/,/ /;
    $line =~ s/\#.*$//;
    $line =~ s/\;.*$//;
    $line =~ s/^\s+//;
    $line =~ s/\s+$//;
    $line =~ s/\s+/ /g;

    ($line =~ /\S/) || next;

    my @vec = split /\s/, $line;

    if (!defined($n)) {
      $n = scalar @vec;
    } elsif ($n != (scalar @vec)) {
      die "Your matrix has an irregular number of columns.";
    }

    push @$A, \@vec;
    $m++;
  }

  my $AT = Numerical::matrix_transpose($A);

  # Standardize the matrix by centering it on the origin and
  # dividing by standard deviations.
  my @means = ();
  my @devs  = ();

  # The rows of $AT are the columns of $A, and we want those means.
  map { push @means, ArrayFoo::arrMean($_) } @$AT;
  map { push @devs,  ArrayFoo::arrDev($_)  } @$AT;

  # Transform.  Each column of A should now have mean 0, and is
  # in deviations.
  for (my $j=0; $j < $n; $j++) {
    for (my $i=0; $i < $m; $i++) {
      $A->[$i]->[$j] -= $means[$j];
      $AT->[$j]->[$i] -= $means[$j];
    }

    if ($devs[$j] > 0) {
      for (my $i=0; $i < $m; $i++) {
        $A->[$i]->[$j] /= $devs[$j];
        $AT->[$j]->[$i] /= $devs[$j];
      }
    }
  }

  my ($PI, $Q, $U, $V, $D) = Numerical::matrix_pseudoinverse($A);

  my $eigensum = 0;
  my $p        = scalar @$Q;

  # Compute sum of eigenvalues.
  map { $eigensum += ($_ ** 2) } @$Q;

  my $retained = 0;
  my $sum_kept = 0;

  # If a fraction, or 1, treat as a fraction of variance to keep.
  if (($variance_to_keep != int($variance_to_keep)) ||
  ($variance_to_keep == 1)) {
    while ($sum_kept < $variance_to_keep) {
      $sum_kept += ($Q->[$retained] ** 2) / $eigensum;
      $retained++;

      ($retained < $p) || last;
    }
  } else {
    $retained = $variance_to_keep;
    $retained = ($retained <= $p) ? $retained : $p;
  }

  map { splice @{$_}, $retained } @$PI;

  my $new_matrix = Numerical::matrix_multiply($A, $PI);

  foreach (@$new_matrix) {
    print join(" ", @{$_}),"\n";
  }
}


exit 0;
