Factor.cc Source File

00001 /*
00002     File:       Factor.cc
00003 
00004     Function:   Implements some matrix factorizations: SVD, QR
00005 
00006     Author:     Andrew Willmott
00007 
00008     Notes:      Hopefully this is a bit more understandable
00009                 than most of the other SVD code out there.
00010 
00011                 This should also make it a bit easier to optimize
00012                 for modern compilers, but that's a task for another
00013                 day.
00014 */
00015 
00016 #include "vl/Factor.h"
00017 
00018 #define DBG_COUT if (0) cerr
00019 
00020 /*
00021     NOTE
00022     
00023     Householder transformations zero all elements of a vector v apart
00024     from the first element.
00025     
00026         H = I - 2uut
00027             where ||u||^2 = 1
00028             HtH = HHt = HH = I
00029             - it's a reflection, so we would expect HH = I
00030 
00031         Hv = [c 0 0 0 0]
00032             where c = +/- ||v||
00033             and u = f(v - [c 0 0..])
00034             where f = 1/sqrt(2c(c - v[0]))
00035 
00036         For convenience, we can write
00037             Hv = v - 2u(u.v)
00038         or
00039             Hv = v - 2w(w.v)/g
00040             where g = c(c - v[0])
00041             w = v - [c 0 0..]
00042 */
00043 
00044 //#define DO_VL_SUB
00045 
00046 #ifdef DO_VL_SUB
00047 // experiment to see just how much slower using the generic sub-matrix
00048 // calls makes things. 
00049 
00050 Double LeftHouseholder(Matd &A, Matd &U, const Int i)
00051 // Zeroes out those elements below the diagonal in column i by use of a
00052 // Householder transformation matrix H: A' = HA. U is replaced with UH,
00053 // so that U'A' = UH HA = UA
00054 {
00055     Assert(i < A.Rows(), "bad i");
00056 
00057     Int         j, k;
00058     Double      scale, vSqrLen, oldDiag, newDiag, g, dotProd;
00059     Int         clen = A.Rows() - i;
00060     SubVecd     Ai = sub(col(A, i), i, clen);
00061 
00062     // scale factor is the sum of abs of elements below and including
00063     // the diagonal in column i
00064     scale = 0.0;
00065     for (k = 0; k < Ai.Elts(); k++)
00066         scale += abs(Ai[k]);
00067         
00068     // return if nothing to eliminate...
00069     if (scale == 0.0)
00070         return(0.0);
00071 
00072     // scale the column, find sum of squares
00073     Ai = Ai / scale;
00074     vSqrLen = sqrlen(Ai);
00075     
00076     oldDiag = Ai[0];            // v[0]
00077     newDiag = sqrt(vSqrLen);    // c
00078     if (oldDiag > 0.0)
00079         newDiag = -newDiag;
00080         
00081     g = vSqrLen - oldDiag * newDiag;    // c(c - v0)
00082 
00083     // replace column i of A with w
00084     Ai[0] = oldDiag - newDiag;      // v[0] -= c
00085 
00086     // Apply H to A by transforming remaining columns of A
00087     for (j = i + 1; j < A.Cols(); j++)
00088     {
00089         SubVecd     Aj = sub(col(A, j), i, clen);
00090 
00091         // Hv = v - w(v.w) / g
00092         Aj = Aj - Ai * (dot(Ai, Aj) / g);
00093     }
00094 
00095     // Apply H to rows of U
00096     for (j = 0; j < A.Rows(); j++)  
00097     {
00098         SubVecd     Uj = sub(row(U, j), i, clen);
00099 
00100         // vH = v - (v.w)w / g
00101         Uj = Uj - Ai * (dot(Ai, Uj) / g);
00102     }
00103 
00104     return(newDiag * scale);
00105 }
00106 
00107 #else
00108 
00109 Double LeftHouseholder(Matd &A, Matd &U, const Int i)
00110 // Zeroes out those elements below the diagonal in column i by use of a
00111 // Householder transformation matrix H: A' = HA. U is replaced with UH,
00112 // so that U'A' = UH HA = UA
00113 {
00114     Int     j, k;
00115     Double  scale, vSqrLen, oldDiag, newDiag, g, dotProd;
00116     
00117     Assert(i < A.Rows(), "bad i");
00118 
00119     // scale factor is the sum of abs of elements below and including
00120     // the diagonal in column i
00121     scale = 0.0;
00122     for (k = i; k < A.Rows(); k++)
00123         scale += abs(A[k][i]);
00124         
00125     // return if nothing to eliminate...
00126     if (scale == 0.0)
00127         return(0.0);
00128 
00129     // scale the column, find sum of squares
00130     vSqrLen = 0.0;
00131     for (k = i; k < A.Rows(); k++)
00132     {
00133         A[k][i] /= scale;
00134         vSqrLen += sqr(A[k][i]);
00135     }
00136     
00137     oldDiag = A[i][i];          // v[0]
00138     newDiag = sqrt(vSqrLen);    // c
00139     if (oldDiag > 0.0)
00140         newDiag = -newDiag;
00141         
00142     g = vSqrLen - oldDiag * newDiag;    // c(c - v0)
00143 
00144     // replace column i of A with w
00145     A[i][i] = oldDiag - newDiag;        // v[0] -= c
00146 
00147     // Apply H to A by transforming remaining columns of A
00148     for (j = i + 1; j < A.Cols(); j++)
00149     {
00150         // Hv = v - w(v.w) / g
00151 
00152         // find dot product of the columns [i:i..m] (w) and [j:i..m]
00153         dotProd = 0.0;
00154         for (k = i; k < A.Rows(); k++)
00155             dotProd += A[k][j] * A[k][i];
00156 
00157         dotProd /= g;
00158 
00159         // calculate A's new column j
00160         for (k = i; k < A.Rows(); k++)
00161             A[k][j] -= A[k][i] * dotProd;
00162     }
00163 
00164     // Apply H to rows of U
00165     for (j = 0; j < A.Rows(); j++)  
00166     {
00167         // vH = v - (v.w)w / g
00168 
00169         dotProd = 0.0;
00170         for (k = i; k < A.Rows(); k++)
00171             dotProd += A[k][i] * U[j][k];
00172 
00173         dotProd /= g;
00174 
00175         for (k = i; k < A.Rows(); k++)
00176             U[j][k] -=  A[k][i] * dotProd;
00177     }
00178 
00179     return(newDiag * scale);
00180 }
00181 
00182 #endif
00183 
00184 Double RightHouseholder(Matd &A, Matd &V, const Int i)
00185 // Zeroes out those elements to the right of the super-diagonal in row i
00186 // by use of a Householder transformation matrix H: A' = AH. V is
00187 // replaced with VH, so that A'V't = AH (HV)t = AVt
00188 {
00189     Int     j, k;
00190     Double  scale, vSqrLen, oldSuperDiag, newSuperDiag, g, dotProd;
00191     
00192     Assert(i < A.Cols() - 1, "bad i");
00193     
00194     // scale factor is the sum of abs of elements to the right of the
00195     // diagonal in row i
00196     scale = 0.0;
00197     for (k = i + 1; k < A.Cols(); k++)
00198         scale += abs(A[i][k]);
00199 
00200     // return if nothing to eliminate...
00201     if (scale == 0.0)
00202         return(0.0);
00203 
00204     vSqrLen = 0.0;
00205     for (k = i + 1; k < A.Cols(); k++)
00206     {
00207         A[i][k] /= scale;
00208         vSqrLen += sqr(A[i][k]);    
00209     }
00210     
00211     oldSuperDiag = A[i][i + 1];
00212     newSuperDiag = sqrt(vSqrLen);
00213     if (oldSuperDiag > 0.0)
00214         newSuperDiag = -newSuperDiag;
00215         
00216     g = oldSuperDiag * newSuperDiag - vSqrLen;
00217     A[i][i + 1] = oldSuperDiag - newSuperDiag;
00218 
00219     // transform the remaining rows below i
00220     for (j = i + 1; j < A.Rows(); j++)  
00221     {
00222         dotProd = 0.0;
00223         for (k = i + 1; k < A.Cols(); k++)
00224             dotProd += A[i][k] * A[j][k];
00225 
00226         dotProd /= g;
00227 
00228         for (k = i + 1; k < A.Cols(); k++)
00229             A[j][k] += A[i][k] * dotProd;
00230     }
00231 
00232     // Accumulate the transform in V
00233     for (j = 1; j < A.Cols(); j++)  
00234     {
00235         dotProd = 0.0;
00236         for (k = i + 1; k < A.Cols(); k++)
00237             dotProd += A[i][k] * V[j][k];
00238 
00239         dotProd /= g;
00240 
00241         for (k = i + 1; k < A.Cols(); k++)
00242             V[j][k] += A[i][k] * dotProd;       
00243     }
00244 
00245     // return new super-diagonal element A[i][i+1]
00246     return(newSuperDiag * scale);
00247 }
00248 
00249 Void Bidiagonalize(Matd &A, Matd &U, Matd &V, Vecd &diagonal, Vecd &superDiag)
00250 // bidiagonalize matrix A by using householder transformations to eliminate
00251 // columns below the diagonal and rows to the right of the super-diagonal.
00252 // A is modified, and the diagonal and superDiag set.
00253 {
00254     Int     i;
00255     Matd    Us;
00256         
00257     Assert(A.Rows() >= A.Cols(), "matrix must have rows >= cols");
00258 
00259     diagonal.SetSize(A.Cols());
00260     superDiag.SetSize(A.Cols() - 1);
00261     U.SetSize(A.Rows(), A.Cols());
00262     Us.SetSize(A.Rows(), A.Rows());
00263     V.SetSize(A.Cols(), A.Cols());
00264     Us = vl_I;
00265     V = vl_I;
00266     
00267     for (i = 0; i < A.Cols(); i++)
00268     {
00269         diagonal[i] = LeftHouseholder(A, Us, i);
00270 
00271         if (i < A.Cols() - 1)
00272             superDiag[i] = RightHouseholder(A, V, i);
00273     }
00274 
00275     U = sub(Us, 0, 0, A.Rows(), A.Cols());
00276 }
00277     
00278 Double QRFactorization(Matd &A, Matd &Q, Matd &R)
00279 // Factor A into an orthogonal matrix Q and an upper-triangular matrix R.
00280 // Destroys A.
00281 {
00282     Double  normAcc = 0.0, diagElt;
00283     Int     i, j;
00284     Matd    Qs;
00285         
00286     Assert(A.Rows() >= A.Cols(), "matrix must have rows >= cols");
00287 
00288     Qs.SetSize(A.Rows(), A.Rows());
00289     Q.SetSize(A.Rows(), A.Cols());
00290     R.SetSize(A.Cols(), A.Cols());
00291     Qs = vl_I;
00292     R = vl_0;
00293     
00294     // for each column
00295     for (i = 0; i < A.Cols(); i++)
00296     {
00297         // apply householder
00298         diagElt = LeftHouseholder(A, Qs, i);
00299         // copy over row i of A to R
00300         R[i][i] = diagElt;
00301         j = A.Cols() - i - 1;
00302         if (j)
00303             last(R[i], j) = last(A[i], j);
00304         normAcc = Max(normAcc, abs(diagElt));
00305     }
00306 
00307     Q = sub(Qs, 0, 0, A.Rows(), A.Cols());
00308 
00309     return(normAcc);
00310 }
00311 
00312 /*
00313     NOTE
00314     
00315     Givens rotations perform a rotation in a 2D plane. Can be
00316     used to zero an entry of a matrix.
00317     
00318     We pick axes i and j, then form G such that it is basically
00319     I with entries:
00320 
00321         i    j
00322     i:  c   -s
00323          ...
00324     j:  s    c
00325 
00326 
00327     s^2 + c^2 = 1
00328     inv(G) = Gt
00329 
00330     A' = GA
00331         modifies rows i and j only:
00332             row'[i] = c * row[i] - s * row[j]
00333             row'[j] = c * row[j] + s * row[i]
00334         can force:
00335         A'[i][j] = 0 
00336             if c * A[i][j] - s * A[j][j] = 0
00337             if c = A[j][j], s = A[i][j]
00338             must normalise to retain identity:
00339                 norm = sqrt(c^2 + s^2), c /= norm, s /= norm
00340             A'[j][j] = c * (norm * c) + s * (norm * s) = norm
00341         A'[i][i] = 0 
00342             if c * A[i][i] - s * A[j][i] = 0
00343             if c = A[j][i], s = A[i][i]
00344 
00345     A' = AGt
00346 
00347 
00348 */
00349 
00350 Void RotateRight(Matd& U, const Int i, const Int j, const Double c, const Double s)
00351 // rotate U by the given Givens rotation: U' = UGt
00352 // where G is defined as above
00353 {
00354     Vecd    temp;
00355     
00356     temp = col(U, i);
00357     col(U, i) =  c * col(U, i) - s * col(U, j);
00358     col(U, j) =  c * col(U, j) + s * temp;
00359 }
00360 
00361 Void ClearSuperEntry(Vecd &diagonal, Vecd &superDiag, Matd &U, 
00362             const Int k, const Int l, const Double prec)
00363 // We have a zero diagonal element at diag[l]. This means we can zero
00364 // out the super-diagonal entry to its right with a series of rotations.
00365 // Each rotation clears an entry (starting with the original
00366 // super-diagonal entry) but creates another entry immediately to its
00367 // right which must in turn be zeroed. Eventually we run off the end of
00368 // the matrix, and the process terminates.
00369 {
00370     Double  c = 0.0, s = 1.0;
00371     Double  f;
00372     Int     i;
00373     Double  norm;
00374 
00375     // We zero the superdiagonal element l by using the row immediately
00376     // below it in a Givens rotation. Unfortunately, the superdiagonal
00377     // in this row in turn creates another entry A[l][l+2]. We must keep
00378     // applying rotations in the plane of the axes l and l + n to keep
00379     // zeroing each new entry created until we reach the right hand side
00380     // of the matrix.
00381 
00382     // initialise: start with f being the original super diagonal entry
00383     // we're eliminating
00384     f = superDiag[l];
00385     superDiag[l] = 0.0;
00386 
00387     // at each stage, f = A[l][i], rotate in the l/i plane
00388     for (i = l + 1; true; i++)
00389     {
00390         if (abs(f) + prec == prec)  // if f is zero, we don't have to eliminate further
00391             break;
00392 
00393         // to eliminate f at each stage we pick s = -f, c = di
00394         s = -f;
00395         c = diagonal[i];
00396 
00397         // normalise
00398         norm = sqrt(sqr(c) + sqr(s));       
00399         c /= norm;
00400         s /= norm;
00401 
00402         // apply inverse rotation to U
00403         RotateRight(U, i, l, c, s);
00404 
00405         // do the rotation, zeroing f and creating f' immediately to its right
00406         diagonal[i] = norm;     // di' = c * di - s * f = norm
00407         if (i == k)             // done?
00408             break;
00409         f = s * superDiag[i];   // f'  = c * 0  + s * superdiag[i]
00410         superDiag[i] *= c;      // ei' = c * ei - s * 0
00411     }
00412 }
00413 
00414 Int FindSplit(Vecd &diagonal, Vecd &superDiag, Matd &U, const Int k, const Double prec)
00415 // Check for a zero entry along the superdiagonal; if we find one, we can
00416 // QR iterate on two separate matrices above and below it.
00417 // If there is a zero on the *diagonal*, we can call ClearSuperEntry to
00418 // zero out the corresponding superdiagonal entry to its right.
00419 {
00420     Int     l;
00421     
00422     for (l = k - 1; l >= 0; l--)
00423         if (superDiag[l] + prec == prec)
00424             // can split here
00425             return(l + 1);
00426         else if (diagonal[l] + prec == prec)
00427         {
00428             // can create a split here
00429             DBG_COUT << "creating split at " << l << endl;
00430             DBG_COUT << "diagonal " << diagonal << endl;
00431             DBG_COUT << "superDiag " << superDiag << endl;
00432             ClearSuperEntry(diagonal, superDiag, U, k, l, prec);
00433             DBG_COUT << "done: " << l << endl;
00434             DBG_COUT << "diagonal " << diagonal << endl;
00435             DBG_COUT << "superDiag " << superDiag << endl;
00436             DBG_COUT << endl;
00437             return(l + 1);
00438         }
00439 
00440     return(0);
00441 }
00442 
00443 Void Diagonalize(Vecd &diagonal, Vecd &superDiag, Matd &U, Matd &V)
00444 // Diagonalise the bidiagonal matrix A = (diagonal, superDiag), accumulating 
00445 // transforms into U on the left and Vt on the right.
00446 //
00447 // diag(A) = diagonal and diag(A, 1) = superDiag, that is to say, diagonal[0]
00448 // is A[0][0], and superDiag[0] is A[0][1].
00449 {
00450     Int     i, j, k, l;
00451     Double  prec;
00452     
00453     // work out a good precision value
00454     prec = abs(diagonal[0]);
00455     for (i = 1; i < diagonal.Elts(); i++)
00456         prec = Max(prec, abs(diagonal[i]) + abs(superDiag[i - 1]));
00457 
00458     // work our way up from the bottom of the matrix
00459     for (k = diagonal.Elts() - 1; k >= 0; k--)
00460     {
00461         while (1)
00462         {
00463             // if there's a split, start from there rather than A[0][0]
00464             l = FindSplit(diagonal, superDiag, U, k, prec);
00465             
00466             DBG_COUT << "QR-shift A " << l << ":" << k << endl;
00467 
00468             DBG_COUT << "diagonal " << diagonal << endl;
00469             DBG_COUT << "superDiag " << superDiag << endl;
00470 
00471             // are we done?
00472             if (l == k)     // last super-diag entry must be zero -- what we wanted.
00473                 break;
00474 
00475             // QR iterate over the sub-matrix of A, A[l..k][l..k], until
00476             // we've nuked super diagonal entry A[k - 1][k]
00477 
00478             // For extra stability, we shift the QR computation.  We
00479             // want the shift to be as close as possible to the largest
00480             // eigenvalue, which of course we don't know yet... so we
00481             // take the eigenvalues of the 2x2 matrix at the bottom of
00482             // our window of A, and use those.
00483 
00484             Double  shift;
00485             Double  e0;
00486             Double  e1 = superDiag[k - 1];
00487             Double  d1 = diagonal[k - 1];
00488             Double  d2 = diagonal[k];
00489             Double  dl = diagonal[l];
00490 
00491             if (k > 1)
00492                 e0 = superDiag[k - 2];
00493             else
00494                 e0 = 0.0;
00495                 
00496             //      d0  e0
00497             //          d1  e1
00498             //   k:         d2
00499 
00500             shift = (d1 - d2) * (d1 + d2) + (e0 - e1) * (e0 + e1);
00501             shift /= 2.0 * e1 * d1;
00502             shift += sign(shift) * sqrt(sqr(shift) + 1.0);
00503             shift = ((dl - d2) * (dl + d2) + e1 * (d1 / shift - e1)) / dl;
00504 
00505 
00506             Double      cos_th, sin_th, cos_ph, sin_ph;
00507             Double      d1n, norm, e2, f1, g0, g1, e1n;
00508             
00509             d1 = dl;    // d1 == A[i - 1][i - 1]: initialise it to that
00510             e1 = superDiag[l];  
00511             // the first rotation is the QR-shifted one. After that, we
00512             // are continually eliminating below-diagonal and
00513             // above-super diagonal elements created by the previous
00514             // rotation, until we hit the bottom-right of the matrix and
00515             // we have a bidiagonal matrix again. The QR-shift will
00516             // ensure that the new matrix has smaller super-diagonal
00517             // elements, however.
00518             g0 = e1;
00519             e0 = shift;
00520 
00521             for (i = l + 1; true; i++)
00522             {
00523                 // rotate in plane of axes i - 1, i:
00524                 //    d0  e0  g0      d0  e0'  0
00525                 //        d1  e1   ->     d1'  e1'
00526                 //  i:        d2          f1   d2'
00527                 // we are rotating on the right, and so affecting columns
00528                 d2 = diagonal[i];
00529 
00530                 // eliminate g0 using e0 (except for the first iteration,
00531                 // where e0 and g0 would be off the top of the matrix, and
00532                 // we're performing the QR-shift.)
00533                 sin_th = g0;
00534                 cos_th = e0;
00535 
00536                 norm = sqrt(sqr(cos_th) + sqr(sin_th));
00537                 cos_th /= norm;
00538                 sin_th /= norm;
00539 
00540                 // perform the rotation
00541                 if (i > 1)
00542                     superDiag[i - 2] = norm;    // e0' = cos_th * e0 + sin_th * g0
00543                 d1n = cos_th * d1 + sin_th * e1;    
00544                 e1n = -sin_th * d1 + cos_th * e1;
00545 
00546                 f1 = d2 * sin_th;           // f1  = c *  0 + s * d2
00547                 d2 *= cos_th;               // d2' = c * d2 + s * 0
00548 
00549                 // Accumulate rotation in V
00550                 RotateRight(V, i, i - 1, cos_th, sin_th);  
00551 
00552                 // in eliminating g0, we've created f1: eet must be
00553                 // destroyed
00554 
00555                 // rotate in plane of axes i - 1, i:
00556                 //      d0  e0             d0   e0
00557                 //      d1  e1      ->     d1'  e1'  g1'
00558                 //  i:  f1  d2  e2         0    d2'  e2'
00559                 // we are rotating on the left, and so affecting rows
00560 
00561                 // standard rotation to eliminate the f1 we've just created
00562                 cos_ph = d1n;
00563                 sin_ph = f1;
00564                 
00565                 norm = sqrt(sqr(cos_ph) + sqr(sin_ph));
00566 
00567                 if (norm == 0.0)
00568                 {
00569                     // rotation angle is arbitrary
00570                     cos_ph = cos_th;
00571                     sin_ph = sin_th;
00572                 }
00573                 else
00574                 {
00575                     cos_ph /= norm;
00576                     sin_ph /= norm;
00577                 }
00578                 
00579                 // as usual, for the elimination pair, f1 = 0, d1 = norm 
00580                 diagonal[i - 1] = norm;     // d1'
00581                 // rotate e1 and d2
00582                 e1 =  cos_ph * e1n + sin_ph * d2;
00583                 d2 = -sin_ph * e1n + cos_ph * d2;
00584                 
00585                 // Accumulate rotation into U
00586                 RotateRight(U, i, i - 1, cos_ph, sin_ph);
00587 
00588                 if (i == k)
00589                     break;
00590                     
00591                 // rotate g1 and e2
00592                 e2 = superDiag[i];
00593                 g1 = sin_ph * e2;
00594                 e2 = cos_ph * e2;
00595 
00596                 d1 = d2;        // the new d1 will be the old d2
00597                 e0 = e1;
00598                 e1 = e2;
00599                 g0 = g1;
00600             }
00601 
00602             if (l > 0) 
00603                 superDiag[l - 1] = 0.0;     // Supposed to be eliminated by now
00604             superDiag[k - 1] = e1;          // write in the last superdiagonal
00605             diagonal[k] = d2;               // and diagonal
00606         }       
00607         
00608         // Force singular value to be +ve if needs be
00609         if (diagonal[k] < 0)        
00610         {
00611             DBG_COUT << "flipping " << k << endl;
00612             diagonal[k] = -diagonal[k];
00613             // flip the corresponding row of Vt to balance out
00614             col(V, k) = -col(V, k);
00615         }
00616         DBG_COUT << "done: " << endl;
00617         DBG_COUT << "diagonal " << diagonal << endl;
00618         DBG_COUT << "superDiag " << superDiag << endl;
00619         DBG_COUT << endl;
00620     }
00621 }
00622 
00623 Void SVDFactorization(Matd &A, Matd &U, Matd &V, Vecd &diagonal)
00624 // pretty easy...
00625 {
00626     Vecd    superDiag;
00627     
00628     Bidiagonalize(A, U, V, diagonal, superDiag);
00629     Diagonalize(diagonal, superDiag, U, V);
00630 }