00001 /*
00002 File: Factor.cc
00003
00004 Function: Implements some matrix factorizations: SVD, QR
00005
00006 Author: Andrew Willmott
00007
00008 Notes: Hopefully this is a bit more understandable
00009 than most of the other SVD code out there.
00010
00011 This should also make it a bit easier to optimize
00012 for modern compilers, but that's a task for another
00013 day.
00014 */
00015
00016 #include "vl/Factor.h"
00017
00018 #define DBG_COUT if (0) cerr
00019
00020 /*
00021 NOTE
00022
00023 Householder transformations zero all elements of a vector v apart
00024 from the first element.
00025
00026 H = I - 2uut
00027 where ||u||^2 = 1
00028 HtH = HHt = HH = I
00029 - it's a reflection, so we would expect HH = I
00030
00031 Hv = [c 0 0 0 0]
00032 where c = +/- ||v||
00033 and u = f(v - [c 0 0..])
00034 where f = 1/sqrt(2c(c - v[0]))
00035
00036 For convenience, we can write
00037 Hv = v - 2u(u.v)
00038 or
00039 Hv = v - 2w(w.v)/g
00040 where g = c(c - v[0])
00041 w = v - [c 0 0..]
00042 */
00043
00044 //#define DO_VL_SUB
00045
00046 #ifdef DO_VL_SUB
00047 // experiment to see just how much slower using the generic sub-matrix
00048 // calls makes things.
00049
00050 Double LeftHouseholder(Matd &A, Matd &U, const Int i)
00051 // Zeroes out those elements below the diagonal in column i by use of a
00052 // Householder transformation matrix H: A' = HA. U is replaced with UH,
00053 // so that U'A' = UH HA = UA
00054 {
00055 Assert(i < A.Rows(), "bad i");
00056
00057 Int j, k;
00058 Double scale, vSqrLen, oldDiag, newDiag, g, dotProd;
00059 Int clen = A.Rows() - i;
00060 SubVecd Ai = sub(col(A, i), i, clen);
00061
00062 // scale factor is the sum of abs of elements below and including
00063 // the diagonal in column i
00064 scale = 0.0;
00065 for (k = 0; k < Ai.Elts(); k++)
00066 scale += abs(Ai[k]);
00067
00068 // return if nothing to eliminate...
00069 if (scale == 0.0)
00070 return(0.0);
00071
00072 // scale the column, find sum of squares
00073 Ai = Ai / scale;
00074 vSqrLen = sqrlen(Ai);
00075
00076 oldDiag = Ai[0]; // v[0]
00077 newDiag = sqrt(vSqrLen); // c
00078 if (oldDiag > 0.0)
00079 newDiag = -newDiag;
00080
00081 g = vSqrLen - oldDiag * newDiag; // c(c - v0)
00082
00083 // replace column i of A with w
00084 Ai[0] = oldDiag - newDiag; // v[0] -= c
00085
00086 // Apply H to A by transforming remaining columns of A
00087 for (j = i + 1; j < A.Cols(); j++)
00088 {
00089 SubVecd Aj = sub(col(A, j), i, clen);
00090
00091 // Hv = v - w(v.w) / g
00092 Aj = Aj - Ai * (dot(Ai, Aj) / g);
00093 }
00094
00095 // Apply H to rows of U
00096 for (j = 0; j < A.Rows(); j++)
00097 {
00098 SubVecd Uj = sub(row(U, j), i, clen);
00099
00100 // vH = v - (v.w)w / g
00101 Uj = Uj - Ai * (dot(Ai, Uj) / g);
00102 }
00103
00104 return(newDiag * scale);
00105 }
00106
00107 #else
00108
00109 Double LeftHouseholder(Matd &A, Matd &U, const Int i)
00110 // Zeroes out those elements below the diagonal in column i by use of a
00111 // Householder transformation matrix H: A' = HA. U is replaced with UH,
00112 // so that U'A' = UH HA = UA
00113 {
00114 Int j, k;
00115 Double scale, vSqrLen, oldDiag, newDiag, g, dotProd;
00116
00117 Assert(i < A.Rows(), "bad i");
00118
00119 // scale factor is the sum of abs of elements below and including
00120 // the diagonal in column i
00121 scale = 0.0;
00122 for (k = i; k < A.Rows(); k++)
00123 scale += abs(A[k][i]);
00124
00125 // return if nothing to eliminate...
00126 if (scale == 0.0)
00127 return(0.0);
00128
00129 // scale the column, find sum of squares
00130 vSqrLen = 0.0;
00131 for (k = i; k < A.Rows(); k++)
00132 {
00133 A[k][i] /= scale;
00134 vSqrLen += sqr(A[k][i]);
00135 }
00136
00137 oldDiag = A[i][i]; // v[0]
00138 newDiag = sqrt(vSqrLen); // c
00139 if (oldDiag > 0.0)
00140 newDiag = -newDiag;
00141
00142 g = vSqrLen - oldDiag * newDiag; // c(c - v0)
00143
00144 // replace column i of A with w
00145 A[i][i] = oldDiag - newDiag; // v[0] -= c
00146
00147 // Apply H to A by transforming remaining columns of A
00148 for (j = i + 1; j < A.Cols(); j++)
00149 {
00150 // Hv = v - w(v.w) / g
00151
00152 // find dot product of the columns [i:i..m] (w) and [j:i..m]
00153 dotProd = 0.0;
00154 for (k = i; k < A.Rows(); k++)
00155 dotProd += A[k][j] * A[k][i];
00156
00157 dotProd /= g;
00158
00159 // calculate A's new column j
00160 for (k = i; k < A.Rows(); k++)
00161 A[k][j] -= A[k][i] * dotProd;
00162 }
00163
00164 // Apply H to rows of U
00165 for (j = 0; j < A.Rows(); j++)
00166 {
00167 // vH = v - (v.w)w / g
00168
00169 dotProd = 0.0;
00170 for (k = i; k < A.Rows(); k++)
00171 dotProd += A[k][i] * U[j][k];
00172
00173 dotProd /= g;
00174
00175 for (k = i; k < A.Rows(); k++)
00176 U[j][k] -= A[k][i] * dotProd;
00177 }
00178
00179 return(newDiag * scale);
00180 }
00181
00182 #endif
00183
00184 Double RightHouseholder(Matd &A, Matd &V, const Int i)
00185 // Zeroes out those elements to the right of the super-diagonal in row i
00186 // by use of a Householder transformation matrix H: A' = AH. V is
00187 // replaced with VH, so that A'V't = AH (HV)t = AVt
00188 {
00189 Int j, k;
00190 Double scale, vSqrLen, oldSuperDiag, newSuperDiag, g, dotProd;
00191
00192 Assert(i < A.Cols() - 1, "bad i");
00193
00194 // scale factor is the sum of abs of elements to the right of the
00195 // diagonal in row i
00196 scale = 0.0;
00197 for (k = i + 1; k < A.Cols(); k++)
00198 scale += abs(A[i][k]);
00199
00200 // return if nothing to eliminate...
00201 if (scale == 0.0)
00202 return(0.0);
00203
00204 vSqrLen = 0.0;
00205 for (k = i + 1; k < A.Cols(); k++)
00206 {
00207 A[i][k] /= scale;
00208 vSqrLen += sqr(A[i][k]);
00209 }
00210
00211 oldSuperDiag = A[i][i + 1];
00212 newSuperDiag = sqrt(vSqrLen);
00213 if (oldSuperDiag > 0.0)
00214 newSuperDiag = -newSuperDiag;
00215
00216 g = oldSuperDiag * newSuperDiag - vSqrLen;
00217 A[i][i + 1] = oldSuperDiag - newSuperDiag;
00218
00219 // transform the remaining rows below i
00220 for (j = i + 1; j < A.Rows(); j++)
00221 {
00222 dotProd = 0.0;
00223 for (k = i + 1; k < A.Cols(); k++)
00224 dotProd += A[i][k] * A[j][k];
00225
00226 dotProd /= g;
00227
00228 for (k = i + 1; k < A.Cols(); k++)
00229 A[j][k] += A[i][k] * dotProd;
00230 }
00231
00232 // Accumulate the transform in V
00233 for (j = 1; j < A.Cols(); j++)
00234 {
00235 dotProd = 0.0;
00236 for (k = i + 1; k < A.Cols(); k++)
00237 dotProd += A[i][k] * V[j][k];
00238
00239 dotProd /= g;
00240
00241 for (k = i + 1; k < A.Cols(); k++)
00242 V[j][k] += A[i][k] * dotProd;
00243 }
00244
00245 // return new super-diagonal element A[i][i+1]
00246 return(newSuperDiag * scale);
00247 }
00248
00249 Void Bidiagonalize(Matd &A, Matd &U, Matd &V, Vecd &diagonal, Vecd &superDiag)
00250 // bidiagonalize matrix A by using householder transformations to eliminate
00251 // columns below the diagonal and rows to the right of the super-diagonal.
00252 // A is modified, and the diagonal and superDiag set.
00253 {
00254 Int i;
00255 Matd Us;
00256
00257 Assert(A.Rows() >= A.Cols(), "matrix must have rows >= cols");
00258
00259 diagonal.SetSize(A.Cols());
00260 superDiag.SetSize(A.Cols() - 1);
00261 U.SetSize(A.Rows(), A.Cols());
00262 Us.SetSize(A.Rows(), A.Rows());
00263 V.SetSize(A.Cols(), A.Cols());
00264 Us = vl_I;
00265 V = vl_I;
00266
00267 for (i = 0; i < A.Cols(); i++)
00268 {
00269 diagonal[i] = LeftHouseholder(A, Us, i);
00270
00271 if (i < A.Cols() - 1)
00272 superDiag[i] = RightHouseholder(A, V, i);
00273 }
00274
00275 U = sub(Us, 0, 0, A.Rows(), A.Cols());
00276 }
00277
00278 Double QRFactorization(Matd &A, Matd &Q, Matd &R)
00279 // Factor A into an orthogonal matrix Q and an upper-triangular matrix R.
00280 // Destroys A.
00281 {
00282 Double normAcc = 0.0, diagElt;
00283 Int i, j;
00284 Matd Qs;
00285
00286 Assert(A.Rows() >= A.Cols(), "matrix must have rows >= cols");
00287
00288 Qs.SetSize(A.Rows(), A.Rows());
00289 Q.SetSize(A.Rows(), A.Cols());
00290 R.SetSize(A.Cols(), A.Cols());
00291 Qs = vl_I;
00292 R = vl_0;
00293
00294 // for each column
00295 for (i = 0; i < A.Cols(); i++)
00296 {
00297 // apply householder
00298 diagElt = LeftHouseholder(A, Qs, i);
00299 // copy over row i of A to R
00300 R[i][i] = diagElt;
00301 j = A.Cols() - i - 1;
00302 if (j)
00303 last(R[i], j) = last(A[i], j);
00304 normAcc = Max(normAcc, abs(diagElt));
00305 }
00306
00307 Q = sub(Qs, 0, 0, A.Rows(), A.Cols());
00308
00309 return(normAcc);
00310 }
00311
00312 /*
00313 NOTE
00314
00315 Givens rotations perform a rotation in a 2D plane. Can be
00316 used to zero an entry of a matrix.
00317
00318 We pick axes i and j, then form G such that it is basically
00319 I with entries:
00320
00321 i j
00322 i: c -s
00323 ...
00324 j: s c
00325
00326
00327 s^2 + c^2 = 1
00328 inv(G) = Gt
00329
00330 A' = GA
00331 modifies rows i and j only:
00332 row'[i] = c * row[i] - s * row[j]
00333 row'[j] = c * row[j] + s * row[i]
00334 can force:
00335 A'[i][j] = 0
00336 if c * A[i][j] - s * A[j][j] = 0
00337 if c = A[j][j], s = A[i][j]
00338 must normalise to retain identity:
00339 norm = sqrt(c^2 + s^2), c /= norm, s /= norm
00340 A'[j][j] = c * (norm * c) + s * (norm * s) = norm
00341 A'[i][i] = 0
00342 if c * A[i][i] - s * A[j][i] = 0
00343 if c = A[j][i], s = A[i][i]
00344
00345 A' = AGt
00346
00347
00348 */
00349
00350 Void RotateRight(Matd& U, const Int i, const Int j, const Double c, const Double s)
00351 // rotate U by the given Givens rotation: U' = UGt
00352 // where G is defined as above
00353 {
00354 Vecd temp;
00355
00356 temp = col(U, i);
00357 col(U, i) = c * col(U, i) - s * col(U, j);
00358 col(U, j) = c * col(U, j) + s * temp;
00359 }
00360
00361 Void ClearSuperEntry(Vecd &diagonal, Vecd &superDiag, Matd &U,
00362 const Int k, const Int l, const Double prec)
00363 // We have a zero diagonal element at diag[l]. This means we can zero
00364 // out the super-diagonal entry to its right with a series of rotations.
00365 // Each rotation clears an entry (starting with the original
00366 // super-diagonal entry) but creates another entry immediately to its
00367 // right which must in turn be zeroed. Eventually we run off the end of
00368 // the matrix, and the process terminates.
00369 {
00370 Double c = 0.0, s = 1.0;
00371 Double f;
00372 Int i;
00373 Double norm;
00374
00375 // We zero the superdiagonal element l by using the row immediately
00376 // below it in a Givens rotation. Unfortunately, the superdiagonal
00377 // in this row in turn creates another entry A[l][l+2]. We must keep
00378 // applying rotations in the plane of the axes l and l + n to keep
00379 // zeroing each new entry created until we reach the right hand side
00380 // of the matrix.
00381
00382 // initialise: start with f being the original super diagonal entry
00383 // we're eliminating
00384 f = superDiag[l];
00385 superDiag[l] = 0.0;
00386
00387 // at each stage, f = A[l][i], rotate in the l/i plane
00388 for (i = l + 1; true; i++)
00389 {
00390 if (abs(f) + prec == prec) // if f is zero, we don't have to eliminate further
00391 break;
00392
00393 // to eliminate f at each stage we pick s = -f, c = di
00394 s = -f;
00395 c = diagonal[i];
00396
00397 // normalise
00398 norm = sqrt(sqr(c) + sqr(s));
00399 c /= norm;
00400 s /= norm;
00401
00402 // apply inverse rotation to U
00403 RotateRight(U, i, l, c, s);
00404
00405 // do the rotation, zeroing f and creating f' immediately to its right
00406 diagonal[i] = norm; // di' = c * di - s * f = norm
00407 if (i == k) // done?
00408 break;
00409 f = s * superDiag[i]; // f' = c * 0 + s * superdiag[i]
00410 superDiag[i] *= c; // ei' = c * ei - s * 0
00411 }
00412 }
00413
00414 Int FindSplit(Vecd &diagonal, Vecd &superDiag, Matd &U, const Int k, const Double prec)
00415 // Check for a zero entry along the superdiagonal; if we find one, we can
00416 // QR iterate on two separate matrices above and below it.
00417 // If there is a zero on the *diagonal*, we can call ClearSuperEntry to
00418 // zero out the corresponding superdiagonal entry to its right.
00419 {
00420 Int l;
00421
00422 for (l = k - 1; l >= 0; l--)
00423 if (superDiag[l] + prec == prec)
00424 // can split here
00425 return(l + 1);
00426 else if (diagonal[l] + prec == prec)
00427 {
00428 // can create a split here
00429 DBG_COUT << "creating split at " << l << endl;
00430 DBG_COUT << "diagonal " << diagonal << endl;
00431 DBG_COUT << "superDiag " << superDiag << endl;
00432 ClearSuperEntry(diagonal, superDiag, U, k, l, prec);
00433 DBG_COUT << "done: " << l << endl;
00434 DBG_COUT << "diagonal " << diagonal << endl;
00435 DBG_COUT << "superDiag " << superDiag << endl;
00436 DBG_COUT << endl;
00437 return(l + 1);
00438 }
00439
00440 return(0);
00441 }
00442
00443 Void Diagonalize(Vecd &diagonal, Vecd &superDiag, Matd &U, Matd &V)
00444 // Diagonalise the bidiagonal matrix A = (diagonal, superDiag), accumulating
00445 // transforms into U on the left and Vt on the right.
00446 //
00447 // diag(A) = diagonal and diag(A, 1) = superDiag, that is to say, diagonal[0]
00448 // is A[0][0], and superDiag[0] is A[0][1].
00449 {
00450 Int i, j, k, l;
00451 Double prec;
00452
00453 // work out a good precision value
00454 prec = abs(diagonal[0]);
00455 for (i = 1; i < diagonal.Elts(); i++)
00456 prec = Max(prec, abs(diagonal[i]) + abs(superDiag[i - 1]));
00457
00458 // work our way up from the bottom of the matrix
00459 for (k = diagonal.Elts() - 1; k >= 0; k--)
00460 {
00461 while (1)
00462 {
00463 // if there's a split, start from there rather than A[0][0]
00464 l = FindSplit(diagonal, superDiag, U, k, prec);
00465
00466 DBG_COUT << "QR-shift A " << l << ":" << k << endl;
00467
00468 DBG_COUT << "diagonal " << diagonal << endl;
00469 DBG_COUT << "superDiag " << superDiag << endl;
00470
00471 // are we done?
00472 if (l == k) // last super-diag entry must be zero -- what we wanted.
00473 break;
00474
00475 // QR iterate over the sub-matrix of A, A[l..k][l..k], until
00476 // we've nuked super diagonal entry A[k - 1][k]
00477
00478 // For extra stability, we shift the QR computation. We
00479 // want the shift to be as close as possible to the largest
00480 // eigenvalue, which of course we don't know yet... so we
00481 // take the eigenvalues of the 2x2 matrix at the bottom of
00482 // our window of A, and use those.
00483
00484 Double shift;
00485 Double e0;
00486 Double e1 = superDiag[k - 1];
00487 Double d1 = diagonal[k - 1];
00488 Double d2 = diagonal[k];
00489 Double dl = diagonal[l];
00490
00491 if (k > 1)
00492 e0 = superDiag[k - 2];
00493 else
00494 e0 = 0.0;
00495
00496 // d0 e0
00497 // d1 e1
00498 // k: d2
00499
00500 shift = (d1 - d2) * (d1 + d2) + (e0 - e1) * (e0 + e1);
00501 shift /= 2.0 * e1 * d1;
00502 shift += sign(shift) * sqrt(sqr(shift) + 1.0);
00503 shift = ((dl - d2) * (dl + d2) + e1 * (d1 / shift - e1)) / dl;
00504
00505
00506 Double cos_th, sin_th, cos_ph, sin_ph;
00507 Double d1n, norm, e2, f1, g0, g1, e1n;
00508
00509 d1 = dl; // d1 == A[i - 1][i - 1]: initialise it to that
00510 e1 = superDiag[l];
00511 // the first rotation is the QR-shifted one. After that, we
00512 // are continually eliminating below-diagonal and
00513 // above-super diagonal elements created by the previous
00514 // rotation, until we hit the bottom-right of the matrix and
00515 // we have a bidiagonal matrix again. The QR-shift will
00516 // ensure that the new matrix has smaller super-diagonal
00517 // elements, however.
00518 g0 = e1;
00519 e0 = shift;
00520
00521 for (i = l + 1; true; i++)
00522 {
00523 // rotate in plane of axes i - 1, i:
00524 // d0 e0 g0 d0 e0' 0
00525 // d1 e1 -> d1' e1'
00526 // i: d2 f1 d2'
00527 // we are rotating on the right, and so affecting columns
00528 d2 = diagonal[i];
00529
00530 // eliminate g0 using e0 (except for the first iteration,
00531 // where e0 and g0 would be off the top of the matrix, and
00532 // we're performing the QR-shift.)
00533 sin_th = g0;
00534 cos_th = e0;
00535
00536 norm = sqrt(sqr(cos_th) + sqr(sin_th));
00537 cos_th /= norm;
00538 sin_th /= norm;
00539
00540 // perform the rotation
00541 if (i > 1)
00542 superDiag[i - 2] = norm; // e0' = cos_th * e0 + sin_th * g0
00543 d1n = cos_th * d1 + sin_th * e1;
00544 e1n = -sin_th * d1 + cos_th * e1;
00545
00546 f1 = d2 * sin_th; // f1 = c * 0 + s * d2
00547 d2 *= cos_th; // d2' = c * d2 + s * 0
00548
00549 // Accumulate rotation in V
00550 RotateRight(V, i, i - 1, cos_th, sin_th);
00551
00552 // in eliminating g0, we've created f1: eet must be
00553 // destroyed
00554
00555 // rotate in plane of axes i - 1, i:
00556 // d0 e0 d0 e0
00557 // d1 e1 -> d1' e1' g1'
00558 // i: f1 d2 e2 0 d2' e2'
00559 // we are rotating on the left, and so affecting rows
00560
00561 // standard rotation to eliminate the f1 we've just created
00562 cos_ph = d1n;
00563 sin_ph = f1;
00564
00565 norm = sqrt(sqr(cos_ph) + sqr(sin_ph));
00566
00567 if (norm == 0.0)
00568 {
00569 // rotation angle is arbitrary
00570 cos_ph = cos_th;
00571 sin_ph = sin_th;
00572 }
00573 else
00574 {
00575 cos_ph /= norm;
00576 sin_ph /= norm;
00577 }
00578
00579 // as usual, for the elimination pair, f1 = 0, d1 = norm
00580 diagonal[i - 1] = norm; // d1'
00581 // rotate e1 and d2
00582 e1 = cos_ph * e1n + sin_ph * d2;
00583 d2 = -sin_ph * e1n + cos_ph * d2;
00584
00585 // Accumulate rotation into U
00586 RotateRight(U, i, i - 1, cos_ph, sin_ph);
00587
00588 if (i == k)
00589 break;
00590
00591 // rotate g1 and e2
00592 e2 = superDiag[i];
00593 g1 = sin_ph * e2;
00594 e2 = cos_ph * e2;
00595
00596 d1 = d2; // the new d1 will be the old d2
00597 e0 = e1;
00598 e1 = e2;
00599 g0 = g1;
00600 }
00601
00602 if (l > 0)
00603 superDiag[l - 1] = 0.0; // Supposed to be eliminated by now
00604 superDiag[k - 1] = e1; // write in the last superdiagonal
00605 diagonal[k] = d2; // and diagonal
00606 }
00607
00608 // Force singular value to be +ve if needs be
00609 if (diagonal[k] < 0)
00610 {
00611 DBG_COUT << "flipping " << k << endl;
00612 diagonal[k] = -diagonal[k];
00613 // flip the corresponding row of Vt to balance out
00614 col(V, k) = -col(V, k);
00615 }
00616 DBG_COUT << "done: " << endl;
00617 DBG_COUT << "diagonal " << diagonal << endl;
00618 DBG_COUT << "superDiag " << superDiag << endl;
00619 DBG_COUT << endl;
00620 }
00621 }
00622
00623 Void SVDFactorization(Matd &A, Matd &U, Matd &V, Vecd &diagonal)
00624 // pretty easy...
00625 {
00626 Vecd superDiag;
00627
00628 Bidiagonalize(A, U, V, diagonal, superDiag);
00629 Diagonalize(diagonal, superDiag, U, V);
00630 }