00001 /* 00002 File: Factor.cc 00003 00004 Function: Implements some matrix factorizations: SVD, QR 00005 00006 Author: Andrew Willmott 00007 00008 Notes: Hopefully this is a bit more understandable 00009 than most of the other SVD code out there. 00010 00011 This should also make it a bit easier to optimize 00012 for modern compilers, but that's a task for another 00013 day. 00014 */ 00015 00016 #include "vl/Factor.h" 00017 00018 #define DBG_COUT if (0) cerr 00019 00020 /* 00021 NOTE 00022 00023 Householder transformations zero all elements of a vector v apart 00024 from the first element. 00025 00026 H = I - 2uut 00027 where ||u||^2 = 1 00028 HtH = HHt = HH = I 00029 - it's a reflection, so we would expect HH = I 00030 00031 Hv = [c 0 0 0 0] 00032 where c = +/- ||v|| 00033 and u = f(v - [c 0 0..]) 00034 where f = 1/sqrt(2c(c - v[0])) 00035 00036 For convenience, we can write 00037 Hv = v - 2u(u.v) 00038 or 00039 Hv = v - 2w(w.v)/g 00040 where g = c(c - v[0]) 00041 w = v - [c 0 0..] 00042 */ 00043 00044 //#define DO_VL_SUB 00045 00046 #ifdef DO_VL_SUB 00047 // experiment to see just how much slower using the generic sub-matrix 00048 // calls makes things. 00049 00050 Double LeftHouseholder(Matd &A, Matd &U, const Int i) 00051 // Zeroes out those elements below the diagonal in column i by use of a 00052 // Householder transformation matrix H: A' = HA. U is replaced with UH, 00053 // so that U'A' = UH HA = UA 00054 { 00055 Assert(i < A.Rows(), "bad i"); 00056 00057 Int j, k; 00058 Double scale, vSqrLen, oldDiag, newDiag, g, dotProd; 00059 Int clen = A.Rows() - i; 00060 SubVecd Ai = sub(col(A, i), i, clen); 00061 00062 // scale factor is the sum of abs of elements below and including 00063 // the diagonal in column i 00064 scale = 0.0; 00065 for (k = 0; k < Ai.Elts(); k++) 00066 scale += abs(Ai[k]); 00067 00068 // return if nothing to eliminate... 00069 if (scale == 0.0) 00070 return(0.0); 00071 00072 // scale the column, find sum of squares 00073 Ai = Ai / scale; 00074 vSqrLen = sqrlen(Ai); 00075 00076 oldDiag = Ai[0]; // v[0] 00077 newDiag = sqrt(vSqrLen); // c 00078 if (oldDiag > 0.0) 00079 newDiag = -newDiag; 00080 00081 g = vSqrLen - oldDiag * newDiag; // c(c - v0) 00082 00083 // replace column i of A with w 00084 Ai[0] = oldDiag - newDiag; // v[0] -= c 00085 00086 // Apply H to A by transforming remaining columns of A 00087 for (j = i + 1; j < A.Cols(); j++) 00088 { 00089 SubVecd Aj = sub(col(A, j), i, clen); 00090 00091 // Hv = v - w(v.w) / g 00092 Aj = Aj - Ai * (dot(Ai, Aj) / g); 00093 } 00094 00095 // Apply H to rows of U 00096 for (j = 0; j < A.Rows(); j++) 00097 { 00098 SubVecd Uj = sub(row(U, j), i, clen); 00099 00100 // vH = v - (v.w)w / g 00101 Uj = Uj - Ai * (dot(Ai, Uj) / g); 00102 } 00103 00104 return(newDiag * scale); 00105 } 00106 00107 #else 00108 00109 Double LeftHouseholder(Matd &A, Matd &U, const Int i) 00110 // Zeroes out those elements below the diagonal in column i by use of a 00111 // Householder transformation matrix H: A' = HA. U is replaced with UH, 00112 // so that U'A' = UH HA = UA 00113 { 00114 Int j, k; 00115 Double scale, vSqrLen, oldDiag, newDiag, g, dotProd; 00116 00117 Assert(i < A.Rows(), "bad i"); 00118 00119 // scale factor is the sum of abs of elements below and including 00120 // the diagonal in column i 00121 scale = 0.0; 00122 for (k = i; k < A.Rows(); k++) 00123 scale += abs(A[k][i]); 00124 00125 // return if nothing to eliminate... 00126 if (scale == 0.0) 00127 return(0.0); 00128 00129 // scale the column, find sum of squares 00130 vSqrLen = 0.0; 00131 for (k = i; k < A.Rows(); k++) 00132 { 00133 A[k][i] /= scale; 00134 vSqrLen += sqr(A[k][i]); 00135 } 00136 00137 oldDiag = A[i][i]; // v[0] 00138 newDiag = sqrt(vSqrLen); // c 00139 if (oldDiag > 0.0) 00140 newDiag = -newDiag; 00141 00142 g = vSqrLen - oldDiag * newDiag; // c(c - v0) 00143 00144 // replace column i of A with w 00145 A[i][i] = oldDiag - newDiag; // v[0] -= c 00146 00147 // Apply H to A by transforming remaining columns of A 00148 for (j = i + 1; j < A.Cols(); j++) 00149 { 00150 // Hv = v - w(v.w) / g 00151 00152 // find dot product of the columns [i:i..m] (w) and [j:i..m] 00153 dotProd = 0.0; 00154 for (k = i; k < A.Rows(); k++) 00155 dotProd += A[k][j] * A[k][i]; 00156 00157 dotProd /= g; 00158 00159 // calculate A's new column j 00160 for (k = i; k < A.Rows(); k++) 00161 A[k][j] -= A[k][i] * dotProd; 00162 } 00163 00164 // Apply H to rows of U 00165 for (j = 0; j < A.Rows(); j++) 00166 { 00167 // vH = v - (v.w)w / g 00168 00169 dotProd = 0.0; 00170 for (k = i; k < A.Rows(); k++) 00171 dotProd += A[k][i] * U[j][k]; 00172 00173 dotProd /= g; 00174 00175 for (k = i; k < A.Rows(); k++) 00176 U[j][k] -= A[k][i] * dotProd; 00177 } 00178 00179 return(newDiag * scale); 00180 } 00181 00182 #endif 00183 00184 Double RightHouseholder(Matd &A, Matd &V, const Int i) 00185 // Zeroes out those elements to the right of the super-diagonal in row i 00186 // by use of a Householder transformation matrix H: A' = AH. V is 00187 // replaced with VH, so that A'V't = AH (HV)t = AVt 00188 { 00189 Int j, k; 00190 Double scale, vSqrLen, oldSuperDiag, newSuperDiag, g, dotProd; 00191 00192 Assert(i < A.Cols() - 1, "bad i"); 00193 00194 // scale factor is the sum of abs of elements to the right of the 00195 // diagonal in row i 00196 scale = 0.0; 00197 for (k = i + 1; k < A.Cols(); k++) 00198 scale += abs(A[i][k]); 00199 00200 // return if nothing to eliminate... 00201 if (scale == 0.0) 00202 return(0.0); 00203 00204 vSqrLen = 0.0; 00205 for (k = i + 1; k < A.Cols(); k++) 00206 { 00207 A[i][k] /= scale; 00208 vSqrLen += sqr(A[i][k]); 00209 } 00210 00211 oldSuperDiag = A[i][i + 1]; 00212 newSuperDiag = sqrt(vSqrLen); 00213 if (oldSuperDiag > 0.0) 00214 newSuperDiag = -newSuperDiag; 00215 00216 g = oldSuperDiag * newSuperDiag - vSqrLen; 00217 A[i][i + 1] = oldSuperDiag - newSuperDiag; 00218 00219 // transform the remaining rows below i 00220 for (j = i + 1; j < A.Rows(); j++) 00221 { 00222 dotProd = 0.0; 00223 for (k = i + 1; k < A.Cols(); k++) 00224 dotProd += A[i][k] * A[j][k]; 00225 00226 dotProd /= g; 00227 00228 for (k = i + 1; k < A.Cols(); k++) 00229 A[j][k] += A[i][k] * dotProd; 00230 } 00231 00232 // Accumulate the transform in V 00233 for (j = 1; j < A.Cols(); j++) 00234 { 00235 dotProd = 0.0; 00236 for (k = i + 1; k < A.Cols(); k++) 00237 dotProd += A[i][k] * V[j][k]; 00238 00239 dotProd /= g; 00240 00241 for (k = i + 1; k < A.Cols(); k++) 00242 V[j][k] += A[i][k] * dotProd; 00243 } 00244 00245 // return new super-diagonal element A[i][i+1] 00246 return(newSuperDiag * scale); 00247 } 00248 00249 Void Bidiagonalize(Matd &A, Matd &U, Matd &V, Vecd &diagonal, Vecd &superDiag) 00250 // bidiagonalize matrix A by using householder transformations to eliminate 00251 // columns below the diagonal and rows to the right of the super-diagonal. 00252 // A is modified, and the diagonal and superDiag set. 00253 { 00254 Int i; 00255 Matd Us; 00256 00257 Assert(A.Rows() >= A.Cols(), "matrix must have rows >= cols"); 00258 00259 diagonal.SetSize(A.Cols()); 00260 superDiag.SetSize(A.Cols() - 1); 00261 U.SetSize(A.Rows(), A.Cols()); 00262 Us.SetSize(A.Rows(), A.Rows()); 00263 V.SetSize(A.Cols(), A.Cols()); 00264 Us = vl_I; 00265 V = vl_I; 00266 00267 for (i = 0; i < A.Cols(); i++) 00268 { 00269 diagonal[i] = LeftHouseholder(A, Us, i); 00270 00271 if (i < A.Cols() - 1) 00272 superDiag[i] = RightHouseholder(A, V, i); 00273 } 00274 00275 U = sub(Us, 0, 0, A.Rows(), A.Cols()); 00276 } 00277 00278 Double QRFactorization(Matd &A, Matd &Q, Matd &R) 00279 // Factor A into an orthogonal matrix Q and an upper-triangular matrix R. 00280 // Destroys A. 00281 { 00282 Double normAcc = 0.0, diagElt; 00283 Int i, j; 00284 Matd Qs; 00285 00286 Assert(A.Rows() >= A.Cols(), "matrix must have rows >= cols"); 00287 00288 Qs.SetSize(A.Rows(), A.Rows()); 00289 Q.SetSize(A.Rows(), A.Cols()); 00290 R.SetSize(A.Cols(), A.Cols()); 00291 Qs = vl_I; 00292 R = vl_0; 00293 00294 // for each column 00295 for (i = 0; i < A.Cols(); i++) 00296 { 00297 // apply householder 00298 diagElt = LeftHouseholder(A, Qs, i); 00299 // copy over row i of A to R 00300 R[i][i] = diagElt; 00301 j = A.Cols() - i - 1; 00302 if (j) 00303 last(R[i], j) = last(A[i], j); 00304 normAcc = Max(normAcc, abs(diagElt)); 00305 } 00306 00307 Q = sub(Qs, 0, 0, A.Rows(), A.Cols()); 00308 00309 return(normAcc); 00310 } 00311 00312 /* 00313 NOTE 00314 00315 Givens rotations perform a rotation in a 2D plane. Can be 00316 used to zero an entry of a matrix. 00317 00318 We pick axes i and j, then form G such that it is basically 00319 I with entries: 00320 00321 i j 00322 i: c -s 00323 ... 00324 j: s c 00325 00326 00327 s^2 + c^2 = 1 00328 inv(G) = Gt 00329 00330 A' = GA 00331 modifies rows i and j only: 00332 row'[i] = c * row[i] - s * row[j] 00333 row'[j] = c * row[j] + s * row[i] 00334 can force: 00335 A'[i][j] = 0 00336 if c * A[i][j] - s * A[j][j] = 0 00337 if c = A[j][j], s = A[i][j] 00338 must normalise to retain identity: 00339 norm = sqrt(c^2 + s^2), c /= norm, s /= norm 00340 A'[j][j] = c * (norm * c) + s * (norm * s) = norm 00341 A'[i][i] = 0 00342 if c * A[i][i] - s * A[j][i] = 0 00343 if c = A[j][i], s = A[i][i] 00344 00345 A' = AGt 00346 00347 00348 */ 00349 00350 Void RotateRight(Matd& U, const Int i, const Int j, const Double c, const Double s) 00351 // rotate U by the given Givens rotation: U' = UGt 00352 // where G is defined as above 00353 { 00354 Vecd temp; 00355 00356 temp = col(U, i); 00357 col(U, i) = c * col(U, i) - s * col(U, j); 00358 col(U, j) = c * col(U, j) + s * temp; 00359 } 00360 00361 Void ClearSuperEntry(Vecd &diagonal, Vecd &superDiag, Matd &U, 00362 const Int k, const Int l, const Double prec) 00363 // We have a zero diagonal element at diag[l]. This means we can zero 00364 // out the super-diagonal entry to its right with a series of rotations. 00365 // Each rotation clears an entry (starting with the original 00366 // super-diagonal entry) but creates another entry immediately to its 00367 // right which must in turn be zeroed. Eventually we run off the end of 00368 // the matrix, and the process terminates. 00369 { 00370 Double c = 0.0, s = 1.0; 00371 Double f; 00372 Int i; 00373 Double norm; 00374 00375 // We zero the superdiagonal element l by using the row immediately 00376 // below it in a Givens rotation. Unfortunately, the superdiagonal 00377 // in this row in turn creates another entry A[l][l+2]. We must keep 00378 // applying rotations in the plane of the axes l and l + n to keep 00379 // zeroing each new entry created until we reach the right hand side 00380 // of the matrix. 00381 00382 // initialise: start with f being the original super diagonal entry 00383 // we're eliminating 00384 f = superDiag[l]; 00385 superDiag[l] = 0.0; 00386 00387 // at each stage, f = A[l][i], rotate in the l/i plane 00388 for (i = l + 1; true; i++) 00389 { 00390 if (abs(f) + prec == prec) // if f is zero, we don't have to eliminate further 00391 break; 00392 00393 // to eliminate f at each stage we pick s = -f, c = di 00394 s = -f; 00395 c = diagonal[i]; 00396 00397 // normalise 00398 norm = sqrt(sqr(c) + sqr(s)); 00399 c /= norm; 00400 s /= norm; 00401 00402 // apply inverse rotation to U 00403 RotateRight(U, i, l, c, s); 00404 00405 // do the rotation, zeroing f and creating f' immediately to its right 00406 diagonal[i] = norm; // di' = c * di - s * f = norm 00407 if (i == k) // done? 00408 break; 00409 f = s * superDiag[i]; // f' = c * 0 + s * superdiag[i] 00410 superDiag[i] *= c; // ei' = c * ei - s * 0 00411 } 00412 } 00413 00414 Int FindSplit(Vecd &diagonal, Vecd &superDiag, Matd &U, const Int k, const Double prec) 00415 // Check for a zero entry along the superdiagonal; if we find one, we can 00416 // QR iterate on two separate matrices above and below it. 00417 // If there is a zero on the *diagonal*, we can call ClearSuperEntry to 00418 // zero out the corresponding superdiagonal entry to its right. 00419 { 00420 Int l; 00421 00422 for (l = k - 1; l >= 0; l--) 00423 if (superDiag[l] + prec == prec) 00424 // can split here 00425 return(l + 1); 00426 else if (diagonal[l] + prec == prec) 00427 { 00428 // can create a split here 00429 DBG_COUT << "creating split at " << l << endl; 00430 DBG_COUT << "diagonal " << diagonal << endl; 00431 DBG_COUT << "superDiag " << superDiag << endl; 00432 ClearSuperEntry(diagonal, superDiag, U, k, l, prec); 00433 DBG_COUT << "done: " << l << endl; 00434 DBG_COUT << "diagonal " << diagonal << endl; 00435 DBG_COUT << "superDiag " << superDiag << endl; 00436 DBG_COUT << endl; 00437 return(l + 1); 00438 } 00439 00440 return(0); 00441 } 00442 00443 Void Diagonalize(Vecd &diagonal, Vecd &superDiag, Matd &U, Matd &V) 00444 // Diagonalise the bidiagonal matrix A = (diagonal, superDiag), accumulating 00445 // transforms into U on the left and Vt on the right. 00446 // 00447 // diag(A) = diagonal and diag(A, 1) = superDiag, that is to say, diagonal[0] 00448 // is A[0][0], and superDiag[0] is A[0][1]. 00449 { 00450 Int i, j, k, l; 00451 Double prec; 00452 00453 // work out a good precision value 00454 prec = abs(diagonal[0]); 00455 for (i = 1; i < diagonal.Elts(); i++) 00456 prec = Max(prec, abs(diagonal[i]) + abs(superDiag[i - 1])); 00457 00458 // work our way up from the bottom of the matrix 00459 for (k = diagonal.Elts() - 1; k >= 0; k--) 00460 { 00461 while (1) 00462 { 00463 // if there's a split, start from there rather than A[0][0] 00464 l = FindSplit(diagonal, superDiag, U, k, prec); 00465 00466 DBG_COUT << "QR-shift A " << l << ":" << k << endl; 00467 00468 DBG_COUT << "diagonal " << diagonal << endl; 00469 DBG_COUT << "superDiag " << superDiag << endl; 00470 00471 // are we done? 00472 if (l == k) // last super-diag entry must be zero -- what we wanted. 00473 break; 00474 00475 // QR iterate over the sub-matrix of A, A[l..k][l..k], until 00476 // we've nuked super diagonal entry A[k - 1][k] 00477 00478 // For extra stability, we shift the QR computation. We 00479 // want the shift to be as close as possible to the largest 00480 // eigenvalue, which of course we don't know yet... so we 00481 // take the eigenvalues of the 2x2 matrix at the bottom of 00482 // our window of A, and use those. 00483 00484 Double shift; 00485 Double e0; 00486 Double e1 = superDiag[k - 1]; 00487 Double d1 = diagonal[k - 1]; 00488 Double d2 = diagonal[k]; 00489 Double dl = diagonal[l]; 00490 00491 if (k > 1) 00492 e0 = superDiag[k - 2]; 00493 else 00494 e0 = 0.0; 00495 00496 // d0 e0 00497 // d1 e1 00498 // k: d2 00499 00500 shift = (d1 - d2) * (d1 + d2) + (e0 - e1) * (e0 + e1); 00501 shift /= 2.0 * e1 * d1; 00502 shift += sign(shift) * sqrt(sqr(shift) + 1.0); 00503 shift = ((dl - d2) * (dl + d2) + e1 * (d1 / shift - e1)) / dl; 00504 00505 00506 Double cos_th, sin_th, cos_ph, sin_ph; 00507 Double d1n, norm, e2, f1, g0, g1, e1n; 00508 00509 d1 = dl; // d1 == A[i - 1][i - 1]: initialise it to that 00510 e1 = superDiag[l]; 00511 // the first rotation is the QR-shifted one. After that, we 00512 // are continually eliminating below-diagonal and 00513 // above-super diagonal elements created by the previous 00514 // rotation, until we hit the bottom-right of the matrix and 00515 // we have a bidiagonal matrix again. The QR-shift will 00516 // ensure that the new matrix has smaller super-diagonal 00517 // elements, however. 00518 g0 = e1; 00519 e0 = shift; 00520 00521 for (i = l + 1; true; i++) 00522 { 00523 // rotate in plane of axes i - 1, i: 00524 // d0 e0 g0 d0 e0' 0 00525 // d1 e1 -> d1' e1' 00526 // i: d2 f1 d2' 00527 // we are rotating on the right, and so affecting columns 00528 d2 = diagonal[i]; 00529 00530 // eliminate g0 using e0 (except for the first iteration, 00531 // where e0 and g0 would be off the top of the matrix, and 00532 // we're performing the QR-shift.) 00533 sin_th = g0; 00534 cos_th = e0; 00535 00536 norm = sqrt(sqr(cos_th) + sqr(sin_th)); 00537 cos_th /= norm; 00538 sin_th /= norm; 00539 00540 // perform the rotation 00541 if (i > 1) 00542 superDiag[i - 2] = norm; // e0' = cos_th * e0 + sin_th * g0 00543 d1n = cos_th * d1 + sin_th * e1; 00544 e1n = -sin_th * d1 + cos_th * e1; 00545 00546 f1 = d2 * sin_th; // f1 = c * 0 + s * d2 00547 d2 *= cos_th; // d2' = c * d2 + s * 0 00548 00549 // Accumulate rotation in V 00550 RotateRight(V, i, i - 1, cos_th, sin_th); 00551 00552 // in eliminating g0, we've created f1: eet must be 00553 // destroyed 00554 00555 // rotate in plane of axes i - 1, i: 00556 // d0 e0 d0 e0 00557 // d1 e1 -> d1' e1' g1' 00558 // i: f1 d2 e2 0 d2' e2' 00559 // we are rotating on the left, and so affecting rows 00560 00561 // standard rotation to eliminate the f1 we've just created 00562 cos_ph = d1n; 00563 sin_ph = f1; 00564 00565 norm = sqrt(sqr(cos_ph) + sqr(sin_ph)); 00566 00567 if (norm == 0.0) 00568 { 00569 // rotation angle is arbitrary 00570 cos_ph = cos_th; 00571 sin_ph = sin_th; 00572 } 00573 else 00574 { 00575 cos_ph /= norm; 00576 sin_ph /= norm; 00577 } 00578 00579 // as usual, for the elimination pair, f1 = 0, d1 = norm 00580 diagonal[i - 1] = norm; // d1' 00581 // rotate e1 and d2 00582 e1 = cos_ph * e1n + sin_ph * d2; 00583 d2 = -sin_ph * e1n + cos_ph * d2; 00584 00585 // Accumulate rotation into U 00586 RotateRight(U, i, i - 1, cos_ph, sin_ph); 00587 00588 if (i == k) 00589 break; 00590 00591 // rotate g1 and e2 00592 e2 = superDiag[i]; 00593 g1 = sin_ph * e2; 00594 e2 = cos_ph * e2; 00595 00596 d1 = d2; // the new d1 will be the old d2 00597 e0 = e1; 00598 e1 = e2; 00599 g0 = g1; 00600 } 00601 00602 if (l > 0) 00603 superDiag[l - 1] = 0.0; // Supposed to be eliminated by now 00604 superDiag[k - 1] = e1; // write in the last superdiagonal 00605 diagonal[k] = d2; // and diagonal 00606 } 00607 00608 // Force singular value to be +ve if needs be 00609 if (diagonal[k] < 0) 00610 { 00611 DBG_COUT << "flipping " << k << endl; 00612 diagonal[k] = -diagonal[k]; 00613 // flip the corresponding row of Vt to balance out 00614 col(V, k) = -col(V, k); 00615 } 00616 DBG_COUT << "done: " << endl; 00617 DBG_COUT << "diagonal " << diagonal << endl; 00618 DBG_COUT << "superDiag " << superDiag << endl; 00619 DBG_COUT << endl; 00620 } 00621 } 00622 00623 Void SVDFactorization(Matd &A, Matd &U, Matd &V, Vecd &diagonal) 00624 // pretty easy... 00625 { 00626 Vecd superDiag; 00627 00628 Bidiagonalize(A, U, V, diagonal, superDiag); 00629 Diagonalize(diagonal, superDiag, U, V); 00630 }