#define EPSILON 0.00001 #define MAXITER 100 #define PI 3.1415926535897932384626 NODEVECTOR res, resa, p, w, xvec, saveres, diag, answer, err, boundary; MATRIX A; MATRIXFACTOR U; DENSEMATRIX Udense, UU; float kfunc(x, y) float x; float y; { return(1.0); } float bfunc(x, y) float x; float y; { return(0.08 * PI * PI); } float ffunc(x, y) float x; float y; { return(20.0 / (4.0 + (x - 12.5) * (x - 12.5) + (y - 6.8) * (y - 6.8)) + 20.0 / (4.0 + (x - 30.0) * (x - 30.0) + (y - 6.8) * (y - 6.8))); } void sparsify(nodes, sparsenodes, A, Acol, Aindex, Adense) int nodes; int sparsenodes; float *A; int *Acol; int *Aindex; float *Adense; { int i; int sparseindex, denseindex; int sparselast; sparseindex = Aindex[sparsenodes]; denseindex = 0; for (i = sparsenodes; i < nodes; i++) { sparselast = Aindex[i + 1]; while (sparseindex < sparselast) { A[sparseindex] = Adense[denseindex + Acol[sparseindex] - i]; sparseindex++; } denseindex += nodes - i; } } void backsolve(nodes, sparsenodes, U, Ucol, Uindex, w) int nodes; int sparsenodes; float *U; int *Ucol; int *Uindex; float *w; { int i; int Ufirst, Unext, Ulast; float sum, factor; Unext = Uindex[sparsenodes]; for (i = sparsenodes; i < nodes; i++) { Unext++; Ulast = Uindex[i + 1]; factor = w[i]; while (Unext < Ulast) { w[Ucol[Unext]] -= factor * U[Unext]; Unext++; } } Ulast = Uindex[nodes]; for (i = nodes - 1; i >= sparsenodes; i--) { Ufirst = Uindex[i]; Unext = Ufirst + 1; sum = w[i] / U[Ufirst]; while (Unext < Ulast) { sum -= U[Unext] * w[Ucol[Unext]]; Unext++; } w[i] = sum; Ulast = Ufirst; } } void incomplete_factor_ldu(nodes, sparsenodes, A, Acol, Aindex) int nodes; int sparsenodes; float *A; int *Acol; int *Aindex; { int i, j; int Anext, Alast; int Mnext, Mlast; int Mrow; int col; float factor, rowfactor; Anext = Aindex[sparsenodes]; for (i = sparsenodes; i < nodes; i++) { Alast = Aindex[i + 1]; factor = 1.0 / A[Anext++]; while (Anext < Alast) { rowfactor = factor * A[Anext]; Mrow = Acol[Anext]; Mnext = Aindex[Mrow]; Mlast = Aindex[Mrow + 1]; for (j = Anext; j < Alast; j++) { col = Acol[j]; while ((Acol[Mnext] < col) && (Mnext < Mlast)) { Mnext++; } if ((Acol[Mnext] == col) && (Mnext < Mlast)) { A[Mnext++] -= rowfactor * A[j]; } } A[Anext++] = rowfactor; } } } void main() { int iter; float bnorm, norm, oldnorm, errnorm; float alpha, beta; int i, j; float x, y; float pw; int cor[3]; float c0[2], c1[2], c2[2]; float line[3]; float block[3][3]; int itercg, itercgd, iterdd, iterddd, iterddjrs; int compcg, compcgd, compdd, compddd, compddjrs; int commcg, commcgd, commdd, commddd, commddjrs; int compcon, compconv, compunconv, dummy; READNODEVECTOR(boundary); res = 0.0; A = 0.0; FORELEM(i) { GETCORNERS(i, cor); GETCOORD2(cor[0], c0); GETCOORD2(cor[1], c1); GETCOORD2(cor[2], c2); GAUSS_TRIANGLE(c0, c1, c2, kfunc, bfunc, ffunc, block, line); ASSEMBLEVECTOR(line, i, res); ASSEMBLEMATRIX(block, i, A); } FORNODE(i) { if (boundary[i] == 10.0) { DIRICHLET(0.0, i, A, res); } else if (boundary[i] > 15.0) { DIRICHLET(1.0, i, A, res); } } saveres = res; TIMER_RESET(); SYMBOL_FACTOR(); printf("U: %d\n", ARCHfactorindex[ARCHpriv]); fflush(stdout); CONDENSE_MATRIX(A, U, Udense); GET_TIMER(compcon, dummy); CONDENSE_VECTOR(U, res); /* Diagonal preconditioner */ /* diag = 1.0; GET_DENSEDIAGONAL(Udense, diag); FULLASSEMBLE(diag); diag = 1.0 / diag; res *= diag; */ /* My preconditioner */ for (i = 0; i < MAX_DENSE; i++) { UU[i] = Udense[i]; } diag = 1.0; GET_DENSEDIAGONAL(UU, diag); FULLASSEMBLE(diag); SET_DENSEDIAGONAL(UU, diag); FACTOR_DENSE(UU); FULLASSEMBLE(res); BACKSOLVE_DENSE(UU, res); iter = 0; xvec = 0.0; resa = res; YELLOW_ON(); FULLASSEMBLE(resa); BOUNDARY_DOTPRODUCT(res, resa, bnorm); YELLOW_OFF(); norm = bnorm; do { GREEN_TOGGLE(); /* DISPLAY(xvec); */ iter++; if (iter == 1) { p = resa; } else { beta = norm / oldnorm; p = resa + beta * p; } DENSEMVPRODUCT(Udense, p, w); /* Diagonal preconditioner */ /* w *= diag; */ /* My preconditioner */ /**/ FULLASSEMBLE(w); BACKSOLVE_DENSE(UU, w); /**/ BOUNDARY_DOTPRODUCT(p, w, pw); alpha = norm / pw; xvec = xvec + alpha * p; res = res - alpha * w; resa = res; YELLOW_ON(); FULLASSEMBLE(resa); oldnorm = norm; BOUNDARY_DOTPRODUCT(res, resa, norm); YELLOW_OFF(); } while (iter < 200); /* DISPLAY(xvec); */ GREEN_OFF(); UNCONDENSE(U, xvec); /* DISPLAY(xvec); */ answer = xvec; res = saveres; /* iter = 0; xvec = 0.0; resa = res; YELLOW_ON(); FULLASSEMBLE(resa); ADOTPRODUCT(resa, resa, bnorm); YELLOW_OFF(); norm = bnorm; do { GREEN_TOGGLE(); DISPLAY(xvec); iter++; if (iter == 1) { p = resa; } else { beta = norm / oldnorm; p = resa + beta * p; } MVPRODUCT(A, p, w); FULLASSEMBLE(w); ADOTPRODUCT(p, w, pw); alpha = norm / pw; xvec = xvec + alpha * p; resa = resa - alpha * w; YELLOW_ON(); oldnorm = norm; ADOTPRODUCT(resa, resa, norm); YELLOW_OFF(); err = xvec - answer; BOUNDARY_DOTPRODUCT(err, err, errnorm); } while (errnorm > 0.0001); DISPLAY(xvec); GREEN_OFF(); */ iter = 0; xvec = 0.0; resa = res; YELLOW_ON(); FULLASSEMBLE(resa); DOTPRODUCT(res, resa, bnorm); TIMER_RESET(); YELLOW_OFF(); norm = bnorm; do { GREEN_TOGGLE(); /* DISPLAY(xvec); */ iter++; if (iter == 1) { p = resa; } else { beta = norm / oldnorm; p = resa + beta * p; } MVPRODUCT(A, p, w); DOTPRODUCT(p, w, pw); alpha = norm / pw; xvec = xvec + alpha * p; res = res - alpha * w; resa = res; YELLOW_ON(); FULLASSEMBLE(resa); oldnorm = norm; DOTPRODUCT(res, resa, norm); YELLOW_OFF(); err = xvec - answer; BOUNDARY_DOTPRODUCT(err, err, errnorm); } while (errnorm > 0.0001); GET_TIMER(compcg, commcg); itercg = iter; /* DISPLAY(xvec); */ GREEN_OFF(); xvec -= answer; /* DISPLAY(xvec); */ /* printf("CG iterations: %d\n", iter); fflush(stdout); */ res = saveres; GET_DIAGONAL(A, diag); FULLASSEMBLE(diag); diag = 1.0 / diag; res *= diag; iter = 0; xvec = 0.0; resa = res; YELLOW_ON(); FULLASSEMBLE(resa); DOTPRODUCT(res, resa, bnorm); TIMER_RESET(); YELLOW_OFF(); norm = bnorm; do { GREEN_TOGGLE(); /* DISPLAY(xvec); */ iter++; if (iter == 1) { p = resa; } else { beta = norm / oldnorm; p = resa + beta * p; } MVPRODUCT(A, p, w); /* Diagonal preconditioner */ /**/ w *= diag; /**/ DOTPRODUCT(p, w, pw); alpha = norm / pw; xvec = xvec + alpha * p; res = res - alpha * w; resa = res; YELLOW_ON(); FULLASSEMBLE(resa); oldnorm = norm; DOTPRODUCT(res, resa, norm); YELLOW_OFF(); err = xvec - answer; BOUNDARY_DOTPRODUCT(err, err, errnorm); } while (errnorm > 0.0001); GET_TIMER(compcgd, commcgd); itercgd = iter; /* DISPLAY(xvec); */ GREEN_OFF(); xvec -= answer; /* DISPLAY(xvec); */ /* printf("CG-d iterations: %d\n", iter); fflush(stdout); */ res = saveres; TIMER_RESET(); CONDENSE_VECTOR(U, res); GET_TIMER(compconv, dummy); iter = 0; xvec = 0.0; resa = res; YELLOW_ON(); FULLASSEMBLE(resa); BOUNDARY_DOTPRODUCT(res, resa, bnorm); TIMER_RESET(); YELLOW_OFF(); norm = bnorm; do { GREEN_TOGGLE(); /* DISPLAY(xvec); */ iter++; if (iter == 1) { p = resa; } else { beta = norm / oldnorm; p = resa + beta * p; } DENSEMVPRODUCT(Udense, p, w); /* Diagonal preconditioner */ /* w *= diag; */ /* My preconditioner */ /* FULLASSEMBLE(w); BACKSOLVE_DENSE(UU, w); */ BOUNDARY_DOTPRODUCT(p, w, pw); alpha = norm / pw; xvec = xvec + alpha * p; res = res - alpha * w; resa = res; YELLOW_ON(); FULLASSEMBLE(resa); oldnorm = norm; BOUNDARY_DOTPRODUCT(res, resa, norm); YELLOW_OFF(); err = xvec - answer; BOUNDARY_DOTPRODUCT(err, err, errnorm); } while (errnorm > 0.0001); iterdd = iter; GET_TIMER(compdd, commdd); /* DISPLAY(xvec); */ GREEN_OFF(); TIMER_RESET(); UNCONDENSE(U, xvec); GET_TIMER(compunconv, dummy); /* DISPLAY(xvec); */ xvec -= answer; /* DISPLAY(xvec); */ /* printf("DD iterations: %d\n", iter); fflush(stdout); */ res = saveres; CONDENSE_VECTOR(U, res); diag = 1.0; GET_DENSEDIAGONAL(Udense, diag); FULLASSEMBLE(diag); diag = 1.0 / diag; res *= diag; iter = 0; xvec = 0.0; resa = res; YELLOW_ON(); FULLASSEMBLE(resa); BOUNDARY_DOTPRODUCT(res, resa, bnorm); TIMER_RESET(); YELLOW_OFF(); norm = bnorm; do { GREEN_TOGGLE(); /* DISPLAY(xvec); */ iter++; if (iter == 1) { p = resa; } else { beta = norm / oldnorm; p = resa + beta * p; } DENSEMVPRODUCT(Udense, p, w); /* Diagonal preconditioner */ /**/ w *= diag; /**/ /* My preconditioner */ /* FULLASSEMBLE(w); BACKSOLVE_DENSE(UU, w); */ BOUNDARY_DOTPRODUCT(p, w, pw); alpha = norm / pw; xvec = xvec + alpha * p; res = res - alpha * w; resa = res; YELLOW_ON(); FULLASSEMBLE(resa); oldnorm = norm; BOUNDARY_DOTPRODUCT(res, resa, norm); YELLOW_OFF(); err = xvec - answer; BOUNDARY_DOTPRODUCT(err, err, errnorm); } while (errnorm > 0.0001); GET_TIMER(compddd, commddd); iterddd = iter; /* DISPLAY(xvec); */ GREEN_OFF(); UNCONDENSE(U, xvec); /* DISPLAY(xvec); */ xvec -= answer; /* DISPLAY(xvec); */ /* printf("DD-d iterations: %d\n", iter); fflush(stdout); */ /* sparsify(ARCHnodes, ARCHpriv, A, ARCHmatrixcol, ARCHmatrixindex, Udense); GET_DIAGONAL(A, diag); FULLASSEMBLE(diag); SET_DIAGONAL(A, diag); incomplete_factor_ldu(ARCHnodes, ARCHpriv, A, ARCHmatrixcol, ARCHmatrixindex); */ res = saveres; CONDENSE_VECTOR(U, res); FULLASSEMBLE(res); BACKSOLVE_DENSE(UU, res); /* backsolve(ARCHnodes, ARCHpriv, A, ARCHmatrixcol, ARCHmatrixindex, res); */ iter = 0; xvec = 0.0; resa = res; YELLOW_ON(); FULLASSEMBLE(resa); BOUNDARY_DOTPRODUCT(res, resa, bnorm); TIMER_RESET(); YELLOW_OFF(); norm = bnorm; do { GREEN_TOGGLE(); /* DISPLAY(xvec); */ iter++; if (iter == 1) { p = resa; } else { beta = norm / oldnorm; p = resa + beta * p; } DENSEMVPRODUCT(Udense, p, w); /* Diagonal preconditioner */ /* w *= diag; */ /* My preconditioner */ /**/ FULLASSEMBLE(w); BACKSOLVE_DENSE(UU, w); /* backsolve(ARCHnodes, ARCHpriv, A, ARCHmatrixcol, ARCHmatrixindex, res); */ /**/ BOUNDARY_DOTPRODUCT(p, w, pw); alpha = norm / pw; xvec = xvec + alpha * p; res = res - alpha * w; resa = res; YELLOW_ON(); FULLASSEMBLE(resa); oldnorm = norm; BOUNDARY_DOTPRODUCT(res, resa, norm); YELLOW_OFF(); err = xvec - answer; BOUNDARY_DOTPRODUCT(err, err, errnorm); } while (errnorm > 0.0001); GET_TIMER(compddjrs, commddjrs); iterddjrs = iter; DISPLAY(xvec); GREEN_OFF(); UNCONDENSE(U, xvec); DISPLAY(xvec); printf("iters: %d %d %d %d %d\n", itercg, itercgd, iterdd, iterddd, iterddjrs); printf("comp: %d %d %d %d %d\n", compcg, compcgd, compdd, compddd, compddjrs); printf("total: %d %d %d %d %d\n", compcg + commcg, compcgd + commcgd, compdd + commdd, compddd + commddd, compddjrs + commddjrs); printf("condense, conv, unconv: %d %d %d\n", compcon, compconv, compunconv); fflush(stdout); xvec -= answer; /* printf("DD-jrs iterations: %d\n", iter); fflush(stdout); */ DISPLAY(xvec); }