Actual source code: pipecgrr.c


  2: #include <petsc/private/kspimpl.h>

  4: /*
  5:      KSPSetUp_PIPECGRR - Sets up the workspace needed by the PIPECGRR method.

  7:       This is called once, usually automatically by KSPSolve() or KSPSetUp()
  8:      but can be called directly by KSPSetUp()
  9: */
 10: static PetscErrorCode KSPSetUp_PIPECGRR(KSP ksp)
 11: {
 12:   /* get work vectors needed by PIPECGRR */
 13:   KSPSetWorkVecs(ksp,9);
 14:   return 0;
 15: }

 17: /*
 18:  KSPSolve_PIPECGRR - This routine actually applies the pipelined conjugate gradient method with automated residual replacement
 19: */
 20: static PetscErrorCode  KSPSolve_PIPECGRR(KSP ksp)
 21: {
 22:   PetscInt       i = 0,replace = 0,totreplaces = 0,nsize;
 23:   PetscScalar    alpha = 0.0,beta = 0.0,gamma = 0.0,gammaold = 0.0,delta = 0.0,alphap = 0.0,betap = 0.0;
 24:   PetscReal      dp = 0.0,nsi = 0.0,sqn = 0.0,Anorm = 0.0,rnp = 0.0,pnp = 0.0,snp = 0.0,unp = 0.0,wnp = 0.0,xnp = 0.0,qnp = 0.0,znp = 0.0,mnz = 5.0,tol = PETSC_SQRT_MACHINE_EPSILON,eps = PETSC_MACHINE_EPSILON;
 25:   PetscReal      ds = 0.0,dz = 0.0,dx = 0.0,dpp = 0.0,dq = 0.0,dm = 0.0,du = 0.0,dw = 0.0,db = 0.0,errr = 0.0,errrprev = 0.0,errs = 0.0,errw = 0.0,errz = 0.0,errncr = 0.0,errncs = 0.0,errncw = 0.0,errncz = 0.0;
 26:   Vec            X,B,Z,P,W,Q,U,M,N,R,S;
 27:   Mat            Amat,Pmat;
 28:   PetscBool      diagonalscale;

 30:   PCGetDiagonalScale(ksp->pc,&diagonalscale);

 33:   X = ksp->vec_sol;
 34:   B = ksp->vec_rhs;
 35:   M = ksp->work[0];
 36:   Z = ksp->work[1];
 37:   P = ksp->work[2];
 38:   N = ksp->work[3];
 39:   W = ksp->work[4];
 40:   Q = ksp->work[5];
 41:   U = ksp->work[6];
 42:   R = ksp->work[7];
 43:   S = ksp->work[8];

 45:   PCGetOperators(ksp->pc,&Amat,&Pmat);

 47:   ksp->its = 0;
 48:   if (!ksp->guess_zero) {
 49:     KSP_MatMult(ksp,Amat,X,R);  /*  r <- b - Ax  */
 50:     VecAYPX(R,-1.0,B);
 51:   } else {
 52:     VecCopy(B,R);               /*  r <- b (x is 0)  */
 53:   }

 55:   KSP_PCApply(ksp,R,U);         /*  u <- Br  */

 57:   switch (ksp->normtype) {
 58:   case KSP_NORM_PRECONDITIONED:
 59:     VecNormBegin(U,NORM_2,&dp); /*  dp <- u'*u = e'*A'*B'*B*A'*e'  */
 60:     VecNormBegin(B,NORM_2,&db);
 61:     PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)U));
 62:     KSP_MatMult(ksp,Amat,U,W);  /*  w <- Au  */
 63:     VecNormEnd(U,NORM_2,&dp);
 64:     VecNormEnd(B,NORM_2,&db);
 65:     break;
 66:   case KSP_NORM_UNPRECONDITIONED:
 67:     VecNormBegin(R,NORM_2,&dp); /*  dp <- r'*r = e'*A'*A*e  */
 68:     VecNormBegin(B,NORM_2,&db);
 69:     PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
 70:     KSP_MatMult(ksp,Amat,U,W);  /*  w <- Au  */
 71:     VecNormEnd(R,NORM_2,&dp);
 72:     VecNormEnd(B,NORM_2,&db);
 73:     break;
 74:   case KSP_NORM_NATURAL:
 75:     VecDotBegin(R,U,&gamma);    /*  gamma <- u'*r  */
 76:     VecNormBegin(B,NORM_2,&db);
 77:     PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
 78:     KSP_MatMult(ksp,Amat,U,W);  /*  w <- Au  */
 79:     VecDotEnd(R,U,&gamma);
 80:     VecNormEnd(B,NORM_2,&db);
 81:     KSPCheckDot(ksp,gamma);
 82:     dp = PetscSqrtReal(PetscAbsScalar(gamma));       /*  dp <- r'*u = r'*B*r = e'*A'*B*A*e  */
 83:     break;
 84:   case KSP_NORM_NONE:
 85:     KSP_MatMult(ksp,Amat,U,W);
 86:     dp   = 0.0;
 87:     break;
 88:   default: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"%s",KSPNormTypes[ksp->normtype]);
 89:   }
 90:   KSPLogResidualHistory(ksp,dp);
 91:   KSPMonitor(ksp,0,dp);
 92:   ksp->rnorm = dp;
 93:   (*ksp->converged)(ksp,0,dp,&ksp->reason,ksp->cnvP); /*  test for convergence  */
 94:   if (ksp->reason) return 0;

 96:   MatNorm(Amat,NORM_INFINITY,&Anorm);
 97:   VecGetSize(B,&nsize);
 98:   nsi = (PetscReal) nsize;
 99:   sqn = PetscSqrtReal(nsi);

101:   do {
102:     if (i > 1) {
103:       pnp = dpp;
104:       snp = ds;
105:       qnp = dq;
106:       znp = dz;
107:     }
108:     if (i > 0) {
109:       rnp = dp;
110:       unp = du;
111:       wnp = dw;
112:       xnp = dx;
113:       alphap = alpha;
114:       betap = beta;
115:     }

117:     if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
118:       VecNormBegin(R,NORM_2,&dp);
119:     } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
120:       VecNormBegin(U,NORM_2,&dp);
121:     }
122:     if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) {
123:       VecDotBegin(R,U,&gamma);
124:     }
125:     VecDotBegin(W,U,&delta);

127:     if (i > 0) {
128:       VecNormBegin(S,NORM_2,&ds);
129:       VecNormBegin(Z,NORM_2,&dz);
130:       VecNormBegin(P,NORM_2,&dpp);
131:       VecNormBegin(Q,NORM_2,&dq);
132:       VecNormBegin(M,NORM_2,&dm);
133:     }
134:     VecNormBegin(X,NORM_2,&dx);
135:     VecNormBegin(U,NORM_2,&du);
136:     VecNormBegin(W,NORM_2,&dw);

138:     PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
139:     KSP_PCApply(ksp,W,M);           /*   m <- Bw       */
140:     KSP_MatMult(ksp,Amat,M,N);      /*   n <- Am       */

142:     if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
143:       VecNormEnd(R,NORM_2,&dp);
144:     } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
145:       VecNormEnd(U,NORM_2,&dp);
146:     }
147:     if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) {
148:       VecDotEnd(R,U,&gamma);
149:     }
150:     VecDotEnd(W,U,&delta);

152:     if (i > 0) {
153:       VecNormEnd(S,NORM_2,&ds);
154:       VecNormEnd(Z,NORM_2,&dz);
155:       VecNormEnd(P,NORM_2,&dpp);
156:       VecNormEnd(Q,NORM_2,&dq);
157:       VecNormEnd(M,NORM_2,&dm);
158:     }
159:     VecNormEnd(X,NORM_2,&dx);
160:     VecNormEnd(U,NORM_2,&du);
161:     VecNormEnd(W,NORM_2,&dw);

163:     if (i > 0) {
164:       if (ksp->normtype == KSP_NORM_NATURAL) dp = PetscSqrtReal(PetscAbsScalar(gamma));
165:       else if (ksp->normtype == KSP_NORM_NONE) dp = 0.0;

167:       ksp->rnorm = dp;
168:       KSPLogResidualHistory(ksp,dp);
169:       KSPMonitor(ksp,i,dp);
170:       (*ksp->converged)(ksp,i,dp,&ksp->reason,ksp->cnvP);
171:       if (ksp->reason) return 0;
172:     }

174:     if (i == 0) {
175:       alpha = gamma / delta;
176:       VecCopy(N,Z);          /*  z <- n  */
177:       VecCopy(M,Q);          /*  q <- m  */
178:       VecCopy(U,P);          /*  p <- u  */
179:       VecCopy(W,S);          /*  s <- w  */
180:     } else {
181:       beta = gamma / gammaold;
182:       alpha = gamma / (delta - beta / alpha * gamma);
183:       VecAYPX(Z,beta,N);     /*  z <- n + beta * z  */
184:       VecAYPX(Q,beta,M);     /*  q <- m + beta * q  */
185:       VecAYPX(P,beta,U);     /*  p <- u + beta * p  */
186:       VecAYPX(S,beta,W);     /*  s <- w + beta * s  */
187:     }
188:     VecAXPY(X, alpha,P);     /*  x <- x + alpha * p  */
189:     VecAXPY(U,-alpha,Q);     /*  u <- u - alpha * q  */
190:     VecAXPY(W,-alpha,Z);     /*  w <- w - alpha * z  */
191:     VecAXPY(R,-alpha,S);     /*  r <- r - alpha * s  */
192:     gammaold = gamma;

194:     if (i > 0) {
195:       errncr = PetscSqrtReal(Anorm*xnp+2.0*Anorm*PetscAbsScalar(alphap)*dpp+rnp+2.0*PetscAbsScalar(alphap)*ds)*eps;
196:       errncw = PetscSqrtReal(Anorm*unp+2.0*Anorm*PetscAbsScalar(alphap)*dq+wnp+2.0*PetscAbsScalar(alphap)*dz)*eps;
197:     }
198:     if (i > 1) {
199:       errncs = PetscSqrtReal(Anorm*unp+2.0*Anorm*PetscAbsScalar(betap)*pnp+wnp+2.0*PetscAbsScalar(betap)*snp)*eps;
200:       errncz = PetscSqrtReal((mnz*sqn+2)*Anorm*dm+2.0*Anorm*PetscAbsScalar(betap)*qnp+2.0*PetscAbsScalar(betap)*znp)*eps;
201:     }

203:     if (i > 0) {
204:       if (i == 1) {
205:         errr = PetscSqrtReal((mnz*sqn+1)*Anorm*xnp+db)*eps+PetscSqrtReal(PetscAbsScalar(alphap)*mnz*sqn*Anorm*dpp)*eps+errncr;
206:         errs = PetscSqrtReal(mnz*sqn*Anorm*dpp)*eps;
207:         errw = PetscSqrtReal(mnz*sqn*Anorm*unp)*eps+PetscSqrtReal(PetscAbsScalar(alphap)*mnz*sqn*Anorm*dq)*eps+errncw;
208:         errz = PetscSqrtReal(mnz*sqn*Anorm*dq)*eps;
209:       } else if (replace == 1) {
210:         errrprev = errr;
211:         errr = PetscSqrtReal((mnz*sqn+1)*Anorm*dx+db)*eps;
212:         errs = PetscSqrtReal(mnz*sqn*Anorm*dpp)*eps;
213:         errw = PetscSqrtReal(mnz*sqn*Anorm*du)*eps;
214:         errz = PetscSqrtReal(mnz*sqn*Anorm*dq)*eps;
215:         replace = 0;
216:       } else {
217:         errrprev = errr;
218:         errr = errr+PetscAbsScalar(alphap)*PetscAbsScalar(betap)*errs+PetscAbsScalar(alphap)*errw+errncr+PetscAbsScalar(alphap)*errncs;
219:         errs = errw+PetscAbsScalar(betap)*errs+errncs;
220:         errw = errw+PetscAbsScalar(alphap)*PetscAbsScalar(betap)*errz+errncw+PetscAbsScalar(alphap)*errncz;
221:         errz = PetscAbsScalar(betap)*errz+errncz;
222:       }
223:       if (i > 1 && errrprev <= (tol * rnp) && errr > (tol * dp)) {
224:         KSP_MatMult(ksp,Amat,X,R);        /*  r <- Ax - b  */
225:         VecAYPX(R,-1.0,B);
226:         KSP_PCApply(ksp,R,U);             /*  u <- Br  */
227:         KSP_MatMult(ksp,Amat,U,W);        /*  w <- Au  */
228:         KSP_MatMult(ksp,Amat,P,S);        /*  s <- Ap  */
229:         KSP_PCApply(ksp,S,Q);             /*  q <- Bs  */
230:         KSP_MatMult(ksp,Amat,Q,Z);        /*  z <- Aq  */
231:         replace = 1;
232:         totreplaces++;
233:       }
234:     }

236:     i++;
237:     ksp->its = i;

239:   } while (i<=ksp->max_it);
240:   if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
241:   return 0;
242: }

244: /*MC
245:    KSPPIPECGRR - Pipelined conjugate gradient method with automated residual replacements.

247:    This method has only a single non-blocking reduction per iteration, compared to 2 blocking for standard CG.  The
248:    non-blocking reduction is overlapped by the matrix-vector product and preconditioner application.

250:    KSPPIPECGRR improves the robustness of KSPPIPECG by adding an automated residual replacement strategy.
251:    True residual and other auxiliary variables are computed explicitly in a number of dynamically determined
252:    iterations to counteract the accumulation of rounding errors and thus attain a higher maximal final accuracy.

254:    See also KSPPIPECG, which is identical to KSPPIPECGRR without residual replacements.
255:    See also KSPPIPECR, where the reduction is only overlapped with the matrix-vector product.

257:    Level: intermediate

259:    Notes:
260:    MPI configuration may be necessary for reductions to make asynchronous progress, which is important for
261:    performance of pipelined methods. See the FAQ on the PETSc website for details.

263:    Contributed by:
264:    Siegfried Cools, Universiteit Antwerpen, Dept. Mathematics & Computer Science,
265:    European FP7 Project on EXascale Algorithms and Advanced Computational Techniques (EXA2CT) / Research Foundation Flanders (FWO)

267:    Reference:
268:    S. Cools, E.F. Yetkin, E. Agullo, L. Giraud, W. Vanroose, "Analyzing the effect of local rounding error
269:    propagation on the maximal attainable accuracy of the pipelined Conjugate Gradients method",
270:    SIAM Journal on Matrix Analysis and Applications (SIMAX), 39(1):426--450, 2018.

272: .seealso: KSPCreate(), KSPSetType(), KSPPIPECR, KSPGROPPCG, KSPPIPECG, KSPPGMRES, KSPCG, KSPPIPEBCGS, KSPCGUseSingleReduction()
273: M*/
274: PETSC_EXTERN PetscErrorCode KSPCreate_PIPECGRR(KSP ksp)
275: {
276:   KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_LEFT,2);
277:   KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,2);
278:   KSPSetSupportedNorm(ksp,KSP_NORM_NATURAL,PC_LEFT,2);
279:   KSPSetSupportedNorm(ksp,KSP_NORM_NONE,PC_LEFT,1);

281:   ksp->ops->setup          = KSPSetUp_PIPECGRR;
282:   ksp->ops->solve          = KSPSolve_PIPECGRR;
283:   ksp->ops->destroy        = KSPDestroyDefault;
284:   ksp->ops->view           = NULL;
285:   ksp->ops->setfromoptions = NULL;
286:   ksp->ops->buildsolution  = KSPBuildSolutionDefault;
287:   ksp->ops->buildresidual  = KSPBuildResidualDefault;
288:   return 0;
289: }