Actual source code: fbcgsr.c
2: /*
3: This file implements FBiCGStab-R.
4: Only allow right preconditioning.
5: FBiCGStab-R is a mathematically equivalent variant of FBiCGStab. Differences are:
6: (1) There are fewer MPI_Allreduce calls.
7: (2) The convergence occasionally is much faster than that of FBiCGStab.
8: */
9: #include <../src/ksp/ksp/impls/bcgs/bcgsimpl.h>
10: #include <petsc/private/vecimpl.h>
12: static PetscErrorCode KSPSetUp_FBCGSR(KSP ksp)
13: {
14: KSPSetWorkVecs(ksp,8);
15: return 0;
16: }
18: static PetscErrorCode KSPSolve_FBCGSR(KSP ksp)
19: {
20: PetscInt i,j,N;
21: PetscScalar tau,sigma,alpha,omega,beta;
22: PetscReal rho;
23: PetscScalar xi1,xi2,xi3,xi4;
24: Vec X,B,P,P2,RP,R,V,S,T,S2;
25: PetscScalar *PETSC_RESTRICT rp, *PETSC_RESTRICT r, *PETSC_RESTRICT p;
26: PetscScalar *PETSC_RESTRICT v, *PETSC_RESTRICT s, *PETSC_RESTRICT t, *PETSC_RESTRICT s2;
27: PetscScalar insums[4],outsums[4];
28: KSP_BCGS *bcgs = (KSP_BCGS*)ksp->data;
29: PC pc;
30: Mat mat;
33: VecGetLocalSize(ksp->vec_sol,&N);
35: X = ksp->vec_sol;
36: B = ksp->vec_rhs;
37: P2 = ksp->work[0];
39: /* The followings are involved in modified inner product calculations and vector updates */
40: RP = ksp->work[1]; VecGetArray(RP,(PetscScalar**)&rp)); PetscCall(VecRestoreArray(RP,NULL);
41: R = ksp->work[2]; VecGetArray(R,(PetscScalar**)&r)); PetscCall(VecRestoreArray(R,NULL);
42: P = ksp->work[3]; VecGetArray(P,(PetscScalar**)&p)); PetscCall(VecRestoreArray(P,NULL);
43: V = ksp->work[4]; VecGetArray(V,(PetscScalar**)&v)); PetscCall(VecRestoreArray(V,NULL);
44: S = ksp->work[5]; VecGetArray(S,(PetscScalar**)&s)); PetscCall(VecRestoreArray(S,NULL);
45: T = ksp->work[6]; VecGetArray(T,(PetscScalar**)&t)); PetscCall(VecRestoreArray(T,NULL);
46: S2 = ksp->work[7]; VecGetArray(S2,(PetscScalar**)&s2)); PetscCall(VecRestoreArray(S2,NULL);
48: /* Only supports right preconditioning */
50: if (!ksp->guess_zero) {
51: if (!bcgs->guess) {
52: VecDuplicate(X,&bcgs->guess);
53: }
54: VecCopy(X,bcgs->guess);
55: } else {
56: VecSet(X,0.0);
57: }
59: /* Compute initial residual */
60: KSPGetPC(ksp,&pc);
61: PCSetUp(pc);
62: PCGetOperators(pc,&mat,NULL);
63: if (!ksp->guess_zero) {
64: KSP_MatMult(ksp,mat,X,P2); /* P2 is used as temporary storage */
65: VecCopy(B,R);
66: VecAXPY(R,-1.0,P2);
67: } else {
68: VecCopy(B,R);
69: }
71: /* Test for nothing to do */
72: VecNorm(R,NORM_2,&rho);
73: PetscObjectSAWsTakeAccess((PetscObject)ksp);
74: ksp->its = 0;
75: if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = rho;
76: else ksp->rnorm = 0;
77: PetscObjectSAWsGrantAccess((PetscObject)ksp);
78: KSPLogResidualHistory(ksp,ksp->rnorm);
79: KSPMonitor(ksp,0,ksp->rnorm);
80: (*ksp->converged)(ksp,0,ksp->rnorm,&ksp->reason,ksp->cnvP);
81: if (ksp->reason) return 0;
83: /* Initialize iterates */
84: VecCopy(R,RP); /* rp <- r */
85: VecCopy(R,P); /* p <- r */
87: /* Big loop */
88: for (i=0; i<ksp->max_it; i++) {
90: /* matmult and pc */
91: KSP_PCApply(ksp,P,P2); /* p2 <- K p */
92: KSP_MatMult(ksp,mat,P2,V); /* v <- A p2 */
94: /* inner prodcuts */
95: if (i==0) {
96: tau = rho*rho;
97: VecDot(V,RP,&sigma); /* sigma <- (v,rp) */
98: } else {
99: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
100: tau = sigma = 0.0;
101: for (j=0; j<N; j++) {
102: tau += r[j]*rp[j]; /* tau <- (r,rp) */
103: sigma += v[j]*rp[j]; /* sigma <- (v,rp) */
104: }
105: PetscLogFlops(4.0*N);
106: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
107: insums[0] = tau;
108: insums[1] = sigma;
109: PetscLogEventBegin(VEC_ReduceCommunication,0,0,0,0);
110: MPIU_Allreduce(insums,outsums,2,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)ksp));
111: PetscLogEventEnd(VEC_ReduceCommunication,0,0,0,0);
112: tau = outsums[0];
113: sigma = outsums[1];
114: }
116: /* scalar update */
117: alpha = tau / sigma;
119: /* vector update */
120: VecWAXPY(S,-alpha,V,R); /* s <- r - alpha v */
122: /* matmult and pc */
123: KSP_PCApply(ksp,S,S2); /* s2 <- K s */
124: KSP_MatMult(ksp,mat,S2,T); /* t <- A s2 */
126: /* inner prodcuts */
127: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
128: xi1 = xi2 = xi3 = xi4 = 0.0;
129: for (j=0; j<N; j++) {
130: xi1 += s[j]*s[j]; /* xi1 <- (s,s) */
131: xi2 += t[j]*s[j]; /* xi2 <- (t,s) */
132: xi3 += t[j]*t[j]; /* xi3 <- (t,t) */
133: xi4 += t[j]*rp[j]; /* xi4 <- (t,rp) */
134: }
135: PetscLogFlops(8.0*N);
136: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
138: insums[0] = xi1;
139: insums[1] = xi2;
140: insums[2] = xi3;
141: insums[3] = xi4;
143: PetscLogEventBegin(VEC_ReduceCommunication,0,0,0,0);
144: MPIU_Allreduce(insums,outsums,4,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)ksp));
145: PetscLogEventEnd(VEC_ReduceCommunication,0,0,0,0);
146: xi1 = outsums[0];
147: xi2 = outsums[1];
148: xi3 = outsums[2];
149: xi4 = outsums[3];
151: /* test denominator */
152: if ((xi3 == 0.0) || (sigma == 0.0)) {
154: else ksp->reason = KSP_DIVERGED_BREAKDOWN;
155: PetscInfo(ksp,"KSPSolve has failed due to zero inner product\n");
156: break;
157: }
159: /* scalar updates */
160: omega = xi2 / xi3;
161: beta = -xi4 / sigma;
162: rho = PetscSqrtReal(PetscAbsScalar(xi1 - omega * xi2)); /* residual norm */
164: /* vector updates */
165: VecAXPBYPCZ(X,alpha,omega,1.0,P2,S2); /* x <- alpha * p2 + omega * s2 + x */
167: /* convergence test */
168: PetscObjectSAWsTakeAccess((PetscObject)ksp);
169: ksp->its++;
170: if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = rho;
171: else ksp->rnorm = 0;
172: PetscObjectSAWsGrantAccess((PetscObject)ksp);
173: KSPLogResidualHistory(ksp,ksp->rnorm);
174: KSPMonitor(ksp,i+1,ksp->rnorm);
175: (*ksp->converged)(ksp,i+1,ksp->rnorm,&ksp->reason,ksp->cnvP);
176: if (ksp->reason) break;
178: /* vector updates */
179: PetscLogEventBegin(VEC_Ops,0,0,0,0);
180: for (j=0; j<N; j++) {
181: r[j] = s[j] - omega * t[j]; /* r <- s - omega t */
182: p[j] = r[j] + beta * (p[j] - omega * v[j]); /* p <- r + beta * (p - omega v) */
183: }
184: PetscLogFlops(6.0*N);
185: PetscLogEventEnd(VEC_Ops,0,0,0,0);
187: }
189: if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
190: return 0;
191: }
193: /*MC
194: KSPFBCGSR - Implements a mathematically equivalent variant of FBiCGSTab.
196: Options Database Keys:
197: see KSPSolve()
199: Level: beginner
201: Notes:
202: Only allow right preconditioning
204: .seealso: KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPBICG, KSPFBCGSL, KSPSetPCSide()
205: M*/
206: PETSC_EXTERN PetscErrorCode KSPCreate_FBCGSR(KSP ksp)
207: {
208: KSP_BCGS *bcgs;
210: PetscNewLog(ksp,&bcgs);
212: ksp->data = bcgs;
213: ksp->ops->setup = KSPSetUp_FBCGSR;
214: ksp->ops->solve = KSPSolve_FBCGSR;
215: ksp->ops->destroy = KSPDestroy_BCGS;
216: ksp->ops->reset = KSPReset_BCGS;
217: ksp->ops->buildsolution = KSPBuildSolution_BCGS;
218: ksp->ops->buildresidual = KSPBuildResidualDefault;
219: ksp->ops->setfromoptions = KSPSetFromOptions_BCGS;
220: ksp->pc_side = PC_RIGHT; /* set default PC side */
222: KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,3);
223: KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,2);
224: KSPSetSupportedNorm(ksp,KSP_NORM_NONE,PC_RIGHT,1);
225: return 0;
226: }