Actual source code: bvec1.c
2: /*
3: Defines the BLAS based vector operations. Code shared by parallel
4: and sequential vectors.
5: */
7: #include <../src/vec/vec/impls/dvecimpl.h>
8: #include <petscblaslapack.h>
10: PetscErrorCode VecDot_Seq(Vec xin,Vec yin,PetscScalar *z)
11: {
12: const PetscScalar *ya,*xa;
13: PetscBLASInt one = 1,bn = 0;
15: PetscBLASIntCast(xin->map->n,&bn);
16: VecGetArrayRead(xin,&xa);
17: VecGetArrayRead(yin,&ya);
18: /* arguments ya, xa are reversed because BLAS complex conjugates the first argument, PETSc the second */
19: PetscStackCallBLAS("BLASdot",*z = BLASdot_(&bn,ya,&one,xa,&one));
20: VecRestoreArrayRead(xin,&xa);
21: VecRestoreArrayRead(yin,&ya);
22: if (xin->map->n > 0) {
23: PetscLogFlops(2.0*xin->map->n-1);
24: }
25: return 0;
26: }
28: PetscErrorCode VecTDot_Seq(Vec xin,Vec yin,PetscScalar *z)
29: {
30: const PetscScalar *ya,*xa;
31: PetscBLASInt one = 1,bn = 0;
33: PetscBLASIntCast(xin->map->n,&bn);
34: VecGetArrayRead(xin,&xa);
35: VecGetArrayRead(yin,&ya);
36: PetscStackCallBLAS("BLASdot",*z = BLASdotu_(&bn,xa,&one,ya,&one));
37: VecRestoreArrayRead(xin,&xa);
38: VecRestoreArrayRead(yin,&ya);
39: if (xin->map->n > 0) {
40: PetscLogFlops(2.0*xin->map->n-1);
41: }
42: return 0;
43: }
45: PetscErrorCode VecScale_Seq(Vec xin, PetscScalar alpha)
46: {
47: PetscBLASInt one = 1,bn;
49: PetscBLASIntCast(xin->map->n,&bn);
50: if (alpha == (PetscScalar)0.0) {
51: VecSet_Seq(xin,alpha);
52: } else if (alpha != (PetscScalar)1.0) {
53: PetscScalar a = alpha,*xarray;
54: VecGetArray(xin,&xarray);
55: PetscStackCallBLAS("BLASscal",BLASscal_(&bn,&a,xarray,&one));
56: VecRestoreArray(xin,&xarray);
57: }
58: PetscLogFlops(xin->map->n);
59: return 0;
60: }
62: PetscErrorCode VecAXPY_Seq(Vec yin,PetscScalar alpha,Vec xin)
63: {
64: const PetscScalar *xarray;
65: PetscScalar *yarray;
66: PetscBLASInt one = 1,bn;
68: PetscBLASIntCast(yin->map->n,&bn);
69: /* assume that the BLAS handles alpha == 1.0 efficiently since we have no fast code for it */
70: if (alpha != (PetscScalar)0.0) {
71: VecGetArrayRead(xin,&xarray);
72: VecGetArray(yin,&yarray);
73: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bn,&alpha,xarray,&one,yarray,&one));
74: VecRestoreArrayRead(xin,&xarray);
75: VecRestoreArray(yin,&yarray);
76: PetscLogFlops(2.0*yin->map->n);
77: }
78: return 0;
79: }
81: PetscErrorCode VecAXPBY_Seq(Vec yin,PetscScalar a,PetscScalar b,Vec xin)
82: {
83: PetscInt n = yin->map->n,i;
84: const PetscScalar *xx;
85: PetscScalar *yy;
87: if (a == (PetscScalar)0.0) {
88: VecScale_Seq(yin,b);
89: } else if (b == (PetscScalar)1.0) {
90: VecAXPY_Seq(yin,a,xin);
91: } else if (a == (PetscScalar)1.0) {
92: VecAYPX_Seq(yin,b,xin);
93: } else if (b == (PetscScalar)0.0) {
94: VecGetArrayRead(xin,&xx);
95: VecGetArray(yin,(PetscScalar**)&yy);
96: for (i=0; i<n; i++) yy[i] = a*xx[i];
97: VecRestoreArrayRead(xin,&xx);
98: VecRestoreArray(yin,(PetscScalar**)&yy);
99: PetscLogFlops(xin->map->n);
100: } else {
101: VecGetArrayRead(xin,&xx);
102: VecGetArray(yin,(PetscScalar**)&yy);
103: for (i=0; i<n; i++) yy[i] = a*xx[i] + b*yy[i];
104: VecRestoreArrayRead(xin,&xx);
105: VecRestoreArray(yin,(PetscScalar**)&yy);
106: PetscLogFlops(3.0*xin->map->n);
107: }
108: return 0;
109: }
111: PetscErrorCode VecAXPBYPCZ_Seq(Vec zin,PetscScalar alpha,PetscScalar beta,PetscScalar gamma,Vec xin,Vec yin)
112: {
113: PetscInt n = zin->map->n,i;
114: const PetscScalar *yy,*xx;
115: PetscScalar *zz;
117: VecGetArrayRead(xin,&xx);
118: VecGetArrayRead(yin,&yy);
119: VecGetArray(zin,&zz);
120: if (alpha == (PetscScalar)1.0) {
121: for (i=0; i<n; i++) zz[i] = xx[i] + beta*yy[i] + gamma*zz[i];
122: PetscLogFlops(4.0*n);
123: } else if (gamma == (PetscScalar)1.0) {
124: for (i=0; i<n; i++) zz[i] = alpha*xx[i] + beta*yy[i] + zz[i];
125: PetscLogFlops(4.0*n);
126: } else if (gamma == (PetscScalar)0.0) {
127: for (i=0; i<n; i++) zz[i] = alpha*xx[i] + beta*yy[i];
128: PetscLogFlops(3.0*n);
129: } else {
130: for (i=0; i<n; i++) zz[i] = alpha*xx[i] + beta*yy[i] + gamma*zz[i];
131: PetscLogFlops(5.0*n);
132: }
133: VecRestoreArrayRead(xin,&xx);
134: VecRestoreArrayRead(yin,&yy);
135: VecRestoreArray(zin,&zz);
136: return 0;
137: }