Actual source code: ex226.c
1: static char help[] = "Benchmark for MatMatMult() of AIJ matrices using different 2d finite-difference stencils.\n\n";
3: #include <petscmat.h>
5: /* Converts 3d grid coordinates (i,j,k) for a grid of size m \times n to global indexing. Pass k = 0 for a 2d grid. */
6: int global_index(PetscInt i,PetscInt j,PetscInt k, PetscInt m, PetscInt n) { return i + j * m + k * m * n; }
8: int main(int argc,char **argv)
9: {
10: Mat A,B,C,PtAP,PtAP_copy,PtAP_squared;
11: PetscInt i,M,N,Istart,Iend,n=7,j,J,Ii,m=8,k,o=1;
12: PetscScalar v;
13: PetscBool equal=PETSC_FALSE,mat_view=PETSC_FALSE;
14: char stencil[PETSC_MAX_PATH_LEN];
15: #if defined(PETSC_USE_LOG)
16: PetscLogStage fullMatMatMultStage;
17: #endif
19: PetscInitialize(&argc,&argv,(char*)0,help);
20: PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL);
21: PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);
22: PetscOptionsGetInt(NULL,NULL,"-o",&o,NULL);
23: PetscOptionsHasName(NULL,NULL,"-result_view",&mat_view);
24: PetscOptionsGetString(NULL,NULL,"-stencil",stencil,sizeof(stencil),NULL);
26: /* Create a aij matrix A */
27: M = N = m*n*o;
28: MatCreate(PETSC_COMM_WORLD,&A);
29: MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
30: MatSetType(A,MATAIJ);
31: MatSetFromOptions(A);
33: /* Consistency checks */
36: /************ 2D stencils ***************/
37: PetscStrcmp(stencil, "2d5point", &equal);
38: if (equal) { /* 5-point stencil, 2D */
39: MatMPIAIJSetPreallocation(A,5,NULL,5,NULL);
40: MatSeqAIJSetPreallocation(A,5,NULL);
41: MatGetOwnershipRange(A,&Istart,&Iend);
42: for (Ii=Istart; Ii<Iend; Ii++) {
43: v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
44: if (i>0) {J = global_index(i-1,j,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
45: if (i<m-1) {J = global_index(i+1,j,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
46: if (j>0) {J = global_index(i,j-1,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
47: if (j<n-1) {J = global_index(i,j+1,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
48: v = 4.0; MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);
49: }
50: }
51: PetscStrcmp(stencil, "2d9point", &equal);
52: if (equal) { /* 9-point stencil, 2D */
53: MatMPIAIJSetPreallocation(A,9,NULL,9,NULL);
54: MatSeqAIJSetPreallocation(A,9,NULL);
55: MatGetOwnershipRange(A,&Istart,&Iend);
56: for (Ii=Istart; Ii<Iend; Ii++) {
57: v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
58: if (i>0) {J = global_index(i-1,j, k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
59: if (i>0 && j>0) {J = global_index(i-1,j-1,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
60: if (j>0) {J = global_index(i, j-1,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
61: if (i<m-1 && j>0) {J = global_index(i+1,j-1,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
62: if (i<m-1) {J = global_index(i+1,j, k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
63: if (i<m-1 && j<n-1) {J = global_index(i+1,j+1,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
64: if (j<n-1) {J = global_index(i, j+1,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
65: if (i>0 && j<n-1) {J = global_index(i-1,j+1,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
66: v = 8.0; MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);
67: }
68: }
69: PetscStrcmp(stencil, "2d9point2", &equal);
70: if (equal) { /* 9-point Cartesian stencil (width 2 per coordinate), 2D */
71: MatMPIAIJSetPreallocation(A,9,NULL,9,NULL);
72: MatSeqAIJSetPreallocation(A,9,NULL);
73: MatGetOwnershipRange(A,&Istart,&Iend);
74: for (Ii=Istart; Ii<Iend; Ii++) {
75: v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
76: if (i>0) {J = global_index(i-1,j,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
77: if (i>1) {J = global_index(i-2,j,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
78: if (i<m-1) {J = global_index(i+1,j,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
79: if (i<m-2) {J = global_index(i+2,j,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
80: if (j>0) {J = global_index(i,j-1,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
81: if (j>1) {J = global_index(i,j-2,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
82: if (j<n-1) {J = global_index(i,j+1,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
83: if (j<n-2) {J = global_index(i,j+2,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
84: v = 8.0; MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);
85: }
86: }
87: PetscStrcmp(stencil, "2d13point", &equal);
88: if (equal) { /* 13-point Cartesian stencil (width 3 per coordinate), 2D */
89: MatMPIAIJSetPreallocation(A,13,NULL,13,NULL);
90: MatSeqAIJSetPreallocation(A,13,NULL);
91: MatGetOwnershipRange(A,&Istart,&Iend);
92: for (Ii=Istart; Ii<Iend; Ii++) {
93: v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
94: if (i>0) {J = global_index(i-1,j,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
95: if (i>1) {J = global_index(i-2,j,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
96: if (i>2) {J = global_index(i-3,j,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
97: if (i<m-1) {J = global_index(i+1,j,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
98: if (i<m-2) {J = global_index(i+2,j,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
99: if (i<m-3) {J = global_index(i+3,j,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
100: if (j>0) {J = global_index(i,j-1,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
101: if (j>1) {J = global_index(i,j-2,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
102: if (j>2) {J = global_index(i,j-3,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
103: if (j<n-1) {J = global_index(i,j+1,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
104: if (j<n-2) {J = global_index(i,j+2,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
105: if (j<n-3) {J = global_index(i,j+3,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
106: v = 12.0; MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);
107: }
108: }
109: /************ 3D stencils ***************/
110: PetscStrcmp(stencil, "3d7point", &equal);
111: if (equal) { /* 7-point stencil, 3D */
112: MatMPIAIJSetPreallocation(A,7,NULL,7,NULL);
113: MatSeqAIJSetPreallocation(A,7,NULL);
114: MatGetOwnershipRange(A,&Istart,&Iend);
115: for (Ii=Istart; Ii<Iend; Ii++) {
116: v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
117: if (i>0) {J = global_index(i-1,j,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
118: if (i<m-1) {J = global_index(i+1,j,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
119: if (j>0) {J = global_index(i,j-1,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
120: if (j<n-1) {J = global_index(i,j+1,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
121: if (k>0) {J = global_index(i,j,k-1,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
122: if (k<o-1) {J = global_index(i,j,k+1,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
123: v = 6.0; MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);
124: }
125: }
126: PetscStrcmp(stencil, "3d13point", &equal);
127: if (equal) { /* 13-point stencil, 3D */
128: MatMPIAIJSetPreallocation(A,13,NULL,13,NULL);
129: MatSeqAIJSetPreallocation(A,13,NULL);
130: MatGetOwnershipRange(A,&Istart,&Iend);
131: for (Ii=Istart; Ii<Iend; Ii++) {
132: v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
133: if (i>0) {J = global_index(i-1,j,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
134: if (i>1) {J = global_index(i-2,j,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
135: if (i<m-1) {J = global_index(i+1,j,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
136: if (i<m-2) {J = global_index(i+2,j,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
137: if (j>0) {J = global_index(i,j-1,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
138: if (j>1) {J = global_index(i,j-2,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
139: if (j<n-1) {J = global_index(i,j+1,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
140: if (j<n-2) {J = global_index(i,j+2,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
141: if (k>0) {J = global_index(i,j,k-1,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
142: if (k>1) {J = global_index(i,j,k-2,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
143: if (k<o-1) {J = global_index(i,j,k+1,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
144: if (k<o-2) {J = global_index(i,j,k+2,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
145: v = 12.0; MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);
146: }
147: }
148: PetscStrcmp(stencil, "3d19point", &equal);
149: if (equal) { /* 19-point stencil, 3D */
150: MatMPIAIJSetPreallocation(A,19,NULL,19,NULL);
151: MatSeqAIJSetPreallocation(A,19,NULL);
152: MatGetOwnershipRange(A,&Istart,&Iend);
153: for (Ii=Istart; Ii<Iend; Ii++) {
154: v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
155: /* one hop */
156: if (i>0) {J = global_index(i-1,j,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
157: if (i<m-1) {J = global_index(i+1,j,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
158: if (j>0) {J = global_index(i,j-1,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
159: if (j<n-1) {J = global_index(i,j+1,k,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
160: if (k>0) {J = global_index(i,j,k-1,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
161: if (k<o-1) {J = global_index(i,j,k+1,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
162: /* two hops */
163: if (i>0 && j>0) {J = global_index(i-1,j-1,k ,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
164: if (i>0 && k>0) {J = global_index(i-1,j, k-1,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
165: if (i>0 && j<n-1) {J = global_index(i-1,j+1,k ,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
166: if (i>0 && k<o-1) {J = global_index(i-1,j, k+1,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
167: if (i<m-1 && j>0) {J = global_index(i+1,j-1,k ,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
168: if (i<m-1 && k>0) {J = global_index(i+1,j, k-1,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
169: if (i<m-1 && j<n-1) {J = global_index(i+1,j+1,k ,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
170: if (i<m-1 && k<o-1) {J = global_index(i+1,j, k+1,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
171: if (j>0 && k>0) {J = global_index(i, j-1,k-1,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
172: if (j>0 && k<o-1) {J = global_index(i, j-1,k+1,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
173: if (j<n-1 && k>0) {J = global_index(i, j+1,k-1,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
174: if (j<n-1 && k<o-1) {J = global_index(i, j+1,k+1,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
175: v = 18.0; MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);
176: }
177: }
178: PetscStrcmp(stencil, "3d27point", &equal);
179: if (equal) { /* 27-point stencil, 3D */
180: MatMPIAIJSetPreallocation(A,27,NULL,27,NULL);
181: MatSeqAIJSetPreallocation(A,27,NULL);
182: MatGetOwnershipRange(A,&Istart,&Iend);
183: for (Ii=Istart; Ii<Iend; Ii++) {
184: v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
185: if (k>0) {
186: if (j>0) {
187: if (i>0) {J = global_index(i-1,j-1,k-1,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
188: J = global_index(i, j-1,k-1,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);
189: if (i<m-1) {J = global_index(i+1,j-1,k-1,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
190: }
191: {
192: if (i>0) {J = global_index(i-1,j, k-1,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
193: J = global_index(i, j, k-1,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);
194: if (i<m-1) {J = global_index(i+1,j, k-1,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
195: }
196: if (j<n-1) {
197: if (i>0) {J = global_index(i-1,j+1,k-1,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
198: J = global_index(i, j+1,k-1,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);
199: if (i<m-1) {J = global_index(i+1,j+1,k-1,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
200: }
201: }
202: {
203: if (j>0) {
204: if (i>0) {J = global_index(i-1,j-1,k ,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
205: J = global_index(i, j-1,k ,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);
206: if (i<m-1) {J = global_index(i+1,j-1,k ,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
207: }
208: {
209: if (i>0) {J = global_index(i-1,j, k ,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
210: J = global_index(i, j, k ,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);
211: if (i<m-1) {J = global_index(i+1,j, k ,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
212: }
213: if (j<n-1) {
214: if (i>0) {J = global_index(i-1,j+1,k ,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
215: J = global_index(i, j+1,k ,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);
216: if (i<m-1) {J = global_index(i+1,j+1,k ,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
217: }
218: }
219: if (k<o-1) {
220: if (j>0) {
221: if (i>0) {J = global_index(i-1,j-1,k+1,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
222: J = global_index(i, j-1,k+1,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);
223: if (i<m-1) {J = global_index(i+1,j-1,k+1,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
224: }
225: {
226: if (i>0) {J = global_index(i-1,j, k+1,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
227: J = global_index(i, j, k+1,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);
228: if (i<m-1) {J = global_index(i+1,j, k+1,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
229: }
230: if (j<n-1) {
231: if (i>0) {J = global_index(i-1,j+1,k+1,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
232: J = global_index(i, j+1,k+1,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);
233: if (i<m-1) {J = global_index(i+1,j+1,k+1,m,n); MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
234: }
235: }
236: v = 26.0; MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);
237: }
238: }
239: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
240: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
242: /* Copy A into B in order to have a more representative benchmark (A*A has more cache hits than A*B) */
243: MatDuplicate(A,MAT_COPY_VALUES,&B);
245: PetscLogStageRegister("Full MatMatMult",&fullMatMatMultStage);
247: /* Test C = A*B */
248: PetscLogStagePush(fullMatMatMultStage);
249: MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);
251: /* Test PtAP_squared = PtAP(C,C)*PtAP(C,C) */
252: MatPtAP(C,C,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&PtAP);
253: MatDuplicate(PtAP,MAT_COPY_VALUES,&PtAP_copy);
254: MatMatMult(PtAP,PtAP_copy,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&PtAP_squared);
256: MatView(C,PETSC_VIEWER_STDOUT_WORLD);
257: MatView(PtAP_squared,PETSC_VIEWER_STDOUT_WORLD);
259: MatDestroy(&PtAP_squared);
260: MatDestroy(&PtAP_copy);
261: MatDestroy(&PtAP);
262: MatDestroy(&C);
263: MatDestroy(&B);
264: MatDestroy(&A);
265: PetscFinalize();
266: return 0;
267: }
269: /*TEST
271: test:
272: suffix: 1
273: nsize: 1
274: args: -m 8 -n 8 -stencil 2d5point -matmatmult_via sorted
276: test:
277: suffix: 2
278: nsize: 1
279: args: -m 5 -n 5 -o 5 -stencil 3d27point -matmatmult_via rowmerge
281: test:
282: suffix: 3
283: nsize: 4
284: args: -m 6 -n 6 -stencil 2d5point -matmatmult_via seqmpi
286: TEST*/