Actual source code: psort.c
#include <petsc/private/petscimpl.h>
#include <petscis.h>

/* This is the bitonic merge that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */
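/* Summary of the scheme below: each rank holds a block of n sorted keys; ranks in [rankStart, rankEnd) are paired
   across a power-of-two split, each pair exchanges its blocks, and the lower rank keeps the elementwise minima
   (for an ascending merge; maxima for a descending one) while the higher rank keeps the complement.  The two
   halves are then merged recursively. */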
static PetscErrorCode PetscParallelSortInt_Bitonic_Merge(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward)
{
  PetscInt diff;
  PetscInt split, mid, partner;

  diff = rankEnd - rankStart;
  if (diff <= 0) return 0;
  if (diff == 1) {
    if (forward) {
      PetscSortInt((PetscInt) n, keys);
    } else {
      PetscSortReverseInt((PetscInt) n, keys);
    }
    return 0;
  }
  split = 1;
  while (2 * split < diff) split *= 2;
  mid = rankStart + split;
  if (rank < mid) {
    partner = rank + split;
  } else {
    partner = rank - split;
  }
  if (partner < rankEnd) {
    PetscMPIInt i;

    MPI_Sendrecv(keys, n, MPIU_INT, partner, tag, buffer, n, MPIU_INT, partner, tag, comm, MPI_STATUS_IGNORE);
    if ((rank < partner) == (forward == PETSC_TRUE)) {
      for (i = 0; i < n; i++) {
        keys[i] = (keys[i] <= buffer[i]) ? keys[i] : buffer[i];
      }
    } else {
      for (i = 0; i < n; i++) {
        keys[i] = (keys[i] > buffer[i]) ? keys[i] : buffer[i];
      }
    }
  }
  /* divide and conquer */
  if (rank < mid) {
    PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, mid, rank, n, keys, buffer, forward);
  } else {
    PetscParallelSortInt_Bitonic_Merge(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward);
  }
  return 0;
}
/* This is the bitonic sort that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */
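/* Summary of the recursion below: the lower half of the ranks is sorted in the direction opposite to 'forward' and
   the upper half in the 'forward' direction, so that the keys across [rankStart, rankEnd) form a bitonic sequence,
   which the merge above then sorts in the requested direction. */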
static PetscErrorCode PetscParallelSortInt_Bitonic_Recursive(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward)
{
  PetscInt diff;
  PetscInt mid;

  diff = rankEnd - rankStart;
  if (diff <= 0) return 0;
  if (diff == 1) {
    if (forward) {
      PetscSortInt(n, keys);
    } else {
      PetscSortReverseInt(n, keys);
    }
    return 0;
  }
  mid = rankStart + diff / 2;
  /* divide and conquer */
  if (rank < mid) {
    PetscParallelSortInt_Bitonic_Recursive(comm, tag, rankStart, mid, rank, n, keys, buffer, (PetscBool) !forward);
  } else {
    PetscParallelSortInt_Bitonic_Recursive(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward);
  }
  /* bitonic merge */
  PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, rankEnd, rank, n, keys, buffer, forward);
  return 0;
}
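/* Driver for the parallel bitonic sort: sorts n keys per rank across all of comm in ascending order.  As used
   below, every rank passes the same n, since the merge exchanges equal-length blocks between partner ranks. */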
static PetscErrorCode PetscParallelSortInt_Bitonic(MPI_Comm comm, PetscInt n, PetscInt keys[])
{
  PetscMPIInt size, rank, tag, mpin;
  PetscInt    *buffer;

  PetscCommGetNewTag(comm, &tag);
  MPI_Comm_size(comm, &size);
  MPI_Comm_rank(comm, &rank);
  PetscMPIIntCast(n, &mpin);
  PetscMalloc1(n, &buffer);
  PetscParallelSortInt_Bitonic_Recursive(comm, tag, 0, size, rank, mpin, keys, buffer, PETSC_TRUE);
  PetscFree(buffer);
  return 0;
}
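/* Sample selection for the samplesort: each rank proposes P - 1 pivots matched to the target layout mapout, the
   proposals are sorted with the parallel bitonic sort above, and P - 1 evenly spaced entries are selected and
   allgathered so that every rank ends up with the same pivot list in *outpivots. */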
static PetscErrorCode PetscParallelSampleSelect(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt *outpivots[])
{
  PetscMPIInt size, rank;
  PetscInt    *pivots, *finalpivots, i;
  PetscInt    non_empty, my_first, count;
  PetscMPIInt *keys_per, max_keys_per;

  MPI_Comm_size(mapin->comm, &size);
  MPI_Comm_rank(mapin->comm, &rank);

  /* Choose P - 1 pivots that would be ideal for the distribution on this process */
  PetscMalloc1(size - 1, &pivots);
  for (i = 0; i < size - 1; i++) {
    PetscInt index;

    if (!mapin->n) {
      /* if this rank is empty, put "infinity" in the list.  each process knows how many empty
       * processes are in the layout, so those values will be ignored from the end of the sorted
       * pivots */
      pivots[i] = PETSC_MAX_INT;
    } else {
      /* match the distribution to the desired output described by mapout by getting the index
       * that is approximately the appropriate fraction through the list */
      index = ((PetscReal) mapout->range[i + 1]) * ((PetscReal) mapin->n) / ((PetscReal) mapout->N);
      index = PetscMin(index, (mapin->n - 1));
      index = PetscMax(index, 0);
      pivots[i] = keysin[index];
    }
  }
  /* sort the pivots in parallel */
  PetscParallelSortInt_Bitonic(mapin->comm, size - 1, pivots);
  if (PetscDefined(USE_DEBUG)) {
    PetscBool sorted;

    PetscParallelSortedInt(mapin->comm, size - 1, pivots, &sorted);
    /* debug builds only: fail loudly if the parallel bitonic sort did not produce a sorted pivot list */
    if (!sorted) SETERRQ(mapin->comm, PETSC_ERR_PLIB, "bitonic sort failed");
  }
  /* if there are Z nonempty processes, we have (P - 1) * Z real pivots, and we want to select
   * at indices Z - 1, 2*Z - 1, ... (P - 1) * Z - 1 */
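  /* for example, with P = 4 processes and Z = 3 nonempty ones there are (P - 1) * Z = 9 real pivots after the
   * parallel sort, and the ones at indices 2, 5, and 8 are selected */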
  non_empty = size;
  for (i = 0; i < size; i++) if (mapout->range[i] == mapout->range[i + 1]) non_empty--;
  PetscCalloc1(size + 1, &keys_per);
  my_first = -1;
  if (non_empty) {
    for (i = 0; i < size - 1; i++) {
      size_t sample = (i + 1) * non_empty - 1;
      size_t sample_rank = sample / (size - 1);

      keys_per[sample_rank]++;
      if (my_first < 0 && (PetscMPIInt) sample_rank == rank) {
        my_first = (PetscInt) (sample - sample_rank * (size - 1));
      }
    }
  }
  for (i = 0, max_keys_per = 0; i < size; i++) max_keys_per = PetscMax(keys_per[i], max_keys_per);
  PetscMalloc1(size * max_keys_per, &finalpivots);
  /* now that we know how many pivots each process will provide, gather the selected pivots at the start of the array
   * and allgather them */
  for (i = 0; i < keys_per[rank]; i++) pivots[i] = pivots[my_first + i * non_empty];
  for (i = keys_per[rank]; i < max_keys_per; i++) pivots[i] = PETSC_MAX_INT;
  MPI_Allgather(pivots, max_keys_per, MPIU_INT, finalpivots, max_keys_per, MPIU_INT, mapin->comm);
  for (i = 0, count = 0; i < size; i++) {
    PetscInt j;

    for (j = 0; j < max_keys_per; j++) {
      if (j < keys_per[i]) {
        finalpivots[count++] = finalpivots[i * max_keys_per + j];
      }
    }
  }
  *outpivots = finalpivots;
  PetscFree(keys_per);
  PetscFree(pivots);
  return 0;
}
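/* Redistribute the globally sorted array arrayin (n entries on this rank) into arrayout so that it matches the
   layout map: a prefix scan gives each rank its global offset, each rank sends the overlap of its contiguous range
   with every destination's target range, and a receiver whose range is not fully covered by its first message
   collects the remainder from subsequent ranks in order. */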
static PetscErrorCode PetscParallelRedistribute(PetscLayout map, PetscInt n, PetscInt arrayin[], PetscInt arrayout[])
{
  PetscMPIInt size, rank;
  PetscInt    myOffset, nextOffset;
  PetscInt    i;
  PetscMPIInt total, filled;
  PetscMPIInt sender, nfirst, nsecond;
  PetscMPIInt firsttag, secondtag;
  MPI_Request firstreqrcv;
  MPI_Request *firstreqs;
  MPI_Request *secondreqs;
  MPI_Status  firststatus;

  MPI_Comm_size(map->comm, &size);
  MPI_Comm_rank(map->comm, &rank);
  PetscCommGetNewTag(map->comm, &firsttag);
  PetscCommGetNewTag(map->comm, &secondtag);
  myOffset = 0;
  PetscMalloc2(size, &firstreqs, size, &secondreqs);
  MPI_Scan(&n, &nextOffset, 1, MPIU_INT, MPI_SUM, map->comm);
  myOffset = nextOffset - n;
  total = map->range[rank + 1] - map->range[rank];
  if (total > 0) {
    MPI_Irecv(arrayout, total, MPIU_INT, MPI_ANY_SOURCE, firsttag, map->comm, &firstreqrcv);
  }
  for (i = 0, nsecond = 0, nfirst = 0; i < size; i++) {
    PetscInt itotal;
    PetscInt overlap, oStart, oEnd;

    itotal = map->range[i + 1] - map->range[i];
    if (itotal <= 0) continue;
    oStart = PetscMax(myOffset, map->range[i]);
    oEnd = PetscMin(nextOffset, map->range[i + 1]);
    overlap = oEnd - oStart;
    if (map->range[i] >= myOffset && map->range[i] < nextOffset) {
      /* send first message */
      MPI_Isend(&arrayin[map->range[i] - myOffset], overlap, MPIU_INT, i, firsttag, map->comm, &(firstreqs[nfirst++]));
    } else if (overlap > 0) {
      /* send second message */
      MPI_Isend(&arrayin[oStart - myOffset], overlap, MPIU_INT, i, secondtag, map->comm, &(secondreqs[nsecond++]));
    } else if (overlap == 0 && myOffset > map->range[i] && myOffset < map->range[i + 1]) {
      /* send empty second message */
      MPI_Isend(&arrayin[oStart - myOffset], 0, MPIU_INT, i, secondtag, map->comm, &(secondreqs[nsecond++]));
    }
  }
  filled = 0;
  sender = -1;
  if (total > 0) {
    MPI_Wait(&firstreqrcv, &firststatus);
    sender = firststatus.MPI_SOURCE;
    MPI_Get_count(&firststatus, MPIU_INT, &filled);
  }
  while (filled < total) {
    PetscMPIInt mfilled;
    MPI_Status  stat;

    sender++;
    MPI_Recv(&arrayout[filled], total - filled, MPIU_INT, sender, secondtag, map->comm, &stat);
    MPI_Get_count(&stat, MPIU_INT, &mfilled);
    filled += mfilled;
  }
  MPI_Waitall(nfirst, firstreqs, MPI_STATUSES_IGNORE);
  MPI_Waitall(nsecond, secondreqs, MPI_STATUSES_IGNORE);
  PetscFree2(firstreqs, secondreqs);
  return 0;
}
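/* The samplesort proper: sort locally, select P - 1 global pivots, repartition the keys with an all-to-all exchange
   guided by the pivots, sort the received keys locally, and finally redistribute them to match the mapout layout. */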
static PetscErrorCode PetscParallelSortInt_Samplesort(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[])
{
  PetscMPIInt size, rank;
  PetscInt    *pivots = NULL, *buffer;
  PetscInt    i, j;
  PetscMPIInt *keys_per_snd, *keys_per_rcv, *offsets_snd, *offsets_rcv, nrecv;

  MPI_Comm_size(mapin->comm, &size);
  MPI_Comm_rank(mapin->comm, &rank);
  PetscMalloc4(size, &keys_per_snd, size, &keys_per_rcv, size + 1, &offsets_snd, size + 1, &offsets_rcv);
  /* sort locally */
  PetscSortInt(mapin->n, keysin);
  /* get P - 1 pivots */
  PetscParallelSampleSelect(mapin, mapout, keysin, &pivots);
  /* determine which entries in the sorted array go to which other processes based on the pivots */
  for (i = 0, j = 0; i < size - 1; i++) {
    PetscInt prev = j;

    while ((j < mapin->n) && (keysin[j] < pivots[i])) j++;
    offsets_snd[i] = prev;
    keys_per_snd[i] = j - prev;
  }
  offsets_snd[size - 1] = j;
  keys_per_snd[size - 1] = mapin->n - j;
  offsets_snd[size] = mapin->n;
  /* get the incoming sizes */
  MPI_Alltoall(keys_per_snd, 1, MPI_INT, keys_per_rcv, 1, MPI_INT, mapin->comm);
  offsets_rcv[0] = 0;
  for (i = 0; i < size; i++) {
    offsets_rcv[i + 1] = offsets_rcv[i] + keys_per_rcv[i];
  }
  nrecv = offsets_rcv[size];
  /* all to all exchange */
  PetscMalloc1(nrecv, &buffer);
  MPI_Alltoallv(keysin, keys_per_snd, offsets_snd, MPIU_INT, buffer, keys_per_rcv, offsets_rcv, MPIU_INT, mapin->comm);
  PetscFree(pivots);
  PetscFree4(keys_per_snd, keys_per_rcv, offsets_snd, offsets_rcv);

  /* local sort */
  PetscSortInt(nrecv, buffer);
#if defined(PETSC_USE_DEBUG)
  {
    PetscBool sorted;

    PetscParallelSortedInt(mapin->comm, nrecv, buffer, &sorted);
    /* debug builds only: verify that the keys are globally sorted before redistribution */
    if (!sorted) SETERRQ(mapin->comm, PETSC_ERR_PLIB, "samplesort (pre-redistribute) sort failed");
  }
#endif
  /* redistribute to the desired order */
  PetscParallelRedistribute(mapout, nrecv, buffer, keysout);
  PetscFree(buffer);
  return 0;
}
/*@
   PetscParallelSortInt - Globally sort a distributed array of integers

   Collective

   Input Parameters:
+  mapin - PetscLayout describing the distribution of the input keys
.  mapout - PetscLayout describing the distribution of the output keys
-  keysin - the array of integers to be sorted (it need not be sorted on input)

   Output Parameter:
.  keysout - the array in which the sorted integers will be stored.  If mapin == mapout, then keysin may be equal to keysout.

   Level: developer

   Notes:
   This implements a distributed samplesort, which, with local array sizes n_in and n_out, global size N, and global number of processes P, does:
   - sorts locally
   - chooses pivots by sorting (in parallel) (P - 1) pivot suggestions from each process with a bitonic sort and allgathering a subset of (P - 1) of those
   - repartitions the keys with an all-to-all exchange guided by the pivots
   - sorts the repartitioned keys locally (the array is now globally sorted, but does not match the mapout layout)
   - redistributes the keys to match the mapout layout

   If keysin != keysout, then keysin will not be changed during PetscParallelSortInt.
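   Example Usage:
   A minimal sketch (error checking omitted; comm and nlocal stand for an MPI communicator and the local key count, and the same layout is used for input and output):
.vb
   PetscLayout map;
   PetscInt    *keys, *keysout;

   PetscLayoutCreate(comm, &map);
   PetscLayoutSetLocalSize(map, nlocal);
   PetscLayoutSetUp(map);
   PetscMalloc2(nlocal, &keys, nlocal, &keysout);
   /* ... fill keys[] with this process's nlocal values ... */
   PetscParallelSortInt(map, map, keys, keysout);
   /* keysout now holds this process's portion of the globally sorted keys */
   PetscFree2(keys, keysout);
   PetscLayoutDestroy(&map);
.ve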
.seealso: PetscParallelSortedInt()
@*/
PetscErrorCode PetscParallelSortInt(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[])
{
  PetscMPIInt size;
  PetscMPIInt result;
  PetscInt    *keysincopy = NULL;

  MPI_Comm_compare(mapin->comm, mapout->comm, &result);
  /* the two layouts must live on the same (or a congruent) communicator for the exchanges below to make sense */
  if (result != MPI_IDENT && result != MPI_CONGRUENT) SETERRQ(mapin->comm, PETSC_ERR_ARG_NOTSAMECOMM, "mapin and mapout must be on the same communicator");
  PetscLayoutSetUp(mapin);
  PetscLayoutSetUp(mapout);
  MPI_Comm_size(mapin->comm, &size);
  if (size == 1) {
    if (keysout != keysin) {
      PetscMemcpy(keysout, keysin, mapin->n * sizeof(PetscInt));
    }
    PetscSortInt(mapout->n, keysout);
    return 0;
  }
  if (keysout != keysin) {
    PetscMalloc1(mapin->n, &keysincopy);
    PetscMemcpy(keysincopy, keysin, mapin->n * sizeof(PetscInt));
    keysin = keysincopy;
  }
  PetscParallelSortInt_Samplesort(mapin, mapout, keysin, keysout);
#if defined(PETSC_USE_DEBUG)
  {
    PetscBool sorted;

    PetscParallelSortedInt(mapout->comm, mapout->n, keysout, &sorted);
    /* debug builds only: verify that the final result is globally sorted */
    if (!sorted) SETERRQ(mapout->comm, PETSC_ERR_PLIB, "samplesort sort failed");
  }
#endif
  PetscFree(keysincopy);
  return 0;
}