14
14
* Usage: ./alltoallw [OPTION]
15
15
* [-h] Print this help message
16
16
* [-v] Verbose mode (default: no)
17
+ * [-d] debug mode to check receive buffer contents (default: no)
17
18
* [-n num] number of iterations (default: 1)
18
19
* [-r num] every ratio processes is a receiver (default: 1)
19
20
* [-l num] receive amount per iteration (default: 8 MB)
22
23
* Example run command and output on screen:
23
24
* % mpiexec -n 2048 ./alltoallw -n 253 -r 32
24
25
*
25
- * nprocs = 2048
26
- * ntimes = 253
27
- * num_recvers = 64
28
- * individual message len = 4096 bytes
26
+ * number of MPI processes = 2048
27
+ * number of iterations = 253
28
+ * numbe of receivers = 64
29
+ * individual message length = 4096 bytes
29
30
* send/recv buffer gap = 4 int(s)
30
31
* Recv amount per iteration = 8388608 bytes
31
32
* Time for using MPI_alltoallw = 53.60 sec
42
43
#include <mpi.h>
43
44
44
45
static int verbose ;
46
+ static int debug ;
45
47
46
48
#define ERR \
47
49
if (err != MPI_SUCCESS) { \
@@ -52,6 +54,56 @@ static int verbose;
52
54
goto err_out; \
53
55
}
54
56
57
+ void initialize_bufs (int num_recvers ,
58
+ int len ,
59
+ int gap ,
60
+ int * sendBuf ,
61
+ int * recvBuf )
62
+ {
63
+ int i , j , k , nprocs , rank ;
64
+
65
+ MPI_Comm_size (MPI_COMM_WORLD , & nprocs );
66
+ MPI_Comm_rank (MPI_COMM_WORLD , & rank );
67
+
68
+ for (i = 0 ; i < (len + gap )* num_recvers ; i ++ )
69
+ sendBuf [i ] = -2 ;
70
+ for (i = 0 ; i < (len + gap )* nprocs ; i ++ )
71
+ recvBuf [i ] = -3 ;
72
+ k = 0 ;
73
+ for (i = 0 ; i < num_recvers ; i ++ ) {
74
+ for (j = 0 ; j < len ; j ++ ) {
75
+ sendBuf [k ++ ] = rank ;
76
+ }
77
+ k += gap ;
78
+ }
79
+ }
80
+
81
+ int check_recv_buf (char * comm_op ,
82
+ int len ,
83
+ int gap ,
84
+ int * recvBuf )
85
+ {
86
+ int i , j , k , expect , err = 0 , nprocs , rank ;
87
+
88
+ MPI_Comm_size (MPI_COMM_WORLD , & nprocs );
89
+ MPI_Comm_rank (MPI_COMM_WORLD , & rank );
90
+
91
+ k = 0 ;
92
+ for (i = 0 ; i < nprocs ; i ++ ) {
93
+ for (j = 0 ; j < len + gap ; j ++ ) {
94
+ expect = (i == rank ) ? -3 : ((j < len ) ? i : -3 );
95
+ if (recvBuf [k ] != expect ) {
96
+ printf ("Error(%s): rank %d i=%d j=%d expect %d but got %d\n" ,
97
+ comm_op , rank , i , j , expect , recvBuf [k ]);
98
+ goto err_out ;
99
+ }
100
+ k ++ ;
101
+ }
102
+ }
103
+ err_out :
104
+ return err ;
105
+ }
106
+
55
107
void run_alltoallw (int ntimes ,
56
108
int ratio ,
57
109
int is_receiver ,
@@ -60,7 +112,6 @@ void run_alltoallw(int ntimes,
60
112
int * sendBuf ,
61
113
int * recvBuf )
62
114
{
63
- int * sendPtr , * recvPtr ;
64
115
int i , j , err , nprocs , rank , num_recvers ;
65
116
int * sendCounts , * recvCounts , * sendDisps , * recvDisps ;
66
117
MPI_Datatype * sendTypes , * recvTypes ;
@@ -82,9 +133,6 @@ void run_alltoallw(int ntimes,
82
133
sendDisps = (int * ) calloc (nprocs * 2 , sizeof (int ));
83
134
recvDisps = sendDisps + nprocs ;
84
135
85
- sendPtr = sendBuf ;
86
- recvPtr = recvBuf ;
87
-
88
136
/* Only receivers have non-zero data to receive */
89
137
if (is_receiver ) {
90
138
j = 0 ;
@@ -113,11 +161,15 @@ void run_alltoallw(int ntimes,
113
161
}
114
162
115
163
for (i = 0 ; i < ntimes ; i ++ ) {
116
- err = MPI_Alltoallw (sendPtr , sendCounts , sendDisps , sendTypes ,
117
- recvPtr , recvCounts , recvDisps , recvTypes ,
164
+ if (debug )
165
+ initialize_bufs (num_recvers , len , gap , sendBuf , recvBuf );
166
+
167
+ err = MPI_Alltoallw (sendBuf , sendCounts , sendDisps , sendTypes ,
168
+ recvBuf , recvCounts , recvDisps , recvTypes ,
118
169
MPI_COMM_WORLD ); ERR
119
- sendPtr += num_recvers * (len + gap );
120
- recvPtr += nprocs * (len + gap );
170
+
171
+ if (debug && is_receiver )
172
+ check_recv_buf ("alltoallw ", len , gap , recvBuf );
121
173
}
122
174
123
175
err_out :
@@ -156,11 +208,13 @@ void run_async_send_recv(int ntimes,
156
208
reqs = (MPI_Request * ) malloc (sizeof (MPI_Request ) * (nprocs + num_recvers ));
157
209
st = (MPI_Status * ) malloc (sizeof (MPI_Status ) * (nprocs + num_recvers ));
158
210
159
- sendPtr = sendBuf ;
160
- recvPtr = recvBuf ;
161
-
162
211
for (i = 0 ; i < ntimes ; i ++ ) {
212
+ if (debug )
213
+ initialize_bufs (num_recvers , len , gap , sendBuf , recvBuf );
214
+
163
215
nreqs = 0 ;
216
+ sendPtr = sendBuf ;
217
+ recvPtr = recvBuf ;
164
218
165
219
/* Only receivers post recv requests */
166
220
if (is_receiver ) {
@@ -186,6 +240,9 @@ void run_async_send_recv(int ntimes,
186
240
}
187
241
188
242
err = MPI_Waitall (nreqs , reqs , st ); ERR
243
+
244
+ if (debug && is_receiver )
245
+ check_recv_buf ("isend /irecv ", len , gap , recvBuf );
189
246
}
190
247
191
248
err_out :
@@ -198,67 +255,12 @@ void run_async_send_recv(int ntimes,
198
255
printf ("Time for using MPI_Issend/Irecv = %.2f sec\n" , maxt );
199
256
}
200
257
201
- void initialize_bufs (int ntimes ,
202
- int num_recvers ,
203
- int len ,
204
- int gap ,
205
- int * sendBuf ,
206
- int * recvBuf )
207
- {
208
- int i , j , k , m , nprocs , rank ;
209
-
210
- MPI_Comm_size (MPI_COMM_WORLD , & nprocs );
211
- MPI_Comm_rank (MPI_COMM_WORLD , & rank );
212
-
213
- for (i = 0 ; i < (len + gap )* ntimes * num_recvers ; i ++ )
214
- sendBuf [i ] = -2 ;
215
- for (i = 0 ; i < (len + gap )* ntimes * nprocs ; i ++ )
216
- recvBuf [i ] = -3 ;
217
- m = 0 ;
218
- for (i = 0 ; i < ntimes ; i ++ ) {
219
- for (j = 0 ; j < num_recvers ; j ++ ) {
220
- for (k = 0 ; k < len ; k ++ ) {
221
- sendBuf [m ++ ] = rank ;
222
- }
223
- m += gap ;
224
- }
225
- }
226
- }
227
-
228
- int check_recv_buf (char * comm_op ,
229
- int ntimes ,
230
- int len ,
231
- int gap ,
232
- int * recvBuf )
233
- {
234
- int i , j , k , m , expect , err = 0 , nprocs , rank ;
235
-
236
- MPI_Comm_size (MPI_COMM_WORLD , & nprocs );
237
- MPI_Comm_rank (MPI_COMM_WORLD , & rank );
238
-
239
- m = 0 ;
240
- for (i = 0 ; i < ntimes ; i ++ ) {
241
- for (j = 0 ; j < nprocs ; j ++ ) {
242
- for (k = 0 ; k < len + gap ; k ++ ) {
243
- expect = (j == rank ) ? -3 : ((k < len ) ? j : -3 );
244
- if (recvBuf [m ] != expect ) {
245
- printf ("Error(%s): rank %d i=%d j=%d k=%d expect %d but got %d\n" ,
246
- comm_op , rank , i , j , k , expect , recvBuf [m ]);
247
- goto err_out ;
248
- }
249
- m ++ ;
250
- }
251
- }
252
- }
253
- err_out :
254
- return err ;
255
- }
256
-
257
258
/*----< usage() >------------------------------------------------------------*/
258
259
static void usage (char * argv0 ) {
259
260
char * help = "Usage: %s [OPTION]\n\
260
261
[-h] Print this help message\n\
261
262
[-v] Verbose mode (default: no)\n\
263
+ [-d] debug mode to check receive buffer contents (default: no)\n\
262
264
[-n num] number of iterations (default: 1)\n\
263
265
[-r num] every ratio processes is a receiver (default: 1)\n\
264
266
[-l num] receive amount per iteration (default: 8 MB)\n\
@@ -279,17 +281,21 @@ int main(int argc, char **argv) {
279
281
MPI_Comm_rank (MPI_COMM_WORLD , & rank );
280
282
281
283
verbose = 0 ;
284
+ debug = 0 ;
282
285
ntimes = 1 ;
283
286
ratio = 1 ;
284
287
block_len = 8 * 1024 * 1024 ;
285
288
gap = 4 ;
286
289
287
290
/* command-line arguments */
288
- while ((i = getopt (argc , argv , "hvn :r:l:g:" )) != EOF )
291
+ while ((i = getopt (argc , argv , "hdvn :r:l:g:" )) != EOF )
289
292
switch (i ) {
290
293
case 'v' :
291
294
verbose = 1 ;
292
295
break ;
296
+ case 'd' :
297
+ debug = 1 ;
298
+ break ;
293
299
case 'n' :
294
300
ntimes = atoi (optarg );
295
301
break ;
@@ -328,7 +334,7 @@ int main(int argc, char **argv) {
328
334
if (verbose ) fflush (stdout );
329
335
330
336
if (rank == 0 ) {
331
- printf ("nprocs = %d\n" , nprocs );
337
+ printf ("number of MPI processes = %d\n" , nprocs );
332
338
printf ("number of iterations = %d\n" , ntimes );
333
339
printf ("numbe of receivers = %d\n" , num_recvers );
334
340
printf ("individual message length = %zd bytes\n" ,len * sizeof (int ));
@@ -337,25 +343,19 @@ int main(int argc, char **argv) {
337
343
}
338
344
339
345
/* allocate and initialize send and receive buffers */
340
- sendBuf = (int * ) malloc (sizeof (int ) * (len + gap ) * ntimes * num_recvers );
341
- recvBuf = (int * ) malloc (sizeof (int ) * (len + gap ) * ntimes * nprocs );
346
+ sendBuf = (int * ) malloc (sizeof (int ) * (len + gap ) * num_recvers );
347
+ recvBuf = (int * ) malloc (sizeof (int ) * (len + gap ) * nprocs );
342
348
343
- initialize_bufs (ntimes , num_recvers , len , gap , sendBuf , recvBuf );
349
+ initialize_bufs (num_recvers , len , gap , sendBuf , recvBuf );
344
350
345
351
MPI_Barrier (MPI_COMM_WORLD );
346
352
run_alltoallw (ntimes , ratio , is_receiver , len , gap , sendBuf , recvBuf );
347
353
348
- if (is_receiver )
349
- check_recv_buf ("alltoallw" , ntimes , len , gap , recvBuf );
350
-
351
- initialize_bufs (ntimes , num_recvers , len , gap , sendBuf , recvBuf );
354
+ initialize_bufs (num_recvers , len , gap , sendBuf , recvBuf );
352
355
353
356
MPI_Barrier (MPI_COMM_WORLD );
354
357
run_async_send_recv (ntimes , ratio , is_receiver , len , gap , sendBuf , recvBuf );
355
358
356
- if (is_receiver )
357
- check_recv_buf ("isend/irecv" , ntimes , len , gap , recvBuf );
358
-
359
359
free (recvBuf );
360
360
free (sendBuf );
361
361
0 commit comments