Skip to content

Commit 9eaa8ec

Browse files
committed
enable make check for folder examples/Pytorch_DDP
1 parent 220cfe1 commit 9eaa8ec

File tree

5 files changed

+118
-14
lines changed

5 files changed

+118
-14
lines changed

examples/Makefile

+9-6
Original file line numberDiff line numberDiff line change
@@ -28,29 +28,32 @@ OUTPUT_DIR = _tmp_output
2828
all:
2929

3030
# Run the 4-process tests in this folder, then the "check" target of the
# Pytorch_DDP sub-folder. $(MAKE) is used instead of a literal "make" so that
# command-line flags and variables (-j, -n, VAR=value) reach the sub-make.
check: ptest4
	cd Pytorch_DDP && $(MAKE) check
32+
3133
# Run the 3-, 4- and 8-process tests in this folder, then the "ptests" target
# of the Pytorch_DDP sub-folder. $(MAKE) is used instead of a literal "make" so
# that command-line flags and variables (-j, -n, VAR=value) reach the sub-make.
ptests: ptest3 ptest4 ptest8
	cd Pytorch_DDP && $(MAKE) ptests
3235

3336
ptest3:
3437
@mkdir -p ${OUTPUT_DIR}
35-
@echo "==========================================================="
38+
@echo "======================================================================"
3639
@echo " examples: Parallel testing on 3 MPI processes"
37-
@echo "==========================================================="
40+
@echo "======================================================================"
3841
@${TESTS_ENVIRONMENT} export NPROC=3; ./parallel_run.sh ${OUTPUT_DIR} || exit 1
3942
@echo ""
4043

4144
ptest4:
4245
@mkdir -p ${OUTPUT_DIR}
43-
@echo "==========================================================="
46+
@echo "======================================================================"
4447
@echo " examples: Parallel testing on 4 MPI processes"
45-
@echo "==========================================================="
48+
@echo "======================================================================"
4649
@${TESTS_ENVIRONMENT} export NPROC=4; ./parallel_run.sh ${OUTPUT_DIR} || exit 1
4750
@echo ""
4851

4952
ptest8:
5053
@mkdir -p ${OUTPUT_DIR}
51-
@echo "==========================================================="
54+
@echo "======================================================================"
5255
@echo " examples: Parallel testing on 8 MPI processes"
53-
@echo "==========================================================="
56+
@echo "======================================================================"
5457
@${TESTS_ENVIRONMENT} export NPROC=8; ./parallel_run.sh ${OUTPUT_DIR} || exit 1
5558
@echo ""
5659

examples/Pytorch_DDP/Makefile

+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
#
2+
# Copyright (C) 2024, Northwestern University and Argonne National Laboratory
3+
# See COPYRIGHT notice in top-level directory.
4+
#
5+
6+
# Python programs run by the ptest* targets (read by parallel_run.sh through
# the exported check_PROGRAMS environment variable).
check_PROGRAMS = torch_ddp_skeleton.py

# Shell prefix placed in front of every test command: exports the program list
# and PNETCDF_DIR into the environment of parallel_run.sh.
TESTS_ENVIRONMENT  = export check_PROGRAMS="${check_PROGRAMS}";
TESTS_ENVIRONMENT += export PNETCDF_DIR="${PNETCDF_DIR}";

# Scratch folder created before each test run and removed by "make clean".
OUTPUT_DIR = _tmp_output

# None of these targets names a file this Makefile produces. Declaring them
# phony keeps a stray file or folder called e.g. "check" or "clean" from
# making the target look up to date and silently skipping its recipe.
.PHONY: all check ptests ptest3 ptest4 ptest8 clean

# Default goal: nothing to build for the Python examples.
all:

# "check" runs the 4-process test only; "ptests" runs all three process counts.
check: ptest4
ptests: ptest3 ptest4 ptest8

# Each ptestN target creates the output folder, prints a banner, and runs
# parallel_run.sh with NPROC=N exported. A non-zero exit from the script fails
# the target.
ptest3:
	@mkdir -p ${OUTPUT_DIR}
	@echo "======================================================================"
	@echo "    examples/Pytorch_DDP: Parallel testing on 3 MPI processes"
	@echo "======================================================================"
	@${TESTS_ENVIRONMENT} export NPROC=3; ./parallel_run.sh ${OUTPUT_DIR} || exit 1
	@echo ""

ptest4:
	@mkdir -p ${OUTPUT_DIR}
	@echo "======================================================================"
	@echo "    examples/Pytorch_DDP: Parallel testing on 4 MPI processes"
	@echo "======================================================================"
	@${TESTS_ENVIRONMENT} export NPROC=4; ./parallel_run.sh ${OUTPUT_DIR} || exit 1
	@echo ""

ptest8:
	@mkdir -p ${OUTPUT_DIR}
	@echo "======================================================================"
	@echo "    examples/Pytorch_DDP: Parallel testing on 8 MPI processes"
	@echo "======================================================================"
	@${TESTS_ENVIRONMENT} export NPROC=8; ./parallel_run.sh ${OUTPUT_DIR} || exit 1
	@echo ""

# Remove the scratch output folder created by the ptest* targets.
clean:
	rm -rf ${OUTPUT_DIR}
45+

examples/Pytorch_DDP/parallel_run.sh

+44
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
#!/bin/bash
2+
#
3+
# Copyright (C) 2024, Northwestern University and Argonne National Laboratory
4+
# See COPYRIGHT notice in top-level directory.
5+
#
6+
7+
# Exit immediately if a command exits with a non-zero status.
8+
set -e
9+
10+
# Get the directory containing this script
11+
if test "x$NPROC" = x ; then
12+
NPROC=4
13+
fi
14+
15+
# get output folder from command line
16+
if test "$#" -gt 0 ; then
17+
args=("$@")
18+
OUT_DIR="${args[0]}"
19+
# check if output folder exists
20+
if ! test -d $OUT_DIR ; then
21+
echo "Error: output folder \"$OUT_DIR\" does not exist."
22+
exit 1
23+
fi
24+
else
25+
# output folder is not set at command line, use current folder
26+
OUT_DIR="."
27+
fi
28+
# echo "OUT_DIR=$OUT_DIR"
29+
30+
for prog in $check_PROGRAMS; do
31+
printf '%-60s' "Testing $prog"
32+
33+
if test $prog = "torch_ddp_skeleton.py" ; then
34+
CMD="mpiexec -n $NPROC python $prog -q"
35+
fi
36+
$CMD
37+
status=$?
38+
if [ $status -ne 0 ]; then
39+
echo " ---- FAIL"
40+
else
41+
echo " ---- PASS"
42+
fi
43+
done
44+

examples/Pytorch_DDP/torch_ddp_skeleton.py

+14-2
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,13 @@
55
# This is a skeleton program to show how to run Pytorch distributed environment
66
# with MPI
77

8-
import os
8+
import os, argparse
99
import torch
1010
import torch.distributed as dist
1111
from mpi4py import MPI
1212

13+
verbose = True
14+
1315
class distributed():
1416
def get_size(self):
1517
if dist.is_available() and dist.is_initialized():
@@ -217,13 +219,23 @@ def init_parallel():
217219

218220
#----< main() >----------------------------------------------------------------
219221
def main():
222+
global verbose
223+
220224
# initialize parallel environment
221225
comm, device = init_parallel()
222226

223227
rank = comm.get_rank()
224228
nprocs = comm.get_size()
225229

226-
print("nprocs = ", nprocs, " rank = ",rank," device = ", device)
230+
# Get command-line arguments
231+
args = None
232+
parser = argparse.ArgumentParser()
233+
parser.add_argument("-q", help="Quiet mode (reports when fail)", action="store_true")
234+
args = parser.parse_args()
235+
if args.q: verbose = False
236+
237+
if verbose:
238+
print("nprocs = ", nprocs, " rank = ",rank," device = ", device)
227239

228240
comm.finalize()
229241

test/Makefile

+6-6
Original file line numberDiff line numberDiff line change
@@ -58,25 +58,25 @@ ptests: ptest3 ptest4 ptest8
5858

5959
ptest3:
6060
@mkdir -p ${OUTPUT_DIR}
61-
@echo "==========================================================="
61+
@echo "======================================================================"
6262
@echo " test: Parallel testing on 3 MPI processes"
63-
@echo "==========================================================="
63+
@echo "======================================================================"
6464
@${TESTS_ENVIRONMENT} export NPROC=3; ./parallel_run.sh ${OUTPUT_DIR} || exit 1
6565
@echo ""
6666

6767
ptest4:
6868
@mkdir -p ${OUTPUT_DIR}
69-
@echo "==========================================================="
69+
@echo "======================================================================"
7070
@echo " test: Parallel testing on 4 MPI processes"
71-
@echo "==========================================================="
71+
@echo "======================================================================"
7272
@${TESTS_ENVIRONMENT} export NPROC=4; ./parallel_run.sh ${OUTPUT_DIR} || exit 1
7373
@echo ""
7474

7575
ptest8:
7676
@mkdir -p ${OUTPUT_DIR}
77-
@echo "==========================================================="
77+
@echo "======================================================================"
7878
@echo " test: Parallel testing on 8 MPI processes"
79-
@echo "==========================================================="
79+
@echo "======================================================================"
8080
@${TESTS_ENVIRONMENT} export NPROC=8; ./parallel_run.sh ${OUTPUT_DIR} || exit 1
8181
@echo ""
8282

0 commit comments

Comments
 (0)