deepspeedai · jeffra · Feb 4, 2020 · Feb 4, 2020 · Feb 4, 2020 · Feb 4, 2020
@@ -11,7 +11,7 @@
 DEEPSPEED_UNIT_WORKER_TIMEOUT = 5
 
 
-def distributed_test(world_size=2):
+def distributed_test(world_size=2, backend='gloo'):
     """A decorator for executing a function (e.g., a unit test) in a distributed manner.
     This decorator manages the spawning and joining of processes, initialization of
     torch.distributed, and catching of errors.
@@ -33,14 +33,14 @@ def dist_init(local_rank, num_procs, *func_args, **func_kwargs):
             """Initialize torch.distributed and execute the user function. """
             os.environ['MASTER_ADDR'] = '127.0.0.1'
             os.environ['MASTER_PORT'] = '29500'
-            dist.init_process_group(backend='nccl',
+            dist.init_process_group(backend=backend,
                                     init_method='env://',
                                     rank=local_rank,
                                     world_size=num_procs)
 
             # XXX temporarily disabled due to CUDA runtime error?
             #if torch.cuda.is_available():
-            #   torch.cuda.set_device(local_rank)
+            #    torch.cuda.set_device(local_rank)
 
             run_func(*func_args, **func_kwargs)
 

@@ -1,3 +1,4 @@
+import torch
 import torch.distributed as dist
 
 from common import distributed_test
@@ -26,3 +27,11 @@ def _test_dist_args_helper(x, color='red'):
 
     """Ensure that we can parse args to distributed_test decorated functions. """
     _test_dist_args_helper(number, color=color)
+
+
+@distributed_test(world_size=2)  # run the test body on 2 processes
+def test_dist_allreduce():  # checks that all_reduce combines the per-rank tensors into the expected total
+    x = torch.ones(1, 3) * (dist.get_rank() + 1)  # rank r holds a (1, 3) tensor filled with r + 1
+    result = torch.ones(1, 3) * 3  # expected value: 1 + 2, the contributions of ranks 0 and 1
+    dist.all_reduce(x)  # no op argument passed; the expected value above assumes a sum, written back into x
+    assert torch.all(x == result)  # every element on this rank must equal the expected total