This changes test_correctness to test_allreduces for consistency. New argument parsing has been added to test_utils. Note that the allgather tests currently fail (see #22).
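The new helper appears in the test below as parse_args(argc, argv, backend, start_size, max_size). As a rough, hypothetical sketch of what such a helper could look like (the actual implementation is in test_utils and may use a different argument format; the positional order shown here is only an assumption):

// Hypothetical sketch only: the real helper lives in test_utils and its
// argument format may differ. Assumed usage: ./test [backend [start_size [max_size]]].
#include <cstddef>
#include <cstdlib>
#include <string>

void parse_args(int argc, char** argv, std::string& backend,
                size_t& start_size, size_t& max_size) {
  if (argc > 1) {
    backend = argv[1];  // e.g. "MPI", "NCCL", or "MPI-CUDA".
  }
  if (argc > 2) {
    start_size = std::strtoull(argv[2], nullptr, 10);  // Smallest per-rank count.
  }
  if (argc > 3) {
    max_size = std::strtoull(argv[3], nullptr, 10);  // Largest per-rank count.
  }
}

Under that assumption, the test below could be launched as, e.g., mpirun -n 4 ./test_allgather NCCL (binary name illustrative).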
Showing 11 changed files with 1,495 additions and 121 deletions.
@@ -0,0 +1,178 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2018, Lawrence Livermore National Security, LLC. Produced at the
// Lawrence Livermore National Laboratory in collaboration with University of
// Illinois Urbana-Champaign.
//
// Written by the LBANN Research Team (N. Dryden, N. Maruyama, et al.) listed in
// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
//
// LLNL-CODE-756777.
// All rights reserved.
//
// This file is part of Aluminum GPU-aware Communication Library. For details, see
// http://software.llnl.gov/Aluminum or https://github.com/LLNL/Aluminum.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you
// may not use this file except in compliance with the License. You may
// obtain a copy of the License at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the License.
////////////////////////////////////////////////////////////////////////////////

#include <iostream>
#include <sstream>  // Needed for std::stringstream used in the debug output below.
#include "Al.hpp"
#include "test_utils.hpp"
#ifdef AL_HAS_NCCL
#include "test_utils_nccl_cuda.hpp"
#endif
#ifdef AL_HAS_MPI_CUDA
#include "test_utils_mpi_cuda.hpp"
#endif

#include <stdlib.h>
#include <math.h>
#include <string>

// Size is the per-rank send size.
size_t start_size = 1;
size_t max_size = 1<<30;

/**
 * Test allgather algo on input, check with expected.
 */
template <typename Backend>
void test_allgather_algo(const typename VectorType<Backend>::type& expected,
                         const typename VectorType<Backend>::type& expected_inplace,
                         typename VectorType<Backend>::type input,
                         typename VectorType<Backend>::type input_inplace,
                         typename Backend::comm_type& comm,
                         typename Backend::algo_type algo) {
  auto recv = get_vector<Backend>(input.size() * comm.size());
  // Test regular allgather.
  Al::Allgather<Backend>(input.data(), recv.data(), input.size(), comm, algo);
  if (!check_vector(expected, recv)) {
    std::cout << comm.rank() << ": regular allgather does not match" <<
      std::endl;
    std::abort();
  }
  MPI_Barrier(MPI_COMM_WORLD);
  // Test in-place allgather.
  // Debug output: print this rank's in-place input buffer.
  std::stringstream ss;
  ss << comm.rank() << ": input: ";
  for (const auto& v : input_inplace.copyout()) ss << v << " ";
  std::cout << ss.str() << std::endl;
  Al::Allgather<Backend>(input_inplace.data(), input_inplace.size() / comm.size(),
                         comm, algo);
  MPI_Barrier(MPI_COMM_WORLD);
  if (!check_vector(expected_inplace, input_inplace)) {
    std::cout << comm.rank() << ": in-place allgather does not match" <<
      std::endl;
    std::abort();
  }
}

/**
 * Test non-blocking allgather algo on input, check with expected.
 */
template <typename Backend>
void test_nb_allgather_algo(const typename VectorType<Backend>::type& expected,
                            const typename VectorType<Backend>::type& expected_inplace,
                            typename VectorType<Backend>::type input,
                            typename VectorType<Backend>::type input_inplace,
                            typename Backend::comm_type& comm,
                            typename Backend::algo_type algo) {
  typename Backend::req_type req = get_request<Backend>();
  auto recv = get_vector<Backend>(input.size() * comm.size());
  // Test regular allgather.
  Al::NonblockingAllgather<Backend>(input.data(), recv.data(),
                                    input.size(), comm, req, algo);
  Al::Wait<Backend>(req);
  if (!check_vector(expected, recv)) {
    std::cout << comm.rank() << ": regular allgather does not match" <<
      std::endl;
    std::abort();
  }
  MPI_Barrier(MPI_COMM_WORLD);
  // Test in-place allgather.
  Al::NonblockingAllgather<Backend>(input_inplace.data(),
                                    input_inplace.size() / comm.size(),
                                    comm, req, algo);
  Al::Wait<Backend>(req);
  if (!check_vector(expected_inplace, input_inplace)) {
    std::cout << comm.rank() << ": in-place allgather does not match" <<
      std::endl;
    std::abort();
  }
}

template <typename Backend>
void test_correctness() {
  auto algos = get_allgather_algorithms<Backend>();
  auto nb_algos = get_nb_allgather_algorithms<Backend>();
  typename Backend::comm_type comm;  // Use COMM_WORLD.
  // Compute sizes to test.
  std::vector<size_t> sizes = get_sizes(start_size, max_size, true);
  for (const auto& size : sizes) {
    if (comm.rank() == 0) {
      std::cout << "Testing size " << human_readable_size(size) << std::endl;
    }
    // Compute true value.
    size_t global_size = size * comm.size();
    typename VectorType<Backend>::type &&data = gen_data<Backend>(size);
    auto expected = get_vector<Backend>(global_size);
    get_expected_allgather_result(data, expected);
    typename VectorType<Backend>::type &&data_inplace = gen_data<Backend>(global_size);
    auto expected_inplace(data_inplace);
    get_expected_allgather_inplace_result(expected_inplace);
    // Test algorithms.
    for (auto&& algo : algos) {
      MPI_Barrier(MPI_COMM_WORLD);
      if (comm.rank() == 0) {
        std::cout << " Algo: " << Al::allreduce_name(algo) << std::endl;
      }
      test_allgather_algo<Backend>(expected, expected_inplace,
                                   data, data_inplace, comm, algo);
    }
    for (auto&& algo : nb_algos) {
      MPI_Barrier(MPI_COMM_WORLD);
      if (comm.rank() == 0) {
        std::cout << " Algo: NB " << Al::allreduce_name(algo) << std::endl;
      }
      test_nb_allgather_algo<Backend>(expected, expected_inplace,
                                      data, data_inplace, comm, algo);
    }
  }
}

int main(int argc, char** argv) {
  // Need to set the CUDA device before initializing Aluminum.
#ifdef AL_HAS_CUDA
  set_device();
#endif
  Al::Initialize(argc, argv);

  std::string backend = "MPI";
  parse_args(argc, argv, backend, start_size, max_size);

  if (backend == "MPI") {
    std::cerr << "Allgather not supported on MPI backend." << std::endl;
    std::abort();
#ifdef AL_HAS_NCCL
  } else if (backend == "NCCL") {
    test_correctness<Al::NCCLBackend>();
#endif
#ifdef AL_HAS_MPI_CUDA
  } else if (backend == "MPI-CUDA") {
    test_correctness<Al::MPICUDABackend>();
#endif
  } else {
    // Catch backends that are unknown or compiled out of this build.
    std::cerr << "Unsupported backend: " << backend << std::endl;
    std::abort();
  }

  Al::Finalize();
  return 0;
}
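For reference, the expected buffers compared against above encode standard allgather semantics: the result is every rank's contribution concatenated in rank order, and the in-place variant assumes each rank's own slice already sits at offset rank * count in the buffer. Below is a minimal sketch of a host-side reference computation using plain MPI, assuming float data; the actual helpers get_expected_allgather_result and get_expected_allgather_inplace_result live in the test utilities and also handle device buffers for the GPU backends.

// Sketch only: compute reference allgather results on the host with plain MPI.
#include <vector>
#include <mpi.h>

// Out-of-place reference: gather every rank's input into one buffer, rank order.
std::vector<float> reference_allgather(const std::vector<float>& input) {
  int nprocs;
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
  std::vector<float> expected(input.size() * nprocs);
  MPI_Allgather(input.data(), static_cast<int>(input.size()), MPI_FLOAT,
                expected.data(), static_cast<int>(input.size()), MPI_FLOAT,
                MPI_COMM_WORLD);
  return expected;
}

// In-place reference: buf holds nprocs slices; this rank's slice (at offset
// rank * count) is replicated into the matching slice on every other rank.
void reference_allgather_inplace(std::vector<float>& buf) {
  int nprocs;
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
  const int count = static_cast<int>(buf.size() / nprocs);
  MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
                buf.data(), count, MPI_FLOAT, MPI_COMM_WORLD);
}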