Merge pull request #10 from clara-genomics/integrate-cga-v0.3.0

Integrate cga v0.3.0
lbcb-sci · Oct 9, 2019 · 4ba45c5 · 4ba45c5
2 parents 2c5672e + 5d12aef
commit 4ba45c5
Show file tree

Hide file tree

Showing 5 changed files with 26 additions and 37 deletions.
diff --git a/ci/common/build.sh b/ci/common/build.sh
@@ -37,6 +37,14 @@ g++ --version
 # FIX Added to deal with Anancoda SSL verification issues during conda builds
 conda config --set ssl_verify False
 
+conda install \
+    -c conda-forge \
+    -c sarcasm \
+    -c bioconda \
+    doxygen \
+    ninja \
+    cmake
+
 CUDA_REL=${CUDA:0:3}
 if [ "${CUDA:0:2}" == '10' ]; then
   # CUDA 10 release

diff --git a/src/cuda/cudaaligner.cpp b/src/cuda/cudaaligner.cpp
@@ -39,7 +39,7 @@ CUDABatchAligner::CUDABatchAligner(uint32_t max_query_size,
     aligner_ = claragenomics::cudaaligner::create_aligner(max_query_size,
                                                           max_target_size,
                                                           max_alignments,
-                                                          claragenomics::cudaaligner::AlignmentType::global,
+                                                          claragenomics::cudaaligner::AlignmentType::global_alignment,
                                                           stream_,
                                                           device_id);
 }
@@ -68,9 +68,7 @@ bool CUDABatchAligner::addOverlap(Overlap* overlap, std::vector<std::unique_ptr<
     else if (s == claragenomics::cudaaligner::StatusType::exceeded_max_alignment_difference
              || s == claragenomics::cudaaligner::StatusType::exceeded_max_length)
     {
-        cpu_overlap_data_.emplace_back(std::make_pair<std::string, std::string>(std::string(q, q + q_len),
-                                                                                std::string(t, t + t_len)));
-        cpu_overlaps_.push_back(overlap);
+        // Do nothing as this case will be handled by CPU aligner.
     }
     else if (s != claragenomics::cudaaligner::StatusType::success)
     {
@@ -86,21 +84,9 @@ bool CUDABatchAligner::addOverlap(Overlap* overlap, std::vector<std::unique_ptr<
 void CUDABatchAligner::alignAll()
 {
     aligner_->align_all();
-    compute_cpu_overlaps();
 }
 
-void CUDABatchAligner::compute_cpu_overlaps()
-{
-    for(std::size_t a = 0; a < cpu_overlaps_.size(); a++)
-    {
-        // Run CPU version of overlap.
-        Overlap* overlap = cpu_overlaps_[a];
-        overlap->align_overlaps(cpu_overlap_data_[a].first.c_str(), cpu_overlap_data_[a].first.length(),
-                                cpu_overlap_data_[a].second.c_str(), cpu_overlap_data_[a].second.length());
-    }
-}
-
-void CUDABatchAligner::find_breaking_points(uint32_t window_length)
+void CUDABatchAligner::generate_cigar_strings()
 {
     aligner_->sync_alignments();
 
@@ -113,19 +99,12 @@ void CUDABatchAligner::find_breaking_points(uint32_t window_length)
     for(std::size_t a = 0; a < alignments.size(); a++)
     {
         overlaps_[a]->cigar_ = alignments[a]->convert_to_cigar();
-        overlaps_[a]->find_breaking_points_from_cigar(window_length);
-    }
-    for(Overlap* overlap : cpu_overlaps_)
-    {
-        // Run CPU version of breaking points.
-        overlap->find_breaking_points_from_cigar(window_length);
     }
 }
 
 void CUDABatchAligner::reset()
 {
     overlaps_.clear();
-    cpu_overlaps_.clear();
     cpu_overlap_data_.clear();
     aligner_->reset();
 }

diff --git a/src/cuda/cudaaligner.hpp b/src/cuda/cudaaligner.hpp
@@ -49,10 +49,11 @@ class CUDABatchAligner
         virtual void alignAll();
 
         /**
-         * @brief Find breaking points in alignments.
+         * @brief Generate cigar strings for overlaps that were successfully
+         *        computed on the GPU.
          *
          */
-        virtual void find_breaking_points(uint32_t window_length);
+        virtual void generate_cigar_strings();
 
         /**
          * @brief Resets the state of the object, which includes
@@ -74,13 +75,10 @@ class CUDABatchAligner
         CUDABatchAligner(const CUDABatchAligner&) = delete;
         const CUDABatchAligner& operator=(const CUDABatchAligner&) = delete;
 
-        void compute_cpu_overlaps();
-
         std::unique_ptr<claragenomics::cudaaligner::Aligner> aligner_;
 
         std::vector<Overlap*> overlaps_;
 
-        std::vector<Overlap*> cpu_overlaps_;
         std::vector<std::pair<std::string, std::string>> cpu_overlap_data_;
 
         // Static batch count used to generate batch IDs.

diff --git a/src/cuda/cudapolisher.cpp b/src/cuda/cudapolisher.cpp
@@ -85,12 +85,7 @@ std::vector<uint32_t> CUDAPolisher::calculate_batches_per_gpu(uint32_t batches,
 
 void CUDAPolisher::find_overlap_breaking_points(std::vector<std::unique_ptr<Overlap>>& overlaps)
 {
-    if (cudaaligner_batches_ < 1)
-    {
-        // TODO: Kept CPU overlap alignment right now while GPU is a dummy implmentation.
-        Polisher::find_overlap_breaking_points(overlaps);
-    }
-    else
+    if (cudaaligner_batches_ >= 1)
     {
         // TODO: Experimentally this is giving decent perf
         const uint32_t MAX_ALIGNMENTS = 200;
@@ -137,7 +132,10 @@ void CUDAPolisher::find_overlap_breaking_points(std::vector<std::unique_ptr<Over
                 {
                     // Launch workload.
                     batch->alignAll();
-                    batch->find_breaking_points(window_length_);
+
+                    // Generate CIGAR strings for successful alignments. The actual breaking points
+                    // will be calculated by the overlap object.
+                    batch->generate_cigar_strings();
 
                     // logging bar
                     {
@@ -193,6 +191,12 @@ void CUDAPolisher::find_overlap_breaking_points(std::vector<std::unique_ptr<Over
 
         batch_aligners_.clear();
     }
+
+    // This call runs the breaking point detection code for all alignments.
+    // Any overlaps that couldn't be processed by the GPU are also handled here
+    // by the CPU aligner.
+    logger_->log();
+    Polisher::find_overlap_breaking_points(overlaps);
 }
 
 void CUDAPolisher::polish(std::vector<std::unique_ptr<Sequence>>& dst,

diff --git a/vendor/ClaraGenomicsAnalysis b/vendor/ClaraGenomicsAnalysis