Use float gam instead of double in CudaOptimiser

Even though tests show otherwise, using float gives better results in real-world scenarios.
KCL-BMEIS · Nov 27, 2023 · b9c9bec · b9c9bec
1 parent 25aba87
commit b9c9bec
Show file tree

Hide file tree

Showing 2 changed files with 4 additions and 4 deletions.
diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
@@ -1 +1 @@
-371
+372
diff --git a/reg-lib/cuda/CudaOptimiser.cu b/reg-lib/cuda/CudaOptimiser.cu
@@ -237,7 +237,7 @@ void GetConjugateGradient(float4 *gradientCuda,
         return make_double2(dgg, gg);
     };
 
-    double gam;
+    float gam;
     thrust::counting_iterator<int> it(0);
     const double2 gg = thrust::transform_reduce(thrust::device, it, it + nVoxels, [=]__device__(const int index) {
         return calcGam(gradientTexture, conjugateGTexture, conjugateHTexture, index);
@@ -247,8 +247,8 @@ void GetConjugateGradient(float4 *gradientCuda,
         const double2 ggBw = thrust::transform_reduce(thrust::device, it, it + nVoxelsBw, [=]__device__(const int index) {
             return calcGam(gradientBwTexture, conjugateGBwTexture, conjugateHBwTexture, index);
         }, make_double2(0, 0), thrust::plus<double2>());
-        gam = (gg.x + ggBw.x) / (gg.y + ggBw.y);
-    } else gam = gg.x / gg.y;
+        gam = static_cast<float>((gg.x + ggBw.x) / (gg.y + ggBw.y));
+    } else gam = static_cast<float>(gg.x / gg.y);
 
     // Conjugate gradient
     auto conjugate = [gam]__device__(float4 *gradientCuda, float4 *conjugateGCuda, float4 *conjugateHCuda,