diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 5b0cffbc..67bf40fe 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -370 +371 diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp index 269e4e98..90967d07 100755 --- a/reg-lib/cpu/_reg_localTrans.cpp +++ b/reg-lib/cpu/_reg_localTrans.cpp @@ -13,7 +13,8 @@ #include "_reg_localTrans.h" #include "_reg_maths_eigen.h" -#ifdef BUILD_TESTS +// Due to SSE usage creates incorrect test results +#if defined(BUILD_TESTS) && !defined(NDEBUG) #undef USE_SSE #endif diff --git a/reg-lib/cuda/CudaContext.cpp b/reg-lib/cuda/CudaContext.cpp index 9be42aca..ebfd5173 100644 --- a/reg-lib/cuda/CudaContext.cpp +++ b/reg-lib/cuda/CudaContext.cpp @@ -51,22 +51,22 @@ void CudaContext::PickCard(unsigned deviceId = 999) { return; } - // following code is from cutGetMaxGflopsDeviceId() - int max_gflops_device = 0; - int max_gflops = 0; - unsigned current_device = 0; - while (current_device < numDevices) { - cudaGetDeviceProperties(&deviceProp, current_device); + // The following code is from cutGetMaxGflopsDeviceId() + int maxGflopsDevice = 0; + int maxGflops = 0; + unsigned currentDevice = 0; + while (currentDevice < numDevices) { + cudaGetDeviceProperties(&deviceProp, currentDevice); int gflops = deviceProp.multiProcessorCount * deviceProp.clockRate; - if (gflops > max_gflops) { - max_gflops = gflops; - max_gflops_device = current_device; + if (gflops > maxGflops) { + maxGflops = gflops; + maxGflopsDevice = currentDevice; } - ++current_device; + ++currentDevice; } - NR_CUDA_SAFE_CALL(cudaSetDevice(max_gflops_device)); - NR_CUDA_SAFE_CALL(cuCtxCreate(&cudaContext, CU_CTX_SCHED_SPIN, max_gflops_device)); - NR_CUDA_SAFE_CALL(cudaGetDeviceProperties(&deviceProp, max_gflops_device)); + NR_CUDA_SAFE_CALL(cudaSetDevice(maxGflopsDevice)); + NR_CUDA_SAFE_CALL(cuCtxCreate(&cudaContext, CU_CTX_SCHED_SPIN, maxGflopsDevice)); + NR_CUDA_SAFE_CALL(cudaGetDeviceProperties(&deviceProp, maxGflopsDevice)); if (deviceProp.major < 1) { NR_FATAL_ERROR("The specified graphics card does not exist"); @@ -77,15 +77,15 @@ void CudaContext::PickCard(unsigned deviceId = 999) { if (deviceProp.totalGlobalMem != total) NR_FATAL_ERROR("The CUDA card "s + deviceProp.name + " does not seem to be available\n"s + "Expected total memory: "s + std::to_string(deviceProp.totalGlobalMem / (1024 * 1024)) + - " MB - Recovered total memory: "s + std::to_string(total / (1024 * 1024)) + " MB"); - NR_DEBUG("The following device is used: "s + deviceProp.name); - NR_DEBUG("It has "s + std::to_string(free / (1024 * 1024)) + " MB free out of "s + std::to_string(total / (1024 * 1024)) + " MB"); - NR_DEBUG("The CUDA compute capability is "s + std::to_string(deviceProp.major) + "."s + std::to_string(deviceProp.minor)); - NR_DEBUG("The shared memory size in bytes: "s + std::to_string(deviceProp.sharedMemPerBlock)); - NR_DEBUG("The CUDA version is "s + std::to_string(CUDART_VERSION)); - NR_DEBUG("The card clock rate is "s + std::to_string(deviceProp.clockRate / 1000) + " MHz"); - NR_DEBUG("The card has "s + std::to_string(deviceProp.multiProcessorCount) + " multiprocessors"); - cudaIdx = max_gflops_device; + " MB - Recovered total memory: "s + std::to_string(total / (1024 * 1024)) + " MB"s); + NR_DEBUG("The following device is used: " << deviceProp.name); + NR_DEBUG("It has " << free / (1024 * 1024) << " MB free out of " << total / (1024 * 1024) << " MB"); + NR_DEBUG("The CUDA compute capability is " << deviceProp.major << "." << deviceProp.minor); + NR_DEBUG("The shared memory size in bytes: " << deviceProp.sharedMemPerBlock); + NR_DEBUG("The CUDA version is " << CUDART_VERSION); + NR_DEBUG("The card clock rate is " << deviceProp.clockRate / 1000 << " MHz"); + NR_DEBUG("The card has " << deviceProp.multiProcessorCount << " multiprocessors"); + cudaIdx = maxGflopsDevice; cudaGetDeviceProperties(&deviceProp, cudaIdx); if (deviceProp.major > 1) { isCardDoubleCapable = true; diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu index 2a0a775f..77a2f739 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.cu +++ b/reg-lib/cuda/_reg_ssd_gpu.cu @@ -83,7 +83,7 @@ double reg_getSsdValue_gpu(const nifti_image *referenceImage, const double weight = localWeightSimTexture ? tex1Dfetch(localWeightSimTexture, index) : 1.f; const double diff = refValue - warValue; return { Square(diff) * weight, weight }; // ssd and count - }, make_double2(0.0, 0.0), thrust::plus()); + }, make_double2(0, 0), thrust::plus()); ssd += (ssdAndCount.x * timePointWeights[t]) / ssdAndCount.y; }