Skip to content

Commit

Permalink
Refactorisations
Browse files Browse the repository at this point in the history
  • Loading branch information
onurulgen committed Nov 24, 2023
1 parent 592d01d commit 25aba87
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 25 deletions.
2 changes: 1 addition & 1 deletion niftyreg_build_version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
370
371
3 changes: 2 additions & 1 deletion reg-lib/cpu/_reg_localTrans.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
#include "_reg_localTrans.h"
#include "_reg_maths_eigen.h"

#ifdef BUILD_TESTS
// SSE is disabled here because its usage creates incorrect test results
#if defined(BUILD_TESTS) && !defined(NDEBUG)
#undef USE_SSE
#endif

Expand Down
44 changes: 22 additions & 22 deletions reg-lib/cuda/CudaContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,22 +51,22 @@ void CudaContext::PickCard(unsigned deviceId = 999) {
return;
}

// following code is from cutGetMaxGflopsDeviceId()
int max_gflops_device = 0;
int max_gflops = 0;
unsigned current_device = 0;
while (current_device < numDevices) {
cudaGetDeviceProperties(&deviceProp, current_device);
// The following code is from cutGetMaxGflopsDeviceId()
int maxGflopsDevice = 0;
int maxGflops = 0;
unsigned currentDevice = 0;
while (currentDevice < numDevices) {
cudaGetDeviceProperties(&deviceProp, currentDevice);
int gflops = deviceProp.multiProcessorCount * deviceProp.clockRate;
if (gflops > max_gflops) {
max_gflops = gflops;
max_gflops_device = current_device;
if (gflops > maxGflops) {
maxGflops = gflops;
maxGflopsDevice = currentDevice;
}
++current_device;
++currentDevice;
}
NR_CUDA_SAFE_CALL(cudaSetDevice(max_gflops_device));
NR_CUDA_SAFE_CALL(cuCtxCreate(&cudaContext, CU_CTX_SCHED_SPIN, max_gflops_device));
NR_CUDA_SAFE_CALL(cudaGetDeviceProperties(&deviceProp, max_gflops_device));
NR_CUDA_SAFE_CALL(cudaSetDevice(maxGflopsDevice));
NR_CUDA_SAFE_CALL(cuCtxCreate(&cudaContext, CU_CTX_SCHED_SPIN, maxGflopsDevice));
NR_CUDA_SAFE_CALL(cudaGetDeviceProperties(&deviceProp, maxGflopsDevice));

if (deviceProp.major < 1) {
NR_FATAL_ERROR("The specified graphics card does not exist");
Expand All @@ -77,15 +77,15 @@ void CudaContext::PickCard(unsigned deviceId = 999) {
if (deviceProp.totalGlobalMem != total)
NR_FATAL_ERROR("The CUDA card "s + deviceProp.name + " does not seem to be available\n"s +
"Expected total memory: "s + std::to_string(deviceProp.totalGlobalMem / (1024 * 1024)) +
" MB - Recovered total memory: "s + std::to_string(total / (1024 * 1024)) + " MB");
NR_DEBUG("The following device is used: "s + deviceProp.name);
NR_DEBUG("It has "s + std::to_string(free / (1024 * 1024)) + " MB free out of "s + std::to_string(total / (1024 * 1024)) + " MB");
NR_DEBUG("The CUDA compute capability is "s + std::to_string(deviceProp.major) + "."s + std::to_string(deviceProp.minor));
NR_DEBUG("The shared memory size in bytes: "s + std::to_string(deviceProp.sharedMemPerBlock));
NR_DEBUG("The CUDA version is "s + std::to_string(CUDART_VERSION));
NR_DEBUG("The card clock rate is "s + std::to_string(deviceProp.clockRate / 1000) + " MHz");
NR_DEBUG("The card has "s + std::to_string(deviceProp.multiProcessorCount) + " multiprocessors");
cudaIdx = max_gflops_device;
" MB - Recovered total memory: "s + std::to_string(total / (1024 * 1024)) + " MB"s);
NR_DEBUG("The following device is used: " << deviceProp.name);
NR_DEBUG("It has " << free / (1024 * 1024) << " MB free out of " << total / (1024 * 1024) << " MB");
NR_DEBUG("The CUDA compute capability is " << deviceProp.major << "." << deviceProp.minor);
NR_DEBUG("The shared memory size in bytes: " << deviceProp.sharedMemPerBlock);
NR_DEBUG("The CUDA version is " << CUDART_VERSION);
NR_DEBUG("The card clock rate is " << deviceProp.clockRate / 1000 << " MHz");
NR_DEBUG("The card has " << deviceProp.multiProcessorCount << " multiprocessors");
cudaIdx = maxGflopsDevice;
cudaGetDeviceProperties(&deviceProp, cudaIdx);
if (deviceProp.major > 1) {
isCardDoubleCapable = true;
Expand Down
2 changes: 1 addition & 1 deletion reg-lib/cuda/_reg_ssd_gpu.cu
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ double reg_getSsdValue_gpu(const nifti_image *referenceImage,
const double weight = localWeightSimTexture ? tex1Dfetch<float>(localWeightSimTexture, index) : 1.f;
const double diff = refValue - warValue;
return { Square(diff) * weight, weight }; // ssd and count
}, make_double2(0.0, 0.0), thrust::plus<double2>());
}, make_double2(0, 0), thrust::plus<double2>());

ssd += (ssdAndCount.x * timePointWeights[t]) / ssdAndCount.y;
}
Expand Down

0 comments on commit 25aba87

Please sign in to comment.