diff --git a/Samples/5_Domain_Specific/binomialOptions/binomialOptions.cpp b/Samples/5_Domain_Specific/binomialOptions/binomialOptions.cpp
index c3b1f441e..89961fd60 100644
--- a/Samples/5_Domain_Specific/binomialOptions/binomialOptions.cpp
+++ b/Samples/5_Domain_Specific/binomialOptions/binomialOptions.cpp
@@ -52,13 +52,14 @@ extern "C" void BlackScholesCall(real &callResult, TOptionData optionData);
 // Process single option on CPU
 // Note that CPU code is for correctness testing only and not for benchmarking.
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" void binomialOptionsCPU(real &callResult, TOptionData optionData);
+extern "C" void binomialOptionsCPU(real &callResult, TOptionData optionData,
+                                   option_t option_type);
 
 ////////////////////////////////////////////////////////////////////////////////
 // Process an array of OptN options on GPU
 ////////////////////////////////////////////////////////////////////////////////
 extern "C" void binomialOptionsGPU(real *callValue, TOptionData *optionData,
-                                   int optN);
+                                   int optN, option_t option_type);
 
 ////////////////////////////////////////////////////////////////////////////////
 // Helper function, returning uniformly distributed
@@ -103,12 +104,14 @@ int main(int argc, char **argv) {
     BlackScholesCall(callValueBS[i], optionData[i]);
   }
 
-  printf("Running GPU binomial tree...\n");
+  option_t option_type = EU;
+
+  printf("Running GPU binomial tree (EU)...\n");
   checkCudaErrors(cudaDeviceSynchronize());
   sdkResetTimer(&hTimer);
   sdkStartTimer(&hTimer);
 
-  binomialOptionsGPU(callValueGPU, optionData, OPT_N);
+  binomialOptionsGPU(callValueGPU, optionData, OPT_N, option_type);
 
   checkCudaErrors(cudaDeviceSynchronize());
   sdkStopTimer(&hTimer);
@@ -118,13 +121,13 @@ int main(int argc, char **argv) {
   printf("binomialOptionsGPU() time: %f msec\n", gpuTime);
   printf("Options per second : %f \n", OPT_N / (gpuTime * 0.001));
 
-  printf("Running CPU binomial tree...\n");
+  printf("Running CPU binomial tree (EU)...\n");
 
   for (i = 0; i < OPT_N; i++) {
-    binomialOptionsCPU(callValueCPU[i], optionData[i]);
+    binomialOptionsCPU(callValueCPU[i], optionData[i], option_type);
   }
 
-  printf("Comparing the results...\n");
+  printf("Comparing the results (EU)...\n");
   sumDelta = 0;
   sumRef = 0;
   printf("GPU binomial vs. Black-Scholes\n");
@@ -170,6 +173,49 @@ int main(int argc, char **argv) {
     printf("Avg. diff: %E\n", (double)(sumDelta / (real)OPT_N));
   }
 
+  if (errorVal > 5e-4) {
+    printf("Test failed!\n");
+    exit(EXIT_FAILURE);
+  }
+
+  option_type = NA;
+
+  printf("\nRunning GPU binomial tree (NA)...\n");
+  checkCudaErrors(cudaDeviceSynchronize());
+  sdkResetTimer(&hTimer);
+  sdkStartTimer(&hTimer);
+
+  binomialOptionsGPU(callValueGPU, optionData, OPT_N, option_type);
+
+  checkCudaErrors(cudaDeviceSynchronize());
+  sdkStopTimer(&hTimer);
+  gpuTime = sdkGetTimerValue(&hTimer);
+  printf("Options count : %i \n", OPT_N);
+  printf("Time steps : %i \n", NUM_STEPS);
+  printf("binomialOptionsGPU() time: %f msec\n", gpuTime);
+  printf("Options per second : %f \n", OPT_N / (gpuTime * 0.001));
+
+  printf("Running CPU binomial tree (NA)...\n");
+
+  for (i = 0; i < OPT_N; i++) {
+    binomialOptionsCPU(callValueCPU[i], optionData[i], option_type);
+  }
+
+  printf("CPU binomial vs. GPU binomial\n");
+  sumDelta = 0;
+  sumRef = 0;
+
+  for (i = 0; i < OPT_N; i++) {
+    sumDelta += fabs(callValueGPU[i] - callValueCPU[i]);
+    sumRef += callValueCPU[i];
+  }
+
+  if (sumRef > 1E-5) {
+    printf("L1 norm: %E\n", errorVal = sumDelta / sumRef);
+  } else {
+    printf("Avg. diff: %E\n", (double)(sumDelta / (real)OPT_N));
+  }
+
   printf("Shutting down...\n");
   sdkDeleteTimer(&hTimer);
 
diff --git a/Samples/5_Domain_Specific/binomialOptions/binomialOptions_common.h b/Samples/5_Domain_Specific/binomialOptions/binomialOptions_common.h
index 84e22954a..582eea1d9 100644
--- a/Samples/5_Domain_Specific/binomialOptions/binomialOptions_common.h
+++ b/Samples/5_Domain_Specific/binomialOptions/binomialOptions_common.h
@@ -41,6 +41,15 @@ typedef struct {
   real V;
 } TOptionData;
 
+////////////////////////////////////////////////////////////////////////////////
+// Option types
+////////////////////////////////////////////////////////////////////////////////
+enum option_t
+{
+  NA = 0,
+  EU,
+};
+
 ////////////////////////////////////////////////////////////////////////////////
 // Global parameters
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/Samples/5_Domain_Specific/binomialOptions/binomialOptions_gold.cpp b/Samples/5_Domain_Specific/binomialOptions/binomialOptions_gold.cpp
index 4847ab390..046991dae 100644
--- a/Samples/5_Domain_Specific/binomialOptions/binomialOptions_gold.cpp
+++ b/Samples/5_Domain_Specific/binomialOptions/binomialOptions_gold.cpp
@@ -78,7 +78,8 @@ static real expiryCallValue(real S, real X, real vDt, int i) {
   return (d > (real)0) ? d : (real)0;
 }
 
-extern "C" void binomialOptionsCPU(real &callResult, TOptionData optionData) {
+extern "C" void binomialOptionsCPU(real &callResult, TOptionData optionData,
+                                   option_t option_type) {
   static real Call[NUM_STEPS + 1];
 
   const real S = optionData.S;
@@ -112,9 +113,18 @@ extern "C" void binomialOptionsCPU(real &callResult, TOptionData optionData) {
   ////////////////////////////////////////////////////////////////////////
   // Walk backwards up binomial tree
   ////////////////////////////////////////////////////////////////////////
-  for (int i = NUM_STEPS; i > 0; i--)
-    for (int j = 0; j <= i - 1; j++)
-      Call[j] = puByDf * Call[j + 1] + pdByDf * Call[j];
+  for (int i = NUM_STEPS; i > 0; i--) {
+    for (int j = 0; j <= i - 1; j++) {
+      real continuation_value = puByDf * Call[j + 1] + pdByDf * Call[j];
+      if(option_type == NA){
+        real fwd = S * exp((2*j-i) * vDt);
+        real exercise_value = (fwd - X) > (real)0 ? (fwd - X) : (real)0;
+        Call[j] = exercise_value > continuation_value ? exercise_value : continuation_value;
+      } else if (option_type == EU) {
+        Call[j] = continuation_value;
+      }
+    }
+  }
 
   callResult = (real)Call[0];
 }
diff --git a/Samples/5_Domain_Specific/binomialOptions/binomialOptions_kernel.cu b/Samples/5_Domain_Specific/binomialOptions/binomialOptions_kernel.cu
index 3b1e8111d..ad80d90bd 100644
--- a/Samples/5_Domain_Specific/binomialOptions/binomialOptions_kernel.cu
+++ b/Samples/5_Domain_Specific/binomialOptions/binomialOptions_kernel.cu
@@ -74,7 +74,7 @@ __device__ inline double expiryCallValue(double S, double X, double vDt,
 #error Bad constants
 #endif
 
-__global__ void binomialOptionsKernel() {
+__global__ void binomialOptionsKernel(option_t option_type) {
   // Handle to thread block group
   cg::thread_block cta = cg::this_thread_block();
   __shared__ real call_exchange[THREADBLOCK_SIZE + 1];
@@ -105,8 +105,20 @@ __global__ void binomialOptionsKernel() {
 
     if (i > final_it) {
 #pragma unroll
-      for (int j = 0; j < ELEMS_PER_THREAD; ++j)
-        call[j] = puByDf * call[j + 1] + pdByDf * call[j];
+      for (int j = 0; j < ELEMS_PER_THREAD; ++j) {
+        real continuation_value = puByDf * call[j + 1] + pdByDf * call[j];
+        if(option_type == NA){
+#ifndef DOUBLE_PRECISION
+          real fwd = S*__expf(vDt * (2*(tid * ELEMS_PER_THREAD + j) - i));
+#else
+          real fwd = S*exp(vDt * (2*(tid * ELEMS_PER_THREAD + j) - i));
+#endif
+          real exercise_value = ((fwd - X) > (real)0) ? (fwd - X) : (real)0;
+          call[j] = exercise_value > continuation_value ? exercise_value : continuation_value;
+        } else if (option_type == EU){
+          call[j] = continuation_value;
+        }
+      }
     }
   }
 
@@ -119,7 +131,7 @@ __global__ void binomialOptionsKernel() {
 ////////////////////////////////////////////////////////////////////////////////
 // Host-side interface to GPU binomialOptions
 ////////////////////////////////////////////////////////////////////////////////
 extern "C" void binomialOptionsGPU(real *callValue, TOptionData *optionData,
-                                   int optN) {
+                                   int optN, option_t option_type) {
   __TOptionData h_OptionData[MAX_OPTIONS];
 
   for (int i = 0; i < optN; i++) {
@@ -150,7 +162,7 @@ extern "C" void binomialOptionsGPU(real *callValue, TOptionData *optionData,
   checkCudaErrors(cudaMemcpyToSymbol(d_OptionData, h_OptionData,
                                      optN * sizeof(__TOptionData)));
 
-  binomialOptionsKernel<<<optN, THREADBLOCK_SIZE>>>();
+  binomialOptionsKernel<<<optN, THREADBLOCK_SIZE>>>(option_type);
   getLastCudaError("binomialOptionsKernel() execution failed.\n");
   checkCudaErrors(
       cudaMemcpyFromSymbol(callValue, d_CallValue, optN * sizeof(real)));