diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/cpp_model_parameters_h.inc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/cpp_model_parameters_h.inc index 0250c160ed..ef3d99d07c 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/cpp_model_parameters_h.inc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/cpp_model_parameters_h.inc @@ -172,7 +172,7 @@ namespace mg5amcCpu #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-variable" // e.g. <> #pragma GCC diagnostic ignored "-Wunused-parameter" // e.g. <> -#ifdef __CUDACC__ +#ifdef MGONGPUCPP_GPUIMPL #pragma nv_diagnostic push #pragma nv_diag_suppress 177 // e.g. <> #endif @@ -194,9 +194,9 @@ namespace mg5amcCpu %(dcoupsetdcoup)s } %(eftspecial2)s - return out; - } -#ifdef __CUDACC__ + return out; + } +#ifdef MGONGPUCPP_GPUIMPL #pragma GCC diagnostic pop #pragma nv_diagnostic pop #endif @@ -212,6 +212,12 @@ namespace mg5amcCpu //========================================================================== +#ifdef MGONGPUCPP_GPUIMPL +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ #pragma GCC diagnostic push #ifndef __clang__ #pragma GCC diagnostic ignored "-Wunused-but-set-variable" // e.g. <> diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/CudaRuntime.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/CudaRuntime.h deleted file mode 100644 index df0c3f3df8..0000000000 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/CudaRuntime.h +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: S. Roiser (Jul 2020) for the MG5aMC CUDACPP plugin. -// Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. - -#ifndef MG5AMC_CUDARUNTIME_H -#define MG5AMC_CUDARUNTIME_H 1 - -// MG5AMC on GPU uses the CUDA runtime API, not the lower level CUDA driver API -// See https://docs.nvidia.com/cuda/cuda-runtime-api/driver-vs-runtime-api.html#driver-vs-runtime-api - -#include -#include - -//-------------------------------------------------------------------------- - -// See https://stackoverflow.com/a/14038590 -#ifdef MGONGPUCPP_GPUIMPL /* clang-format off */ -#define checkCuda( code ) { assertCuda( code, __FILE__, __LINE__ ); } -inline void assertCuda( cudaError_t code, const char* file, int line, bool abort = true ) -{ - if( code != cudaSuccess ) - { - printf( "ERROR! assertCuda: '%s' (%d) in %s:%d\n", cudaGetErrorString( code ), code, file, line ); - if( abort ) assert( code == cudaSuccess ); - } -} -#endif /* clang-format on */ - -//-------------------------------------------------------------------------- - -#ifdef MGONGPUCPP_GPUIMPL -namespace mg5amcGpu -{ - // Instantiate a CudaRuntime at the beginnining of the application's main to - // invoke cudaSetDevice(0) in the constructor and book a cudaDeviceReset() call in the destructor - // *** FIXME! This will all need to be designed differently when going to multi-GPU nodes! *** - struct CudaRuntime final - { - CudaRuntime( const bool debug = true ) - : m_debug( debug ) { setUp( m_debug ); } - ~CudaRuntime() { tearDown( m_debug ); } - CudaRuntime( const CudaRuntime& ) = delete; - CudaRuntime( CudaRuntime&& ) = delete; - CudaRuntime& operator=( const CudaRuntime& ) = delete; - CudaRuntime& operator=( CudaRuntime&& ) = delete; - bool m_debug; - - // Set up CUDA application - // ** NB: strictly speaking this is not needed when using the CUDA runtime API ** - // Calling cudaSetDevice on startup is useful to properly book-keep the time spent in CUDA initialization - static void setUp( const bool debug = true ) - { - // ** NB: it is useful to call cudaSetDevice, or cudaFree, to properly book-keep the time spent in CUDA initialization - // ** NB: otherwise, the first CUDA operation (eg a cudaMemcpyToSymbol in CPPProcess ctor) appears to take much longer! - /* - // [We initially added cudaFree(0) to "ease profile analysis" only because it shows up as a big recognizable block!] - // No explicit initialization is needed: https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#initialization - // It is not clear what cudaFree(0) does at all: https://stackoverflow.com/questions/69967813/ - if ( debug ) std::cout << "__CudaRuntime: calling cudaFree(0)" << std::endl; - checkCuda( cudaFree( 0 ) ); // SLOW! - */ - // Replace cudaFree(0) by cudaSetDevice(0), even if it is not really needed either - // (but see https://developer.nvidia.com/blog/cuda-pro-tip-always-set-current-device-avoid-multithreading-bugs) - if( debug ) std::cout << "__CudaRuntime: calling cudaSetDevice(0)" << std::endl; - checkCuda( cudaSetDevice( 0 ) ); // SLOW! - } - - // Tear down CUDA application (call cudaDeviceReset) - // ** NB: strictly speaking this is not needed when using the CUDA runtime API ** - // Calling cudaDeviceReset on shutdown is only needed for checking memory leaks in cuda-memcheck - // See https://docs.nvidia.com/cuda/cuda-memcheck/index.html#leak-checking - static void tearDown( const bool debug = true ) - { - if( debug ) std::cout << "__CudaRuntime: calling cudaDeviceReset()" << std::endl; - checkCuda( cudaDeviceReset() ); - } - }; - -} -#endif - -//-------------------------------------------------------------------------- - -#endif // MG5AMC_CUDARUNTIME_H diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/check_sa.cc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/check_sa.cc index 9a39220077..491dfc02e1 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/check_sa.cc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/check_sa.cc @@ -12,6 +12,7 @@ #include "BridgeKernels.h" #include "CPPProcess.h" #include "CrossSectionKernels.h" +#include "GpuRuntime.h" #include "MatrixElementKernels.h" #include "MemoryAccessMatrixElements.h" #include "MemoryAccessMomenta.h" @@ -102,12 +103,12 @@ main( int argc, char** argv ) CurandHost = 1, CurandDevice = 2 }; -#ifdef MGONGPUCPP_GPUIMPL - RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU +#ifdef __CUDACC__ + RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on CUDA GPU #elif not defined MGONGPU_HAS_NO_CURAND RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand #else - RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has no curand + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has no curand and on HIP GPU #endif // Rambo sampling mode (NB RamboHost implies CommonRandom or CurandHost!) enum class RamboSamplingMode @@ -145,10 +146,10 @@ main( int argc, char** argv ) } else if( arg == "--curdev" ) { -#ifdef MGONGPUCPP_GPUIMPL +#ifdef __CUDACC__ rndgen = RandomNumberMode::CurandDevice; #else - throw std::runtime_error( "CurandDevice is not supported on CPUs" ); + throw std::runtime_error( "CurandDevice is not supported on CPUs or on HIP GPUs" ); #endif } else if( arg == "--curhst" ) @@ -265,12 +266,12 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL - // --- 00. Initialise cuda - // Instantiate a CudaRuntime at the beginnining of the application's main to - // invoke cudaSetDevice(0) in the constructor and book a cudaDeviceReset() call in the destructor - const std::string cdinKey = "00 CudaInit"; + // --- 00. Initialise GPU + // Instantiate a GpuRuntime at the beginnining of the application's main. + // For CUDA this invokes cudaSetDevice(0) in the constructor and books a cudaDeviceReset() call in the destructor. + const std::string cdinKey = "00 GpuInit"; timermap.start( cdinKey ); - CudaRuntime cudaRuntime( debug ); + GpuRuntime GpuRuntime( debug ); #endif // --- 0a. Initialise physics process @@ -394,7 +395,7 @@ main( int argc, char** argv ) const bool onDevice = false; prnk.reset( new CurandRandomNumberKernel( hstRndmom, onDevice ) ); } -#ifdef MGONGPUCPP_GPUIMPL +#ifdef __CUDACC__ else { const bool onDevice = true; @@ -403,7 +404,7 @@ main( int argc, char** argv ) #else else { - throw std::logic_error( "CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) + throw std::logic_error( "CurandDevice is not supported on CPUs or HIP GPUs" ); // INTERNAL ERROR (no path to this statement) } #endif #else @@ -729,17 +730,21 @@ main( int argc, char** argv ) rndgentxt = "CURAND HOST"; else if( rndgen == RandomNumberMode::CurandDevice ) rndgentxt = "CURAND DEVICE"; -#ifdef MGONGPUCPP_GPUIMPL +#ifdef __CUDACC__ rndgentxt += " (CUDA code)"; +#elif defined __HIPCC__ + rndgentxt += " (HIP code)"; #else rndgentxt += " (C++ code)"; #endif // Workflow description summary std::string wrkflwtxt; - // -- CUDA or C++? -#ifdef MGONGPUCPP_GPUIMPL + // -- CUDA or HIP or C++? +#ifdef __CUDACC__ wrkflwtxt += "CUD:"; +#elif defined __HIPCC__ + wrkflwtxt += "HIP:"; #else wrkflwtxt += "CPP:"; #endif @@ -754,7 +759,7 @@ main( int argc, char** argv ) wrkflwtxt += "???+"; // no path to this statement #endif // -- CUCOMPLEX or THRUST or STD complex numbers? -#ifdef MGONGPUCPP_GPUIMPL +#ifdef __CUDACC__ #if defined MGONGPU_CUCXTYPE_CUCOMPLEX wrkflwtxt += "CUX:"; #elif defined MGONGPU_CUCXTYPE_THRUST @@ -764,6 +769,12 @@ main( int argc, char** argv ) #else wrkflwtxt += "???:"; // no path to this statement #endif +#elif defined __HIPCC__ +#if defined MGONGPU_CUCXTYPE_CXSMPL + wrkflwtxt += "CXS:"; +#else + wrkflwtxt += "???:"; // no path to this statement +#endif #else #if defined MGONGPU_CPPCXTYPE_STDCOMPLEX wrkflwtxt += "STX:"; @@ -864,8 +875,10 @@ main( int argc, char** argv ) #endif // Dump all configuration parameters and all results std::cout << std::string( SEP79, '*' ) << std::endl -#ifdef MGONGPUCPP_GPUIMPL +#ifdef __CUDACC__ << "Process = " << XSTRINGIFY( MG_EPOCH_PROCESS_ID ) << "_CUDA" +#elif defined __HIPCC__ + << "Process = " << XSTRINGIFY( MG_EPOCH_PROCESS_ID ) << "_HIP" #else << "Process = " << XSTRINGIFY( MG_EPOCH_PROCESS_ID ) << "_CPP" #endif @@ -892,14 +905,14 @@ main( int argc, char** argv ) #elif defined MGONGPU_FPTYPE_FLOAT << "FP precision = FLOAT (NaN/abnormal=" << nabn << ", zero=" << nzero << ")" << std::endl #endif -#ifdef MGONGPUCPP_GPUIMPL #if defined MGONGPU_CUCXTYPE_CUCOMPLEX << "Complex type = CUCOMPLEX" << std::endl #elif defined MGONGPU_CUCXTYPE_THRUST << "Complex type = THRUST::COMPLEX" << std::endl -#endif -#else +#elif defined MGONGPU_CUCXTYPE_CXSMPL << "Complex type = STD::COMPLEX" << std::endl +#else + << "Complex type = ???" << std::endl // no path to this statement... #endif << "RanNumb memory layout = AOSOA[" << neppR << "]" << ( neppR == 1 ? " == AOS" : "" ) @@ -1033,14 +1046,14 @@ main( int argc, char** argv ) << "\"FLOAT (NaN/abnormal=" << nabn << ")\"," << std::endl #endif << "\"Complex type\": " -#ifdef MGONGPUCPP_GPUIMPL #if defined MGONGPU_CUCXTYPE_CUCOMPLEX << "\"CUCOMPLEX\"," << std::endl #elif defined MGONGPU_CUCXTYPE_THRUST << "\"THRUST::COMPLEX\"," << std::endl -#endif -#else +#elif defined MGONGPU_CUCXTYPE_CXSMPL << "\"STD::COMPLEX\"," << std::endl +#else + << "\"???\"," << std::endl // no path to this statement... #endif << "\"RanNumb memory layout\": " << "\"AOSOA[" << neppR << "]\"" diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuConfig.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuConfig.h index 5b04029787..1811de4699 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuConfig.h +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuConfig.h @@ -6,21 +6,31 @@ #ifndef MGONGPUCONFIG_H #define MGONGPUCONFIG_H 1 -#include "GpuRuntime.h" // Includes the GPU abstraction - // HARDCODED AT CODE GENERATION TIME: DO NOT MODIFY (#473) // There are two different code bases for standalone_cudacpp (without multichannel) and madevent+cudacpp (with multichannel) %(mgongpu_supports_multichannel)s +// Is this a GPU (CUDA, HIP) or CPU implementation? +#ifdef __CUDACC__ +#define MGONGPUCPP_GPUIMPL cuda +#elif defined __HIPCC__ +#define MGONGPUCPP_GPUIMPL hip +#else +#undef MGONGPUCPP_GPUIMPL +#endif + // ** NB1 Throughputs (e.g. 6.8E8) are events/sec for "./gcheck.exe -p 65536 128 12" // ** NB2 Baseline on b7g47n0004 fluctuates (probably depends on load on other VMs) // Choose if curand is supported for generating random numbers +// For CUDA, by default, it is supported +// For HIP, by default, it is not supported // For C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND -#ifdef MGONGPUCPP_CUDACC +#ifdef __CUDACC__ #undef MGONGPU_HAS_NO_CURAND -#elif defined MGONGPUCPP_HIPCC +#elif defined __HIPCC__ #define MGONGPU_HAS_NO_CURAND 1 +#else //#undef MGONGPU_HAS_NO_CURAND // default ////#define MGONGPU_HAS_NO_CURAND 1 #endif @@ -55,23 +65,28 @@ //#undef MGONGPU_HARDCODE_PARAM // default ////#define MGONGPU_HARDCODE_PARAM 1 -// Complex type in c++: std::complex or cxsmpl (CHOOSE ONLY ONE) -#ifndef MGONGPUCPP_GPUIMPL -//#define MGONGPU_CPPCXTYPE_STDCOMPLEX 1 // ~8 percent slower on float, same on double (5.1E6/double, 9.4E6/float) -#define MGONGPU_CPPCXTYPE_CXSMPL 1 // new default (5.1E6/double, 10.2E6/float) -#endif - -// Complex type in cuda: thrust or cucomplex or cxsmpl (CHOOSE ONLY ONE) -#ifdef MGONGPUCPP_GPUIMPL +// Complex type in CUDA: thrust or cucomplex or cxsmpl (CHOOSE ONLY ONE) +#ifdef __CUDACC__ #define MGONGPU_CUCXTYPE_THRUST 1 // default (~1.15E9/double, ~3.2E9/float) //#define MGONGPU_CUCXTYPE_CUCOMPLEX 1 // ~10 percent slower (1.03E9/double, ~2.8E9/float) //#define MGONGPU_CUCXTYPE_CXSMPL 1 // ~10 percent slower (1.00E9/double, ~2.9E9/float) + +// Complex type in HIP: cxsmpl (ONLY ONE OPTION POSSIBLE) +#elif defined __HIPCC__ +#define MGONGPU_CUCXTYPE_CXSMPL 1 // ~10 percent slower (1.00E9/double, ~2.9E9/float) + +// Complex type in C++: std::complex or cxsmpl (CHOOSE ONLY ONE) +#else +//#define MGONGPU_CPPCXTYPE_STDCOMPLEX 1 // ~8 percent slower on float, same on double (5.1E6/double, 9.4E6/float) +#define MGONGPU_CPPCXTYPE_CXSMPL 1 // new default (5.1E6/double, 10.2E6/float) #endif -// Cuda nsight compute (ncu) debug: add dummy lines to ease SASS program flow navigation -#ifdef MGONGPUCPP_GPUIMPL -#undef MGONGPU_NSIGHT_DEBUG // default +// CUDA nsight compute (ncu) debug: add dummy lines to ease SASS program flow navigation +#ifdef __CUDACC__ +#undef MGONGPU_NSIGHT_DEBUG // default in CUDA //#define MGONGPU_NSIGHT_DEBUG 1 +#else +#undef MGONGPU_NSIGHT_DEBUG // only option in HIP or C++ #endif // SANITY CHECKS (floating point precision for everything but color algebra #537) @@ -87,17 +102,21 @@ #error You cannot use double precision for color algebra and single precision elsewhere #endif -// SANITY CHECKS (c++ complex number implementation) -#ifndef MGONGPUCPP_GPUIMPL -#if defined MGONGPU_CPPCXTYPE_STDCOMPLEX and defined MGONGPU_CPPCXTYPE_CXSMPL -#error You must CHOOSE (ONE AND) ONLY ONE of MGONGPU_CPPCXTYPE_STDCOMPLEX or MGONGPU_CPPCXTYPE_CXSMPL +// SANITY CHECKS (CUDA complex number implementation) +#ifdef __CUDACC__ +#if defined MGONGPU_CUCXTYPE_THRUST and defined MGONGPU_CUCXTYPE_CUCOMPLEX +#error You must CHOOSE (ONE AND) ONLY ONE of MGONGPU_CUCXTYPE_THRUST or MGONGPU_CUCXTYPE_CUCOMPLEX for CUDA +#elif defined MGONGPU_CUCXTYPE_THRUST and defined MGONGPU_CUCXTYPE_CXSMPL +#error You must CHOOSE (ONE AND) ONLY ONE of MGONGPU_CUCXTYPE_THRUST or MGONGPU_CUCXTYPE_CXSMPL for CUDA +#elif defined MGONGPU_CUCXTYPE_CUCOMPLEX and defined MGONGPU_CUCXTYPE_CXSMPL +#error You must CHOOSE (ONE AND) ONLY ONE OF MGONGPU_CUCXTYPE_CUCOMPLEX or MGONGPU_CUCXTYPE_CXSMPL for CUDA #endif #endif -// SANITY CHECKS (cuda complex number implementation) -#ifdef MGONGPUCPP_GPUIMPL -#if defined MGONGPU_CUCXTYPE_THRUST and defined MGONGPU_CUCXTYPE_CUCOMPLEX and defined MGONGPU_CUCXTYPE_CXSMPL -#error You must CHOOSE (ONE AND) ONLY ONE of MGONGPU_CUCXTYPE_THRUST or MGONGPU_CUCXTYPE_CUCOMPLEX or MGONGPU_CUCXTYPE_CXSMPL +// SANITY CHECKS (C++ complex number implementation) +#ifndef MGONGPUCPP_GPUIMPL +#if defined MGONGPU_CPPCXTYPE_STDCOMPLEX and defined MGONGPU_CPPCXTYPE_CXSMPL +#error You must CHOOSE (ONE AND) ONLY ONE of MGONGPU_CPPCXTYPE_STDCOMPLEX or MGONGPU_CPPCXTYPE_CXSMPL for C++ #endif #endif @@ -146,7 +165,7 @@ using mgOnGpu::fptype; using mgOnGpu::fptype2; // C++ SIMD vectorization width (this will be used to set neppV) -#ifdef MGONGPUCPP_GPUIMPL // CUDA implementation has no SIMD +#ifdef MGONGPUCPP_GPUIMPL // CUDA and HIP implementations have no SIMD #undef MGONGPU_CPPSIMD #elif defined __AVX512VL__ && defined MGONGPU_PVW512 // C++ "512z" AVX512 with 512 width (512-bit ie 64-byte): 8 (DOUBLE) or 16 (FLOAT) #ifdef MGONGPU_FPTYPE_DOUBLE @@ -176,9 +195,9 @@ using mgOnGpu::fptype2; #undef MGONGPU_CPPSIMD #endif -// Cuda nsight compute (ncu) debug: add dummy lines to ease SASS program flow navigation +// CUDA nsight compute (ncu) debug: add dummy lines to ease SASS program flow navigation // Arguments (not used so far): text is __FUNCTION__, code is 0 (start) or 1 (end) -#if defined MGONGPUCPP_GPUIMPL && defined MGONGPU_NSIGHT_DEBUG /* clang-format off */ +#if defined __CUDA__ && defined MGONGPU_NSIGHT_DEBUG /* clang-format off */ #define mgDebugDeclare() __shared__ float mgDebugCounter[mgOnGpu::ntpbMAX]; #define mgDebugInitialise() { mgDebugCounter[threadIdx.x] = 0; } #define mgDebug( code, text ) { mgDebugCounter[threadIdx.x] += 1; } @@ -190,7 +209,7 @@ using mgOnGpu::fptype2; #define mgDebugFinalise() { /*noop*/ } #endif /* clang-format on */ -// Define empty CUDA declaration specifiers for C++ +// Define empty CUDA/HIP declaration specifiers for C++ #ifndef MGONGPUCPP_GPUIMPL #define __global__ #define __host__ diff --git a/epochX/cudacpp/gg_ttgg.mad/src/mgOnGpuConfig.h b/epochX/cudacpp/gg_ttgg.mad/src/mgOnGpuConfig.h index cacab1031a..ed3e219f8a 100644 --- a/epochX/cudacpp/gg_ttgg.mad/src/mgOnGpuConfig.h +++ b/epochX/cudacpp/gg_ttgg.mad/src/mgOnGpuConfig.h @@ -69,6 +69,8 @@ #ifdef __CUDACC__ #undef MGONGPU_NSIGHT_DEBUG // default //#define MGONGPU_NSIGHT_DEBUG 1 +#else +#undef MGONGPU_NSIGHT_DEBUG // only option in HIP or C++ #endif // SANITY CHECKS (floating point precision for everything but color algebra #537) diff --git a/epochX/cudacpp/gg_ttgg.mad/src/mgOnGpuVectors.h b/epochX/cudacpp/gg_ttgg.mad/src/mgOnGpuVectors.h index e1299ba81e..e91f5927d6 100644 --- a/epochX/cudacpp/gg_ttgg.mad/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/gg_ttgg.mad/src/mgOnGpuVectors.h @@ -9,6 +9,8 @@ #include "mgOnGpuCxtypes.h" #include "mgOnGpuFptypes.h" +#include "GpuAbstraction.h" + #include //==========================================================================