diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/CurandRandomNumberKernel.cc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/CurandRandomNumberKernel.cc index 5b33207ad0..98ec214eaf 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/CurandRandomNumberKernel.cc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/CurandRandomNumberKernel.cc @@ -22,7 +22,7 @@ inline void assertCurand( curandStatus_t code, const char *file, int line, bool } #endif /* clang-format on */ -#ifdef MGONGPUCPP_CUDACC +#ifdef __CUDACC__ namespace mg5amcGpu #else namespace mg5amcCpu @@ -36,7 +36,7 @@ namespace mg5amcCpu { if( m_isOnDevice ) { -#ifdef MGONGPUCPP_CUDACC +#ifdef __CUDACC__ if( !m_rnarray.isOnDevice() ) throw std::runtime_error( "CurandRandomNumberKernel on device with a host random number array" ); #else diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/GpuAbstraction.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/GpuAbstraction.h index 2f000e33d1..427c82c05d 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/GpuAbstraction.h +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/GpuAbstraction.h @@ -3,22 +3,10 @@ #include -#ifdef MGONGPUCPP_GPUIMPL -#define MGONGPUCPP_CUDACC 1 -#endif - -#ifdef __HIPCC__ -#include "hip/hip_runtime.h" -#define MGONGPUCPP_HIPCC 1 -#endif - -#ifdef MGONGPUCPP_CUDACC - -// Defines correct compiler -#define MGONGPUCPP_GPUIMPL MGONGPUCPP_GPUIMPL - //-------------------------------------------------------------------------- +#ifdef __CUDACC__ + #define gpuError_t cudaError_t #define gpuPeekAtLastError cudaPeekAtLastError #define gpuGetErrorString cudaGetErrorString @@ -44,12 +32,9 @@ //-------------------------------------------------------------------------- -#elif defined MGONGPUCPP_HIPCC - -// Defines correct compiler -#define MGONGPUCPP_GPUIMPL __HCC__ +#elif defined __HIPCC__ -//-------------------------------------------------------------------------- +#include "hip/hip_runtime.h" #define gpuError_t hipError_t #define gpuPeekAtLastError hipPeekAtLastError @@ -74,6 +59,8 @@ #define gpuLaunchKernel( kernel, blocks, threads, ... ) kernel<<>>( __VA_ARGS__ ) #define gpuLaunchKernelSharedMem( kernel, blocks, threads, sharedMem, ... ) kernel<<>>( __VA_ARGS__ ) +//-------------------------------------------------------------------------- + #endif -#endif // MG5AMC_GPUABSTRACTION_H \ No newline at end of file +#endif // MG5AMC_GPUABSTRACTION_H diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MemoryBuffers.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MemoryBuffers.h index d6ba45dcad..522e6ce100 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MemoryBuffers.h +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MemoryBuffers.h @@ -11,6 +11,7 @@ #include "mgOnGpuCxtypes.h" #include "CPPProcess.h" +#include "GpuRuntime.h" #include "Parameters_%(model_name)s.h" #include diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuVectors.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuVectors.h index ee906f450d..7904b93c61 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuVectors.h +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuVectors.h @@ -9,8 +9,6 @@ #include "mgOnGpuCxtypes.h" #include "mgOnGpuFptypes.h" -#include "GpuAbstraction.h" - #include //========================================================================== diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_cc.inc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_cc.inc index 9dceb45708..95400f42db 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_cc.inc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_cc.inc @@ -14,7 +14,6 @@ #include "mgOnGpuConfig.h" -#include "GpuRuntime.h" %(hel_amps_h)s #include "MemoryAccessAmplitudes.h" #include "MemoryAccessCouplings.h" diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc index 2a473552fa..1269fb0a3f 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc @@ -10,8 +10,6 @@ // Class member functions for calculating the matrix elements for %(process_lines)s -#include "GpuRuntime.h" - #ifdef MGONGPUCPP_GPUIMPL namespace mg5amcGpu #else diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_matrix.inc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_matrix.inc index 241c50a9d1..3cfbf668ca 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_matrix.inc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_matrix.inc @@ -7,8 +7,6 @@ ! Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. !========================================================================== -#include "GpuAbstraction.h" - // *** COLOR CHOICE BELOW *** // Store the leading color flows for choice of color if( jamp2_sv ) // disable color choice if nullptr diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py index 803fa5e258..b5b6ed037b 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py @@ -1080,7 +1080,7 @@ def get_process_function_definitions(self, write=True): %(len(coupling_indep), ' ), cxmake( m_pars->'.join(coupling_indep)) # AV only indep! replace_dict['cipcdevice'] = '__device__ __constant__ fptype cIPC[%i];'%(2*len(coupling_indep)) replace_dict['cipcstatic'] = 'static fptype cIPC[%i];'%(2*len(coupling_indep)) - replace_dict['cipc2tipcSym'] = 'checkCuda( cudaMemcpyToSymbol( cIPC, tIPC, %i * sizeof( cxtype ) ) );'%len(coupling_indep) + replace_dict['cipc2tipcSym'] = 'gpuMemcpyToSymbol( cIPC, tIPC, %i * sizeof( cxtype ) );'%len(coupling_indep) replace_dict['cipc2tipc'] = 'memcpy( cIPC, tIPC, %i * sizeof( cxtype ) );'%len(coupling_indep) replace_dict['cipcdump'] = '\n //for ( i=0; i<%i; i++ ) std::cout << std::setprecision(17) << "tIPC[i] = " << tIPC[i] << std::endl;'%len(coupling_indep) coup_str_hrd = '__device__ const fptype cIPC[%s] = { ' % (len(coupling_indep)*2) @@ -1091,7 +1091,7 @@ def get_process_function_definitions(self, write=True): replace_dict['cipcassign'] = '//const cxtype tIPC[0] = { ... }; // nicoup=0' replace_dict['cipcdevice'] = '__device__ __constant__ fptype* cIPC = nullptr; // unused as nicoup=0' replace_dict['cipcstatic'] = 'static fptype* cIPC = nullptr; // unused as nicoup=0' - replace_dict['cipc2tipcSym'] = '//checkCuda( cudaMemcpyToSymbol( cIPC, tIPC, %i * sizeof( cxtype ) ) ); // nicoup=0'%len(coupling_indep) + replace_dict['cipc2tipcSym'] = '//gpuMemcpyToSymbol( cIPC, tIPC, %i * sizeof( cxtype ) ); // nicoup=0'%len(coupling_indep) replace_dict['cipc2tipc'] = '//memcpy( cIPC, tIPC, %i * sizeof( cxtype ) ); // nicoup=0'%len(coupling_indep) replace_dict['cipcdump'] = '' replace_dict['cipchrdcod'] = '__device__ const fptype* cIPC = nullptr; // unused as nicoup=0' @@ -1100,7 +1100,7 @@ def get_process_function_definitions(self, write=True): %(len(params), ', (fptype)m_pars->'.join(params)) replace_dict['cipddevice'] = '__device__ __constant__ fptype cIPD[%i];'%(len(params)) replace_dict['cipdstatic'] = 'static fptype cIPD[%i];'%(len(params)) - replace_dict['cipd2tipdSym'] = 'checkCuda( cudaMemcpyToSymbol( cIPD, tIPD, %i * sizeof( fptype ) ) );'%len(params) + replace_dict['cipd2tipdSym'] = 'gpuMemcpyToSymbol( cIPD, tIPD, %i * sizeof( fptype ) );'%len(params) replace_dict['cipd2tipd'] = 'memcpy( cIPD, tIPD, %i * sizeof( fptype ) );'%len(params) replace_dict['cipddump'] = '\n //for ( i=0; i<%i; i++ ) std::cout << std::setprecision(17) << "tIPD[i] = " << tIPD[i] << std::endl;'%len(params) param_str_hrd = '__device__ const fptype cIPD[%s] = { ' % len(params) @@ -1111,7 +1111,7 @@ def get_process_function_definitions(self, write=True): replace_dict['cipdassign'] = '//const fptype tIPD[0] = { ... }; // nparam=0' replace_dict['cipddevice'] = '//__device__ __constant__ fptype* cIPD = nullptr; // unused as nparam=0' replace_dict['cipdstatic'] = '//static fptype* cIPD = nullptr; // unused as nparam=0' - replace_dict['cipd2tipdSym'] = '//checkCuda( cudaMemcpyToSymbol( cIPD, tIPD, %i * sizeof( fptype ) ) ); // nparam=0'%len(params) + replace_dict['cipd2tipdSym'] = '//gpuMemcpyToSymbol( cIPD, tIPD, %i * sizeof( fptype ) ); // nparam=0'%len(params) replace_dict['cipd2tipd'] = '//memcpy( cIPD, tIPD, %i * sizeof( fptype ) ); // nparam=0'%len(params) replace_dict['cipddump'] = '' replace_dict['cipdhrdcod'] = '//__device__ const fptype* cIPD = nullptr; // unused as nparam=0' @@ -1183,13 +1183,13 @@ def get_all_sigmaKin_lines(self, color_amplitudes, class_name): fptype* allDenominators, // output: multichannel denominators[nevt], running_sum_over_helicities #endif fptype_sv* jamp2_sv // output: jamp2[nParity][ncolor][neppV] for color choice (nullptr if disabled) -#ifndef __CUDACC__ +#ifndef MGONGPUCPP_GPUIMPL , const int ievt00 // input: first event number in current C++ event page (for CUDA, ievt depends on threadid) #endif ) //ALWAYS_INLINE // attributes are not permitted in a function definition { -#ifdef __CUDACC__ +#ifdef MGONGPUCPP_GPUIMPL using namespace mg5amcGpu; using M_ACCESS = DeviceAccessMomenta; // non-trivial access: buffer includes all events using E_ACCESS = DeviceAccessMatrixElements; // non-trivial access: buffer includes all events @@ -1216,7 +1216,7 @@ def get_all_sigmaKin_lines(self, color_amplitudes, class_name): #endif /* clang-format on */ mgDebug( 0, __FUNCTION__ ); //printf( \"calculate_wavefunctions: ihel=%2d\\n\", ihel ); -#ifndef __CUDACC__ +#ifndef MGONGPUCPP_GPUIMPL //printf( \"calculate_wavefunctions: ievt00=%d\\n\", ievt00 ); #endif""") nwavefuncs = self.matrix_elements[0].get_number_of_wavefunctions() @@ -1253,7 +1253,7 @@ def get_all_sigmaKin_lines(self, color_amplitudes, class_name): #endif for( int iParity = 0; iParity < nParity; ++iParity ) { // START LOOP ON IPARITY -#ifndef __CUDACC__ +#ifndef MGONGPUCPP_GPUIMPL const int ievt0 = ievt00 + iParity * neppV; #endif""") ret_lines += helas_calls @@ -1653,8 +1653,10 @@ def super_get_matrix_element_calls(self, matrix_element, color_amplitudes, multi allCOUPs[idcoup] = CD_ACCESS::idcoupAccessBufferConst( allcouplings, idcoup ); // dependent couplings, vary event-by-event for( size_t iicoup = 0; iicoup < nicoup; iicoup++ ) allCOUPs[ndcoup + iicoup] = CI_ACCESS::iicoupAccessBufferConst( cIPC, iicoup ); // independent couplings, fixed for all events +#ifdef MGONGPUCPP_GPUIMPL #ifdef __CUDACC__ #pragma nv_diagnostic pop +#endif // CUDA kernels take input/output buffers with momenta/MEs for all events const fptype* momenta = allmomenta; const fptype* COUPs[nxcoup]; @@ -1770,7 +1772,7 @@ def get_external(self, wf, argument): split_line2 = [ str.lstrip(' ').rstrip(' ') for str in split_line2] # AV split_line2.insert(2, '0') # add parameter fmass=0 line2 = ', '.join(split_line2) - text = '#if not( defined __CUDACC__ and defined MGONGPU_TEST_DIVERGENCE )\n %s\n#else\n if( ( blockDim.x * blockIdx.x + threadIdx.x ) %% 2 == 0 )\n %s\n else\n %s\n#endif\n' # AV + text = '#if not( defined MGONGPUCPP_GPUIMPL and defined MGONGPU_TEST_DIVERGENCE )\n %s\n#else\n if( ( blockDim.x * blockIdx.x + threadIdx.x ) %% 2 == 0 )\n %s\n else\n %s\n#endif\n' # AV return text % (line, line, line2) text = '%s\n' # AV return text % line diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/MemoryBuffers.h b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/MemoryBuffers.h index 7576be7e7c..d37eafb214 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/MemoryBuffers.h +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/MemoryBuffers.h @@ -7,6 +7,7 @@ #define MemoryBuffers_H 1 #include "mgOnGpuConfig.h" + #include "mgOnGpuCxtypes.h" #include "CPPProcess.h" diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/check_sa.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/check_sa.cc index fbe245d418..9d5f088f38 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/check_sa.cc +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/check_sa.cc @@ -745,7 +745,7 @@ main( int argc, char** argv ) wrkflwtxt += "CUD:"; #elif defined __HIPCC__ wrkflwtxt += "HIP:"; -else +#else wrkflwtxt += "CPP:"; #endif // -- DOUBLE or FLOAT? @@ -1053,7 +1053,7 @@ else #elif defined MGONGPU_CUCXTYPE_CXSMPL << "\"STD::COMPLEX\"," << std::endl #else - << "\"???\"," << std::endl // no path to this statement... + << "\"???\"," << std::endl // no path to this statement... #endif << "\"RanNumb memory layout\": " << "\"AOSOA[" << neppR << "]\""