From 8a6f50d0acb9efb47a37f3fac98e2042a789c884 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 29 Feb 2024 21:12:57 +0100 Subject: [PATCH] [smeft] regenerate smeft_gg_tttt.sa after merging susy2 - generation is ok (also added src/constexpr_math.h), builds fail for both HRDCOD=0 amd =1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For HRDCOD=1 (#616), this is a (non-exhautive?) list of errors In file included from CPPProcess.h:22, from Bridge.h:11, from fsampler.cc:8: ../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h:121:31: error: exponent has no digits 121 | constexpr double mdl_WH = 4.070000e - 03; | ^~~~~~~~~ In file included from /usr/include/c++/11/cassert:44, from ../../src/constexpr_math.h:11, from ../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h:23, from CPPProcess.h:22, from Bridge.h:11, from fsampler.cc:8: ../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h:384:58: in ‘constexpr’ expansion of ‘mg5amcCpu::constexpr_pow(((long double)2.0e+0), ((long double)2.5e-1))’ ../../src/constexpr_math.h:55:5: error: call to non-‘constexpr’ function ‘void __assert_fail(const char*, const char*, unsigned int, const char*)’ 55 | assert( static_cast( iexp ) == exp ); // NB would fail at compile time with "error: call to non-‘constexpr’ function ‘void __assert_fail'" | ^~~~~~ In file included from CPPProcess.h:22, from Bridge.h:11, from fsampler.cc:8: ../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h:388:37: error: ‘ABS’ was not declared in this scope 388 | constexpr double mdl_propCorr = ABS( mdl_linearPropCorrections ) / ( ABS( mdl_linearPropCorrections ) + mdl_nb__10__exp___m_40 ); | ^~~ In file included from CPPProcess.h:22, from Bridge.h:11, from fsampler.cc:8: ../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h: In function ‘const mg5amcCpu::Parameters_SMEFTsim_topU3l_MwScheme_UFO_dependentCouplings::DependentCouplings_sv mg5amcCpu::Parameters_SMEFTsim_topU3l_MwScheme_UFO_dependentCouplings::computeDependentCouplings_fromG(const fptype_sv&, const fptype*)’: ../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h:554:45: error: ‘aS’ was not declared in this scope 554 | const fptype_sv mdl_gHgg2 = ( -7. * aS ) / ( 720. * M_PI ); | ^~ ../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h:560:66: error: conversion from ‘mg5amcCpu::fptype_sv’ {aka ‘__vector(4) double’} to non-scalar type ‘const mgOnGpu::cxsmpl’ requested 560 | constexpr cxsmpl mdl_G__exp__3 = ( ( G ) * ( G ) * ( G ) ); | ~~~~~~~~~~~~~~~~^~~~~~~~~ ../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h(121): warning #2506-D: a user-provided literal suffix must begin with "_" Remark: The warnings can be suppressed with "-diag-suppress " ../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h(121): error: user-defined literal operator not found For HRDCOD=0 (#614), this is a (non-exhautive?) list of errors In file included from CPPProcess.h:22, from Bridge.h:11, from fsampler.cc:8: ../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h: In function ‘const mg5amcCpu::Parameters_SMEFTsim_topU3l_MwScheme_UFO_dependentCouplings::DependentCouplings_sv mg5amcCpu::Parameters_SMEFTsim_topU3l_MwScheme_UFO_dependentCouplings::computeDependentCouplings_fromG(const fptype_sv&, const fptype*)’: ../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h:554:45: error: ‘aS’ was not declared in this scope 554 | const fptype_sv mdl_gHgg2 = ( -7. * aS ) / ( 720. * M_PI ); | ^~ ../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h:560:66: error: conversion from ‘mg5amcCpu::fptype_sv’ {aka ‘__vector(4) double’} to non-scalar type ‘const mgOnGpu::cxsmpl’ requested 560 | constexpr cxsmpl mdl_G__exp__3 = ( ( G ) * ( G ) * ( G ) ); | ~~~~~~~~~~~~~~~~^~~~~~~~~ ../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h:561:35: error: ‘mdl_WH’ was not declared in this scope; did you mean ‘mdl_dWH’? 561 | const fptype_sv mdl_dWH = mdl_WH * ( -0.24161 * mdl_dGf + 0.96644 * mdl_dgw + 0.4832199999999999 * mdl_dkH - 0.11186509426655467 * mdl_dWW + ( 0.36410378449238195 * mdl_cHj3 * mdl_vevhat__exp__2 ) / mdl_LambdaSMEFT__exp__2 + ( 0.17608307708657747 * mdl_cHl3 * mdl_vevhat__exp__2 ) / mdl_LambdaSMEFT__exp__2 + ( 0.1636 * mdl_cHG * mdl_MT__exp__2 * mdl_vevhat__exp__2 ) / ( mdl_LambdaSMEFT__exp__2 * ( -0.5 * mdl_gHgg2 * mdl_MH__exp__2 + mdl_gHgg1 * mdl_MT__exp__2 ) ) + ( mdl_cHW * ( -0.35937785117066967 * mdl_gHaa * mdl_gHza + 0.006164 * mdl_cth * mdl_gHaa * mdl_sth + 0.00454 * mdl_gHza * mdl_sth__exp__2 ) * mdl_vevhat__exp__2 ) / ( mdl_gHaa * mdl_gHza * mdl_LambdaSMEFT__exp__2 ) + ( mdl_cHWB * ( -0.00454 * mdl_cth * mdl_gHza * mdl_sth + mdl_gHaa * ( -0.0030819999999999997 + 0.006163999999999999 * mdl_sth__exp__2 ) ) * mdl_vevhat__exp__2 ) / ( mdl_gHaa * mdl_gHza * mdl_LambdaSMEFT__exp__2 ) + ( mdl_cHB * ( -0.006163999999999999 * mdl_cth * mdl_gHaa * mdl_sth - 0.00454 * mdl_gHza * ( -1. + mdl_sth__exp__2 ) ) * mdl_vevhat__exp__2 ) / ( mdl_gHaa * mdl_gHza * mdl_LambdaSMEFT__exp__2 ) + mdl_dWHc + mdl_dWHb + mdl_dWHta ); | ^~~~~~ | mdl_dWH ../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h(554): error: identifier "aS" is undefined ../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h(560): error: expression must have a constant value ../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h(560): note #2689-D: the value of variable "G" (550): here cannot be used as a constant ../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h(561): error: identifier "mdl_WH" is undefined --- .../CODEGEN_cudacpp_smeft_gg_tttt_log.txt | 30 +-- .../CPPProcess.cc | 37 ++- .../smeft_gg_tttt.sa/SubProcesses/cudacpp.mk | 3 + .../smeft_gg_tttt.sa/SubProcesses/testmisc.cc | 139 +++++++++++ ...Parameters_SMEFTsim_topU3l_MwScheme_UFO.cc | 8 +- .../Parameters_SMEFTsim_topU3l_MwScheme_UFO.h | 68 +++--- .../smeft_gg_tttt.sa/src/constexpr_math.h | 223 ++++++++++++++++++ .../smeft_gg_tttt.sa/src/mgOnGpuCxtypes.h | 3 +- 8 files changed, 449 insertions(+), 62 deletions(-) create mode 100644 epochX/cudacpp/smeft_gg_tttt.sa/src/constexpr_math.h diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt index c45d36eeba..6b74176d7b 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt @@ -77,7 +77,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.13582301139831543  +DEBUG: model prefixing takes 0.13656258583068848  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -92,7 +92,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 3.643 s +1 processes with 72 diagrams generated in 3.639 s Total: 1 processes with 72 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt Load PLUGIN.CUDACPP_OUTPUT @@ -100,30 +100,30 @@ Load PLUGIN.CUDACPP_OUTPUT It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  plugin [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 160]  -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 165]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 161]  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 166]  INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ t t~ @1 -DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 194]  -DEBUG: type(subproc_group)= [output.py at line 195]  -DEBUG: type(fortran_model)= [output.py at line 196]  -DEBUG: type(me)= me=0 [output.py at line 197]  -DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 198]  +DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 195]  +DEBUG: type(subproc_group)= [output.py at line 196]  +DEBUG: type(fortran_model)= [output.py at line 197]  +DEBUG: type(me)= me=0 [output.py at line 198]  +DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 199]  INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/. -Generated helas calls for 1 subprocesses (72 diagrams) in 0.182 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 203]  +Generated helas calls for 1 subprocesses (72 diagrams) in 0.186 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 204]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.311 s +ALOHA: aloha creates 5 routines in 0.316 s VVV5 VVV5 FFV1 @@ -143,7 +143,7 @@ INFO: Created files Parameters_SMEFTsim_topU3l_MwScheme_UFO.h and Parameters_SME INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. quit -real 0m5.006s -user 0m4.931s -sys 0m0.055s +real 0m5.046s +user 0m4.945s +sys 0m0.073s Code generation completed in 5 seconds diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CPPProcess.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CPPProcess.cc index 7eab30aba5..fc0614b8fb 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CPPProcess.cc +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CPPProcess.cc @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -86,6 +87,31 @@ namespace mg5amcCpu static fptype cIPD[2]; static fptype* cIPC = nullptr; // unused as nicoup=0 #endif +#endif + + // AV Jan 2024 (PR #625): this ugly #define was the only way I found to avoid creating arrays[nBsm] in CPPProcess.cc if nBsm is 0 + // The problem is that nBsm is determined when generating Parameters.h, which happens after CPPProcess.cc has already been generated + // For simplicity, keep this code hardcoded also for SM processes (a nullptr is needed as in the case nBsm == 0) +#ifdef MGONGPUCPP_NBSMINDEPPARAM_GT_0 +#ifdef MGONGPU_HARDCODE_PARAM + __device__ const double* bsmIndepParam = Parameters_MSSM_SLHA2::mdl_bsmIndepParam; +#else +#ifdef MGONGPUCPP_GPUIMPL + __device__ __constant__ double bsmIndepParam[Parameters_MSSM_SLHA2::nBsmIndepParam]; +#else + static double bsmIndepParam[Parameters_MSSM_SLHA2::nBsmIndepParam]; +#endif +#endif +#else +#ifdef MGONGPU_HARDCODE_PARAM + __device__ const double* bsmIndepParam = nullptr; +#else +#ifdef MGONGPUCPP_GPUIMPL + __device__ __constant__ double* bsmIndepParam = nullptr; +#else + static double* bsmIndepParam = nullptr; +#endif +#endif #endif // Helicity combinations (and filtering of "good" helicity combinations) @@ -1544,11 +1570,16 @@ namespace mg5amcCpu #ifdef MGONGPUCPP_GPUIMPL gpuMemcpyToSymbol( cIPD, tIPD, 2 * sizeof( fptype ) ); //gpuMemcpyToSymbol( cIPC, tIPC, 0 * sizeof( cxtype ) ); // nicoup=0 + if( Parameters_MSSM_SLHA2::nBsmIndepParam > 0 ) + gpuMemcpyToSymbol( bsmIndepParam, m_pars->mdl_bsmIndepParam, Parameters_MSSM_SLHA2::nBsmIndepParam * sizeof( double ) ); #else memcpy( cIPD, tIPD, 2 * sizeof( fptype ) ); //memcpy( cIPC, tIPC, 0 * sizeof( cxtype ) ); // nicoup=0 + if( Parameters_MSSM_SLHA2::nBsmIndepParam > 0 ) + memcpy( bsmIndepParam, m_pars->mdl_bsmIndepParam, Parameters_MSSM_SLHA2::nBsmIndepParam * sizeof( double ) ); #endif - //for ( i=0; i<2; i++ ) std::cout << std::setprecision(17) << "tIPD[i] = " << tIPD[i] << std::endl; + //for ( int i=0; i<2; i++ ) std::cout << std::setprecision(17) << "tIPD[i] = " << tIPD[i] << std::endl; + //for ( int i=0; imdl_bsmIndepParam[i] = " << m_pars->mdl_bsmIndepParam[i] << std::endl; } #else // Initialize process (with hardcoded parameters) @@ -1661,7 +1692,7 @@ namespace mg5amcCpu using namespace mg5amcGpu; using G_ACCESS = DeviceAccessGs; using C_ACCESS = DeviceAccessCouplings; - G2COUP( allgs, allcouplings ); + G2COUP( allgs, allcouplings, bsmIndepParam ); #else using namespace mg5amcCpu; using G_ACCESS = HostAccessGs; @@ -1671,7 +1702,7 @@ namespace mg5amcCpu const int ievt0 = ipagV * neppV; const fptype* gs = MemoryAccessGs::ieventAccessRecordConst( allgs, ievt0 ); fptype* couplings = MemoryAccessCouplings::ieventAccessRecord( allcouplings, ievt0 ); - G2COUP( gs, couplings ); + G2COUP( gs, couplings, bsmIndepParam ); } #endif } diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk index 3ad91dfd59..f7a61d3e74 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk @@ -847,6 +847,9 @@ $(testmain): LIBFLAGS += -lgomp endif endif +# Test quadmath in testmisc.cc tests for constexpr_math #627 +###$(testmain): LIBFLAGS += -lquadmath + # Bypass std::filesystem completely to ease portability on LUMI #803 #ifneq ($(findstring hipcc,$(GPUCC)),) #$(testmain): LIBFLAGS += -lstdc++fs diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/testmisc.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/testmisc.cc index ac0b049e60..8c29482e5a 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/testmisc.cc +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/testmisc.cc @@ -10,10 +10,14 @@ #include "mgOnGpuVectors.h" +#include "constexpr_math.h" #include "epoch_process_id.h" #include +//#include +//#include // needs C++20... https://stackoverflow.com/a/65347016 +#include #include #include @@ -295,4 +299,139 @@ TEST( XTESTID( MG_EPOCH_PROCESS_ID ), testmisc ) } //-------------------------------------------------------------------------- + + // Test constexpr floor + EXPECT_TRUE( constexpr_floor( 1.5 ) == 1 ); + EXPECT_TRUE( constexpr_floor( 0.5 ) == 0 ); + EXPECT_TRUE( constexpr_floor( -0.5 ) == -1 ); + EXPECT_TRUE( constexpr_floor( -1.5 ) == -2 ); + + // Distance from the horizontal or vertical axis (i.e. from 0, pi/2, pi, or 3pi/2) + auto distance4 = []( const long double xx ) + { + const long double xx2 = mapIn0to2Pi( xx ); // in [0,2*pi) + const long double xx3 = xx2 - constexpr_floor( xx2 / constexpr_pi_by_2 ) * constexpr_pi_by_2; // in [0,pi/2) + const long double d0 = xx3; // distance from 0 + const long double d1 = constexpr_pi_by_2 - xx3; // distance from pi/2 + return ( d0 < d1 ? d0 : d1 ); + }; + + // Test constexpr sin, cos, tan - specific, problematic, points + auto testSinCosTanX = []( const long double xx, const double tolerance, const bool debug = false, const long long istep = -999999999 ) + { + const double x = (double)xx; + if( debug ) + { + //std::cout << std::setprecision(40) << "testSinCosTanX: xx= " << xx << std::endl; + //std::cout << std::setprecision(40) << " x= " << x << std::endl; + } + //std::cout << std::setprecision(40) << "xx - 3pi/2 " << xx - 3 * constexpr_pi_by_2 << std::endl; + //int width = 46; + //char buf[128]; + //quadmath_snprintf( buf, sizeof( buf ), "%+-#*.40Qe", width, (__float128)xx ); + //std::cout << std::setprecision(40) << "testSinCosTanX: xx=" << buf << std::endl; + //quadmath_snprintf( buf, sizeof( buf ), "%+-#*.40Qe", width, (__float128)x ); + //std::cout << std::setprecision(40) << " x= " << buf << std::endl; + EXPECT_NEAR( std::sin( x ), constexpr_sin( x ), std::abs( std::sin( x ) * tolerance ) ) + << "x=" << x << ", x(0to2Pi)=" << mapIn0to2Pi( x ) << ", istep=" << istep; + EXPECT_NEAR( std::cos( x ), constexpr_cos( x ), std::abs( std::cos( x ) * tolerance ) ) + << "x=" << x << ", x(0to2Pi)=" << mapIn0to2Pi( x ) << ", istep=" << istep; + EXPECT_NEAR( std::tan( x ), constexpr_tan( x ), std::abs( std::tan( x ) * tolerance ) ) + << "x=" << x << ", x(0to2Pi)=" << mapIn0to2Pi( x ) << ", istep=" << istep; + std::cout << std::setprecision( 6 ); // default + }; + testSinCosTanX( M_PIl, 1E-3, true ); // from math.h + testSinCosTanX( (long double)3.141592653589793238462643383279502884L, 1E-3, true ); // from math.h + testSinCosTanX( 4.712388980384687897640105802565813064575L, 1E-3, true ); // from 100 steps n [-4*pi,6*pi]... succeeds? (note x==xx) + testSinCosTanX( 3 * constexpr_pi_by_2 - 1.96e-15L, 1E-3, true ); // from 100 steps n [-4*pi,6*pi]... succeeds? (note x!=xx) + testSinCosTanX( 3 * constexpr_pi_by_2 - 1.9601e-15L, 1E-3, true ); // from 100 steps n [-4*pi,6*pi]... succeeds? (note x==xx) + + // Test constexpr sin, cos, tan - 8 points on (or close to) the boundaries of the 8 sectors of [0,2*pi] + auto testSinCosTan8 = [testSinCosTanX]( const double deltax, const double tolerance ) + { + for( int ioff = -1; ioff < 2; ioff++, ioff++ ) // -1, 1 + { + const bool debug = false; + const int nstep = 8; + for( int istep = 0; istep < nstep + 1; istep++ ) + { + long double x0 = deltax * ioff; + long double x1 = deltax * ioff + 2 * constexpr_pi; + double x = x0 + istep * ( x1 - x0 ) / nstep; // test this for double (else std::cos and std::sin use long double) + testSinCosTanX( x, tolerance, debug, istep ); + } + } + }; + + // Use much lower tolerance when testing on the boundaries of the 8 sectors of [0,2*pi] + // Use progressively stricter tolerances as you move away from the boundaries of the 8 sectors of [0,2*pi] + testSinCosTan8( 0, 1E-03 ); // fails with 1E-04 - DANGEROUS ANYWAY... + testSinCosTan8( 1E-15, 1E-03 ); // fails with 1E-04 - DANGEROUS ANYWAY... + testSinCosTan8( 1E-14, 1E-04 ); // fails with 1E-05 + testSinCosTan8( 1E-12, 1E-06 ); // fails with 1E-07 + testSinCosTan8( 1E-09, 1E-09 ); // fails with 1E-10 + testSinCosTan8( 1E-06, 1E-12 ); // fails with 1E-13 + testSinCosTan8( 1E-03, 1E-15 ); // fails with 1E-16 + testSinCosTan8( 1E-02, 1E-99 ); // never fails? always bit-by-bit identical? + + // Test constexpr sin, cos, tan - N points almost randomly with a varying tolerance + auto testSinCosTanN = [testSinCosTanX, distance4]( const int nstep, const double x0, const double x1 ) + { + auto toleranceForX = [distance4]( const double x ) + { + const double d4 = distance4( x ); + if( d4 < 1E-14 ) + return 1E-03; // NB: absolute distance limited to 1E-14 anyway even if relative tolerance is 1E-3... + else if( d4 < 1E-13 ) + return 1E-04; + else if( d4 < 1E-12 ) + return 1E-05; + else if( d4 < 1E-11 ) + return 1E-06; + else if( d4 < 1E-10 ) + return 1E-07; + else if( d4 < 1E-09 ) + return 1E-08; + else if( d4 < 1E-08 ) + return 1E-09; + else if( d4 < 1E-07 ) + return 1E-10; + else if( d4 < 1E-06 ) + return 1E-11; + else if( d4 < 1E-05 ) + return 1E-12; + else if( d4 < 1E-04 ) + return 1E-13; + else + return 1E-14; // play it safe even if the agreement might even be better? + }; + for( int istep = 0; istep < nstep + 1; istep++ ) + { + double x = x0 + istep * ( x1 - x0 ) / nstep; // test this for double (else std::cos and std::sin use long double) + const double tolerance = toleranceForX( x ); + EXPECT_NEAR( std::sin( x ), constexpr_sin( x ), std::max( std::abs( std::sin( x ) * tolerance ), 3E-15 ) ) + << std::setprecision( 40 ) << "x=" << x << ", x(0to2Pi)=" << mapIn0to2Pi( x ) << ",\n istep=" << istep << ", distance4=" << distance4( x ); + EXPECT_NEAR( std::cos( x ), constexpr_cos( x ), std::max( std::abs( std::cos( x ) * tolerance ), 3E-15 ) ) + << std::setprecision( 40 ) << "x=" << x << ", x(0to2Pi)=" << mapIn0to2Pi( x ) << ",\n istep=" << istep << ", distance4=" << distance4( x ); + EXPECT_NEAR( std::tan( x ), constexpr_tan( x ), std::max( std::abs( std::tan( x ) * tolerance ), 3E-15 ) ) + << std::setprecision( 40 ) << "x=" << x << ", x(0to2Pi)=" << mapIn0to2Pi( x ) << ",\n istep=" << istep << ", distance4=" << distance4( x ); + } + }; + testSinCosTanN( 100, -4 * constexpr_pi, 6 * constexpr_pi ); // this was failing at 3*pi/2 (now fixed by absolute tolerance 3E-15) + testSinCosTanN( 10000, -constexpr_pi_by_2, 5 * constexpr_pi_by_2 ); + + // Test constexpr atan + { + const double tolerance = 1E-12; + const int nstep = 1000; + for( int istep = 0; istep < nstep + 1; istep++ ) + { + long double x0 = -5, x1 = +5; + double x = x0 + istep * ( x1 - x0 ) / nstep; // test this for double (else std::cos and std::sin use long double) + EXPECT_NEAR( std::atan( x ), constexpr_atan( x ), std::abs( std::atan( x ) * tolerance ) ) + << "x=" << x << ", istep=" << istep; + } + } + + //-------------------------------------------------------------------------- } diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.cc b/epochX/cudacpp/smeft_gg_tttt.sa/src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.cc index 7a91bad998..15b14801e7 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.cc +++ b/epochX/cudacpp/smeft_gg_tttt.sa/src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.cc @@ -40,9 +40,9 @@ Parameters_SMEFTsim_topU3l_MwScheme_UFO::getInstance() void Parameters_SMEFTsim_topU3l_MwScheme_UFO::setIndependentParameters( SLHAReader& slha ) { - zero = 0; // define "zero" - ZERO = 0; // define "zero" - //std::vector indices(2, 0); // prepare a vector for indices + zero = 0; // define "zero" + ZERO = 0; // define "zero" + std::vector indices( 2, 0 ); // prepare a vector for indices mdl_WH = slha.get_block_entry( "decay", 25, 4.070000e-03 ); mdl_WW = slha.get_block_entry( "decay", 24, 2.085000e+00 ); mdl_WZ = slha.get_block_entry( "decay", 23, 2.495200e+00 ); @@ -387,6 +387,8 @@ Parameters_SMEFTsim_topU3l_MwScheme_UFO::setIndependentParameters( SLHAReader& s mdl_g1sh = ( mdl_ee * ( 1. + mdl_dg1 - ( mdl_cHB * mdl_vevhat__exp__2 ) / mdl_LambdaSMEFT__exp__2 ) ) / mdl_cth; mdl_ee__exp__3 = ( ( mdl_ee ) * ( mdl_ee ) * ( mdl_ee ) ); mdl_vevhat__exp__3 = ( ( mdl_vevhat ) * ( mdl_vevhat ) * ( mdl_vevhat ) ); + // BSM parameters that do not depend on alphaS but are needed in the computation of alphaS-dependent couplings; + // (none) } void diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h b/epochX/cudacpp/smeft_gg_tttt.sa/src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h index ddc905638d..ba4fce3110 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h +++ b/epochX/cudacpp/smeft_gg_tttt.sa/src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h @@ -20,10 +20,17 @@ #include "mgOnGpuCxtypes.h" #include "mgOnGpuVectors.h" +#include "constexpr_math.h" + //========================================================================== -#ifndef MGONGPU_HARDCODE_PARAM // this is only supported in SM processes (e.g. not in EFT models) for the moment (#439) -#error This non-SM physics process only supports MGONGPU_HARDCODE_PARAM builds (#439): please run "make HRDCOD=1" +// AV Jan 2024 (PR #625): this ugly #define was the only way I found to avoid creating arrays[nBsm] in CPPProcess.cc if nBsm is 0 +// The problem is that nBsm is determined when generating Parameters.h, which happens after CPPProcess.cc has already been generated +// For simplicity, keep this code hardcoded also for SM processes (a nullptr is needed as in the case nBsm == 0) +#undef MGONGPUCPP_NBSMINDEPPARAM_GT_0 + +#ifndef MGONGPU_HARDCODE_PARAM +//#warning Support for non-SM physics processes (e.g. SUSY or EFT) is still limited for HRDCOD=0 builds (#439 and PR #625) #include "read_slha.h" @@ -81,6 +88,9 @@ namespace mg5amcCpu // Print couplings that are changed event by event //void printDependentCouplings(); // now computed event-by-event (running alphas #373) + // BSM parameters that do not depend on alphaS but are needed in the computation of alphaS-dependent couplings; + static constexpr int nBsmIndepParam = 0; + //double mdl_bsmIndepParam[nBsmIndepParam]; private: @@ -90,6 +100,7 @@ namespace mg5amcCpu } // end namespace mg5amcGpu/mg5amcCpu #else +//#warning Support for non-SM physics processes (e.g. SUSY or EFT) is still limited for HRDCOD=1 builds (#439 and PR #625) #include #include @@ -104,37 +115,6 @@ namespace mg5amcCpu // Hardcoded constexpr physics parameters namespace Parameters_SMEFTsim_topU3l_MwScheme_UFO // keep the same name rather than HardcodedParameters_SMEFTsim_topU3l_MwScheme_UFO for simplicity { - // Constexpr implementation of sqrt (see https://stackoverflow.com/a/34134071) - double constexpr sqrtNewtonRaphson( double x, double curr, double prev ) - { - return curr == prev ? curr : sqrtNewtonRaphson( x, 0.5 * ( curr + x / curr ), curr ); - } - double constexpr constexpr_sqrt( double x ) - { - return x >= 0 // && x < std::numeric_limits::infinity() // avoid -Wtautological-constant-compare warning in fast math - ? sqrtNewtonRaphson( x, x, 0 ) - : std::numeric_limits::quiet_NaN(); - } - - // Constexpr implementation of floor (see https://stackoverflow.com/a/66146159) - constexpr int constexpr_floor( double d ) - { - const int i = static_cast( d ); - return d < i ? i - 1 : i; - } - - // Constexpr implementation of pow - constexpr double constexpr_pow( double base, double exp ) - { - // NB(1): this implementation of constexpr_pow requires exponent >= 0 - assert( exp >= 0 ); // NB would fail at compile time with "error: call to non-‘constexpr’ function ‘void __assert_fail'" - // NB(2): this implementation of constexpr_pow requires an integer exponent - const int iexp = constexpr_floor( exp ); - assert( static_cast( iexp ) == exp ); // NB would fail at compile time with "error: call to non-‘constexpr’ function ‘void __assert_fail'" - // Iterative implementation of pow if exp is a non negative integer - return iexp == 0 ? 1 : base * constexpr_pow( base, iexp - 1 ); - } - // Model parameters independent of aS constexpr double zero = 0; constexpr double ZERO = 0; @@ -487,8 +467,8 @@ namespace mg5amcCpu // (none) // Model parameters dependent on aS - //constexpr double mdl_sqrt__aS = //constexpr_sqrt( aS ); // now computed event-by-event (running alphas #373) - //constexpr double G = 2. * mdl_sqrt__aS * //constexpr_sqrt( M_PI ); // now computed event-by-event (running alphas #373) + //constexpr double mdl_sqrt__aS = constexpr_sqrt( aS ); // now computed event-by-event (running alphas #373) + //constexpr double G = 2. * mdl_sqrt__aS * constexpr_sqrt( M_PI ); // now computed event-by-event (running alphas #373) //constexpr double mdl_gHgg2 = ( -7. * aS ) / ( 720. * M_PI ); // now computed event-by-event (running alphas #373) //constexpr double mdl_gHgg4 = aS / ( 360. * M_PI ); // now computed event-by-event (running alphas #373) //constexpr double mdl_gHgg5 = aS / ( 20. * M_PI ); // now computed event-by-event (running alphas #373) @@ -514,6 +494,10 @@ namespace mg5amcCpu // Print couplings that are changed event by event //void printDependentCouplings(); // now computed event-by-event (running alphas #373) + + // BSM parameters that do not depend on alphaS but are needed in the computation of alphaS-dependent couplings; + constexpr int nBsmIndepParam = 0; + //__device__ constexpr double mdl_bsmIndepParam[nBsmIndepParam]; } } // end namespace mg5amcGpu/mg5amcCpu @@ -542,16 +526,19 @@ namespace mg5amcCpu cxtype_sv GC_8; }; #pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-variable" // e.g. <> -#pragma GCC diagnostic ignored "-Wunused-parameter" // e.g. <> +#pragma GCC diagnostic ignored "-Wunused-parameter" // e.g. <> +#pragma GCC diagnostic ignored "-Wunused-variable" // e.g. <> +#pragma GCC diagnostic ignored "-Wunused-but-set-variable" // e.g. <> #ifdef MGONGPUCPP_GPUIMPL #pragma nv_diagnostic push #pragma nv_diag_suppress 177 // e.g. <> #endif - __host__ __device__ inline const DependentCouplings_sv computeDependentCouplings_fromG( const fptype_sv& G_sv ) + __host__ __device__ inline const DependentCouplings_sv computeDependentCouplings_fromG( const fptype_sv& G_sv, const fptype* bsmIndepParamPtr ) { #ifdef MGONGPU_HARDCODE_PARAM using namespace Parameters_SMEFTsim_topU3l_MwScheme_UFO; +#else + // No additional parameters needed in constant memory for this BSM model #endif // NB: hardcode cxtype cI(0,1) instead of cxtype (or hardcoded cxsmpl) mdl_complexi (which exists in Parameters_SMEFTsim_topU3l_MwScheme_UFO) because: // (1) mdl_complexi is always (0,1); (2) mdl_complexi is undefined in device code; (3) need cxsmpl conversion to cxtype in code below @@ -643,12 +630,13 @@ namespace mg5amcCpu template __device__ inline void G2COUP( const fptype gs[], - fptype couplings[] ) + fptype couplings[], + const fptype* bsmIndepParamPtr ) { mgDebug( 0, __FUNCTION__ ); using namespace Parameters_SMEFTsim_topU3l_MwScheme_UFO_dependentCouplings; const fptype_sv& gs_sv = G_ACCESS::kernelAccessConst( gs ); - DependentCouplings_sv couplings_sv = computeDependentCouplings_fromG( gs_sv ); + DependentCouplings_sv couplings_sv = computeDependentCouplings_fromG( gs_sv, bsmIndepParamPtr ); fptype* GC_7s = C_ACCESS::idcoupAccessBuffer( couplings, idcoup_GC_7 ); fptype* GC_6s = C_ACCESS::idcoupAccessBuffer( couplings, idcoup_GC_6 ); fptype* GC_8s = C_ACCESS::idcoupAccessBuffer( couplings, idcoup_GC_8 ); diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/src/constexpr_math.h b/epochX/cudacpp/smeft_gg_tttt.sa/src/constexpr_math.h new file mode 100644 index 0000000000..78ff8b16ab --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/src/constexpr_math.h @@ -0,0 +1,223 @@ +// Copyright (C) 2020-2024 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Feb 2024) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2024) for the MG5aMC CUDACPP plugin. + +#ifndef constexpr_math_h +#define constexpr_math_h 1 + +#include "mgOnGpuConfig.h" + +#include +#include +#include + +// FOR DEBUGGING! +#undef CONSTEXPR_MATH_DEBUG // no-debug +//#define CONSTEXPR_MATH_DEBUG 1 // debug +#ifdef CONSTEXPR_MATH_DEBUG +#define constexpr const +#endif + +// NB: namespaces mg5amcGpu and mg5amcCpu includes types which are defined in different ways for CPU and GPU builds (see #318 and #725) +#ifdef MGONGPUCPP_GPUIMPL +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + // Constexpr implementation of sqrt (see https://stackoverflow.com/a/34134071) + constexpr long double sqrtNewtonRaphson( const long double xx, const long double curr, const long double prev ) + { + return curr == prev ? curr : sqrtNewtonRaphson( xx, 0.5 * ( curr + xx / curr ), curr ); + } + constexpr long double constexpr_sqrt( const long double xx ) + { + return xx >= 0 // && x < std::numeric_limits::infinity() // avoid -Wtautological-constant-compare warning in fast math + ? sqrtNewtonRaphson( xx, xx, 0 ) + : std::numeric_limits::quiet_NaN(); + } + + // Constexpr implementation of floor (see https://stackoverflow.com/a/66146159) + constexpr int constexpr_floor( const long double xx ) + { + const int i = static_cast( xx ); + return xx < i ? i - 1 : i; + } + + // Constexpr implementation of pow + constexpr long double constexpr_pow( const long double base, const long double exp ) + { + // NB(1): this implementation of constexpr_pow requires exponent >= 0 + assert( exp >= 0 ); // NB would fail at compile time with "error: call to non-‘constexpr’ function ‘void __assert_fail'" + // NB(2): this implementation of constexpr_pow requires an integer exponent + const int iexp = constexpr_floor( exp ); + assert( static_cast( iexp ) == exp ); // NB would fail at compile time with "error: call to non-‘constexpr’ function ‘void __assert_fail'" + // Iterative implementation of pow if exp is a non negative integer + return iexp == 0 ? 1 : base * constexpr_pow( base, iexp - 1 ); + } + + // PI from cmath + constexpr long double constexpr_pi = M_PIl; // pi + constexpr long double constexpr_pi_by_2 = M_PI_2l; // pi/2 + constexpr long double constexpr_pi_by_4 = M_PI_4l; // pi/4 + + // Constexpr implementation of sin for 0= 0 && "The argument of sinTaylor is assumed to be in [0,pi/4)" ); + assert( xx < constexpr_pi_by_4 && "The argument of sinTaylor is assumed to be in [0,pi/4)" ); + long double sinx = 0; + int ipow = 1; + long double delta = xx; + while( true ) + { + long double sinxlast = sinx; + sinx += delta; +#ifdef CONSTEXPR_MATH_DEBUG + std::cout << "ipow=" << ipow << ", delta=" << delta << ", sinx=" << sinx << std::endl; // for debugging (not constexpr) +#endif + if( sinx == sinxlast ) break; + // Next iteration + ipow += 2; + delta *= -xx * xx / ( ipow - 1 ) / ipow; + } + return sinx; + } + + // Mapping to [0,2*pi) range (long double signature) + constexpr long double mapIn0to2Pi( const long double xx ) + { + return xx - constexpr_floor( xx / 2 / constexpr_pi ) * 2 * constexpr_pi; + } + + // Constexpr implementation of cos (long double signature) + constexpr long double constexpr_cos_quad( const long double xx, const bool assume0to2Pi = false ) + { + if( assume0to2Pi ) + { + assert( xx >= 0 && "The argument of constexpr_cos_quad is assumed to be in [0,2*pi)" ); + assert( xx < 2 * constexpr_pi && "The argument of constexpr_cos_quad is assumed to be in [0,2*pi)" ); + } + if( xx < 0 ) + return constexpr_cos_quad( mapIn0to2Pi( xx ), true ); + else if( xx < constexpr_pi_by_4 ) // [0/4*pi, 1/4*pi) + return constexpr_sqrt( 1 - constexpr_pow( sinTaylor( xx ), 2 ) ); + else if( xx < constexpr_pi_by_2 ) // [1/4*pi, 2/4*pi) + return sinTaylor( constexpr_pi_by_2 - xx ); + else if( xx < 3 * constexpr_pi_by_4 ) // [2/4*pi, 3/4*pi) + return -sinTaylor( xx - constexpr_pi_by_2 ); + else if( xx < constexpr_pi ) // [3/4*pi, 4/4*pi) + return -constexpr_sqrt( 1 - constexpr_pow( sinTaylor( constexpr_pi - xx ), 2 ) ); + else if( xx < 2 * constexpr_pi ) // [4/4*pi, 8/4*pi) + return constexpr_cos_quad( 2 * constexpr_pi - xx, true ); + else // [8/4*pi, +inf) + return constexpr_cos_quad( mapIn0to2Pi( xx ), true ); + } + + // Constexpr implementation of cos (double signature, internally implemented as long double) + constexpr double constexpr_cos( const double x ) + { + return constexpr_cos_quad( x ); + } + + // Constexpr implementation of sin (long double signature) + constexpr long double constexpr_sin_quad( const long double xx, const bool assume0to2Pi = false ) + { + if( assume0to2Pi ) + { + assert( xx >= 0 && "The argument of constexpr_sin_quad is assumed to be in [0,2*pi)" ); + assert( xx < 2 * constexpr_pi && "The argument of constexpr_sin_quad is assumed to be in [0,2*pi)" ); + } + if( xx < 0 ) + return constexpr_sin_quad( mapIn0to2Pi( xx ), true ); + else if( xx < constexpr_pi_by_4 ) // [0/4*pi, 1/4*pi) + return sinTaylor( xx ); + else if( xx < constexpr_pi_by_2 ) // [1/4*pi, 2/4*pi) + return constexpr_sqrt( 1 - constexpr_pow( sinTaylor( constexpr_pi_by_2 - xx ), 2 ) ); + else if( xx < 3 * constexpr_pi_by_4 ) // [2/4*pi, 3/4*pi) + return constexpr_sqrt( 1 - constexpr_pow( sinTaylor( xx - constexpr_pi_by_2 ), 2 ) ); + else if( xx < constexpr_pi ) // [3/4*pi, 4/4*pi) + return sinTaylor( constexpr_pi - xx ); + else if( xx < 2 * constexpr_pi ) // [4/4*pi, 8/4*pi) + return -constexpr_sin_quad( 2 * constexpr_pi - xx, true ); + else // [8/4*pi, +inf) + return constexpr_sin_quad( mapIn0to2Pi( xx ), true ); + } + + // Constexpr implementation of sin (double signature, internally implemented as long double) + constexpr double constexpr_sin( const double x ) + { + return constexpr_sin_quad( x ); + } + + // Constexpr implementation of tan (long double signature) + constexpr long double constexpr_tan_quad( const long double xx, const bool assume0to2Pi = false ) + { + if( assume0to2Pi ) + { + assert( xx >= 0 && "The argument of constexpr_sin_quad is assumed to be in [0,2*pi)" ); + assert( xx < 2 * constexpr_pi && "The argument of constexpr_sin_quad is assumed to be in [0,2*pi)" ); + } + if( xx < 0 ) + return constexpr_tan_quad( mapIn0to2Pi( xx ), true ); + else if( xx < 2 * constexpr_pi ) // [0, 2*pi) + return constexpr_sin_quad( xx, assume0to2Pi ) / constexpr_cos_quad( xx, assume0to2Pi ); + else // [8/4*pi, +inf) + return constexpr_tan_quad( mapIn0to2Pi( xx ), true ); + } + + // Constexpr implementation of tan (double signature, internally implemented as long double) + constexpr double constexpr_tan( const double x ) + { + return constexpr_tan_quad( x ); + } + + // Constexpr implementation of atan for -1= -1 && "The argument of atanTaylor is assumed to be in (-1,+1)" ); + assert( xx < 1 && "The argument of atanTaylor is assumed to be in (-1,+1)" ); + long double atanx = 0; + int ipow = 1; + long double xpow = xx; + while( true ) + { + long double atanxlast = atanx; + atanx += xpow / ipow; +#ifdef CONSTEXPR_MATH_DEBUG + std::cout << "ipow=" << ipow << ", xpow=" << xpow << ", atanx=" << atanx << std::endl; // for debugging (not constexpr) +#endif + if( atanx == atanxlast ) break; + // Next iteration + ipow += 2; + xpow *= -xx * xx; + } + return atanx; + } + + // Constexpr implementation of atan (long double signature) + constexpr long double constexpr_atan_quad( const long double xx ) + { + if( xx > 1 ) + return constexpr_pi_by_2 - atanTaylor( 1 / xx ); + else if( xx == 1 ) + return constexpr_pi_by_4; + else if( xx > -1 ) + return atanTaylor( xx ); + else if( xx == -1 ) + return -constexpr_pi_by_4; + else // if( xx < -1 ) + return -constexpr_pi_by_2 - atanTaylor( 1 / xx ); + } + + // Constexpr implementation of atan (double signature, internally implemented as long double) + constexpr double constexpr_atan( const double x ) + { + return constexpr_atan_quad( x ); + } +} + +#endif // constexpr_math_h diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/src/mgOnGpuCxtypes.h b/epochX/cudacpp/smeft_gg_tttt.sa/src/mgOnGpuCxtypes.h index 7ede1dbfae..9ef1c44899 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/src/mgOnGpuCxtypes.h +++ b/epochX/cudacpp/smeft_gg_tttt.sa/src/mgOnGpuCxtypes.h @@ -76,7 +76,8 @@ namespace mgOnGpu /* clang-format off */ }; template - inline __host__ __device__ cxsmpl // (NB: cannot be constexpr as a constexpr function cannot have a nonliteral return type "mgOnGpu::cxsmpl") + constexpr // (NB: now valid code? in the past this failed as "a constexpr function cannot have a nonliteral return type mgOnGpu::cxsmpl") + inline __host__ __device__ cxsmpl conj( const cxsmpl& c ) { return cxsmpl( c.real(), -c.imag() );