[smeft] regenerate smeft_gg_tttt.sa after merging susy2 - generation …

…is ok (also added src/constexpr_math.h), builds fail for both HRDCOD=0 and =1. For HRDCOD=1 (madgraph5#616), this is a (non-exhaustive?) list of errors: In file included from CPPProcess.h:22, from Bridge.h:11, from fsampler.cc:8: ../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h:121:31: error: exponent has no digits 121 | constexpr double mdl_WH = 4.070000e - 03; | ^~~~~~~~~ In file included from /usr/include/c++/11/cassert:44, from ../../src/constexpr_math.h:11, from ../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h:23, from CPPProcess.h:22, from Bridge.h:11, from fsampler.cc:8: ../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h:384:58: in ‘constexpr’ expansion of ‘mg5amcCpu::constexpr_pow(((long double)2.0e+0), ((long double)2.5e-1))’ ../../src/constexpr_math.h:55:5: error: call to non-‘constexpr’ function ‘void __assert_fail(const char*, const char*, unsigned int, const char*)’ 55 | assert( static_cast<long double>( iexp ) == exp ); // NB would fail at compile time with "error: call to non-‘constexpr’ function ‘void __assert_fail'" | ^~~~~~ In file included from CPPProcess.h:22, from Bridge.h:11, from fsampler.cc:8: ../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h:388:37: error: ‘ABS’ was not declared in this scope 388 | constexpr double mdl_propCorr = ABS( mdl_linearPropCorrections ) / ( ABS( mdl_linearPropCorrections ) + mdl_nb__10__exp___m_40 ); | ^~~ In file included from CPPProcess.h:22, from Bridge.h:11, from fsampler.cc:8: ../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h: In function ‘const mg5amcCpu::Parameters_SMEFTsim_topU3l_MwScheme_UFO_dependentCouplings::DependentCouplings_sv mg5amcCpu::Parameters_SMEFTsim_topU3l_MwScheme_UFO_dependentCouplings::computeDependentCouplings_fromG(const fptype_sv&, const fptype*)’: ../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h:554:45: error: ‘aS’ was not declared in this scope 554 | const fptype_sv mdl_gHgg2 = ( -7. * aS ) / ( 720. 
* M_PI ); | ^~ ../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h:560:66: error: conversion from ‘mg5amcCpu::fptype_sv’ {aka ‘__vector(4) double’} to non-scalar type ‘const mgOnGpu::cxsmpl<double>’ requested 560 | constexpr cxsmpl<double> mdl_G__exp__3 = ( ( G ) * ( G ) * ( G ) ); | ~~~~~~~~~~~~~~~~^~~~~~~~~ ../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h(121): warning #2506-D: a user-provided literal suffix must begin with "_" Remark: The warnings can be suppressed with "-diag-suppress <warning-number>" ../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h(121): error: user-defined literal operator not found. For HRDCOD=0 (madgraph5#614), this is a (non-exhaustive?) list of errors: In file included from CPPProcess.h:22, from Bridge.h:11, from fsampler.cc:8: ../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h: In function ‘const mg5amcCpu::Parameters_SMEFTsim_topU3l_MwScheme_UFO_dependentCouplings::DependentCouplings_sv mg5amcCpu::Parameters_SMEFTsim_topU3l_MwScheme_UFO_dependentCouplings::computeDependentCouplings_fromG(const fptype_sv&, const fptype*)’: ../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h:554:45: error: ‘aS’ was not declared in this scope 554 | const fptype_sv mdl_gHgg2 = ( -7. * aS ) / ( 720. * M_PI ); | ^~ ../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h:560:66: error: conversion from ‘mg5amcCpu::fptype_sv’ {aka ‘__vector(4) double’} to non-scalar type ‘const mgOnGpu::cxsmpl<double>’ requested 560 | constexpr cxsmpl<double> mdl_G__exp__3 = ( ( G ) * ( G ) * ( G ) ); | ~~~~~~~~~~~~~~~~^~~~~~~~~ ../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h:561:35: error: ‘mdl_WH’ was not declared in this scope; did you mean ‘mdl_dWH’? 
561 | const fptype_sv mdl_dWH = mdl_WH * ( -0.24161 * mdl_dGf + 0.96644 * mdl_dgw + 0.4832199999999999 * mdl_dkH - 0.11186509426655467 * mdl_dWW + ( 0.36410378449238195 * mdl_cHj3 * mdl_vevhat__exp__2 ) / mdl_LambdaSMEFT__exp__2 + ( 0.17608307708657747 * mdl_cHl3 * mdl_vevhat__exp__2 ) / mdl_LambdaSMEFT__exp__2 + ( 0.1636 * mdl_cHG * mdl_MT__exp__2 * mdl_vevhat__exp__2 ) / ( mdl_LambdaSMEFT__exp__2 * ( -0.5 * mdl_gHgg2 * mdl_MH__exp__2 + mdl_gHgg1 * mdl_MT__exp__2 ) ) + ( mdl_cHW * ( -0.35937785117066967 * mdl_gHaa * mdl_gHza + 0.006164 * mdl_cth * mdl_gHaa * mdl_sth + 0.00454 * mdl_gHza * mdl_sth__exp__2 ) * mdl_vevhat__exp__2 ) / ( mdl_gHaa * mdl_gHza * mdl_LambdaSMEFT__exp__2 ) + ( mdl_cHWB * ( -0.00454 * mdl_cth * mdl_gHza * mdl_sth + mdl_gHaa * ( -0.0030819999999999997 + 0.006163999999999999 * mdl_sth__exp__2 ) ) * mdl_vevhat__exp__2 ) / ( mdl_gHaa * mdl_gHza * mdl_LambdaSMEFT__exp__2 ) + ( mdl_cHB * ( -0.006163999999999999 * mdl_cth * mdl_gHaa * mdl_sth - 0.00454 * mdl_gHza * ( -1. + mdl_sth__exp__2 ) ) * mdl_vevhat__exp__2 ) / ( mdl_gHaa * mdl_gHza * mdl_LambdaSMEFT__exp__2 ) + mdl_dWHc + mdl_dWHb + mdl_dWHta ); | ^~~~~~ | mdl_dWH ../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h(554): error: identifier "aS" is undefined ../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h(560): error: expression must have a constant value ../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h(560): note #2689-D: the value of variable "G" (550): here cannot be used as a constant ../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h(561): error: identifier "mdl_WH" is undefined
valassi · Feb 29, 2024 · 8a6f50d · 8a6f50d
1 parent fc1ce2b
commit 8a6f50d
Show file tree

Hide file tree

Showing 8 changed files with 449 additions and 62 deletions.
diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt
@@ -77,7 +77,7 @@ INFO: load vertices
 [1;32mDEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1) [0m
 [1;32mDEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3) [0m
 [1;32mDEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1) [0m
-[1;32mDEBUG: model prefixing  takes 0.13582301139831543 [0m
+[1;32mDEBUG: model prefixing  takes 0.13656258583068848 [0m
 INFO: Change particles name to pass to MG5 convention 
 Defined multiparticle p = g u c d s u~ c~ d~ s~
 Defined multiparticle j = g u c d s u~ c~ d~ s~
@@ -92,38 +92,38 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 
 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1  
 INFO: Process has 72 diagrams 
-1 processes with 72 diagrams generated in 3.643 s
+1 processes with 72 diagrams generated in 3.639 s
 Total: 1 processes with 72 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt
 Load PLUGIN.CUDACPP_OUTPUT
 [1;34mPlugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.5.3_lo_vect. 
 It has been validated for the last time with version: 3.5.2[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  cformat = [0m plugin [1;30m[export_cpp.py at line 3071][0m [0m
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 160][0m [0m
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 165][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 161][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 166][0m [0m
 INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt 
 INFO: Organizing processes into subprocess groups 
 INFO: Generating Helas calls for process: g g > t t~ t t~ WEIGHTED<=4 @1 
 INFO: Processing color information for process: g g > t t~ t t~ @1 
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 194][0m [0m
-[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 195][0m [0m
-[1;32mDEBUG:    type(fortran_model)=<class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 196][0m [0m
-[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 197][0m [0m
-[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 198][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 195][0m [0m
+[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 196][0m [0m
+[1;32mDEBUG:    type(fortran_model)=<class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 197][0m [0m
+[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 198][0m [0m
+[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 199][0m [0m
 INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/. 
-Generated helas calls for 1 subprocesses (72 diagrams) in 0.182 s
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.convert_model (create the model) [1;30m[output.py at line 203][0m [0m
+Generated helas calls for 1 subprocesses (72 diagrams) in 0.186 s
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.convert_model (create the model) [1;30m[output.py at line 204][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV5 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV9 routines[0m
 ALOHA: aloha creates VVVV10 routines[0m
-ALOHA: aloha creates 5 routines in  0.311 s
+ALOHA: aloha creates 5 routines in  0.316 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV5
 <class 'aloha.create_aloha.AbstractRoutine'> VVV5
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -143,7 +143,7 @@ INFO: Created files Parameters_SMEFTsim_topU3l_MwScheme_UFO.h and Parameters_SME
 INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. 
 quit
 
-real	0m5.006s
-user	0m4.931s
-sys	0m0.055s
+real	0m5.046s
+user	0m4.945s
+sys	0m0.073s
 Code generation completed in 5 seconds
diff --git a/...eft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CPPProcess.cc b/...eft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CPPProcess.cc
@@ -34,6 +34,7 @@
 #include <algorithm>
 #include <array>
 #include <cstring>
+#include <iomanip>
 #include <iostream>
 #include <memory>
 
@@ -86,6 +87,31 @@ namespace mg5amcCpu
   static fptype cIPD[2];
   static fptype* cIPC = nullptr; // unused as nicoup=0
 #endif
+#endif
+
+  // AV Jan 2024 (PR #625): this ugly #define was the only way I found to avoid creating arrays[nBsm] in CPPProcess.cc if nBsm is 0
+  // The problem is that nBsm is determined when generating Parameters.h, which happens after CPPProcess.cc has already been generated
+  // For simplicity, keep this code hardcoded also for SM processes (a nullptr is needed as in the case nBsm == 0)
+#ifdef MGONGPUCPP_NBSMINDEPPARAM_GT_0
+#ifdef MGONGPU_HARDCODE_PARAM
+  __device__ const double* bsmIndepParam = Parameters_MSSM_SLHA2::mdl_bsmIndepParam;
+#else
+#ifdef MGONGPUCPP_GPUIMPL
+  __device__ __constant__ double bsmIndepParam[Parameters_MSSM_SLHA2::nBsmIndepParam];
+#else
+  static double bsmIndepParam[Parameters_MSSM_SLHA2::nBsmIndepParam];
+#endif
+#endif
+#else
+#ifdef MGONGPU_HARDCODE_PARAM
+  __device__ const double* bsmIndepParam = nullptr;
+#else
+#ifdef MGONGPUCPP_GPUIMPL
+  __device__ __constant__ double* bsmIndepParam = nullptr;
+#else
+  static double* bsmIndepParam = nullptr;
+#endif
+#endif
 #endif
 
   // Helicity combinations (and filtering of "good" helicity combinations)
@@ -1544,11 +1570,16 @@ namespace mg5amcCpu
 #ifdef MGONGPUCPP_GPUIMPL
     gpuMemcpyToSymbol( cIPD, tIPD, 2 * sizeof( fptype ) );
     //gpuMemcpyToSymbol( cIPC, tIPC, 0 * sizeof( cxtype ) ); // nicoup=0
+    if( Parameters_MSSM_SLHA2::nBsmIndepParam > 0 )
+      gpuMemcpyToSymbol( bsmIndepParam, m_pars->mdl_bsmIndepParam, Parameters_MSSM_SLHA2::nBsmIndepParam * sizeof( double ) );
 #else
     memcpy( cIPD, tIPD, 2 * sizeof( fptype ) );
     //memcpy( cIPC, tIPC, 0 * sizeof( cxtype ) ); // nicoup=0
+    if( Parameters_MSSM_SLHA2::nBsmIndepParam > 0 )
+      memcpy( bsmIndepParam, m_pars->mdl_bsmIndepParam, Parameters_MSSM_SLHA2::nBsmIndepParam * sizeof( double ) );
 #endif
-    //for ( i=0; i<2; i++ ) std::cout << std::setprecision(17) << "tIPD[i] = " << tIPD[i] << std::endl;
+    //for ( int i=0; i<2; i++ ) std::cout << std::setprecision(17) << "tIPD[i] = " << tIPD[i] << std::endl;
+    //for ( int i=0; i<Parameters_MSSM_SLHA2::nBsmIndepParam; i++ ) std::cout << std::setprecision(17) << "m_pars->mdl_bsmIndepParam[i] = " << m_pars->mdl_bsmIndepParam[i] << std::endl;
   }
 #else
   // Initialize process (with hardcoded parameters)
@@ -1661,7 +1692,7 @@ namespace mg5amcCpu
     using namespace mg5amcGpu;
     using G_ACCESS = DeviceAccessGs;
     using C_ACCESS = DeviceAccessCouplings;
-    G2COUP<G_ACCESS, C_ACCESS>( allgs, allcouplings );
+    G2COUP<G_ACCESS, C_ACCESS>( allgs, allcouplings, bsmIndepParam );
 #else
     using namespace mg5amcCpu;
     using G_ACCESS = HostAccessGs;
@@ -1671,7 +1702,7 @@ namespace mg5amcCpu
       const int ievt0 = ipagV * neppV;
       const fptype* gs = MemoryAccessGs::ieventAccessRecordConst( allgs, ievt0 );
       fptype* couplings = MemoryAccessCouplings::ieventAccessRecord( allcouplings, ievt0 );
-      G2COUP<G_ACCESS, C_ACCESS>( gs, couplings );
+      G2COUP<G_ACCESS, C_ACCESS>( gs, couplings, bsmIndepParam );
     }
 #endif
   }

diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk
@@ -847,6 +847,9 @@ $(testmain): LIBFLAGS += -lgomp
 endif
 endif
 
+# Test quadmath in testmisc.cc tests for constexpr_math #627
+###$(testmain): LIBFLAGS += -lquadmath
+
 # Bypass std::filesystem completely to ease portability on LUMI #803
 #ifneq ($(findstring hipcc,$(GPUCC)),)
 #$(testmain): LIBFLAGS += -lstdc++fs

diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/testmisc.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/testmisc.cc
@@ -10,10 +10,14 @@
 
 #include "mgOnGpuVectors.h"
 
+#include "constexpr_math.h"
 #include "epoch_process_id.h"
 
 #include <gtest/gtest.h>
 
+//#include <quadmath.h>
+//#include <format> // needs C++20... https://stackoverflow.com/a/65347016
+#include <iomanip>
 #include <sstream>
 #include <typeinfo>
 
@@ -295,4 +299,139 @@ TEST( XTESTID( MG_EPOCH_PROCESS_ID ), testmisc )
   }
 
   //--------------------------------------------------------------------------
+
+  // Test constexpr floor
+  EXPECT_TRUE( constexpr_floor( 1.5 ) == 1 );
+  EXPECT_TRUE( constexpr_floor( 0.5 ) == 0 );
+  EXPECT_TRUE( constexpr_floor( -0.5 ) == -1 );
+  EXPECT_TRUE( constexpr_floor( -1.5 ) == -2 );
+
+  // Distance from the horizontal or vertical axis (i.e. from 0, pi/2, pi, or 3pi/2)
+  auto distance4 = []( const long double xx )
+  {
+    const long double xx2 = mapIn0to2Pi( xx );                                                    // in [0,2*pi)
+    const long double xx3 = xx2 - constexpr_floor( xx2 / constexpr_pi_by_2 ) * constexpr_pi_by_2; // in [0,pi/2)
+    const long double d0 = xx3;                                                                   // distance from 0
+    const long double d1 = constexpr_pi_by_2 - xx3;                                               // distance from pi/2
+    return ( d0 < d1 ? d0 : d1 );
+  };
+
+  // Test constexpr sin, cos, tan - specific, problematic, points
+  auto testSinCosTanX = []( const long double xx, const double tolerance, const bool debug = false, const long long istep = -999999999 )
+  {
+    const double x = (double)xx;
+    if( debug )
+    {
+      //std::cout << std::setprecision(40) << "testSinCosTanX: xx= " << xx << std::endl;
+      //std::cout << std::setprecision(40) << "                x=  " << x << std::endl;
+    }
+    //std::cout << std::setprecision(40) << "xx - 3pi/2 " << xx - 3 * constexpr_pi_by_2 << std::endl;
+    //int width = 46;
+    //char buf[128];
+    //quadmath_snprintf( buf, sizeof( buf ), "%+-#*.40Qe", width, (__float128)xx );
+    //std::cout << std::setprecision(40) << "testSinCosTanX: xx=" << buf << std::endl;
+    //quadmath_snprintf( buf, sizeof( buf ), "%+-#*.40Qe", width, (__float128)x );
+    //std::cout << std::setprecision(40) << "                x= " << buf << std::endl;
+    EXPECT_NEAR( std::sin( x ), constexpr_sin( x ), std::abs( std::sin( x ) * tolerance ) )
+      << "x=" << x << ", x(0to2Pi)=" << mapIn0to2Pi( x ) << ", istep=" << istep;
+    EXPECT_NEAR( std::cos( x ), constexpr_cos( x ), std::abs( std::cos( x ) * tolerance ) )
+      << "x=" << x << ", x(0to2Pi)=" << mapIn0to2Pi( x ) << ", istep=" << istep;
+    EXPECT_NEAR( std::tan( x ), constexpr_tan( x ), std::abs( std::tan( x ) * tolerance ) )
+      << "x=" << x << ", x(0to2Pi)=" << mapIn0to2Pi( x ) << ", istep=" << istep;
+    std::cout << std::setprecision( 6 ); // default
+  };
+  testSinCosTanX( M_PIl, 1E-3, true );                                                // from math.h
+  testSinCosTanX( (long double)3.141592653589793238462643383279502884L, 1E-3, true ); // from math.h
+  testSinCosTanX( 4.712388980384687897640105802565813064575L, 1E-3, true );           // from 100 steps in [-4*pi,6*pi]... succeeds? (note x==xx)
+  testSinCosTanX( 3 * constexpr_pi_by_2 - 1.96e-15L, 1E-3, true );                    // from 100 steps in [-4*pi,6*pi]... succeeds? (note x!=xx)
+  testSinCosTanX( 3 * constexpr_pi_by_2 - 1.9601e-15L, 1E-3, true );                  // from 100 steps in [-4*pi,6*pi]... succeeds? (note x==xx)
+
+  // Test constexpr sin, cos, tan - 8 points on (or close to) the boundaries of the 8 sectors of [0,2*pi]
+  auto testSinCosTan8 = [testSinCosTanX]( const double deltax, const double tolerance )
+  {
+    for( int ioff = -1; ioff < 2; ioff++, ioff++ ) // -1, 1
+    {
+      const bool debug = false;
+      const int nstep = 8;
+      for( int istep = 0; istep < nstep + 1; istep++ )
+      {
+        long double x0 = deltax * ioff;
+        long double x1 = deltax * ioff + 2 * constexpr_pi;
+        double x = x0 + istep * ( x1 - x0 ) / nstep; // test this for double (else std::cos and std::sin use long double)
+        testSinCosTanX( x, tolerance, debug, istep );
+      }
+    }
+  };
+
+  // Use a much looser (numerically larger) tolerance when testing on the boundaries of the 8 sectors of [0,2*pi]
+  // Use progressively stricter (numerically smaller) tolerances as deltax moves the points away from those boundaries
+  testSinCosTan8( 0, 1E-03 );     // fails with 1E-04 - DANGEROUS ANYWAY...
+  testSinCosTan8( 1E-15, 1E-03 ); // fails with 1E-04 - DANGEROUS ANYWAY...
+  testSinCosTan8( 1E-14, 1E-04 ); // fails with 1E-05
+  testSinCosTan8( 1E-12, 1E-06 ); // fails with 1E-07
+  testSinCosTan8( 1E-09, 1E-09 ); // fails with 1E-10
+  testSinCosTan8( 1E-06, 1E-12 ); // fails with 1E-13
+  testSinCosTan8( 1E-03, 1E-15 ); // fails with 1E-16
+  testSinCosTan8( 1E-02, 1E-99 ); // never fails? always bit-by-bit identical?
+
+  // Test constexpr sin, cos, tan - N points almost randomly with a varying tolerance
+  auto testSinCosTanN = [testSinCosTanX, distance4]( const int nstep, const double x0, const double x1 )
+  {
+    auto toleranceForX = [distance4]( const double x )
+    {
+      const double d4 = distance4( x );
+      if( d4 < 1E-14 )
+        return 1E-03; // NB: absolute distance limited to 1E-14 anyway even if relative tolerance is 1E-3...
+      else if( d4 < 1E-13 )
+        return 1E-04;
+      else if( d4 < 1E-12 )
+        return 1E-05;
+      else if( d4 < 1E-11 )
+        return 1E-06;
+      else if( d4 < 1E-10 )
+        return 1E-07;
+      else if( d4 < 1E-09 )
+        return 1E-08;
+      else if( d4 < 1E-08 )
+        return 1E-09;
+      else if( d4 < 1E-07 )
+        return 1E-10;
+      else if( d4 < 1E-06 )
+        return 1E-11;
+      else if( d4 < 1E-05 )
+        return 1E-12;
+      else if( d4 < 1E-04 )
+        return 1E-13;
+      else
+        return 1E-14; // play it safe even if the agreement might even be better?
+    };
+    for( int istep = 0; istep < nstep + 1; istep++ )
+    {
+      double x = x0 + istep * ( x1 - x0 ) / nstep; // test this for double (else std::cos and std::sin use long double)
+      const double tolerance = toleranceForX( x );
+      EXPECT_NEAR( std::sin( x ), constexpr_sin( x ), std::max( std::abs( std::sin( x ) * tolerance ), 3E-15 ) )
+        << std::setprecision( 40 ) << "x=" << x << ", x(0to2Pi)=" << mapIn0to2Pi( x ) << ",\n istep=" << istep << ", distance4=" << distance4( x );
+      EXPECT_NEAR( std::cos( x ), constexpr_cos( x ), std::max( std::abs( std::cos( x ) * tolerance ), 3E-15 ) )
+        << std::setprecision( 40 ) << "x=" << x << ", x(0to2Pi)=" << mapIn0to2Pi( x ) << ",\n istep=" << istep << ", distance4=" << distance4( x );
+      EXPECT_NEAR( std::tan( x ), constexpr_tan( x ), std::max( std::abs( std::tan( x ) * tolerance ), 3E-15 ) )
+        << std::setprecision( 40 ) << "x=" << x << ", x(0to2Pi)=" << mapIn0to2Pi( x ) << ",\n istep=" << istep << ", distance4=" << distance4( x );
+    }
+  };
+  testSinCosTanN( 100, -4 * constexpr_pi, 6 * constexpr_pi ); // this was failing at 3*pi/2 (now fixed by absolute tolerance 3E-15)
+  testSinCosTanN( 10000, -constexpr_pi_by_2, 5 * constexpr_pi_by_2 );
+
+  // Test constexpr atan: compare constexpr_atan against std::atan on nstep+1 equally spaced points in [-5,+5]
+  {
+    const double tolerance = 1E-12;
+    const int nstep = 1000;
+    for( int istep = 0; istep < nstep + 1; istep++ )
+    {
+      long double x0 = -5, x1 = +5;
+      double x = x0 + istep * ( x1 - x0 ) / nstep; // test this for double (else std::atan would use long double)
+      EXPECT_NEAR( std::atan( x ), constexpr_atan( x ), std::abs( std::atan( x ) * tolerance ) )
+        << "x=" << x << ", istep=" << istep;
+    }
+  }
+
+  //--------------------------------------------------------------------------
 }