From 8a6f50d0acb9efb47a37f3fac98e2042a789c884 Mon Sep 17 00:00:00 2001
From: Andrea Valassi <andrea.valassi@cern.ch>
Date: Thu, 29 Feb 2024 21:12:57 +0100
Subject: [PATCH] [smeft] regenerate smeft_gg_tttt.sa after merging susy2 -
 generation is ok (also added src/constexpr_math.h), builds fail for both
 HRDCOD=0 amd =1
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For HRDCOD=1 (#616), this is a (non-exhautive?) list of errors

In file included from CPPProcess.h:22,
                 from Bridge.h:11,
                 from fsampler.cc:8:
../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h:121:31: error: exponent has no digits
  121 |     constexpr double mdl_WH = 4.070000e - 03;
      |                               ^~~~~~~~~

In file included from /usr/include/c++/11/cassert:44,
                 from ../../src/constexpr_math.h:11,
                 from ../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h:23,
                 from CPPProcess.h:22,
                 from Bridge.h:11,
                 from fsampler.cc:8:
../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h:384:58:   in ‘constexpr’ expansion of ‘mg5amcCpu::constexpr_pow(((long double)2.0e+0), ((long double)2.5e-1))’
../../src/constexpr_math.h:55:5: error: call to non-‘constexpr’ function ‘void __assert_fail(const char*, const char*, unsigned int, const char*)’
   55 |     assert( static_cast<long double>( iexp ) == exp ); // NB would fail at compile time with "error: call to non-‘constexpr’ function ‘void __assert_fail'"
      |     ^~~~~~

In file included from CPPProcess.h:22,
                 from Bridge.h:11,
                 from fsampler.cc:8:
../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h:388:37: error: ‘ABS’ was not declared in this scope
  388 |     constexpr double mdl_propCorr = ABS( mdl_linearPropCorrections ) / ( ABS( mdl_linearPropCorrections ) + mdl_nb__10__exp___m_40 );
      |                                     ^~~

In file included from CPPProcess.h:22,
                 from Bridge.h:11,
                 from fsampler.cc:8:
../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h: In function ‘const mg5amcCpu::Parameters_SMEFTsim_topU3l_MwScheme_UFO_dependentCouplings::DependentCouplings_sv mg5amcCpu::Parameters_SMEFTsim_topU3l_MwScheme_UFO_dependentCouplings::computeDependentCouplings_fromG(const fptype_sv&, const fptype*)’:
../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h:554:45: error: ‘aS’ was not declared in this scope
  554 |         const fptype_sv mdl_gHgg2 = ( -7. * aS ) / ( 720. * M_PI );
      |                                             ^~
../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h:560:66: error: conversion from ‘mg5amcCpu::fptype_sv’ {aka ‘__vector(4) double’} to non-scalar type ‘const mgOnGpu::cxsmpl<double>’ requested
  560 |         constexpr cxsmpl<double> mdl_G__exp__3 = ( ( G ) * ( G ) * ( G ) );
      |                                                  ~~~~~~~~~~~~~~~~^~~~~~~~~

../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h(121): warning #2506-D: a user-provided literal suffix must begin with "_"

Remark: The warnings can be suppressed with "-diag-suppress <warning-number>"

../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h(121): error: user-defined literal operator not found

For HRDCOD=0 (#614), this is a (non-exhautive?) list of errors

In file included from CPPProcess.h:22,
                 from Bridge.h:11,
                 from fsampler.cc:8:
../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h: In function ‘const mg5amcCpu::Parameters_SMEFTsim_topU3l_MwScheme_UFO_dependentCouplings::DependentCouplings_sv mg5amcCpu::Parameters_SMEFTsim_topU3l_MwScheme_UFO_dependentCouplings::computeDependentCouplings_fromG(const fptype_sv&, const fptype*)’:
../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h:554:45: error: ‘aS’ was not declared in this scope
  554 |         const fptype_sv mdl_gHgg2 = ( -7. * aS ) / ( 720. * M_PI );
      |                                             ^~
../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h:560:66: error: conversion from ‘mg5amcCpu::fptype_sv’ {aka ‘__vector(4) double’} to non-scalar type ‘const mgOnGpu::cxsmpl<double>’ requested
  560 |         constexpr cxsmpl<double> mdl_G__exp__3 = ( ( G ) * ( G ) * ( G ) );
      |                                                  ~~~~~~~~~~~~~~~~^~~~~~~~~
../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h:561:35: error: ‘mdl_WH’ was not declared in this scope; did you mean ‘mdl_dWH’?
  561 |         const fptype_sv mdl_dWH = mdl_WH * ( -0.24161 * mdl_dGf + 0.96644 * mdl_dgw + 0.4832199999999999 * mdl_dkH - 0.11186509426655467 * mdl_dWW + ( 0.36410378449238195 * mdl_cHj3 * mdl_vevhat__exp__2 ) / mdl_LambdaSMEFT__exp__2 + ( 0.17608307708657747 * mdl_cHl3 * mdl_vevhat__exp__2 ) / mdl_LambdaSMEFT__exp__2 + ( 0.1636 * mdl_cHG * mdl_MT__exp__2 * mdl_vevhat__exp__2 ) / ( mdl_LambdaSMEFT__exp__2 * ( -0.5 * mdl_gHgg2 * mdl_MH__exp__2 + mdl_gHgg1 * mdl_MT__exp__2 ) ) + ( mdl_cHW * ( -0.35937785117066967 * mdl_gHaa * mdl_gHza + 0.006164 * mdl_cth * mdl_gHaa * mdl_sth + 0.00454 * mdl_gHza * mdl_sth__exp__2 ) * mdl_vevhat__exp__2 ) / ( mdl_gHaa * mdl_gHza * mdl_LambdaSMEFT__exp__2 ) + ( mdl_cHWB * ( -0.00454 * mdl_cth * mdl_gHza * mdl_sth + mdl_gHaa * ( -0.0030819999999999997 + 0.006163999999999999 * mdl_sth__exp__2 ) ) * mdl_vevhat__exp__2 ) / ( mdl_gHaa * mdl_gHza * mdl_LambdaSMEFT__exp__2 ) + ( mdl_cHB * ( -0.006163999999999999 * mdl_cth * mdl_gHaa * mdl_sth - 0.00454 * mdl_gHza * ( -1. + mdl_sth__exp__2 ) ) * mdl_vevhat__exp__2 ) / ( mdl_gHaa * mdl_gHza * mdl_LambdaSMEFT__exp__2 ) + mdl_dWHc + mdl_dWHb + mdl_dWHta );
      |                                   ^~~~~~
      |                                   mdl_dWH

../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h(554): error: identifier "aS" is undefined

../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h(560): error: expression must have a constant value
../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h(560): note #2689-D: the value of variable "G"
(550): here cannot be used as a constant

../../src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h(561): error: identifier "mdl_WH" is undefined
---
 .../CODEGEN_cudacpp_smeft_gg_tttt_log.txt     |  30 +--
 .../CPPProcess.cc                             |  37 ++-
 .../smeft_gg_tttt.sa/SubProcesses/cudacpp.mk  |   3 +
 .../smeft_gg_tttt.sa/SubProcesses/testmisc.cc | 139 +++++++++++
 ...Parameters_SMEFTsim_topU3l_MwScheme_UFO.cc |   8 +-
 .../Parameters_SMEFTsim_topU3l_MwScheme_UFO.h |  68 +++---
 .../smeft_gg_tttt.sa/src/constexpr_math.h     | 223 ++++++++++++++++++
 .../smeft_gg_tttt.sa/src/mgOnGpuCxtypes.h     |   3 +-
 8 files changed, 449 insertions(+), 62 deletions(-)
 create mode 100644 epochX/cudacpp/smeft_gg_tttt.sa/src/constexpr_math.h
diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt
index c45d36eeba..6b74176d7b 100644
--- a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt
+++ b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt
@@ -77,7 +77,7 @@ INFO: load vertices
 [1;32mDEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1) [0m
 [1;32mDEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3) [0m
 [1;32mDEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1) [0m
-[1;32mDEBUG: model prefixing  takes 0.13582301139831543 [0m
+[1;32mDEBUG: model prefixing  takes 0.13656258583068848 [0m
 INFO: Change particles name to pass to MG5 convention 
 Defined multiparticle p = g u c d s u~ c~ d~ s~
 Defined multiparticle j = g u c d s u~ c~ d~ s~
@@ -92,7 +92,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 
 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1  
 INFO: Process has 72 diagrams 
-1 processes with 72 diagrams generated in 3.643 s
+1 processes with 72 diagrams generated in 3.639 s
 Total: 1 processes with 72 diagrams
 output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt
 Load PLUGIN.CUDACPP_OUTPUT
@@ -100,30 +100,30 @@ Load PLUGIN.CUDACPP_OUTPUT
 It has been validated for the last time with version: 3.5.2[0m
 [1mOutput will be done with PLUGIN: CUDACPP_OUTPUT[0m
 [1;32mDEBUG:  cformat = [0m plugin [1;30m[export_cpp.py at line 3071][0m [0m
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 160][0m [0m
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 165][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [1;30m[output.py at line 161][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [1;30m[output.py at line 166][0m [0m
 INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt 
 INFO: Organizing processes into subprocess groups 
 INFO: Generating Helas calls for process: g g > t t~ t t~ WEIGHTED<=4 @1 
 INFO: Processing color information for process: g g > t t~ t t~ @1 
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 194][0m [0m
-[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 195][0m [0m
-[1;32mDEBUG:    type(fortran_model)=<class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 196][0m [0m
-[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 197][0m [0m
-[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 198][0m [0m
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [1;30m[output.py at line 195][0m [0m
+[1;32mDEBUG:    type(subproc_group)=<class 'madgraph.core.helas_objects.HelasMatrixElement'> [1;30m[output.py at line 196][0m [0m
+[1;32mDEBUG:    type(fortran_model)=<class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_GPUFOHelasCallWriter'> [1;30m[output.py at line 197][0m [0m
+[1;32mDEBUG:    type(me)=<class 'int'> me=0 [1;30m[output.py at line 198][0m [0m
+[1;32mDEBUG:  "need to link", self.to_link_in_P = [0m need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [1;30m[output.py at line 199][0m [0m
 INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.cc
 INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/. 
-Generated helas calls for 1 subprocesses (72 diagrams) in 0.182 s
-[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.convert_model (create the model) [1;30m[output.py at line 203][0m [0m
+Generated helas calls for 1 subprocesses (72 diagrams) in 0.186 s
+[1;32mDEBUG:  Entering PLUGIN_ProcessExporter.convert_model (create the model) [1;30m[output.py at line 204][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV5 routines[0m
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates VVVV1 routines[0m
 ALOHA: aloha creates VVVV9 routines[0m
 ALOHA: aloha creates VVVV10 routines[0m
-ALOHA: aloha creates 5 routines in  0.311 s
+ALOHA: aloha creates 5 routines in  0.316 s
 <class 'aloha.create_aloha.AbstractRoutine'> VVV5
 <class 'aloha.create_aloha.AbstractRoutine'> VVV5
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -143,7 +143,7 @@ INFO: Created files Parameters_SMEFTsim_topU3l_MwScheme_UFO.h and Parameters_SME
 INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. 
 quit
 
-real	0m5.006s
-user	0m4.931s
-sys	0m0.055s
+real	0m5.046s
+user	0m4.945s
+sys	0m0.073s
 Code generation completed in 5 seconds
diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CPPProcess.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CPPProcess.cc
index 7eab30aba5..fc0614b8fb 100644
--- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CPPProcess.cc
+++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CPPProcess.cc
@@ -34,6 +34,7 @@
 #include <algorithm>
 #include <array>
 #include <cstring>
+#include <iomanip>
 #include <iostream>
 #include <memory>
 
@@ -86,6 +87,31 @@ namespace mg5amcCpu
   static fptype cIPD[2];
   static fptype* cIPC = nullptr; // unused as nicoup=0
 #endif
+#endif
+
+  // AV Jan 2024 (PR #625): this ugly #define was the only way I found to avoid creating arrays[nBsm] in CPPProcess.cc if nBsm is 0
+  // The problem is that nBsm is determined when generating Parameters.h, which happens after CPPProcess.cc has already been generated
+  // For simplicity, keep this code hardcoded also for SM processes (a nullptr is needed as in the case nBsm == 0)
+#ifdef MGONGPUCPP_NBSMINDEPPARAM_GT_0
+#ifdef MGONGPU_HARDCODE_PARAM
+  __device__ const double* bsmIndepParam = Parameters_MSSM_SLHA2::mdl_bsmIndepParam;
+#else
+#ifdef MGONGPUCPP_GPUIMPL
+  __device__ __constant__ double bsmIndepParam[Parameters_MSSM_SLHA2::nBsmIndepParam];
+#else
+  static double bsmIndepParam[Parameters_MSSM_SLHA2::nBsmIndepParam];
+#endif
+#endif
+#else
+#ifdef MGONGPU_HARDCODE_PARAM
+  __device__ const double* bsmIndepParam = nullptr;
+#else
+#ifdef MGONGPUCPP_GPUIMPL
+  __device__ __constant__ double* bsmIndepParam = nullptr;
+#else
+  static double* bsmIndepParam = nullptr;
+#endif
+#endif
 #endif
 
   // Helicity combinations (and filtering of "good" helicity combinations)
@@ -1544,11 +1570,16 @@ namespace mg5amcCpu
 #ifdef MGONGPUCPP_GPUIMPL
     gpuMemcpyToSymbol( cIPD, tIPD, 2 * sizeof( fptype ) );
     //gpuMemcpyToSymbol( cIPC, tIPC, 0 * sizeof( cxtype ) ); // nicoup=0
+    if( Parameters_MSSM_SLHA2::nBsmIndepParam > 0 )
+      gpuMemcpyToSymbol( bsmIndepParam, m_pars->mdl_bsmIndepParam, Parameters_MSSM_SLHA2::nBsmIndepParam * sizeof( double ) );
 #else
     memcpy( cIPD, tIPD, 2 * sizeof( fptype ) );
     //memcpy( cIPC, tIPC, 0 * sizeof( cxtype ) ); // nicoup=0
+    if( Parameters_MSSM_SLHA2::nBsmIndepParam > 0 )
+      memcpy( bsmIndepParam, m_pars->mdl_bsmIndepParam, Parameters_MSSM_SLHA2::nBsmIndepParam * sizeof( double ) );
 #endif
-    //for ( i=0; i<2; i++ ) std::cout << std::setprecision(17) << "tIPD[i] = " << tIPD[i] << std::endl;
+    //for ( int i=0; i<2; i++ ) std::cout << std::setprecision(17) << "tIPD[i] = " << tIPD[i] << std::endl;
+    //for ( int i=0; i<Parameters_MSSM_SLHA2::nBsmIndepParam; i++ ) std::cout << std::setprecision(17) << "m_pars->mdl_bsmIndepParam[i] = " << m_pars->mdl_bsmIndepParam[i] << std::endl;
   }
 #else
   // Initialize process (with hardcoded parameters)
@@ -1661,7 +1692,7 @@ namespace mg5amcCpu
     using namespace mg5amcGpu;
     using G_ACCESS = DeviceAccessGs;
     using C_ACCESS = DeviceAccessCouplings;
-    G2COUP<G_ACCESS, C_ACCESS>( allgs, allcouplings );
+    G2COUP<G_ACCESS, C_ACCESS>( allgs, allcouplings, bsmIndepParam );
 #else
     using namespace mg5amcCpu;
     using G_ACCESS = HostAccessGs;
@@ -1671,7 +1702,7 @@ namespace mg5amcCpu
       const int ievt0 = ipagV * neppV;
       const fptype* gs = MemoryAccessGs::ieventAccessRecordConst( allgs, ievt0 );
       fptype* couplings = MemoryAccessCouplings::ieventAccessRecord( allcouplings, ievt0 );
-      G2COUP<G_ACCESS, C_ACCESS>( gs, couplings );
+      G2COUP<G_ACCESS, C_ACCESS>( gs, couplings, bsmIndepParam );
     }
 #endif
   }
diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk
index 3ad91dfd59..f7a61d3e74 100644
--- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk
+++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk
@@ -847,6 +847,9 @@ $(testmain): LIBFLAGS += -lgomp
 endif
 endif
 
+# Test quadmath in testmisc.cc tests for constexpr_math #627
+###$(testmain): LIBFLAGS += -lquadmath
+
 # Bypass std::filesystem completely to ease portability on LUMI #803
 #ifneq ($(findstring hipcc,$(GPUCC)),)
 #$(testmain): LIBFLAGS += -lstdc++fs
diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/testmisc.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/testmisc.cc
index ac0b049e60..8c29482e5a 100644
--- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/testmisc.cc
+++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/testmisc.cc
@@ -10,10 +10,14 @@
 
 #include "mgOnGpuVectors.h"
 
+#include "constexpr_math.h"
 #include "epoch_process_id.h"
 
 #include <gtest/gtest.h>
 
+//#include <quadmath.h>
+//#include <format> // needs C++20... https://stackoverflow.com/a/65347016
+#include <iomanip>
 #include <sstream>
 #include <typeinfo>
 
@@ -295,4 +299,139 @@ TEST( XTESTID( MG_EPOCH_PROCESS_ID ), testmisc )
   }
 
   //--------------------------------------------------------------------------
+
+  // Test constexpr floor
+  EXPECT_TRUE( constexpr_floor( 1.5 ) == 1 );
+  EXPECT_TRUE( constexpr_floor( 0.5 ) == 0 );
+  EXPECT_TRUE( constexpr_floor( -0.5 ) == -1 );
+  EXPECT_TRUE( constexpr_floor( -1.5 ) == -2 );
+
+  // Distance from the horizontal or vertical axis (i.e. from 0, pi/2, pi, or 3pi/2)
+  auto distance4 = []( const long double xx )
+  {
+    const long double xx2 = mapIn0to2Pi( xx );                                                    // in [0,2*pi)
+    const long double xx3 = xx2 - constexpr_floor( xx2 / constexpr_pi_by_2 ) * constexpr_pi_by_2; // in [0,pi/2)
+    const long double d0 = xx3;                                                                   // distance from 0
+    const long double d1 = constexpr_pi_by_2 - xx3;                                               // distance from pi/2
+    return ( d0 < d1 ? d0 : d1 );
+  };
+
+  // Test constexpr sin, cos, tan - specific, problematic, points
+  auto testSinCosTanX = []( const long double xx, const double tolerance, const bool debug = false, const long long istep = -999999999 )
+  {
+    const double x = (double)xx;
+    if( debug )
+    {
+      //std::cout << std::setprecision(40) << "testSinCosTanX: xx= " << xx << std::endl;
+      //std::cout << std::setprecision(40) << "                x=  " << x << std::endl;
+    }
+    //std::cout << std::setprecision(40) << "xx - 3pi/2 " << xx - 3 * constexpr_pi_by_2 << std::endl;
+    //int width = 46;
+    //char buf[128];
+    //quadmath_snprintf( buf, sizeof( buf ), "%+-#*.40Qe", width, (__float128)xx );
+    //std::cout << std::setprecision(40) << "testSinCosTanX: xx=" << buf << std::endl;
+    //quadmath_snprintf( buf, sizeof( buf ), "%+-#*.40Qe", width, (__float128)x );
+    //std::cout << std::setprecision(40) << "                x= " << buf << std::endl;
+    EXPECT_NEAR( std::sin( x ), constexpr_sin( x ), std::abs( std::sin( x ) * tolerance ) )
+      << "x=" << x << ", x(0to2Pi)=" << mapIn0to2Pi( x ) << ", istep=" << istep;
+    EXPECT_NEAR( std::cos( x ), constexpr_cos( x ), std::abs( std::cos( x ) * tolerance ) )
+      << "x=" << x << ", x(0to2Pi)=" << mapIn0to2Pi( x ) << ", istep=" << istep;
+    EXPECT_NEAR( std::tan( x ), constexpr_tan( x ), std::abs( std::tan( x ) * tolerance ) )
+      << "x=" << x << ", x(0to2Pi)=" << mapIn0to2Pi( x ) << ", istep=" << istep;
+    std::cout << std::setprecision( 6 ); // default
+  };
+  testSinCosTanX( M_PIl, 1E-3, true );                                                // from math.h
+  testSinCosTanX( (long double)3.141592653589793238462643383279502884L, 1E-3, true ); // from math.h
+  testSinCosTanX( 4.712388980384687897640105802565813064575L, 1E-3, true );           // from 100 steps n [-4*pi,6*pi]... succeeds? (note x==xx)
+  testSinCosTanX( 3 * constexpr_pi_by_2 - 1.96e-15L, 1E-3, true );                    // from 100 steps n [-4*pi,6*pi]... succeeds? (note x!=xx)
+  testSinCosTanX( 3 * constexpr_pi_by_2 - 1.9601e-15L, 1E-3, true );                  // from 100 steps n [-4*pi,6*pi]... succeeds? (note x==xx)
+
+  // Test constexpr sin, cos, tan - 8 points on (or close to) the boundaries of the 8 sectors of [0,2*pi]
+  auto testSinCosTan8 = [testSinCosTanX]( const double deltax, const double tolerance )
+  {
+    for( int ioff = -1; ioff < 2; ioff++, ioff++ ) // -1, 1
+    {
+      const bool debug = false;
+      const int nstep = 8;
+      for( int istep = 0; istep < nstep + 1; istep++ )
+      {
+        long double x0 = deltax * ioff;
+        long double x1 = deltax * ioff + 2 * constexpr_pi;
+        double x = x0 + istep * ( x1 - x0 ) / nstep; // test this for double (else std::cos and std::sin use long double)
+        testSinCosTanX( x, tolerance, debug, istep );
+      }
+    }
+  };
+
+  // Use much lower tolerance when testing on the boundaries of the 8 sectors of [0,2*pi]
+  // Use progressively stricter tolerances as you move away from the boundaries of the 8 sectors of [0,2*pi]
+  testSinCosTan8( 0, 1E-03 );     // fails with 1E-04 - DANGEROUS ANYWAY...
+  testSinCosTan8( 1E-15, 1E-03 ); // fails with 1E-04 - DANGEROUS ANYWAY...
+  testSinCosTan8( 1E-14, 1E-04 ); // fails with 1E-05
+  testSinCosTan8( 1E-12, 1E-06 ); // fails with 1E-07
+  testSinCosTan8( 1E-09, 1E-09 ); // fails with 1E-10
+  testSinCosTan8( 1E-06, 1E-12 ); // fails with 1E-13
+  testSinCosTan8( 1E-03, 1E-15 ); // fails with 1E-16
+  testSinCosTan8( 1E-02, 1E-99 ); // never fails? always bit-by-bit identical?
+
+  // Test constexpr sin, cos, tan - N points almost randomly with a varying tolerance
+  auto testSinCosTanN = [testSinCosTanX, distance4]( const int nstep, const double x0, const double x1 )
+  {
+    auto toleranceForX = [distance4]( const double x )
+    {
+      const double d4 = distance4( x );
+      if( d4 < 1E-14 )
+        return 1E-03; // NB: absolute distance limited to 1E-14 anyway even if relative tolerance is 1E-3...
+      else if( d4 < 1E-13 )
+        return 1E-04;
+      else if( d4 < 1E-12 )
+        return 1E-05;
+      else if( d4 < 1E-11 )
+        return 1E-06;
+      else if( d4 < 1E-10 )
+        return 1E-07;
+      else if( d4 < 1E-09 )
+        return 1E-08;
+      else if( d4 < 1E-08 )
+        return 1E-09;
+      else if( d4 < 1E-07 )
+        return 1E-10;
+      else if( d4 < 1E-06 )
+        return 1E-11;
+      else if( d4 < 1E-05 )
+        return 1E-12;
+      else if( d4 < 1E-04 )
+        return 1E-13;
+      else
+        return 1E-14; // play it safe even if the agreement might even be better?
+    };
+    for( int istep = 0; istep < nstep + 1; istep++ )
+    {
+      double x = x0 + istep * ( x1 - x0 ) / nstep; // test this for double (else std::cos and std::sin use long double)
+      const double tolerance = toleranceForX( x );
+      EXPECT_NEAR( std::sin( x ), constexpr_sin( x ), std::max( std::abs( std::sin( x ) * tolerance ), 3E-15 ) )
+        << std::setprecision( 40 ) << "x=" << x << ", x(0to2Pi)=" << mapIn0to2Pi( x ) << ",\n istep=" << istep << ", distance4=" << distance4( x );
+      EXPECT_NEAR( std::cos( x ), constexpr_cos( x ), std::max( std::abs( std::cos( x ) * tolerance ), 3E-15 ) )
+        << std::setprecision( 40 ) << "x=" << x << ", x(0to2Pi)=" << mapIn0to2Pi( x ) << ",\n istep=" << istep << ", distance4=" << distance4( x );
+      EXPECT_NEAR( std::tan( x ), constexpr_tan( x ), std::max( std::abs( std::tan( x ) * tolerance ), 3E-15 ) )
+        << std::setprecision( 40 ) << "x=" << x << ", x(0to2Pi)=" << mapIn0to2Pi( x ) << ",\n istep=" << istep << ", distance4=" << distance4( x );
+    }
+  };
+  testSinCosTanN( 100, -4 * constexpr_pi, 6 * constexpr_pi ); // this was failing at 3*pi/2 (now fixed by absolute tolerance 3E-15)
+  testSinCosTanN( 10000, -constexpr_pi_by_2, 5 * constexpr_pi_by_2 );
+
+  // Test constexpr atan
+  {
+    const double tolerance = 1E-12;
+    const int nstep = 1000;
+    for( int istep = 0; istep < nstep + 1; istep++ )
+    {
+      long double x0 = -5, x1 = +5;
+      double x = x0 + istep * ( x1 - x0 ) / nstep; // test this for double (else std::cos and std::sin use long double)
+      EXPECT_NEAR( std::atan( x ), constexpr_atan( x ), std::abs( std::atan( x ) * tolerance ) )
+        << "x=" << x << ", istep=" << istep;
+    }
+  }
+
+  //--------------------------------------------------------------------------
 }
diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.cc b/epochX/cudacpp/smeft_gg_tttt.sa/src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.cc
index 7a91bad998..15b14801e7 100644
--- a/epochX/cudacpp/smeft_gg_tttt.sa/src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.cc
+++ b/epochX/cudacpp/smeft_gg_tttt.sa/src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.cc
@@ -40,9 +40,9 @@ Parameters_SMEFTsim_topU3l_MwScheme_UFO::getInstance()
 void
 Parameters_SMEFTsim_topU3l_MwScheme_UFO::setIndependentParameters( SLHAReader& slha )
 {
-  zero = 0; // define "zero"
-  ZERO = 0; // define "zero"
-  //std::vector<int> indices(2, 0); // prepare a vector for indices
+  zero = 0;                         // define "zero"
+  ZERO = 0;                         // define "zero"
+  std::vector<int> indices( 2, 0 ); // prepare a vector for indices
   mdl_WH = slha.get_block_entry( "decay", 25, 4.070000e-03 );
   mdl_WW = slha.get_block_entry( "decay", 24, 2.085000e+00 );
   mdl_WZ = slha.get_block_entry( "decay", 23, 2.495200e+00 );
@@ -387,6 +387,8 @@ Parameters_SMEFTsim_topU3l_MwScheme_UFO::setIndependentParameters( SLHAReader& s
   mdl_g1sh = ( mdl_ee * ( 1. + mdl_dg1 - ( mdl_cHB * mdl_vevhat__exp__2 ) / mdl_LambdaSMEFT__exp__2 ) ) / mdl_cth;
   mdl_ee__exp__3 = ( ( mdl_ee ) * ( mdl_ee ) * ( mdl_ee ) );
   mdl_vevhat__exp__3 = ( ( mdl_vevhat ) * ( mdl_vevhat ) * ( mdl_vevhat ) );
+  // BSM parameters that do not depend on alphaS but are needed in the computation of alphaS-dependent couplings;
+  // (none)
 }
 
 void
diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h b/epochX/cudacpp/smeft_gg_tttt.sa/src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h
index ddc905638d..ba4fce3110 100644
--- a/epochX/cudacpp/smeft_gg_tttt.sa/src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h
+++ b/epochX/cudacpp/smeft_gg_tttt.sa/src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h
@@ -20,10 +20,17 @@
 #include "mgOnGpuCxtypes.h"
 #include "mgOnGpuVectors.h"
 
+#include "constexpr_math.h"
+
 //==========================================================================
 
-#ifndef MGONGPU_HARDCODE_PARAM // this is only supported in SM processes (e.g. not in EFT models) for the moment (#439)
-#error This non-SM physics process only supports MGONGPU_HARDCODE_PARAM builds (#439): please run "make HRDCOD=1"
+// AV Jan 2024 (PR #625): this ugly #define was the only way I found to avoid creating arrays[nBsm] in CPPProcess.cc if nBsm is 0
+// The problem is that nBsm is determined when generating Parameters.h, which happens after CPPProcess.cc has already been generated
+// For simplicity, keep this code hardcoded also for SM processes (a nullptr is needed as in the case nBsm == 0)
+#undef MGONGPUCPP_NBSMINDEPPARAM_GT_0
+
+#ifndef MGONGPU_HARDCODE_PARAM
+//#warning Support for non-SM physics processes (e.g. SUSY or EFT) is still limited for HRDCOD=0 builds (#439 and PR #625)
 
 #include "read_slha.h"
 
@@ -81,6 +88,9 @@ namespace mg5amcCpu
 
     // Print couplings that are changed event by event
     //void printDependentCouplings(); // now computed event-by-event (running alphas #373)
+    // BSM parameters that do not depend on alphaS but are needed in the computation of alphaS-dependent couplings;
+    static constexpr int nBsmIndepParam = 0;
+    //double mdl_bsmIndepParam[nBsmIndepParam];
 
   private:
 
@@ -90,6 +100,7 @@ namespace mg5amcCpu
 } // end namespace mg5amcGpu/mg5amcCpu
 
 #else
+//#warning Support for non-SM physics processes (e.g. SUSY or EFT) is still limited for HRDCOD=1 builds (#439 and PR #625)
 
 #include <cassert>
 #include <limits>
@@ -104,37 +115,6 @@ namespace mg5amcCpu
   // Hardcoded constexpr physics parameters
   namespace Parameters_SMEFTsim_topU3l_MwScheme_UFO // keep the same name rather than HardcodedParameters_SMEFTsim_topU3l_MwScheme_UFO for simplicity
   {
-    // Constexpr implementation of sqrt (see https://stackoverflow.com/a/34134071)
-    double constexpr sqrtNewtonRaphson( double x, double curr, double prev )
-    {
-      return curr == prev ? curr : sqrtNewtonRaphson( x, 0.5 * ( curr + x / curr ), curr );
-    }
-    double constexpr constexpr_sqrt( double x )
-    {
-      return x >= 0 // && x < std::numeric_limits<double>::infinity() // avoid -Wtautological-constant-compare warning in fast math
-        ? sqrtNewtonRaphson( x, x, 0 )
-        : std::numeric_limits<double>::quiet_NaN();
-    }
-
-    // Constexpr implementation of floor (see https://stackoverflow.com/a/66146159)
-    constexpr int constexpr_floor( double d )
-    {
-      const int i = static_cast<int>( d );
-      return d < i ? i - 1 : i;
-    }
-
-    // Constexpr implementation of pow
-    constexpr double constexpr_pow( double base, double exp )
-    {
-      // NB(1): this implementation of constexpr_pow requires exponent >= 0
-      assert( exp >= 0 ); // NB would fail at compile time with "error: call to non-‘constexpr’ function ‘void __assert_fail'"
-      // NB(2): this implementation of constexpr_pow requires an integer exponent
-      const int iexp = constexpr_floor( exp );
-      assert( static_cast<double>( iexp ) == exp ); // NB would fail at compile time with "error: call to non-‘constexpr’ function ‘void __assert_fail'"
-      // Iterative implementation of pow if exp is a non negative integer
-      return iexp == 0 ? 1 : base * constexpr_pow( base, iexp - 1 );
-    }
-
     // Model parameters independent of aS
     constexpr double zero = 0;
     constexpr double ZERO = 0;
@@ -487,8 +467,8 @@ namespace mg5amcCpu
     // (none)
 
     // Model parameters dependent on aS
-    //constexpr double mdl_sqrt__aS = //constexpr_sqrt( aS ); // now computed event-by-event (running alphas #373)
-    //constexpr double G = 2. * mdl_sqrt__aS * //constexpr_sqrt( M_PI ); // now computed event-by-event (running alphas #373)
+    //constexpr double mdl_sqrt__aS = constexpr_sqrt( aS ); // now computed event-by-event (running alphas #373)
+    //constexpr double G = 2. * mdl_sqrt__aS * constexpr_sqrt( M_PI ); // now computed event-by-event (running alphas #373)
     //constexpr double mdl_gHgg2 = ( -7. * aS ) / ( 720. * M_PI ); // now computed event-by-event (running alphas #373)
     //constexpr double mdl_gHgg4 = aS / ( 360. * M_PI ); // now computed event-by-event (running alphas #373)
     //constexpr double mdl_gHgg5 = aS / ( 20. * M_PI ); // now computed event-by-event (running alphas #373)
@@ -514,6 +494,10 @@ namespace mg5amcCpu
 
     // Print couplings that are changed event by event
     //void printDependentCouplings(); // now computed event-by-event (running alphas #373)
+
+    // BSM parameters that do not depend on alphaS but are needed in the computation of alphaS-dependent couplings;
+    constexpr int nBsmIndepParam = 0;
+    //__device__ constexpr double mdl_bsmIndepParam[nBsmIndepParam];
   }
 
 } // end namespace mg5amcGpu/mg5amcCpu
@@ -542,16 +526,19 @@ namespace mg5amcCpu
       cxtype_sv GC_8;
     };
 #pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wunused-variable"  // e.g. <<warning: unused variable ‘mdl_G__exp__2’ [-Wunused-variable]>>
-#pragma GCC diagnostic ignored "-Wunused-parameter" // e.g. <<warning: unused parameter ‘G’ [-Wunused-parameter]>>
+#pragma GCC diagnostic ignored "-Wunused-parameter"        // e.g. <<warning: unused parameter ‘G’ [-Wunused-parameter]>>
+#pragma GCC diagnostic ignored "-Wunused-variable"         // e.g. <<warning: unused variable ‘mdl_G__exp__2’ [-Wunused-variable]>>
+#pragma GCC diagnostic ignored "-Wunused-but-set-variable" // e.g. <<warning: variable ‘mdl_G__exp__2’ set but not used [-Wunused-but-set-variable]>>
 #ifdef MGONGPUCPP_GPUIMPL
 #pragma nv_diagnostic push
 #pragma nv_diag_suppress 177 // e.g. <<warning #177-D: variable "mdl_G__exp__2" was declared but never referenced>>
 #endif
-    __host__ __device__ inline const DependentCouplings_sv computeDependentCouplings_fromG( const fptype_sv& G_sv )
+    __host__ __device__ inline const DependentCouplings_sv computeDependentCouplings_fromG( const fptype_sv& G_sv, const fptype* bsmIndepParamPtr )
     {
 #ifdef MGONGPU_HARDCODE_PARAM
       using namespace Parameters_SMEFTsim_topU3l_MwScheme_UFO;
+#else
+      // No additional parameters needed in constant memory for this BSM model
 #endif
       // NB: hardcode cxtype cI(0,1) instead of cxtype (or hardcoded cxsmpl) mdl_complexi (which exists in Parameters_SMEFTsim_topU3l_MwScheme_UFO) because:
       // (1) mdl_complexi is always (0,1); (2) mdl_complexi is undefined in device code; (3) need cxsmpl conversion to cxtype in code below
@@ -643,12 +630,13 @@ namespace mg5amcCpu
   template<class G_ACCESS, class C_ACCESS>
   __device__ inline void
   G2COUP( const fptype gs[],
-          fptype couplings[] )
+          fptype couplings[],
+          const fptype* bsmIndepParamPtr )
   {
     mgDebug( 0, __FUNCTION__ );
     using namespace Parameters_SMEFTsim_topU3l_MwScheme_UFO_dependentCouplings;
     const fptype_sv& gs_sv = G_ACCESS::kernelAccessConst( gs );
-    DependentCouplings_sv couplings_sv = computeDependentCouplings_fromG( gs_sv );
+    DependentCouplings_sv couplings_sv = computeDependentCouplings_fromG( gs_sv, bsmIndepParamPtr );
     fptype* GC_7s = C_ACCESS::idcoupAccessBuffer( couplings, idcoup_GC_7 );
     fptype* GC_6s = C_ACCESS::idcoupAccessBuffer( couplings, idcoup_GC_6 );
     fptype* GC_8s = C_ACCESS::idcoupAccessBuffer( couplings, idcoup_GC_8 );
diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/src/constexpr_math.h b/epochX/cudacpp/smeft_gg_tttt.sa/src/constexpr_math.h
new file mode 100644
index 0000000000..78ff8b16ab
--- /dev/null
+++ b/epochX/cudacpp/smeft_gg_tttt.sa/src/constexpr_math.h
@@ -0,0 +1,223 @@
+// Copyright (C) 2020-2024 CERN and UCLouvain.
+// Licensed under the GNU Lesser General Public License (version 3 or later).
+// Created by: A. Valassi (Feb 2024) for the MG5aMC CUDACPP plugin.
+// Further modified by: A. Valassi (2024) for the MG5aMC CUDACPP plugin.
+
+#ifndef constexpr_math_h
+#define constexpr_math_h 1
+
+#include "mgOnGpuConfig.h"
+
+#include <cassert>
+#include <cmath>
+#include <limits>
+
+// FOR DEBUGGING!
+#undef CONSTEXPR_MATH_DEBUG // no-debug
+//#define CONSTEXPR_MATH_DEBUG 1 // debug
+#ifdef CONSTEXPR_MATH_DEBUG
+#define constexpr const
+#endif
+
+// NB: namespaces mg5amcGpu and mg5amcCpu includes types which are defined in different ways for CPU and GPU builds (see #318 and #725)
+#ifdef MGONGPUCPP_GPUIMPL
+namespace mg5amcGpu
+#else
+namespace mg5amcCpu
+#endif
+{
+  // Constexpr implementation of sqrt (see https://stackoverflow.com/a/34134071)
+  constexpr long double sqrtNewtonRaphson( const long double xx, const long double curr, const long double prev )
+  {
+    return curr == prev ? curr : sqrtNewtonRaphson( xx, 0.5 * ( curr + xx / curr ), curr );
+  }
+  constexpr long double constexpr_sqrt( const long double xx )
+  {
+    return xx >= 0 // && x < std::numeric_limits<double>::infinity() // avoid -Wtautological-constant-compare warning in fast math
+      ? sqrtNewtonRaphson( xx, xx, 0 )
+      : std::numeric_limits<long double>::quiet_NaN();
+  }
+
+  // Constexpr implementation of floor (see https://stackoverflow.com/a/66146159)
+  constexpr int constexpr_floor( const long double xx )
+  {
+    const int i = static_cast<int>( xx );
+    return xx < i ? i - 1 : i;
+  }
+
+  // Constexpr implementation of pow
+  constexpr long double constexpr_pow( const long double base, const long double exp )
+  {
+    // NB(1): this implementation of constexpr_pow requires exponent >= 0
+    assert( exp >= 0 ); // NB would fail at compile time with "error: call to non-‘constexpr’ function ‘void __assert_fail'"
+    // NB(2): this implementation of constexpr_pow requires an integer exponent
+    const int iexp = constexpr_floor( exp );
+    assert( static_cast<long double>( iexp ) == exp ); // NB would fail at compile time with "error: call to non-‘constexpr’ function ‘void __assert_fail'"
+    // Iterative implementation of pow if exp is a non negative integer
+    return iexp == 0 ? 1 : base * constexpr_pow( base, iexp - 1 );
+  }
+
+  // PI from cmath
+  constexpr long double constexpr_pi = M_PIl;        // pi
+  constexpr long double constexpr_pi_by_2 = M_PI_2l; // pi/2
+  constexpr long double constexpr_pi_by_4 = M_PI_4l; // pi/4
+
+  // Constexpr implementation of sin for 0<x<pi/4 (long double signature)
+  // Taylor expansion : x - x**3/3! + x**5/5!
+  constexpr long double sinTaylor( const long double xx )
+  {
+    assert( xx >= 0 && "The argument of sinTaylor is assumed to be in [0,pi/4)" );
+    assert( xx < constexpr_pi_by_4 && "The argument of sinTaylor is assumed to be in [0,pi/4)" );
+    long double sinx = 0;
+    int ipow = 1;
+    long double delta = xx;
+    while( true )
+    {
+      long double sinxlast = sinx;
+      sinx += delta;
+#ifdef CONSTEXPR_MATH_DEBUG
+      std::cout << "ipow=" << ipow << ", delta=" << delta << ", sinx=" << sinx << std::endl; // for debugging (not constexpr)
+#endif
+      if( sinx == sinxlast ) break;
+      // Next iteration
+      ipow += 2;
+      delta *= -xx * xx / ( ipow - 1 ) / ipow;
+    }
+    return sinx;
+  }
+
+  // Mapping to [0,2*pi) range (long double signature)
+  constexpr long double mapIn0to2Pi( const long double xx )
+  {
+    return xx - constexpr_floor( xx / 2 / constexpr_pi ) * 2 * constexpr_pi;
+  }
+
+  // Constexpr implementation of cos (long double signature)
+  constexpr long double constexpr_cos_quad( const long double xx, const bool assume0to2Pi = false )
+  {
+    if( assume0to2Pi )
+    {
+      assert( xx >= 0 && "The argument of constexpr_cos_quad is assumed to be in [0,2*pi)" );
+      assert( xx < 2 * constexpr_pi && "The argument of constexpr_cos_quad is assumed to be in [0,2*pi)" );
+    }
+    if( xx < 0 )
+      return constexpr_cos_quad( mapIn0to2Pi( xx ), true );
+    else if( xx < constexpr_pi_by_4 ) // [0/4*pi, 1/4*pi)
+      return constexpr_sqrt( 1 - constexpr_pow( sinTaylor( xx ), 2 ) );
+    else if( xx < constexpr_pi_by_2 ) // [1/4*pi, 2/4*pi)
+      return sinTaylor( constexpr_pi_by_2 - xx );
+    else if( xx < 3 * constexpr_pi_by_4 ) // [2/4*pi, 3/4*pi)
+      return -sinTaylor( xx - constexpr_pi_by_2 );
+    else if( xx < constexpr_pi ) // [3/4*pi, 4/4*pi)
+      return -constexpr_sqrt( 1 - constexpr_pow( sinTaylor( constexpr_pi - xx ), 2 ) );
+    else if( xx < 2 * constexpr_pi ) // [4/4*pi, 8/4*pi)
+      return constexpr_cos_quad( 2 * constexpr_pi - xx, true );
+    else // [8/4*pi, +inf)
+      return constexpr_cos_quad( mapIn0to2Pi( xx ), true );
+  }
+
+  // Constexpr implementation of cos (double signature, internally implemented as long double)
+  constexpr double constexpr_cos( const double x )
+  {
+    return constexpr_cos_quad( x );
+  }
+
+  // Constexpr implementation of sin (long double signature)
+  constexpr long double constexpr_sin_quad( const long double xx, const bool assume0to2Pi = false )
+  {
+    if( assume0to2Pi )
+    {
+      assert( xx >= 0 && "The argument of constexpr_sin_quad is assumed to be in [0,2*pi)" );
+      assert( xx < 2 * constexpr_pi && "The argument of constexpr_sin_quad is assumed to be in [0,2*pi)" );
+    }
+    if( xx < 0 )
+      return constexpr_sin_quad( mapIn0to2Pi( xx ), true );
+    else if( xx < constexpr_pi_by_4 ) // [0/4*pi, 1/4*pi)
+      return sinTaylor( xx );
+    else if( xx < constexpr_pi_by_2 ) // [1/4*pi, 2/4*pi)
+      return constexpr_sqrt( 1 - constexpr_pow( sinTaylor( constexpr_pi_by_2 - xx ), 2 ) );
+    else if( xx < 3 * constexpr_pi_by_4 ) // [2/4*pi, 3/4*pi)
+      return constexpr_sqrt( 1 - constexpr_pow( sinTaylor( xx - constexpr_pi_by_2 ), 2 ) );
+    else if( xx < constexpr_pi ) // [3/4*pi, 4/4*pi)
+      return sinTaylor( constexpr_pi - xx );
+    else if( xx < 2 * constexpr_pi ) // [4/4*pi, 8/4*pi)
+      return -constexpr_sin_quad( 2 * constexpr_pi - xx, true );
+    else // [8/4*pi, +inf)
+      return constexpr_sin_quad( mapIn0to2Pi( xx ), true );
+  }
+
+  // Constexpr implementation of sin (double signature, internally implemented as long double)
+  constexpr double constexpr_sin( const double x )
+  {
+    return constexpr_sin_quad( x );
+  }
+
+  // Constexpr implementation of tan (long double signature)
+  constexpr long double constexpr_tan_quad( const long double xx, const bool assume0to2Pi = false )
+  {
+    if( assume0to2Pi )
+    {
+      assert( xx >= 0 && "The argument of constexpr_sin_quad is assumed to be in [0,2*pi)" );
+      assert( xx < 2 * constexpr_pi && "The argument of constexpr_sin_quad is assumed to be in [0,2*pi)" );
+    }
+    if( xx < 0 )
+      return constexpr_tan_quad( mapIn0to2Pi( xx ), true );
+    else if( xx < 2 * constexpr_pi ) // [0, 2*pi)
+      return constexpr_sin_quad( xx, assume0to2Pi ) / constexpr_cos_quad( xx, assume0to2Pi );
+    else // [8/4*pi, +inf)
+      return constexpr_tan_quad( mapIn0to2Pi( xx ), true );
+  }
+
+  // Constexpr implementation of tan (double signature, internally implemented as long double)
+  constexpr double constexpr_tan( const double x )
+  {
+    return constexpr_tan_quad( x );
+  }
+
+  // Constexpr implementation of atan for -1<x<1 (long double signature)
+  // Taylor expansion : x - x**3/3 + x**5/5...
+  constexpr long double atanTaylor( const long double xx )
+  {
+    assert( xx >= -1 && "The argument of atanTaylor is assumed to be in (-1,+1)" );
+    assert( xx < 1 && "The argument of atanTaylor is assumed to be in (-1,+1)" );
+    long double atanx = 0;
+    int ipow = 1;
+    long double xpow = xx;
+    while( true )
+    {
+      long double atanxlast = atanx;
+      atanx += xpow / ipow;
+#ifdef CONSTEXPR_MATH_DEBUG
+      std::cout << "ipow=" << ipow << ", xpow=" << xpow << ", atanx=" << atanx << std::endl; // for debugging (not constexpr)
+#endif
+      if( atanx == atanxlast ) break;
+      // Next iteration
+      ipow += 2;
+      xpow *= -xx * xx;
+    }
+    return atanx;
+  }
+
+  // Constexpr implementation of atan (long double signature)
+  constexpr long double constexpr_atan_quad( const long double xx )
+  {
+    if( xx > 1 )
+      return constexpr_pi_by_2 - atanTaylor( 1 / xx );
+    else if( xx == 1 )
+      return constexpr_pi_by_4;
+    else if( xx > -1 )
+      return atanTaylor( xx );
+    else if( xx == -1 )
+      return -constexpr_pi_by_4;
+    else // if( xx < -1 )
+      return -constexpr_pi_by_2 - atanTaylor( 1 / xx );
+  }
+
+  // Constexpr implementation of atan (double signature, internally implemented as long double)
+  constexpr double constexpr_atan( const double x )
+  {
+    return constexpr_atan_quad( x );
+  }
+}
+
+#endif // constexpr_math_h
diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/src/mgOnGpuCxtypes.h b/epochX/cudacpp/smeft_gg_tttt.sa/src/mgOnGpuCxtypes.h
index 7ede1dbfae..9ef1c44899 100644
--- a/epochX/cudacpp/smeft_gg_tttt.sa/src/mgOnGpuCxtypes.h
+++ b/epochX/cudacpp/smeft_gg_tttt.sa/src/mgOnGpuCxtypes.h
@@ -76,7 +76,8 @@ namespace mgOnGpu /* clang-format off */
   };
 
   template<typename FP>
-  inline __host__ __device__ cxsmpl<FP> // (NB: cannot be constexpr as a constexpr function cannot have a nonliteral return type "mgOnGpu::cxsmpl")
+  constexpr // (NB: now valid code? in the past this failed as "a constexpr function cannot have a nonliteral return type mgOnGpu::cxsmpl")
+  inline __host__ __device__ cxsmpl<FP>
   conj( const cxsmpl<FP>& c )
   {
     return cxsmpl<FP>( c.real(), -c.imag() );