From fa422fca2f94ca626b59769529669e139d828714 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 19 May 2023 14:35:15 +0200 Subject: [PATCH] [cmsdy] ggttgg tmad succeeds again with my attempted fix upstream for #655 --- .../log_ggttgg_mad_d_inl0_hrd0.txt | 506 ++++++++++++++++-- 1 file changed, 465 insertions(+), 41 deletions(-) diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index b711222edd..5f8c4e4e22 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -2,30 +2,30 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' - make USEBUILDDIR=1 AVX=none + make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 + +make USEBUILDDIR=1 AVX=512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' - -make USEBUILDDIR=1 AVX=512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-05-19_14:22:30 +DATE: 2023-05-19_14:30:52 On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -57,9 +57,9 @@ Executing ' ./madevent < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/o [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [] fbridge_mode=0 - [COUNTERS] PROGRAM TOTAL : 4.3572s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2106s - [COUNTERS] Fortran MEs ( 1 ) : 4.1466s for 8192 events => throughput is 1.98E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3714s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2112s + [COUNTERS] Fortran MEs ( 1 ) : 4.1602s for 8192 events => throughput is 1.97E+03 events/s *** (1) EXECUTE MADEVENT x1 (create events.lhe) *** -------------------- @@ -81,9 +81,9 @@ Executing ' ./madevent < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/o [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352982E-004] fbridge_mode=0 [UNWEIGHT] Wrote 49 events (found 738 events) - [COUNTERS] PROGRAM TOTAL : 4.4614s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2996s - [COUNTERS] Fortran MEs ( 1 ) : 4.1618s for 8192 events => throughput is 1.97E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.5575s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2999s + [COUNTERS] Fortran MEs ( 1 ) : 4.2576s for 8192 events => throughput is 1.92E+03 events/s *** (1) EXECUTE MADEVENT x10 (create events.lhe) *** -------------------- @@ -105,9 +105,9 @@ Executing ' ./madevent < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/avalassi/ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748610604E-004] fbridge_mode=0 [UNWEIGHT] Wrote 204 events (found 1633 events) - [COUNTERS] PROGRAM TOTAL : 47.5639s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8857s - [COUNTERS] Fortran MEs ( 1 ) : 45.6781s for 90112 events => throughput is 1.97E+03 events/s + [COUNTERS] PROGRAM TOTAL : 47.7111s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8882s + [COUNTERS] Fortran MEs ( 1 ) : 45.8229s for 90112 events => throughput is 1.97E+03 events/s *** (2-none) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -129,34 +129,458 @@ Executing ' ./build.none_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352998E-004] fbridge_mode=1 [UNWEIGHT] Wrote 49 events (found 738 events) - [COUNTERS] PROGRAM TOTAL : 8.5491s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3641s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.1850s for 8192 events => throughput is 1.96E+03 events/s + [COUNTERS] PROGRAM TOTAL : 8.5715s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3785s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.1930s for 8192 events => throughput is 1.95E+03 events/s *** (2-none) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** OK! xsec from fortran (3.6277277311352982E-004) and cpp (3.6277277311352998E-004) differ by less than 2E-14 (4.440892098500626e-16) *** (2-none) Compare CMADEVENT_CUDACPP x1 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** -ERROR! events.lhe.cpp.1 and events.lhe.ref.1 differ! -diff /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/events.lhe.cpp.1 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/events.lhe.ref.1 | head -20 -3,4c3,4 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.13289043826E+04 0.13289043826E+04 0.00000000000E+00 0. -1. -< 21 -1 0 0 504 503 -0.00000000000E+00 -0.00000000000E+00 -0.81223316322E+02 0.81223316322E+02 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 505 502 0.00000000000E+00 0.00000000000E+00 0.13289043826E+04 0.13289043826E+04 0.00000000000E+00 0. -1. -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.81223316322E+02 0.81223316322E+02 0.00000000000E+00 0. -1. -6,8c6,8 -< -6 1 1 2 0 505 0.39403209480E+02 -0.10079469096E+02 0.28578226692E+03 0.33653337532E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 501 -0.19269775075E+03 0.33434234480E+02 0.26595208036E+03 0.33012237159E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 502 0.20498361398E+02 0.29398294961E+02 0.12436578484E+03 0.12942677855E+03 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 504 0.39403209480E+02 -0.10079469096E+02 0.28578226692E+03 0.33653337532E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 503 -0.19269775075E+03 0.33434234480E+02 0.26595208036E+03 0.33012237159E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 501 0.20498361398E+02 0.29398294961E+02 0.12436578484E+03 0.12942677855E+03 0.00000000000E+00 0. -1. -54,56c54,56 -< -6 1 1 2 0 504 0.12539878316E+03 0.25084537686E+03 0.17266798312E+03 0.37201006747E+03 0.17300000000E+03 0. 1. -< 21 1 1 2 504 505 0.91559552940E+02 -0.56451043237E+03 0.74367925168E+03 0.93814391719E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 503 -0.59178509296E+01 -0.20888672560E+02 0.16637826240E+02 0.27352785287E+02 0.00000000000E+00 0. 1. ---- -> -6 1 1 2 0 505 0.12539878316E+03 0.25084537686E+03 0.17266798312E+03 0.37201006747E+03 0.17300000000E+03 0. 1. + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-none) EXECUTE CMADEVENT_CUDACPP x10 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.none_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000158 [1.5803725748610601E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 204 events (found 1633 events) + [COUNTERS] PROGRAM TOTAL : 52.3263s + [COUNTERS] Fortran Overhead ( 0 ) : 6.0903s + [COUNTERS] CudaCpp MEs ( 2 ) : 46.2360s for 90112 events => throughput is 1.95E+03 events/s + +*** (2-none) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** + +OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803725748610601E-004) differ by less than 2E-14 (2.220446049250313e-16) + +*** (2-none) Compare CMADEVENT_CUDACPP x10 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.014958e+03 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.009480e+03 ) sec^-1 + +*** (2-sse4) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.sse4_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.0003628 [3.6277277311352993E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 49 events (found 738 events) + [COUNTERS] PROGRAM TOTAL : 4.7131s + [COUNTERS] Fortran Overhead ( 0 ) : 2.5210s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.1921s for 8192 events => throughput is 3.74E+03 events/s + +*** (2-sse4) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** + +OK! xsec from fortran (3.6277277311352982E-004) and cpp (3.6277277311352993E-004) differ by less than 2E-14 (2.220446049250313e-16) + +*** (2-sse4) Compare CMADEVENT_CUDACPP x1 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-sse4) EXECUTE CMADEVENT_CUDACPP x10 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.sse4_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000158 [1.5803725748610596E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 204 events (found 1633 events) + [COUNTERS] PROGRAM TOTAL : 27.8399s + [COUNTERS] Fortran Overhead ( 0 ) : 4.0362s + [COUNTERS] CudaCpp MEs ( 2 ) : 23.8037s for 90112 events => throughput is 3.79E+03 events/s + +*** (2-sse4) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** + +OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803725748610596E-004) differ by less than 2E-14 (5.551115123125783e-16) + +*** (2-sse4) Compare CMADEVENT_CUDACPP x10 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.687643e+03 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.671872e+03 ) sec^-1 + +*** (2-avx2) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.avx2_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.0003628 [3.6277277311353009E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 49 events (found 738 events) + [COUNTERS] PROGRAM TOTAL : 2.2009s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2449s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9560s for 8192 events => throughput is 8.57E+03 events/s + +*** (2-avx2) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** + +OK! xsec from fortran (3.6277277311352982E-004) and cpp (3.6277277311353009E-004) differ by less than 2E-14 (6.661338147750939e-16) + +*** (2-avx2) Compare CMADEVENT_CUDACPP x1 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-avx2) EXECUTE CMADEVENT_CUDACPP x10 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.avx2_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000158 [1.5803725748610604E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 204 events (found 1633 events) + [COUNTERS] PROGRAM TOTAL : 13.3743s + [COUNTERS] Fortran Overhead ( 0 ) : 2.8310s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.5433s for 90112 events => throughput is 8.55E+03 events/s + +*** (2-avx2) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** + +OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803725748610604E-004) differ by less than 2E-14 (0.0) + +*** (2-avx2) Compare CMADEVENT_CUDACPP x10 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.672370e+03 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.744164e+03 ) sec^-1 + +*** (2-512y) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512y_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.0003628 [3.6277277311353009E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 49 events (found 738 events) + [COUNTERS] PROGRAM TOTAL : 2.0671s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1737s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8934s for 8192 events => throughput is 9.17E+03 events/s + +*** (2-512y) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** + +OK! xsec from fortran (3.6277277311352982E-004) and cpp (3.6277277311353009E-004) differ by less than 2E-14 (6.661338147750939e-16) + +*** (2-512y) Compare CMADEVENT_CUDACPP x1 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE CMADEVENT_CUDACPP x10 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512y_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000158 [1.5803725748610604E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 204 events (found 1633 events) + [COUNTERS] PROGRAM TOTAL : 12.3305s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7548s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.5757s for 90112 events => throughput is 9.41E+03 events/s + +*** (2-512y) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** + +OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803725748610604E-004) differ by less than 2E-14 (0.0) + +*** (2-512y) Compare CMADEVENT_CUDACPP x10 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.772296e+03 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.855760e+03 ) sec^-1 + +*** (2-512z) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.0003628 [3.6277277311353009E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 49 events (found 738 events) + [COUNTERS] PROGRAM TOTAL : 2.4468s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3753s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0715s for 8192 events => throughput is 7.65E+03 events/s + +*** (2-512z) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** + +OK! xsec from fortran (3.6277277311352982E-004) and cpp (3.6277277311353009E-004) differ by less than 2E-14 (6.661338147750939e-16) + +*** (2-512z) Compare CMADEVENT_CUDACPP x1 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE CMADEVENT_CUDACPP x10 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000158 [1.5803725748610604E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 204 events (found 1633 events) + [COUNTERS] PROGRAM TOTAL : 14.6803s + [COUNTERS] Fortran Overhead ( 0 ) : 2.9606s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.7197s for 90112 events => throughput is 7.69E+03 events/s + +*** (2-512z) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** + +OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803725748610604E-004) differ by less than 2E-14 (0.0) + +*** (2-512z) Compare CMADEVENT_CUDACPP x10 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.626213e+03 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.734126e+03 ) sec^-1 + +*** (3) EXECUTE GMADEVENT_CUDACPP x1 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.none_d_inl0_hrd0/gmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.0003628 [3.6277277311352998E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 49 events (found 738 events) + [COUNTERS] PROGRAM TOTAL : 0.8950s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8616s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0334s for 8192 events => throughput is 2.45E+05 events/s + +*** (3) Compare GMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** + +OK! xsec from fortran (3.6277277311352982E-004) and cpp (3.6277277311352998E-004) differ by less than 2E-14 (4.440892098500626e-16) + +*** (3) Compare GMADEVENT_CUDACPP x1 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3) EXECUTE GMADEVENT_CUDACPP x10 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.none_d_inl0_hrd0/gmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000158 [1.5803725748610601E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 204 events (found 1633 events) + [COUNTERS] PROGRAM TOTAL : 2.8034s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4369s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3665s for 90112 events => throughput is 2.46E+05 events/s + +*** (3) Compare GMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** + +OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803725748610601E-004) differ by less than 2E-14 (2.220446049250313e-16) + +*** (3) Compare GMADEVENT_CUDACPP x10 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.274676e+05 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.510721e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.133270e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.189046e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.132163e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.189226e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.115399e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.452884e+05 ) sec^-1 + +TEST COMPLETED