Skip to content

Commit

Permalink
[oct23av] rerun 78 tput tests, with FPEs enabled in the check executa…
Browse files Browse the repository at this point in the history
…ble - usual failures in ggttg f/m and gqttq f (madgraph5#783), no change in performance (*NB OpenMP is now disabled by default!*)

STARTED  AT Fri Nov  3 10:06:44 AM CET 2023
./tput/teeThroughputX.sh -mix -hrd -makej -eemumu -ggtt -ggttg -ggttgg -gqttq -ggttggg -makeclean
ENDED(1) AT Fri Nov  3 01:30:11 PM CET 2023 [Status=2]
./tput/teeThroughputX.sh -flt -hrd -makej -eemumu -ggtt -ggttgg -inlonly -makeclean
ENDED(2) AT Fri Nov  3 01:55:47 PM CET 2023 [Status=0]
./tput/teeThroughputX.sh -makej -eemumu -ggtt -ggttg -gqttq -ggttgg -ggttggg -flt -bridge -makeclean
ENDED(3) AT Fri Nov  3 02:05:25 PM CET 2023 [Status=2]
./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -rmbhst
ENDED(4) AT Fri Nov  3 02:08:40 PM CET 2023 [Status=0]
./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -curhst
ENDED(5) AT Fri Nov  3 02:11:53 PM CET 2023 [Status=0]
  • Loading branch information
valassi committed Nov 3, 2023
1 parent 6b39fcb commit c4d2e9e
Show file tree
Hide file tree
Showing 78 changed files with 3,928 additions and 4,290 deletions.
103 changes: 49 additions & 54 deletions epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
export CUDACPP_RUNTIME_ENABLEFPE=on

Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
OMPFLAGS=-fopenmp
OMPFLAGS=
AVX=512y
FPTYPE=d
HELINL=0
Expand Down Expand Up @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0'
make[1]: Nothing to be done for 'all'.
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'

DATE: 2023-10-30_22:38:57
DATE: 2023-11-03_13:10:40

On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
=========================================================================
Expand All @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions
Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
EvtsPerSec[Rmb+ME] (23) = ( 7.999458e+07 ) sec^-1
EvtsPerSec[MatrixElems] (3) = ( 4.940961e+08 ) sec^-1
EvtsPerSec[MECalcOnly] (3a) = ( 7.067088e+08 ) sec^-1
EvtsPerSec[Rmb+ME] (23) = ( 5.425670e+07 ) sec^-1
EvtsPerSec[MatrixElems] (3) = ( 4.222247e+08 ) sec^-1
EvtsPerSec[MECalcOnly] (3a) = ( 7.012114e+08 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
TOTAL : 0.786611 sec
2,626,513,820 cycles # 3.012 GHz
4,089,953,760 instructions # 1.56 insn per cycle
1.073481584 seconds time elapsed
TOTAL : 0.857427 sec
2,861,836,198 cycles # 3.005 GHz
4,395,542,756 instructions # 1.54 insn per cycle
1.189695235 seconds time elapsed
runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1
WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions
==PROF== Profiling "sigmaKin": launch__registers_per_thread 166
Expand All @@ -76,16 +76,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD)
OMP threads / `nproc --all` = 1 / 4
EvtsPerSec[Rmb+ME] (23) = ( 1.125739e+06 ) sec^-1
EvtsPerSec[MatrixElems] (3) = ( 1.324458e+06 ) sec^-1
EvtsPerSec[MECalcOnly] (3a) = ( 1.324458e+06 ) sec^-1
EvtsPerSec[Rmb+ME] (23) = ( 1.128658e+06 ) sec^-1
EvtsPerSec[MatrixElems] (3) = ( 1.324925e+06 ) sec^-1
EvtsPerSec[MECalcOnly] (3a) = ( 1.324925e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
TOTAL : 5.985518 sec
18,327,786,554 cycles # 3.060 GHz
44,036,033,273 instructions # 2.40 insn per cycle
5.990684166 seconds time elapsed
=Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0)
TOTAL : 5.970436 sec
18,347,671,953 cycles # 3.071 GHz
43,938,983,717 instructions # 2.39 insn per cycle
5.978217238 seconds time elapsed
=Symbols in CPPProcess.o= (~sse4: 420) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe
[ PASSED ] 6 tests.
Expand All @@ -103,16 +102,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
OMP threads / `nproc --all` = 1 / 4
EvtsPerSec[Rmb+ME] (23) = ( 1.691045e+06 ) sec^-1
EvtsPerSec[MatrixElems] (3) = ( 2.213177e+06 ) sec^-1
EvtsPerSec[MECalcOnly] (3a) = ( 2.213177e+06 ) sec^-1
EvtsPerSec[Rmb+ME] (23) = ( 1.660817e+06 ) sec^-1
EvtsPerSec[MatrixElems] (3) = ( 2.184751e+06 ) sec^-1
EvtsPerSec[MECalcOnly] (3a) = ( 2.184751e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
TOTAL : 4.113874 sec
12,767,822,386 cycles # 3.101 GHz
31,002,879,427 instructions # 2.43 insn per cycle
4.119112324 seconds time elapsed
=Symbols in CPPProcess.o= (~sse4: 1643) (avx2: 0) (512y: 0) (512z: 0)
TOTAL : 4.192129 sec
12,809,417,728 cycles # 3.052 GHz
31,016,432,387 instructions # 2.42 insn per cycle
4.205962943 seconds time elapsed
=Symbols in CPPProcess.o= (~sse4: 1631) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe
[ PASSED ] 6 tests.
Expand All @@ -130,16 +128,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
OMP threads / `nproc --all` = 1 / 4
EvtsPerSec[Rmb+ME] (23) = ( 2.100650e+06 ) sec^-1
EvtsPerSec[MatrixElems] (3) = ( 2.950531e+06 ) sec^-1
EvtsPerSec[MECalcOnly] (3a) = ( 2.950531e+06 ) sec^-1
EvtsPerSec[Rmb+ME] (23) = ( 2.074393e+06 ) sec^-1
EvtsPerSec[MatrixElems] (3) = ( 2.897997e+06 ) sec^-1
EvtsPerSec[MECalcOnly] (3a) = ( 2.897997e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
TOTAL : 3.380885 sec
10,086,974,506 cycles # 2.980 GHz
19,377,326,262 instructions # 1.92 insn per cycle
3.386108586 seconds time elapsed
=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1965) (512y: 0) (512z: 0)
TOTAL : 3.424513 sec
10,077,521,544 cycles # 2.940 GHz
19,366,070,840 instructions # 1.92 insn per cycle
3.435971091 seconds time elapsed
=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1947) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe
[ PASSED ] 6 tests.
Expand All @@ -157,16 +154,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
OMP threads / `nproc --all` = 1 / 4
EvtsPerSec[Rmb+ME] (23) = ( 2.189272e+06 ) sec^-1
EvtsPerSec[MatrixElems] (3) = ( 3.095103e+06 ) sec^-1
EvtsPerSec[MECalcOnly] (3a) = ( 3.095103e+06 ) sec^-1
EvtsPerSec[Rmb+ME] (23) = ( 2.171630e+06 ) sec^-1
EvtsPerSec[MatrixElems] (3) = ( 3.081425e+06 ) sec^-1
EvtsPerSec[MECalcOnly] (3a) = ( 3.081425e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
TOTAL : 3.258705 sec
9,718,547,501 cycles # 2.979 GHz
19,005,874,298 instructions # 1.96 insn per cycle
3.263850249 seconds time elapsed
=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1689) (512y: 181) (512z: 0)
TOTAL : 3.285829 sec
9,782,653,766 cycles # 2.972 GHz
18,983,356,035 instructions # 1.94 insn per cycle
3.300405408 seconds time elapsed
=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1665) (512y: 181) (512z: 0)
-------------------------------------------------------------------------
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe
[ PASSED ] 6 tests.
Expand All @@ -184,16 +180,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
OMP threads / `nproc --all` = 1 / 4
EvtsPerSec[Rmb+ME] (23) = ( 1.790268e+06 ) sec^-1
EvtsPerSec[MatrixElems] (3) = ( 2.367426e+06 ) sec^-1
EvtsPerSec[MECalcOnly] (3a) = ( 2.367426e+06 ) sec^-1
EvtsPerSec[Rmb+ME] (23) = ( 1.870011e+06 ) sec^-1
EvtsPerSec[MatrixElems] (3) = ( 2.499133e+06 ) sec^-1
EvtsPerSec[MECalcOnly] (3a) = ( 2.499133e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
TOTAL : 3.915927 sec
8,595,619,480 cycles # 2.193 GHz
15,738,404,294 instructions # 1.83 insn per cycle
3.921168735 seconds time elapsed
=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 900) (512y: 154) (512z: 1258)
TOTAL : 3.753305 sec
8,627,314,746 cycles # 2.296 GHz
15,735,767,464 instructions # 1.82 insn per cycle
3.765985274 seconds time elapsed
=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 876) (512y: 154) (512z: 1258)
-------------------------------------------------------------------------
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe
[ PASSED ] 6 tests.
Expand Down
Loading

0 comments on commit c4d2e9e

Please sign in to comment.