diff --git a/CMakeLists.txt b/CMakeLists.txt
index 29b152c..bda4c84 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,8 +2,11 @@ cmake_minimum_required(VERSION 3.12)
 project(hellocmake LANGUAGES CXX)
 
 set(CMAKE_CXX_STANDARD 17)
 if (NOT CMAKE_BUILD_TYPE)
     set(CMAKE_BUILD_TYPE Release)
 endif()
 
 add_executable(main main.cpp)
+# PRIVATE: these flags are only needed to compile this executable itself.
+# NOTE: -march=native ties the binary to the CPU of the build machine.
+target_compile_options(main PRIVATE -ffast-math -march=native)
diff --git a/main.S b/main.S
new file mode 100644
index 0000000..aa08f9b
--- /dev/null
+++ b/main.S
@@ -0,0 +1,1139 @@
+	.file	"main.cpp"
+# GNU C++14 (Ubuntu 9.4.0-1ubuntu1~20.04.1) version 9.4.0 (x86_64-linux-gnu)
+#	compiled by GNU C version 9.4.0, GMP version 6.2.0, MPFR version 4.0.2, MPC version 1.1.0, isl version isl-0.22.1-GMP
+
+# GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
+# options passed:  -imultiarch x86_64-linux-gnu -D_GNU_SOURCE main.cpp
+# -mtune=generic -march=x86-64 -auxbase-strip main.S -fomit-frame-pointer
+# -fverbose-asm -fasynchronous-unwind-tables -fstack-protector-strong
+# -Wformat -Wformat-security -fstack-clash-protection -fcf-protection
+# options enabled:  -fPIC -fPIE -faggressive-loop-optimizations
+# -fassume-phsa -fasynchronous-unwind-tables -fauto-inc-dec -fcommon
+# -fdelete-null-pointer-checks -fdwarf2-cfi-asm -fearly-inlining
+# -feliminate-unused-debug-types -fexceptions -ffp-int-builtin-inexact
+# -ffunction-cse -fgcse-lm -fgnu-runtime -fgnu-unique -fident
+# -finline-atomics -fipa-stack-alignment -fira-hoist-pressure
+# -fira-share-save-slots -fira-share-spill-slots -fivopts
+# -fkeep-static-consts -fleading-underscore -flifetime-dse
+# -flto-odr-type-merging -fmath-errno -fmerge-debug-strings
+# -fomit-frame-pointer -fpeephole -fplt -fprefetch-loop-arrays
+# -freg-struct-return -fsched-critical-path-heuristic
+# -fsched-dep-count-heuristic -fsched-group-heuristic -fsched-interblock
+# -fsched-last-insn-heuristic -fsched-rank-heuristic -fsched-spec
+# -fsched-spec-insn-heuristic -fsched-stalled-insns-dep -fschedule-fusion
+# -fsemantic-interposition -fshow-column -fshrink-wrap-separate
+# -fsigned-zeros -fsplit-ivs-in-unroller -fssa-backprop
+# -fstack-clash-protection -fstack-protector-strong -fstdarg-opt
+# -fstrict-volatile-bitfields -fsync-libcalls -ftrapping-math -ftree-cselim
+# -ftree-forwprop -ftree-loop-if-convert -ftree-loop-im -ftree-loop-ivcanon
+# -ftree-loop-optimize -ftree-parallelize-loops= -ftree-phiprop
+# -ftree-reassoc -ftree-scev-cprop -funit-at-a-time -funwind-tables
+# -fverbose-asm -fzero-initialized-in-bss -m128bit-long-double -m64 -m80387
+# -malign-stringops -mavx256-split-unaligned-load
+# -mavx256-split-unaligned-store -mfancy-math-387 -mfp-ret-in-387 -mfxsr
+# -mglibc -mieee-fp -mlong-double-80 -mmmx -mno-sse4 -mpush-args -mred-zone
+# -msse -msse2 -mstv -mtls-direct-seg-refs -mvzeroupper
+
+	.text
+	.section	.rodata
+	.type	_ZStL19piecewise_construct, @object
+	.size	_ZStL19piecewise_construct, 1
+_ZStL19piecewise_construct:	# std::piecewise_construct, emitted as a local (non-global) symbol
+	.zero	1	# one zero byte, matching the .size of 1 above
+	.section	.text._ZNKSt6chrono8durationIlSt5ratioILl1ELl1000000000EEE5countEv,"axG",@progbits,_ZNKSt6chrono8durationIlSt5ratioILl1ELl1000000000EEE5countEv,comdat
+	.align 2
+	.weak	_ZNKSt6chrono8durationIlSt5ratioILl1ELl1000000000EEE5countEv
+	.type	_ZNKSt6chrono8durationIlSt5ratioILl1ELl1000000000EEE5countEv, @function
+_ZNKSt6chrono8durationIlSt5ratioILl1ELl1000000000EEE5countEv:	# std::chrono::duration (long rep, ratio 1/1000000000) count() const -- returns the stored __r
+.LFB1117:
+	.cfi_startproc
+	endbr64	
+	movq	%rdi, -8(%rsp)	# this, this -- spilled below rsp; rsp itself is not adjusted in this leaf
+# /usr/include/c++/9/chrono:347: 	{ return __r; }
+	movq	-8(%rsp), %rax	# this, tmp84
+	movq	(%rax), %rax	# this_2(D)->__r, _3 -- load __r into the return register
+# /usr/include/c++/9/chrono:347: 	{ return __r; }
+	ret	
+	.cfi_endproc
+.LFE1117:
+	.size	_ZNKSt6chrono8durationIlSt5ratioILl1ELl1000000000EEE5countEv, .-_ZNKSt6chrono8durationIlSt5ratioILl1ELl1000000000EEE5countEv
+	.section	.text._ZNSt6chrono8durationIlSt5ratioILl1ELl1000000000EEEC1IlvEERKT_,"axG",@progbits,_ZNSt6chrono8durationIlSt5ratioILl1ELl1000000000EEEC1IlvEERKT_,comdat
+	.align 2
+	.weak	_ZNSt6chrono8durationIlSt5ratioILl1ELl1000000000EEEC1IlvEERKT_
+	.type	_ZNSt6chrono8durationIlSt5ratioILl1ELl1000000000EEEC1IlvEERKT_, @function
+_ZNSt6chrono8durationIlSt5ratioILl1ELl1000000000EEEC1IlvEERKT_:	# std::chrono::duration (long rep, ratio 1/1000000000) constructor from a const long reference -- copies *__rep into this->__r
+.LFB1122:
+	.cfi_startproc
+	endbr64	
+	movq	%rdi, -8(%rsp)	# this, this
+	movq	%rsi, -16(%rsp)	# __rep, __rep -- both arguments are spilled below rsp without adjusting it
+# /usr/include/c++/9/chrono:332: 	  : __r(static_cast<rep>(__rep)) { }
+	movq	-16(%rsp), %rax	# __rep, tmp83
+	movq	(%rax), %rdx	# *__rep_5(D), _1 -- read the referenced value
+	movq	-8(%rsp), %rax	# this, tmp84
+	movq	%rdx, (%rax)	# _1, this_3(D)->__r -- store it as the tick count
+# /usr/include/c++/9/chrono:332: 	  : __r(static_cast<rep>(__rep)) { }
+	nop	
+	ret	
+	.cfi_endproc
+.LFE1122:
+	.size	_ZNSt6chrono8durationIlSt5ratioILl1ELl1000000000EEEC1IlvEERKT_, .-_ZNSt6chrono8durationIlSt5ratioILl1ELl1000000000EEEC1IlvEERKT_
+	.section	.text._ZSt4sqrtf,"axG",@progbits,_ZSt4sqrtf,comdat
+	.weak	_ZSt4sqrtf
+	.type	_ZSt4sqrtf, @function
+_ZSt4sqrtf:	# std::sqrt(float): thin wrapper that forwards its argument to sqrtf
+.LFB1210:
+	.cfi_startproc
+	endbr64	
+	subq	$24, %rsp	#,
+	.cfi_def_cfa_offset 32
+	movss	%xmm0, 12(%rsp)	# __x, __x -- spill the argument to the stack
+# /usr/include/c++/9/cmath:464:   { return __builtin_sqrtf(__x); }
+	movl	12(%rsp), %eax	# __x, tmp84
+	movd	%eax, %xmm0	# tmp84, -- reload it into xmm0 as the call argument
+	call	sqrtf@PLT	# library call through the PLT; the result stays in xmm0
+# /usr/include/c++/9/cmath:464:   { return __builtin_sqrtf(__x); }
+	addq	$24, %rsp	#,
+	.cfi_def_cfa_offset 8
+	ret	
+	.cfi_endproc
+.LFE1210:
+	.size	_ZSt4sqrtf, .-_ZSt4sqrtf
+	.section	.rodata
+	.align 8
+	.type	_ZL1N, @object
+	.size	_ZL1N, 8
+_ZL1N:	# file-local constant N from main.cpp
+	.quad	48	# N = 48; the loops in this file compare their index against 47
+	.text
+	.globl	_Z5frandv
+	.type	_Z5frandv, @function
+_Z5frandv:	# float frand(): returns (float)rand() / RAND_MAX * 2 - 1
+.LFB1369:
+	.cfi_startproc
+	endbr64	
+	subq	$8, %rsp	#,
+	.cfi_def_cfa_offset 16
+# main.cpp:10:     return (float)std::rand() / RAND_MAX * 2 - 1;
+	call	rand@PLT	#
+# main.cpp:10:     return (float)std::rand() / RAND_MAX * 2 - 1;
+	cvtsi2ssl	%eax, %xmm0	# _1, _2 -- int result of rand() converted to float
+# main.cpp:10:     return (float)std::rand() / RAND_MAX * 2 - 1;
+	movss	.LC0(%rip), %xmm1	#, tmp88 -- presumably RAND_MAX as a float (.LC0 is defined outside this view)
+	divss	%xmm1, %xmm0	# tmp88, _3
+# main.cpp:10:     return (float)std::rand() / RAND_MAX * 2 - 1;
+	addss	%xmm0, %xmm0	# _3, _4 -- the multiply by 2 is done as x + x
+# main.cpp:10:     return (float)std::rand() / RAND_MAX * 2 - 1;
+	movss	.LC1(%rip), %xmm1	#, tmp89 -- presumably 1.0f (.LC1 is defined outside this view)
+	subss	%xmm1, %xmm0	# tmp89, _7
+# main.cpp:11: }
+	addq	$8, %rsp	#,
+	.cfi_def_cfa_offset 8
+	ret	
+	.cfi_endproc
+.LFE1369:
+	.size	_Z5frandv, .-_Z5frandv
+	.globl	stars
+	.bss
+	.align 32
+	.type	stars, @object
+	.size	stars, 1344	# 1344 bytes = 7 float arrays (px py pz vx vy vz mass) x 48 elements x 4 bytes
+stars:	# global zero-initialized object; fields are addressed at element offsets 0, 48, 96, 144, 192, 240, 288
+	.zero	1344
+	.text
+	.globl	_Z4initv
+	.type	_Z4initv, @function
+_Z4initv:	# void init(): fills every field of stars with frand() values; mass gets frand() + 1.f
+.LFB1370:
+	.cfi_startproc
+	endbr64	
+	subq	$24, %rsp	#,
+	.cfi_def_cfa_offset 32
+# main.cpp:23:     for (size_t i = 0; i < N; i++) {
+	movq	$0, 8(%rsp)	#, i
+.L10:	# loop test: the body runs while i is at most 47
+# main.cpp:23:     for (size_t i = 0; i < N; i++) {
+	cmpq	$47, 8(%rsp)	#, i
+	setbe	%al	#, _1
+	testb	%al, %al	# _1
+	je	.L11	#,
+# main.cpp:24:         stars.px[i] = frand();
+	call	_Z5frandv	#
+	movd	%xmm0, %eax	#, _2
+# main.cpp:24:         stars.px[i] = frand();
+	movq	8(%rsp), %rdx	# i, tmp91
+	leaq	0(,%rdx,4), %rcx	#, tmp92 -- byte offset = element index * 4
+	leaq	stars(%rip), %rdx	#, tmp93
+	movl	%eax, (%rcx,%rdx)	# _2, stars.px
+# main.cpp:25:         stars.py[i] = frand();
+	call	_Z5frandv	#
+	movd	%xmm0, %eax	#, _3
+# main.cpp:25:         stars.py[i] = frand();
+	movq	8(%rsp), %rdx	# i, tmp95
+	addq	$48, %rdx	#, tmp94 -- py begins 48 floats into stars
+	leaq	0(,%rdx,4), %rcx	#, tmp96
+	leaq	stars(%rip), %rdx	#, tmp97
+	movl	%eax, (%rcx,%rdx)	# _3, stars.py
+# main.cpp:26:         stars.pz[i] = frand();
+	call	_Z5frandv	#
+	movd	%xmm0, %eax	#, _4
+# main.cpp:26:         stars.pz[i] = frand();
+	movq	8(%rsp), %rdx	# i, tmp99
+	addq	$96, %rdx	#, tmp98
+	leaq	0(,%rdx,4), %rcx	#, tmp100
+	leaq	stars(%rip), %rdx	#, tmp101
+	movl	%eax, (%rcx,%rdx)	# _4, stars.pz
+# main.cpp:27:         stars.vx[i] = frand();
+	call	_Z5frandv	#
+	movd	%xmm0, %eax	#, _5
+# main.cpp:27:         stars.vx[i] = frand();
+	movq	8(%rsp), %rdx	# i, tmp103
+	addq	$144, %rdx	#, tmp102
+	leaq	0(,%rdx,4), %rcx	#, tmp104
+	leaq	stars(%rip), %rdx	#, tmp105
+	movl	%eax, (%rcx,%rdx)	# _5, stars.vx
+# main.cpp:28:         stars.vy[i] = frand();
+	call	_Z5frandv	#
+	movd	%xmm0, %eax	#, _6
+# main.cpp:28:         stars.vy[i] = frand();
+	movq	8(%rsp), %rdx	# i, tmp107
+	addq	$192, %rdx	#, tmp106
+	leaq	0(,%rdx,4), %rcx	#, tmp108
+	leaq	stars(%rip), %rdx	#, tmp109
+	movl	%eax, (%rcx,%rdx)	# _6, stars.vy
+# main.cpp:29:         stars.vz[i] = frand();
+	call	_Z5frandv	#
+	movd	%xmm0, %eax	#, _7
+# main.cpp:29:         stars.vz[i] = frand();
+	movq	8(%rsp), %rdx	# i, tmp111
+	addq	$240, %rdx	#, tmp110
+	leaq	0(,%rdx,4), %rcx	#, tmp112
+	leaq	stars(%rip), %rdx	#, tmp113
+	movl	%eax, (%rcx,%rdx)	# _7, stars.vz
+# main.cpp:30:         stars.mass[i] = frand() + 1.f;
+	call	_Z5frandv	#
+# main.cpp:30:         stars.mass[i] = frand() + 1.f;
+	movss	.LC1(%rip), %xmm1	#, tmp114 -- the 1.f addend
+	addss	%xmm1, %xmm0	# tmp114, _9
+# main.cpp:30:         stars.mass[i] = frand() + 1.f;
+	movq	8(%rsp), %rax	# i, tmp116
+	addq	$288, %rax	#, tmp115
+	leaq	0(,%rax,4), %rdx	#, tmp117
+	leaq	stars(%rip), %rax	#, tmp118
+	movss	%xmm0, (%rdx,%rax)	# _9, stars.mass
+# main.cpp:23:     for (size_t i = 0; i < N; i++) {
+	addq	$1, 8(%rsp)	#, i
+	jmp	.L10	#
+.L11:	# loop exit
+# main.cpp:32: }
+	nop	
+	addq	$24, %rsp	#,
+	.cfi_def_cfa_offset 8
+	ret	
+	.cfi_endproc
+.LFE1370:
+	.size	_Z4initv, .-_Z4initv
+	.section	.rodata
+	.align 4
+	.type	_ZL1G, @object
+	.size	_ZL1G, 4
+_ZL1G:
+	.long	981668463	# float bit pattern 0x3A83126F, about 0.001
+	.align 4
+	.type	_ZL3eps, @object
+	.size	_ZL3eps, 4
+_ZL3eps:
+	.long	981668463	# float bit pattern 0x3A83126F, about 0.001 (same bits as G)
+	.align 4
+	.type	_ZL2dt, @object
+	.size	_ZL2dt, 4
+_ZL2dt:
+	.long	1008981770	# float bit pattern 0x3C23D70A, about 0.01
+	.align 4
+	.type	_ZL4eps2, @object
+	.size	_ZL4eps2, 4
+_ZL4eps2:
+	.long	897988542	# float bit pattern 0x358637BE, about 1e-6 (eps squared)
+	.align 4
+	.type	_ZL3dt2, @object
+	.size	_ZL3dt2, 4
+_ZL3dt2:
+	.long	953267991	# float bit pattern 0x38D1B717, about 1e-4 (dt squared)
+	.align 4
+	.type	_ZL9G_plus_dt, @object
+	.size	_ZL9G_plus_dt, 4
+_ZL9G_plus_dt:
+	.long	925353389	# float bit pattern 0x3727C5AD, about 1e-5; NOTE(review): that is G * dt, not G + dt as the name suggests
+	.text
+	.globl	_Z4stepv
+	.type	_Z4stepv, @function
+_Z4stepv:	# void step(): accumulates velocity changes over all (i, j) pairs, then adds v * dt to every position
+.LFB1371:
+	.cfi_startproc
+	endbr64	
+	subq	$56, %rsp	#,
+	.cfi_def_cfa_offset 64
+# main.cpp:42:     for (size_t i = 0; i < N; i++) {
+	movq	$0, 24(%rsp)	#, i
+.L16:	# outer loop test: leave the velocity pass once i exceeds 47
+# main.cpp:42:     for (size_t i = 0; i < N; i++) {
+	cmpq	$47, 24(%rsp)	#, i
+	ja	.L13	#,
+# main.cpp:44:         for (size_t j = 0; j < N; j++) {
+	movq	$0, 32(%rsp)	#, j
+.L15:	# inner loop test: j runs 0..47 for every i, including j == i
+# main.cpp:44:         for (size_t j = 0; j < N; j++) {
+	cmpq	$47, 32(%rsp)	#, j
+	setbe	%al	#, _1
+	testb	%al, %al	# _1
+	je	.L14	#,
+# main.cpp:45:             float dx = stars.px[j] - stars.px[i];
+	movq	32(%rsp), %rax	# j, tmp126
+	leaq	0(,%rax,4), %rdx	#, tmp127
+	leaq	stars(%rip), %rax	#, tmp128
+	movss	(%rdx,%rax), %xmm0	# stars.px, _2
+# main.cpp:45:             float dx = stars.px[j] - stars.px[i];
+	movq	24(%rsp), %rax	# i, tmp129
+	leaq	0(,%rax,4), %rdx	#, tmp130
+	leaq	stars(%rip), %rax	#, tmp131
+	movss	(%rdx,%rax), %xmm1	# stars.px, _3
+# main.cpp:45:             float dx = stars.px[j] - stars.px[i];
+	subss	%xmm1, %xmm0	# _3, tmp132
+	movss	%xmm0, 4(%rsp)	# tmp132, dx
+# main.cpp:46:             float dy = stars.py[j] - stars.py[i];
+	movq	32(%rsp), %rax	# j, tmp134
+	addq	$48, %rax	#, tmp133
+	leaq	0(,%rax,4), %rdx	#, tmp135
+	leaq	stars(%rip), %rax	#, tmp136
+	movss	(%rdx,%rax), %xmm0	# stars.py, _4
+# main.cpp:46:             float dy = stars.py[j] - stars.py[i];
+	movq	24(%rsp), %rax	# i, tmp138
+	addq	$48, %rax	#, tmp137
+	leaq	0(,%rax,4), %rdx	#, tmp139
+	leaq	stars(%rip), %rax	#, tmp140
+	movss	(%rdx,%rax), %xmm1	# stars.py, _5
+# main.cpp:46:             float dy = stars.py[j] - stars.py[i];
+	subss	%xmm1, %xmm0	# _5, tmp141
+	movss	%xmm0, 8(%rsp)	# tmp141, dy
+# main.cpp:47:             float dz = stars.pz[j] - stars.pz[i];
+	movq	32(%rsp), %rax	# j, tmp143
+	addq	$96, %rax	#, tmp142
+	leaq	0(,%rax,4), %rdx	#, tmp144
+	leaq	stars(%rip), %rax	#, tmp145
+	movss	(%rdx,%rax), %xmm0	# stars.pz, _6
+# main.cpp:47:             float dz = stars.pz[j] - stars.pz[i];
+	movq	24(%rsp), %rax	# i, tmp147
+	addq	$96, %rax	#, tmp146
+	leaq	0(,%rax,4), %rdx	#, tmp148
+	leaq	stars(%rip), %rax	#, tmp149
+	movss	(%rdx,%rax), %xmm1	# stars.pz, _7
+# main.cpp:47:             float dz = stars.pz[j] - stars.pz[i];
+	subss	%xmm1, %xmm0	# _7, tmp150
+	movss	%xmm0, 12(%rsp)	# tmp150, dz
+# main.cpp:48:             float d2 = dx * dx + dy * dy + dz * dz + eps2;
+	movss	4(%rsp), %xmm0	# dx, tmp151
+	movaps	%xmm0, %xmm1	# tmp151, tmp151
+	mulss	%xmm0, %xmm1	# tmp151, tmp151
+# main.cpp:48:             float d2 = dx * dx + dy * dy + dz * dz + eps2;
+	movss	8(%rsp), %xmm0	# dy, tmp152
+	mulss	%xmm0, %xmm0	# tmp152, _9
+# main.cpp:48:             float d2 = dx * dx + dy * dy + dz * dz + eps2;
+	addss	%xmm0, %xmm1	# _9, _10
+# main.cpp:48:             float d2 = dx * dx + dy * dy + dz * dz + eps2;
+	movss	12(%rsp), %xmm0	# dz, tmp153
+	mulss	%xmm0, %xmm0	# tmp153, _11
+# main.cpp:48:             float d2 = dx * dx + dy * dy + dz * dz + eps2;
+	addss	%xmm0, %xmm1	# _11, _12
+# main.cpp:48:             float d2 = dx * dx + dy * dy + dz * dz + eps2;
+	movss	.LC2(%rip), %xmm0	#, tmp155 -- the eps2 addend (.LC2 is defined outside this view)
+	addss	%xmm1, %xmm0	# _12, tmp154
+	movss	%xmm0, 16(%rsp)	# tmp154, d2
+# main.cpp:49:             d2 *= std::sqrt(d2);
+	movl	16(%rsp), %eax	# d2, tmp156
+	movd	%eax, %xmm0	# tmp156,
+	call	_ZSt4sqrtf	# out-of-line call to std::sqrt(float) on every inner iteration
+# main.cpp:49:             d2 *= std::sqrt(d2);
+	movss	16(%rsp), %xmm1	# d2, tmp158
+	mulss	%xmm1, %xmm0	# tmp158, tmp157
+	movss	%xmm0, 16(%rsp)	# tmp157, d2 -- now holds d2 * sqrt(d2)
+# main.cpp:50:             float inv_d2 = 1.f / d2;
+	movss	.LC1(%rip), %xmm0	#, tmp160
+	divss	16(%rsp), %xmm0	# d2, tmp159
+	movss	%xmm0, 20(%rsp)	# tmp159, inv_d2
+# main.cpp:51:             stars.vx[i] += dx * stars.mass[j] * G_plus_dt * inv_d2;
+	movq	24(%rsp), %rax	# i, tmp162
+	addq	$144, %rax	#, tmp161
+	leaq	0(,%rax,4), %rdx	#, tmp163
+	leaq	stars(%rip), %rax	#, tmp164
+	movss	(%rdx,%rax), %xmm1	# stars.vx, _13
+# main.cpp:51:             stars.vx[i] += dx * stars.mass[j] * G_plus_dt * inv_d2;
+	movq	32(%rsp), %rax	# j, tmp166
+	addq	$288, %rax	#, tmp165
+	leaq	0(,%rax,4), %rdx	#, tmp167
+	leaq	stars(%rip), %rax	#, tmp168
+	movss	(%rdx,%rax), %xmm0	# stars.mass, _14
+# main.cpp:51:             stars.vx[i] += dx * stars.mass[j] * G_plus_dt * inv_d2;
+	movaps	%xmm0, %xmm2	# _14, _14
+	mulss	4(%rsp), %xmm2	# dx, _14
+# main.cpp:51:             stars.vx[i] += dx * stars.mass[j] * G_plus_dt * inv_d2;
+	movss	.LC3(%rip), %xmm0	#, tmp169 -- the G_plus_dt factor (.LC3 is defined outside this view)
+	mulss	%xmm2, %xmm0	# _15, _16
+# main.cpp:51:             stars.vx[i] += dx * stars.mass[j] * G_plus_dt * inv_d2;
+	mulss	20(%rsp), %xmm0	# inv_d2, _17
+# main.cpp:51:             stars.vx[i] += dx * stars.mass[j] * G_plus_dt * inv_d2;
+	addss	%xmm1, %xmm0	# _13, _18
+	movq	24(%rsp), %rax	# i, tmp171
+	addq	$144, %rax	#, tmp170
+	leaq	0(,%rax,4), %rdx	#, tmp172
+	leaq	stars(%rip), %rax	#, tmp173
+	movss	%xmm0, (%rdx,%rax)	# _18, stars.vx
+# main.cpp:52:             stars.vy[i] += dy * stars.mass[j] * G_plus_dt * inv_d2;
+	movq	24(%rsp), %rax	# i, tmp175
+	addq	$192, %rax	#, tmp174
+	leaq	0(,%rax,4), %rdx	#, tmp176
+	leaq	stars(%rip), %rax	#, tmp177
+	movss	(%rdx,%rax), %xmm1	# stars.vy, _19
+# main.cpp:52:             stars.vy[i] += dy * stars.mass[j] * G_plus_dt * inv_d2;
+	movq	32(%rsp), %rax	# j, tmp179
+	addq	$288, %rax	#, tmp178
+	leaq	0(,%rax,4), %rdx	#, tmp180
+	leaq	stars(%rip), %rax	#, tmp181
+	movss	(%rdx,%rax), %xmm0	# stars.mass, _20
+# main.cpp:52:             stars.vy[i] += dy * stars.mass[j] * G_plus_dt * inv_d2;
+	movaps	%xmm0, %xmm2	# _20, _20
+	mulss	8(%rsp), %xmm2	# dy, _20
+# main.cpp:52:             stars.vy[i] += dy * stars.mass[j] * G_plus_dt * inv_d2;
+	movss	.LC3(%rip), %xmm0	#, tmp182
+	mulss	%xmm2, %xmm0	# _21, _22
+# main.cpp:52:             stars.vy[i] += dy * stars.mass[j] * G_plus_dt * inv_d2;
+	mulss	20(%rsp), %xmm0	# inv_d2, _23
+# main.cpp:52:             stars.vy[i] += dy * stars.mass[j] * G_plus_dt * inv_d2;
+	addss	%xmm1, %xmm0	# _19, _24
+	movq	24(%rsp), %rax	# i, tmp184
+	addq	$192, %rax	#, tmp183
+	leaq	0(,%rax,4), %rdx	#, tmp185
+	leaq	stars(%rip), %rax	#, tmp186
+	movss	%xmm0, (%rdx,%rax)	# _24, stars.vy
+# main.cpp:53:             stars.vz[i] += dz * stars.mass[j] * G_plus_dt * inv_d2;
+	movq	24(%rsp), %rax	# i, tmp188
+	addq	$240, %rax	#, tmp187
+	leaq	0(,%rax,4), %rdx	#, tmp189
+	leaq	stars(%rip), %rax	#, tmp190
+	movss	(%rdx,%rax), %xmm1	# stars.vz, _25
+# main.cpp:53:             stars.vz[i] += dz * stars.mass[j] * G_plus_dt * inv_d2;
+	movq	32(%rsp), %rax	# j, tmp192
+	addq	$288, %rax	#, tmp191
+	leaq	0(,%rax,4), %rdx	#, tmp193
+	leaq	stars(%rip), %rax	#, tmp194
+	movss	(%rdx,%rax), %xmm0	# stars.mass, _26
+# main.cpp:53:             stars.vz[i] += dz * stars.mass[j] * G_plus_dt * inv_d2;
+	movaps	%xmm0, %xmm2	# _26, _26
+	mulss	12(%rsp), %xmm2	# dz, _26
+# main.cpp:53:             stars.vz[i] += dz * stars.mass[j] * G_plus_dt * inv_d2;
+	movss	.LC3(%rip), %xmm0	#, tmp195
+	mulss	%xmm2, %xmm0	# _27, _28
+# main.cpp:53:             stars.vz[i] += dz * stars.mass[j] * G_plus_dt * inv_d2;
+	mulss	20(%rsp), %xmm0	# inv_d2, _29
+# main.cpp:53:             stars.vz[i] += dz * stars.mass[j] * G_plus_dt * inv_d2;
+	addss	%xmm1, %xmm0	# _25, _30
+	movq	24(%rsp), %rax	# i, tmp197
+	addq	$240, %rax	#, tmp196
+	leaq	0(,%rax,4), %rdx	#, tmp198
+	leaq	stars(%rip), %rax	#, tmp199
+	movss	%xmm0, (%rdx,%rax)	# _30, stars.vz
+# main.cpp:44:         for (size_t j = 0; j < N; j++) {
+	addq	$1, 32(%rsp)	#, j
+	jmp	.L15	#
+.L14:	# inner loop finished: advance i
+# main.cpp:42:     for (size_t i = 0; i < N; i++) {
+	addq	$1, 24(%rsp)	#, i
+	jmp	.L16	#
+.L13:	# velocity pass done: start the position pass
+# main.cpp:57:     for (size_t i = 0; i < N; i++) {
+	movq	$0, 40(%rsp)	#, i
+.L18:	# position loop test: the body runs while i is at most 47
+# main.cpp:57:     for (size_t i = 0; i < N; i++) {
+	cmpq	$47, 40(%rsp)	#, i
+	setbe	%al	#, _31
+	testb	%al, %al	# _31
+	je	.L19	#,
+# main.cpp:58:         stars.px[i] += stars.vx[i] * dt;
+	movq	40(%rsp), %rax	# i, tmp200
+	leaq	0(,%rax,4), %rdx	#, tmp201
+	leaq	stars(%rip), %rax	#, tmp202
+	movss	(%rdx,%rax), %xmm1	# stars.px, _32
+# main.cpp:58:         stars.px[i] += stars.vx[i] * dt;
+	movq	40(%rsp), %rax	# i, tmp204
+	addq	$144, %rax	#, tmp203
+	leaq	0(,%rax,4), %rdx	#, tmp205
+	leaq	stars(%rip), %rax	#, tmp206
+	movss	(%rdx,%rax), %xmm2	# stars.vx, _33
+# main.cpp:58:         stars.px[i] += stars.vx[i] * dt;
+	movss	.LC4(%rip), %xmm0	#, tmp207 -- the dt factor (.LC4 is defined outside this view)
+	mulss	%xmm2, %xmm0	# _33, _34
+# main.cpp:58:         stars.px[i] += stars.vx[i] * dt;
+	addss	%xmm1, %xmm0	# _32, _35
+	movq	40(%rsp), %rax	# i, tmp208
+	leaq	0(,%rax,4), %rdx	#, tmp209
+	leaq	stars(%rip), %rax	#, tmp210
+	movss	%xmm0, (%rdx,%rax)	# _35, stars.px
+# main.cpp:59:         stars.py[i] += stars.vy[i] * dt;
+	movq	40(%rsp), %rax	# i, tmp212
+	addq	$48, %rax	#, tmp211
+	leaq	0(,%rax,4), %rdx	#, tmp213
+	leaq	stars(%rip), %rax	#, tmp214
+	movss	(%rdx,%rax), %xmm1	# stars.py, _36
+# main.cpp:59:         stars.py[i] += stars.vy[i] * dt;
+	movq	40(%rsp), %rax	# i, tmp216
+	addq	$192, %rax	#, tmp215
+	leaq	0(,%rax,4), %rdx	#, tmp217
+	leaq	stars(%rip), %rax	#, tmp218
+	movss	(%rdx,%rax), %xmm2	# stars.vy, _37
+# main.cpp:59:         stars.py[i] += stars.vy[i] * dt;
+	movss	.LC4(%rip), %xmm0	#, tmp219
+	mulss	%xmm2, %xmm0	# _37, _38
+# main.cpp:59:         stars.py[i] += stars.vy[i] * dt;
+	addss	%xmm1, %xmm0	# _36, _39
+	movq	40(%rsp), %rax	# i, tmp221
+	addq	$48, %rax	#, tmp220
+	leaq	0(,%rax,4), %rdx	#, tmp222
+	leaq	stars(%rip), %rax	#, tmp223
+	movss	%xmm0, (%rdx,%rax)	# _39, stars.py
+# main.cpp:60:         stars.pz[i] += stars.vz[i] * dt;
+	movq	40(%rsp), %rax	# i, tmp225
+	addq	$96, %rax	#, tmp224
+	leaq	0(,%rax,4), %rdx	#, tmp226
+	leaq	stars(%rip), %rax	#, tmp227
+	movss	(%rdx,%rax), %xmm1	# stars.pz, _40
+# main.cpp:60:         stars.pz[i] += stars.vz[i] * dt;
+	movq	40(%rsp), %rax	# i, tmp229
+	addq	$240, %rax	#, tmp228
+	leaq	0(,%rax,4), %rdx	#, tmp230
+	leaq	stars(%rip), %rax	#, tmp231
+	movss	(%rdx,%rax), %xmm2	# stars.vz, _41
+# main.cpp:60:         stars.pz[i] += stars.vz[i] * dt;
+	movss	.LC4(%rip), %xmm0	#, tmp232
+	mulss	%xmm2, %xmm0	# _41, _42
+# main.cpp:60:         stars.pz[i] += stars.vz[i] * dt;
+	addss	%xmm1, %xmm0	# _40, _43
+	movq	40(%rsp), %rax	# i, tmp234
+	addq	$96, %rax	#, tmp233
+	leaq	0(,%rax,4), %rdx	#, tmp235
+	leaq	stars(%rip), %rax	#, tmp236
+	movss	%xmm0, (%rdx,%rax)	# _43, stars.pz
+# main.cpp:57:     for (size_t i = 0; i < N; i++) {
+	addq	$1, 40(%rsp)	#, i
+	jmp	.L18	#
+.L19:	# position pass done
+# main.cpp:62: }
+	nop	
+	addq	$56, %rsp	#,
+	.cfi_def_cfa_offset 8
+	ret	
+	.cfi_endproc
+.LFE1371:
+	.size	_Z4stepv, .-_Z4stepv
+	.globl	_Z4calcv
+	.type	_Z4calcv, @function
+_Z4calcv:	# float calc(): adds mass * v2 * 0.5 per star and subtracts mass[j] * mass[i] * half_G / sqrt(d2) per (i, j) pair; result returned in xmm0
+.LFB1372:
+	.cfi_startproc
+	endbr64	
+	subq	$56, %rsp	#,
+	.cfi_def_cfa_offset 64
+# main.cpp:65:     float energy = 0;
+	pxor	%xmm0, %xmm0	# tmp115
+	movss	%xmm0, (%rsp)	# tmp115, energy
+# main.cpp:66:     float half_G = G * 0.5f;
+	movss	.LC6(%rip), %xmm0	#, tmp116 -- G * 0.5f was folded into one constant (.LC6 is defined outside this view)
+	movss	%xmm0, 4(%rsp)	# tmp116, half_G
+# main.cpp:67:     for (size_t i = 0; i < N; i++) {
+	movq	$0, 32(%rsp)	#, i
+.L24:	# outer loop test: return once i exceeds 47
+# main.cpp:67:     for (size_t i = 0; i < N; i++) {
+	cmpq	$47, 32(%rsp)	#, i
+	ja	.L21	#,
+# main.cpp:68:         float v2 = stars.vx[i] * stars.vx[i] + stars.vy[i] * stars.vy[i] + stars.vz[i] * stars.vz[i];
+	movq	32(%rsp), %rax	# i, tmp118
+	addq	$144, %rax	#, tmp117
+	leaq	0(,%rax,4), %rdx	#, tmp119
+	leaq	stars(%rip), %rax	#, tmp120
+	movss	(%rdx,%rax), %xmm1	# stars.vx, _1
+# main.cpp:68:         float v2 = stars.vx[i] * stars.vx[i] + stars.vy[i] * stars.vy[i] + stars.vz[i] * stars.vz[i];
+	movq	32(%rsp), %rax	# i, tmp122
+	addq	$144, %rax	#, tmp121
+	leaq	0(,%rax,4), %rdx	#, tmp123
+	leaq	stars(%rip), %rax	#, tmp124
+	movss	(%rdx,%rax), %xmm0	# stars.vx, _2 -- same element loaded a second time (unoptimized code)
+# main.cpp:68:         float v2 = stars.vx[i] * stars.vx[i] + stars.vy[i] * stars.vy[i] + stars.vz[i] * stars.vz[i];
+	mulss	%xmm0, %xmm1	# _2, _3
+# main.cpp:68:         float v2 = stars.vx[i] * stars.vx[i] + stars.vy[i] * stars.vy[i] + stars.vz[i] * stars.vz[i];
+	movq	32(%rsp), %rax	# i, tmp126
+	addq	$192, %rax	#, tmp125
+	leaq	0(,%rax,4), %rdx	#, tmp127
+	leaq	stars(%rip), %rax	#, tmp128
+	movss	(%rdx,%rax), %xmm2	# stars.vy, _4
+# main.cpp:68:         float v2 = stars.vx[i] * stars.vx[i] + stars.vy[i] * stars.vy[i] + stars.vz[i] * stars.vz[i];
+	movq	32(%rsp), %rax	# i, tmp130
+	addq	$192, %rax	#, tmp129
+	leaq	0(,%rax,4), %rdx	#, tmp131
+	leaq	stars(%rip), %rax	#, tmp132
+	movss	(%rdx,%rax), %xmm0	# stars.vy, _5
+# main.cpp:68:         float v2 = stars.vx[i] * stars.vx[i] + stars.vy[i] * stars.vy[i] + stars.vz[i] * stars.vz[i];
+	mulss	%xmm2, %xmm0	# _4, _6
+# main.cpp:68:         float v2 = stars.vx[i] * stars.vx[i] + stars.vy[i] * stars.vy[i] + stars.vz[i] * stars.vz[i];
+	addss	%xmm0, %xmm1	# _6, _7
+# main.cpp:68:         float v2 = stars.vx[i] * stars.vx[i] + stars.vy[i] * stars.vy[i] + stars.vz[i] * stars.vz[i];
+	movq	32(%rsp), %rax	# i, tmp134
+	addq	$240, %rax	#, tmp133
+	leaq	0(,%rax,4), %rdx	#, tmp135
+	leaq	stars(%rip), %rax	#, tmp136
+	movss	(%rdx,%rax), %xmm2	# stars.vz, _8
+# main.cpp:68:         float v2 = stars.vx[i] * stars.vx[i] + stars.vy[i] * stars.vy[i] + stars.vz[i] * stars.vz[i];
+	movq	32(%rsp), %rax	# i, tmp138
+	addq	$240, %rax	#, tmp137
+	leaq	0(,%rax,4), %rdx	#, tmp139
+	leaq	stars(%rip), %rax	#, tmp140
+	movss	(%rdx,%rax), %xmm0	# stars.vz, _9
+# main.cpp:68:         float v2 = stars.vx[i] * stars.vx[i] + stars.vy[i] * stars.vy[i] + stars.vz[i] * stars.vz[i];
+	mulss	%xmm2, %xmm0	# _8, _10
+# main.cpp:68:         float v2 = stars.vx[i] * stars.vx[i] + stars.vy[i] * stars.vy[i] + stars.vz[i] * stars.vz[i];
+	addss	%xmm1, %xmm0	# _7, tmp141
+	movss	%xmm0, 8(%rsp)	# tmp141, v2
+# main.cpp:69:         energy += stars.mass[i] * v2 * 0.5f;
+	movq	32(%rsp), %rax	# i, tmp143
+	addq	$288, %rax	#, tmp142
+	leaq	0(,%rax,4), %rdx	#, tmp144
+	leaq	stars(%rip), %rax	#, tmp145
+	movss	(%rdx,%rax), %xmm0	# stars.mass, _11
+# main.cpp:69:         energy += stars.mass[i] * v2 * 0.5f;
+	movaps	%xmm0, %xmm1	# _11, _11
+	mulss	8(%rsp), %xmm1	# v2, _11
+# main.cpp:69:         energy += stars.mass[i] * v2 * 0.5f;
+	movss	.LC7(%rip), %xmm0	#, tmp146 -- the 0.5f factor (.LC7 is defined outside this view)
+	mulss	%xmm1, %xmm0	# _12, _13
+# main.cpp:69:         energy += stars.mass[i] * v2 * 0.5f;
+	movss	(%rsp), %xmm1	# energy, tmp148
+	addss	%xmm1, %xmm0	# tmp148, tmp147
+	movss	%xmm0, (%rsp)	# tmp147, energy
+# main.cpp:71:         for (size_t j = 0; j < N; j++) {
+	movq	$0, 40(%rsp)	#, j
+.L23:	# inner loop test: j runs 0..47 for every i, including j == i (d2 is then just the eps2 addend)
+# main.cpp:71:         for (size_t j = 0; j < N; j++) {
+	cmpq	$47, 40(%rsp)	#, j
+	setbe	%al	#, _14
+	testb	%al, %al	# _14
+	je	.L22	#,
+# main.cpp:72:             float dx = stars.px[j] - stars.px[i];
+	movq	40(%rsp), %rax	# j, tmp149
+	leaq	0(,%rax,4), %rdx	#, tmp150
+	leaq	stars(%rip), %rax	#, tmp151
+	movss	(%rdx,%rax), %xmm0	# stars.px, _15
+# main.cpp:72:             float dx = stars.px[j] - stars.px[i];
+	movq	32(%rsp), %rax	# i, tmp152
+	leaq	0(,%rax,4), %rdx	#, tmp153
+	leaq	stars(%rip), %rax	#, tmp154
+	movss	(%rdx,%rax), %xmm1	# stars.px, _16
+# main.cpp:72:             float dx = stars.px[j] - stars.px[i];
+	subss	%xmm1, %xmm0	# _16, tmp155
+	movss	%xmm0, 12(%rsp)	# tmp155, dx
+# main.cpp:73:             float dy = stars.py[j] - stars.py[i];
+	movq	40(%rsp), %rax	# j, tmp157
+	addq	$48, %rax	#, tmp156
+	leaq	0(,%rax,4), %rdx	#, tmp158
+	leaq	stars(%rip), %rax	#, tmp159
+	movss	(%rdx,%rax), %xmm0	# stars.py, _17
+# main.cpp:73:             float dy = stars.py[j] - stars.py[i];
+	movq	32(%rsp), %rax	# i, tmp161
+	addq	$48, %rax	#, tmp160
+	leaq	0(,%rax,4), %rdx	#, tmp162
+	leaq	stars(%rip), %rax	#, tmp163
+	movss	(%rdx,%rax), %xmm1	# stars.py, _18
+# main.cpp:73:             float dy = stars.py[j] - stars.py[i];
+	subss	%xmm1, %xmm0	# _18, tmp164
+	movss	%xmm0, 16(%rsp)	# tmp164, dy
+# main.cpp:74:             float dz = stars.pz[j] - stars.pz[i];
+	movq	40(%rsp), %rax	# j, tmp166
+	addq	$96, %rax	#, tmp165
+	leaq	0(,%rax,4), %rdx	#, tmp167
+	leaq	stars(%rip), %rax	#, tmp168
+	movss	(%rdx,%rax), %xmm0	# stars.pz, _19
+# main.cpp:74:             float dz = stars.pz[j] - stars.pz[i];
+	movq	32(%rsp), %rax	# i, tmp170
+	addq	$96, %rax	#, tmp169
+	leaq	0(,%rax,4), %rdx	#, tmp171
+	leaq	stars(%rip), %rax	#, tmp172
+	movss	(%rdx,%rax), %xmm1	# stars.pz, _20
+# main.cpp:74:             float dz = stars.pz[j] - stars.pz[i];
+	subss	%xmm1, %xmm0	# _20, tmp173
+	movss	%xmm0, 20(%rsp)	# tmp173, dz
+# main.cpp:75:             float d2 = dx * dx + dy * dy + dz * dz + eps2;
+	movss	12(%rsp), %xmm0	# dx, tmp174
+	movaps	%xmm0, %xmm1	# tmp174, tmp174
+	mulss	%xmm0, %xmm1	# tmp174, tmp174
+# main.cpp:75:             float d2 = dx * dx + dy * dy + dz * dz + eps2;
+	movss	16(%rsp), %xmm0	# dy, tmp175
+	mulss	%xmm0, %xmm0	# tmp175, _22
+# main.cpp:75:             float d2 = dx * dx + dy * dy + dz * dz + eps2;
+	addss	%xmm0, %xmm1	# _22, _23
+# main.cpp:75:             float d2 = dx * dx + dy * dy + dz * dz + eps2;
+	movss	20(%rsp), %xmm0	# dz, tmp176
+	mulss	%xmm0, %xmm0	# tmp176, _24
+# main.cpp:75:             float d2 = dx * dx + dy * dy + dz * dz + eps2;
+	addss	%xmm0, %xmm1	# _24, _25
+# main.cpp:75:             float d2 = dx * dx + dy * dy + dz * dz + eps2;
+	movss	.LC2(%rip), %xmm0	#, tmp178 -- the eps2 addend (.LC2 is defined outside this view)
+	addss	%xmm1, %xmm0	# _25, tmp177
+	movss	%xmm0, 24(%rsp)	# tmp177, d2
+# main.cpp:76:             float inv_sqrt_d2 = 1.f / std::sqrt(d2);
+	movl	24(%rsp), %eax	# d2, tmp179
+	movd	%eax, %xmm0	# tmp179,
+	call	_ZSt4sqrtf	# out-of-line call to std::sqrt(float) on every inner iteration
+# main.cpp:76:             float inv_sqrt_d2 = 1.f / std::sqrt(d2);
+	movss	.LC1(%rip), %xmm1	#, tmp181
+	divss	%xmm0, %xmm1	# _26, tmp181
+	movaps	%xmm1, %xmm0	# tmp181, tmp180
+	movss	%xmm0, 28(%rsp)	# tmp180, inv_sqrt_d2
+# main.cpp:77:             energy -= stars.mass[j] * stars.mass[i] * half_G * inv_sqrt_d2;
+	movq	40(%rsp), %rax	# j, tmp183
+	addq	$288, %rax	#, tmp182
+	leaq	0(,%rax,4), %rdx	#, tmp184
+	leaq	stars(%rip), %rax	#, tmp185
+	movss	(%rdx,%rax), %xmm1	# stars.mass, _27
+# main.cpp:77:             energy -= stars.mass[j] * stars.mass[i] * half_G * inv_sqrt_d2;
+	movq	32(%rsp), %rax	# i, tmp187
+	addq	$288, %rax	#, tmp186
+	leaq	0(,%rax,4), %rdx	#, tmp188
+	leaq	stars(%rip), %rax	#, tmp189
+	movss	(%rdx,%rax), %xmm0	# stars.mass, _28
+# main.cpp:77:             energy -= stars.mass[j] * stars.mass[i] * half_G * inv_sqrt_d2;
+	mulss	%xmm1, %xmm0	# _27, _29
+# main.cpp:77:             energy -= stars.mass[j] * stars.mass[i] * half_G * inv_sqrt_d2;
+	mulss	4(%rsp), %xmm0	# half_G, _30
+# main.cpp:77:             energy -= stars.mass[j] * stars.mass[i] * half_G * inv_sqrt_d2;
+	movaps	%xmm0, %xmm1	# _30, _30
+	mulss	28(%rsp), %xmm1	# inv_sqrt_d2, _30
+# main.cpp:77:             energy -= stars.mass[j] * stars.mass[i] * half_G * inv_sqrt_d2;
+	movss	(%rsp), %xmm0	# energy, tmp191
+	subss	%xmm1, %xmm0	# _31, tmp190
+	movss	%xmm0, (%rsp)	# tmp190, energy
+# main.cpp:71:         for (size_t j = 0; j < N; j++) {
+	addq	$1, 40(%rsp)	#, j
+	jmp	.L23	#
+.L22:	# inner loop finished: advance i
+# main.cpp:67:     for (size_t i = 0; i < N; i++) {
+	addq	$1, 32(%rsp)	#, i
+	jmp	.L24	#
+.L21:	# both loops done
+# main.cpp:80:     return energy;
+	movss	(%rsp), %xmm0	# energy, _55 -- return value
+# main.cpp:81: }
+	addq	$56, %rsp	#,
+	.cfi_def_cfa_offset 8
+	ret	
+	.cfi_endproc
+.LFE1372:
+	.size	_Z4calcv, .-_Z4calcv
+	.align 2
+	.type	_ZZ4mainENKUlvE_clEv, @function
+_ZZ4mainENKUlvE_clEv:	# call operator of the lambda defined in main: calls step() 100000 times
+.LFB1375:
+	.cfi_startproc
+	subq	$40, %rsp	#,
+	.cfi_def_cfa_offset 48
+	movq	%rdi, 8(%rsp)	# __closure, __closure -- stored but not read again in this function
+# main.cpp:96:         for (int i = 0; i < 100000; i++)
+	movl	$0, 28(%rsp)	#, i
+.L28:	# loop test: exit once i exceeds 99999
+# main.cpp:96:         for (int i = 0; i < 100000; i++)
+	cmpl	$99999, 28(%rsp)	#, i
+	jg	.L29	#,
+# main.cpp:97:             step();
+	call	_Z4stepv	#
+# main.cpp:96:         for (int i = 0; i < 100000; i++)
+	addl	$1, 28(%rsp)	#, i
+	jmp	.L28	#
+.L29:	# loop exit
+# main.cpp:98:     });
+	nop	
+	addq	$40, %rsp	#,
+	.cfi_def_cfa_offset 8
+	ret	
+	.cfi_endproc
+.LFE1375:
+	.size	_ZZ4mainENKUlvE_clEv, .-_ZZ4mainENKUlvE_clEv
+	.section	.rodata
+.LC8:	# printf format used at main.cpp:94
+	.string	"Initial energy: %f\n"
+.LC9:	# printf format used at main.cpp:99
+	.string	"Final energy: %f\n"
+.LC10:	# printf format used at main.cpp:100
+	.string	"Time elapsed: %ld ms\n"
+	.text
+	.globl	main
+	.type	main, @function
+main:	# int main(): init(), print calc(), run the benchmarked lambda, print calc() again and the elapsed time, return 0
+.LFB1374:
+	.cfi_startproc
+	endbr64	
+	subq	$40, %rsp	#,
+	.cfi_def_cfa_offset 48
+# main.cpp:92: int main() {
+	movq	%fs:40, %rax	# MEM[(<address-space-1> long unsigned int *)40B], tmp92
+	movq	%rax, 24(%rsp)	# tmp92, D.28846 -- stack-protector guard value saved in the frame
+	xorl	%eax, %eax	# tmp92
+# main.cpp:93:     init();
+	call	_Z4initv	#
+# main.cpp:94:     printf("Initial energy: %f\n", calc());
+	call	_Z4calcv	#
+# main.cpp:94:     printf("Initial energy: %f\n", calc());
+	cvtss2sd	%xmm0, %xmm0	# _1, _2 -- float widened to double for the variadic call
+	leaq	.LC8(%rip), %rdi	#,
+	movl	$1, %eax	#, -- one vector register carries a variadic argument
+	call	printf@PLT	#
+# main.cpp:95:     auto dt = benchmark([&] {
+	leaq	15(%rsp), %rax	#, tmp89 -- address of the closure object
+	movq	%rax, %rdi	# tmp89,
+	call	_Z9benchmarkIZ4mainEUlvE_ElRKT_	#
+	movq	%rax, 16(%rsp)	# _10, dt
+# main.cpp:99:     printf("Final energy: %f\n", calc());
+	call	_Z4calcv	#
+# main.cpp:99:     printf("Final energy: %f\n", calc());
+	cvtss2sd	%xmm0, %xmm0	# _3, _4
+	leaq	.LC9(%rip), %rdi	#,
+	movl	$1, %eax	#,
+	call	printf@PLT	#
+# main.cpp:100:     printf("Time elapsed: %ld ms\n", dt);
+	movq	16(%rsp), %rax	# dt, tmp90
+	movq	%rax, %rsi	# tmp90,
+	leaq	.LC10(%rip), %rdi	#,
+	movl	$0, %eax	#, -- no vector registers used for this call
+	call	printf@PLT	#
+# main.cpp:101:     return 0;
+	movl	$0, %eax	#, _16
+# main.cpp:102: }
+	movq	24(%rsp), %rdx	# D.28846, tmp93
+	xorq	%fs:40, %rdx	# MEM[(<address-space-1> long unsigned int *)40B], tmp93 -- compare the saved guard with the current one
+	je	.L32	#,
+	call	__stack_chk_fail@PLT	# reached only when the guard value changed
+.L32:
+	addq	$40, %rsp	#,
+	.cfi_def_cfa_offset 8
+	ret	
+	.cfi_endproc
+.LFE1374:
+	.size	main, .-main
+	.section	.text._ZNSt6chrono13duration_castINS_8durationIlSt5ratioILl1ELl1000EEEElS2_ILl1ELl1000000000EEEENSt9enable_ifIXsrNS_13__is_durationIT_EE5valueES8_E4typeERKNS1_IT0_T1_EE,"axG",@progbits,_ZNSt6chrono13duration_castINS_8durationIlSt5ratioILl1ELl1000EEEElS2_ILl1ELl1000000000EEEENSt9enable_ifIXsrNS_13__is_durationIT_EE5valueES8_E4typeERKNS1_IT0_T1_EE,comdat
+	.weak	_ZNSt6chrono13duration_castINS_8durationIlSt5ratioILl1ELl1000EEEElS2_ILl1ELl1000000000EEEENSt9enable_ifIXsrNS_13__is_durationIT_EE5valueES8_E4typeERKNS1_IT0_T1_EE
+	.type	_ZNSt6chrono13duration_castINS_8durationIlSt5ratioILl1ELl1000EEEElS2_ILl1ELl1000000000EEEENSt9enable_ifIXsrNS_13__is_durationIT_EE5valueES8_E4typeERKNS1_IT0_T1_EE, @function
+_ZNSt6chrono13duration_castINS_8durationIlSt5ratioILl1ELl1000EEEElS2_ILl1ELl1000000000EEEENSt9enable_ifIXsrNS_13__is_durationIT_EE5valueES8_E4typeERKNS1_IT0_T1_EE:	# std::chrono::duration_cast from ratio 1/1000000000 to ratio 1/1000: forwards __d to __duration_cast_impl __cast
+.LFB1407:
+	.cfi_startproc
+	endbr64	
+	subq	$24, %rsp	#,
+	.cfi_def_cfa_offset 32
+	movq	%rdi, 8(%rsp)	# __d, __d
+# /usr/include/c++/9/chrono:200: 	return __dc::__cast(__d);
+	movq	8(%rsp), %rax	# __d, tmp84
+	movq	%rax, %rdi	# tmp84, -- pass the same reference on as the only argument
+	call	_ZNSt6chrono20__duration_cast_implINS_8durationIlSt5ratioILl1ELl1000EEEES2_ILl1ELl1000000EElLb1ELb0EE6__castIlS2_ILl1ELl1000000000EEEES4_RKNS1_IT_T0_EE	# result in rax is returned unchanged
+# /usr/include/c++/9/chrono:201:       }
+	addq	$24, %rsp	#,
+	.cfi_def_cfa_offset 8
+	ret	
+	.cfi_endproc
+.LFE1407:
+	.size	_ZNSt6chrono13duration_castINS_8durationIlSt5ratioILl1ELl1000EEEElS2_ILl1ELl1000000000EEEENSt9enable_ifIXsrNS_13__is_durationIT_EE5valueES8_E4typeERKNS1_IT0_T1_EE, .-_ZNSt6chrono13duration_castINS_8durationIlSt5ratioILl1ELl1000EEEElS2_ILl1ELl1000000000EEEENSt9enable_ifIXsrNS_13__is_durationIT_EE5valueES8_E4typeERKNS1_IT0_T1_EE
+	.section	.text._ZNSt6chronomiINS_3_V212steady_clockENS_8durationIlSt5ratioILl1ELl1000000000EEEES6_EENSt11common_typeIJT0_T1_EE4typeERKNS_10time_pointIT_S8_EERKNSC_ISD_S9_EE,"axG",@progbits,_ZNSt6chronomiINS_3_V212steady_clockENS_8durationIlSt5ratioILl1ELl1000000000EEEES6_EENSt11common_typeIJT0_T1_EE4typeERKNS_10time_pointIT_S8_EERKNSC_ISD_S9_EE,comdat
+	.weak	_ZNSt6chronomiINS_3_V212steady_clockENS_8durationIlSt5ratioILl1ELl1000000000EEEES6_EENSt11common_typeIJT0_T1_EE4typeERKNS_10time_pointIT_S8_EERKNSC_ISD_S9_EE
+	.type	_ZNSt6chronomiINS_3_V212steady_clockENS_8durationIlSt5ratioILl1ELl1000000000EEEES6_EENSt11common_typeIJT0_T1_EE4typeERKNS_10time_pointIT_S8_EERKNSC_ISD_S9_EE, @function
_ZNSt6chronomiINS_3_V212steady_clockENS_8durationIlSt5ratioILl1ELl1000000000EEEES6_EENSt11common_typeIJT0_T1_EE4typeERKNS_10time_pointIT_S8_EERKNSC_ISD_S9_EE:
+.LFB1408:
+	.cfi_startproc	# operator-(time_point, time_point) for steady_clock: returns __lhs.time_since_epoch() - __rhs.time_since_epoch()
+	endbr64	
+	subq	$56, %rsp	# reserve stack for the two argument spills, two duration temporaries and the canary
+	.cfi_def_cfa_offset 64
+	movq	%rdi, 8(%rsp)	# __lhs, __lhs
+	movq	%rsi, (%rsp)	# __rhs, __rhs
+# /usr/include/c++/9/chrono:762:       operator-(const time_point<_Clock, _Dur1>& __lhs,
+	movq	%fs:40, %rax	# MEM[(<address-space-1> long unsigned int *)40B], tmp93 -- load the stack-protector guard value
+	movq	%rax, 40(%rsp)	# tmp93, D.28851 -- keep a copy in the frame; it is re-checked before returning
+	xorl	%eax, %eax	# tmp93
+# /usr/include/c++/9/chrono:764:       { return __lhs.time_since_epoch() - __rhs.time_since_epoch(); }
+	movq	(%rsp), %rax	# __rhs, tmp84
+	movq	%rax, %rdi	# tmp84,
+	call	_ZNKSt6chrono10time_pointINS_3_V212steady_clockENS_8durationIlSt5ratioILl1ELl1000000000EEEEE16time_since_epochEv	# __rhs.time_since_epoch()
+	movq	%rax, 32(%rsp)	# tmp86, D.28631 -- store the rhs duration in a stack temporary
+	movq	8(%rsp), %rax	# __lhs, tmp87
+	movq	%rax, %rdi	# tmp87,
+	call	_ZNKSt6chrono10time_pointINS_3_V212steady_clockENS_8durationIlSt5ratioILl1ELl1000000000EEEEE16time_since_epochEv	# __lhs.time_since_epoch()
+	movq	%rax, 24(%rsp)	# tmp89, D.28630 -- store the lhs duration in a stack temporary
+	leaq	32(%rsp), %rdx	#, tmp90
+	leaq	24(%rsp), %rax	#, tmp91
+	movq	%rdx, %rsi	# tmp90, -- second argument: address of the rhs duration
+	movq	%rax, %rdi	# tmp91, -- first argument: address of the lhs duration
+	call	_ZNSt6chronomiIlSt5ratioILl1ELl1000000000EElS2_EENSt11common_typeIJNS_8durationIT_T0_EENS4_IT1_T2_EEEE4typeERKS7_RKSA_	# duration - duration; the difference is left in %rax as this function's return value
+# /usr/include/c++/9/chrono:764:       { return __lhs.time_since_epoch() - __rhs.time_since_epoch(); }
+	movq	40(%rsp), %rcx	# D.28851, tmp94
+	xorq	%fs:40, %rcx	# MEM[(<address-space-1> long unsigned int *)40B], tmp94 -- zero iff the saved guard value is intact
+	je	.L37	#,
+	call	__stack_chk_fail@PLT	# reached only when the saved guard value was overwritten
+.L37:
+	addq	$56, %rsp	#,
+	.cfi_def_cfa_offset 8
+	ret	
+	.cfi_endproc
+.LFE1408:
+	.size	_ZNSt6chronomiINS_3_V212steady_clockENS_8durationIlSt5ratioILl1ELl1000000000EEEES6_EENSt11common_typeIJT0_T1_EE4typeERKNS_10time_pointIT_S8_EERKNSC_ISD_S9_EE, .-_ZNSt6chronomiINS_3_V212steady_clockENS_8durationIlSt5ratioILl1ELl1000000000EEEES6_EENSt11common_typeIJT0_T1_EE4typeERKNS_10time_pointIT_S8_EERKNSC_ISD_S9_EE
+	.section	.text._ZNSt6chronomiIlSt5ratioILl1ELl1000000000EElS2_EENSt11common_typeIJNS_8durationIT_T0_EENS4_IT1_T2_EEEE4typeERKS7_RKSA_,"axG",@progbits,_ZNSt6chronomiIlSt5ratioILl1ELl1000000000EElS2_EENSt11common_typeIJNS_8durationIT_T0_EENS4_IT1_T2_EEEE4typeERKS7_RKSA_,comdat
+	.weak	_ZNSt6chronomiIlSt5ratioILl1ELl1000000000EElS2_EENSt11common_typeIJNS_8durationIT_T0_EENS4_IT1_T2_EEEE4typeERKS7_RKSA_
+	.type	_ZNSt6chronomiIlSt5ratioILl1ELl1000000000EElS2_EENSt11common_typeIJNS_8durationIT_T0_EENS4_IT1_T2_EEEE4typeERKS7_RKSA_, @function
_ZNSt6chronomiIlSt5ratioILl1ELl1000000000EElS2_EENSt11common_typeIJNS_8durationIT_T0_EENS4_IT1_T2_EEEE4typeERKS7_RKSA_:
+.LFB1409:
+	.cfi_startproc	# operator-(duration, duration) on ratio<1,1000000000> durations: builds a duration from __lhs.count() - __rhs.count()
+	endbr64	
+	pushq	%rbx	# save %rbx; it carries lhs.count() across the second count() call
+	.cfi_def_cfa_offset 16
+	.cfi_offset 3, -16
+	subq	$64, %rsp	#,
+	.cfi_def_cfa_offset 80
+	movq	%rdi, 8(%rsp)	# __lhs, __lhs
+	movq	%rsi, (%rsp)	# __rhs, __rhs
+# /usr/include/c++/9/chrono:463:       operator-(const duration<_Rep1, _Period1>& __lhs,
+	movq	%fs:40, %rax	# MEM[(<address-space-1> long unsigned int *)40B], tmp96 -- load the stack-protector guard value
+	movq	%rax, 56(%rsp)	# tmp96, D.28852 -- keep a copy in the frame; it is re-checked before returning
+	xorl	%eax, %eax	# tmp96
+# /usr/include/c++/9/chrono:469: 	return __cd(__cd(__lhs).count() - __cd(__rhs).count());
+	movq	8(%rsp), %rax	# __lhs, tmp87
+	movq	(%rax), %rax	# *__lhs_5(D), tmp88
+	movq	%rax, 24(%rsp)	# tmp88, D.28647 -- temporary copy of lhs, i.e. __cd(__lhs)
+	leaq	24(%rsp), %rax	#, tmp89
+	movq	%rax, %rdi	# tmp89,
+	call	_ZNKSt6chrono8durationIlSt5ratioILl1ELl1000000000EEE5countEv	# __cd(__lhs).count()
+	movq	%rax, %rbx	#, _1 -- hold lhs.count() in %rbx
+	movq	(%rsp), %rax	# __rhs, tmp90
+	movq	(%rax), %rax	# *__rhs_8(D), tmp91
+	movq	%rax, 32(%rsp)	# tmp91, D.28649 -- temporary copy of rhs, i.e. __cd(__rhs)
+	leaq	32(%rsp), %rax	#, tmp92
+	movq	%rax, %rdi	# tmp92,
+	call	_ZNKSt6chrono8durationIlSt5ratioILl1ELl1000000000EEE5countEv	# __cd(__rhs).count()
+	subq	%rax, %rbx	# _2, _1 -- lhs.count() - rhs.count()
+	movq	%rbx, %rax	# _1, _3
+	movq	%rax, 40(%rsp)	# _3, D.28650 -- store the difference so its address can be passed to the constructor
+# /usr/include/c++/9/chrono:469: 	return __cd(__cd(__lhs).count() - __cd(__rhs).count());
+	leaq	40(%rsp), %rdx	#, tmp93
+	leaq	48(%rsp), %rax	#, tmp94
+	movq	%rdx, %rsi	# tmp93, -- second argument: address of the difference
+	movq	%rax, %rdi	# tmp94, -- first argument: address of the result object at 48(%rsp)
+	call	_ZNSt6chrono8durationIlSt5ratioILl1ELl1000000000EEEC1IlvEERKT_	# construct the result duration from the difference
+	movq	48(%rsp), %rax	# D.28651, D.28813 -- load the constructed duration as the return value
+# /usr/include/c++/9/chrono:470:       }
+	movq	56(%rsp), %rcx	# D.28852, tmp97
+	xorq	%fs:40, %rcx	# MEM[(<address-space-1> long unsigned int *)40B], tmp97 -- zero iff the saved guard value is intact
+	je	.L40	#,
+	call	__stack_chk_fail@PLT	# reached only when the saved guard value was overwritten
+.L40:
+	addq	$64, %rsp	#,
+	.cfi_def_cfa_offset 16
+	popq	%rbx	# restore the caller's %rbx
+	.cfi_def_cfa_offset 8
+	ret	
+	.cfi_endproc
+.LFE1409:
+	.size	_ZNSt6chronomiIlSt5ratioILl1ELl1000000000EElS2_EENSt11common_typeIJNS_8durationIT_T0_EENS4_IT1_T2_EEEE4typeERKS7_RKSA_, .-_ZNSt6chronomiIlSt5ratioILl1ELl1000000000EElS2_EENSt11common_typeIJNS_8durationIT_T0_EENS4_IT1_T2_EEEE4typeERKS7_RKSA_
+	.section	.text._ZNKSt6chrono10time_pointINS_3_V212steady_clockENS_8durationIlSt5ratioILl1ELl1000000000EEEEE16time_since_epochEv,"axG",@progbits,_ZNKSt6chrono10time_pointINS_3_V212steady_clockENS_8durationIlSt5ratioILl1ELl1000000000EEEEE16time_since_epochEv,comdat
+	.align 2
+	.weak	_ZNKSt6chrono10time_pointINS_3_V212steady_clockENS_8durationIlSt5ratioILl1ELl1000000000EEEEE16time_since_epochEv
+	.type	_ZNKSt6chrono10time_pointINS_3_V212steady_clockENS_8durationIlSt5ratioILl1ELl1000000000EEEEE16time_since_epochEv, @function
_ZNKSt6chrono10time_pointINS_3_V212steady_clockENS_8durationIlSt5ratioILl1ELl1000000000EEEEE16time_since_epochEv:
+.LFB1410:
+	.cfi_startproc	# time_point::time_since_epoch() const: returns the stored duration member __d
+	endbr64	
+	movq	%rdi, -8(%rsp)	# this, this -- makes no calls, so `this` is spilled below %rsp without adjusting the stack pointer
+# /usr/include/c++/9/chrono:650: 	{ return __d; }
+	movq	-8(%rsp), %rax	# this, tmp84
+	movq	(%rax), %rax	# this_2(D)->__d, D.28811 -- load __d (at offset 0 of the object) as the return value
+# /usr/include/c++/9/chrono:650: 	{ return __d; }
+	ret	
+	.cfi_endproc
+.LFE1410:
+	.size	_ZNKSt6chrono10time_pointINS_3_V212steady_clockENS_8durationIlSt5ratioILl1ELl1000000000EEEEE16time_since_epochEv, .-_ZNKSt6chrono10time_pointINS_3_V212steady_clockENS_8durationIlSt5ratioILl1ELl1000000000EEEEE16time_since_epochEv
+	.text
+	.type	_Z9benchmarkIZ4mainEUlvE_ElRKT_, @function
_Z9benchmarkIZ4mainEUlvE_ElRKT_:
+.LFB1406:
+	.cfi_startproc	# benchmark(func) instantiated for the lambda in main: calls func once between two steady_clock::now() samples and returns the elapsed time as a millisecond count
+	endbr64	
+	subq	$72, %rsp	# reserve stack for func, t0, t1, dt, the t1 - t0 temporary and the canary
+	.cfi_def_cfa_offset 80
+	movq	%rdi, 8(%rsp)	# func, func -- spill the address of the lambda object
+# main.cpp:84: long benchmark(Func const &func) {
+	movq	%fs:40, %rax	# MEM[(<address-space-1> long unsigned int *)40B], tmp98 -- load the stack-protector guard value
+	movq	%rax, 56(%rsp)	# tmp98, D.28853 -- keep a copy in the frame; it is re-checked before returning
+	xorl	%eax, %eax	# tmp98
+# main.cpp:85:     auto t0 = std::chrono::steady_clock::now();
+	call	_ZNSt6chrono3_V212steady_clock3nowEv@PLT	# first clock sample
+	movq	%rax, 24(%rsp)	# tmp85, t0
+# main.cpp:86:     func();
+	movq	8(%rsp), %rax	# func, tmp86
+	movq	%rax, %rdi	# tmp86,
+	call	_ZZ4mainENKUlvE_clEv	# invoke the lambda's operator() -- the code being timed
+# main.cpp:87:     auto t1 = std::chrono::steady_clock::now();
+	call	_ZNSt6chrono3_V212steady_clock3nowEv@PLT	# second clock sample
+	movq	%rax, 32(%rsp)	# tmp88, t1
+# main.cpp:88:     auto dt = std::chrono::duration_cast<std::chrono::milliseconds>(t1 - t0);
+	leaq	24(%rsp), %rdx	#, tmp89
+	leaq	32(%rsp), %rax	#, tmp90
+	movq	%rdx, %rsi	# tmp89, -- second argument: &t0
+	movq	%rax, %rdi	# tmp90, -- first argument: &t1
+	call	_ZNSt6chronomiINS_3_V212steady_clockENS_8durationIlSt5ratioILl1ELl1000000000EEEES6_EENSt11common_typeIJT0_T1_EE4typeERKNS_10time_pointIT_S8_EERKNSC_ISD_S9_EE	# t1 - t0
+	movq	%rax, 48(%rsp)	# tmp92, D.28567 -- temporary holding t1 - t0
+# main.cpp:88:     auto dt = std::chrono::duration_cast<std::chrono::milliseconds>(t1 - t0);
+	leaq	48(%rsp), %rax	#, tmp93
+	movq	%rax, %rdi	# tmp93,
+	call	_ZNSt6chrono13duration_castINS_8durationIlSt5ratioILl1ELl1000EEEElS2_ILl1ELl1000000000EEEENSt9enable_ifIXsrNS_13__is_durationIT_EE5valueES8_E4typeERKNS1_IT0_T1_EE	# convert the difference to milliseconds
+	movq	%rax, 40(%rsp)	# tmp95, dt
+# main.cpp:89:     return dt.count();
+	leaq	40(%rsp), %rax	#, tmp96
+	movq	%rax, %rdi	# tmp96,
+	call	_ZNKSt6chrono8durationIlSt5ratioILl1ELl1000EEE5countEv	# dt.count(); the value stays in %rax as the return value
+# main.cpp:90: }
+	movq	56(%rsp), %rcx	# D.28853, tmp99
+	xorq	%fs:40, %rcx	# MEM[(<address-space-1> long unsigned int *)40B], tmp99 -- zero iff the saved guard value is intact
+	je	.L45	#,
+	call	__stack_chk_fail@PLT	# reached only when the saved guard value was overwritten
+.L45:
+	addq	$72, %rsp	#,
+	.cfi_def_cfa_offset 8
+	ret	
+	.cfi_endproc
+.LFE1406:
+	.size	_Z9benchmarkIZ4mainEUlvE_ElRKT_, .-_Z9benchmarkIZ4mainEUlvE_ElRKT_
+	.section	.text._ZNSt6chrono20__duration_cast_implINS_8durationIlSt5ratioILl1ELl1000EEEES2_ILl1ELl1000000EElLb1ELb0EE6__castIlS2_ILl1ELl1000000000EEEES4_RKNS1_IT_T0_EE,"axG",@progbits,_ZNSt6chrono20__duration_cast_implINS_8durationIlSt5ratioILl1ELl1000EEEES2_ILl1ELl1000000EElLb1ELb0EE6__castIlS2_ILl1ELl1000000000EEEES4_RKNS1_IT_T0_EE,comdat
+	.weak	_ZNSt6chrono20__duration_cast_implINS_8durationIlSt5ratioILl1ELl1000EEEES2_ILl1ELl1000000EElLb1ELb0EE6__castIlS2_ILl1ELl1000000000EEEES4_RKNS1_IT_T0_EE
+	.type	_ZNSt6chrono20__duration_cast_implINS_8durationIlSt5ratioILl1ELl1000EEEES2_ILl1ELl1000000EElLb1ELb0EE6__castIlS2_ILl1ELl1000000000EEEES4_RKNS1_IT_T0_EE, @function
_ZNSt6chrono20__duration_cast_implINS_8durationIlSt5ratioILl1ELl1000EEEES2_ILl1ELl1000000EElLb1ELb0EE6__castIlS2_ILl1ELl1000000000EEEES4_RKNS1_IT_T0_EE:
+.LFB1417:
+	.cfi_startproc	# __duration_cast_impl::__cast: divides the source count by 1000000 (the conversion factor's denominator) and wraps the quotient in a ratio<1,1000> duration
+	endbr64	
+	subq	$56, %rsp	#,
+	.cfi_def_cfa_offset 64
+	movq	%rdi, 8(%rsp)	# __d, __d -- spill the address of the source duration
+# /usr/include/c++/9/chrono:149: 	  __cast(const duration<_Rep, _Period>& __d)
+	movq	%fs:40, %rax	# MEM[(<address-space-1> long unsigned int *)40B], tmp94 -- load the stack-protector guard value
+	movq	%rax, 40(%rsp)	# tmp94, D.28854 -- keep a copy in the frame; it is re-checked before returning
+	xorl	%eax, %eax	# tmp94
+# /usr/include/c++/9/chrono:153: 	      static_cast<_CR>(__d.count()) / static_cast<_CR>(_CF::den)));
+	movq	8(%rsp), %rax	# __d, tmp86
+	movq	%rax, %rdi	# tmp86,
+	call	_ZNKSt6chrono8durationIlSt5ratioILl1ELl1000000000EEE5countEv	# __d.count()
+	movq	%rax, %rcx	#, _1 -- keep the count; it is needed again for the sign correction
+# /usr/include/c++/9/chrono:153: 	      static_cast<_CR>(__d.count()) / static_cast<_CR>(_CF::den)));
+	movabsq	$4835703278458516699, %rdx	#, tmp88 -- ceil(2^82 / 1000000): multiplier replacing the signed division by 1000000
+	movq	%rcx, %rax	# _1, tmp95
+	imulq	%rdx	# tmp88 -- signed 128-bit product count * multiplier in %rdx:%rax
+	sarq	$18, %rdx	#, tmp89 -- high half >> 18, i.e. the full product >> 82
+	movq	%rcx, %rax	# _1, _1
+	sarq	$63, %rax	#, _1 -- 0 for a non-negative count, -1 for a negative one
+	subq	%rax, %rdx	# tmp90, tmp89 -- adds 1 for negative counts so the quotient truncates toward zero
+	movq	%rdx, %rax	# tmp89, _2
+	movq	%rax, 24(%rsp)	# _2, D.28748 -- store the quotient so its address can be passed to the constructor
+# /usr/include/c++/9/chrono:153: 	      static_cast<_CR>(__d.count()) / static_cast<_CR>(_CF::den)));
+	leaq	24(%rsp), %rdx	#, tmp91
+	leaq	32(%rsp), %rax	#, tmp92
+	movq	%rdx, %rsi	# tmp91, -- second argument: address of the quotient
+	movq	%rax, %rdi	# tmp92, -- first argument: address of the result object at 32(%rsp)
+	call	_ZNSt6chrono8durationIlSt5ratioILl1ELl1000EEEC1IlvEERKT_	# construct the ratio<1,1000> duration from the quotient
+	movq	32(%rsp), %rax	# D.28749, D.28823 -- load the constructed duration as the return value
+# /usr/include/c++/9/chrono:154: 	  }
+	movq	40(%rsp), %rsi	# D.28854, tmp96
+	xorq	%fs:40, %rsi	# MEM[(<address-space-1> long unsigned int *)40B], tmp96 -- zero iff the saved guard value is intact
+	je	.L48	#,
+	call	__stack_chk_fail@PLT	# reached only when the saved guard value was overwritten
+.L48:
+	addq	$56, %rsp	#,
+	.cfi_def_cfa_offset 8
+	ret	
+	.cfi_endproc
+.LFE1417:
+	.size	_ZNSt6chrono20__duration_cast_implINS_8durationIlSt5ratioILl1ELl1000EEEES2_ILl1ELl1000000EElLb1ELb0EE6__castIlS2_ILl1ELl1000000000EEEES4_RKNS1_IT_T0_EE, .-_ZNSt6chrono20__duration_cast_implINS_8durationIlSt5ratioILl1ELl1000EEEES2_ILl1ELl1000000EElLb1ELb0EE6__castIlS2_ILl1ELl1000000000EEEES4_RKNS1_IT_T0_EE
+	.section	.text._ZNKSt6chrono8durationIlSt5ratioILl1ELl1000EEE5countEv,"axG",@progbits,_ZNKSt6chrono8durationIlSt5ratioILl1ELl1000EEE5countEv,comdat
+	.align 2
+	.weak	_ZNKSt6chrono8durationIlSt5ratioILl1ELl1000EEE5countEv
+	.type	_ZNKSt6chrono8durationIlSt5ratioILl1ELl1000EEE5countEv, @function
_ZNKSt6chrono8durationIlSt5ratioILl1ELl1000EEE5countEv:
+.LFB1418:
+	.cfi_startproc	# duration<long, ratio<1,1000>>::count() const: returns the stored tick count __r
+	endbr64	
+	movq	%rdi, -8(%rsp)	# this, this -- makes no calls, so `this` is spilled below %rsp without adjusting the stack pointer
+# /usr/include/c++/9/chrono:347: 	{ return __r; }
+	movq	-8(%rsp), %rax	# this, tmp84
+	movq	(%rax), %rax	# this_2(D)->__r, _3 -- load __r (at offset 0 of the object) as the return value
+# /usr/include/c++/9/chrono:347: 	{ return __r; }
+	ret	
+	.cfi_endproc
+.LFE1418:
+	.size	_ZNKSt6chrono8durationIlSt5ratioILl1ELl1000EEE5countEv, .-_ZNKSt6chrono8durationIlSt5ratioILl1ELl1000EEE5countEv
+	.section	.text._ZNSt6chrono8durationIlSt5ratioILl1ELl1000EEEC2IlvEERKT_,"axG",@progbits,_ZNSt6chrono8durationIlSt5ratioILl1ELl1000EEEC5IlvEERKT_,comdat
+	.align 2
+	.weak	_ZNSt6chrono8durationIlSt5ratioILl1ELl1000EEEC2IlvEERKT_
+	.type	_ZNSt6chrono8durationIlSt5ratioILl1ELl1000EEEC2IlvEERKT_, @function
_ZNSt6chrono8durationIlSt5ratioILl1ELl1000EEEC2IlvEERKT_:
+.LFB1421:
+	.cfi_startproc	# duration<long, ratio<1,1000>> constructor taking `const long& __rep`: copies *__rep into this->__r
+	endbr64	
+	movq	%rdi, -8(%rsp)	# this, this -- makes no calls, so both arguments are spilled below %rsp without adjusting the stack pointer
+	movq	%rsi, -16(%rsp)	# __rep, __rep
+# /usr/include/c++/9/chrono:332: 	  : __r(static_cast<rep>(__rep)) { }
+	movq	-16(%rsp), %rax	# __rep, tmp83
+	movq	(%rax), %rdx	# *__rep_5(D), _1 -- read the tick count through the reference
+	movq	-8(%rsp), %rax	# this, tmp84
+	movq	%rdx, (%rax)	# _1, this_3(D)->__r -- store it into the object's __r member
+# /usr/include/c++/9/chrono:332: 	  : __r(static_cast<rep>(__rep)) { }
+	nop	
+	ret	
+	.cfi_endproc
+.LFE1421:
+	.size	_ZNSt6chrono8durationIlSt5ratioILl1ELl1000EEEC2IlvEERKT_, .-_ZNSt6chrono8durationIlSt5ratioILl1ELl1000EEEC2IlvEERKT_
+	.weak	_ZNSt6chrono8durationIlSt5ratioILl1ELl1000EEEC1IlvEERKT_
+	.set	_ZNSt6chrono8durationIlSt5ratioILl1ELl1000EEEC1IlvEERKT_,_ZNSt6chrono8durationIlSt5ratioILl1ELl1000EEEC2IlvEERKT_	# the C1 constructor symbol is defined as an alias of the C2 body above
+	.section	.rodata	# 32-bit constants; each value below is decoded as an IEEE-754 single. The code that loads them is outside this part of the listing, so the suggested uses are unconfirmed.
+	.align 4
+.LC0:
+	.long	1325400064	# 0x4F000000 = 2147483648.0f (2^31) -- presumably RAND_MAX converted to float in frand(); TODO confirm
+	.align 4
+.LC1:
+	.long	1065353216	# 0x3F800000 = 1.0f
+	.align 4
+.LC2:
+	.long	897988542	# 0x358637BE, approximately 1.0e-6f -- matches eps * eps (eps2) evaluated in float; TODO confirm
+	.align 4
+.LC3:
+	.long	925353389	# 0x3727C5AD, approximately 1.0e-5f -- matches G * dt (G_plus_dt) evaluated in float; TODO confirm
+	.align 4
+.LC4:
+	.long	1008981770	# 0x3C23D70A, approximately 0.01f -- matches dt; TODO confirm
+	.align 4
+.LC6:
+	.long	973279855	# 0x3A03126F, approximately 0.0005f -- matches G * 0.5f (half_G in calc()); TODO confirm
+	.align 4
+.LC7:
+	.long	1056964608	# 0x3F000000 = 0.5f
+	.ident	"GCC: (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0"
+	.section	.note.GNU-stack,"",@progbits
+	.section	.note.gnu.property,"a"
+	.align 8
+	.long	 1f - 0f
+	.long	 4f - 1f
+	.long	 5
+0:
+	.string	 "GNU"
+1:
+	.align 8
+	.long	 0xc0000002
+	.long	 3f - 2f
+2:
+	.long	 0x3
+3:
+	.align 8
+4:
diff --git a/main.cpp b/main.cpp
index cf6369b..7ac0426 100644
--- a/main.cpp
+++ b/main.cpp
@@ -3,64 +3,79 @@
 #include <vector>
 #include <chrono>
 #include <cmath>
+#include <array>
+
+const size_t N = 48;  // number of simulated bodies; replaces the literal 48 that init() used to hard-code
 
 float frand() {
-    return (float)rand() / RAND_MAX * 2 - 1;
+    return (float)std::rand() / RAND_MAX * 2 - 1;  // rescales rand()'s [0, RAND_MAX] result to a float in [-1, 1]
 }
 
 struct Star {
-    float px, py, pz;
-    float vx, vy, vz;
-    float mass;
+    float px[N], py[N], pz[N];  // per-axis position arrays; element i belongs to body i
+    float vx[N], vy[N], vz[N];  // per-axis velocity arrays (step() adds v * dt to the positions)
+    float mass[N];              // NOTE(review): one Star object now stores all N bodies, so the singular type name is misleading
 };
 
-std::vector<Star> stars;
+Star stars;  // the single global instance; init(), step() and calc() all read and write it directly
 
 void init() {
-    for (int i = 0; i < 48; i++) {
-        stars.push_back({
-            frand(), frand(), frand(),
-            frand(), frand(), frand(),
-            frand() + 1,
-        });
+    #pragma GCC unroll 16
+    for (size_t i = 0; i < N; i++) {  // assigns every element of the fixed-size arrays in the global `stars`
+        stars.px[i] = frand();  // the frand() calls run in the order px, py, pz, vx, vy, vz, mass for each body
+        stars.py[i] = frand();
+        stars.pz[i] = frand();
+        stars.vx[i] = frand();
+        stars.vy[i] = frand();
+        stars.vz[i] = frand();
+        stars.mass[i] = frand() + 1.f;  // frand() yields [-1, 1], so the mass lies in [0, 2]
     }
 }
 
-float G = 0.001;
-float eps = 0.001;
-float dt = 0.01;
+const float G = 0.001;  // scale factor on every pairwise term; the double literal is converted to float on initialization
+const float eps = 0.001;  // only used squared (eps2), added to squared distances
+const float dt = 0.01;  // step size: step() advances positions by v * dt
+const float eps2 = eps * eps;  // added to dx*dx + dy*dy + dz*dz so that sum is never zero, even for a body paired with itself
+const float dt2 = dt * dt;  // NOTE(review): not referenced by init(), step() or the visible part of calc() -- confirm it is needed
+const float G_plus_dt = G * dt;  // NOTE(review): despite the name this is the product G * dt, not a sum
 
 void step() {
-    for (auto &star: stars) {
-        for (auto &other: stars) {
-            float dx = other.px - star.px;
-            float dy = other.py - star.py;
-            float dz = other.pz - star.pz;
-            float d2 = dx * dx + dy * dy + dz * dz + eps * eps;
-            d2 *= sqrt(d2);
-            star.vx += dx * other.mass * G * dt / d2;
-            star.vy += dy * other.mass * G * dt / d2;
-            star.vz += dz * other.mass * G * dt / d2;
+    for (size_t i = 0; i < N; i++) {  // first pass: update the velocity of body i; positions are not modified in this pass
+        #pragma GCC unroll 16
+        for (size_t j = 0; j < N; j++) {  // j == i is not skipped: dx = dy = dz = 0 there, so that pair adds nothing
+            float dx = stars.px[j] - stars.px[i];  // offset from body i to body j
+            float dy = stars.py[j] - stars.py[i];
+            float dz = stars.pz[j] - stars.pz[i];
+            float d2 = dx * dx + dy * dy + dz * dz + eps2;  // squared distance plus eps2, hence strictly positive
+            d2 *= std::sqrt(d2);  // d2 now holds (squared distance + eps2)^(3/2)
+            float inv_d2 = 1.f / d2;  // one division shared by the three axis updates below
+            stars.vx[i] += dx * stars.mass[j] * G_plus_dt * inv_d2;  // G_plus_dt is G * dt
+            stars.vy[i] += dy * stars.mass[j] * G_plus_dt * inv_d2;
+            stars.vz[i] += dz * stars.mass[j] * G_plus_dt * inv_d2;
         }
     }
-    for (auto &star: stars) {
-        star.px += star.vx * dt;
-        star.py += star.vy * dt;
-        star.pz += star.vz * dt;
+    #pragma GCC unroll 16
+    for (size_t i = 0; i < N; i++) {  // second pass: move every body using the velocities updated above
+        stars.px[i] += stars.vx[i] * dt;
+        stars.py[i] += stars.vy[i] * dt;
+        stars.pz[i] += stars.vz[i] * dt;
     }
 }
 
 float calc() {
     float energy = 0;
-    for (auto &star: stars) {
-        float v2 = star.vx * star.vx + star.vy * star.vy + star.vz * star.vz;
-        energy += star.mass * v2 / 2;
-        for (auto &other: stars) {
-            float dx = other.px - star.px;
-            float dy = other.py - star.py;
-            float dz = other.pz - star.pz;
-            float d2 = dx * dx + dy * dy + dz * dz + eps * eps;
-            energy -= other.mass * star.mass * G / sqrt(d2) / 2;
+    float half_G = G * 0.5f;
+    for (size_t i = 0; i < N; i++) {
+        float v2 = stars.vx[i] * stars.vx[i] + stars.vy[i] * stars.vy[i] + stars.vz[i] * stars.vz[i];
+        energy += stars.mass[i] * v2 * 0.5f;
+        #pragma GCC unroll 16
+        for (size_t j = 0; j < N; j++) {
+            float dx = stars.px[j] - stars.px[i];
+            float dy = stars.py[j] - stars.py[i];
+            float dz = stars.pz[j] - stars.pz[i];
+            float d2 = dx * dx + dy * dy + dz * dz + eps2;
+            float inv_sqrt_d2 = 1.f / std::sqrt(d2);
+            energy -= stars.mass[j] * stars.mass[i] * half_G * inv_sqrt_d2;
         }
     }
     return energy;