Skip to content
This repository was archived by the owner on Apr 28, 2025. It is now read-only.

Commit 58fec5c

Browse files
committed
Add assembly version of simple operations on aarch64
For aarch64 and arm64ec with Neon, add assembly versions of the following:

* `ceil`
* `ceilf`
* `fabs`
* `fabsf`
* `floor`
* `floorf`
* `fma`
* `fmaf`
* `round`
* `roundf`
* `sqrt`
* `sqrtf`
* `trunc`
* `truncf`

If the `fp16` target feature is available, which implies `neon`, also include the following:

* `ceilf16`
* `fabsf16`
* `floorf16`
* `rintf16`
* `roundf16`
* `sqrtf16`
* `truncf16`

Additionally, replace `core::arch` versions of the following with handwritten assembly (which avoids issues with `aarch64be`):

* `rint`
* `rintf`

Instructions for `fmax` and `fmin` are also available but seem to provide different results based on whether NaN inputs are signaling or quiet. Our current implementation does not make this distinction, so omit these for now.
1 parent b67b4cc commit 58fec5c

26 files changed

+411
-36
lines changed

etc/function-definitions.json

+21
Original file line number | Diff line number | Diff line change
@@ -107,6 +107,7 @@
107107
"ceil": {
108108
"sources": [
109109
"src/libm_helper.rs",
110+
"src/math/arch/aarch64.rs",
110111
"src/math/arch/i586.rs",
111112
"src/math/arch/wasm32.rs",
112113
"src/math/ceil.rs",
@@ -116,6 +117,7 @@
116117
},
117118
"ceilf": {
118119
"sources": [
120+
"src/math/arch/aarch64.rs",
119121
"src/math/arch/wasm32.rs",
120122
"src/math/ceilf.rs",
121123
"src/math/generic/ceil.rs"
@@ -131,6 +133,7 @@
131133
},
132134
"ceilf16": {
133135
"sources": [
136+
"src/math/arch/aarch64.rs",
134137
"src/math/ceilf16.rs",
135138
"src/math/generic/ceil.rs"
136139
],
@@ -274,6 +277,7 @@
274277
"fabs": {
275278
"sources": [
276279
"src/libm_helper.rs",
280+
"src/math/arch/aarch64.rs",
277281
"src/math/arch/wasm32.rs",
278282
"src/math/fabs.rs",
279283
"src/math/generic/fabs.rs"
@@ -282,6 +286,7 @@
282286
},
283287
"fabsf": {
284288
"sources": [
289+
"src/math/arch/aarch64.rs",
285290
"src/math/arch/wasm32.rs",
286291
"src/math/fabsf.rs",
287292
"src/math/generic/fabs.rs"
@@ -297,6 +302,7 @@
297302
},
298303
"fabsf16": {
299304
"sources": [
305+
"src/math/arch/aarch64.rs",
300306
"src/math/fabsf16.rs",
301307
"src/math/generic/fabs.rs"
302308
],
@@ -334,6 +340,7 @@
334340
"floor": {
335341
"sources": [
336342
"src/libm_helper.rs",
343+
"src/math/arch/aarch64.rs",
337344
"src/math/arch/i586.rs",
338345
"src/math/arch/wasm32.rs",
339346
"src/math/floor.rs",
@@ -343,6 +350,7 @@
343350
},
344351
"floorf": {
345352
"sources": [
353+
"src/math/arch/aarch64.rs",
346354
"src/math/arch/wasm32.rs",
347355
"src/math/floorf.rs",
348356
"src/math/generic/floor.rs"
@@ -358,6 +366,7 @@
358366
},
359367
"floorf16": {
360368
"sources": [
369+
"src/math/arch/aarch64.rs",
361370
"src/math/floorf16.rs",
362371
"src/math/generic/floor.rs"
363372
],
@@ -366,12 +375,14 @@
366375
"fma": {
367376
"sources": [
368377
"src/libm_helper.rs",
378+
"src/math/arch/aarch64.rs",
369379
"src/math/fma.rs"
370380
],
371381
"type": "f64"
372382
},
373383
"fmaf": {
374384
"sources": [
385+
"src/math/arch/aarch64.rs",
375386
"src/math/fmaf.rs"
376387
],
377388
"type": "f32"
@@ -725,6 +736,7 @@
725736
},
726737
"rintf16": {
727738
"sources": [
739+
"src/math/arch/aarch64.rs",
728740
"src/math/generic/rint.rs",
729741
"src/math/rintf16.rs"
730742
],
@@ -733,13 +745,15 @@
733745
"round": {
734746
"sources": [
735747
"src/libm_helper.rs",
748+
"src/math/arch/aarch64.rs",
736749
"src/math/generic/round.rs",
737750
"src/math/round.rs"
738751
],
739752
"type": "f64"
740753
},
741754
"roundf": {
742755
"sources": [
756+
"src/math/arch/aarch64.rs",
743757
"src/math/generic/round.rs",
744758
"src/math/roundf.rs"
745759
],
@@ -754,6 +768,7 @@
754768
},
755769
"roundf16": {
756770
"sources": [
771+
"src/math/arch/aarch64.rs",
757772
"src/math/generic/round.rs",
758773
"src/math/roundf16.rs"
759774
],
@@ -816,6 +831,7 @@
816831
"sqrt": {
817832
"sources": [
818833
"src/libm_helper.rs",
834+
"src/math/arch/aarch64.rs",
819835
"src/math/arch/i686.rs",
820836
"src/math/arch/wasm32.rs",
821837
"src/math/generic/sqrt.rs",
@@ -825,6 +841,7 @@
825841
},
826842
"sqrtf": {
827843
"sources": [
844+
"src/math/arch/aarch64.rs",
828845
"src/math/arch/i686.rs",
829846
"src/math/arch/wasm32.rs",
830847
"src/math/generic/sqrt.rs",
@@ -841,6 +858,7 @@
841858
},
842859
"sqrtf16": {
843860
"sources": [
861+
"src/math/arch/aarch64.rs",
844862
"src/math/generic/sqrt.rs",
845863
"src/math/sqrtf16.rs"
846864
],
@@ -888,6 +906,7 @@
888906
"trunc": {
889907
"sources": [
890908
"src/libm_helper.rs",
909+
"src/math/arch/aarch64.rs",
891910
"src/math/arch/wasm32.rs",
892911
"src/math/generic/trunc.rs",
893912
"src/math/trunc.rs"
@@ -896,6 +915,7 @@
896915
},
897916
"truncf": {
898917
"sources": [
918+
"src/math/arch/aarch64.rs",
899919
"src/math/arch/wasm32.rs",
900920
"src/math/generic/trunc.rs",
901921
"src/math/truncf.rs"
@@ -911,6 +931,7 @@
911931
},
912932
"truncf16": {
913933
"sources": [
934+
"src/math/arch/aarch64.rs",
914935
"src/math/generic/trunc.rs",
915936
"src/math/truncf16.rs"
916937
],

0 commit comments

Comments
 (0)