From a9918e59d8eef7d166124377c88873e7ce8cd523 Mon Sep 17 00:00:00 2001 From: Jeremi Kurdek Date: Tue, 23 Jan 2024 21:07:44 +0100 Subject: [PATCH 1/8] [Mono] [Arm64] Add basic Vector3 SIMD intrinsics --- src/mono/mono/mini/cpu-arm64.mdesc | 4 ++-- src/mono/mono/mini/mini-arm64.c | 13 +++++++++++-- src/mono/mono/mini/mini-runtime.c | 3 +-- src/mono/mono/mini/mini.c | 3 +++ src/mono/mono/mini/mini.h | 2 +- src/mono/mono/mini/simd-intrinsics.c | 2 +- 6 files changed, 19 insertions(+), 8 deletions(-) diff --git a/src/mono/mono/mini/cpu-arm64.mdesc b/src/mono/mono/mini/cpu-arm64.mdesc index 0f8a3a2d234550..99f55aedb3a6ef 100644 --- a/src/mono/mono/mini/cpu-arm64.mdesc +++ b/src/mono/mono/mini/cpu-arm64.mdesc @@ -122,7 +122,7 @@ r8const: dest:f len:20 label: len:0 store_membase_imm: dest:b len:20 store_membase_reg: dest:b src1:i len:20 -storex_membase: dest:b src1:x len:16 +storex_membase: dest:b src1:x len:20 storei1_membase_imm: dest:b len:20 storei1_membase_reg: dest:b src1:i len:12 storei2_membase_imm: dest:b len:20 @@ -136,7 +136,7 @@ storei1_memindex: dest:b src1:i src2:i len:4 storei2_memindex: dest:b src1:i src2:i len:4 storei4_memindex: dest:b src1:i src2:i len:4 load_membase: dest:i src1:b len:20 -loadx_membase: dest:x src1:b len:16 +loadx_membase: dest:x src1:b len:20 loadi1_membase: dest:i src1:b len:32 loadu1_membase: dest:i src1:b len:32 loadi2_membase: dest:i src1:b len:32 diff --git a/src/mono/mono/mini/mini-arm64.c b/src/mono/mono/mini/mini-arm64.c index dafaabaa5ba06f..00830d15e7fca5 100644 --- a/src/mono/mono/mini/mini-arm64.c +++ b/src/mono/mono/mini/mini-arm64.c @@ -437,6 +437,7 @@ get_vector_size_macro (MonoInst *ins) g_assert (ins->klass); int size = mono_class_value_size (ins->klass, NULL); switch (size) { + case 12: case 16: return VREG_FULL; case 8: @@ -4064,13 +4065,21 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_STOREX_MEMBASE: if (ins->klass && mono_class_value_size (ins->klass, NULL) == 8) code = emit_strfpx (code, sreg1, dreg, GTMREG_TO_INT (ins->inst_offset)); - else + else if (ins->klass && mono_class_value_size (ins->klass, NULL) == 12){ + arm_neon_ins_e (code, SIZE_4, ARMREG_IP0, sreg1, 0, 2); + code = emit_strfpx (code, sreg1, dreg, GTMREG_TO_INT (ins->inst_offset)); + code = emit_strfpw (code, ARMREG_IP0, dreg , GTMREG_TO_INT (ins->inst_offset + 8)); + } else code = emit_strfpq (code, sreg1, dreg, GTMREG_TO_INT (ins->inst_offset)); break; case OP_LOADX_MEMBASE: if (ins->klass && mono_class_value_size (ins->klass, NULL) == 8) code = emit_ldrfpx (code, dreg, sreg1, GTMREG_TO_INT (ins->inst_offset)); - else + else if (ins->klass && mono_class_value_size (ins->klass, NULL) == 12){ + code = emit_ldrfpx (code, dreg, sreg1, GTMREG_TO_INT (ins->inst_offset)); + code = emit_ldrfpw (code, ARMREG_IP0, sreg1, GTMREG_TO_INT (ins->inst_offset + 8)); + arm_neon_ins_e (code, SIZE_4, dreg, ARMREG_IP0, 2, 0); + } else code = emit_ldrfpq (code, dreg, sreg1, GTMREG_TO_INT (ins->inst_offset)); break; case OP_XMOVE: diff --git a/src/mono/mono/mini/mini-runtime.c b/src/mono/mono/mini/mini-runtime.c index 2a4a2d75c16ac7..f5212c9143948a 100644 --- a/src/mono/mono/mini/mini-runtime.c +++ b/src/mono/mono/mini/mini-runtime.c @@ -4489,8 +4489,7 @@ init_class (MonoClass *klass) #ifdef TARGET_ARM64 if (!strcmp (m_class_get_name_space (klass), "System.Numerics")) { - // FIXME: Support Vector3 https://github.com/dotnet/runtime/issues/81501 - if (!strcmp (name, "Vector2") || !strcmp (name, "Vector4") || !strcmp (name, "Quaternion") || !strcmp (name, "Plane")) + if (!strcmp (name, "Vector2") || !strcmp (name, "Vector3") ||!strcmp (name, "Vector4") || !strcmp (name, "Quaternion") || !strcmp (name, "Plane")) mono_class_set_is_simd_type (klass, TRUE); } #endif diff --git a/src/mono/mono/mini/mini.c b/src/mono/mono/mini/mini.c index b6935d312c6ee8..d3f2d12a25f722 100644 --- a/src/mono/mono/mini/mini.c +++ b/src/mono/mono/mini/mini.c @@ -4591,6 +4591,9 @@ mini_get_simd_type_info (MonoClass *klass, guint32 *nelems) } else if (!strcmp (klass_name, "Vector2")) { *nelems = 2; return MONO_TYPE_R4; + } else if (!strcmp (klass_name, "Vector3")) { + *nelems = 3; + return MONO_TYPE_R4; } else if (!strcmp (klass_name, "Vector`1") || !strcmp (klass_name, "Vector64`1") || !strcmp (klass_name, "Vector128`1") || !strcmp (klass_name, "Vector256`1") || !strcmp (klass_name, "Vector512`1")) { MonoType *etype = mono_class_get_generic_class (klass)->context.class_inst->type_argv [0]; int size = mono_class_value_size (klass, NULL); diff --git a/src/mono/mono/mini/mini.h b/src/mono/mono/mini/mini.h index cd2cad8e7dcece..10cc77ef347c41 100644 --- a/src/mono/mono/mini/mini.h +++ b/src/mono/mono/mini/mini.h @@ -2998,7 +2998,7 @@ mini_class_is_simd (MonoCompile *cfg, MonoClass *klass) return TRUE; int size = mono_type_size (m_class_get_byval_arg (klass), NULL); #ifdef TARGET_ARM64 - if (size == 8 || size == 16) + if (size == 8 || size == 16 || (size == 12 && !strcmp (m_class_get_name (klass), "Vector3"))) return TRUE; #else if (size == 16) diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index 8955ede9b10506..2d7470d0af3791 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -5923,7 +5923,7 @@ arch_emit_simd_intrinsics (const char *class_ns, const char *class_name, MonoCom if (!strcmp (class_ns, "System.Numerics")) { // FIXME: Support Vector2 https://github.com/dotnet/runtime/issues/81501 - if (!strcmp (class_name, "Vector2") || !strcmp (class_name, "Vector4") || + if (!strcmp (class_name, "Vector2") || !strcmp (class_name, "Vector3") || !strcmp (class_name, "Vector4") || !strcmp (class_name, "Quaternion") || !strcmp (class_name, "Plane")) return emit_vector_2_3_4 (cfg, cmethod, fsig, args); } From 1e3014262b49168d648ff8a26f21df9e03626949 Mon Sep 17 00:00:00 2001 From: Jeremi Kurdek Date: Mon, 29 Jan 2024 21:20:10 +0100 Subject: [PATCH 2/8] Add support for LLVM --- src/mono/mono/mini/mini-llvm.c | 28 ++++++++++++++++++++++------ src/mono/mono/mini/mini.c | 21 ++++++++++++++++++++- src/mono/mono/mini/mini.h | 1 + src/mono/mono/mini/simd-intrinsics.c | 6 ++---- 4 files changed, 45 insertions(+), 11 deletions(-) diff --git a/src/mono/mono/mini/mini-llvm.c b/src/mono/mono/mini/mini-llvm.c index 77e389a51cf9a2..cd7eda3797cd01 100644 --- a/src/mono/mono/mini/mini-llvm.c +++ b/src/mono/mono/mini/mini-llvm.c @@ -469,12 +469,12 @@ ovr_tag_from_mono_vector_class (MonoClass *klass) llvm_ovr_tag_t ret = 0; switch (size) { case 8: ret |= INTRIN_vector64; break; + case 12: ret |= INTRIN_vector128; break; case 16: ret |= INTRIN_vector128; break; } const char *class_name = m_class_get_name (klass); - if (!strcmp ("Vector2", class_name) || !strcmp ("Vector4", class_name) || !strcmp ("Quaternion", class_name) || !strcmp ("Plane", class_name)) { - // FIXME: Support Vector3 + if (!strcmp ("Vector2", class_name) || !strcmp ("Vector3", class_name) || !strcmp ("Vector4", class_name) || !strcmp ("Quaternion", class_name) || !strcmp ("Plane", class_name)) { return ret | INTRIN_float32; } @@ -507,6 +507,7 @@ ovr_tag_from_llvm_type (LLVMTypeRef type) unsigned int bits = mono_llvm_get_prim_size_bits (type); switch (bits) { case 64: ret |= INTRIN_vector64; break; + case 96: ret |= INTRIN_vector128; break; case 128: ret |= INTRIN_vector128; break; default: g_assert_not_reached (); } @@ -4168,9 +4169,18 @@ emit_entry_bb (EmitContext *ctx, LLVMBuilderRef builder) case LLVMArgVtypeByRef: case LLVMArgAsFpArgs: { - if (mini_class_is_simd (ctx->cfg, mono_class_from_mono_type_internal (ainfo->type))) - /* Treat these as normal values */ - ctx->values [reg] = LLVMBuildLoad2 (builder, ctx->addresses [reg]->type, ctx->addresses [reg]->value, "simd_vtype"); + MonoClass *klass = mono_class_from_mono_type_internal (ainfo->type); + if (mini_class_is_simd (ctx->cfg, klass)){ + LLVMValueRef loadedVector = LLVMBuildLoad2 (builder, ctx->addresses [reg]->type, ctx->addresses [reg]->value, "simd_vtype"); + + if(mono_class_value_size(klass, NULL) == 12){ + LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0); + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), 3, 0); + loadedVector = LLVMBuildInsertElement(builder, loadedVector, zero, index, "insert_zero"); + } + + ctx->values[reg] = loadedVector; + } break; } default: @@ -6195,13 +6205,19 @@ process_bb (EmitContext *ctx, MonoBasicBlock *bb) case LLVMArgFpStruct: { LLVMTypeRef ret_type = LLVMGetReturnType (ctx->lmethod_type); LLVMValueRef retval, elem; - gboolean is_simd = mini_class_is_simd (ctx->cfg, mono_class_from_mono_type_internal (sig->ret)); + + MonoClass *klass= mono_class_from_mono_type_internal (sig->ret); + gboolean is_simd = mini_class_is_simd (ctx->cfg, klass); if (is_simd) { retval = LLVMConstNull(ret_type); if (lhs) { int len = LLVMGetVectorSize (LLVMTypeOf (lhs)); + + if (mono_class_value_size (klass, NULL) == 12) + len--; + for (int i = 0; i < len; i++) { elem = LLVMBuildExtractElement (builder, lhs, const_int32 (i), "extract_elem"); retval = LLVMBuildInsertValue (builder, retval, elem, i, "insert_val_struct"); diff --git a/src/mono/mono/mini/mini.c b/src/mono/mono/mini/mini.c index d3f2d12a25f722..5a50eed64d643b 100644 --- a/src/mono/mono/mini/mini.c +++ b/src/mono/mono/mini/mini.c @@ -4592,7 +4592,7 @@ mini_get_simd_type_info (MonoClass *klass, guint32 *nelems) *nelems = 2; return MONO_TYPE_R4; } else if (!strcmp (klass_name, "Vector3")) { - *nelems = 3; + *nelems = 4; return MONO_TYPE_R4; } else if (!strcmp (klass_name, "Vector`1") || !strcmp (klass_name, "Vector64`1") || !strcmp (klass_name, "Vector128`1") || !strcmp (klass_name, "Vector256`1") || !strcmp (klass_name, "Vector512`1")) { MonoType *etype = mono_class_get_generic_class (klass)->context.class_inst->type_argv [0]; @@ -4605,3 +4605,22 @@ mini_get_simd_type_info (MonoClass *klass, guint32 *nelems) return MONO_TYPE_VOID; } } + +guint32 mini_number_of_elements(MonoClass *klass){ + const char *klass_name = m_class_get_name (klass); + if (!strcmp (klass_name, "Vector4") || !strcmp (klass_name, "Quaternion") || !strcmp (klass_name, "Plane")) { + return 4; + } else if (!strcmp (klass_name, "Vector2")) { + return 2; + } else if (!strcmp (klass_name, "Vector3")){ + return 3; + } else if (!strcmp (klass_name, "Vector`1") || !strcmp (klass_name, "Vector64`1") || !strcmp (klass_name, "Vector128`1") || !strcmp (klass_name, "Vector256`1") || !strcmp (klass_name, "Vector512`1")) { + MonoType *etype = mono_class_get_generic_class (klass)->context.class_inst->type_argv [0]; + int size = mono_class_value_size (klass, NULL); + return size / mini_primitive_type_size (etype->type); + } else { + printf ("%s\n", klass_name); + NOT_IMPLEMENTED; + return 0; + } +} diff --git a/src/mono/mono/mini/mini.h b/src/mono/mono/mini/mini.h index 10cc77ef347c41..4ae19d723a784b 100644 --- a/src/mono/mono/mini/mini.h +++ b/src/mono/mono/mini/mini.h @@ -2959,6 +2959,7 @@ enum { int mini_primitive_type_size (MonoTypeEnum type); MonoTypeEnum mini_get_simd_type_info (MonoClass *klass, guint32 *nelems); +guint32 mini_number_of_elements(MonoClass *klass); const char *mono_arch_xregname (int reg); MonoCPUFeatures mono_arch_get_cpu_features (void); diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index 2d7470d0af3791..5a6fc1b0a336a3 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -649,10 +649,8 @@ emit_sum_vector (MonoCompile *cfg, MonoType *vector_type, MonoTypeEnum element_t MonoClass *vector_class = mono_class_from_mono_type_internal (vector_type); int vector_size = mono_class_value_size (vector_class, NULL); int element_size; - - // FIXME: Support Vector3 - guint32 nelems; - mini_get_simd_type_info (vector_class, &nelems); + + guint32 nelems = mini_number_of_elements (vector_class); element_size = vector_size / nelems; gboolean has_single_element = vector_size == element_size; From dfcbb556cd3f1614b17c8b639a19f9bf9d34d804 Mon Sep 17 00:00:00 2001 From: Jeremi Kurdek Date: Tue, 30 Jan 2024 16:21:24 +0100 Subject: [PATCH 3/8] Add insert zero as last element to vector3 as vector4 ctor --- src/mono/mono/mini/simd-intrinsics.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index 5a6fc1b0a336a3..4b6efbc873c224 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -2719,6 +2719,18 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f for (int i = 1; i < fsig->param_count; ++i) ins = emit_vector_insert_element (cfg, klass, ins, MONO_TYPE_R4, args [i + 1], i, FALSE); + if(len == 3){ + float r4_0 = 0.0f; + MonoInst *zero; + int dreg = alloc_freg (cfg); + MONO_INST_NEW (cfg, zero, OP_R4CONST); + zero->type = STACK_R4; + zero->inst_p0 = (void*)&r4_0; + zero->dreg = dreg; + MONO_ADD_INS (cfg->cbb, zero); + ins = emit_vector_insert_element (cfg, klass, ins, MONO_TYPE_R4, zero, 3, FALSE); + } + ins->dreg = dreg; if (indirect) { From 552c019c657743783c2f347b711c2ba6c313c060 Mon Sep 17 00:00:00 2001 From: Jeremi Kurdek Date: Tue, 30 Jan 2024 17:10:20 +0100 Subject: [PATCH 4/8] Fix dreg redeclaration --- src/mono/mono/mini/simd-intrinsics.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index 4b6efbc873c224..72192e88a7e6eb 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -2722,11 +2722,11 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f if(len == 3){ float r4_0 = 0.0f; MonoInst *zero; - int dreg = alloc_freg (cfg); + int zero_dreg = alloc_freg (cfg); MONO_INST_NEW (cfg, zero, OP_R4CONST); zero->type = STACK_R4; zero->inst_p0 = (void*)&r4_0; - zero->dreg = dreg; + zero->dreg = zero_dreg; MONO_ADD_INS (cfg->cbb, zero); ins = emit_vector_insert_element (cfg, klass, ins, MONO_TYPE_R4, zero, 3, FALSE); } From 472cabdf0dd06d1ab3898fc04f997773438a1330 Mon Sep 17 00:00:00 2001 From: Jeremi Kurdek Date: Wed, 31 Jan 2024 16:59:32 +0100 Subject: [PATCH 5/8] fixed const value insert --- src/mono/mono/mini/simd-intrinsics.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index 72192e88a7e6eb..b460f165ecdcb6 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -554,13 +554,14 @@ static MonoInst* emit_xequal (MonoCompile *cfg, MonoClass *klass, MonoTypeEnum element_type, MonoInst *arg1, MonoInst *arg2) { #ifdef TARGET_ARM64 + gint32 simd_size = mono_class_value_size (klass, NULL); if (!COMPILE_LLVM (cfg)) { MonoInst* cmp = emit_xcompare (cfg, klass, element_type, arg1, arg2); MonoInst* ret = emit_simd_ins (cfg, mono_defaults.boolean_class, OP_XEXTRACT, cmp->dreg, -1); ret->inst_c0 = SIMD_EXTR_ARE_ALL_SET; ret->inst_c1 = mono_class_value_size (klass, NULL); return ret; - } else if (mono_class_value_size (klass, NULL) == 16) { + } else if (simd_size== 16 || simd_size == 12) { return emit_simd_ins (cfg, klass, OP_XEQUAL_ARM64_V128_FAST, arg1->dreg, arg2->dreg); } else { return emit_simd_ins (cfg, klass, OP_XEQUAL, arg1->dreg, arg2->dreg); @@ -2720,11 +2721,10 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f ins = emit_vector_insert_element (cfg, klass, ins, MONO_TYPE_R4, args [i + 1], i, FALSE); if(len == 3){ - float r4_0 = 0.0f; + static float r4_0 = 0; MonoInst *zero; int zero_dreg = alloc_freg (cfg); MONO_INST_NEW (cfg, zero, OP_R4CONST); - zero->type = STACK_R4; zero->inst_p0 = (void*)&r4_0; zero->dreg = zero_dreg; MONO_ADD_INS (cfg->cbb, zero); From 74a2a29dd9a97223b34c4a57147f638a4333955c Mon Sep 17 00:00:00 2001 From: Jeremi Kurdek Date: Wed, 31 Jan 2024 20:07:29 +0100 Subject: [PATCH 6/8] llvm loadx zero padding + formatting + review suggestions --- src/mono/mono/mini/mini-arm64.c | 4 ++-- src/mono/mono/mini/mini-llvm.c | 17 +++++++++++------ src/mono/mono/mini/mini.c | 19 +------------------ src/mono/mono/mini/mini.h | 3 +-- src/mono/mono/mini/simd-intrinsics.c | 13 ++++++++++--- 5 files changed, 25 insertions(+), 31 deletions(-) diff --git a/src/mono/mono/mini/mini-arm64.c b/src/mono/mono/mini/mini-arm64.c index 00830d15e7fca5..0e3da92fd41c0d 100644 --- a/src/mono/mono/mini/mini-arm64.c +++ b/src/mono/mono/mini/mini-arm64.c @@ -4065,7 +4065,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_STOREX_MEMBASE: if (ins->klass && mono_class_value_size (ins->klass, NULL) == 8) code = emit_strfpx (code, sreg1, dreg, GTMREG_TO_INT (ins->inst_offset)); - else if (ins->klass && mono_class_value_size (ins->klass, NULL) == 12){ + else if (ins->klass && mono_class_value_size (ins->klass, NULL) == 12) { arm_neon_ins_e (code, SIZE_4, ARMREG_IP0, sreg1, 0, 2); code = emit_strfpx (code, sreg1, dreg, GTMREG_TO_INT (ins->inst_offset)); code = emit_strfpw (code, ARMREG_IP0, dreg , GTMREG_TO_INT (ins->inst_offset + 8)); @@ -4075,7 +4075,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_LOADX_MEMBASE: if (ins->klass && mono_class_value_size (ins->klass, NULL) == 8) code = emit_ldrfpx (code, dreg, sreg1, GTMREG_TO_INT (ins->inst_offset)); - else if (ins->klass && mono_class_value_size (ins->klass, NULL) == 12){ + else if (ins->klass && mono_class_value_size (ins->klass, NULL) == 12) { code = emit_ldrfpx (code, dreg, sreg1, GTMREG_TO_INT (ins->inst_offset)); code = emit_ldrfpw (code, ARMREG_IP0, sreg1, GTMREG_TO_INT (ins->inst_offset + 8)); arm_neon_ins_e (code, SIZE_4, dreg, ARMREG_IP0, 2, 0); diff --git a/src/mono/mono/mini/mini-llvm.c b/src/mono/mono/mini/mini-llvm.c index cd7eda3797cd01..9a1a10cdd427e1 100644 --- a/src/mono/mono/mini/mini-llvm.c +++ b/src/mono/mono/mini/mini-llvm.c @@ -4170,16 +4170,16 @@ emit_entry_bb (EmitContext *ctx, LLVMBuilderRef builder) case LLVMArgAsFpArgs: { MonoClass *klass = mono_class_from_mono_type_internal (ainfo->type); - if (mini_class_is_simd (ctx->cfg, klass)){ + if (mini_class_is_simd (ctx->cfg, klass)) { LLVMValueRef loadedVector = LLVMBuildLoad2 (builder, ctx->addresses [reg]->type, ctx->addresses [reg]->value, "simd_vtype"); - if(mono_class_value_size(klass, NULL) == 12){ - LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0); - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), 3, 0); - loadedVector = LLVMBuildInsertElement(builder, loadedVector, zero, index, "insert_zero"); + if (mono_class_value_size (klass, NULL) == 12) { + LLVMValueRef zero = LLVMConstReal (LLVMFloatType (), 0.0); + LLVMValueRef index = LLVMConstInt (LLVMInt32Type (), 3, 0); + loadedVector = LLVMBuildInsertElement (builder, loadedVector, zero, index, "insert_zero"); } - ctx->values[reg] = loadedVector; + ctx->values [reg] = loadedVector; } break; } @@ -8317,6 +8317,11 @@ MONO_RESTORE_WARNING src = convert (ctx, LLVMBuildAdd (builder, convert (ctx, values [ins->inst_basereg], IntPtrType ()), LLVMConstInt (IntPtrType (), ins->inst_offset, FALSE), ""), pointer_type (t)); values [ins->dreg] = mono_llvm_build_aligned_load (builder, t, src, "", FALSE, 1); + if (mono_class_value_size (ins->klass, NULL) == 12) { + LLVMValueRef zero = LLVMConstReal (LLVMFloatType (), 0.0); + LLVMValueRef index = LLVMConstInt (LLVMInt32Type (), 3, 0); + values [ins->dreg] = LLVMBuildInsertElement (builder, values [ins->dreg], zero, index, "insert_zero"); + } break; } case OP_STOREX_MEMBASE: { diff --git a/src/mono/mono/mini/mini.c b/src/mono/mono/mini/mini.c index 5a50eed64d643b..ff424783275204 100644 --- a/src/mono/mono/mini/mini.c +++ b/src/mono/mono/mini/mini.c @@ -4592,6 +4592,7 @@ mini_get_simd_type_info (MonoClass *klass, guint32 *nelems) *nelems = 2; return MONO_TYPE_R4; } else if (!strcmp (klass_name, "Vector3")) { + // For LLVM SIMD support, Vector3 is treated as a 4-element vector (three elements + zero). *nelems = 4; return MONO_TYPE_R4; } else if (!strcmp (klass_name, "Vector`1") || !strcmp (klass_name, "Vector64`1") || !strcmp (klass_name, "Vector128`1") || !strcmp (klass_name, "Vector256`1") || !strcmp (klass_name, "Vector512`1")) { @@ -4606,21 +4607,3 @@ mini_get_simd_type_info (MonoClass *klass, guint32 *nelems) } } -guint32 mini_number_of_elements(MonoClass *klass){ - const char *klass_name = m_class_get_name (klass); - if (!strcmp (klass_name, "Vector4") || !strcmp (klass_name, "Quaternion") || !strcmp (klass_name, "Plane")) { - return 4; - } else if (!strcmp (klass_name, "Vector2")) { - return 2; - } else if (!strcmp (klass_name, "Vector3")){ - return 3; - } else if (!strcmp (klass_name, "Vector`1") || !strcmp (klass_name, "Vector64`1") || !strcmp (klass_name, "Vector128`1") || !strcmp (klass_name, "Vector256`1") || !strcmp (klass_name, "Vector512`1")) { - MonoType *etype = mono_class_get_generic_class (klass)->context.class_inst->type_argv [0]; - int size = mono_class_value_size (klass, NULL); - return size / mini_primitive_type_size (etype->type); - } else { - printf ("%s\n", klass_name); - NOT_IMPLEMENTED; - return 0; - } -} diff --git a/src/mono/mono/mini/mini.h b/src/mono/mono/mini/mini.h index 4ae19d723a784b..ef43deba7008b3 100644 --- a/src/mono/mono/mini/mini.h +++ b/src/mono/mono/mini/mini.h @@ -2959,7 +2959,6 @@ enum { int mini_primitive_type_size (MonoTypeEnum type); MonoTypeEnum mini_get_simd_type_info (MonoClass *klass, guint32 *nelems); -guint32 mini_number_of_elements(MonoClass *klass); const char *mono_arch_xregname (int reg); MonoCPUFeatures mono_arch_get_cpu_features (void); @@ -2999,7 +2998,7 @@ mini_class_is_simd (MonoCompile *cfg, MonoClass *klass) return TRUE; int size = mono_type_size (m_class_get_byval_arg (klass), NULL); #ifdef TARGET_ARM64 - if (size == 8 || size == 16 || (size == 12 && !strcmp (m_class_get_name (klass), "Vector3"))) + if (size == 8 || size == 16 || size == 12) return TRUE; #else if (size == 16) diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index b460f165ecdcb6..c184421000649c 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -561,7 +561,7 @@ emit_xequal (MonoCompile *cfg, MonoClass *klass, MonoTypeEnum element_type, Mono ret->inst_c0 = SIMD_EXTR_ARE_ALL_SET; ret->inst_c1 = mono_class_value_size (klass, NULL); return ret; - } else if (simd_size== 16 || simd_size == 12) { + } else if (simd_size == 16 || simd_size == 12) { return emit_simd_ins (cfg, klass, OP_XEQUAL_ARM64_V128_FAST, arg1->dreg, arg2->dreg); } else { return emit_simd_ins (cfg, klass, OP_XEQUAL, arg1->dreg, arg2->dreg); @@ -651,7 +651,14 @@ emit_sum_vector (MonoCompile *cfg, MonoType *vector_type, MonoTypeEnum element_t int vector_size = mono_class_value_size (vector_class, NULL); int element_size; - guint32 nelems = mini_number_of_elements (vector_class); + guint32 nelems; + mini_get_simd_type_info (vector_class, &nelems); + + // Override nelems for Vector3, with actual number of elements + const char *klass_name = m_class_get_name (vector_class); + if (!strcmp (klass_name, "Vector3")) + nelems = 3; + element_size = vector_size / nelems; gboolean has_single_element = vector_size == element_size; @@ -2720,7 +2727,7 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f for (int i = 1; i < fsig->param_count; ++i) ins = emit_vector_insert_element (cfg, klass, ins, MONO_TYPE_R4, args [i + 1], i, FALSE); - if(len == 3){ + if (len == 3) { static float r4_0 = 0; MonoInst *zero; int zero_dreg = alloc_freg (cfg); From b47cdf9600debc1bfc3d305e0df19abe6e25b524 Mon Sep 17 00:00:00 2001 From: Jeremi Kurdek <59935235+jkurdek@users.noreply.github.com> Date: Thu, 1 Feb 2024 15:05:15 +0100 Subject: [PATCH 7/8] Apply suggestions from code review Co-authored-by: Ivan Povazan <55002338+ivanpovazan@users.noreply.github.com> --- src/mono/mono/mini/mini.h | 2 +- src/mono/mono/mini/simd-intrinsics.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mono/mono/mini/mini.h b/src/mono/mono/mini/mini.h index ef43deba7008b3..c9adaf023f7e97 100644 --- a/src/mono/mono/mini/mini.h +++ b/src/mono/mono/mini/mini.h @@ -2998,7 +2998,7 @@ mini_class_is_simd (MonoCompile *cfg, MonoClass *klass) return TRUE; int size = mono_type_size (m_class_get_byval_arg (klass), NULL); #ifdef TARGET_ARM64 - if (size == 8 || size == 16 || size == 12) + if (size == 8 || size == 12 || size == 16) return TRUE; #else if (size == 16) diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index c184421000649c..60f81123512893 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -561,7 +561,7 @@ emit_xequal (MonoCompile *cfg, MonoClass *klass, MonoTypeEnum element_type, Mono ret->inst_c0 = SIMD_EXTR_ARE_ALL_SET; ret->inst_c1 = mono_class_value_size (klass, NULL); return ret; - } else if (simd_size == 16 || simd_size == 12) { + } else if (simd_size == 12 || simd_size == 16) { return emit_simd_ins (cfg, klass, OP_XEQUAL_ARM64_V128_FAST, arg1->dreg, arg2->dreg); } else { return emit_simd_ins (cfg, klass, OP_XEQUAL, arg1->dreg, arg2->dreg); @@ -654,7 +654,7 @@ emit_sum_vector (MonoCompile *cfg, MonoType *vector_type, MonoTypeEnum element_t guint32 nelems; mini_get_simd_type_info (vector_class, &nelems); - // Override nelems for Vector3, with actual number of elements + // Override nelems for Vector3, with actual number of elements, instead of treating it as a 4-element vector (three elements + zero). const char *klass_name = m_class_get_name (vector_class); if (!strcmp (klass_name, "Vector3")) nelems = 3; From e672c53d3f230c7f091f1fbd7c0a13ad465f97c7 Mon Sep 17 00:00:00 2001 From: Jeremi Kurdek Date: Thu, 1 Feb 2024 15:06:54 +0100 Subject: [PATCH 8/8] Add review suggestions --- src/mono/mono/mini/mini-llvm.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/mono/mono/mini/mini-llvm.c b/src/mono/mono/mini/mini-llvm.c index 9a1a10cdd427e1..a0b197866a0dbb 100644 --- a/src/mono/mono/mini/mini-llvm.c +++ b/src/mono/mono/mini/mini-llvm.c @@ -6213,10 +6213,8 @@ process_bb (EmitContext *ctx, MonoBasicBlock *bb) retval = LLVMConstNull(ret_type); if (lhs) { - int len = LLVMGetVectorSize (LLVMTypeOf (lhs)); - - if (mono_class_value_size (klass, NULL) == 12) - len--; + // Vector3: ret_type is Vector3, lhs is Vector3 represented as a Vector4 (three elements + zero). We need to extract only the first 3 elements from lhs. + int len = mono_class_value_size (klass, NULL) == 12 ? 3 : LLVMGetVectorSize (LLVMTypeOf (lhs)); for (int i = 0; i < len; i++) { elem = LLVMBuildExtractElement (builder, lhs, const_int32 (i), "extract_elem");