diff --git a/src/mono/mono/arch/arm64/arm64-codegen.h b/src/mono/mono/arch/arm64/arm64-codegen.h
index 154fd10d85f633..0834a24ad635e1 100644
--- a/src/mono/mono/arch/arm64/arm64-codegen.h
+++ b/src/mono/mono/arch/arm64/arm64-codegen.h
@@ -1047,6 +1047,7 @@ arm_encode_arith_imm (int imm, guint32 *shift)
 
 /* NEON :: move SIMD register*/
 #define arm_neon_mov(p, rd, rn) arm_neon_orr ((p), VREG_FULL, (rd), (rn), (rn))
+#define arm_neon_mov_8b(p, rd, rn) arm_neon_orr ((p), VREG_LOW, (rd), (rn), (rn))
 
 /* NEON :: AES */
 #define arm_neon_aes_opcode(p, size, opcode, rd, rn) arm_neon_opcode_2reg ((p), VREG_FULL, 0b00001110001010000000100000000000 | (size) << 22 | (opcode) << 12, (rd), (rn))
diff --git a/src/mono/mono/mini/cpu-arm64.mdesc b/src/mono/mono/mini/cpu-arm64.mdesc
index 57e31d0561effd..2479dd0d22d62b 100644
--- a/src/mono/mono/mini/cpu-arm64.mdesc
+++ b/src/mono/mono/mini/cpu-arm64.mdesc
@@ -558,6 +558,11 @@ arm64_ext_imm: dest:x src1:x src2:x len:4
 xinsert_i8: dest:x src1:x src2:i src3:i len:20
 xinsert_r8: dest:x src1:x src2:f src3:i len:20
 arm64_broadcast_elem: dest:x src1:x len:16
+xconcat: dest:x src1:x src2:x len:8 clob:1
+xlower: dest:x src1:x len:4
+xupper: dest:x src1:x len:8
+xinsert_lower: dest:x src1:x src2:x len:8 clob:1
+xinsert_upper: dest:x src1:x src2:x len:8 clob:1
 
 generic_class_init: src1:a len:44 clob:c
 gc_safe_point: src1:i len:12 clob:c
diff --git a/src/mono/mono/mini/mini-arm64.c b/src/mono/mono/mini/mini-arm64.c
index 491f02f6e4044b..0bd13dbe798115 100644
--- a/src/mono/mono/mini/mini-arm64.c
+++ b/src/mono/mono/mini/mini-arm64.c
@@ -4085,7 +4085,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 		}
 		case OP_CREATE_SCALAR_UNSAFE_INT: {
 			const int t = get_type_size_macro (ins->inst_c1);
-			arm_neon_ins_g(code, t, dreg, sreg1, 0);
+			arm_neon_ins_g (code, t, dreg, sreg1, 0);
 			break;
 		}
 		case OP_CREATE_SCALAR_UNSAFE_FLOAT: {
@@ -4099,14 +4099,17 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 					t = SIZE_8;
 					break;
 				}
-				arm_neon_ins_e(code, t, dreg, sreg1, 0, 0);
+				arm_neon_ins_e (code, t, dreg, sreg1, 0, 0);
 			}
 			break;
 		}
-		// This requires Vector64 SIMD support
-		// case OP_XCONCAT:
-		// arm_neon_ext_16b(code, dreg, sreg1, sreg2, 8);
-		// break;
+		case OP_XCONCAT: {
+			if (dreg != sreg1)
+				arm_neon_mov (code, dreg, sreg1);
+
+			arm_neon_ins_e (code, SIZE_8, dreg, sreg2, 1, 0);
+			break;
+		}
 		case OP_ARM64_USHL: {
 			arm_neon_ushl (code, get_vector_size_macro (ins), get_type_size_macro (ins->inst_c1), dreg, sreg1, sreg2);
 			break;
@@ -4118,6 +4121,30 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 			arm_neon_ext_16b (code, dreg, sreg1, sreg2, ins->inst_c0);
 			break;
 		}
+		case OP_XLOWER:
+			// A write to the 64-bit (8B) view of a vector register clears bits [127:64],
+			// so one 8B move copies the lower half and zeroes the upper half,
+			// including when dreg == sreg1.
+			arm_neon_mov_8b (code, dreg, sreg1);
+			break;
+		case OP_XUPPER:
+			// EXT #8 over sreg1:zero moves the upper half of sreg1 into the lower
+			// half of dreg and fills the upper half of dreg with zeros
+			arm_neon_eor (code, VREG_FULL, NEON_TMP_REG, NEON_TMP_REG, NEON_TMP_REG);
+			arm_neon_ext_16b (code, dreg, sreg1, NEON_TMP_REG, 8);
+			break;
+
+		case OP_XINSERT_LOWER:
+		case OP_XINSERT_UPPER: {
+			if (dreg != sreg1)
+				arm_neon_mov (code, dreg, sreg1);
+
+			// overwrite 64-bit lane 0 (lower) or 1 (upper) of dreg with lane 0 of sreg2
+			int insert_at = (ins->opcode == OP_XINSERT_LOWER) ? 0 : 1;
+			arm_neon_ins_e (code, SIZE_8, dreg, sreg2, insert_at, 0);
+			break;
+		}
+
 		/* BRANCH */
 		case OP_BR:
 			mono_add_patch_info_rel (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb, MONO_R_ARM64_B);
diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c
index dca9ccbd73ccc3..733591a5ab421c 100644
--- a/src/mono/mono/mini/simd-intrinsics.c
+++ b/src/mono/mono/mini/simd-intrinsics.c
@@ -1201,6 +1201,8 @@ static guint16 sri_vector_methods [] = {
 	SN_WidenLower,
 	SN_WidenUpper,
 	SN_WithElement,
+	SN_WithLower,
+	SN_WithUpper,
 	SN_Xor,
 	SN_get_IsHardwareAccelerated,
 };
@@ -1382,14 +1384,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
 	if (!COMPILE_LLVM (cfg)) {
 		if (vector_size != 128)
 			return NULL;
-		switch (id) {
-		case SN_GetLower:
-		case SN_GetUpper:
-			return NULL;
-		default:
-			break;
 		}
-	}
 #endif
 
 #ifdef TARGET_WASM
@@ -1662,11 +1657,6 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
 			ins->inst_c1 = arg0_type;
 			return ins;
 		} else if (is_create_from_half_vectors_overload (fsig)) {
-#if defined(TARGET_ARM64)
-			// Require Vector64 SIMD support
-			if (!COMPILE_LLVM (cfg))
-				return NULL;
-#endif
 #if defined(TARGET_AMD64)
 			// Require Vector64 SIMD support
 			if (!COMPILE_LLVM (cfg))
@@ -1929,10 +1919,9 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
 
 #ifdef TARGET_AMD64
 		if (!COMPILE_LLVM (cfg))
-		/* These return a Vector64 */
+			/* These return a Vector64 */
 			return NULL;
 #endif
-
 		return emit_simd_ins_for_sig (cfg, klass, op, 0, arg0_type, fsig, args);
 	}
 	case SN_GreaterThan:
@@ -2304,9 +2293,15 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
 	}
 	case SN_WithLower:
 	case SN_WithUpper: {
+#ifdef TARGET_AMD64
+		if (!COMPILE_LLVM (cfg))
+			/* These take a Vector64 argument */
+			return NULL;
+#endif
+
 		if (!is_element_type_primitive (fsig->params [0]))
 			return NULL;
-		int op = id == SN_GetLower ? OP_XINSERT_LOWER : OP_XINSERT_UPPER;
+		int op = id == SN_WithLower ? OP_XINSERT_LOWER : OP_XINSERT_UPPER;
 		return emit_simd_ins_for_sig (cfg, klass, op, 0, arg0_type, fsig, args);
 	}
 	default: