Skip to content

Commit

Permalink
[mono][jit] Added Vector128 intrinsics that depend on Vector64 for ar…
Browse files Browse the repository at this point in the history
…m64 (#87765)

* Added Vector128.GetLower and Vector128.GetUpper as intrinsics on arm64. Enabled OP_XCONCAT.

* Adding WithLower, WithUpper.

* Fixed missing variable.

* Restored check on x64 code.

* xlower and xupper now sanitize the upper half of the dest register.

* Fixed definitions of insert opcodes to reflect that they depend on the original dest state.

* OP_XCONCAT can now also depend on initial dest state.

* Disabling Vector128.WithLower,WithUpper on x64.
  • Loading branch information
jandupej authored Jun 21, 2023
1 parent f644e5b commit 57cfd06
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 21 deletions.
1 change: 1 addition & 0 deletions src/mono/mono/arch/arm64/arm64-codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -1047,6 +1047,7 @@ arm_encode_arith_imm (int imm, guint32 *shift)

/* NEON :: move SIMD register
 *
 * Both macros copy register rn into rd by emitting arm_neon_orr with rn
 * passed as both source operands (x | x == x).
 *   arm_neon_mov    - uses VREG_FULL.
 *   arm_neon_mov_8b - uses VREG_LOW.
 * NOTE(review): presumably VREG_FULL selects the 128-bit (16B) arrangement
 * and VREG_LOW the 64-bit (8B) one, as the _8b suffix suggests - confirm
 * against the VREG_* definitions.
 */
#define arm_neon_mov(p, rd, rn) arm_neon_orr ((p), VREG_FULL, (rd), (rn), (rn))
#define arm_neon_mov_8b(p, rd, rn) arm_neon_orr ((p), VREG_LOW, (rd), (rn), (rn))

/* NEON :: AES */
#define arm_neon_aes_opcode(p, size, opcode, rd, rn) arm_neon_opcode_2reg ((p), VREG_FULL, 0b00001110001010000000100000000000 | (size) << 22 | (opcode) << 12, (rd), (rn))
Expand Down
5 changes: 5 additions & 0 deletions src/mono/mono/mini/cpu-arm64.mdesc
Original file line number Diff line number Diff line change
Expand Up @@ -558,6 +558,11 @@ arm64_ext_imm: dest:x src1:x src2:x len:4
xinsert_i8: dest:x src1:x src2:i src3:i len:20
xinsert_r8: dest:x src1:x src2:f src3:i len:20
arm64_broadcast_elem: dest:x src1:x len:16
xconcat: dest:x src1:x src2:x len:8 clob:1
xlower: dest:x src1:x len:8
xupper: dest:x src1:x len:8
xinsert_lower: dest:x src1:x src2:x len:8 clob:1
xinsert_upper: dest:x src1:x src2:x len:8 clob:1

generic_class_init: src1:a len:44 clob:c
gc_safe_point: src1:i len:12 clob:c
Expand Down
42 changes: 36 additions & 6 deletions src/mono/mono/mini/mini-arm64.c
Original file line number Diff line number Diff line change
Expand Up @@ -4085,7 +4085,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
}
case OP_CREATE_SCALAR_UNSAFE_INT: {
const int t = get_type_size_macro (ins->inst_c1);
arm_neon_ins_g(code, t, dreg, sreg1, 0);
arm_neon_ins_g (code, t, dreg, sreg1, 0);
break;
}
case OP_CREATE_SCALAR_UNSAFE_FLOAT: {
Expand All @@ -4099,14 +4099,17 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
t = SIZE_8;
break;
}
arm_neon_ins_e(code, t, dreg, sreg1, 0, 0);
arm_neon_ins_e (code, t, dreg, sreg1, 0, 0);
}
break;
}
// This requires Vector64 SIMD support
// case OP_XCONCAT:
// arm_neon_ext_16b(code, dreg, sreg1, sreg2, 8);
// break;
case OP_XCONCAT: {
if (dreg != sreg1)
arm_neon_mov (code, dreg, sreg1);

arm_neon_ins_e (code, SIZE_8, dreg, sreg2, 1, 0);
break;
}
case OP_ARM64_USHL: {
arm_neon_ushl (code, get_vector_size_macro (ins), get_type_size_macro (ins->inst_c1), dreg, sreg1, sreg2);
break;
Expand All @@ -4118,6 +4121,33 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
arm_neon_ext_16b (code, dreg, sreg1, sreg2, ins->inst_c0);
break;
}
case OP_XLOWER: {
if (dreg == sreg1) {
// clean the upper half
arm_neon_eor (code, VREG_FULL, NEON_TMP_REG, NEON_TMP_REG, NEON_TMP_REG);
arm_neon_ins_e (code, SIZE_8, dreg, NEON_TMP_REG, 1, 0);
} else {
arm_neon_eor (code, VREG_FULL, dreg, dreg, dreg);
arm_neon_mov_8b (code, dreg, sreg1);
}
break;
}
case OP_XUPPER:
// shift in 64 zeros from the left
arm_neon_eor (code, VREG_FULL, NEON_TMP_REG, NEON_TMP_REG, NEON_TMP_REG);
arm_neon_ext_16b (code, dreg, sreg1, NEON_TMP_REG, 8);
break;

case OP_XINSERT_LOWER:
case OP_XINSERT_UPPER: {
if (dreg != sreg1)
arm_neon_mov (code, dreg, sreg1);

int insert_at = (ins->opcode == OP_XINSERT_LOWER) ? 0 : 1;
arm_neon_ins_e (code, SIZE_8, dreg, sreg2, insert_at, 0);
break;
}

/* BRANCH */
case OP_BR:
mono_add_patch_info_rel (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb, MONO_R_ARM64_B);
Expand Down
25 changes: 10 additions & 15 deletions src/mono/mono/mini/simd-intrinsics.c
Original file line number Diff line number Diff line change
Expand Up @@ -1201,6 +1201,8 @@ static guint16 sri_vector_methods [] = {
SN_WidenLower,
SN_WidenUpper,
SN_WithElement,
SN_WithLower,
SN_WithUpper,
SN_Xor,
SN_get_IsHardwareAccelerated,
};
Expand Down Expand Up @@ -1382,14 +1384,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
if (!COMPILE_LLVM (cfg)) {
if (vector_size != 128)
return NULL;
switch (id) {
case SN_GetLower:
case SN_GetUpper:
return NULL;
default:
break;
}
}
#endif

#ifdef TARGET_WASM
Expand Down Expand Up @@ -1662,11 +1657,6 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
ins->inst_c1 = arg0_type;
return ins;
} else if (is_create_from_half_vectors_overload (fsig)) {
#if defined(TARGET_ARM64)
// Require Vector64 SIMD support
if (!COMPILE_LLVM (cfg))
return NULL;
#endif
#if defined(TARGET_AMD64)
// Require Vector64 SIMD support
if (!COMPILE_LLVM (cfg))
Expand Down Expand Up @@ -1929,10 +1919,9 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi

#ifdef TARGET_AMD64
if (!COMPILE_LLVM (cfg))
/* These return a Vector64 */
/* These return a Vector64 */
return NULL;
#endif

return emit_simd_ins_for_sig (cfg, klass, op, 0, arg0_type, fsig, args);
}
case SN_GreaterThan:
Expand Down Expand Up @@ -2304,9 +2293,15 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
}
case SN_WithLower:
case SN_WithUpper: {
#ifdef TARGET_AMD64
if (!COMPILE_LLVM (cfg))
/* These return a Vector64 */
return NULL;
#endif

if (!is_element_type_primitive (fsig->params [0]))
return NULL;
int op = id == SN_GetLower ? OP_XINSERT_LOWER : OP_XINSERT_UPPER;
int op = id == SN_WithLower ? OP_XINSERT_LOWER : OP_XINSERT_UPPER;
return emit_simd_ins_for_sig (cfg, klass, op, 0, arg0_type, fsig, args);
}
default:
Expand Down

0 comments on commit 57cfd06

Please sign in to comment.