Skip to content

Commit

Permalink
[Mono] Intrinsify multiply and divide for mini JIT on ARM64 (#84004)
Browse files Browse the repository at this point in the history
* Support multiply and divide

* Use expand opcode instead

* Remove unused code

* Uncomment
  • Loading branch information
fanyang-mono authored Mar 30, 2023
1 parent 822fc81 commit b606ffc
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 45 deletions.
1 change: 1 addition & 0 deletions src/mono/mono/arch/arm64/arm64-codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -1126,6 +1126,7 @@ arm_encode_arith_imm (int imm, guint32 *shift)
#define arm_neon_umov(p, type, rd, rn, index) arm_neon_cpy_opcode ((p), (type == TYPE_I64) ? 0b1 : 0b0, 0b0, (0b00001 << (type)) | ((index) << ((type) + 1)), 0b0111, (rd), (rn))
#define arm_neon_dup_e(p, width, type, rd, rn, index) arm_neon_cpy_opcode ((p), (width), 0b0, (0b00001 << (type)) | ((index) << ((type)+1)), 0b0000, (rd), (rn))
#define arm_neon_fdup_e(p, width, type, rd, rn, index) arm_neon_dup_e ((p), (width), (type) + TYPE_I32, (rd), (rn), (index))
#define arm_neon_dup_g(p, width, type, rd, rn) arm_neon_cpy_opcode ((p), (width), 0b0, (0b00001 << (type)), 0b0001, (rd), (rn))

// Specific opcodes:
#define arm_neon_dup_g_8b(p, rd, rn) arm_neon_cpy_opcode ((p), VREG_LOW, 0b0, 0b00001, 0b0001, (rd), (rn))
Expand Down
6 changes: 6 additions & 0 deletions src/mono/mono/mini/cpu-arm64.mdesc
Original file line number Diff line number Diff line change
Expand Up @@ -514,6 +514,12 @@ extract_r4: dest:f src1:x len:4
extract_r8: dest:f src1:x len:4
arm64_xaddv: dest:x src1:x len:8
xop_ovr_x_x: dest:x src1:x len:4
expand_i1: dest:x src1:i len:4
expand_i2: dest:x src1:i len:4
expand_i4: dest:x src1:i len:4
expand_i8: dest:x src1:i len:4
expand_r4: dest:x src1:f len:4
expand_r8: dest:x src1:f len:4

generic_class_init: src1:a len:44 clob:c
gc_safe_point: src1:i len:12 clob:c
Expand Down
15 changes: 14 additions & 1 deletion src/mono/mono/mini/mini-arm64.c
Original file line number Diff line number Diff line change
Expand Up @@ -3746,7 +3746,20 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
}
case OP_XCAST:
break;

case OP_EXPAND_I1:
case OP_EXPAND_I2:
case OP_EXPAND_I4:
case OP_EXPAND_I8: {
const int t = get_type_size_macro (ins->inst_c1);
arm_neon_dup_g (code, VREG_FULL, t, ins->dreg, ins->sreg1);
break;
}
case OP_EXPAND_R4:
case OP_EXPAND_R8: {
const int t = get_type_size_macro (ins->inst_c1);
arm_neon_fdup_e (code, VREG_FULL, t, ins->dreg, ins->sreg1, 0);
break;
}
case OP_EXTRACT_I1:
case OP_EXTRACT_I2:
case OP_EXTRACT_I4:
Expand Down
3 changes: 3 additions & 0 deletions src/mono/mono/mini/simd-arm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ SIMD_OP (128, OP_XBINOP, OP_FMAX, WTDSS, _UNDEF,
SIMD_OP (128, OP_XBINOP, OP_IMIN, WTDSS, arm_neon_smin, arm_neon_smin, arm_neon_smin, _SKIP, _UNDEF, _UNDEF)
SIMD_OP (128, OP_XBINOP, OP_IMIN_UN, WTDSS, arm_neon_umin, arm_neon_umin, arm_neon_umin, _SKIP, _UNDEF, _UNDEF)
SIMD_OP (128, OP_XBINOP, OP_FMIN, WTDSS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_fmin, arm_neon_fmin)
SIMD_OP (128, OP_XBINOP, OP_IMUL, WTDSS, arm_neon_mul, arm_neon_mul, arm_neon_mul, arm_neon_mul, _UNDEF, _UNDEF)
SIMD_OP (128, OP_XBINOP, OP_FMUL, WTDSS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_fmul, arm_neon_fmul)
SIMD_OP (128, OP_XBINOP, OP_FDIV, WTDSS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_fdiv, arm_neon_fdiv)
SIMD_OP (128, OP_XBINOP_FORCEINT, XBINOP_FORCEINT_AND, WDSS, arm_neon_and, arm_neon_and, arm_neon_and, arm_neon_and, arm_neon_and, arm_neon_and)
SIMD_OP (128, OP_XBINOP_FORCEINT, XBINOP_FORCEINT_OR, WDSS, arm_neon_orr, arm_neon_orr, arm_neon_orr, arm_neon_orr, arm_neon_orr, arm_neon_orr)
SIMD_OP (128, OP_XBINOP_FORCEINT, XBINOP_FORCEINT_XOR, WDSS, arm_neon_eor, arm_neon_eor, arm_neon_eor, arm_neon_eor, arm_neon_eor, arm_neon_eor)
Expand Down
98 changes: 54 additions & 44 deletions src/mono/mono/mini/simd-intrinsics.c
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,28 @@ emit_simd_ins_for_sig (MonoCompile *cfg, MonoClass *klass, int opcode, int instc

static gboolean type_enum_is_unsigned (MonoTypeEnum type);
static gboolean type_enum_is_float (MonoTypeEnum type);
static int type_to_expand_op (MonoTypeEnum type);

/*
 * handle_mul_div_by_scalar:
 *
 *   Emit the IR for a binary operation SUB_OP between the vector held in
 * VECTOR_REG and the scalar held in SCALAR_REG, returning the final instruction.
 *
 * Two instruction sequences are produced depending on the backend:
 *   - LLVM: the scalar is wrapped with OP_CREATE_SCALAR_UNSAFE and combined
 *     with the vector through OP_XBINOP_BYSCALAR.
 *   - otherwise: the scalar is turned into a vector using the expand opcode
 *     selected by type_to_expand_op (ARG_TYPE), and a regular OP_XBINOP is
 *     emitted with ARG_TYPE recorded in inst_c1.
 *
 * In both sequences the vector is the first source operand and the
 * scalar-derived value is the second.
 */
static MonoInst*
handle_mul_div_by_scalar (MonoCompile *cfg, MonoClass *klass, MonoTypeEnum arg_type, int scalar_reg, int vector_reg, int sub_op)
{
	if (COMPILE_LLVM (cfg)) {
		MonoInst *scalar_ins = emit_simd_ins (cfg, klass, OP_CREATE_SCALAR_UNSAFE, scalar_reg, -1);
		scalar_ins->inst_c1 = arg_type;

		MonoInst *by_scalar_ins = emit_simd_ins (cfg, klass, OP_XBINOP_BYSCALAR, vector_reg, scalar_ins->dreg);
		by_scalar_ins->inst_c0 = sub_op;
		return by_scalar_ins;
	}

	/* Non-LLVM path: expand the scalar first, then use a plain vector binop. */
	MonoInst *expand_ins = emit_simd_ins (cfg, klass, type_to_expand_op (arg_type), scalar_reg, -1);
	expand_ins->inst_c1 = arg_type;

	MonoInst *binop_ins = emit_simd_ins (cfg, klass, OP_XBINOP, vector_reg, expand_ins->dreg);
	binop_ins->inst_c0 = sub_op;
	binop_ins->inst_c1 = arg_type;
	return binop_ins;
}

static MonoInst*
emit_simd_ins_for_binary_op (MonoCompile *cfg, MonoClass *klass, MonoMethodSignature *fsig, MonoInst **args, MonoTypeEnum arg_type, int id)
Expand Down Expand Up @@ -304,13 +326,9 @@ emit_simd_ins_for_binary_op (MonoCompile *cfg, MonoClass *klass, MonoMethodSigna
case SN_op_Division: {
const char *class_name = m_class_get_name (klass);
if (strcmp ("Vector2", class_name) && strcmp ("Vector4", class_name) && strcmp ("Quaternion", class_name) && strcmp ("Plane", class_name)) {
if ((fsig->params [0]->type == MONO_TYPE_GENERICINST) && (fsig->params [1]->type != MONO_TYPE_GENERICINST)) {
MonoInst* ins = emit_simd_ins (cfg, klass, OP_CREATE_SCALAR_UNSAFE, args [1]->dreg, -1);
ins->inst_c1 = arg_type;
ins = emit_simd_ins (cfg, klass, OP_XBINOP_BYSCALAR, args [0]->dreg, ins->dreg);
ins->inst_c0 = OP_FDIV;
return ins;
} else if ((fsig->params [0]->type == MONO_TYPE_GENERICINST) && (fsig->params [1]->type == MONO_TYPE_GENERICINST)) {
if ((fsig->params [0]->type == MONO_TYPE_GENERICINST) && (fsig->params [1]->type != MONO_TYPE_GENERICINST))
return handle_mul_div_by_scalar (cfg, klass, arg_type, args [1]->dreg, args [0]->dreg, OP_FDIV);
else if ((fsig->params [0]->type == MONO_TYPE_GENERICINST) && (fsig->params [1]->type == MONO_TYPE_GENERICINST)) {
instc0 = OP_FDIV;
break;
} else {
Expand All @@ -330,19 +348,11 @@ emit_simd_ins_for_binary_op (MonoCompile *cfg, MonoClass *klass, MonoMethodSigna
case SN_op_Multiply: {
const char *class_name = m_class_get_name (klass);
if (strcmp ("Vector2", class_name) && strcmp ("Vector4", class_name) && strcmp ("Quaternion", class_name) && strcmp ("Plane", class_name)) {
if (fsig->params [1]->type != MONO_TYPE_GENERICINST) {
MonoInst* ins = emit_simd_ins (cfg, klass, OP_CREATE_SCALAR_UNSAFE, args [1]->dreg, -1);
ins->inst_c1 = arg_type;
ins = emit_simd_ins (cfg, klass, OP_XBINOP_BYSCALAR, args [0]->dreg, ins->dreg);
ins->inst_c0 = OP_FMUL;
return ins;
} else if (fsig->params [0]->type != MONO_TYPE_GENERICINST) {
MonoInst* ins = emit_simd_ins (cfg, klass, OP_CREATE_SCALAR_UNSAFE, args [0]->dreg, -1);
ins->inst_c1 = arg_type;
ins = emit_simd_ins (cfg, klass, OP_XBINOP_BYSCALAR, args [1]->dreg, ins->dreg);
ins->inst_c0 = OP_FMUL;
return ins;
} else if ((fsig->params [0]->type == MONO_TYPE_GENERICINST) && (fsig->params [1]->type == MONO_TYPE_GENERICINST)) {
if (fsig->params [1]->type != MONO_TYPE_GENERICINST)
return handle_mul_div_by_scalar (cfg, klass, arg_type, args [1]->dreg, args [0]->dreg, OP_FMUL);
else if (fsig->params [0]->type != MONO_TYPE_GENERICINST)
return handle_mul_div_by_scalar (cfg, klass, arg_type, args [0]->dreg, args [1]->dreg, OP_FMUL);
else if ((fsig->params [0]->type == MONO_TYPE_GENERICINST) && (fsig->params [1]->type == MONO_TYPE_GENERICINST)) {
instc0 = OP_FMUL;
break;
} else {
Expand Down Expand Up @@ -375,22 +385,18 @@ emit_simd_ins_for_binary_op (MonoCompile *cfg, MonoClass *klass, MonoMethodSigna
instc0 = type_enum_is_unsigned (arg_type) ? OP_IMIN_UN : OP_IMIN;
break;
case SN_Multiply:
case SN_op_Multiply:
if (fsig->params [1]->type != MONO_TYPE_GENERICINST) {
MonoInst* ins = emit_simd_ins (cfg, klass, OP_CREATE_SCALAR_UNSAFE, args [1]->dreg, -1);
ins->inst_c1 = arg_type;
ins = emit_simd_ins (cfg, klass, OP_XBINOP_BYSCALAR, args [0]->dreg, ins->dreg);
ins->inst_c0 = OP_IMUL;
return ins;
} else if (fsig->params [0]->type != MONO_TYPE_GENERICINST) {
MonoInst* ins = emit_simd_ins (cfg, klass, OP_CREATE_SCALAR_UNSAFE, args [0]->dreg, -1);
ins->inst_c1 = arg_type;
ins = emit_simd_ins (cfg, klass, OP_XBINOP_BYSCALAR, args [1]->dreg, ins->dreg);
ins->inst_c0 = OP_IMUL;
return ins;
}
case SN_op_Multiply: {
#ifdef TARGET_ARM64
if (!COMPILE_LLVM (cfg) && (arg_type == MONO_TYPE_I8 || arg_type == MONO_TYPE_U8))
return NULL;
#endif
if (fsig->params [1]->type != MONO_TYPE_GENERICINST)
return handle_mul_div_by_scalar (cfg, klass, arg_type, args [1]->dreg, args [0]->dreg, OP_IMUL);
else if (fsig->params [0]->type != MONO_TYPE_GENERICINST)
return handle_mul_div_by_scalar (cfg, klass, arg_type, args [0]->dreg, args [1]->dreg, OP_IMUL);
instc0 = OP_IMUL;
break;
}
case SN_Subtract:
case SN_op_Subtraction:
instc0 = OP_ISUB;
Expand Down Expand Up @@ -799,9 +805,9 @@ type_enum_is_float (MonoTypeEnum type)
}

static int
type_to_expand_op (MonoType *type)
type_to_expand_op (MonoTypeEnum type)
{
switch (type->type) {
switch (type) {
case MONO_TYPE_I1:
case MONO_TYPE_U1:
return OP_EXPAND_I1;
Expand Down Expand Up @@ -1262,6 +1268,8 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
case SN_ToScalar:
case SN_Floor:
case SN_Ceiling:
case SN_Divide:
case SN_Multiply:
break;
default:
return NULL;
Expand Down Expand Up @@ -1447,7 +1455,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
if (!MONO_TYPE_IS_VECTOR_PRIMITIVE (etype))
return NULL;
if (fsig->param_count == 1 && mono_metadata_type_equal (fsig->params [0], etype))
return emit_simd_ins (cfg, klass, type_to_expand_op (etype), args [0]->dreg, -1);
return emit_simd_ins (cfg, klass, type_to_expand_op (etype->type), args [0]->dreg, -1);
else if (is_create_from_half_vectors_overload (fsig))
return emit_simd_ins (cfg, klass, OP_XCONCAT, args [0]->dreg, args [1]->dreg);
else if (is_elementwise_create_overload (fsig, etype))
Expand Down Expand Up @@ -1940,7 +1948,7 @@ emit_vector64_vector128_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSign
#ifdef TARGET_ARM64
if (!COMPILE_LLVM (cfg)) {
return NULL;
/*if (size != 16)
if (size != 16)
return NULL;
switch (id) {
case SN_get_One:
Expand All @@ -1955,10 +1963,12 @@ emit_vector64_vector128_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSign
case SN_op_ExclusiveOr:
case SN_op_Equality:
case SN_op_Inequality:
case SN_op_Division:
case SN_op_Multiply:
break;
default:
return NULL;
}*/
}
}
#endif

Expand Down Expand Up @@ -2166,7 +2176,7 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
gboolean indirect = FALSE;
int dreg = load_simd_vreg (cfg, cmethod, args [0], &indirect);

int opcode = type_to_expand_op (etype);
int opcode = type_to_expand_op (etype->type);
ins = emit_simd_ins (cfg, klass, opcode, args [1]->dreg, -1);

for (int i = 1; i < fsig->param_count; ++i) {
Expand Down Expand Up @@ -2639,7 +2649,7 @@ emit_sys_numerics_vector_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSig
if (fsig->param_count == 1 && mono_metadata_type_equal (fsig->params [0], etype)) {
int dreg = load_simd_vreg (cfg, cmethod, args [0], NULL);

int opcode = type_to_expand_op (etype);
int opcode = type_to_expand_op (etype->type);
ins = emit_simd_ins (cfg, klass, opcode, args [1]->dreg, -1);
ins->dreg = dreg;
return ins;
Expand Down Expand Up @@ -3408,7 +3418,7 @@ emit_arm64_intrinsics (
break;
}
}
return emit_simd_ins (cfg, ret_klass, type_to_expand_op (rtype), scalar_src_reg, -1);
return emit_simd_ins (cfg, ret_klass, type_to_expand_op (rtype->type), scalar_src_reg, -1);
}
case SN_Extract: {
int extract_op = type_to_xextract_op (arg0_type);
Expand Down Expand Up @@ -3448,7 +3458,7 @@ emit_arm64_intrinsics (
MonoType *etype = get_vector_t_elem_type (fsig->ret);
gboolean is_unsigned = type_is_unsigned (fsig->ret);
gboolean scalar = id == SN_ShiftLeftLogicalSaturateScalar;
int s2v = scalar ? OP_CREATE_SCALAR_UNSAFE : type_to_expand_op (etype);
int s2v = scalar ? OP_CREATE_SCALAR_UNSAFE : type_to_expand_op (etype->type);
int xop = scalar ? OP_XOP_OVR_SCALAR_X_X_X : OP_XOP_OVR_X_X_X;
int iid = is_unsigned ? INTRINS_AARCH64_ADV_SIMD_UQSHL : INTRINS_AARCH64_ADV_SIMD_SQSHL;
MonoInst *shift_vector = emit_simd_ins (cfg, ret_klass, s2v, args [1]->dreg, -1);
Expand Down Expand Up @@ -4851,7 +4861,7 @@ emit_wasm_supported_intrinsics (
case SN_Splat: {
MonoType *etype = get_vector_t_elem_type (fsig->ret);
g_assert (fsig->param_count == 1 && mono_metadata_type_equal (fsig->params [0], etype));
return emit_simd_ins (cfg, klass, type_to_expand_op (etype), args [0]->dreg, -1);
return emit_simd_ins (cfg, klass, type_to_expand_op (etype->type), args [0]->dreg, -1);
}
case SN_Dot:
return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, INTRINS_WASM_DOT, -1, fsig, args);
Expand Down

0 comments on commit b606ffc

Please sign in to comment.