Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[mono][jit] Adding compare all/any intrinsics. #83515

Merged
merged 12 commits into from
Mar 21, 2023
61 changes: 22 additions & 39 deletions src/mono/mono/arch/arm64/arm64-codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -1139,19 +1139,16 @@ arm_encode_arith_imm (int imm, guint32 *shift)
// dup from a general register into every lane of a full-width vector.
// The imm5 argument carries the lane-size marker: 0b00100 for 32-bit (s) lanes and
// 0b01000 for 64-bit (d) lanes, matching the markers used by the umov_s/umov_d macros.
// (dup_g_2d previously reused the 0b00100 marker and therefore encoded the 4s form.)
#define arm_neon_dup_g_4s(p, rd, rn) arm_neon_cpy_opcode ((p), VREG_FULL, 0b0, 0b00100, 0b0001, (rd), (rn))
#define arm_neon_dup_g_2d(p, rd, rn) arm_neon_cpy_opcode ((p), VREG_FULL, 0b0, 0b01000, 0b0001, (rd), (rn))

// the opcode is smov, but we define variants smovs and smovd by whether they fill a 32 or 64-bit reg.
// The _b/_h/_s suffix picks the marker bit in the imm5 argument (bit 0, 1 or 2) and `index`
// is shifted in just above that marker.
// NOTE(review): these pass six arguments to arm_neon_cpy_opcode, while the arm_neon_dup_g_*
// macros above pass seven (an extra 0b0 after the q bit) - confirm the expected arity.
#define arm_neon_smovs_b(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b0, 0b00001 | ((index) << 1), 0b0101, (rd), (rn))
#define arm_neon_smovs_h(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b0, 0b00010 | ((index) << 2), 0b0101, (rd), (rn))
#define arm_neon_smovd_b(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b1, 0b00001 | ((index) << 1), 0b0101, (rd), (rn))
#define arm_neon_smovd_h(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b1, 0b00010 | ((index) << 2), 0b0101, (rd), (rn))
#define arm_neon_smovd_s(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b1, 0b00100 | ((index) << 3), 0b0101, (rd), (rn))

// the opcode is umov, but we define variants umovs and umovd by whether they fill a 32 or 64-bit reg.
// The _b/_h/_s suffix picks the marker bit in the imm5 argument (bit 0, 1 or 2) and `index`
// is shifted in just above that marker.
// NOTE(review): these pass six arguments to arm_neon_cpy_opcode, while the arm_neon_dup_g_*
// macros above pass seven (an extra 0b0 after the q bit) - confirm the expected arity.
#define arm_neon_umovs_b(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b0, 0b00001 | ((index) << 1), 0b0111, (rd), (rn))
#define arm_neon_umovs_h(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b0, 0b00010 | ((index) << 2), 0b0111, (rd), (rn))
#define arm_neon_umovd_b(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b1, 0b00001 | ((index) << 1), 0b0111, (rd), (rn))
#define arm_neon_umovd_h(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b1, 0b00010 | ((index) << 2), 0b0111, (rd), (rn))
#define arm_neon_umovd_s(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b1, 0b00100 | ((index) << 3), 0b0111, (rd), (rn))
// smov/umov: move vector lane `index` of rn into rd.
// Arguments forwarded to arm_neon_cpy_opcode after `p`: the q bit (0b1 only for the _d variants),
// a constant 0b0, imm5 (lane-size marker bit for b/h/s/d with `index` shifted in above it),
// imm4 (0b0101 for smov, 0b0111 for umov), rd, rn.
// NOTE(review): as far as I recall, A64 SMOV only defines b/h lanes with q = 0 and b/h/s lanes
// with q = 1, which would make arm_neon_smov_s (q = 0) and arm_neon_smov_d unallocated
// encodings - confirm against the Arm ARM before using them.
#define arm_neon_smov_b(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b0, 0b0, 0b00001 | ((index) << 1), 0b0101, (rd), (rn))
#define arm_neon_smov_h(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b0, 0b0, 0b00010 | ((index) << 2), 0b0101, (rd), (rn))
#define arm_neon_smov_s(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b0, 0b0, 0b00100 | ((index) << 3), 0b0101, (rd), (rn))
#define arm_neon_smov_d(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b1, 0b0, 0b01000 | ((index) << 4), 0b0101, (rd), (rn))

#define arm_neon_umov_b(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b0, 0b0, 0b00001 | ((index) << 1), 0b0111, (rd), (rn))
#define arm_neon_umov_h(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b0, 0b0, 0b00010 | ((index) << 2), 0b0111, (rd), (rn))
#define arm_neon_umov_s(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b0, 0b0, 0b00100 | ((index) << 3), 0b0111, (rd), (rn))
#define arm_neon_umov_d(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b1, 0b0, 0b01000 | ((index) << 4), 0b0111, (rd), (rn))


/* NEON :: 3-register same FP16 */
// TODO
Expand Down Expand Up @@ -1576,6 +1573,9 @@ arm_encode_arith_imm (int imm, guint32 *shift)
/* NEON :: across lanes */
// Builds the across-lanes instruction word: a fixed base pattern with `u` ORed in at bit 29,
// `size` at bit 22 and `opcode` at bit 12. q, rd and rn are handed to arm_neon_opcode_2reg.
#define arm_neon_xln_opcode(p, q, u, size, opcode, rd, rn) arm_neon_opcode_2reg ((p), (q), 0b00001110001100000000100000000000 | (u) << 29 | (size) << 22 | (opcode) << 12, (rd), (rn))

// Generic umaxv/uminv: `width` is forwarded as the q argument and `type` as the size field;
// u is fixed to 0b1 and only the opcode differs (0b01010 for umaxv, 0b11010 for uminv).
#define arm_neon_umaxv(p, width, type, rd, rn) arm_neon_xln_opcode ((p), (width), 0b1, (type), 0b01010, (rd), (rn))
#define arm_neon_uminv(p, width, type, rd, rn) arm_neon_xln_opcode ((p), (width), 0b1, (type), 0b11010, (rd), (rn))

// contrary to most other opcodes, the suffix is the type of source
// saddlv: u = 0b0, opcode 0b00011; the suffix selects q (VREG_LOW or VREG_FULL) and the SIZE_n field.
#define arm_neon_saddlv_8b(p, rd, rn) arm_neon_xln_opcode ((p), VREG_LOW, 0b0, SIZE_1, 0b00011, (rd), (rn))
#define arm_neon_saddlv_16b(p, rd, rn) arm_neon_xln_opcode ((p), VREG_FULL, 0b0, SIZE_1, 0b00011, (rd), (rn))
Expand Down Expand Up @@ -1609,18 +1609,6 @@ arm_encode_arith_imm (int imm, guint32 *shift)
// uaddlv on full-width vectors: u = 0b1, opcode 0b00011; the suffix selects the SIZE_n field.
#define arm_neon_uaddlv_8h(p, rd, rn) arm_neon_xln_opcode ((p), VREG_FULL, 0b1, SIZE_2, 0b00011, (rd), (rn))
#define arm_neon_uaddlv_4s(p, rd, rn) arm_neon_xln_opcode ((p), VREG_FULL, 0b1, SIZE_4, 0b00011, (rd), (rn))

// Per-arrangement umaxv variants: u = 0b1, opcode 0b01010.
// The suffix selects q (VREG_LOW for 8b/4h, VREG_FULL for 16b/8h/4s) and the SIZE_n field.
#define arm_neon_umaxv_8b(p, rd, rn) arm_neon_xln_opcode ((p), VREG_LOW, 0b1, SIZE_1, 0b01010, (rd), (rn))
#define arm_neon_umaxv_16b(p, rd, rn) arm_neon_xln_opcode ((p), VREG_FULL, 0b1, SIZE_1, 0b01010, (rd), (rn))
#define arm_neon_umaxv_4h(p, rd, rn) arm_neon_xln_opcode ((p), VREG_LOW, 0b1, SIZE_2, 0b01010, (rd), (rn))
#define arm_neon_umaxv_8h(p, rd, rn) arm_neon_xln_opcode ((p), VREG_FULL, 0b1, SIZE_2, 0b01010, (rd), (rn))
#define arm_neon_umaxv_4s(p, rd, rn) arm_neon_xln_opcode ((p), VREG_FULL, 0b1, SIZE_4, 0b01010, (rd), (rn))

// Per-arrangement uminv variants: identical to umaxv except for the opcode, 0b11010.
#define arm_neon_uminv_8b(p, rd, rn) arm_neon_xln_opcode ((p), VREG_LOW, 0b1, SIZE_1, 0b11010, (rd), (rn))
#define arm_neon_uminv_16b(p, rd, rn) arm_neon_xln_opcode ((p), VREG_FULL, 0b1, SIZE_1, 0b11010, (rd), (rn))
#define arm_neon_uminv_4h(p, rd, rn) arm_neon_xln_opcode ((p), VREG_LOW, 0b1, SIZE_2, 0b11010, (rd), (rn))
#define arm_neon_uminv_8h(p, rd, rn) arm_neon_xln_opcode ((p), VREG_FULL, 0b1, SIZE_2, 0b11010, (rd), (rn))
#define arm_neon_uminv_4s(p, rd, rn) arm_neon_xln_opcode ((p), VREG_FULL, 0b1, SIZE_4, 0b11010, (rd), (rn))

// Across-lanes reductions on 4s vectors: all use VREG_FULL and u = 0b1.
// fmaxnmv and fminnmv share opcode 0b01100; fminnmv is distinguished by ORing 0b10 into the size field.
#define arm_neon_fmaxnmv_4s(p, rd, rn) arm_neon_xln_opcode ((p), VREG_FULL, 0b1, SIZE_1, 0b01100, (rd), (rn))
#define arm_neon_fmaxv_4s(p, rd, rn) arm_neon_xln_opcode ((p), VREG_FULL, 0b1, SIZE_1, 0b01111, (rd), (rn))
#define arm_neon_fminnmv_4s(p, rd, rn) arm_neon_xln_opcode ((p), VREG_FULL, 0b1, 0b10 | SIZE_1, 0b01100, (rd), (rn))
Expand Down Expand Up @@ -2313,6 +2301,15 @@ arm_encode_arith_imm (int imm, guint32 *shift)
arm_neon_shimm_opcode ((p), (q), (u), (__temp_emit0 >> 3) & 0b1111, __temp_emit0 & 0b111, (opcode), (rd), (rn)) \
} while (0)

// Computes the 7-bit immh:immb field for a shift-left-by-immediate:
// the element width in bits (8 << size) plus the shift amount, masked to 7 bits.
#define arm_neon_shimm_shl_immh_immb(size, shift) (((shift) + (8 << (size))) & 0b01111111)
// Emits a shift-left-by-immediate instruction through arm_neon_shimm_opcode.
// The combined immh:immb value is computed once, then split into immh (upper 4 bits)
// and immb (lower 3 bits).
// The temporary is declared and used under the same name (the declaration previously
// had an extra underscore, so any use of this macro failed to compile), and the emit
// statement is terminated with ';' so the do/while body is well-formed.
#define arm_neon_shimm_shl_opcode(p, q, u, size, opcode, rd, rn, shift) do { \
	int32_t __temp_emit0 = arm_neon_shimm_shl_immh_immb ((size), (shift)); \
	arm_neon_shimm_opcode ((p), (q), (u), (__temp_emit0 >> 3) & 0b1111, __temp_emit0 & 0b111, (opcode), (rd), (rn)); \
} while (0)

// Generic sli: `width` is forwarded as q and `type` as the size argument of
// arm_neon_shimm_shl_opcode (u = 0b1, opcode 0b01010).
// Generic shrn: always VREG_LOW, u = 0b0, opcode 0b10000, encoded through arm_neon_shimm_shr_opcode.
// NOTE(review): for the narrowing shrn, `type` presumably has to be the destination (narrow)
// element size - confirm against the immh:immb encoding rules.
#define arm_neon_sli(p, width, type, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), (width), 0b1, (type), 0b01010, (rd), (rn), (shift))
#define arm_neon_shrn(p, type, rd, rn, shift) arm_neon_shimm_shr_opcode ((p), VREG_LOW, 0b0, (type), 0b10000, (rd), (rn), (shift))

// sshr: shift-right encoding with u = 0b0 and opcode 0b00000.
// The suffix selects q (VREG_LOW or VREG_FULL) and the SIZE_n element size.
#define arm_neon_sshr_8b(p, rd, rn, shift) arm_neon_shimm_shr_opcode ((p), VREG_LOW, 0b0, SIZE_1, 0b00000, (rd), (rn), (shift))
#define arm_neon_sshr_16b(p, rd, rn, shift) arm_neon_shimm_shr_opcode ((p), VREG_FULL, 0b0, SIZE_1, 0b00000, (rd), (rn), (shift))
#define arm_neon_sshr_4h(p, rd, rn, shift) arm_neon_shimm_shr_opcode ((p), VREG_LOW, 0b0, SIZE_2, 0b00000, (rd), (rn), (shift))
Expand Down Expand Up @@ -2345,12 +2342,6 @@ arm_encode_arith_imm (int imm, guint32 *shift)
// srsra on full-width vectors: shift-right encoding with u = 0b0 and opcode 0b00110.
#define arm_neon_srsra_4s(p, rd, rn, shift) arm_neon_shimm_shr_opcode ((p), VREG_FULL, 0b0, SIZE_4, 0b00110, (rd), (rn), (shift))
#define arm_neon_srsra_2d(p, rd, rn, shift) arm_neon_shimm_shr_opcode ((p), VREG_FULL, 0b0, SIZE_8, 0b00110, (rd), (rn), (shift))

// Computes the 7-bit immh:immb field for a shift-left-by-immediate:
// the element width in bits (8 << size) plus the shift amount, masked to 7 bits.
#define arm_neon_shimm_shl_immh_immb(size, shift) (((shift) + (8 << (size))) & 0b01111111)
// Emits a shift-left-by-immediate instruction through arm_neon_shimm_opcode.
// The combined immh:immb value is computed once, then split into immh (upper 4 bits)
// and immb (lower 3 bits).
// The temporary is declared and used under the same name (the declaration previously
// had an extra underscore, so any use of this macro failed to compile), and the emit
// statement is terminated with ';' so the do/while body is well-formed.
#define arm_neon_shimm_shl_opcode(p, q, u, size, opcode, rd, rn, shift) do { \
	int32_t __temp_emit0 = arm_neon_shimm_shl_immh_immb ((size), (shift)); \
	arm_neon_shimm_opcode ((p), (q), (u), (__temp_emit0 >> 3) & 0b1111, __temp_emit0 & 0b111, (opcode), (rd), (rn)); \
} while (0)

// shl: shift-left encoding with u = 0b0 and opcode 0b01010.
// The suffix selects q (VREG_LOW or VREG_FULL) and the SIZE_n element size.
#define arm_neon_shl_8b(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_LOW, 0b0, SIZE_1, 0b01010, (rd), (rn), (shift))
#define arm_neon_shl_16b(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_FULL, 0b0, SIZE_1, 0b01010, (rd), (rn), (shift))
#define arm_neon_shl_4h(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_LOW, 0b0, SIZE_2, 0b01010, (rd), (rn), (shift))
Expand Down Expand Up @@ -2454,14 +2445,6 @@ arm_encode_arith_imm (int imm, guint32 *shift)
// sri on full-width vectors: shift-right encoding with u = 0b1 and opcode 0b01000.
#define arm_neon_sri_4s(p, rd, rn, shift) arm_neon_shimm_shr_opcode ((p), VREG_FULL, 0b1, SIZE_4, 0b01000, (rd), (rn), (shift))
#define arm_neon_sri_2d(p, rd, rn, shift) arm_neon_shimm_shr_opcode ((p), VREG_FULL, 0b1, SIZE_8, 0b01000, (rd), (rn), (shift))

// Per-arrangement sli variants: shift-left encoding with u = 0b1 and opcode 0b01010.
// The suffix selects q (VREG_LOW for 8b/4h/2s, VREG_FULL for 16b/8h/4s/2d) and the SIZE_n element size.
#define arm_neon_sli_8b(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_LOW, 0b1, SIZE_1, 0b01010, (rd), (rn), (shift))
#define arm_neon_sli_16b(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_FULL, 0b1, SIZE_1, 0b01010, (rd), (rn), (shift))
#define arm_neon_sli_4h(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_LOW, 0b1, SIZE_2, 0b01010, (rd), (rn), (shift))
#define arm_neon_sli_8h(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_FULL, 0b1, SIZE_2, 0b01010, (rd), (rn), (shift))
#define arm_neon_sli_2s(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_LOW, 0b1, SIZE_4, 0b01010, (rd), (rn), (shift))
#define arm_neon_sli_4s(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_FULL, 0b1, SIZE_4, 0b01010, (rd), (rn), (shift))
#define arm_neon_sli_2d(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_FULL, 0b1, SIZE_8, 0b01010, (rd), (rn), (shift))

// sqshlu: shift-left encoding with u = 0b1 and opcode 0b01100.
// The suffix selects q (VREG_LOW or VREG_FULL) and the SIZE_n element size.
#define arm_neon_sqshlu_8b(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_LOW, 0b1, SIZE_1, 0b01100, (rd), (rn), (shift))
#define arm_neon_sqshlu_16b(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_FULL, 0b1, SIZE_1, 0b01100, (rd), (rn), (shift))
#define arm_neon_sqshlu_4h(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_LOW, 0b1, SIZE_2, 0b01100, (rd), (rn), (shift))
Expand Down
1 change: 1 addition & 0 deletions src/mono/mono/mini/cpu-arm64.mdesc
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,7 @@ xcompare: dest:x src1:x src2:x len:4
xcompare_fp: dest:x src1:x src2:x len:4
negate: dest:x src1:x len:4
ones_complement: dest:x src1:x len:4
xextract: dest:i src1:x len:12
xbinop_forceint: dest:x src1:x src2:x len:4
xcast: dest:x src1:x len:4 clob:1

Expand Down
24 changes: 24 additions & 0 deletions src/mono/mono/mini/mini-arm64.c
Original file line number Diff line number Diff line change
Expand Up @@ -3396,6 +3396,27 @@ emit_move_return_value (MonoCompile *cfg, guint8 * code, MonoInst *ins)
return code;
}

/*
 * emit_xextract:
 *
 *   Emit the instruction sequence that reduces the byte lanes of the SIMD register
 * SREG1 to a 0/1 value in the integer register DREG.
 *
 *   CODE is the current emission pointer; the updated pointer is returned.
 *   WIDTH is forwarded to the umaxv/uminv encoders (the caller in this file passes VREG_FULL).
 *   MODE is one of the SIMD_EXTR_ constants:
 *     SIMD_EXTR_IS_ANY_SET  - reduce with umaxv
 *     SIMD_EXTR_ARE_ALL_SET - reduce with uminv
 *   Any other MODE hits g_assert_not_reached ().
 *
 *   FP_TEMP_REG is used as the destination of the reduction.
 */
static guint8*
emit_xextract (guint8* code, int width, int mode, int dreg, int sreg1)
{
	/* Step 1: byte-wise reduction across lanes into FP_TEMP_REG. */
	if (mode == SIMD_EXTR_IS_ANY_SET) {
		arm_neon_umaxv (code, width, TYPE_I8, FP_TEMP_REG, sreg1);
	} else if (mode == SIMD_EXTR_ARE_ALL_SET) {
		arm_neon_uminv (code, width, TYPE_I8, FP_TEMP_REG, sreg1);
	} else {
		g_assert_not_reached ();
		return code;
	}

	/* Step 2: move byte lane 0 of the reduction result into the integer register. */
	arm_neon_umov_b (code, dreg, FP_TEMP_REG, 0);

	/* Step 3: dreg now holds 0xff for TRUE or 0x0 for FALSE; shift right by 7 to normalize to 0x1/0x0. */
	arm_lsrw (code, dreg, dreg, 7);

	return code;
}

/*
* emit_branch_island:
*
Expand Down Expand Up @@ -3822,6 +3843,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
case OP_XZERO:
arm_neon_eor_16b (code, dreg, dreg, dreg);
break;
case OP_XEXTRACT:
code = emit_xextract (code, VREG_FULL, ins->inst_c0, dreg, sreg1);
break;

/* ALU */
case OP_IADD:
Expand Down
5 changes: 5 additions & 0 deletions src/mono/mono/mini/mini-ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -1479,13 +1479,18 @@ MINI_OP(OP_XCOMPARE_SCALAR, "xcompare_scalar", XREG, XREG, XREG)
MINI_OP(OP_XCOMPARE_FP, "xcompare_fp", XREG, XREG, XREG)
MINI_OP(OP_XCOMPARE_FP_SCALAR, "xcompare_fp_scalar", XREG, XREG, XREG)

/* Extract from XREG into IREG.
* inst_c0 - specific instruction, one of SIMD_EXTR_... */
MINI_OP(OP_XEXTRACT, "xextract", IREG, XREG, NONE)

/*
* Generic SIMD operations, the rest of the JIT doesn't care about the exact operation.
*/
MINI_OP(OP_XBINOP, "xbinop", XREG, XREG, XREG)
MINI_OP(OP_XBINOP_FORCEINT, "xbinop_forceint", XREG, XREG, XREG)
MINI_OP(OP_XBINOP_SCALAR, "xbinop_scalar", XREG, XREG, XREG)
MINI_OP(OP_XBINOP_BYSCALAR, "xbinop_byscalar", XREG, XREG, XREG)

/* inst_c0 contains an INTRINS_ enum, inst_c1 might contain additional data */
MINI_OP(OP_XOP, "xop", NONE, NONE, NONE)
MINI_OP(OP_XOP_X_I, "xop_x_i", XREG, IREG, NONE)
Expand Down
5 changes: 5 additions & 0 deletions src/mono/mono/mini/mini.h
Original file line number Diff line number Diff line change
Expand Up @@ -2933,6 +2933,11 @@ enum {
SIMD_PREFETCH_MODE_2,
};

/*
 * Reduction modes for OP_XEXTRACT, stored in the instruction's inst_c0.
 * The arm64 backend's emit_xextract () switches on these values.
 */
enum {
/* Reduced with umaxv on arm64. */
SIMD_EXTR_IS_ANY_SET,
/* Reduced with uminv on arm64. */
SIMD_EXTR_ARE_ALL_SET
};

int mini_primitive_type_size (MonoTypeEnum type);
MonoTypeEnum mini_get_simd_type_info (MonoClass *klass, guint32 *nelems);

Expand Down
110 changes: 72 additions & 38 deletions src/mono/mono/mini/simd-intrinsics.c
Original file line number Diff line number Diff line change
Expand Up @@ -509,11 +509,18 @@ static MonoInst*
emit_xequal (MonoCompile *cfg, MonoClass *klass, MonoInst *arg1, MonoInst *arg2)
{
#ifdef TARGET_ARM64
int size = mono_class_value_size (klass, NULL);
if (size == 16)
if (!COMPILE_LLVM (cfg)) {
MonoTypeEnum elemt = get_underlying_type (m_class_get_this_arg (arg1->klass));
MonoInst* cmp = emit_xcompare (cfg, arg1->klass, elemt, arg1, arg2);
MonoInst* ret = emit_simd_ins (cfg, mono_defaults.boolean_class, OP_XEXTRACT, cmp->dreg, -1);
ret->inst_c0 = SIMD_EXTR_ARE_ALL_SET;
ret->inst_c1 = mono_class_value_size (klass, NULL);
return ret;
} else if (mono_class_value_size (klass, NULL) == 16) {
return emit_simd_ins (cfg, klass, OP_XEQUAL_ARM64_V128_FAST, arg1->dreg, arg2->dreg);
else
} else {
return emit_simd_ins (cfg, klass, OP_XEQUAL, arg1->dreg, arg2->dreg);
}
#else
MonoInst *ins = emit_simd_ins (cfg, klass, OP_XEQUAL, arg1->dreg, arg2->dreg);
if (!COMPILE_LLVM (cfg))
Expand Down Expand Up @@ -1201,9 +1208,9 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
return NULL;
}

if (!strcmp (m_class_get_name (cfg->method->klass), "Vector256"))
return NULL; // TODO: Fix Vector256.WithUpper/WithLower

if (!strcmp (m_class_get_name (cfg->method->klass), "Vector256") || !strcmp (m_class_get_name (cfg->method->klass), "Vector512"))
return NULL;
// FIXME: This limitation could be removed once everything here is supported by mini JIT on arm64
#ifdef TARGET_ARM64
if (!COMPILE_LLVM (cfg)) {
Expand All @@ -1216,6 +1223,16 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
case SN_LessThanOrEqual:
case SN_Negate:
case SN_OnesComplement:
case SN_EqualsAny:
case SN_GreaterThanAny:
case SN_GreaterThanOrEqualAny:
case SN_LessThanAny:
case SN_LessThanOrEqualAny:
case SN_EqualsAll:
case SN_GreaterThanAll:
case SN_GreaterThanOrEqualAll:
case SN_LessThanAll:
case SN_LessThanOrEqualAll:
case SN_Subtract:
case SN_BitwiseAnd:
case SN_BitwiseOr:
Expand Down Expand Up @@ -1488,18 +1505,27 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
if (!is_element_type_primitive (fsig->params [0]))
return NULL;
MonoClass *arg_class = mono_class_from_mono_type_internal (fsig->params [0]);
switch (id) {
case SN_Equals:
return emit_xcompare (cfg, klass, arg0_type, args [0], args [1]);
case SN_EqualsAll:
return emit_xequal (cfg, arg_class, args [0], args [1]);
case SN_EqualsAny: {
MonoInst *cmp_eq = emit_xcompare (cfg, arg_class, arg0_type, args [0], args [1]);
MonoInst *zero = emit_xzero (cfg, arg_class);
return emit_not_xequal (cfg, arg_class, cmp_eq, zero);
if (id == SN_Equals)
return emit_xcompare (cfg, klass, arg0_type, args [0], args [1]);

if (COMPILE_LLVM (cfg)) {
switch (id) {
case SN_EqualsAll:
return emit_xequal (cfg, arg_class, args [0], args [1]);
case SN_EqualsAny: {
MonoInst *cmp_eq = emit_xcompare (cfg, arg_class, arg0_type, args [0], args [1]);
MonoInst *zero = emit_xzero (cfg, arg_class);
return emit_not_xequal (cfg, arg_class, cmp_eq, zero);
}
}
default: g_assert_not_reached ();
} else {
MonoInst* cmp = emit_xcompare (cfg, arg_class, arg0_type, args [0], args [1]);
MonoInst* ret = emit_simd_ins (cfg, mono_defaults.boolean_class, OP_XEXTRACT, cmp->dreg, -1);
ret->inst_c0 = (id == SN_EqualsAll) ? SIMD_EXTR_ARE_ALL_SET : SIMD_EXTR_IS_ANY_SET;
ret->inst_c1 = mono_class_value_size (klass, NULL);
return ret;
}
g_assert_not_reached ();
}
case SN_ExtractMostSignificantBits: {
if (!is_element_type_primitive (fsig->params [0]) || type_enum_is_float (arg0_type))
Expand Down Expand Up @@ -1567,34 +1593,40 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
fsig->ret->type == MONO_TYPE_BOOLEAN &&
mono_metadata_type_equal (fsig->params [0], fsig->params [1]));

MonoInst *cmp = emit_xcompare_for_intrinsic (cfg, klass, id, arg0_type, args [0], args [1]);
MonoClass *arg_class = mono_class_from_mono_type_internal (fsig->params [0]);

gboolean is_all = FALSE;
switch (id) {
case SN_GreaterThanAll:
case SN_GreaterThanOrEqualAll:
case SN_LessThanAll:
case SN_LessThanOrEqualAll: {
// for floating point numbers all ones is NaN and so
// they must be treated differently than integer types
if (type_enum_is_float (arg0_type)) {
case SN_LessThanOrEqualAll:
is_all = TRUE;
break;
}

MonoClass *arg_class = mono_class_from_mono_type_internal (fsig->params [0]);
if (COMPILE_LLVM (cfg)) {
MonoInst *cmp = emit_xcompare_for_intrinsic (cfg, klass, id, arg0_type, args [0], args [1]);
if (is_all) {
// for floating point numbers all ones is NaN and so
// they must be treated differently than integer types
if (type_enum_is_float (arg0_type)) {
MonoInst *zero = emit_xzero (cfg, arg_class);
MonoInst *inverted_cmp = emit_xcompare (cfg, klass, arg0_type, cmp, zero);
return emit_xequal (cfg, arg_class, inverted_cmp, zero);
}

MonoInst *ones = emit_xones (cfg, arg_class);
return emit_xequal (cfg, arg_class, cmp, ones);
} else {
MonoInst *zero = emit_xzero (cfg, arg_class);
MonoInst *inverted_cmp = emit_xcompare (cfg, klass, arg0_type, cmp, zero);
return emit_xequal (cfg, arg_class, inverted_cmp, zero);
return emit_not_xequal (cfg, arg_class, cmp, zero);
}

MonoInst *ones = emit_xones (cfg, arg_class);
return emit_xequal (cfg, arg_class, cmp, ones);
}
case SN_GreaterThanAny:
case SN_GreaterThanOrEqualAny:
case SN_LessThanAny:
case SN_LessThanOrEqualAny: {
MonoInst *zero = emit_xzero (cfg, arg_class);
return emit_not_xequal (cfg, arg_class, cmp, zero);
}
default:
g_assert_not_reached ();
} else {
MonoInst* cmp = emit_xcompare_for_intrinsic (cfg, arg_class, id, arg0_type, args [0], args [1]);
MonoInst* ret = emit_simd_ins (cfg, mono_defaults.boolean_class, OP_XEXTRACT, cmp->dreg, -1);
ret->inst_c0 = is_all ? SIMD_EXTR_ARE_ALL_SET : SIMD_EXTR_IS_ANY_SET;
ret->inst_c1 = mono_class_value_size (klass, NULL);
return ret;
}
}
case SN_Narrow: {
Expand Down Expand Up @@ -1908,6 +1940,8 @@ emit_vector64_vector128_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSign
case SN_op_BitwiseAnd:
case SN_op_BitwiseOr:
case SN_op_ExclusiveOr:
case SN_op_Equality:
case SN_op_Inequality:
break;
default:
return NULL;
Expand Down