diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index 10d71f7aad23..26d3ba1d7cbb 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -123,9 +123,12 @@ impl ABIMachineSpec for X64ABIMachineSpec { // extension annotations. Additionally, handling extension attributes this way allows clif // functions that use them with the Winch calling convention to interact successfully with // testing infrastructure. + // The results are also not packed if any of the types are `f16`. This is to simplify the + // implementation of `Inst::load`/`Inst::store` (which would otherwise require multiple + // instructions), and doesn't affect Winch itself as Winch doesn't support `f16` at all. let uses_extension = params .iter() - .any(|p| p.extension != ir::ArgumentExtension::None); + .any(|p| p.extension != ir::ArgumentExtension::None || p.value_type == types::F16); for (ix, param) in params.iter().enumerate() { let last_param = ix == params.len() - 1; @@ -169,13 +172,23 @@ impl ABIMachineSpec for X64ABIMachineSpec { // https://godbolt.org/z/PhG3ob if param.value_type.bits() > 64 - && !param.value_type.is_vector() + && !(param.value_type.is_vector() || param.value_type.is_float()) && !flags.enable_llvm_abi_extensions() { panic!( "i128 args/return values not supported unless LLVM ABI extensions are enabled" ); } + // As MSVC doesn't support f16/f128 there is no standard way to pass/return them with + // the Windows ABI. LLVM passes/returns them in XMM registers. + if matches!(param.value_type, types::F16 | types::F128) + && is_fastcall + && !flags.enable_llvm_abi_extensions() + { + panic!( + "f16/f128 args/return values not supported for windows_fastcall unless LLVM ABI extensions are enabled" + ); + } // Windows fastcall dictates that `__m128i` parameters to a function // are passed indirectly as pointers, so handle that as a special @@ -410,12 +423,20 @@ impl ABIMachineSpec for X64ABIMachineSpec { // bits as well -- see `Inst::store()`). let ty = match ty { types::I8 | types::I16 | types::I32 => types::I64, + // Stack slots are always at least 8 bytes, so it's fine to load 4 bytes instead of only + // two. + types::F16 => types::F32, _ => ty, }; Inst::load(ty, mem, into_reg, ExtKind::None) } fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Self::I { + let ty = match ty { + // See `gen_load_stack`. + types::F16 => types::F32, + _ => ty, + }; Inst::store(ty, from_reg, mem) } @@ -502,6 +523,11 @@ impl ABIMachineSpec for X64ABIMachineSpec { } fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Self::I { + let ty = match ty { + // See `gen_load_stack`. + types::F16 => types::F32, + _ => ty, + }; let mem = Amode::imm_reg(offset, base); Inst::store(ty, from_reg, mem) } diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index 2cd101be3936..e104548a4b05 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -1644,7 +1644,7 @@ (rule (put_in_gpr val) (if-let (value_type ty) val) (if-let (type_register_class (RegisterClass.Xmm)) ty) - (bitcast_xmm_to_gpr ty (xmm_new (put_in_reg val)))) + (bitcast_xmm_to_gpr (ty_bits ty) (xmm_new (put_in_reg val)))) ;; Put a value into a `GprMem`. ;; @@ -2252,8 +2252,10 @@ ;; Performs an xor operation of the two operands specified. 
(decl x64_xor_vector (Type Xmm XmmMem) Xmm) +(rule 1 (x64_xor_vector $F16 x y) (x64_xorps x y)) (rule 1 (x64_xor_vector $F32 x y) (x64_xorps x y)) (rule 1 (x64_xor_vector $F64 x y) (x64_xorpd x y)) +(rule 1 (x64_xor_vector $F128 x y) (x64_xorps x y)) (rule 1 (x64_xor_vector $F32X4 x y) (x64_xorps x y)) (rule 1 (x64_xor_vector $F64X2 x y) (x64_xorpd x y)) (rule 0 (x64_xor_vector (multi_lane _ _) x y) (x64_pxor x y)) @@ -2304,6 +2306,9 @@ (rule 2 (x64_load $F64 addr _ext_kind) (x64_movsd_load addr)) +(rule 2 (x64_load $F128 addr _ext_kind) + (x64_movdqu_load addr)) + (rule 2 (x64_load $F32X4 addr _ext_kind) (x64_movups_load addr)) @@ -2719,6 +2724,10 @@ (_ Unit (emit (MInst.Imm size simm64 dst)))) dst)) +;; `f16` immediates. +(rule 2 (imm $F16 (u64_nonzero bits)) + (bitcast_gpr_to_xmm 16 (imm $I16 bits))) + ;; `f32` immediates. (rule 2 (imm $F32 (u64_nonzero bits)) (x64_movd_to_xmm (imm $I32 bits))) @@ -2746,6 +2755,9 @@ (rule 0 (imm ty @ (multi_lane _bits _lanes) 0) (xmm_to_reg (xmm_zero ty))) +;; Special case for `f16` zero immediates +(rule 2 (imm ty @ $F16 (u64_zero)) (xmm_zero ty)) + ;; Special case for `f32` zero immediates (rule 2 (imm ty @ $F32 (u64_zero)) (xmm_zero ty)) @@ -5022,18 +5034,30 @@ ;;;; Casting ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(decl bitcast_xmm_to_gpr (Type Xmm) Gpr) -(rule (bitcast_xmm_to_gpr $F32 src) +(decl bitcast_xmm_to_gpr (u8 Xmm) Gpr) +(rule (bitcast_xmm_to_gpr 16 src) + (x64_pextrw src 0)) +(rule (bitcast_xmm_to_gpr 32 src) (x64_movd_to_gpr src)) -(rule (bitcast_xmm_to_gpr $F64 src) +(rule (bitcast_xmm_to_gpr 64 src) (x64_movq_to_gpr src)) -(decl bitcast_gpr_to_xmm (Type Gpr) Xmm) -(rule (bitcast_gpr_to_xmm $I32 src) +(decl bitcast_xmm_to_gprs (Xmm) ValueRegs) +(rule (bitcast_xmm_to_gprs src) + (value_regs (x64_movq_to_gpr src) (x64_movq_to_gpr (x64_pshufd src 0b11101110)))) + +(decl bitcast_gpr_to_xmm (u8 Gpr) Xmm) +(rule (bitcast_gpr_to_xmm 16 src) + (x64_pinsrw (xmm_uninit_value) src 0)) +(rule (bitcast_gpr_to_xmm 32 src) (x64_movd_to_xmm src)) -(rule (bitcast_gpr_to_xmm $I64 src) +(rule (bitcast_gpr_to_xmm 64 src) (x64_movq_to_xmm src)) +(decl bitcast_gprs_to_xmm (ValueRegs) Xmm) +(rule (bitcast_gprs_to_xmm src) + (x64_punpcklqdq (x64_movq_to_xmm (value_regs_get_gpr src 0)) (x64_movq_to_xmm (value_regs_get_gpr src 1)))) + ;;;; Stack Addresses ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (decl stack_addr_impl (StackSlot Offset32) Gpr) diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index dfc8e4fe7190..4015617093ad 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1428,10 +1428,11 @@ pub(crate) fn emit( let op = match *ty { types::F64 => SseOpcode::Movsd, types::F32 => SseOpcode::Movsd, + types::F16 => SseOpcode::Movsd, types::F32X4 => SseOpcode::Movaps, types::F64X2 => SseOpcode::Movapd, ty => { - debug_assert!(ty.is_vector() && ty.bytes() == 16); + debug_assert!((ty.is_float() || ty.is_vector()) && ty.bytes() == 16); SseOpcode::Movdqa } }; diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index a8f55c483552..7ce254c50ff2 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -630,11 +630,12 @@ impl Inst { } RegClass::Float => { let opcode = match ty { + types::F16 => panic!("loading a f16 requires multiple instructions"), types::F32 => SseOpcode::Movss, types::F64 => SseOpcode::Movsd, types::F32X4 => 
SseOpcode::Movups, types::F64X2 => SseOpcode::Movupd, - _ if ty.is_vector() && ty.bits() == 128 => SseOpcode::Movdqu, + _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 128 => SseOpcode::Movdqu, _ => unimplemented!("unable to load type: {}", ty), }; Inst::xmm_unary_rm_r(opcode, RegMem::mem(from_addr), to_reg) @@ -650,11 +651,12 @@ impl Inst { RegClass::Int => Inst::mov_r_m(OperandSize::from_ty(ty), from_reg, to_addr), RegClass::Float => { let opcode = match ty { + types::F16 => panic!("storing a f16 requires multiple instructions"), types::F32 => SseOpcode::Movss, types::F64 => SseOpcode::Movsd, types::F32X4 => SseOpcode::Movups, types::F64X2 => SseOpcode::Movupd, - _ if ty.is_vector() && ty.bits() == 128 => SseOpcode::Movdqu, + _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 128 => SseOpcode::Movdqu, _ => unimplemented!("unable to store type: {}", ty), }; Inst::xmm_mov_r_m(opcode, from_reg, to_addr) @@ -1621,6 +1623,7 @@ impl PrettyPrint for Inst { let suffix = match *ty { types::F64 => "sd", types::F32 => "ss", + types::F16 => "ss", types::F32X4 => "aps", types::F64X2 => "apd", _ => "dqa", @@ -2605,9 +2608,9 @@ impl MachInst for Inst { // those, which may write more lanes that we need, but are specified to have // zero-latency. let opcode = match ty { - types::F32 | types::F64 | types::F32X4 => SseOpcode::Movaps, + types::F16 | types::F32 | types::F64 | types::F32X4 => SseOpcode::Movaps, types::F64X2 => SseOpcode::Movapd, - _ if ty.is_vector() && ty.bits() == 128 => SseOpcode::Movdqa, + _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 128 => SseOpcode::Movdqa, _ => unimplemented!("unable to move type: {}", ty), }; Inst::xmm_unary_rm_r(opcode, RegMem::reg(src_reg), dst_reg) @@ -2628,8 +2631,10 @@ impl MachInst for Inst { types::I64 => Ok((&[RegClass::Int], &[types::I64])), types::R32 => panic!("32-bit reftype pointer should never be seen on x86-64"), types::R64 => Ok((&[RegClass::Int], &[types::R64])), + types::F16 => Ok((&[RegClass::Float], &[types::F16])), types::F32 => Ok((&[RegClass::Float], &[types::F32])), types::F64 => Ok((&[RegClass::Float], &[types::F64])), + types::F128 => Ok((&[RegClass::Float], &[types::F128])), types::I128 => Ok((&[RegClass::Int, RegClass::Int], &[types::I64, types::I64])), _ if ty.is_vector() => { assert!(ty.bits() <= 128); diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index b814f9d23ef1..7410a9dd7b41 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -22,6 +22,11 @@ (value_regs (imm $I64 x) (imm $I64 0))) +;;;; Rules for `f16const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (f16const (u16_from_ieee16 x))) + (imm $F16 x)) + ;;;; Rules for `f32const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (f32const (u32_from_ieee32 x))) @@ -32,6 +37,14 @@ (rule (lower (f64const (u64_from_ieee64 x))) (imm $F64 x)) +;;;; Rules for `f128const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (f128const const)) + ;; TODO use Inst::gen_constant() instead. 
+ (x64_xmm_load_const $F128 (const_to_vconst const))) + +(rule 1 (lower (f128const (u128_from_constant 0))) + (xmm_zero $F128)) + ;;;; Rules for `null` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type ty (null))) @@ -1585,7 +1598,7 @@ lane1 (u8_from_uimm8 1))) (if-let $true (use_sse41)) - (x64_pinsrq (bitcast_gpr_to_xmm $I64 lane0) lane1 1)) + (x64_pinsrq (bitcast_gpr_to_xmm 64 lane0) lane1 1)) (rule 1 (lower (insertlane vec @ (value_type $F32X4) (sinkable_load val) (u8_from_uimm8 idx))) (if-let $true (use_sse41)) @@ -2926,10 +2939,14 @@ ;; For `$F32` and `$F64` this is important--we only want to load 32 or 64 bits. ;; But for the 128-bit types, this is not strictly necessary for performance but ;; might help with clarity during disassembly. +(rule (lower (has_type $F16 (load flags address offset))) + (x64_pinsrw (xmm_uninit_value) (to_amode flags address offset) 0)) (rule (lower (has_type $F32 (load flags address offset))) (x64_movss_load (to_amode flags address offset))) (rule (lower (has_type $F64 (load flags address offset))) (x64_movsd_load (to_amode flags address offset))) +(rule (lower (has_type $F128 (load flags address offset))) + (x64_movdqu_load (to_amode flags address offset))) (rule (lower (has_type $F32X4 (load flags address offset))) (x64_movups_load (to_amode flags address offset))) (rule (lower (has_type $F64X2 (load flags address offset))) @@ -3007,6 +3024,22 @@ (side_effect (x64_movimm_m ty (to_amode flags address offset) imm))) +;; F16 stores of values in XMM registers. +(rule 0 (lower (store flags + value @ (value_type $F16) + address + offset)) + (side_effect + (x64_movrm $I16 (to_amode flags address offset) (bitcast_xmm_to_gpr 16 value)))) + +(rule 1 (lower (store flags + value @ (value_type $F16) + address + offset)) + (if-let $true (use_sse41)) + (side_effect + (x64_pextrw_store (to_amode flags address offset) value 0))) + ;; F32 stores of values in XMM registers. (rule 1 (lower (store flags value @ (value_type $F32) @@ -3023,6 +3056,14 @@ (side_effect (x64_movsd_store (to_amode flags address offset) value))) +;; F128 stores of values in XMM registers. +(rule 1 (lower (store flags + value @ (value_type $F128) + address + offset)) + (side_effect + (x64_movdqu_store (to_amode flags address offset) value))) + ;; Stores of F32X4 vectors. (rule 1 (lower (store flags value @ (value_type $F32X4) @@ -4013,17 +4054,17 @@ ;; Rules for `bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule (lower (has_type $I32 (bitcast _ src @ (value_type $F32)))) - (bitcast_xmm_to_gpr $F32 src)) +(rule -3 (lower (has_type (is_gpr_type (fits_in_64 ty)) (bitcast _ src @ (value_type (is_xmm_type _))))) + (bitcast_xmm_to_gpr (ty_bits ty) src)) -(rule (lower (has_type $F32 (bitcast _ src @ (value_type $I32)))) - (bitcast_gpr_to_xmm $I32 src)) +(rule -2 (lower (has_type (is_xmm_type (fits_in_64 ty)) (bitcast _ src @ (value_type (is_gpr_type _))))) + (bitcast_gpr_to_xmm (ty_bits ty) src)) -(rule (lower (has_type $I64 (bitcast _ src @ (value_type $F64)))) - (bitcast_xmm_to_gpr $F64 src)) +(rule -1 (lower (has_type $I128 (bitcast _ src @ (value_type (is_xmm_type _))))) + (bitcast_xmm_to_gprs src)) -(rule (lower (has_type $F64 (bitcast _ src @ (value_type $I64)))) - (bitcast_gpr_to_xmm $I64 src)) +(rule 0 (lower (has_type (is_xmm_type _) (bitcast _ src @ (value_type $I128)))) + (bitcast_gprs_to_xmm src)) ;; Bitcast between types residing in GPR registers is a no-op. 
(rule 1 (lower (has_type (is_gpr_type _) @@ -4554,7 +4595,7 @@ ;; Case 2: when moving a scalar value of any other type, use MOVD to zero ;; the upper lanes. (rule (lower (scalar_to_vector src @ (value_type ty))) - (bitcast_gpr_to_xmm ty src)) + (bitcast_gpr_to_xmm (ty_bits ty) src)) ;; Case 3: when presented with `load + scalar_to_vector`, coalesce into a single ;; MOVSS/MOVSD instruction. @@ -4581,10 +4622,10 @@ (x64_pshufd (x64_pshuflw (x64_punpcklbw src src) 0) 0))) (rule 1 (lower (has_type $I8X16 (splat src))) (if-let $true (use_ssse3)) - (x64_pshufb (bitcast_gpr_to_xmm $I32 src) (xmm_zero $I8X16))) + (x64_pshufb (bitcast_gpr_to_xmm 32 src) (xmm_zero $I8X16))) (rule 2 (lower (has_type $I8X16 (splat src))) (if-let $true (use_avx2)) - (x64_vpbroadcastb (bitcast_gpr_to_xmm $I32 src))) + (x64_vpbroadcastb (bitcast_gpr_to_xmm 32 src))) (rule 3 (lower (has_type $I8X16 (splat (sinkable_load_exact addr)))) (if-let $true (use_sse41)) (if-let $true (use_ssse3)) @@ -4599,10 +4640,10 @@ ;; at that point is two of the 16-bit values we want to broadcast) to all the ;; lanes. (rule 0 (lower (has_type $I16X8 (splat src))) - (x64_pshufd (x64_pshuflw (bitcast_gpr_to_xmm $I32 src) 0) 0)) + (x64_pshufd (x64_pshuflw (bitcast_gpr_to_xmm 32 src) 0) 0)) (rule 1 (lower (has_type $I16X8 (splat src))) (if-let $true (use_avx2)) - (x64_vpbroadcastw (bitcast_gpr_to_xmm $I32 src))) + (x64_vpbroadcastw (bitcast_gpr_to_xmm 32 src))) (rule 2 (lower (has_type $I16X8 (splat (sinkable_load_exact addr)))) (x64_pshufd (x64_pshuflw (x64_pinsrw (xmm_uninit_value) addr 0) 0) 0)) (rule 3 (lower (has_type $I16X8 (splat (sinkable_load_exact addr)))) @@ -4614,10 +4655,10 @@ ;; ;; Note that sinkable-load cases come later (rule 0 (lower (has_type $I32X4 (splat src))) - (x64_pshufd (bitcast_gpr_to_xmm $I32 src) 0)) + (x64_pshufd (bitcast_gpr_to_xmm 32 src) 0)) (rule 1 (lower (has_type $I32X4 (splat src))) (if-let $true (use_avx2)) - (x64_vpbroadcastd (bitcast_gpr_to_xmm $I32 src))) + (x64_vpbroadcastd (bitcast_gpr_to_xmm 32 src))) ;; f32x4.splat - the source is already in an xmm register so `shufps` is all ;; that's necessary to complete the splat. This is specialized to `vbroadcastss` @@ -4649,7 +4690,7 @@ ;; lane. A minor specialization for sinkable loads to avoid going through a gpr ;; for i64 splats is used as well when `movddup` is available. (rule 0 (lower (has_type $I64X2 (splat src))) - (x64_pshufd (bitcast_gpr_to_xmm $I64 src) 0b01_00_01_00)) + (x64_pshufd (bitcast_gpr_to_xmm 64 src) 0b01_00_01_00)) (rule 0 (lower (has_type $F64X2 (splat src))) (x64_pshufd src 0b01_00_01_00)) (rule 6 (lower (has_type (multi_lane 64 2) (splat (sinkable_load addr)))) diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index c79e2ae5152f..a79016375a22 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -599,7 +599,7 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> { Some(RegisterClass::Gpr { single_register: ty != I128, }) - } else if ty == F32 || ty == F64 || (ty.is_vector() && ty.bits() == 128) { + } else if ty.is_float() || (ty.is_vector() && ty.bits() == 128) { Some(RegisterClass::Xmm) } else { None diff --git a/cranelift/codegen/src/isle_prelude.rs b/cranelift/codegen/src/isle_prelude.rs index 64c343ea61bc..8798e05c175d 100644 --- a/cranelift/codegen/src/isle_prelude.rs +++ b/cranelift/codegen/src/isle_prelude.rs @@ -436,18 +436,19 @@ macro_rules! 
isle_common_prelude_methods {
     #[inline]
     fn ty_scalar_float(&mut self, ty: Type) -> Option<Type> {
-        match ty {
-            F32 | F64 => Some(ty),
-            _ => None,
+        if ty.is_float() {
+            Some(ty)
+        } else {
+            None
         }
     }

     #[inline]
     fn ty_float_or_vec(&mut self, ty: Type) -> Option<Type> {
-        match ty {
-            F32 | F64 => Some(ty),
-            ty if ty.is_vector() => Some(ty),
-            _ => None,
+        if ty.is_float() || ty.is_vector() {
+            Some(ty)
+        } else {
+            None
         }
     }

@@ -600,6 +601,10 @@ macro_rules! isle_common_prelude_methods {
         }
     }

+    fn u16_from_ieee16(&mut self, val: Ieee16) -> u16 {
+        val.bits()
+    }
+
     fn u32_from_ieee32(&mut self, val: Ieee32) -> u32 {
         val.bits()
     }
diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle
index 2e7b3ee8775c..54d8428230c6 100644
--- a/cranelift/codegen/src/prelude.isle
+++ b/cranelift/codegen/src/prelude.isle
@@ -630,7 +630,11 @@
 (decl pure imm64_masked (Type u64) Imm64)
 (extern constructor imm64_masked imm64_masked)

-;; Extract a `u64` from an `Ieee32`.
+;; Extract a `u16` from an `Ieee16`.
+(decl u16_from_ieee16 (u16) Ieee16)
+(extern extractor infallible u16_from_ieee16 u16_from_ieee16)
+
+;; Extract a `u32` from an `Ieee32`.
 (decl u32_from_ieee32 (u32) Ieee32)
 (extern extractor infallible u32_from_ieee32 u32_from_ieee32)

diff --git a/cranelift/filetests/filetests/isa/x64/bitcast.clif b/cranelift/filetests/filetests/isa/x64/bitcast.clif
index aee59d0171b4..ef1a2f3fca21 100644
--- a/cranelift/filetests/filetests/isa/x64/bitcast.clif
+++ b/cranelift/filetests/filetests/isa/x64/bitcast.clif
@@ -1,7 +1,59 @@
 test compile precise-output
+set enable_llvm_abi_extensions
 target x86_64

-function %f1(f32) -> i32 {
+function %bitcast_f16_to_i16(f16) -> i16 {
+block0(v0: f16):
+    v1 = bitcast.i16 v0
+    return v1
+}
+
+; VCode:
+; pushq %rbp
+; movq %rsp, %rbp
+; block0:
+; pextrw $0, %xmm0, %rax
+; movq %rbp, %rsp
+; popq %rbp
+; ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+; pushq %rbp
+; movq %rsp, %rbp
+; block1: ; offset 0x4
+; pextrw $0, %xmm0, %eax
+; movq %rbp, %rsp
+; popq %rbp
+; retq
+
+function %bitcast_i16_to_f16(i16) -> f16 {
+block0(v0: i16):
+    v1 = bitcast.f16 v0
+    return v1
+}
+
+; VCode:
+; pushq %rbp
+; movq %rsp, %rbp
+; block0:
+; uninit %xmm0
+; pinsrw $0, %xmm0, %rdi, %xmm0
+; movq %rbp, %rsp
+; popq %rbp
+; ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+; pushq %rbp
+; movq %rsp, %rbp
+; block1: ; offset 0x4
+; pinsrw $0, %edi, %xmm0
+; movq %rbp, %rsp
+; popq %rbp
+; retq
+
+function %bitcast_f32_to_i32(f32) -> i32 {
 block0(v0: f32):
     v1 = bitcast.i32 v0
     return v1
@@ -26,7 +78,7 @@ block0(v0: f32):
 ; popq %rbp
 ; retq

-function %f2(i32) -> f32 {
+function %bitcast_i32_to_f32(i32) -> f32 {
 block0(v0: i32):
     v1 = bitcast.f32 v0
     return v1
@@ -51,7 +103,7 @@ block0(v0: i32):
 ; popq %rbp
 ; retq

-function %f3(f64) -> i64 {
+function %bitcast_f64_to_i64(f64) -> i64 {
 block0(v0: f64):
     v1 = bitcast.i64 v0
     return v1
@@ -76,7 +128,7 @@ block0(v0: f64):
 ; popq %rbp
 ; retq

-function %f4(i64) -> f64 {
+function %bitcast_i64_to_f64(i64) -> f64 {
 block0(v0: i64):
     v1 = bitcast.f64 v0
     return v1
@@ -101,3 +153,119 @@ block0(v0: i64):
 ; popq %rbp
 ; retq

+function %bitcast_f128_to_i128(f128) -> i128 {
+block0(v0: f128):
+    v1 = bitcast.i128 v0
+    return v1
+}
+
+; VCode:
+; pushq %rbp
+; movq %rsp, %rbp
+; block0:
+; movq %xmm0, %rax
+; pshufd $238, %xmm0, %xmm4
+; movq %xmm4, %rdx
+; movq %rbp, %rsp
+; popq %rbp
+; ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+; pushq %rbp
+; movq %rsp, %rbp
+; block1: ; offset 0x4
+; movq %xmm0, %rax
+; pshufd $0xee, %xmm0, %xmm4
+; movq %xmm4, 
%rdx
+; movq %rbp, %rsp
+; popq %rbp
+; retq
+
+function %bitcast_i128_to_f128(i128) -> f128 {
+block0(v0: i128):
+    v1 = bitcast.f128 v0
+    return v1
+}
+
+; VCode:
+; pushq %rbp
+; movq %rsp, %rbp
+; block0:
+; movq %rdi, %xmm0
+; movq %rsi, %xmm5
+; punpcklqdq %xmm0, %xmm5, %xmm0
+; movq %rbp, %rsp
+; popq %rbp
+; ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+; pushq %rbp
+; movq %rsp, %rbp
+; block1: ; offset 0x4
+; movq %rdi, %xmm0
+; movq %rsi, %xmm5
+; punpcklqdq %xmm5, %xmm0
+; movq %rbp, %rsp
+; popq %rbp
+; retq
+
+function %bitcast_i64x2_to_i128(i64x2) -> i128 {
+block0(v0: i64x2):
+    v1 = bitcast.i128 little v0
+    return v1
+}
+
+; VCode:
+; pushq %rbp
+; movq %rsp, %rbp
+; block0:
+; movq %xmm0, %rax
+; pshufd $238, %xmm0, %xmm4
+; movq %xmm4, %rdx
+; movq %rbp, %rsp
+; popq %rbp
+; ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+; pushq %rbp
+; movq %rsp, %rbp
+; block1: ; offset 0x4
+; movq %xmm0, %rax
+; pshufd $0xee, %xmm0, %xmm4
+; movq %xmm4, %rdx
+; movq %rbp, %rsp
+; popq %rbp
+; retq
+
+function %bitcast_i128_to_i64x2(i128) -> i64x2 {
+block0(v0: i128):
+    v1 = bitcast.i64x2 little v0
+    return v1
+}
+
+; VCode:
+; pushq %rbp
+; movq %rsp, %rbp
+; block0:
+; movq %rdi, %xmm0
+; movq %rsi, %xmm5
+; punpcklqdq %xmm0, %xmm5, %xmm0
+; movq %rbp, %rsp
+; popq %rbp
+; ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+; pushq %rbp
+; movq %rsp, %rbp
+; block1: ; offset 0x4
+; movq %rdi, %xmm0
+; movq %rsi, %xmm5
+; punpcklqdq %xmm5, %xmm0
+; movq %rbp, %rsp
+; popq %rbp
+; retq
+
diff --git a/cranelift/filetests/filetests/isa/x64/call-conv.clif b/cranelift/filetests/filetests/isa/x64/call-conv.clif
index be52290ec41f..a3b06408892d 100644
--- a/cranelift/filetests/filetests/isa/x64/call-conv.clif
+++ b/cranelift/filetests/filetests/isa/x64/call-conv.clif
@@ -1,4 +1,5 @@
 test compile precise-output
+set enable_llvm_abi_extensions
 target x86_64

 function %one_arg(i32) system_v {
@@ -594,3 +595,98 @@ block0(v0: i32, v1: i8x16):
 ; popq %rbp
 ; retq

+function %second_f16(f16, f16) -> f16 system_v {
+block0(v0: f16, v1: f16):
+    return v1
+}
+
+; VCode:
+; pushq %rbp
+; movq %rsp, %rbp
+; block0:
+; movdqa %xmm1, %xmm0
+; movq %rbp, %rsp
+; popq %rbp
+; ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+; pushq %rbp
+; movq %rsp, %rbp
+; block1: ; offset 0x4
+; movdqa %xmm1, %xmm0
+; movq %rbp, %rsp
+; popq %rbp
+; retq
+
+function %second_f128(f128, f128) -> f128 system_v {
+block0(v0: f128, v1: f128):
+    return v1
+}
+
+; VCode:
+; pushq %rbp
+; movq %rsp, %rbp
+; block0:
+; movdqa %xmm1, %xmm0
+; movq %rbp, %rsp
+; popq %rbp
+; ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+; pushq %rbp
+; movq %rsp, %rbp
+; block1: ; offset 0x4
+; movdqa %xmm1, %xmm0
+; movq %rbp, %rsp
+; popq %rbp
+; retq
+
+function %second_f16_fastcall(f16, f16) -> f16 windows_fastcall {
+block0(v0: f16, v1: f16):
+    return v1
+}
+
+; VCode:
+; pushq %rbp
+; movq %rsp, %rbp
+; block0:
+; movdqa %xmm1, %xmm0
+; movq %rbp, %rsp
+; popq %rbp
+; ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+; pushq %rbp
+; movq %rsp, %rbp
+; block1: ; offset 0x4
+; movdqa %xmm1, %xmm0
+; movq %rbp, %rsp
+; popq %rbp
+; retq
+
+function %second_f128_fastcall(f128, f128) -> f128 windows_fastcall {
+block0(v0: f128, v1: f128):
+    return v1
+}
+
+; VCode:
+; pushq %rbp
+; movq %rsp, %rbp
+; block0:
+; movdqa %xmm1, %xmm0
+; movq %rbp, %rsp
+; popq %rbp
+; ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+; pushq %rbp
+; movq %rsp, %rbp
+; block1: ; offset 0x4
+; movdqa %xmm1, %xmm0
+; movq %rbp, %rsp
+; popq %rbp
+; retq
diff --git 
a/cranelift/filetests/filetests/isa/x64/f128const.clif b/cranelift/filetests/filetests/isa/x64/f128const.clif new file mode 100644 index 000000000000..22ee804d6d6f --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/f128const.clif @@ -0,0 +1,69 @@ +test compile precise-output +target x86_64 + +function %ret_0() -> f128 { +block0(): + v0 = f128const 0.0 + return v0 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; uninit %xmm0 +; xorps %xmm0, %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; xorps %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %ret_1() -> f128 { +block0(): + v0 = f128const 0x1.0 + return v0 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movdqu const(0), %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movdqu 0x14(%rip), %xmm0 +; movq %rbp, %rsp +; popq %rbp +; retq +; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) +; addb %bh, %bh + diff --git a/cranelift/filetests/filetests/isa/x64/f16const.clif b/cranelift/filetests/filetests/isa/x64/f16const.clif new file mode 100644 index 000000000000..edcaf2ac29fd --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/f16const.clif @@ -0,0 +1,57 @@ +test compile precise-output +target x86_64 + +function %ret_0() -> f16 { +block0(): + v0 = f16const 0.0 + return v0 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; uninit %xmm0 +; xorps %xmm0, %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; xorps %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %ret_1() -> f16 { +block0(): + v0 = f16const 0x1.0 + return v0 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movl $15360, %esi +; uninit %xmm0 +; pinsrw $0, %xmm0, %rsi, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movl $0x3c00, %esi +; pinsrw $0, %esi, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; retq + diff --git a/cranelift/filetests/filetests/isa/x64/load-f16-f128.clif b/cranelift/filetests/filetests/isa/x64/load-f16-f128.clif new file mode 100644 index 000000000000..8bd94ae70709 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/load-f16-f128.clif @@ -0,0 +1,54 @@ +test compile precise-output +target x86_64 + +function %load_f16(i64) -> f16 { +block0(v0: i64): + v1 = load.f16 v0 + return v1 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; uninit %xmm0 +; pinsrw $0, %xmm0, 0(%rdi), %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; pinsrw $0, (%rdi), %xmm0 ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %load_f128(i64) -> f128 { +block0(v0: i64): + v1 = load.f128 v0 + return v1 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movdqu 0(%rdi), %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movdqu (%rdi), %xmm0 ; trap: 
heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + diff --git a/cranelift/filetests/filetests/isa/x64/select.clif b/cranelift/filetests/filetests/isa/x64/select.clif index 4951bce58d10..d25799b74138 100644 --- a/cranelift/filetests/filetests/isa/x64/select.clif +++ b/cranelift/filetests/filetests/isa/x64/select.clif @@ -65,3 +65,131 @@ block0(v0: f32, v1: f32, v2: i64, v3: i64): ; popq %rbp ; retq +function %select_f16(i8, f16, f16) -> f16 { +block0(v0: i8, v1: f16, v2: f16): + v3 = select.f16 v0, v1, v2 + return v3 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; testb %dil, %dil +; movdqa %xmm0, %xmm6 +; movdqa %xmm1, %xmm0 +; movss %xmm0, %xmm0; jz $next; movss %xmm6, %xmm0; $next: +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; testb %dil, %dil +; movdqa %xmm0, %xmm6 +; movdqa %xmm1, %xmm0 +; je 0x19 +; movsd %xmm6, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %select_f32(i8, f32, f32) -> f32 { +block0(v0: i8, v1: f32, v2: f32): + v3 = select.f32 v0, v1, v2 + return v3 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; testb %dil, %dil +; movdqa %xmm0, %xmm6 +; movdqa %xmm1, %xmm0 +; movss %xmm0, %xmm0; jz $next; movss %xmm6, %xmm0; $next: +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; testb %dil, %dil +; movdqa %xmm0, %xmm6 +; movdqa %xmm1, %xmm0 +; je 0x19 +; movsd %xmm6, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %select_f64(i8, f64, f64) -> f64 { +block0(v0: i8, v1: f64, v2: f64): + v3 = select.f64 v0, v1, v2 + return v3 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; testb %dil, %dil +; movdqa %xmm0, %xmm6 +; movdqa %xmm1, %xmm0 +; movsd %xmm0, %xmm0; jz $next; movsd %xmm6, %xmm0; $next: +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; testb %dil, %dil +; movdqa %xmm0, %xmm6 +; movdqa %xmm1, %xmm0 +; je 0x19 +; movsd %xmm6, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %select_f128(i8, f128, f128) -> f128 { +block0(v0: i8, v1: f128, v2: f128): + v3 = select.f128 v0, v1, v2 + return v3 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; testb %dil, %dil +; movdqa %xmm0, %xmm6 +; movdqa %xmm1, %xmm0 +; movdqa %xmm0, %xmm0; jz $next; movdqa %xmm6, %xmm0; $next: +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; testb %dil, %dil +; movdqa %xmm0, %xmm6 +; movdqa %xmm1, %xmm0 +; je 0x19 +; movdqa %xmm6, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; retq + diff --git a/cranelift/filetests/filetests/isa/x64/store-f16-f128.clif b/cranelift/filetests/filetests/isa/x64/store-f16-f128.clif new file mode 100644 index 000000000000..729d271c7913 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/store-f16-f128.clif @@ -0,0 +1,55 @@ +test compile precise-output +target x86_64 + +function %store_f16(f16, i64) { +block0(v0: f16, v1: i64): + store.f16 v0, v1 + return +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; pextrw $0, %xmm0, %rcx +; movw %cx, 0(%rdi) +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; pextrw $0, %xmm0, %ecx +; movw %cx, (%rdi) ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %store_f128(f128, 
i64) { +block0(v0: f128, v1: i64): + store.f128 v0, v1 + return +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movdqu %xmm0, 0(%rdi) +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movdqu %xmm0, (%rdi) ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + diff --git a/cranelift/filetests/filetests/isa/x64/store-f16-sse41.clif b/cranelift/filetests/filetests/isa/x64/store-f16-sse41.clif new file mode 100644 index 000000000000..bb89bf07354a --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/store-f16-sse41.clif @@ -0,0 +1,28 @@ +test compile precise-output +target x86_64 sse41 + +function %store_f16(f16, i64) { +block0(v0: f16, v1: i64): + store.f16 v0, v1 + return +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; pextrw $0, %xmm0, 0(%rdi) +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; pextrw $0, %xmm0, (%rdi) ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + diff --git a/cranelift/filetests/filetests/runtests/bitcast-f16-f128.clif b/cranelift/filetests/filetests/runtests/bitcast-f16-f128.clif index 5448ae48134f..d130c3e75e0d 100644 --- a/cranelift/filetests/filetests/runtests/bitcast-f16-f128.clif +++ b/cranelift/filetests/filetests/runtests/bitcast-f16-f128.clif @@ -1,4 +1,7 @@ test interpret +test run +set enable_llvm_abi_extensions +target x86_64 function %bitcast_i16_f16(i16) -> f16 fast { block0(v0: i16): diff --git a/cranelift/filetests/filetests/runtests/f128const.clif b/cranelift/filetests/filetests/runtests/f128const.clif index d670a2a67871..eb77aa245488 100644 --- a/cranelift/filetests/filetests/runtests/f128const.clif +++ b/cranelift/filetests/filetests/runtests/f128const.clif @@ -1,4 +1,7 @@ test interpret +test run +set enable_llvm_abi_extensions +target x86_64 ;; These values are special for RISC-V since it has a dedicated @@ -51,3 +54,11 @@ block0: } ; run: %f128const_neg_nan() == -NaN + +function %f128const_zero() -> f128 { +block0: + v0 = f128const 0.0 + return v0 +} + +; run: %f128const_zero() == 0.0 diff --git a/cranelift/filetests/filetests/runtests/f16const.clif b/cranelift/filetests/filetests/runtests/f16const.clif index 99507b888548..16797f2fa892 100644 --- a/cranelift/filetests/filetests/runtests/f16const.clif +++ b/cranelift/filetests/filetests/runtests/f16const.clif @@ -1,4 +1,7 @@ test interpret +test run +set enable_llvm_abi_extensions +target x86_64 ;; These values are special for RISC-V since it has a dedicated @@ -51,3 +54,11 @@ block0: } ; run: %f16const_neg_nan() == -NaN + +function %f16const_zero() -> f16 { +block0: + v0 = f16const 0.0 + return v0 +} + +; run: %f16const_zero() == 0.0 diff --git a/cranelift/filetests/filetests/runtests/select-f16-f128.clif b/cranelift/filetests/filetests/runtests/select-f16-f128.clif new file mode 100644 index 000000000000..518d6efa37cf --- /dev/null +++ b/cranelift/filetests/filetests/runtests/select-f16-f128.clif @@ -0,0 +1,139 @@ +test interpret +test run +set enable_llvm_abi_extensions +target x86_64 + +function %select_icmp_i8_f16(i8, f16, f16) -> f16 { +block0(v0: i8, v1: f16, v2: f16): + v3 = iconst.i8 42 + v4 = icmp eq v0, v3 + v5 = select.f16 v4, v1, v2 + return v5 +} +; run: %select_icmp_i8_f16(42, 0x0.0, 0x1.0) == 0x0.0 +; run: %select_icmp_i8_f16(10, 0x0.0, 0x1.0) == 0x1.0 +; run: %select_icmp_i8_f16(42, +Inf, -Inf) == +Inf +; run: %select_icmp_i8_f16(10, +Inf, -Inf) == -Inf +; 
run: %select_icmp_i8_f16(42, +NaN, -NaN) == +NaN +; run: %select_icmp_i8_f16(10, +NaN, -NaN) == -NaN +; run: %select_icmp_i8_f16(42, 0x0.800p-14, -0x0.800p-14) == 0x0.800p-14 +; run: %select_icmp_i8_f16(10, 0x0.800p-14, -0x0.800p-14) == -0x0.800p-14 + + +function %select_icmp_i8_f128(i8, f128, f128) -> f128 { +block0(v0: i8, v1: f128, v2: f128): + v3 = iconst.i8 42 + v4 = icmp eq v0, v3 + v5 = select.f128 v4, v1, v2 + return v5 +} +; run: %select_icmp_i8_f128(42, 0x0.0, 0x1.0) == 0x0.0 +; run: %select_icmp_i8_f128(10, 0x0.0, 0x1.0) == 0x1.0 +; run: %select_icmp_i8_f128(42, +Inf, -Inf) == +Inf +; run: %select_icmp_i8_f128(10, +Inf, -Inf) == -Inf +; run: %select_icmp_i8_f128(42, +NaN, -NaN) == +NaN +; run: %select_icmp_i8_f128(10, +NaN, -NaN) == -NaN +; run: %select_icmp_i8_f128(42, 0x0.8000000000000000000000000000p-16382, -0x0.8000000000000000000000000000p-16382) == 0x0.8000000000000000000000000000p-16382 +; run: %select_icmp_i8_f128(10, 0x0.8000000000000000000000000000p-16382, -0x0.8000000000000000000000000000p-16382) == -0x0.8000000000000000000000000000p-16382 + + +function %select_icmp_i16_f16(i16, f16, f16) -> f16 { +block0(v0: i16, v1: f16, v2: f16): + v3 = iconst.i16 42 + v4 = icmp eq v0, v3 + v5 = select.f16 v4, v1, v2 + return v5 +} +; run: %select_icmp_i16_f16(42, 0x0.0, 0x1.0) == 0x0.0 +; run: %select_icmp_i16_f16(10, 0x0.0, 0x1.0) == 0x1.0 +; run: %select_icmp_i16_f16(42, +Inf, -Inf) == +Inf +; run: %select_icmp_i16_f16(10, +Inf, -Inf) == -Inf +; run: %select_icmp_i16_f16(42, +NaN, -NaN) == +NaN +; run: %select_icmp_i16_f16(10, +NaN, -NaN) == -NaN +; run: %select_icmp_i16_f16(42, 0x0.800p-14, -0x0.800p-14) == 0x0.800p-14 +; run: %select_icmp_i16_f16(10, 0x0.800p-14, -0x0.800p-14) == -0x0.800p-14 + + +function %select_icmp_i16_f128(i16, f128, f128) -> f128 { +block0(v0: i16, v1: f128, v2: f128): + v3 = iconst.i16 42 + v4 = icmp eq v0, v3 + v5 = select.f128 v4, v1, v2 + return v5 +} +; run: %select_icmp_i16_f128(42, 0x0.0, 0x1.0) == 0x0.0 +; run: %select_icmp_i16_f128(10, 0x0.0, 0x1.0) == 0x1.0 +; run: %select_icmp_i16_f128(42, +Inf, -Inf) == +Inf +; run: %select_icmp_i16_f128(10, +Inf, -Inf) == -Inf +; run: %select_icmp_i16_f128(42, +NaN, -NaN) == +NaN +; run: %select_icmp_i16_f128(10, +NaN, -NaN) == -NaN +; run: %select_icmp_i16_f128(42, 0x0.8000000000000000000000000000p-16382, -0x0.8000000000000000000000000000p-16382) == 0x0.8000000000000000000000000000p-16382 +; run: %select_icmp_i16_f128(10, 0x0.8000000000000000000000000000p-16382, -0x0.8000000000000000000000000000p-16382) == -0x0.8000000000000000000000000000p-16382 + + +function %select_icmp_i32_f16(i32, f16, f16) -> f16 { +block0(v0: i32, v1: f16, v2: f16): + v3 = iconst.i32 42 + v4 = icmp eq v0, v3 + v5 = select.f16 v4, v1, v2 + return v5 +} +; run: %select_icmp_i32_f16(42, 0x0.0, 0x1.0) == 0x0.0 +; run: %select_icmp_i32_f16(10, 0x0.0, 0x1.0) == 0x1.0 +; run: %select_icmp_i32_f16(42, +Inf, -Inf) == +Inf +; run: %select_icmp_i32_f16(10, +Inf, -Inf) == -Inf +; run: %select_icmp_i32_f16(42, +NaN, -NaN) == +NaN +; run: %select_icmp_i32_f16(10, +NaN, -NaN) == -NaN +; run: %select_icmp_i32_f16(42, 0x0.800p-14, -0x0.800p-14) == 0x0.800p-14 +; run: %select_icmp_i32_f16(10, 0x0.800p-14, -0x0.800p-14) == -0x0.800p-14 + + +function %select_icmp_i32_f128(i32, f128, f128) -> f128 { +block0(v0: i32, v1: f128, v2: f128): + v3 = iconst.i32 42 + v4 = icmp eq v0, v3 + v5 = select.f128 v4, v1, v2 + return v5 +} +; run: %select_icmp_i32_f128(42, 0x0.0, 0x1.0) == 0x0.0 +; run: %select_icmp_i32_f128(10, 0x0.0, 0x1.0) == 0x1.0 +; run: 
%select_icmp_i32_f128(42, +Inf, -Inf) == +Inf +; run: %select_icmp_i32_f128(10, +Inf, -Inf) == -Inf +; run: %select_icmp_i32_f128(42, +NaN, -NaN) == +NaN +; run: %select_icmp_i32_f128(10, +NaN, -NaN) == -NaN +; run: %select_icmp_i32_f128(42, 0x0.8000000000000000000000000000p-16382, -0x0.8000000000000000000000000000p-16382) == 0x0.8000000000000000000000000000p-16382 +; run: %select_icmp_i32_f128(10, 0x0.8000000000000000000000000000p-16382, -0x0.8000000000000000000000000000p-16382) == -0x0.8000000000000000000000000000p-16382 + + +function %select_icmp_i64_f16(i64, f16, f16) -> f16 { +block0(v0: i64, v1: f16, v2: f16): + v3 = iconst.i64 42 + v4 = icmp eq v0, v3 + v5 = select.f16 v4, v1, v2 + return v5 +} +; run: %select_icmp_i64_f16(42, 0x0.0, 0x1.0) == 0x0.0 +; run: %select_icmp_i64_f16(10, 0x0.0, 0x1.0) == 0x1.0 +; run: %select_icmp_i64_f16(42, +Inf, -Inf) == +Inf +; run: %select_icmp_i64_f16(10, +Inf, -Inf) == -Inf +; run: %select_icmp_i64_f16(42, +NaN, -NaN) == +NaN +; run: %select_icmp_i64_f16(10, +NaN, -NaN) == -NaN +; run: %select_icmp_i64_f16(42, 0x0.800p-14, -0x0.800p-14) == 0x0.800p-14 +; run: %select_icmp_i64_f16(10, 0x0.800p-14, -0x0.800p-14) == -0x0.800p-14 + + +function %select_icmp_i64_f128(i64, f128, f128) -> f128 { +block0(v0: i64, v1: f128, v2: f128): + v3 = iconst.i64 42 + v4 = icmp eq v0, v3 + v5 = select.f128 v4, v1, v2 + return v5 +} +; run: %select_icmp_i64_f128(42, 0x0.0, 0x1.0) == 0x0.0 +; run: %select_icmp_i64_f128(10, 0x0.0, 0x1.0) == 0x1.0 +; run: %select_icmp_i64_f128(42, +Inf, -Inf) == +Inf +; run: %select_icmp_i64_f128(10, +Inf, -Inf) == -Inf +; run: %select_icmp_i64_f128(42, +NaN, -NaN) == +NaN +; run: %select_icmp_i64_f128(10, +NaN, -NaN) == -NaN +; run: %select_icmp_i64_f128(42, 0x0.8000000000000000000000000000p-16382, -0x0.8000000000000000000000000000p-16382) == 0x0.8000000000000000000000000000p-16382 +; run: %select_icmp_i64_f128(10, 0x0.8000000000000000000000000000p-16382, -0x0.8000000000000000000000000000p-16382) == -0x0.8000000000000000000000000000p-16382
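
For illustration (not part of the patch): together, the new lowerings make a scalar f16 round-trippable through a GPR via the pextrw/pinsrw bitcasts added above, composing the two directions exercised separately by the bitcast tests. A minimal runtest sketch in the same filetest style, assuming the `enable_llvm_abi_extensions` flag the other f16 runtests in this patch set; the function name is hypothetical:

test interpret
test run
set enable_llvm_abi_extensions
target x86_64

function %roundtrip_f16(f16) -> f16 {
block0(v0: f16):
    v1 = bitcast.i16 v0
    v2 = bitcast.f16 v1
    return v2
}
; run: %roundtrip_f16(0x1.0) == 0x1.0
; run: %roundtrip_f16(-NaN) == -NaN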