Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

riscv64: Support scalar-vector bitcasts #8692

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions cranelift/codegen/src/isa/riscv64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -2815,6 +2815,10 @@
;; Generates a bitcast instruction.
;; Args are: src, src_ty, dst_ty
(decl gen_bitcast (Reg Type Type) Reg)
(rule 5 (gen_bitcast r (ty_scalar_float src_ty) (ty_vec_fits_in_register _)) (rv_vfmv_sf r src_ty))
(rule 4 (gen_bitcast r (ty_int_ref_scalar_64 src_ty) (ty_vec_fits_in_register _)) (rv_vmv_sx r src_ty))
(rule 3 (gen_bitcast r (ty_vec_fits_in_register _) (ty_scalar_float dst_ty)) (rv_vfmv_fs r dst_ty))
(rule 2 (gen_bitcast r (ty_vec_fits_in_register _) (ty_int_ref_scalar_64 dst_ty)) (rv_vmv_xs r dst_ty))
(rule 1 (gen_bitcast r $F32 $I32) (rv_fmvxw r))
(rule 1 (gen_bitcast r $F64 $I64) (rv_fmvxd r))
(rule 1 (gen_bitcast r $I32 $F32) (rv_fmvwx r))
Expand Down
10 changes: 8 additions & 2 deletions cranelift/codegen/src/isa/riscv64/inst/vector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -268,13 +268,16 @@ impl VecAluOpRRRR {
VecAluOpRRRR::VfnmaccVV | VecAluOpRRRR::VfnmaccVF => 0b101101,
VecAluOpRRRR::VfmsacVV | VecAluOpRRRR::VfmsacVF => 0b101110,
VecAluOpRRRR::VfnmsacVV | VecAluOpRRRR::VfnmsacVF => 0b101111,
VecAluOpRRRR::Vslide1upVX => 0b001110,
}
}

pub fn category(&self) -> VecOpCategory {
match self {
VecAluOpRRRR::VmaccVV | VecAluOpRRRR::VnmsacVV => VecOpCategory::OPMVV,
VecAluOpRRRR::VmaccVX | VecAluOpRRRR::VnmsacVX => VecOpCategory::OPMVX,
VecAluOpRRRR::VmaccVX | VecAluOpRRRR::VnmsacVX | VecAluOpRRRR::Vslide1upVX => {
VecOpCategory::OPMVX
}
VecAluOpRRRR::VfmaccVV
| VecAluOpRRRR::VfnmaccVV
| VecAluOpRRRR::VfmsacVV
Expand All @@ -299,7 +302,10 @@ impl VecAluOpRRRR {

// Overlap constraints for the four-operand vector ALU opcodes.
impl VecInstOverlapInfo for VecAluOpRRRR {
/// Returns `true` for opcodes flagged as forbidding source/destination
/// overlap. Of the opcodes in this enum only `Vslide1upVX` is flagged;
/// every other opcode returns `false`.
fn forbids_src_dst_overlaps(&self) -> bool {
// NOTE(review): this is a diff view with the +/- markers stripped. The bare
// `false` below appears to be the pre-change body and the `match` its
// replacement; confirm against the actual file before reading this as code.
false
match self {
// The RISC-V V specification reserves encodings of the slide-up family in
// which the destination register group overlaps the source vector group.
VecAluOpRRRR::Vslide1upVX => true,
_ => false,
}
}
}

Expand Down
8 changes: 8 additions & 0 deletions cranelift/codegen/src/isa/riscv64/inst_vector.isle
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@
(VfnmaccVF)
(VfmsacVF)
(VfnmsacVF)
(Vslide1upVX)
))

;; Register-Imm ALU Ops
Expand Down Expand Up @@ -1095,6 +1096,13 @@
(rule (rv_vslideup_vvi vd vs2 imm mask vstate)
(vec_alu_rrr_uimm5 (VecAluOpRRRImm5.VslideupVI) vd vs2 imm mask vstate))

;; Helper for emitting the `vslide1up.vx` instruction.
;;
;; # vd[0]=x[rs1], vd[i+1] = vs2[i]
;;
;; In other words: every element of `vs2` moves up by one lane and the scalar
;; in `rs1` is written into lane 0.
;;
;; Args are: vd, vs2, rs1, mask, vstate.
;; This is a thin wrapper that forwards all operands unchanged to `vec_alu_rrrr`
;; with the `VecAluOpRRRR.Vslide1upVX` opcode.
(decl rv_vslide1up_vx (VReg VReg XReg VecOpMasking VState) VReg)
(rule (rv_vslide1up_vx vd vs2 rs1 mask vstate)
(vec_alu_rrrr (VecAluOpRRRR.Vslide1upVX) vd vs2 rs1 mask vstate))

;; Helper for emitting the `vmv.x.s` instruction.
;; This instruction copies the first element of the source vector to the destination X register.
;; Masked versions of this instruction are not supported.
Expand Down
20 changes: 18 additions & 2 deletions cranelift/codegen/src/isa/riscv64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -2436,8 +2436,24 @@
(elf_tls_get_addr name))

;;;;; Rules for `bitcast`;;;;;;;;;
(rule
(lower (has_type out_ty (bitcast _ v @ (value_type in_ty))))

;; These rules should probably be handled in `gen_bitcast`, but it's convenient to have that return
;; a single register, instead of a `ValueRegs`
;;
;; Vector -> `$I128`: the source is read as an `$I64X2` regardless of its actual
;; lane type. Lane 0 becomes the first register of the result pair and lane 1
;; the second.
(rule 2 (lower (has_type $I128 (bitcast _ v @ (value_type (ty_vec_fits_in_register _)))))
(value_regs
(gen_extractlane $I64X2 v 0)
(gen_extractlane $I64X2 v 1)))

;; Move the high half into a vector register, and then use vslide1up to move it up and
;; insert the lower half in one instruction.
;;
;; `$I128` -> vector: register 0 of the input pair is bound to `lo` and
;; register 1 to `hi`. Both vector instructions use the vector state derived
;; from `$I64X2`, i.e. the result is assembled as two 64-bit lanes whatever the
;; destination lane type is.
;;
;; `vec` is passed as both the `vd` and `vs2` operands of `rv_vslide1up_vx`.
;; The expected VCode in bitcast-scalar-vector.clif shows a `vmv1r.v` copy
;; ahead of the `vslide1up.vx`, so the emitted instruction ends up with
;; distinct destination and source registers.
(rule 1 (lower (has_type (ty_vec_fits_in_register _) (bitcast _ v @ (value_type $I128))))
(let ((lo XReg (value_regs_get v 0))
(hi XReg (value_regs_get v 1))
(vstate VState (vstate_from_type $I64X2))
(vec VReg (rv_vmv_sx hi vstate)))
(rv_vslide1up_vx vec vec lo (unmasked) vstate)))

;; Fallback for every other `bitcast`: priority 0 is the lowest of the bitcast
;; rules in this section, so the `$I128` rules (priorities 2 and 1) are
;; preferred whenever they match. Everything else is delegated to
;; `gen_bitcast` with the input and output types.
(rule 0 (lower (has_type out_ty (bitcast _ v @ (value_type in_ty))))
(gen_bitcast v in_ty out_ty))

;;;;; Rules for `ceil`;;;;;;;;;
Expand Down
236 changes: 236 additions & 0 deletions cranelift/filetests/filetests/isa/riscv64/bitcast-scalar-vector.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,236 @@
test compile precise-output
set unwind_info=false
target riscv64 has_v

function %bitcast_vec_to_i128(i64x2) -> i128 {
block0(v0: i64x2):
v1 = bitcast.i128 little v0
return v1
}

; VCode:
; addi sp,sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; vle8.v v8,-16(incoming_arg) #avl=16, #vtype=(e8, m1, ta, ma)
; vmv.x.s a0,v8 #avl=2, #vtype=(e64, m1, ta, ma)
; vslidedown.vi v12,v8,1 #avl=2, #vtype=(e64, m1, ta, ma)
; vmv.x.s a1,v12 #avl=2, #vtype=(e64, m1, ta, ma)
; ld ra,8(sp)
; ld fp,0(sp)
; addi sp,sp,16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; addi sp, sp, -0x10
; sd ra, 8(sp)
; sd s0, 0(sp)
; mv s0, sp
; block1: ; offset 0x10
; .byte 0x57, 0x70, 0x08, 0xcc
; addi t6, sp, 0x10
; .byte 0x07, 0x84, 0x0f, 0x02
; .byte 0x57, 0x70, 0x81, 0xcd
; .byte 0x57, 0x25, 0x80, 0x42
; .byte 0x57, 0xb6, 0x80, 0x3e
; .byte 0xd7, 0x25, 0xc0, 0x42
; ld ra, 8(sp)
; ld s0, 0(sp)
; addi sp, sp, 0x10
; ret

function %bitcast_i128_to_vec(i128) -> i64x2 {
block0(v0: i128):
v1 = bitcast.i64x2 little v0
return v1
}

; VCode:
; block0:
; vmv.s.x v12,a1 #avl=2, #vtype=(e64, m1, ta, ma)
; vmv1r.v v14,v12
; vslide1up.vx v14,v12,a0 #avl=2, #vtype=(e64, m1, ta, ma)
; vse8.v v14,0(a2) #avl=16, #vtype=(e8, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x81, 0xcd
; .byte 0x57, 0xe6, 0x05, 0x42
; .byte 0x57, 0x37, 0xc0, 0x9e
; .byte 0x57, 0x67, 0xc5, 0x3a
; .byte 0x57, 0x70, 0x08, 0xcc
; .byte 0x27, 0x07, 0x06, 0x02
; ret

function %bitcast_vec_to_i64(i32x2) -> i64 {
block0(v0: i32x2):
v1 = bitcast.i64 little v0
return v1
}

; VCode:
; addi sp,sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; vle8.v v8,-16(incoming_arg) #avl=8, #vtype=(e8, m1, ta, ma)
; vmv.x.s a0,v8 #avl=1, #vtype=(e64, m1, ta, ma)
; ld ra,8(sp)
; ld fp,0(sp)
; addi sp,sp,16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; addi sp, sp, -0x10
; sd ra, 8(sp)
; sd s0, 0(sp)
; mv s0, sp
; block1: ; offset 0x10
; .byte 0x57, 0x70, 0x04, 0xcc
; addi t6, sp, 0x10
; .byte 0x07, 0x84, 0x0f, 0x02
; .byte 0x57, 0xf0, 0x80, 0xcd
; .byte 0x57, 0x25, 0x80, 0x42
; ld ra, 8(sp)
; ld s0, 0(sp)
; addi sp, sp, 0x10
; ret

function %bitcast_i64_to_vec(i64) -> i32x2 {
block0(v0: i64):
v1 = bitcast.i32x2 little v0
return v1
}

; VCode:
; block0:
; vmv.s.x v11,a0 #avl=1, #vtype=(e64, m1, ta, ma)
; vse8.v v11,0(a1) #avl=8, #vtype=(e8, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0xf0, 0x80, 0xcd
; .byte 0xd7, 0x65, 0x05, 0x42
; .byte 0x57, 0x70, 0x04, 0xcc
; .byte 0xa7, 0x85, 0x05, 0x02
; ret

function %bitcast_vec_to_f64(i32x2) -> f64 {
block0(v0: i32x2):
v1 = bitcast.f64 little v0
return v1
}

; VCode:
; addi sp,sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; vle8.v v8,-16(incoming_arg) #avl=8, #vtype=(e8, m1, ta, ma)
; vfmv.f.s fa0,v8 #avl=1, #vtype=(e64, m1, ta, ma)
; ld ra,8(sp)
; ld fp,0(sp)
; addi sp,sp,16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; addi sp, sp, -0x10
; sd ra, 8(sp)
; sd s0, 0(sp)
; mv s0, sp
; block1: ; offset 0x10
; .byte 0x57, 0x70, 0x04, 0xcc
; addi t6, sp, 0x10
; .byte 0x07, 0x84, 0x0f, 0x02
; .byte 0x57, 0xf0, 0x80, 0xcd
; .byte 0x57, 0x15, 0x80, 0x42
; ld ra, 8(sp)
; ld s0, 0(sp)
; addi sp, sp, 0x10
; ret

function %bitcast_f64_to_vec(f64) -> i32x2 {
block0(v0: f64):
v1 = bitcast.i32x2 little v0
return v1
}

; VCode:
; block0:
; vfmv.s.f v11,fa0 #avl=1, #vtype=(e64, m1, ta, ma)
; vse8.v v11,0(a0) #avl=8, #vtype=(e8, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0xf0, 0x80, 0xcd
; .byte 0xd7, 0x55, 0x05, 0x42
; .byte 0x57, 0x70, 0x04, 0xcc
; .byte 0xa7, 0x05, 0x05, 0x02
; ret

function %bitcast_i16x2_to_f32(i16x2) -> f32 {
block0(v0: i16x2):
v1 = bitcast.f32 little v0
return v1
}

; VCode:
; addi sp,sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; vle8.v v8,-16(incoming_arg) #avl=4, #vtype=(e8, m1, ta, ma)
; vfmv.f.s fa0,v8 #avl=1, #vtype=(e32, m1, ta, ma)
; ld ra,8(sp)
; ld fp,0(sp)
; addi sp,sp,16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; addi sp, sp, -0x10
; sd ra, 8(sp)
; sd s0, 0(sp)
; mv s0, sp
; block1: ; offset 0x10
; .byte 0x57, 0x70, 0x02, 0xcc
; addi t6, sp, 0x10
; .byte 0x07, 0x84, 0x0f, 0x02
; .byte 0x57, 0xf0, 0x00, 0xcd
; .byte 0x57, 0x15, 0x80, 0x42
; ld ra, 8(sp)
; ld s0, 0(sp)
; addi sp, sp, 0x10
; ret

function %bitcast_f32_to_i16x2(f32) -> i16x2 {
block0(v0: f32):
v1 = bitcast.i16x2 little v0
return v1
}

; VCode:
; block0:
; vfmv.s.f v11,fa0 #avl=1, #vtype=(e32, m1, ta, ma)
; vse8.v v11,0(a0) #avl=4, #vtype=(e8, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0xf0, 0x00, 0xcd
; .byte 0xd7, 0x55, 0x05, 0x42
; .byte 0x57, 0x70, 0x02, 0xcc
; .byte 0xa7, 0x05, 0x05, 0x02
; ret

36 changes: 36 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-bitcast-64bit.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
test run
target riscv64 has_v
target riscv64 has_v has_c has_zcb

; Reinterprets the 64 bits of an `i32x2` as one `i64`.
; Per the expected values below, lane 0 lands in the low 32 bits of the result
; and lane 1 in the high 32 bits.
function %bitcast_i32x2_to_i64(i32x2) -> i64 {
block0(v0: i32x2):
v1 = bitcast.i64 little v0
return v1
}
; run: %bitcast_i32x2_to_i64([0xBEEF 0xC0FFEE]) == 0x00c0ffee_0000beef
; run: %bitcast_i32x2_to_i64([-1 127]) == 0x0000007f_ffffffff


; Reinterprets an `i64` as an `i32x2`; the inverse of `%bitcast_i32x2_to_i64`.
; The run cases reuse that test's values with inputs and outputs swapped.
function %bitcast_i64_to_i32x2(i64) -> i32x2 {
block0(v0: i64):
v1 = bitcast.i32x2 little v0
return v1
}
; run: %bitcast_i64_to_i32x2(0x00c0ffee_0000beef) == [0xBEEF 0xC0FFEE]
; run: %bitcast_i64_to_i32x2(0x0000007f_ffffffff) == [-1 127]

; Reinterprets the 64 bits of an `i32x2` as an `f64`.
; The expected results are hexadecimal float literals whose IEEE-754 bit
; patterns are 0x00c0ffee_0000beef and 0x0000007f_ffffffff respectively
; (the second one is a subnormal).
function %bitcast_i32x2_to_f64(i32x2) -> f64 {
block0(v0: i32x2):
v1 = bitcast.f64 little v0
return v1
}
; run: %bitcast_i32x2_to_f64([0xBEEF 0xC0FFEE]) == 0x1.0ffee0000beefp-1011
; run: %bitcast_i32x2_to_f64([-1 127]) == 0x0.0007fffffffffp-1022

; Reinterprets an `f64` as an `i32x2`; the inverse of `%bitcast_i32x2_to_f64`.
; The run cases reuse that test's values with inputs and outputs swapped.
function %bitcast_f64_to_i32x2(f64) -> i32x2 {
block0(v0: f64):
v1 = bitcast.i32x2 little v0
return v1
}
; run: %bitcast_f64_to_i32x2(0x1.0ffee0000beefp-1011) == [0xBEEF 0xC0FFEE]
; run: %bitcast_f64_to_i32x2(0x0.0007fffffffffp-1022) == [-1 127]
20 changes: 20 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-bitcast-i128.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
test run
target riscv64 has_v
target riscv64 has_v has_c has_zcb

; Reinterprets the 128 bits of an `i64x2` as one `i128`.
; Per the expected values below, lane 0 lands in the low 64 bits of the result
; and lane 1 in the high 64 bits.
function %bitcast_i64x2_to_i128(i64x2) -> i128 {
block0(v0: i64x2):
v1 = bitcast.i128 little v0
return v1
}
; run: %bitcast_i64x2_to_i128([0xBEEF 0xC0FFEE]) == 0x0000000000c0ffee_000000000000beef
; run: %bitcast_i64x2_to_i128([-1 127]) == 0x000000000000007f_ffffffffffffffff


; Reinterprets an `i128` as an `i64x2`; the inverse of `%bitcast_i64x2_to_i128`.
; The run cases reuse that test's values with inputs and outputs swapped.
function %bitcast_i128_to_i64x2(i128) -> i64x2 {
block0(v0: i128):
v1 = bitcast.i64x2 little v0
return v1
}
; run: %bitcast_i128_to_i64x2(0x0000000000c0ffee_000000000000beef) == [0xBEEF 0xC0FFEE]
; run: %bitcast_i128_to_i64x2(0x000000000000007f_ffffffffffffffff) == [-1 127]
3 changes: 0 additions & 3 deletions cranelift/fuzzgen/src/function_generator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -764,9 +764,6 @@ fn valid_for_target(triple: &Triple, op: Opcode, args: &[Type], rets: &[Type]) -
&[I128],
&[F32 | F64]
),
// https://github.com/bytecodealliance/wasmtime/issues/6104
(Opcode::Bitcast, &[I128], &[_]),
(Opcode::Bitcast, &[_], &[I128]),
// TODO
(
Opcode::SelectSpectreGuard,
Expand Down
Loading