diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index a449144f231..155733ae13e 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -76,3 +76,6 @@ testsuite/flambda2-test-list @xclerc tools/merge_archives.ml @mshinwell @xclerc tools/merge_dot_a_files.sh @mshinwell @xclerc tools/objinfo.ml @mshinwell @xclerc + +testsuite/tests/unboxed-primitive-args @TheNumbat +tests/simd @TheNumbat diff --git a/backend/CSEgen.ml b/backend/CSEgen.ml index 1b0f9fc06cf..be95670c65e 100644 --- a/backend/CSEgen.ml +++ b/backend/CSEgen.ml @@ -233,7 +233,7 @@ class cse_generic = object (self) method class_of_operation op = match op with | Imove | Ispill | Ireload -> assert false (* treated specially *) - | Iconst_int _ | Iconst_float _ | Iconst_symbol _ -> Op_pure + | Iconst_int _ | Iconst_float _ | Iconst_symbol _ | Iconst_vec128 _ -> Op_pure | Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _ | Iextcall _ | Iprobe _ | Iopaque -> assert false (* treated specially *) | Istackoffset _ -> Op_other diff --git a/backend/afl_instrument.ml b/backend/afl_instrument.ml index e4bd0eea054..93711ba9f46 100644 --- a/backend/afl_instrument.ml +++ b/backend/afl_instrument.ml @@ -93,7 +93,7 @@ and instrument = function | Ctail e -> Ctail (instrument e) (* these are base cases and have no logging *) - | Cconst_int _ | Cconst_natint _ | Cconst_float _ + | Cconst_int _ | Cconst_natint _ | Cconst_float _ | Cconst_vec128 _ | Cconst_symbol _ | Cvar _ as c -> c diff --git a/backend/amd64/CSE.ml b/backend/amd64/CSE.ml index 9d5587a19d2..78eb0d38e28 100644 --- a/backend/amd64/CSE.ml +++ b/backend/amd64/CSE.ml @@ -45,7 +45,7 @@ method! class_of_operation op = | Icompf _ | Icsel _ | Ifloatofint | Iintoffloat | Ivalueofint | Iintofvalue - | Iconst_int _ | Iconst_float _ | Iconst_symbol _ + | Iconst_int _ | Iconst_float _ | Iconst_symbol _ | Iconst_vec128 _ | Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _ | Iextcall _ | Istackoffset _ | Iload _ | Istore _ | Ialloc _ | Iintop _ | Iintop_imm _ | Iintop_atomic _ diff --git a/backend/amd64/arch.ml b/backend/amd64/arch.ml index 3dcecff2b11..4165d7bf1a4 100644 --- a/backend/amd64/arch.ml +++ b/backend/amd64/arch.ml @@ -31,6 +31,9 @@ let prefetchwt1_support = ref false (* Emit elf notes with trap handling information. *) let trap_notes = ref true +(* Enables usage of vector registers. *) +let simd_regalloc_support = ref false + (* Machine-specific command-line options *) let command_line_options = @@ -59,6 +62,10 @@ let command_line_options = " Emit .note.ocaml_eh section with trap handling information (default)"; "-fno-trap-notes", Arg.Clear trap_notes, " Do not emit .note.ocaml_eh section with trap handling information"; + "-fsimd", Arg.Set simd_regalloc_support, + " Enable register allocation for SIMD vectors"; + "-fno-simd", Arg.Clear simd_regalloc_support, + " Disable register allocation for SIMD vectors (default)" ] (* Specific operations for the AMD64 processor *) @@ -130,6 +137,8 @@ let size_addr = 8 let size_int = 8 let size_float = 8 +let size_vec128 = 16 + let allow_unaligned_access = true (* Behavior of division *) diff --git a/backend/amd64/emit.mlp b/backend/amd64/emit.mlp index 5a7937182c8..58714e7d458 100644 --- a/backend/amd64/emit.mlp +++ b/backend/amd64/emit.mlp @@ -47,9 +47,14 @@ let int_reg_name = let float_reg_name = Array.init 16 (fun i -> XMM i) -let register_name r = - if r < 100 then Reg64 (int_reg_name.(r)) - else Regf (float_reg_name.(r - 100)) +let register_name typ r = + match typ with + | Int | Val | Addr -> Reg64 (int_reg_name.(r)) + | Float -> Regf (float_reg_name.(r - 100)) + | Vec128 -> + if not !simd_regalloc_support then + Misc.fatal_error "SIMD register allocation is not enabled."; + Regf (float_reg_name.(r - 100)) (* CFI directives *) @@ -80,7 +85,7 @@ let fastcode_flag = ref true (* Layout of the stack frame *) let stack_offset = ref 0 -let num_stack_slots = Array.make Proc.num_register_classes 0 +let num_stack_slots = Array.make Proc.num_stack_slot_classes 0 let prologue_required = ref false @@ -88,8 +93,13 @@ let frame_required = ref false let frame_size () = (* includes return address *) if !frame_required then begin + if not !simd_regalloc_support then assert (num_stack_slots.(2) = 0); let sz = - (!stack_offset + 8 * (num_stack_slots.(0) + num_stack_slots.(1)) + 8 + (!stack_offset + + 8 + + 8 * num_stack_slots.(0) + + 8 * num_stack_slots.(1) + + 16 * num_stack_slots.(2) + (if fp then 8 else 0)) in Misc.align sz 16 end else @@ -99,9 +109,14 @@ let slot_offset loc cl = match loc with | Incoming n -> frame_size() + n | Local n -> - if cl = 0 - then !stack_offset + n * 8 - else !stack_offset + (num_stack_slots.(0) + n) * 8 + if not !simd_regalloc_support then assert (num_stack_slots.(2) = 0 && cl < 2); + (!stack_offset + + (* Preserves original ordering (int -> float) *) + match cl with + | 2 -> n * 16 + | 0 -> num_stack_slots.(2) * 16 + n * 8 + | 1 -> num_stack_slots.(2) * 16 + num_stack_slots.(0) * 8 + n * 8 + | _ -> Misc.fatal_error "Unknown register class") | Outgoing n -> n | Domainstate _ -> assert false (* not a stack slot *) @@ -274,15 +289,16 @@ let emit_Llabel fallthrough lbl section_name = let x86_data_type_for_stack_slot = function | Float -> REAL8 - | _ -> QWORD + | Vec128 -> VEC128 + | Int | Addr | Val -> QWORD let reg = function - | { loc = Reg.Reg r } -> register_name r + | { loc = Reg.Reg r; typ = ty } -> register_name ty r | { loc = Stack (Domainstate n); typ = ty } -> let ofs = n + Domainstate.(idx_of_field Domain_extra_params) * 8 in mem64 (x86_data_type_for_stack_slot ty) ofs R14 | { loc = Stack s; typ = ty } as r -> - let ofs = slot_offset s (register_class r) in + let ofs = slot_offset s (stack_slot_class r.typ) in mem64 (x86_data_type_for_stack_slot ty) ofs RSP | { loc = Unknown } -> assert false @@ -305,7 +321,7 @@ let reg_low_32_name = Array.map (fun r -> Reg32 r) int_reg_name let emit_subreg tbl typ r = match r.loc with | Reg.Reg r when r < 13 -> tbl.(r) - | Stack s -> mem64 typ (slot_offset s (register_class r)) RSP + | Stack s -> mem64 typ (slot_offset s (stack_slot_class r.Reg.typ)) RSP | _ -> assert false let arg8 i n = emit_subreg reg_low_8_name BYTE i.arg.(n) @@ -347,7 +363,7 @@ let record_frame_label live dbg = | {typ = Val; loc = Reg r} -> live_offset := ((r lsl 1) + 1) :: !live_offset | {typ = Val; loc = Stack s} as reg -> - live_offset := slot_offset s (register_class reg) :: !live_offset + live_offset := slot_offset s (stack_slot_class reg.typ) :: !live_offset | {typ = Addr} as r -> Misc.fatal_error ("bad GC root " ^ Reg.name r) | _ -> () @@ -483,7 +499,7 @@ let build_asm_directives () : (module Asm_targets.Asm_directives_intf.S) = ( include X86_dsl.D type data_type = - | NONE | DWORD | QWORD + | NONE | DWORD | QWORD | VEC128 type nonrec constant = constant let const_int64 num = Const num @@ -497,7 +513,8 @@ let build_asm_directives () : (module Asm_targets.Asm_directives_intf.S) = ( (function | NONE -> X86_ast.NONE | DWORD -> X86_ast.DWORD - | QWORD -> X86_ast.QWORD) + | QWORD -> X86_ast.QWORD + | VEC128 -> X86_ast.VEC128) data_type in label ?typ str @@ -670,6 +687,24 @@ let emit_float_constant f lbl = _label (emit_label lbl); D.qword (Const f) +(* Vector constants *) + +let vec128_constants = ref ([] : (Cmm.vec128_bits * int) list) + +let add_vec128_constant bits = + try + List.assoc bits !vec128_constants + with Not_found -> + let lbl = new_label() in + vec128_constants := (bits, lbl) :: !vec128_constants; + lbl + +let emit_vec128_constant {high; low} lbl = + _label (emit_label lbl); + (* SIMD vectors respect little-endian byte order *) + D.qword (Const low); + D.qword (Const high) + let emit_global_label_for_symbol lbl = add_def_symbol lbl; let lbl = emit_symbol lbl in @@ -683,11 +718,27 @@ let emit_global_label s = let move (src : Reg.t) (dst : Reg.t) = if src.loc <> dst.loc then begin match src.typ, src.loc, dst.typ, dst.loc with - | Float, Reg.Reg _, Float, Reg.Reg _ -> I.movapd (reg src) (reg dst) - | Float, _, Float, _ -> I.movsd (reg src) (reg dst) - | Float, _, Int, _ - | Int, _, Float, _ -> I.movq (reg src) (reg dst) - | _ -> I.mov (reg src) (reg dst) + | Float, Reg.Reg _, Float, Reg.Reg _ + | Vec128, _, Vec128, _ -> + (* Vec128 stack slots are always aligned. *) + I.movapd (reg src) (reg dst) + | Float, _, Float, _ -> + I.movsd (reg src) (reg dst) + | Float, _, Int, _ | Int, _, Float, _ -> + (* CR-soon gyorsh: this case is used by the bits_of_float/float_of_bits intrinsics. + They should instead generate a separate Ispecific and this case should be + removed. *) + I.movq (reg src) (reg dst) + | (Int | Val | Addr), _, (Int | Val | Addr), _ -> + I.mov (reg src) (reg dst) + | Vec128, _, _, _ | _, _, Vec128, _ -> + Misc.fatal_errorf + "Illegal move between a vector and non-vector register (%s to %s)\n" + (Reg.name src) (Reg.name dst) + | Float, _, (Val | Addr), _ | (Val | Addr), _, Float, _ -> + Misc.fatal_errorf + "Illegal move between a float and val/addr register (%s to %s)\n" + (Reg.name src) (Reg.name dst) end let stack_to_stack_move (src : Reg.t) (dst : Reg.t) = @@ -698,7 +749,7 @@ let stack_to_stack_move (src : Reg.t) (dst : Reg.t) = (* Not calling move because r15 is not in int_reg_name. *) I.mov (reg src) r15; I.mov r15 (reg dst) - | Float | Addr -> + | Float | Addr | Vec128 -> Misc.fatal_errorf "Unexpected register type for stack to stack move: from %s to %s\n" (Reg.name src) (Reg.name dst) @@ -906,7 +957,15 @@ let emit_instr fallthrough i = I.xorpd (res i 0) (res i 0) | _ -> let lbl = add_float_constant f in - I.movsd (mem64_rip NONE (emit_label lbl)) (res i 0) + I.movsd (mem64_rip REAL8 (emit_label lbl)) (res i 0) + end + | Lop(Iconst_vec128 {high; low}) -> + begin match (high, low) with + | 0x0000_0000_0000_0000L, 0x0000_0000_0000_0000L -> + I.xorpd (res i 0) (res i 0) + | _ -> + let lbl = add_vec128_constant {high; low} in + I.movupd (mem64_rip VEC128 (emit_label lbl)) (res i 0) end | Lop(Iconst_symbol s) -> add_used_symbol s.sym_name; @@ -966,11 +1025,13 @@ let emit_instr fallthrough i = | Sixteen_unsigned -> I.movzx (addressing addr WORD i 0) dest | Sixteen_signed -> - I.movsx (addressing addr WORD i 0) dest; + I.movsx (addressing addr WORD i 0) dest | Thirtytwo_unsigned -> I.mov (addressing addr DWORD i 0) (res32 i 0) | Thirtytwo_signed -> I.movsxd (addressing addr DWORD i 0) dest + | Onetwentyeight -> + I.movupd (addressing addr VEC128 i 0) dest | Single -> I.cvtss2sd (addressing addr REAL4 i 0) dest | Double -> @@ -986,6 +1047,8 @@ let emit_instr fallthrough i = I.mov (arg16 i 0) (addressing addr WORD i 1) | Thirtytwo_signed | Thirtytwo_unsigned -> I.mov (arg32 i 0) (addressing addr DWORD i 1) + | Onetwentyeight -> + I.movupd (arg i 0) (addressing addr VEC128 i 1) | Single -> I.cvtsd2ss (arg i 0) xmm15; I.movss xmm15 (addressing addr REAL4 i 1) @@ -1112,9 +1175,9 @@ let emit_instr fallthrough i = I.movq a0 (res i 0); I.neg (res i 0) | Lop(Inegf) -> - I.xorpd (mem64_rip OWORD (emit_symbol "caml_negf_mask")) (res i 0) + I.xorpd (mem64_rip VEC128 (emit_symbol "caml_negf_mask")) (res i 0) | Lop(Iabsf) -> - I.andpd (mem64_rip OWORD (emit_symbol "caml_absf_mask")) (res i 0) + I.andpd (mem64_rip VEC128 (emit_symbol "caml_absf_mask")) (res i 0) | Lop(Iaddf | Isubf | Imulf | Idivf as floatop) -> instr_for_floatop floatop (arg i 1) (res i 0) | Lop(Ifloatofint) -> @@ -1324,8 +1387,8 @@ let emit_instr fallthrough i = we must be careful not to clobber it before use. *) let (tmp1, tmp2) = if i.arg.(0).loc = Reg 0 (* rax *) - then (phys_reg 4 (*rdx*), phys_reg 0 (*rax*)) - else (phys_reg 0 (*rax*), phys_reg 4 (*rdx*)) in + then (phys_reg Int 4 (*rdx*), phys_reg Int 0 (*rax*)) + else (phys_reg Int 0 (*rax*), phys_reg Int 4 (*rdx*)) in I.lea (mem64_rip NONE lbl) (reg tmp1); I.movsxd (mem64 DWORD 0 (arg64 i 0) ~scale:4 ~base:(reg64 tmp1)) @@ -1415,7 +1478,7 @@ let fundecl fundecl = local_realloc_sites := []; bound_error_sites := []; bound_error_call := 0; - for i = 0 to Proc.num_register_classes - 1 do + for i = 0 to Proc.num_stack_slot_classes - 1 do num_stack_slots.(i) <- fundecl.fun_num_stack_slots.(i); done; prologue_required := fundecl.fun_prologue_required; @@ -1474,6 +1537,8 @@ let emit_item = function | Cint n -> D.qword (const_nat n) | Csingle f -> D.long (Const (Int64.of_int32 (Int32.bits_of_float f))) | Cdouble f -> D.qword (Const (Int64.bits_of_float f)) + (* SIMD vectors respect little-endian byte order *) + | Cvec128 {high; low} -> D.qword (Const low); D.qword (Const high) | Csymbol_address s -> add_used_symbol s.sym_name; D.qword (ConstLabel (emit_cmm_symbol s)) @@ -1554,15 +1619,20 @@ let begin_assembly unix = () -let make_stack_loc ~offset i (r : Reg.t) = +let make_stack_loc ~offset n (r : Reg.t) = (* Use "Outgoing" stack locations, instead of "Local", because [slot_offset] emits (Outgoing n) directly as offset [n] from the stack pointer, rather than a calculation relative to the stack frame, which is incorrect for naked floats (arising from live variables, not probe arguments) in the wrapper's frame. *) - let loc = Stack (Outgoing (offset + (8*i))) in + let loc = Stack (Outgoing (offset + n)) in (* Manufacture stack entry with this register's type *) + (match r.typ with + | Int | Val | Addr | Float -> () + | Vec128 -> + if not !simd_regalloc_support then + Misc.fatal_error "SIMD register allocation is not enabled."); Reg.at_location r.typ loc (* CR mshinwell: Not now, but after code review, it would be better to @@ -1613,6 +1683,24 @@ let make_stack_loc ~offset i (r : Reg.t) = This assumption might no longer hold in the presence of unboxed types. *) +let size_of_regs regs = + Array.fold_right + (fun r acc -> + match r.Reg.typ with + | Int | Addr | Val -> acc + size_int + | Float -> acc + size_float + | Vec128 -> acc + size_vec128) + regs 0 + +let stack_locations ~offset regs = + let _, locs = Array.fold_right (fun r (n, offsets) -> + let next = n + match r.Reg.typ with + | Int | Val | Addr -> size_int + | Float -> size_float + | Vec128 -> size_vec128 in + next, (make_stack_loc n r ~offset :: offsets)) regs (0, []) in + locs |> List.rev |> Array.of_list + let emit_probe_handler_wrapper p = let wrap_label = probe_handler_wrapper_name p.probe_label in let probe_name, handler_code_sym = @@ -1626,7 +1714,7 @@ let emit_probe_handler_wrapper p = recall that the wrapper does however have its own frame.) *) frame_required := true; stack_offset := p.stack_offset; - for i = 0 to Proc.num_register_classes - 1 do + for i = 0 to Proc.num_stack_slot_classes - 1 do num_stack_slots.(i) <- p.num_stack_slots.(i); done; (* Account for the return address that is now pushed on the stack. *) @@ -1642,18 +1730,16 @@ let emit_probe_handler_wrapper p = I.mov rsp rbp; end; (* Prepare to call the handler: calculate and allocate stack space. - The calculations are simplified using the following property. *) - assert (size_addr = 8 && size_int = 8 && size_float = 8); - (* Compute the size of stack slots for all live hard registers. *) + Compute the size of stack slots for all live hard registers. *) let live = Reg.Set.elements p.probe_insn.live |> List.filter Reg.is_reg |> Array.of_list in - let live_offset = 8 * (Array.length live) in + let live_offset = size_of_regs live in (* Compute the size of stack slots for spilling all arguments of the probe. *) let aux_offset = 8 (* for saving r15 *) in - let tmp_offset = 8 * (Array.length p.probe_insn.arg) in + let tmp_offset = size_of_regs p.probe_insn.arg in let loc_args, loc_offset = Proc.loc_arguments (Reg.typv p.probe_insn.arg) in (* Ensure the stack is aligned. Assuming that the stack at the probe site is 16-byte aligned, @@ -1669,7 +1755,7 @@ let emit_probe_handler_wrapper p = emit_stack_offset n; (* Save all live hard registers *) let offset = aux_offset + tmp_offset + loc_offset in - let saved_live = Array.mapi (make_stack_loc ~offset) live in + let saved_live = stack_locations ~offset live in Array.iteri (fun i reg -> move reg saved_live.(i)) live; (* Spill r15 to free it to be used as a temporary register for stack to stack copying. *) @@ -1678,11 +1764,11 @@ let emit_probe_handler_wrapper p = I.mov r15 (reg saved_r15); (* Spill all arguments of the probe. Some of these may already be on the stack, in which case a temporary is used for the move. *) - let tmp = Array.mapi (make_stack_loc ~offset:loc_offset) p.probe_insn.arg in - Array.iteri (fun i reg -> move_allowing_stack_to_stack reg (tmp.(i))) + let saved_args = stack_locations ~offset:loc_offset p.probe_insn.arg in + Array.iteri (fun i reg -> move_allowing_stack_to_stack reg (saved_args.(i))) p.probe_insn.arg; (* Load probe arguments to correct locations for the handler *) - Array.iteri (fun i reg -> move_allowing_stack_to_stack tmp.(i) reg) loc_args; + Array.iteri (fun i reg -> move_allowing_stack_to_stack saved_args.(i) reg) loc_args; (* Reload spilled registers used as temporaries *) I.mov (reg saved_r15) r15; (* Emit call to handler *) @@ -1696,7 +1782,7 @@ let emit_probe_handler_wrapper p = | Stack (Outgoing k) -> (match r.typ with | Val -> k::acc - | Int | Float -> acc + | Int | Float | Vec128 -> acc | Addr -> Misc.fatal_error ("bad GC root " ^ Reg.name r)) | _ -> assert false) saved_live @@ -1764,8 +1850,8 @@ let emit_probe_notes0 () = let arg_name = match arg.loc with | Stack s -> - Printf.sprintf "%d(%%rsp)" (slot_offset s (register_class arg)) - | Reg reg -> Proc.register_name reg + Printf.sprintf "%d(%%rsp)" (slot_offset s (stack_slot_class arg.Reg.typ)) + | Reg reg -> Proc.register_name arg.Reg.typ reg | Unknown -> Misc.fatal_errorf "Cannot create probe: illegal argument: %a" Printmach.reg arg @@ -1865,7 +1951,8 @@ let end_assembly () = | _ -> D.section [".rodata.cst8"] (Some "aM") ["@progbits";"8"] end; D.align ~data:true 8; - List.iter (fun (cst,lbl) -> emit_float_constant cst lbl) !float_constants + List.iter (fun (cst,lbl) -> emit_float_constant cst lbl) !float_constants; + List.iter (fun (cst,lbl) -> emit_vec128_constant cst lbl) !vec128_constants; end; (* Emit probe handler wrappers *) diff --git a/backend/amd64/proc.ml b/backend/amd64/proc.ml index 2bb29f951cb..dc08ca0ce12 100644 --- a/backend/amd64/proc.ml +++ b/backend/amd64/proc.ml @@ -48,7 +48,7 @@ let win64 = Arch.win64 r14 domain state pointer r15 allocation pointer - xmm0 - xmm15 100 - 115 *) + xmm0 - xmm15 100 - 115 *) (* Conventions: rax - r13: OCaml function arguments @@ -103,19 +103,47 @@ let register_class r = match r.typ with | Val | Int | Addr -> 0 | Float -> 1 + | Vec128 -> + if not !simd_regalloc_support then + Misc.fatal_error "SIMD register allocation is not enabled."; + 1 -let register_class_tag c = +let num_stack_slot_classes = 3 + +let stack_slot_class typ = + match typ with + | Val | Addr | Int -> 0 + | Float -> 1 + | Vec128 -> + if not !simd_regalloc_support then + Misc.fatal_error "SIMD register allocation is not enabled."; + 2 + +let stack_class_tag c = match c with | 0 -> "i" | 1 -> "f" - | c -> Misc.fatal_errorf "Unspecified register class %d" c + | 2 -> + if not !simd_regalloc_support then + Misc.fatal_error "SIMD register allocation is not enabled."; + "x" + | c -> Misc.fatal_errorf "Unspecified stack slot class %d" c let num_available_registers = [| 13; 16 |] let first_available_register = [| 0; 100 |] -let register_name r = - if r < 100 then int_reg_name.(r) else float_reg_name.(r - 100) +let register_name ty r = + (* If the ID doesn't match the type, the array access will raise. *) + match ty with + | Int | Addr | Val -> + int_reg_name.(r - first_available_register.(0)) + | Float -> + float_reg_name.(r - first_available_register.(1)) + | Vec128 -> + if not !simd_regalloc_support then + Misc.fatal_error "SIMD register allocation is not enabled."; + float_reg_name.(r - first_available_register.(1)) (* Pack registers starting at %rax so as to reduce the number of REX prefixes and thus improve code density *) @@ -133,18 +161,34 @@ let hard_float_reg = for i = 0 to 15 do v.(i) <- Reg.at_location Float (Reg (100 + i)) done; v -let all_phys_regs = - Array.append hard_int_reg hard_float_reg - -let phys_reg n = - if n < 100 then hard_int_reg.(n) else hard_float_reg.(n - 100) +let hard_vec128_reg = + let v = Array.make 16 Reg.dummy in + for i = 0 to 15 do v.(i) <- Reg.at_location Vec128 (Reg (100 + i)) done; + fun () -> if !simd_regalloc_support then v + else Misc.fatal_error "SIMD register allocation is not enabled." -let rax = phys_reg 0 -let rdx = phys_reg 4 -let r10 = phys_reg 10 -let r11 = phys_reg 11 -let rbp = phys_reg 12 -let rxmm15 = phys_reg 115 +let all_phys_regs = + let basic_regs = Array.append hard_int_reg hard_float_reg in + fun () -> if !simd_regalloc_support then Array.append basic_regs (hard_vec128_reg ()) + else basic_regs + +let phys_reg ty n = + match ty with + | Int | Addr | Val -> hard_int_reg.(n) + | Float -> hard_float_reg.(n - 100) + | Vec128 -> (hard_vec128_reg ()).(n - 100) + +let rax = phys_reg Int 0 +let rdx = phys_reg Int 4 +let r10 = phys_reg Int 10 +let r11 = phys_reg Int 11 +let rbp = phys_reg Int 12 + +(* CSE needs to know that all versions of xmm15 are destroyed. *) +let destroy_xmm15 () = + if !simd_regalloc_support + then [| phys_reg Float 115; phys_reg Vec128 115 |] + else [| phys_reg Float 115 |] let destroyed_by_plt_stub = if not X86_proc.use_plt then [| |] else [| r10; r11 |] @@ -154,6 +198,10 @@ let num_destroyed_by_plt_stub = Array.length destroyed_by_plt_stub let destroyed_by_plt_stub_set = Reg.set_of_array destroyed_by_plt_stub let stack_slot slot ty = + (match ty with + | Float | Int | Addr | Val -> () + | Vec128 -> if not !simd_regalloc_support then + Misc.fatal_error "SIMD register allocation is not enabled."); Reg.at_location ty (Stack slot) (* Instruction selection *) @@ -164,8 +212,7 @@ let word_addressed = false let size_domainstate_args = 64 * size_int -let calling_conventions first_int last_int first_float last_float - make_stack first_stack +let calling_conventions first_int last_int first_float last_float make_stack first_stack arg = let loc = Array.make (Array.length arg) Reg.dummy in let int = ref first_int in @@ -175,7 +222,7 @@ let calling_conventions first_int last_int first_float last_float match arg.(i) with | Val | Int | Addr as ty -> if !int <= last_int then begin - loc.(i) <- phys_reg !int; + loc.(i) <- phys_reg ty !int; incr int end else begin loc.(i) <- stack_slot (make_stack !ofs) ty; @@ -184,13 +231,23 @@ let calling_conventions first_int last_int first_float last_float assert (not (Reg.Set.mem loc.(i) destroyed_by_plt_stub_set)) | Float -> if !float <= last_float then begin - loc.(i) <- phys_reg !float; + loc.(i) <- phys_reg Float !float; incr float end else begin loc.(i) <- stack_slot (make_stack !ofs) Float; ofs := !ofs + size_float end + | Vec128 -> + if !float <= last_float then begin + loc.(i) <- phys_reg Vec128 !float; + incr float + end else begin + ofs := Misc.align !ofs 16; + loc.(i) <- stack_slot (make_stack !ofs) Vec128; + ofs := !ofs + size_vec128 + end done; + (* CR mslater: (SIMD) will need to be 32/64 if vec256/512 are used. *) (loc, Misc.align (max 0 !ofs) 16) (* keep stack 16-aligned *) let incoming ofs = @@ -252,7 +309,7 @@ let win64_loc_external_arguments arg = match arg.(i) with | Val | Int | Addr as ty -> if !reg < 4 then begin - loc.(i) <- phys_reg win64_int_external_arguments.(!reg); + loc.(i) <- phys_reg ty win64_int_external_arguments.(!reg); incr reg end else begin loc.(i) <- stack_slot (Outgoing !ofs) ty; @@ -260,13 +317,23 @@ let win64_loc_external_arguments arg = end | Float -> if !reg < 4 then begin - loc.(i) <- phys_reg win64_float_external_arguments.(!reg); + loc.(i) <- phys_reg Float win64_float_external_arguments.(!reg); incr reg end else begin loc.(i) <- stack_slot (Outgoing !ofs) Float; ofs := !ofs + size_float end + | Vec128 -> + if !reg < 4 then begin + loc.(i) <- phys_reg Vec128 win64_float_external_arguments.(!reg); + incr reg + end else begin + ofs := Misc.align !ofs 16; + loc.(i) <- stack_slot (Outgoing !ofs) Vec128; + ofs := !ofs + size_vec128 + end done; + (* CR mslater: (SIMD) will need to be 32/64 if vec256/512 are used. *) (loc, Misc.align !ofs 16) (* keep stack 16-aligned *) let loc_external_arguments ty_args = @@ -302,18 +369,27 @@ let regs_are_volatile _rs = false (* Registers destroyed by operations *) +let destroyed_at_c_call_win64 = + let basic_regs = Array.append + (Array.map (phys_reg Int) [|0;4;5;6;7;10;11|]) + (Array.sub hard_float_reg 0 6) + in + fun () -> if !simd_regalloc_support + then Array.append basic_regs (Array.sub (hard_vec128_reg ()) 0 6) + else basic_regs + +let destroyed_at_c_call_unix = + (* Unix: rbp, rbx, r12-r15 preserved *) + let basic_regs = Array.append + (Array.map (phys_reg Int) [|0;2;3;4;5;6;7;10;11|]) + hard_float_reg + in + fun () -> if !simd_regalloc_support + then Array.append basic_regs (hard_vec128_reg ()) + else basic_regs + let destroyed_at_c_call = - if win64 then - (* Win64: rbx, rbp, rsi, rdi, r12-r15, xmm6-xmm15 preserved *) - Array.of_list(List.map phys_reg - [0;4;5;6;7;10;11; - 100;101;102;103;104;105]) - else - (* Unix: rbp, rbx, r12-r15 preserved *) - Array.of_list(List.map phys_reg - [0;2;3;4;5;6;7;10;11; - 100;101;102;103;104;105;106;107; - 108;109;110;111;112;113;114;115]) + if win64 then destroyed_at_c_call_win64 else destroyed_at_c_call_unix let destroyed_at_alloc_or_poll = if X86_proc.use_plt then @@ -329,12 +405,13 @@ let has_pushtrap traps = (* note: keep this function in sync with `destroyed_at_{basic,terminator}` below. *) let destroyed_at_oper = function - Iop(Icall_ind | Icall_imm _ | Iextcall { alloc = true; }) -> - all_phys_regs - | Iop(Iextcall { alloc = false; }) -> destroyed_at_c_call + Iop(Icall_ind | Icall_imm _ | Iextcall { alloc = true; }) + -> all_phys_regs () + | Iop(Iextcall { alloc = false; }) -> destroyed_at_c_call () | Iop(Iintop(Idiv | Imod)) | Iop(Iintop_imm((Idiv | Imod), _)) -> [| rax; rdx |] - | Iop(Istore(Single, _, _)) -> [| rxmm15 |] + | Iop(Istore(Single, _, _)) + -> destroy_xmm15 () | Iop(Ialloc _ | Ipoll _) -> destroyed_at_alloc_or_poll | Iop(Iintop(Imulh _ | Icomp _) | Iintop_imm((Icomp _), _)) -> [| rax |] @@ -359,13 +436,13 @@ let destroyed_at_oper = function | Iop(Iintop_atomic _) | Iop(Istore((Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed | Thirtytwo_unsigned | Thirtytwo_signed | Word_int | Word_val - | Double ), _, _)) + | Double | Onetwentyeight ), _, _)) | Iop(Imove | Ispill | Ireload | Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf | Icompf _ | Icsel _ | Ifloatofint | Iintoffloat | Ivalueofint | Iintofvalue - | Iconst_int _ | Iconst_float _ | Iconst_symbol _ + | Iconst_int _ | Iconst_float _ | Iconst_symbol _ | Iconst_vec128 _ | Itailcall_ind | Itailcall_imm _ | Istackoffset _ | Iload (_, _, _) | Iname_for_debugger _ | Iprobe _| Iprobe_is_enabled _ | Iopaque) | Iend | Ireturn _ | Iifthenelse (_, _, _) | Icatch (_, _, _, _) @@ -378,7 +455,6 @@ let destroyed_at_oper = function else [||] - let destroyed_at_raise = all_phys_regs let destroyed_at_reloadretaddr = [| |] @@ -393,7 +469,7 @@ let destroyed_at_basic (basic : Cfg_intf.S.basic) = | Op (Intop (Idiv | Imod)) | Op (Intop_imm ((Idiv | Imod), _)) -> [| rax; rdx |] | Op(Store(Single, _, _)) -> - [| rxmm15 |] + destroy_xmm15 () | Op(Intop(Imulh _ | Icomp _) | Intop_imm((Icomp _), _)) -> [| rax |] | Op (Specific (Irdtsc | Irdpmc)) -> @@ -401,12 +477,12 @@ let destroyed_at_basic (basic : Cfg_intf.S.basic) = | Op (Intop Icheckbound | Intop_imm (Icheckbound, _)) -> assert false | Op (Move | Spill | Reload - | Const_int _ | Const_float _ | Const_symbol _ + | Const_int _ | Const_float _ | Const_symbol _ | Const_vec128 _ | Stackoffset _ | Load _ | Store ((Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed | Thirtytwo_unsigned | Thirtytwo_signed | Word_int | Word_val - | Double ), _, _) + | Double | Onetwentyeight ), _, _) | Intop (Iadd | Isub | Imul | Iand | Ior | Ixor | Ilsl | Ilsr | Iasr | Ipopcnt | Iclz _ | Ictz _) | Intop_imm ((Iadd | Isub | Imul | Imulh _ | Iand | Ior | Ixor @@ -447,9 +523,9 @@ let destroyed_at_terminator (terminator : Cfg_intf.S.terminator) = [| rax; rdx |] | Call_no_return { func_symbol = _; alloc; ty_res = _; ty_args = _; } | Prim {op = External { func_symbol = _; alloc; ty_res = _; ty_args = _; }; _} -> - if alloc then all_phys_regs else destroyed_at_c_call + if alloc then all_phys_regs () else destroyed_at_c_call () | Call {op = Indirect | Direct _; _} -> - all_phys_regs + all_phys_regs () | Specific_can_raise { op = (Ilea _ | Ibswap _ | Isqrtf | Isextend32 | Izextend32 | Ifloatarithmem _ | Ifloatsqrtf _ | Ifloat_iround | Ifloat_round _ | Ifloat_min | Ifloat_max @@ -502,7 +578,7 @@ let safe_register_pressure = function | Ifloatofint | Iintoffloat | Ivalueofint | Iintofvalue | Icompf _ | Icsel _ - | Iconst_int _ | Iconst_float _ | Iconst_symbol _ + | Iconst_int _ | Iconst_float _ | Iconst_symbol _ | Iconst_vec128 _ | Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _ | Istackoffset _ | Iload (_, _, _) | Istore (_, _, _) | Iintop _ | Iintop_imm (_, _) | Iintop_atomic _ @@ -536,12 +612,12 @@ let max_register_pressure = | Iintop_atomic _ | Istore((Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed | Thirtytwo_unsigned | Thirtytwo_signed | Word_int | Word_val - | Double ), + | Double | Onetwentyeight ), _, _) | Imove | Ispill | Ireload | Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf | Icsel _ | Ifloatofint | Iintoffloat | Ivalueofint | Iintofvalue - | Iconst_int _ | Iconst_float _ | Iconst_symbol _ + | Iconst_int _ | Iconst_float _ | Iconst_symbol _ | Iconst_vec128 _ | Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _ | Istackoffset _ | Iload (_, _, _) | Ispecific(Ilea _ | Isextend32 | Izextend32 | Iprefetch _ | Ipause @@ -559,7 +635,9 @@ let max_register_pressure = let frame_required ~fun_contains_calls ~fun_num_stack_slots = fp || fun_contains_calls || - fun_num_stack_slots.(0) > 0 || fun_num_stack_slots.(1) > 0 + fun_num_stack_slots.(0) > 0 || + fun_num_stack_slots.(1) > 0 || + fun_num_stack_slots.(2) > 0 let prologue_required ~fun_contains_calls ~fun_num_stack_slots = frame_required ~fun_contains_calls ~fun_num_stack_slots @@ -575,6 +653,12 @@ let init () = end else num_available_registers.(0) <- 13 +(* Precolored_regs is not always the same as [all_phys_regs], as some physical registers + may not be allocatable (e.g. rbp when frame pointers are enabled). *) +let precolored_regs () = + let phys_regs = Reg.set_of_array (all_phys_regs ()) in + if fp then Reg.Set.remove rbp phys_regs else phys_regs + let operation_supported = function | Cpopcnt -> !popcnt_support | Cprefetch _ | Catomic _ diff --git a/backend/amd64/regalloc_stack_operands.ml b/backend/amd64/regalloc_stack_operands.ml index c3e4b8251a8..ead88266d43 100644 --- a/backend/amd64/regalloc_stack_operands.ml +++ b/backend/amd64/regalloc_stack_operands.ml @@ -192,8 +192,8 @@ let basic (map : spilled_map) (instr : Cfg.basic Cfg.instruction) = | Op (Specific (Irdtsc | Irdpmc)) | Op (Intop (Ipopcnt | Iclz _| Ictz _)) | Op (Intop_atomic _) - | Op (Move | Spill | Reload | Negf | Absf | Const_float _ | Compf _ | Stackoffset _ - | Load _ | Store _ | Name_for_debugger _ | Probe_is_enabled _ + | Op (Move | Spill | Reload | Negf | Absf | Const_float _ | Const_vec128 _ | Compf _ + | Stackoffset _ | Load _ | Store _ | Name_for_debugger _ | Probe_is_enabled _ | Valueofint | Intofvalue | Opaque | Begin_region | End_region ) | Op (Specific (Isqrtf | Isextend32 | Izextend32 | Ilea _ | Istore_int (_, _, _) diff --git a/backend/amd64/reload.ml b/backend/amd64/reload.ml index 37115a3b1c5..35fda48a6f7 100644 --- a/backend/amd64/reload.ml +++ b/backend/amd64/reload.ml @@ -178,7 +178,7 @@ method! reload_operation op arg res = | Ilfence | Isfence | Imfence | Iprefetch _ | Ibswap _| Ifloatsqrtf _) - | Imove|Ispill|Ireload|Inegf|Iabsf|Iconst_float _|Icall_ind|Icall_imm _ + | Imove|Ispill|Ireload|Inegf|Iabsf|Iconst_float _|Iconst_vec128 _|Icall_ind|Icall_imm _ | Icompf _ | Itailcall_ind|Itailcall_imm _|Iextcall _|Istackoffset _|Iload (_, _, _) | Istore (_, _, _)|Ialloc _|Iname_for_debugger _|Iprobe _|Iprobe_is_enabled _ diff --git a/backend/amd64/selection.ml b/backend/amd64/selection.ml index 1cbe721b140..60b15935cc7 100644 --- a/backend/amd64/selection.ml +++ b/backend/amd64/selection.ml @@ -87,9 +87,9 @@ let rec select_addr exp = exception Use_default -let rax = phys_reg 0 -let rcx = phys_reg 5 -let rdx = phys_reg 4 +let rax = phys_reg Int 0 +let rcx = phys_reg Int 5 +let rdx = phys_reg Int 4 let pseudoregs_for_operation op arg res = match op with @@ -170,7 +170,7 @@ let pseudoregs_for_operation op arg res = |Ipause|Ilfence|Isfence|Imfence |Ioffset_loc (_, _)|Ifloatsqrtf _|Irdtsc|Iprefetch _) | Imove|Ispill|Ireload|Ifloatofint|Iintoffloat|Ivalueofint|Iintofvalue - | Iconst_int _|Iconst_float _ + | Iconst_int _|Iconst_float _|Iconst_vec128 _ | Iconst_symbol _|Icall_ind|Icall_imm _|Itailcall_ind|Itailcall_imm _ | Iextcall _|Istackoffset _|Iload (_, _, _) | Istore (_, _, _)|Ialloc _ | Iname_for_debugger _|Iprobe _|Iprobe_is_enabled _ | Iopaque @@ -263,7 +263,7 @@ method! select_store is_assign addr exp = (Ispecific(Istore_int(Nativeint.of_int n, addr, is_assign)), Ctuple []) | (Cconst_natint (n, _dbg)) when is_immediate_natint n -> (Ispecific(Istore_int(n, addr, is_assign)), Ctuple []) - | Cconst_int _ + | Cconst_int _ | Cconst_vec128 _ | Cconst_natint (_, _) | Cconst_float (_, _) | Cconst_symbol (_, _) | Cvar _ | Clet (_, _, _) | Clet_mut (_, _, _, _) | Cphantom_let (_, _, _) | Cassign (_, _) | Ctuple _ | Cop (_, _, _) | Csequence (_, _) diff --git a/backend/arm64/arch.ml b/backend/arm64/arch.ml index f61ed7fa36f..a01706c51dd 100644 --- a/backend/arm64/arch.ml +++ b/backend/arm64/arch.ml @@ -75,6 +75,8 @@ let size_addr = 8 let size_int = 8 let size_float = 8 +let size_vec128 = 16 + let allow_unaligned_access = false (* Behavior of division *) diff --git a/backend/arm64/emit.mlp b/backend/arm64/emit.mlp index 0a5a259109a..22711e03c1d 100644 --- a/backend/arm64/emit.mlp +++ b/backend/arm64/emit.mlp @@ -33,11 +33,11 @@ let fastcode_flag = ref true (* Names for special regs *) -let reg_domain_state_ptr = phys_reg 25 (* x28 *) -let reg_trap_ptr = phys_reg 23 (* x26 *) -let reg_alloc_ptr = phys_reg 24 (* x27 *) -let reg_tmp1 = phys_reg 26 (* x16 *) -let reg_x8 = phys_reg 8 (* x8 *) +let reg_domain_state_ptr = phys_reg Int 25 (* x28 *) +let reg_trap_ptr = phys_reg Int 23 (* x26 *) +let reg_alloc_ptr = phys_reg Int 24 (* x27 *) +let reg_tmp1 = phys_reg Int 26 (* x16 *) +let reg_x8 = phys_reg Int 8 (* x8 *) (* Output a label *) @@ -71,7 +71,7 @@ let emit_symbol_size sym = (* Output a pseudo-register *) let emit_reg = function - {loc = Reg r} -> emit_string (register_name r) + {loc = Reg r; typ} -> emit_string (register_name typ r) | _ -> fatal_error "Emit.emit_reg" (* Likewise, but with the 32-bit name of the register *) @@ -90,7 +90,7 @@ let emit_wreg = function let stack_offset = ref 0 -let num_stack_slots = Array.make Proc.num_register_classes 0 +let num_stack_slots = Array.make Proc.num_stack_slot_classes 0 let prologue_required = ref false @@ -130,7 +130,7 @@ let emit_stack r = let ofs = n + Domainstate.(idx_of_field Domain_extra_params) * 8 in `[{emit_reg reg_domain_state_ptr}, #{emit_int ofs}]` | Stack s -> - let ofs = slot_offset s (register_class r) in + let ofs = slot_offset s (stack_slot_class r.typ) in `[sp, #{emit_int ofs}]` | _ -> fatal_error "Emit.emit_stack" @@ -160,7 +160,7 @@ let record_frame_label live dbg = | {typ = Val; loc = Reg r} -> live_offset := ((r lsl 1) + 1) :: !live_offset | {typ = Val; loc = Stack s} as reg -> - live_offset := slot_offset s (register_class reg) :: !live_offset + live_offset := slot_offset s (stack_slot_class reg.typ) :: !live_offset | {typ = Addr} as r -> Misc.fatal_error ("bad GC root " ^ Reg.name r) | _ -> ()) @@ -495,6 +495,9 @@ module BR = Branch_relaxation.Make (struct num_instructions_for_intconst n | Lop (Iconst_float _) -> 2 | Lop (Iconst_symbol _) -> 2 + | Lop (Iconst_vec128 _) -> + (* CR mslater: (SIMD) arm64 *) + Misc.fatal_error "128-bit vectors are not supported on this architecture" | Lop (Iintop_atomic _) -> (* Never generated; builtins are not yet translated to atomics *) assert false @@ -764,6 +767,9 @@ let emit_instr i = let lbl = float_literal f in emit_load_literal i.res.(0) lbl end + | Lop(Iconst_vec128 _) -> + (* CR mslater: (SIMD) arm64 *) + Misc.fatal_error "128-bit vectors are not supported on this architecture" | Lop(Iconst_symbol s) -> emit_load_symbol_addr i.res.(0) s.sym_name | Lop(Icall_ind) -> @@ -818,6 +824,8 @@ let emit_instr i = ` fcvt {emit_reg dst}, s7\n` | Word_int | Word_val | Double -> ` ldr {emit_reg dst}, {emit_addressing addr base}\n` + (* CR mslater: (SIMD) arm64 *) + | Onetwentyeight -> fatal_error "arm64: got 128 bit memory chunk" end | Lop(Istore(size, addr, _)) -> let src = i.arg.(0) in @@ -840,6 +848,8 @@ let emit_instr i = ` str s7, {emit_addressing addr base}\n`; | Word_int | Word_val | Double -> ` str {emit_reg src}, {emit_addressing addr base}\n` + (* CR mslater: (SIMD) arm64 *) + | Onetwentyeight -> fatal_error "arm64: got 128 bit memory chunk" end | Lop(Ialloc { bytes = n; dbginfo; mode = Alloc_heap }) -> assembly_code_for_allocation i ~n ~far:false ~dbginfo @@ -1128,7 +1138,7 @@ let fundecl fundecl = stack_offset := 0; call_gc_sites := []; bound_error_sites := []; - for i = 0 to Proc.num_register_classes - 1 do + for i = 0 to Proc.num_stack_slot_classes - 1 do num_stack_slots.(i) <- fundecl.fun_num_stack_slots.(i); done; prologue_required := fundecl.fun_prologue_required; @@ -1180,6 +1190,9 @@ let emit_item = function | Cint n -> ` .quad {emit_nativeint n}\n` | Csingle f -> emit_float32_directive ".long" (Int32.bits_of_float f) | Cdouble f -> emit_float64_directive ".quad" (Int64.bits_of_float f) + | Cvec128 _ -> + (* CR mslater: (SIMD) arm64 *) + Misc.fatal_error "128-bit vectors not supported on this architecture" | Csymbol_address s -> ` .quad {emit_symbol s.sym_name}\n` | Cstring s -> emit_string_directive " .ascii " s | Cskip n -> if n > 0 then ` .space {emit_int n}\n` diff --git a/backend/arm64/proc.ml b/backend/arm64/proc.ml index 9deabe5e7ac..7e742cc3955 100644 --- a/backend/arm64/proc.ml +++ b/backend/arm64/proc.ml @@ -66,12 +66,23 @@ let register_class r = match r.typ with | Val | Int | Addr -> 0 | Float -> 1 + (* CR mslater: (SIMD) arm64 *) + | Vec128 -> fatal_error "arm64: got vec128 register" -let register_class_tag c = +let num_stack_slot_classes = 2 + +let stack_slot_class typ = + match typ with + | Val | Int | Addr -> 0 + | Float -> 1 + (* CR mslater: (SIMD) arm64 *) + | Vec128 -> fatal_error "arm64: got vec128 register" + +let stack_class_tag c = match c with | 0 -> "i" | 1 -> "f" - | c -> Misc.fatal_errorf "Unspecified register class %d" c + | c -> Misc.fatal_errorf "Unspecified stack slot class %d" c let num_available_registers = [| 23; 32 |] (* first 23 int regs allocatable; all float regs allocatable *) @@ -79,8 +90,14 @@ let num_available_registers = let first_available_register = [| 0; 100 |] -let register_name r = - if r < 100 then int_reg_name.(r) else float_reg_name.(r - 100) +let register_name ty r = + match ty with + | Val | Int | Addr -> + int_reg_name.(r - first_available_register.(0)) + | Float -> + float_reg_name.(r - first_available_register.(1)) + (* CR mslater: (SIMD) arm64 *) + | Vec128 -> fatal_error "arm64: got vec128 register" let rotate_registers = true @@ -103,11 +120,19 @@ let hard_float_reg = let all_phys_regs = Array.append hard_int_reg hard_float_reg -let phys_reg n = - if n < 100 then hard_int_reg.(n) else hard_float_reg.(n - 100) +let precolored_regs = + let phys_regs = Reg.set_of_array all_phys_regs in + fun () -> phys_regs + +let phys_reg ty n = + match ty with + | Int | Addr | Val -> hard_int_reg.(n) + | Float -> hard_float_reg.(n - 100) + (* CR mslater: (SIMD) arm64 *) + | Vec128 -> fatal_error "arm64: got vec128 register" -let reg_x8 = phys_reg 8 -let reg_d7 = phys_reg 107 +let reg_x8 = phys_reg Int 8 +let reg_d7 = phys_reg Float 107 let stack_slot slot ty = Reg.at_location ty (Stack slot) @@ -118,7 +143,7 @@ let size_domainstate_args = 64 * size_int let loc_int last_int make_stack int ofs = if !int <= last_int then begin - let l = phys_reg !int in + let l = phys_reg Int !int in incr int; l end else begin ofs := Misc.align !ofs size_int; @@ -128,7 +153,7 @@ let loc_int last_int make_stack int ofs = let loc_float last_float make_stack float ofs = if !float <= last_float then begin - let l = phys_reg !float in + let l = phys_reg Float !float in incr float; l end else begin ofs := Misc.align !ofs size_float; @@ -138,7 +163,7 @@ let loc_float last_float make_stack float ofs = let loc_int32 last_int make_stack int ofs = if !int <= last_int then begin - let l = phys_reg !int in + let l = phys_reg Int !int in incr int; l end else begin let l = stack_slot (make_stack !ofs) Int in @@ -158,6 +183,8 @@ let calling_conventions loc.(i) <- loc_int last_int make_stack int ofs | Float -> loc.(i) <- loc_float last_float make_stack float ofs + (* CR mslater: (SIMD) arm64 *) + | Vec128 -> fatal_error "arm64: got vec128 register" done; (loc, Misc.align (max 0 !ofs) 16) (* keep stack 16-aligned *) @@ -220,6 +247,8 @@ let external_calling_conventions loc.(i) <- [| loc_int32 last_int make_stack int ofs |] | XFloat -> loc.(i) <- [| loc_float last_float make_stack float ofs |] + (* CR mslater: (SIMD) arm64 *) + | XVec128 -> fatal_error "arm64: got vec128 register" end) ty_args; (loc, Misc.align !ofs 16) (* keep stack 16-aligned *) @@ -230,7 +259,7 @@ let loc_external_arguments ty_args = let loc_external_results res = let (loc, _) = calling_conventions 0 1 100 100 not_supported 0 res in loc -let loc_exn_bucket = phys_reg 0 +let loc_exn_bucket = phys_reg Int 0 (* See "DWARF for the ARM 64-bit architecture (AArch64)" available from developer.arm.com. *) @@ -265,11 +294,13 @@ let regs_are_volatile _rs = false let destroyed_at_c_call = (* x19-x28, d8-d15 preserved *) - Array.of_list (List.map phys_reg - [0;1;2;3;4;5;6;7;8;9;10;11;12;13;14;15; - 100;101;102;103;104;105;106;107; + Array.append + (Array.of_list (List.map (phys_reg Int) + [0;1;2;3;4;5;6;7;8;9;10;11;12;13;14;15])) + (Array.of_list (List.map (phys_reg Float) + [100;101;102;103;104;105;106;107; 116;117;118;119;120;121;122;123; - 124;125;126;127;128;129;130;131]) + 124;125;126;127;128;129;130;131])) (* note: keep this function in sync with `destroyed_at_{basic,terminator}` below. *) let destroyed_at_oper = function @@ -284,7 +315,7 @@ let destroyed_at_oper = function [| reg_d7 |] (* d7 / s7 destroyed *) | _ -> [||] -let destroyed_at_raise = all_phys_regs +let destroyed_at_raise () = all_phys_regs let destroyed_at_reloadretaddr = [| |] diff --git a/backend/arm64/selection.ml b/backend/arm64/selection.ml index fefbcf75236..2406572cb31 100644 --- a/backend/arm64/selection.ml +++ b/backend/arm64/selection.ml @@ -33,7 +33,9 @@ let is_offset chunk n = | Thirtytwo_unsigned | Thirtytwo_signed | Single -> n land 3 = 0 && n lsr 2 < 0x1000 | Word_int | Word_val | Double -> - n land 7 = 0 && n lsr 3 < 0x1000) + n land 7 = 0 && n lsr 3 < 0x1000 + (* CR mslater: (SIMD) arm64 *) + | Onetwentyeight -> Misc.fatal_error "arm64: got 128 bit memory chunk") let is_logical_immediate n = Arch.is_logical_immediate (Nativeint.of_int n) diff --git a/backend/asm_targets/asm_directives_intf.ml b/backend/asm_targets/asm_directives_intf.ml index 8a5ec8345c1..46be8c4cb73 100644 --- a/backend/asm_targets/asm_directives_intf.ml +++ b/backend/asm_targets/asm_directives_intf.ml @@ -46,6 +46,7 @@ module type Arg = sig | NONE | DWORD | QWORD + | VEC128 val file : file_num:int -> file_name:string -> unit diff --git a/backend/cfg/cfg.ml b/backend/cfg/cfg.ml index 000b4b11b49..2683a18a0f6 100644 --- a/backend/cfg/cfg.ml +++ b/backend/cfg/cfg.ml @@ -247,6 +247,8 @@ let dump_op ppf = function | Const_int n -> Format.fprintf ppf "const_int %nd" n | Const_float f -> Format.fprintf ppf "const_float %F" (Int64.float_of_bits f) | Const_symbol s -> Format.fprintf ppf "const_symbol %s" s.sym_name + | Const_vec128 { high; low } -> + Format.fprintf ppf "const vec128 %016Lx:%016Lx" high low | Stackoffset n -> Format.fprintf ppf "stackoffset %d" n | Load _ -> Format.fprintf ppf "load" | Store _ -> Format.fprintf ppf "store" @@ -449,6 +451,7 @@ let is_pure_operation : operation -> bool = function | Const_int _ -> true | Const_float _ -> true | Const_symbol _ -> true + | Const_vec128 _ -> true | Stackoffset _ -> false | Load _ -> true | Store _ -> false @@ -498,11 +501,16 @@ let is_pure_basic : basic -> bool = function let is_noop_move instr = match instr.desc with - | Op (Move | Spill | Reload) -> - (match instr.arg.(0).loc with + | Op (Move | Spill | Reload) -> ( + match instr.arg.(0).loc with | Unknown -> false - | Reg _ | Stack _ -> Reg.same_loc instr.arg.(0) instr.res.(0)) - && Proc.register_class instr.arg.(0) = Proc.register_class instr.res.(0) + | Reg _ -> + Reg.same_loc instr.arg.(0) instr.res.(0) + && Proc.register_class instr.arg.(0) = Proc.register_class instr.res.(0) + | Stack _ -> + Reg.same_loc instr.arg.(0) instr.res.(0) + && Proc.stack_slot_class instr.arg.(0).typ + = Proc.stack_slot_class instr.res.(0).typ) | Op (Csel _) -> ( match instr.res.(0).loc with | Unknown -> false @@ -512,11 +520,12 @@ let is_noop_move instr = let ifnot = instr.arg.(len - 1) in Reg.same_loc instr.res.(0) ifso && Reg.same_loc instr.res.(0) ifnot) | Op - ( Const_int _ | Const_float _ | Const_symbol _ | Stackoffset _ | Load _ - | Store _ | Intop _ | Intop_imm _ | Intop_atomic _ | Negf | Absf | Addf - | Subf | Mulf | Divf | Compf _ | Floatofint | Intoffloat | Opaque - | Valueofint | Intofvalue | Probe_is_enabled _ | Specific _ - | Name_for_debugger _ | Begin_region | End_region ) + ( Const_int _ | Const_float _ | Const_symbol _ | Const_vec128 _ + | Stackoffset _ | Load _ | Store _ | Intop _ | Intop_imm _ + | Intop_atomic _ | Negf | Absf | Addf | Subf | Mulf | Divf | Compf _ + | Floatofint | Intoffloat | Opaque | Valueofint | Intofvalue + | Probe_is_enabled _ | Specific _ | Name_for_debugger _ | Begin_region + | End_region ) | Reloadretaddr | Pushtrap _ | Poptrap | Prologue -> false diff --git a/backend/cfg/cfg_intf.ml b/backend/cfg/cfg_intf.ml index 1e1be5e6aac..02e6d06447b 100644 --- a/backend/cfg/cfg_intf.ml +++ b/backend/cfg/cfg_intf.ml @@ -62,6 +62,7 @@ module S = struct | Const_int of nativeint (* CR-someday xclerc: change to `Targetint.t` *) | Const_float of int64 | Const_symbol of Cmm.symbol + | Const_vec128 of Cmm.vec128_bits | Stackoffset of int | Load of Cmm.memory_chunk * Arch.addressing_mode * Mach.mutable_flag | Store of Cmm.memory_chunk * Arch.addressing_mode * bool diff --git a/backend/cfg/cfg_to_linear_desc.ml b/backend/cfg/cfg_to_linear_desc.ml index d034ccf4578..1391f887ff0 100644 --- a/backend/cfg/cfg_to_linear_desc.ml +++ b/backend/cfg/cfg_to_linear_desc.ml @@ -15,6 +15,7 @@ let from_basic (basic : basic) : Linear.instruction_desc = | Const_int n -> Iconst_int n | Const_float n -> Iconst_float n | Const_symbol n -> Iconst_symbol n + | Const_vec128 bits -> Iconst_vec128 bits | Stackoffset n -> Istackoffset n | Load (c, m, i) -> Iload (c, m, i) | Store (c, m, b) -> Istore (c, m, b) diff --git a/backend/cfg/cfgize.ml b/backend/cfg/cfgize.ml index 5bd294c1232..9b54bda1556 100644 --- a/backend/cfg/cfgize.ml +++ b/backend/cfg/cfgize.ml @@ -144,6 +144,7 @@ let basic_or_terminator_of_operation : | Iconst_int i -> Basic (Op (Const_int i)) | Iconst_float f -> Basic (Op (Const_float f)) | Iconst_symbol s -> Basic (Op (Const_symbol s)) + | Iconst_vec128 bits -> Basic (Op (Const_vec128 bits)) | Icall_ind -> With_next_label (fun label_after -> Call { op = Indirect; label_after }) | Icall_imm { func } -> @@ -670,10 +671,11 @@ module Stack_offset_and_exn = struct | Op (Stackoffset n) -> stack_offset + n, traps | Op ( Move | Spill | Reload | Const_int _ | Const_float _ | Const_symbol _ - | Load _ | Store _ | Intop _ | Intop_imm _ | Intop_atomic _ | Negf - | Absf | Addf | Subf | Mulf | Divf | Compf _ | Floatofint | Intoffloat - | Valueofint | Csel _ | Intofvalue | Probe_is_enabled _ | Opaque - | Begin_region | End_region | Specific _ | Name_for_debugger _ ) + | Const_vec128 _ | Load _ | Store _ | Intop _ | Intop_imm _ + | Intop_atomic _ | Negf | Absf | Addf | Subf | Mulf | Divf | Compf _ + | Floatofint | Intoffloat | Valueofint | Csel _ | Intofvalue + | Probe_is_enabled _ | Opaque | Begin_region | End_region | Specific _ + | Name_for_debugger _ ) | Reloadretaddr | Prologue -> stack_offset, traps diff --git a/backend/cfg/linear_to_cfg.ml b/backend/cfg/linear_to_cfg.ml index bb6298a109c..ec063de200c 100644 --- a/backend/cfg/linear_to_cfg.ml +++ b/backend/cfg/linear_to_cfg.ml @@ -616,6 +616,7 @@ let rec create_blocks (t : t) (i : L.instruction) (block : C.basic_block) | Iconst_int n -> basic (Const_int n) | Iconst_float n -> basic (Const_float n) | Iconst_symbol n -> basic (Const_symbol n) + | Iconst_vec128 bits -> basic (Const_vec128 bits) | Inegf -> basic Negf | Iabsf -> basic Absf | Iaddf -> basic Addf diff --git a/backend/checkmach.ml b/backend/checkmach.ml index c3a27f70871..dc72562d34e 100644 --- a/backend/checkmach.ml +++ b/backend/checkmach.ml @@ -932,8 +932,8 @@ end = struct let transform_operation t (op : Mach.operation) ~next ~exn dbg = match op with | Imove | Ispill | Ireload | Iconst_int _ | Iconst_float _ | Iconst_symbol _ - | Iload _ | Icompf _ | Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf - | Ifloatofint | Iintoffloat + | Iconst_vec128 _ | Iload _ | Icompf _ | Inegf | Iabsf | Iaddf | Isubf + | Imulf | Idivf | Ifloatofint | Iintoffloat | Iintop_imm ( ( Iadd | Isub | Imul | Imulh _ | Idiv | Imod | Iand | Ior | Ixor | Ilsl | Ilsr | Iasr | Ipopcnt | Iclz _ | Ictz _ | Icomp _ ), diff --git a/backend/cmm.ml b/backend/cmm.ml index 87034fab329..f1e7376a2d7 100644 --- a/backend/cmm.ml +++ b/backend/cmm.ml @@ -18,6 +18,7 @@ type machtype_component = Cmx_format.machtype_component = | Addr | Int | Float + | Vec128 type machtype = machtype_component array @@ -26,10 +27,11 @@ let typ_val = [|Val|] let typ_addr = [|Addr|] let typ_int = [|Int|] let typ_float = [|Float|] +let typ_vec128 = [|Vec128|] (** [machtype_component]s are partially ordered as follows: - Addr Float + Addr Float Vec128 ^ | Val @@ -56,8 +58,11 @@ let lub_component comp1 comp2 = | Addr, Addr -> Addr | Addr, Val -> Addr | Float, Float -> Float - | (Int | Addr | Val), Float - | Float, (Int | Addr | Val) -> + | Vec128, Vec128 -> Vec128 + | (Int | Addr | Val), (Float | Vec128) + | (Float | Vec128), (Int | Addr | Val) + | Float, Vec128 + | Vec128, Float -> (* Float unboxing code must be sure to avoid this case. *) assert false @@ -73,8 +78,11 @@ let ge_component comp1 comp2 = | Addr, Addr -> true | Addr, Val -> true | Float, Float -> true - | (Int | Addr | Val), Float - | Float, (Int | Addr | Val) -> + | Vec128, Vec128 -> true + | (Int | Addr | Val), (Float | Vec128) + | (Float | Vec128), (Int | Addr | Val) + | Float, Vec128 + | Vec128, Float -> assert false type exttype = @@ -82,12 +90,14 @@ type exttype = | XInt32 | XInt64 | XFloat + | XVec128 let machtype_of_exttype = function | XInt -> typ_int | XInt32 -> typ_int | XInt64 -> if Arch.size_int = 4 then [|Int;Int|] else typ_int | XFloat -> typ_float + | XVec128 -> typ_vec128 let machtype_of_exttype_list xtl = Array.concat (List.map machtype_of_exttype xtl) @@ -182,6 +192,7 @@ type memory_chunk = | Word_val | Single | Double + | Onetwentyeight and operation = Capply of machtype * Lambda.region_close @@ -225,6 +236,7 @@ and operation = type kind_for_unboxing = | Any | Boxed_integer of Lambda.boxed_integer + | Boxed_vector of Lambda.boxed_vector | Boxed_float type is_global = Global | Local @@ -238,12 +250,15 @@ type symbol = { sym_name : string; sym_global : is_global } +type vec128_bits = { low : int64; high: int64 } + let global_symbol sym_name = { sym_name; sym_global = Global } type expression = Cconst_int of int * Debuginfo.t | Cconst_natint of nativeint * Debuginfo.t | Cconst_float of float * Debuginfo.t + | Cconst_vec128 of vec128_bits * Debuginfo.t | Cconst_symbol of symbol * Debuginfo.t | Cvar of Backend_var.t | Clet of Backend_var.With_provenance.t * expression * expression @@ -298,6 +313,7 @@ type data_item = | Cint of nativeint | Csingle of float | Cdouble of float + | Cvec128 of vec128_bits | Csymbol_address of symbol | Cstring of string | Cskip of int @@ -342,6 +358,7 @@ let iter_shallow_tail f = function | Cconst_int _ | Cconst_natint _ | Cconst_float _ + | Cconst_vec128 _ | Cconst_symbol _ | Cvar _ | Cassign _ @@ -384,6 +401,7 @@ let map_shallow_tail ?kind f = function | Cconst_int _ | Cconst_natint _ | Cconst_float _ + | Cconst_vec128 _ | Cconst_symbol _ | Cvar _ | Cassign _ @@ -440,6 +458,7 @@ let iter_shallow f = function | Cconst_int _ | Cconst_natint _ | Cconst_float _ + | Cconst_vec128 _ | Cconst_symbol _ | Cvar _ -> () @@ -477,6 +496,7 @@ let map_shallow f = function | Cconst_int _ | Cconst_natint _ | Cconst_float _ + | Cconst_vec128 _ | Cconst_symbol _ | Cvar _ as c -> @@ -488,10 +508,12 @@ let equal_machtype_component left right = | Addr, Addr -> true | Int, Int -> true | Float, Float -> true - | Val, (Addr | Int | Float) - | Addr, (Val | Int | Float) - | Int, (Val | Addr | Float) - | Float, (Val | Addr | Int) -> + | Vec128, Vec128 -> true + | Val, (Addr | Int | Float | Vec128) + | Addr, (Val | Int | Float | Vec128) + | Int, (Val | Addr | Float | Vec128) + | Float, (Val | Addr | Int | Vec128) + | Vec128, (Val | Addr | Int | Float) -> false let equal_exttype left right = @@ -500,10 +522,12 @@ let equal_exttype left right = | XInt32, XInt32 -> true | XInt64, XInt64 -> true | XFloat, XFloat -> true - | XInt, (XInt32 | XInt64 | XFloat) - | XInt32, (XInt | XInt64 | XFloat) - | XInt64, (XInt | XInt32 | XFloat) - | XFloat, (XInt | XInt32 | XInt64) -> + | XVec128, XVec128 -> true + | XInt, (XInt32 | XInt64 | XFloat | XVec128) + | XInt32, (XInt | XInt64 | XFloat | XVec128) + | XInt64, (XInt | XInt32 | XFloat | XVec128) + | XFloat, (XInt | XInt32 | XInt64 | XVec128) + | XVec128, (XInt | XInt32 | XInt64 | XFloat) -> false let equal_float_comparison left right = @@ -542,26 +566,40 @@ let equal_memory_chunk left right = | Word_val, Word_val -> true | Single, Single -> true | Double, Double -> true + | Onetwentyeight, Onetwentyeight -> true | Byte_unsigned, (Byte_signed | Sixteen_unsigned | Sixteen_signed | Thirtytwo_unsigned - | Thirtytwo_signed | Word_int | Word_val | Single | Double) + | Thirtytwo_signed | Word_int | Word_val | Single | Double + | Onetwentyeight) | Byte_signed, (Byte_unsigned | Sixteen_unsigned | Sixteen_signed | Thirtytwo_unsigned - | Thirtytwo_signed | Word_int | Word_val | Single | Double) + | Thirtytwo_signed | Word_int | Word_val | Single | Double + | Onetwentyeight) | Sixteen_unsigned, (Byte_unsigned | Byte_signed | Sixteen_signed | Thirtytwo_unsigned - | Thirtytwo_signed | Word_int | Word_val | Single | Double) + | Thirtytwo_signed | Word_int | Word_val | Single | Double + | Onetwentyeight) | Sixteen_signed, (Byte_unsigned | Byte_signed | Sixteen_unsigned | Thirtytwo_unsigned - | Thirtytwo_signed | Word_int | Word_val | Single | Double) + | Thirtytwo_signed | Word_int | Word_val | Single | Double + | Onetwentyeight) | Thirtytwo_unsigned, (Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed - | Thirtytwo_signed | Word_int | Word_val | Single | Double) + | Thirtytwo_signed | Word_int | Word_val | Single | Double + | Onetwentyeight) | Thirtytwo_signed, (Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed - | Thirtytwo_unsigned | Word_int | Word_val | Single | Double) + | Thirtytwo_unsigned | Word_int | Word_val | Single | Double + | Onetwentyeight) | Word_int, (Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed - | Thirtytwo_unsigned | Thirtytwo_signed | Word_val | Single | Double) + | Thirtytwo_unsigned | Thirtytwo_signed | Word_val | Single | Double + | Onetwentyeight) | Word_val, (Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed - | Thirtytwo_unsigned | Thirtytwo_signed | Word_int | Single | Double) + | Thirtytwo_unsigned | Thirtytwo_signed | Word_int | Single | Double + | Onetwentyeight) | Single, (Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed - | Thirtytwo_unsigned | Thirtytwo_signed | Word_int | Word_val | Double) + | Thirtytwo_unsigned | Thirtytwo_signed | Word_int | Word_val | Double + | Onetwentyeight) | Double, (Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed - | Thirtytwo_unsigned | Thirtytwo_signed | Word_int | Word_val | Single) -> + | Thirtytwo_unsigned | Thirtytwo_signed | Word_int | Word_val | Single + | Onetwentyeight) + | Onetwentyeight, (Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed + | Thirtytwo_unsigned | Thirtytwo_signed | Word_int | Word_val | Single + | Double) -> false let equal_integer_comparison left right = diff --git a/backend/cmm.mli b/backend/cmm.mli index 4afc8c87c1f..019c7603587 100644 --- a/backend/cmm.mli +++ b/backend/cmm.mli @@ -20,6 +20,7 @@ type machtype_component = Cmx_format.machtype_component = | Addr | Int | Float + | Vec128 (* - [Val] denotes a valid OCaml value: either a pointer to the beginning of a heap block, an infix pointer if it is preceded by the correct @@ -54,6 +55,7 @@ val typ_val: machtype val typ_addr: machtype val typ_int: machtype val typ_float: machtype +val typ_vec128: machtype (** Least upper bound of two [machtype_component]s. *) val lub_component @@ -73,6 +75,7 @@ type exttype = | XInt32 (**r 32-bit integer *) | XInt64 (**r 64-bit integer *) | XFloat (**r double-precision FP number *) + | XVec128 (**r 128-bit vector *) (** A variant of [machtype] used to describe arguments to external C functions *) @@ -178,6 +181,9 @@ type memory_chunk = | Single | Double (* word-aligned 64-bit float see PR#10433 *) + | Onetwentyeight (* word-aligned 128-bit vector + CR mslater: (SIMD) alignment *) + and operation = Capply of machtype * Lambda.region_close | Cextcall of @@ -229,6 +235,7 @@ and operation = type kind_for_unboxing = | Any (* This may contain anything, including non-scannable things *) | Boxed_integer of Lambda.boxed_integer + | Boxed_vector of Lambda.boxed_vector | Boxed_float type is_global = Global | Local @@ -250,6 +257,10 @@ type symbol = { sym_name : string; sym_global : is_global } +(* SIMD vectors are untyped in the backend. + This record holds the bitwise representation of a 128-bit value. *) +type vec128_bits = { low : int64; high: int64 } + val global_symbol : string -> symbol (** Every basic block should have a corresponding [Debuginfo.t] for its @@ -258,6 +269,7 @@ type expression = Cconst_int of int * Debuginfo.t | Cconst_natint of nativeint * Debuginfo.t | Cconst_float of float * Debuginfo.t + | Cconst_vec128 of vec128_bits * Debuginfo.t | Cconst_symbol of symbol * Debuginfo.t | Cvar of Backend_var.t | Clet of Backend_var.With_provenance.t * expression * expression @@ -315,6 +327,7 @@ type data_item = | Cint of nativeint | Csingle of float | Cdouble of float + | Cvec128 of vec128_bits | Csymbol_address of symbol | Cstring of string | Cskip of int diff --git a/backend/cmm_helpers.ml b/backend/cmm_helpers.ml index a69028ddefb..090343df451 100644 --- a/backend/cmm_helpers.ml +++ b/backend/cmm_helpers.ml @@ -81,6 +81,11 @@ let float_header = block_header Obj.double_tag (size_float / size_addr) let float_local_header = local_block_header Obj.double_tag (size_float / size_addr) +let boxedvec128_header = block_header Obj.abstract_tag (size_vec128 / size_addr) + +let boxedvec128_local_header = + local_block_header Obj.abstract_tag (size_vec128 / size_addr) + let floatarray_header len = (* Zero-sized float arrays have tag zero for consistency with [caml_alloc_float_array]. *) @@ -144,6 +149,15 @@ let alloc_float_header mode dbg = | Lambda.Alloc_heap -> Cconst_natint (float_header, dbg) | Lambda.Alloc_local -> Cconst_natint (float_local_header, dbg) +let alloc_boxedvector_header vi mode dbg = + let header, local_header = + match vi with + | Primitive.Pvec128 -> boxedvec128_header, boxedvec128_local_header + in + match mode with + | Lambda.Alloc_heap -> Cconst_natint (header, dbg) + | Lambda.Alloc_local -> Cconst_natint (local_header, dbg) + let alloc_floatarray_header len dbg = Cconst_natint (floatarray_header len, dbg) let alloc_closure_header ~mode sz dbg = @@ -699,6 +713,41 @@ let rec unbox_float dbg = | Ctail e -> Ctail (unbox_float dbg e) | cmm -> Cop (Cload (Double, Immutable), [cmm], dbg)) +(* Vectors *) + +let box_vector dbg vi m c = + Cop (Calloc m, [alloc_boxedvector_header vi m dbg; c], dbg) + +let rec unbox_vec128 dbg = + map_tail ~kind:Any (function + | Cop (Calloc _, [Cconst_natint (hdr, _); c], _) + when Nativeint.equal hdr boxedvec128_header + || Nativeint.equal hdr boxedvec128_local_header -> + c + | Cconst_symbol (s, _dbg) as cmm -> ( + match Cmmgen_state.structured_constant_of_sym s.sym_name with + | Some (Uconst_vec128 { low; high }) -> + Cconst_vec128 ({ low; high }, dbg) (* or keep _dbg? *) + | _ -> Cop (Cload (Onetwentyeight, Immutable), [cmm], dbg)) + | Cregion e as cmm -> ( + (* It is valid to push unboxing inside a Cregion except when the extra + unboxing logic pushes a tail call out of tail position *) + match + map_tail ~kind:Any + (function + | Cop (Capply (_, Rc_close_at_apply), _, _) -> raise Exit + | Ctail e -> Ctail (unbox_vec128 dbg e) + | e -> unbox_vec128 dbg e) + e + with + | e -> Cregion e + | exception Exit -> Cop (Cload (Onetwentyeight, Immutable), [cmm], dbg)) + | Ctail e -> Ctail (unbox_vec128 dbg e) + | cmm -> Cop (Cload (Onetwentyeight, Immutable), [cmm], dbg)) + +let unbox_vector dbg vi e = + match vi with Primitive.Pvec128 -> unbox_vec128 dbg e + (* Complex *) let box_complex dbg c_re c_im = @@ -1019,6 +1068,7 @@ module Extended_machtype_component = struct | Tagged_int | Any_int | Float + | Vec128 let of_machtype_component (component : machtype_component) = match component with @@ -1026,6 +1076,7 @@ module Extended_machtype_component = struct | Addr -> Addr | Int -> Any_int | Float -> Float + | Vec128 -> Vec128 let to_machtype_component t : machtype_component = match t with @@ -1033,6 +1084,7 @@ module Extended_machtype_component = struct | Addr -> Addr | Tagged_int | Any_int -> Int | Float -> Float + | Vec128 -> Vec128 let change_tagged_int_to_val t : machtype_component = match t with @@ -1041,6 +1093,7 @@ module Extended_machtype_component = struct | Tagged_int -> Val | Any_int -> Int | Float -> Float + | Vec128 -> Vec128 end module Extended_machtype = struct @@ -1056,6 +1109,8 @@ module Extended_machtype = struct let typ_float = [| Extended_machtype_component.Float |] + let typ_vec128 = [| Extended_machtype_component.Vec128 |] + let typ_void = [||] let of_machtype machtype = @@ -1073,6 +1128,7 @@ module Extended_machtype = struct | Pbottom -> Misc.fatal_error "No unique Extended_machtype for layout [Pbottom]" | Punboxed_float -> typ_float + | Punboxed_vector Pvec128 -> typ_vec128 | Punboxed_int _ -> (* Only 64-bit architectures, so this is always [typ_int] *) typ_any_int @@ -1093,6 +1149,7 @@ let machtype_identifier t = | Val -> 'V' | Int -> 'I' | Float -> 'F' + | Vec128 -> 'X' | Addr -> Misc.fatal_error "[Addr] is forbidden inside arity for generic functions" in @@ -2738,17 +2795,19 @@ let tuplify_function arity return = let max_arity_optimized = 15 +let ints_per_float = size_float / Arch.size_int + +let ints_per_vec128 = size_vec128 / Arch.size_int + let machtype_stored_size t = - if Arch.size_int = 4 - then - Array.fold_left - (fun cur c -> - match c with - | Addr -> Misc.fatal_error "[Addr] cannot be stored" - | Val | Int -> cur + 1 - | Float -> cur + 2) - 0 t - else Array.length t + Array.fold_left + (fun cur c -> + match c with + | Addr -> Misc.fatal_error "[Addr] cannot be stored" + | Val | Int -> cur + 1 + | Float -> cur + ints_per_float + | Vec128 -> cur + ints_per_vec128) + 0 t let machtype_non_scanned_size t = Array.fold_left @@ -2757,7 +2816,8 @@ let machtype_non_scanned_size t = | Addr -> Misc.fatal_error "[Addr] cannot be stored" | Val -> cur | Int -> cur + 1 - | Float -> cur + if Arch.size_int = 4 then 2 else 1) + | Float -> cur + ints_per_float + | Vec128 -> cur + ints_per_vec128) 0 t let make_tuple l = match l with [e] -> e | _ -> Ctuple l @@ -2766,7 +2826,10 @@ let value_slot_given_machtype vs = let non_scanned, scanned = List.partition (fun (_, c) -> - match c with Int | Float -> true | Val -> false | Addr -> assert false) + match c with + | Int | Float | Vec128 -> true + | Val -> false + | Addr -> assert false) vs in List.map (fun (v, _) -> Cvar v) (non_scanned @ scanned) @@ -2782,8 +2845,11 @@ let read_from_closure_given_machtype t clos base_offset dbg = | Int -> (non_scanned_pos + 1, scanned_pos), load Word_int non_scanned_pos | Float -> - ( ((non_scanned_pos + if Arch.size_int = 4 then 2 else 1), scanned_pos), + ( (non_scanned_pos + ints_per_float, scanned_pos), load Double non_scanned_pos ) + | Vec128 -> + ( (non_scanned_pos + ints_per_vec128, scanned_pos), + load Onetwentyeight non_scanned_pos ) | Val -> (non_scanned_pos, scanned_pos + 1), load Word_val scanned_pos | Addr -> Misc.fatal_error "[Addr] cannot be read") (base_offset, base_offset + machtype_non_scanned_size t) @@ -3698,6 +3764,9 @@ let emit_nativeint_constant symb n cont = emit_block symb boxedintnat_header (emit_boxed_nativeint_constant_fields n cont) +let emit_vec128_constant symb bits cont = + emit_block symb boxedvec128_header (Cvec128 bits :: cont) + let emit_float_array_constant symb fields cont = emit_block symb (floatarray_header (List.length fields)) @@ -4056,6 +4125,8 @@ let int32 ~dbg i = natint_const_untagged dbg (Nativeint.of_int32 i) cross-compiling for 64-bit on a 32-bit host *) let int64 ~dbg i = natint_const_untagged dbg (Int64.to_nativeint i) +let vec128 ~dbg bits = Cconst_vec128 (bits, dbg) + let nativeint ~dbg i = natint_const_untagged dbg i let letin v ~defining_expr ~body = @@ -4063,9 +4134,9 @@ let letin v ~defining_expr ~body = | Cvar v' when Backend_var.same (Backend_var.With_provenance.var v) v' -> defining_expr | Cvar _ | Cconst_int _ | Cconst_natint _ | Cconst_float _ | Cconst_symbol _ - | Clet _ | Clet_mut _ | Cphantom_let _ | Cassign _ | Ctuple _ | Cop _ - | Csequence _ | Cifthenelse _ | Cswitch _ | Ccatch _ | Cexit _ | Ctrywith _ - | Cregion _ | Ctail _ -> + | Cconst_vec128 _ | Clet _ | Clet_mut _ | Cphantom_let _ | Cassign _ + | Ctuple _ | Cop _ | Csequence _ | Cifthenelse _ | Cswitch _ | Ccatch _ + | Cexit _ | Ctrywith _ | Cregion _ | Ctail _ -> Clet (v, defining_expr, body) let letin_mut v ty e body = Clet_mut (v, ty, e, body) @@ -4319,6 +4390,8 @@ let cint i = Cmm.Cint i let cfloat f = Cmm.Cdouble f +let cvec128 bits = Cmm.Cvec128 bits + let symbol_address s = Cmm.Csymbol_address s let define_symbol symbol = [Cdefine_symbol symbol] @@ -4354,7 +4427,7 @@ let cmm_arith_size (e : Cmm.expression) = in match e with | Cconst_int _ | Cconst_natint _ | Cconst_float _ | Cconst_symbol _ | Cvar _ - -> + | Cconst_vec128 _ -> Some 0 | Cop _ -> Some (cmm_arith_size0 e) | Clet _ | Clet_mut _ | Cphantom_let _ | Cassign _ | Ctuple _ | Csequence _ @@ -4375,6 +4448,7 @@ let kind_of_layout (layout : Lambda.layout) = match layout with | Pvalue Pfloatval -> Boxed_float | Pvalue (Pboxedintval bi) -> Boxed_integer bi + | Pvalue (Pboxedvectorval vi) -> Boxed_vector vi | Pvalue (Pgenval | Pintval | Pvariant _ | Parrayval _) - | Ptop | Pbottom | Punboxed_float | Punboxed_int _ -> + | Ptop | Pbottom | Punboxed_float | Punboxed_int _ | Punboxed_vector _ -> Any diff --git a/backend/cmm_helpers.mli b/backend/cmm_helpers.mli index 0d6b3816a73..ae606d726ab 100644 --- a/backend/cmm_helpers.mli +++ b/backend/cmm_helpers.mli @@ -210,6 +210,17 @@ val box_float : Debuginfo.t -> Lambda.alloc_mode -> expression -> expression val unbox_float : Debuginfo.t -> expression -> expression +(** Vector boxing and unboxing *) +val box_vector : + Debuginfo.t -> + Primitive.boxed_vector -> + Lambda.alloc_mode -> + expression -> + expression + +val unbox_vector : + Debuginfo.t -> Primitive.boxed_vector -> expression -> expression + (** Complex number creation and access *) val box_complex : Debuginfo.t -> expression -> expression -> expression @@ -372,6 +383,7 @@ module Extended_machtype_component : sig | Tagged_int | Any_int | Float + | Vec128 end module Extended_machtype : sig @@ -389,6 +401,8 @@ module Extended_machtype : sig val typ_void : t + val typ_vec128 : t + (** Conversion from a normal Cmm machtype. *) val of_machtype : machtype -> t @@ -942,6 +956,9 @@ val emit_int64_constant : symbol -> int64 -> data_item list -> data_item list val emit_nativeint_constant : symbol -> nativeint -> data_item list -> data_item list +val emit_vec128_constant : + symbol -> Cmm.vec128_bits -> data_item list -> data_item list + val emit_float_array_constant : symbol -> float list -> data_item list -> data_item list @@ -988,6 +1005,9 @@ val int32 : dbg:Debuginfo.t -> int32 -> expression (** Create a constant int expression from an int64. *) val int64 : dbg:Debuginfo.t -> int64 -> expression +(** Create a constant vec128 expression from two int64s. *) +val vec128 : dbg:Debuginfo.t -> Cmm.vec128_bits -> expression + (** Create a constant int expression from a nativeint. *) val nativeint : dbg:Debuginfo.t -> Nativeint.t -> expression @@ -1233,6 +1253,9 @@ val cint : nativeint -> data_item (** Static float. *) val cfloat : float -> data_item +(** Static 128-bit vector. *) +val cvec128 : Cmm.vec128_bits -> data_item + (** Static symbol. *) val symbol_address : symbol -> data_item diff --git a/backend/cmm_invariants.ml b/backend/cmm_invariants.ml index 527ca24eb79..4eed664912e 100644 --- a/backend/cmm_invariants.ml +++ b/backend/cmm_invariants.ml @@ -127,7 +127,7 @@ end let rec check env (expr : Cmm.expression) = match expr with - | Cconst_int _ | Cconst_natint _ | Cconst_float _ | Cconst_symbol _ + | Cconst_int _ | Cconst_natint _ | Cconst_float _ | Cconst_symbol _ | Cconst_vec128 _ | Cvar _ -> () | Clet (_, expr, body) diff --git a/backend/cmmgen.ml b/backend/cmmgen.ml index b7851224d48..648bb0ebb84 100644 --- a/backend/cmmgen.ml +++ b/backend/cmmgen.ml @@ -39,6 +39,7 @@ open Cmm_builtins type boxed_number = | Boxed_float of alloc_mode * Debuginfo.t | Boxed_integer of boxed_integer * alloc_mode * Debuginfo.t + | Boxed_vector of boxed_vector * alloc_mode * Debuginfo.t type env = { unboxed_ids : (V.t * boxed_number) V.tbl; @@ -162,6 +163,7 @@ let get_field env layout ptr n dbg = | Pvalue Pintval | Punboxed_int _ -> Word_int | Pvalue _ -> Word_val | Punboxed_float -> Double + | Punboxed_vector Pvec128 -> Onetwentyeight | Ptop -> Misc.fatal_errorf "get_field with Ptop: %a" Debuginfo.print_compact dbg | Pbottom -> @@ -274,6 +276,8 @@ let emit_structured_constant symb cst cont = emit_int64_constant symb n cont | Uconst_nativeint n -> emit_nativeint_constant symb n cont + | Uconst_vec128 {high; low} -> + emit_vec128_constant symb {high; low} cont | Uconst_block (tag, csts) -> let cont = List.fold_right emit_constant csts cont in emit_block symb (block_header tag (List.length csts)) cont @@ -319,6 +323,7 @@ let typ_of_boxed_number = function | Boxed_float _ -> Cmm.typ_float | Boxed_integer (Pint64, _,_) when size_int = 4 -> [|Int;Int|] | Boxed_integer _ -> Cmm.typ_int + | Boxed_vector (Pvec128, _, _) -> Cmm.typ_vec128 let equal_unboxed_integer ui1 ui2 = match ui1, ui2 with @@ -338,6 +343,7 @@ let box_number bn arg = match bn with | Boxed_float (m, dbg) -> box_float dbg m arg | Boxed_integer (bi, m, dbg) -> box_int dbg bi m arg + | Boxed_vector (vi, m, dbg) -> box_vector dbg vi m arg (* Returns the unboxed representation of a boxed float or integer. For Pint32 on 64-bit archs, the high 32 bits of the result are undefined. *) @@ -349,6 +355,8 @@ let unbox_number dbg bn arg = low_32 dbg (unbox_int dbg Pint32 arg) | Boxed_integer (bi, _, _) -> unbox_int dbg bi arg + | Boxed_vector (vi, _, _) -> + unbox_vector dbg vi arg (* Auxiliary functions for optimizing "let" of boxed numbers (floats and boxed integers *) @@ -380,7 +388,7 @@ let join_unboxed_number_kind ~strict k1 k2 = | _, _ -> No_unboxing let is_strict : kind_for_unboxing -> bool = function - | Boxed_integer _ | Boxed_float -> false + | Boxed_integer _ | Boxed_float | Boxed_vector _ -> false | Any -> true let rec is_unboxed_number_cmm = function @@ -414,6 +422,8 @@ let rec is_unboxed_number_cmm = function Boxed (Boxed_integer (Pint32, alloc_heap, Debuginfo.none), true) | Some (Uconst_int64 _) -> Boxed (Boxed_integer (Pint64, alloc_heap, Debuginfo.none), true) + | Some (Uconst_vec128 _) -> + Boxed (Boxed_vector (Pvec128, alloc_heap, Debuginfo.none), true) | _ -> No_unboxing end @@ -424,6 +434,7 @@ let rec is_unboxed_number_cmm = function | Cconst_int _ | Cconst_natint _ | Cconst_float _ + | Cconst_vec128 _ | Cvar _ | Cassign _ | Ctuple _ @@ -911,6 +922,12 @@ and transl_ccall env prim args dbg = | Pint32 -> XInt32 | Pint64 -> XInt64 in (xty, transl_unbox_int dbg env bi arg) + | Unboxed_vector bi -> + let xty = + match bi with + | Pvec128 -> XVec128 + in + (xty, transl_unbox_vector dbg env bi arg) | Untagged_int -> (XInt, untag_int (transl env arg) dbg) in @@ -939,6 +956,7 @@ and transl_ccall env prim args dbg = | _, Unboxed_integer Pint64 when size_int = 4 -> ([|Int; Int|], box_int dbg Pint64 alloc_heap) | _, Unboxed_integer bi -> (typ_int, box_int dbg bi alloc_heap) + | _, Unboxed_vector Pvec128 -> (typ_vec128, box_vector dbg Pvec128 alloc_heap) | _, Untagged_int -> (typ_int, (fun i -> tag_int i dbg)) in let typ_args, args = transl_args prim.prim_native_repr_args args in @@ -1283,6 +1301,9 @@ and transl_unbox_float dbg env exp = and transl_unbox_int dbg env bi exp = unbox_int dbg bi (transl env exp) +and transl_unbox_vector dbg env bi exp = + unbox_vector dbg bi (transl env exp) + (* transl_unbox_int, but may return garbage in upper bits *) and transl_unbox_int_low dbg env bi e = let e = transl_unbox_int dbg env bi e in @@ -1346,7 +1367,7 @@ and transl_let env str (layout : Lambda.layout) id exp transl_body = there may be constant closures inside that need lifting out. *) let _cbody : expression = transl_body env in cexp - | Punboxed_float | Punboxed_int _ -> begin + | Punboxed_float | Punboxed_int _ | Punboxed_vector _ -> begin let cexp = transl env exp in let cbody = transl_body env in match str with diff --git a/backend/coloring.ml b/backend/coloring.ml index 9325289d443..46f55697efb 100644 --- a/backend/coloring.ml +++ b/backend/coloring.ml @@ -44,27 +44,28 @@ let allocate_registers() = let unconstrained = ref [] in (* Reset the stack slot counts *) - let num_stack_slots = Array.make Proc.num_register_classes 0 in + let num_stack_slots = Array.make Proc.num_stack_slot_classes 0 in (* Preallocate the spilled registers in the stack. Split the remaining registers into constrained and unconstrained. *) let remove_reg reg = let cl = Proc.register_class reg in + let stack_cl = Proc.stack_slot_class reg.typ in if reg.spill then begin (* Preallocate the registers in the stack *) - let nslots = num_stack_slots.(cl) in + let nslots = num_stack_slots.(stack_cl) in let conflict = Array.make nslots false in List.iter (fun r -> match r.loc with Stack(Local n) -> - if Proc.register_class r = cl then conflict.(n) <- true + if Proc.stack_slot_class r.typ = stack_cl then conflict.(n) <- true | _ -> ()) reg.interf; let slot = ref 0 in while !slot < nslots && conflict.(!slot) do incr slot done; reg.loc <- Stack(Local !slot); - if !slot >= nslots then num_stack_slots.(cl) <- !slot + 1 + if !slot >= nslots then num_stack_slots.(stack_cl) <- !slot + 1 end else if reg.degree < Proc.num_available_registers.(cl) then unconstrained := reg :: !unconstrained else begin @@ -90,6 +91,7 @@ let allocate_registers() = (* Assign a location to a register, the best we can. *) let assign_location reg = let cl = Proc.register_class reg in + let stack_cl = Proc.stack_slot_class reg.typ in let first_reg = Proc.first_available_register.(cl) in let num_regs = Proc.num_available_registers.(cl) in let score = Array.make num_regs 0 in @@ -161,7 +163,7 @@ let allocate_registers() = if start >= num_regs then 0 else start) end else begin (* Sorry, we must put the pseudoreg in a stack location *) - let nslots = num_stack_slots.(cl) in + let nslots = num_stack_slots.(stack_cl) in let score = Array.make nslots 0 in (* Compute the scores as for registers *) List.iter @@ -207,7 +209,7 @@ let allocate_registers() = else begin (* Allocate a new stack slot *) reg.loc <- Stack(Local nslots); - num_stack_slots.(cl) <- nslots + 1 + num_stack_slots.(stack_cl) <- nslots + 1 end end; (* Cancel the preferences of this register so that they don't influence diff --git a/backend/comballoc.ml b/backend/comballoc.ml index 5275f0a9490..d67b8652879 100644 --- a/backend/comballoc.ml +++ b/backend/comballoc.ml @@ -87,9 +87,8 @@ let rec combine i allocstate = end | Iop((Imove|Ispill|Ireload|Inegf|Iabsf|Iaddf|Isubf|Imulf|Idivf|Ifloatofint| Iintoffloat|Ivalueofint|Iintofvalue|Iopaque|Iconst_int _|Iconst_float _| - Iconst_symbol _|Istackoffset _|Iload (_, _, _)|Istore (_, _, _)|Icompf _| - Icsel _ | - Ispecific _|Iname_for_debugger _|Iprobe_is_enabled _)) + Iconst_vec128 _|Iconst_symbol _|Istackoffset _|Iload (_, _, _)|Istore (_, _, _)| + Icompf _|Icsel _ |Ispecific _|Iname_for_debugger _|Iprobe_is_enabled _)) | Iop(Iintop(Iadd | Isub | Imul | Idiv | Imod | Iand | Ior | Ixor | Ilsl | Ilsr | Iasr | Ipopcnt | Imulh _ | Iclz _ | Ictz _ | Icomp _)) diff --git a/backend/debug/available_regs.ml b/backend/debug/available_regs.ml index b792fa48c7a..7ea2d378849 100644 --- a/backend/debug/available_regs.ml +++ b/backend/debug/available_regs.ml @@ -184,6 +184,7 @@ let rec available_regs (instr : M.instruction) ~(avail_before : RAS.t) : RAS.t = else RD.Set.made_unavailable_by_clobber avail_before ~regs_clobbered:instr.res ~register_class:Proc.register_class + ~stack_class:(fun r -> Proc.stack_slot_class r.typ) in let results = Array.map2 @@ -209,7 +210,8 @@ let rec available_regs (instr : M.instruction) ~(avail_before : RAS.t) : RAS.t = Array.append (Proc.destroyed_at_oper instr.desc) instr.res in RD.Set.made_unavailable_by_clobber avail_before ~regs_clobbered - ~register_class:Proc.register_class + ~register_class:Proc.register_class ~stack_class:(fun r -> + Proc.stack_slot_class r.typ) in (* Second: the cases of (a) allocations and (b) OCaml to OCaml function calls. In these cases, since the GC may run, registers always become diff --git a/backend/debug/reg_with_debug_info.ml b/backend/debug/reg_with_debug_info.ml index 9d36d9be824..6da74b6f615 100644 --- a/backend/debug/reg_with_debug_info.ml +++ b/backend/debug/reg_with_debug_info.ml @@ -99,23 +99,29 @@ let reg t = t.reg let location t = t.reg.loc let holds_pointer t = - match t.reg.typ with Addr | Val -> true | Int | Float -> false + match t.reg.typ with Addr | Val -> true | Int | Float | Vec128 -> false let holds_non_pointer t = not (holds_pointer t) let assigned_to_stack t = match t.reg.loc with Stack _ -> true | Reg _ | Unknown -> false -let regs_at_same_location (reg1 : Reg.t) (reg2 : Reg.t) ~register_class = +let regs_at_same_location (reg1 : Reg.t) (reg2 : Reg.t) ~register_class + ~stack_class = (* We need to check the register classes too: two locations both saying "stack offset N" might actually be different physical locations, for example if one is of class "Int" and another "Float" on amd64. [register_class] will be [Proc.register_class], but cannot be here, due to a circular dependency. *) - reg1.loc = reg2.loc && register_class reg1 = register_class reg2 + reg1.loc = reg2.loc + && + match reg1.loc with + | Reg _ -> register_class reg1 = register_class reg2 + | Stack _ -> stack_class reg1 = stack_class reg2 + | Unknown -> Misc.fatal_errorf "regs_at_same_location got Unknown locations" -let at_same_location t (reg : Reg.t) ~register_class = - regs_at_same_location t.reg reg ~register_class +let at_same_location t (reg : Reg.t) ~register_class ~stack_class = + regs_at_same_location t.reg reg ~register_class ~stack_class let debug_info t = t.debug_info @@ -152,12 +158,14 @@ module Set = struct (fun reg acc -> add (create_without_debug_info ~reg) acc) regs empty - let made_unavailable_by_clobber t ~regs_clobbered ~register_class = + let made_unavailable_by_clobber t ~regs_clobbered ~register_class ~stack_class + = Reg.Set.fold (fun reg acc -> let made_unavailable = filter - (fun reg' -> regs_at_same_location reg'.reg reg ~register_class) + (fun reg' -> + regs_at_same_location reg'.reg reg ~register_class ~stack_class) t in union made_unavailable acc) diff --git a/backend/debug/reg_with_debug_info.mli b/backend/debug/reg_with_debug_info.mli index a26d1e12359..56619dd597f 100644 --- a/backend/debug/reg_with_debug_info.mli +++ b/backend/debug/reg_with_debug_info.mli @@ -63,7 +63,12 @@ val debug_info : t -> Debug_info.t option (physical or pseudoregister) location as the register [reg], which is not equipped with debugging information. [register_class] should be [Proc.register_class]. *) -val at_same_location : t -> Reg.t -> register_class:(Reg.t -> int) -> bool +val at_same_location : + t -> + Reg.t -> + register_class:(Reg.t -> int) -> + stack_class:(Reg.t -> int) -> + bool val holds_pointer : t -> bool @@ -98,7 +103,11 @@ module Set : sig registers in [regs_clobbered]. (Think of [t] as a set of available registers.) [register_class] should always be [Proc.register_class]. *) val made_unavailable_by_clobber : - t -> regs_clobbered:Reg.t array -> register_class:(Reg.t -> int) -> t + t -> + regs_clobbered:Reg.t array -> + register_class:(Reg.t -> int) -> + stack_class:(Reg.t -> int) -> + t end val print : diff --git a/backend/interf.ml b/backend/interf.ml index c34eaa0a867..6436e344bf1 100644 --- a/backend/interf.ml +++ b/backend/interf.ml @@ -114,7 +114,7 @@ let build_graph fundecl = | Iexit _ -> () | Itrywith(body, _kind, (_ts, handler)) -> - add_interf_set Proc.destroyed_at_raise handler.live; + add_interf_set (Proc.destroyed_at_raise ()) handler.live; interf body; interf handler; interf i.next | Iraise _ -> () in diff --git a/backend/interval.ml b/backend/interval.ml index 1546e39effa..4955ba7b812 100644 --- a/backend/interval.ml +++ b/backend/interval.ml @@ -108,7 +108,7 @@ let insert_destroyed_at_oper intervals instr pos = update_interval_position_by_array intervals destroyed pos Result let insert_destroyed_at_raise intervals pos = - let destroyed = Proc.destroyed_at_raise in + let destroyed = Proc.destroyed_at_raise () in if Array.length destroyed > 0 then update_interval_position_by_array intervals destroyed pos Result diff --git a/backend/linscan.ml b/backend/linscan.ml index 21416be23d7..802779f4298 100644 --- a/backend/linscan.ml +++ b/backend/linscan.ml @@ -72,7 +72,7 @@ let rec release_expired_inactive ci pos = function (* Allocate a new stack slot to the interval. *) let allocate_stack_slot num_stack_slots i = - let cl = Proc.register_class i.reg in + let cl = Proc.stack_slot_class i.reg.typ in let ss = num_stack_slots.(cl) in num_stack_slots.(cl) <- succ ss; i.reg.loc <- Stack(Local ss); @@ -189,7 +189,7 @@ let allocate_registers() = }; done; (* Reset the stack slot counts *) - let num_stack_slots = Array.make Proc.num_register_classes 0 in + let num_stack_slots = Array.make Proc.num_stack_slot_classes 0 in (* Add all fixed intervals (sorted by end position) *) List.iter (fun i -> diff --git a/backend/mach.ml b/backend/mach.ml index 9b62441e9aa..fe5a5dce0d6 100644 --- a/backend/mach.ml +++ b/backend/mach.ml @@ -52,6 +52,7 @@ type operation = | Ireload | Iconst_int of nativeint | Iconst_float of int64 + | Iconst_vec128 of Cmm.vec128_bits | Iconst_symbol of Cmm.symbol | Icall_ind | Icall_imm of { func : Cmm.symbol; } @@ -175,7 +176,7 @@ let rec instr_iter f i = instr_iter f body; instr_iter f handler; instr_iter f i.next | Iraise _ -> () | Iop (Imove | Ispill | Ireload - | Iconst_int _ | Iconst_float _ | Iconst_symbol _ + | Iconst_int _ | Iconst_float _ | Iconst_symbol _ | Iconst_vec128 _ | Icall_ind | Icall_imm _ | Iextcall _ | Istackoffset _ | Iload _ | Istore _ | Ialloc _ | Iintop _ | Iintop_imm _ | Iintop_atomic _ @@ -206,7 +207,7 @@ let operation_is_pure = function | Icompf _ | Icsel _ | Ifloatofint | Iintoffloat - | Iconst_int _ | Iconst_float _ | Iconst_symbol _ + | Iconst_int _ | Iconst_float _ | Iconst_symbol _ | Iconst_vec128 _ | Iload (_, _, _) | Iname_for_debugger _ -> true @@ -226,7 +227,7 @@ let operation_can_raise op = | Icompf _ | Icsel _ | Ifloatofint | Iintoffloat | Ivalueofint | Iintofvalue - | Iconst_int _ | Iconst_float _ | Iconst_symbol _ + | Iconst_int _ | Iconst_float _ | Iconst_symbol _ | Iconst_vec128 _ | Istackoffset _ | Istore _ | Iload (_, _, _) | Iname_for_debugger _ | Itailcall_imm _ | Itailcall_ind | Iopaque | Ibeginregion | Iendregion diff --git a/backend/mach.mli b/backend/mach.mli index ba68d8c0373..83b5c81315b 100644 --- a/backend/mach.mli +++ b/backend/mach.mli @@ -55,6 +55,7 @@ type operation = | Ireload | Iconst_int of nativeint | Iconst_float of int64 + | Iconst_vec128 of Cmm.vec128_bits | Iconst_symbol of Cmm.symbol | Icall_ind | Icall_imm of { func : Cmm.symbol; } diff --git a/backend/polling.ml b/backend/polling.ml index c496d4bef5a..59983584e1e 100644 --- a/backend/polling.ml +++ b/backend/polling.ml @@ -241,7 +241,7 @@ let find_poll_alloc_or_calls instr = | Iop(Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _ ) -> Some (Function_call, i.dbg) | Iop(Iextcall { alloc = true }) -> Some (External_call, i.dbg) - | Iop(Imove | Ispill | Ireload | Iconst_int _ | Iconst_float _ | + | Iop(Imove | Ispill | Ireload | Iconst_int _ | Iconst_float _ | Iconst_vec128 _ | Iconst_symbol _ | Iextcall { alloc = false } | Istackoffset _ | Iload _ | Istore _ | Iintop _ | Iintop_imm _ | Iintop_atomic _ | Ifloatofint | Iintoffloat | Inegf | Iabsf | Iaddf | Isubf | diff --git a/backend/printcmm.ml b/backend/printcmm.ml index 8b4eae9bdc8..85b2f7ff65b 100644 --- a/backend/printcmm.ml +++ b/backend/printcmm.ml @@ -34,6 +34,7 @@ let machtype_component ppf = function | Addr -> fprintf ppf "addr" | Int -> fprintf ppf "int" | Float -> fprintf ppf "float" + | Vec128 -> fprintf ppf "vec128" let machtype ppf mty = match Array.length mty with @@ -48,6 +49,7 @@ let exttype ppf = function | XInt32 -> fprintf ppf "int32" | XInt64 -> fprintf ppf "int64" | XFloat -> fprintf ppf "float" + | XVec128 -> fprintf ppf "vec128" let extcall_signature ppf (ty_res, ty_args) = begin match ty_args with @@ -97,6 +99,7 @@ let chunk = function | Sixteen_signed -> "signed int16" | Thirtytwo_unsigned -> "unsigned int32" | Thirtytwo_signed -> "signed int32" + | Onetwentyeight -> "vec128" | Word_int -> "int" | Word_val -> "val" | Single -> "float32" @@ -244,6 +247,7 @@ let rec expr ppf = function | Cconst_int (n, _dbg) -> fprintf ppf "%i" n | Cconst_natint (n, _dbg) -> fprintf ppf "%s" (Nativeint.to_string n) + | Cconst_vec128 ({low; high}, _dbg) -> fprintf ppf "%016Lx:%016Lx" high low | Cconst_float (n, _dbg) -> fprintf ppf "%F" n | Cconst_symbol (s, _dbg) -> fprintf ppf "%a:\"%s\"" is_global s.sym_global s.sym_name | Cvar id -> V.print ppf id @@ -411,6 +415,8 @@ let data_item ppf = function | Cint n -> fprintf ppf "int %s" (Nativeint.to_string n) | Csingle f -> fprintf ppf "single %F" f | Cdouble f -> fprintf ppf "double %F" f + | Cvec128 {high; low} -> + fprintf ppf "vec128 %s:%s" (Int64.to_string high) (Int64.to_string low) | Csymbol_address s -> fprintf ppf "addr %a:\"%s\"" is_global s.sym_global s.sym_name | Cstring s -> fprintf ppf "string \"%s\"" s | Cskip n -> fprintf ppf "skip %i" n diff --git a/backend/printmach.ml b/backend/printmach.ml index 7ffc5457a16..e7306848c7a 100644 --- a/backend/printmach.ml +++ b/backend/printmach.ml @@ -23,14 +23,14 @@ open Interval module V = Backend_var -let loc ?(wrap_out = fun ppf f -> f ppf) ~reg_class ~unknown ppf l = - match l with +let loc ?(wrap_out = fun ppf f -> f ppf) ~unknown ppf loc typ = + match loc with | Unknown -> unknown ppf | Reg r -> - wrap_out ppf (fun ppf -> fprintf ppf "%s" (Proc.register_name r)) + wrap_out ppf (fun ppf -> fprintf ppf "%s" (Proc.register_name typ r)) | Stack(Local s) -> wrap_out ppf (fun ppf -> - fprintf ppf "s[%s:%i]" (Proc.register_class_tag reg_class) s) + fprintf ppf "s[%s:%i]" (Proc.stack_class_tag (Proc.stack_slot_class typ)) s) | Stack(Incoming s) -> wrap_out ppf (fun ppf -> fprintf ppf "par[%i]" s) | Stack(Outgoing s) -> @@ -43,11 +43,16 @@ let reg ppf r = fprintf ppf "%s" (Reg.name r) else fprintf ppf "%s" - (match r.typ with Val -> "V" | Addr -> "A" | Int -> "I" | Float -> "F"); + (match r.typ with + | Val -> "V" + | Addr -> "A" + | Int -> "I" + | Float -> "F" + | Vec128 -> "X"); fprintf ppf "/%i" r.stamp; loc ~wrap_out:(fun ppf f -> fprintf ppf "[%t]" f) - ~reg_class:(Proc.register_class r) ~unknown:(fun _ -> ()) ppf r.loc + ~unknown:(fun _ -> ()) ppf r.loc r.typ let regs' ?(print_reg = reg) ppf v = let reg = print_reg in @@ -159,6 +164,7 @@ let operation' ?(print_reg = reg) op arg ppf res = | Iconst_int n -> fprintf ppf "%s" (Nativeint.to_string n) | Iconst_float f -> fprintf ppf "%F" (Int64.float_of_bits f) | Iconst_symbol s -> fprintf ppf "\"%s\"" s.sym_name + | Iconst_vec128 {high; low} -> fprintf ppf "%016Lx:%016Lx" high low | Icall_ind -> fprintf ppf "call %a" regs arg | Icall_imm { func; } -> fprintf ppf "call \"%s\" %a" func.sym_name regs arg | Itailcall_ind -> fprintf ppf "tailcall %a" regs arg diff --git a/backend/printmach.mli b/backend/printmach.mli index 1f0672230b2..c4086611157 100644 --- a/backend/printmach.mli +++ b/backend/printmach.mli @@ -17,7 +17,7 @@ open Format -val loc: ?wrap_out:(formatter -> (formatter -> unit) -> unit) -> reg_class:int -> unknown:(formatter -> unit) -> formatter -> Reg.location -> unit +val loc: ?wrap_out:(formatter -> (formatter -> unit) -> unit) -> unknown:(formatter -> unit) -> formatter -> Reg.location -> Cmm.machtype_component -> unit val reg: formatter -> Reg.t -> unit val regs': ?print_reg:(formatter -> Reg.t -> unit) -> formatter -> Reg.t array -> unit val regs: formatter -> Reg.t array -> unit diff --git a/backend/proc.mli b/backend/proc.mli index ba6edc08d1a..f541a790cc4 100644 --- a/backend/proc.mli +++ b/backend/proc.mli @@ -21,13 +21,19 @@ val word_addressed: bool (* Registers available for register allocation *) val num_register_classes: int val register_class: Reg.t -> int -val register_class_tag: int -> string val num_available_registers: int array val first_available_register: int array -val register_name: int -> string -val phys_reg: int -> Reg.t +val register_name: Cmm.machtype_component -> int -> string +val phys_reg: Cmm.machtype_component -> int -> Reg.t val rotate_registers: bool -val all_phys_regs : Reg.t array +val precolored_regs : unit -> Reg.Set.t + +(* The number of stack slot classes may differ from the number of register classes. + On x86, we use the same class for floating point and SIMD vector registers, + but they take up different amounts of space on the stack. *) +val num_stack_slot_classes: int +val stack_slot_class: Cmm.machtype_component -> int +val stack_class_tag: int -> string (* Calling conventions *) val loc_arguments: Cmm.machtype -> Reg.t array * int @@ -55,7 +61,7 @@ val max_register_pressure: Mach.operation -> int array (* Registers destroyed by operations *) val destroyed_at_oper: Mach.instruction_desc -> Reg.t array -val destroyed_at_raise: Reg.t array +val destroyed_at_raise: unit -> Reg.t array val destroyed_at_reloadretaddr : Reg.t array val destroyed_at_pushtrap : Reg.t array val destroyed_at_basic : Cfg_intf.S.basic -> Reg.t array diff --git a/backend/reg.ml b/backend/reg.ml index 1b302a9e765..6058f3e9a36 100644 --- a/backend/reg.ml +++ b/backend/reg.ml @@ -195,6 +195,7 @@ let is_reg t = let size_of_contents_in_bytes t = match t.typ with + | Vec128 -> Arch.size_vec128 | Float -> Arch.size_float | Addr -> assert (Arch.size_addr = Arch.size_int); diff --git a/backend/regalloc/regalloc_invariants.ml b/backend/regalloc/regalloc_invariants.ml index fc2daefdd99..7ea23b64233 100644 --- a/backend/regalloc/regalloc_invariants.ml +++ b/backend/regalloc/regalloc_invariants.ml @@ -17,6 +17,7 @@ let precondition : Cfg_with_layout.t -> unit = | Const_int _ -> () | Const_float _ -> () | Const_symbol _ -> () + | Const_vec128 _ -> () | Stackoffset _ -> () | Load _ -> () | Store _ -> () @@ -100,9 +101,9 @@ let precondition : Cfg_with_layout.t -> unit = let fun_num_stack_slots = (Cfg_with_layout.cfg cfg_with_layout).fun_num_stack_slots in - Array.iteri fun_num_stack_slots ~f:(fun reg_class num_slots -> + Array.iteri fun_num_stack_slots ~f:(fun stack_class num_slots -> if num_slots <> 0 - then fatal "register class %d has %d slots(s)" reg_class num_slots) + then fatal "stack slot class %d has %d slots(s)" stack_class num_slots) let postcondition_layout : Cfg_with_layout.t -> unit = fun cfg_with_layout -> @@ -145,13 +146,15 @@ let postcondition_layout : Cfg_with_layout.t -> unit = let register_classes_must_be_consistent (id : Instruction.id) (reg : Reg.t) : unit = match reg.Reg.loc with - | Reg phys_reg -> - let phys_reg = Proc.phys_reg phys_reg in - if not (same_reg_class reg phys_reg) - then + | Reg phys_reg -> ( + try + let (_ : Reg.t) = Proc.phys_reg reg.typ phys_reg in + () + with Invalid_argument _ -> fatal - "instruction %d assigned %a to %a but they are in different classes" - id Printmach.reg reg Printmach.reg phys_reg + "instruction %d assigned %a to register %i, which has an \ + incompatible class" + id Printmach.reg reg phys_reg) | Stack _ | Unknown -> () in let register_classes_must_be_consistent (id : Instruction.id) @@ -160,7 +163,7 @@ let postcondition_layout : Cfg_with_layout.t -> unit = in let module Int = Numbers.Int in let used_stack_slots = - Array.init Proc.num_register_classes ~f:(fun _ -> Int.Set.empty) + Array.init Proc.num_stack_slot_classes ~f:(fun _ -> Int.Set.empty) in let record_stack_slot_use (reg : Reg.t) : unit = match reg.loc with @@ -169,9 +172,9 @@ let postcondition_layout : Cfg_with_layout.t -> unit = | Stack stack_loc -> ( match stack_loc with | Local index -> - let reg_class = Proc.register_class reg in - used_stack_slots.(reg_class) - <- Int.Set.add index used_stack_slots.(reg_class) + let stack_class = Proc.stack_slot_class reg.typ in + used_stack_slots.(stack_class) + <- Int.Set.add index used_stack_slots.(stack_class) | Incoming _ -> () | Outgoing _ -> () | Domainstate _ -> ()) @@ -203,7 +206,7 @@ let postcondition_layout : Cfg_with_layout.t -> unit = let fun_num_stack_slots = (Cfg_with_layout.cfg cfg_with_layout).fun_num_stack_slots in - Array.iteri fun_num_stack_slots ~f:(fun reg_class num_slots -> + Array.iteri fun_num_stack_slots ~f:(fun stack_class num_slots -> let available_slots = Seq.ints 0 |> Seq.take num_slots |> Int.Set.of_seq in @@ -211,16 +214,20 @@ let postcondition_layout : Cfg_with_layout.t -> unit = set |> Int.Set.elements |> List.map ~f:string_of_int |> String.concat ", " in - let invalid = Int.Set.diff used_stack_slots.(reg_class) available_slots in + let invalid = + Int.Set.diff used_stack_slots.(stack_class) available_slots + in if not (Int.Set.is_empty invalid) then - fatal "register class %d uses the following invalid slots: %s" reg_class - (string_of_set invalid); - let unused = Int.Set.diff available_slots used_stack_slots.(reg_class) in + fatal "stack slot class %d uses the following invalid slots: %s" + stack_class (string_of_set invalid); + let unused = + Int.Set.diff available_slots used_stack_slots.(stack_class) + in if not (Int.Set.is_empty unused) then - fatal "register class %d has the following unused slots: %s" reg_class - (string_of_set unused)) + fatal "stack slot class %d has the following unused slots: %s" + stack_class (string_of_set unused)) let postcondition_liveness : Cfg_with_infos.t -> unit = fun cfg_with_infos -> diff --git a/backend/regalloc/regalloc_irc.ml b/backend/regalloc/regalloc_irc.ml index fc925c9f653..84e5b70f095 100644 --- a/backend/regalloc/regalloc_irc.ml +++ b/backend/regalloc/regalloc_irc.ml @@ -89,8 +89,9 @@ let build : State.t -> Cfg_with_infos.t -> unit = let live = Cfg_dataflow.Instr.Tbl.find liveness first_id in Reg.Set.iter (fun reg1 -> - Array.iter (filter_fp Proc.destroyed_at_raise) ~f:(fun reg2 -> - State.add_edge state reg1 reg2)) + Array.iter + (filter_fp (Proc.destroyed_at_raise ())) + ~f:(fun reg2 -> State.add_edge state reg1 reg2)) (Reg.Set.remove Proc.loc_exn_bucket live.before)) let make_work_list : State.t -> unit = @@ -525,6 +526,6 @@ let run : Cfg_with_infos.t -> Cfg_with_infos.t = state ~f:(fun () -> update_register_locations (); - Array.iter all_precolored_regs ~f:(fun reg -> reg.Reg.degree <- 0)) + Reg.Set.iter (fun reg -> reg.Reg.degree <- 0) (all_precolored_regs ())) cfg_with_infos; cfg_with_infos diff --git a/backend/regalloc/regalloc_irc_state.ml b/backend/regalloc/regalloc_irc_state.ml index d4f1ca80a40..abe423381aa 100644 --- a/backend/regalloc/regalloc_irc_state.ml +++ b/backend/regalloc/regalloc_irc_state.ml @@ -60,7 +60,8 @@ let[@inline] make ~initial ~stack_slots ~next_instruction_id () = reg.Reg.interf <- []; reg.Reg.degree <- 0); List.iter initial ~f:(fun reg -> reg.Reg.irc_work_list <- Initial); - Array.iter all_precolored_regs ~f:(fun reg -> + Reg.Set.iter + (fun reg -> reg.Reg.irc_work_list <- Reg.Precolored; reg.Reg.irc_color <- (match reg.Reg.loc with @@ -70,7 +71,8 @@ let[@inline] make ~initial ~stack_slots ~next_instruction_id () = Printmach.reg reg); reg.Reg.irc_alias <- None; reg.Reg.interf <- []; - reg.Reg.degree <- Degree.infinite); + reg.Reg.degree <- Degree.infinite) + (all_precolored_regs ()); let num_registers = List.length initial in let original_capacity = Int.min max_capacity num_registers in let simplify_work_list = RegWorkList.make ~original_capacity in @@ -140,7 +142,8 @@ let[@inline] reset state ~new_temporaries = reg.Reg.irc_alias <- None; reg.Reg.interf <- []; reg.Reg.degree <- 0); - Array.iter all_precolored_regs ~f:(fun reg -> + Reg.Set.iter + (fun reg -> assert (reg.Reg.irc_work_list = Reg.Precolored); (match reg.Reg.loc, reg.Reg.irc_color with | Reg color, Some color' -> assert (color = color') @@ -148,7 +151,8 @@ let[@inline] reset state ~new_temporaries = | (Unknown | Stack _), _ -> assert false); reg.Reg.irc_alias <- None; reg.Reg.interf <- []; - assert (reg.Reg.degree = Degree.infinite)); + assert (reg.Reg.degree = Degree.infinite)) + (all_precolored_regs ()); state.initial <- Doubly_linked_list.of_list new_temporaries; Doubly_linked_list.transfer ~from:state.colored_nodes ~to_:state.initial (); RegWorkList.iter state.coalesced_nodes ~f:(fun reg -> @@ -534,10 +538,10 @@ let[@inline] invariant state = then ( (* interf (list) is morally a set *) List.iter (Reg.all_registers ()) ~f:check_inter_has_no_duplicates; - Array.iter all_precolored_regs ~f:check_inter_has_no_duplicates; + Reg.Set.iter check_inter_has_no_duplicates (all_precolored_regs ()); (* register sets are disjoint *) check_disjoint ~is_disjoint:Reg.Set.disjoint - [ "precolored", Reg.set_of_array all_precolored_regs; + [ "precolored", all_precolored_regs (); "initial", reg_set_of_doubly_linked_list state.initial; "simplify_work_list", reg_set_of_reg_work_list state.simplify_work_list; "freeze_work_list", reg_set_of_reg_work_list state.freeze_work_list; @@ -547,7 +551,7 @@ let[@inline] invariant state = "colored_nodes", reg_set_of_doubly_linked_list state.colored_nodes; "select_stack", Reg.Set.of_list state.select_stack ]; List.iter ~f:check_set_and_field_consistency_reg - [ "precolored", Reg.set_of_array all_precolored_regs, Reg.Precolored; + [ "precolored", all_precolored_regs (), Reg.Precolored; "initial", reg_set_of_doubly_linked_list state.initial, Reg.Initial; ( "simplify_work_list", reg_set_of_reg_work_list state.simplify_work_list, @@ -605,7 +609,7 @@ let[@inline] invariant state = (reg_set_of_reg_work_list state.spill_work_list)) in let work_lists_or_precolored = - Reg.Set.union (Reg.set_of_array all_precolored_regs) work_lists + Reg.Set.union (all_precolored_regs ()) work_lists in Reg.Set.iter (fun u -> diff --git a/backend/regalloc/regalloc_irc_utils.ml b/backend/regalloc/regalloc_irc_utils.ml index 69ad7ab5fe4..886d7da68cc 100644 --- a/backend/regalloc/regalloc_irc_utils.ml +++ b/backend/regalloc/regalloc_irc_utils.ml @@ -111,6 +111,7 @@ let is_move_basic : Cfg.basic -> bool = | Const_int _ -> false | Const_float _ -> false | Const_symbol _ -> false + | Const_vec128 _ -> false | Stackoffset _ -> false | Load _ -> false | Store _ -> false @@ -140,22 +141,9 @@ let is_move_basic : Cfg.basic -> bool = let is_move_instruction : Cfg.basic Cfg.instruction -> bool = fun instr -> is_move_basic instr.desc -let all_precolored_regs : Reg.t array = +let all_precolored_regs = Proc.init (); - let num_available_registers = - Array.fold_left Proc.num_available_registers ~f:( + ) ~init:0 - in - let res = Array.make num_available_registers Reg.dummy in - let i = ref 0 in - for reg_class = 0 to pred Proc.num_register_classes do - let first_available_register = Proc.first_available_register.(reg_class) in - let num_available_registers = Proc.num_available_registers.(reg_class) in - for reg_idx = 0 to pred num_available_registers do - res.(!i) <- Proc.phys_reg (first_available_register + reg_idx); - incr i - done - done; - res + Proc.precolored_regs let k reg = Proc.num_available_registers.(Proc.register_class reg) diff --git a/backend/regalloc/regalloc_irc_utils.mli b/backend/regalloc/regalloc_irc_utils.mli index a2123413f85..5333ff04b89 100644 --- a/backend/regalloc/regalloc_irc_utils.mli +++ b/backend/regalloc/regalloc_irc_utils.mli @@ -64,7 +64,7 @@ end val is_move_instruction : Instruction.t -> bool -val all_precolored_regs : Reg.t array +val all_precolored_regs : unit -> Reg.Set.t val k : Reg.t -> int diff --git a/backend/regalloc/regalloc_ls.ml b/backend/regalloc/regalloc_ls.ml index 7c65e41bb88..7644bb4dce1 100644 --- a/backend/regalloc/regalloc_ls.ml +++ b/backend/regalloc/regalloc_ls.ml @@ -75,7 +75,7 @@ let build_intervals : State.t -> Cfg_with_infos.t -> unit = let off = on + 1 in if trap_handler then - Array.iter Proc.destroyed_at_raise ~f:(fun reg -> + Array.iter (Proc.destroyed_at_raise ()) ~f:(fun reg -> update_range reg ~begin_:on ~end_:on); instr.ls_order <- on; Array.iter instr.arg ~f:(fun reg -> update_range reg ~begin_:on ~end_:on); diff --git a/backend/regalloc/regalloc_ls_state.ml b/backend/regalloc/regalloc_ls_state.ml index 80fa3cbd3ad..f733614fde4 100644 --- a/backend/regalloc/regalloc_ls_state.ml +++ b/backend/regalloc/regalloc_ls_state.ml @@ -56,6 +56,8 @@ let[@inline] release_expired_intervals state ~pos = let[@inline] active state ~reg_class = state.active.(reg_class) +let[@inline] active_classes state = state.active + let[@inline] stack_slots state = state.stack_slots let[@inline] get_and_incr_instruction_id state = diff --git a/backend/regalloc/regalloc_ls_state.mli b/backend/regalloc/regalloc_ls_state.mli index 95e7646e175..da0b601817d 100644 --- a/backend/regalloc/regalloc_ls_state.mli +++ b/backend/regalloc/regalloc_ls_state.mli @@ -20,6 +20,8 @@ val release_expired_intervals : t -> pos:int -> unit val active : t -> reg_class:int -> ClassIntervals.t +val active_classes : t -> ClassIntervals.t array + val stack_slots : t -> Regalloc_stack_slots.t val get_and_incr_instruction_id : t -> Instruction.id diff --git a/backend/regalloc/regalloc_rewrite.ml b/backend/regalloc/regalloc_rewrite.ml index 88fb32e5c64..2b97e08f64d 100644 --- a/backend/regalloc/regalloc_rewrite.ml +++ b/backend/regalloc/regalloc_rewrite.ml @@ -261,8 +261,8 @@ let postlude : if Utils.debug then Array.iteri (Cfg_with_layout.cfg cfg_with_layout).fun_num_stack_slots - ~f:(fun reg_class num_stack_slots -> - Utils.log ~indent:1 "stack_slots[%d]=%d" reg_class num_stack_slots); + ~f:(fun stack_class num_stack_slots -> + Utils.log ~indent:1 "stack_slots[%d]=%d" stack_class num_stack_slots); remove_prologue_if_not_required cfg_with_layout; update_live_fields cfg_with_layout (Cfg_with_infos.liveness cfg_with_infos); f (); diff --git a/backend/regalloc/regalloc_stack_slots.ml b/backend/regalloc/regalloc_stack_slots.ml index e455f7e1964..13314fa702f 100644 --- a/backend/regalloc/regalloc_stack_slots.ml +++ b/backend/regalloc/regalloc_stack_slots.ml @@ -14,22 +14,22 @@ type t = let[@inline] make () = let stack_slots = Reg.Tbl.create 128 in - let num_stack_slots = Array.make Proc.num_register_classes 0 in + let num_stack_slots = Array.make Proc.num_stack_slot_classes 0 in { stack_slots; num_stack_slots } -let[@inline] size_for_all_reg_classes t = +let[@inline] size_for_all_stack_classes t = Array.fold_left t.num_stack_slots ~f:( + ) ~init:0 -let[@inline] get_and_incr t ~reg_class = - let res = t.num_stack_slots.(reg_class) in - t.num_stack_slots.(reg_class) <- succ res; +let[@inline] get_and_incr t ~stack_class = + let res = t.num_stack_slots.(stack_class) in + t.num_stack_slots.(stack_class) <- succ res; res let[@inline] get_or_create t reg = match Reg.Tbl.find_opt t.stack_slots reg with | Some slot -> slot | None -> - let res = get_and_incr t ~reg_class:(Proc.register_class reg) in + let res = get_and_incr t ~stack_class:(Proc.stack_slot_class reg.Reg.typ) in Reg.Tbl.replace t.stack_slots reg res; res @@ -47,12 +47,12 @@ let[@inline] update_cfg_with_layout t cfg_with_layout = let fun_num_stack_slots = (Cfg_with_layout.cfg cfg_with_layout).fun_num_stack_slots in - for reg_class = 0 to pred Proc.num_register_classes do - fun_num_stack_slots.(reg_class) <- t.num_stack_slots.(reg_class) + for stack_class = 0 to pred Proc.num_stack_slot_classes do + fun_num_stack_slots.(stack_class) <- t.num_stack_slots.(stack_class) done (** The optimization below is conceptually fairly close to what linscan does: - - for each register class / stack slot couple, we compute the interval of + - for each stack slot class / stack slot couple, we compute the interval of uses; - we re-assign slots by putting in the same "bucket" slots whose intervals do not overlap. @@ -138,7 +138,7 @@ module Intervals : sig type slots = t type t = Interval.t array array - (* first index is register class, second index is slot index *) + (* first index is stack slot class, second index is slot index *) val build_from_cfg : slots -> Cfg_with_infos.t -> t @@ -148,14 +148,14 @@ with type slots := t = struct type t = Interval.t array array let make slots = - Array.init Proc.num_register_classes ~f:(fun reg_class -> - Array.init slots.num_stack_slots.(reg_class) ~f:(fun _ -> + Array.init Proc.num_stack_slot_classes ~f:(fun stack_class -> + Array.init slots.num_stack_slots.(stack_class) ~f:(fun _ -> { Interval.start = Point.dummy; end_ = Point.dummy })) let visit_reg (t : t) (point : Point.t) (reg : Reg.t) : unit = apply_reg_stack_local reg ~f:(fun slot_index -> - let reg_class = Proc.register_class reg in - let interval = t.(reg_class).(slot_index) in + let stack_class = Proc.stack_slot_class reg.typ in + let interval = t.(stack_class).(slot_index) in if interval.start == Point.dummy then interval.start <- point; interval.end_ <- point) @@ -194,10 +194,10 @@ with type slots := t = struct intervals let print ppf t = - Array.iteri t ~f:(fun reg_class intervals -> + Array.iteri t ~f:(fun stack_class intervals -> Array.iteri intervals ~f:(fun slot_index interval -> - Format.fprintf ppf "reg_class=%d slot_index=%d -> %a\n" reg_class - slot_index Interval.print interval)) + Format.fprintf ppf "stack_class=%d slot_index=%d -> %a\n" + stack_class slot_index Interval.print interval)) end module Int = Numbers.Int @@ -211,13 +211,13 @@ module Buckets : sig val contains_empty : t -> bool - val find_bucket : t -> reg_class:int -> slot_index:slot -> int option + val find_bucket : t -> stack_class:int -> slot_index:slot -> int option val print : Format.formatter -> t -> unit end with type slots := t = struct type t = Interval.t Int.Tbl.t array array - (* first index is register class, second index is bucket index, table key is + (* first index is stack slot class, second index is bucket index, table key is slot index *) let does_not_fit (bucket : Interval.t Int.Tbl.t) (interval : Interval.t) : @@ -232,13 +232,13 @@ with type slots := t = struct let build_from_intervals slots intervals = let buckets = - Array.init Proc.num_register_classes ~f:(fun reg_class -> - let num_slots = slots.num_stack_slots.(reg_class) in + Array.init Proc.num_stack_slot_classes ~f:(fun stack_class -> + let num_slots = slots.num_stack_slots.(stack_class) in Array.init num_slots ~f:(fun _ -> Int.Tbl.create num_slots)) in - Array.iteri intervals ~f:(fun reg_class intervals -> + Array.iteri intervals ~f:(fun stack_class intervals -> Array.iteri intervals ~f:(fun slot_index interval -> - let buckets = buckets.(reg_class) in + let buckets = buckets.(stack_class) in let bucket_index = ref 0 in while !bucket_index < Array.length buckets @@ -259,8 +259,8 @@ with type slots := t = struct let last_bucket = buckets.(len - 1) in Int.Tbl.length last_bucket = 0) - let find_bucket t ~reg_class ~slot_index = - let buckets = t.(reg_class) in + let find_bucket t ~stack_class ~slot_index = + let buckets = t.(stack_class) in let len = Array.length buckets in let bucket_index = ref 0 in while @@ -272,9 +272,9 @@ with type slots := t = struct if !bucket_index < len then Some !bucket_index else None let print ppf t = - Array.iteri t ~f:(fun reg_class buckets -> + Array.iteri t ~f:(fun stack_class buckets -> Array.iteri buckets ~f:(fun bucket_index bucket -> - Format.fprintf ppf "reg_class=%d bucket_index=%d\n" reg_class + Format.fprintf ppf "stack_class=%d bucket_index=%d\n" stack_class bucket_index; Int.Tbl.iter (fun slot_index interval -> @@ -284,7 +284,7 @@ with type slots := t = struct end let optimize (t : t) (cfg_with_infos : Cfg_with_infos.t) : unit = - if size_for_all_reg_classes t > 0 + if size_for_all_stack_classes t > 0 then ( (* First, compute the intervals for all stack slots *) let intervals = Intervals.build_from_cfg t cfg_with_infos in @@ -299,36 +299,38 @@ let optimize (t : t) (cfg_with_infos : Cfg_with_infos.t) : unit = (* Finally, if so, reassign the slot indices *) if optimized then ( - let max_bucket_indices = Array.make Proc.num_register_classes (-1) in + let max_bucket_indices = Array.make Proc.num_stack_slot_classes (-1) in List.iter (Reg.all_registers ()) ~f:(fun (reg : Reg.t) -> apply_reg_stack_local reg ~f:(fun slot_index -> - let reg_class = Proc.register_class reg in - match Buckets.find_bucket buckets ~reg_class ~slot_index with + let stack_class = Proc.stack_slot_class reg.typ in + match Buckets.find_bucket buckets ~stack_class ~slot_index with | None -> - fatal "slot %d (reg_class=%d) is not in any of the buckets" - slot_index reg_class + fatal "slot %d (stack_class=%d) is not in any of the buckets" + slot_index stack_class | Some bucket_index -> if debug then Format.eprintf "changing the slot index of %a (class %d): %d ~> %d\n%!" - Printmach.reg reg reg_class slot_index bucket_index; + Printmach.reg reg stack_class slot_index bucket_index; reg.loc <- Stack (Local bucket_index); - max_bucket_indices.(reg_class) - <- Stdlib.Int.max max_bucket_indices.(reg_class) bucket_index; + max_bucket_indices.(stack_class) + <- Stdlib.Int.max + max_bucket_indices.(stack_class) + bucket_index; if Reg.Tbl.mem t.stack_slots reg then Reg.Tbl.replace t.stack_slots reg bucket_index)); - for reg_class = 0 to pred Proc.num_register_classes do - let old_value = t.num_stack_slots.(reg_class) in - let new_value = succ max_bucket_indices.(reg_class) in + for stack_class = 0 to pred Proc.num_stack_slot_classes do + let old_value = t.num_stack_slots.(stack_class) in + let new_value = succ max_bucket_indices.(stack_class) in if new_value > old_value then fatal "more slots are now used for class %d (before: %d, after: %d)" - reg_class old_value new_value; + stack_class old_value new_value; if debug then - Format.eprintf "reg_class %d has %d fewer slots (%d ~> %d)\n%!" - reg_class (old_value - new_value) old_value new_value; - t.num_stack_slots.(reg_class) <- new_value + Format.eprintf "stack_class %d has %d fewer slots (%d ~> %d)\n%!" + stack_class (old_value - new_value) old_value new_value; + t.num_stack_slots.(stack_class) <- new_value done; Cfg_with_infos.invalidate_liveness cfg_with_infos)) diff --git a/backend/regalloc/regalloc_stack_slots.mli b/backend/regalloc/regalloc_stack_slots.mli index 77eaa4f1389..b24de06185a 100644 --- a/backend/regalloc/regalloc_stack_slots.mli +++ b/backend/regalloc/regalloc_stack_slots.mli @@ -6,9 +6,9 @@ type t val make : unit -> t -val size_for_all_reg_classes : t -> int +val size_for_all_stack_classes : t -> int -val get_and_incr : t -> reg_class:int -> slot +val get_and_incr : t -> stack_class:int -> slot val get_or_create : t -> Reg.t -> slot diff --git a/backend/regalloc/regalloc_utils.ml b/backend/regalloc/regalloc_utils.ml index 3c6a97219fe..3c62c74f380 100644 --- a/backend/regalloc/regalloc_utils.ml +++ b/backend/regalloc/regalloc_utils.ml @@ -250,6 +250,10 @@ let same_reg_class : Reg.t -> Reg.t -> bool = fun reg1 reg2 -> Int.equal (Proc.register_class reg1) (Proc.register_class reg2) +let same_stack_class : Reg.t -> Reg.t -> bool = + fun reg1 reg2 -> + Int.equal (Proc.stack_slot_class reg1.typ) (Proc.stack_slot_class reg2.typ) + let make_temporary : same_class_and_base_name_as:Reg.t -> name_prefix:string -> Reg.t = fun ~same_class_and_base_name_as:reg ~name_prefix -> diff --git a/backend/regalloc/regalloc_utils.mli b/backend/regalloc/regalloc_utils.mli index a1b48ca8560..2a114a61f2d 100644 --- a/backend/regalloc/regalloc_utils.mli +++ b/backend/regalloc/regalloc_utils.mli @@ -92,6 +92,8 @@ end val same_reg_class : Reg.t -> Reg.t -> bool +val same_stack_class : Reg.t -> Reg.t -> bool + val make_temporary : same_class_and_base_name_as:Reg.t -> name_prefix:string -> Reg.t diff --git a/backend/regalloc/regalloc_validate.ml b/backend/regalloc/regalloc_validate.ml index 42c5bc7e6ca..53d83347919 100644 --- a/backend/regalloc/regalloc_validate.ml +++ b/backend/regalloc/regalloc_validate.ml @@ -25,7 +25,7 @@ module Location : sig val to_loc_lossy : t -> Reg.location - val print : Format.formatter -> t -> unit + val print : Cmm.machtype_component -> Format.formatter -> t -> unit val equal : t -> t -> bool @@ -35,15 +35,15 @@ module Location : sig end = struct module Stack = struct (** This type is based on [Reg.stack_location]. The first difference is that - for [Stack (Local index)] this types additionally stores [reg_class] - because local stacks are separate for different register classes. + for [Stack (Local index)] this types additionally stores [stack_class] + because local stacks are separate for different stack slot classes. Secondly for all stacks it stores index in words and not byte offset. That gives the guarantee that if indices are different then the locations do not overlap. *) type t = | Local of { index : int; - reg_class : int + stack_class : int } | Incoming of { index : int } | Outgoing of { index : int } @@ -84,9 +84,9 @@ end = struct let word_index_to_byte_offset index = index * word_size - let of_stack_loc ~reg_class loc = + let of_stack_loc ~stack_class loc = match loc with - | Reg.Local index -> Local { index; reg_class } + | Reg.Local index -> Local { index; stack_class } | Reg.Incoming offset -> Incoming { index = byte_offset_to_word_index offset } | Reg.Outgoing offset -> @@ -101,13 +101,6 @@ end = struct | Outgoing { index } -> Reg.Outgoing (word_index_to_byte_offset index) | Domainstate { index } -> Reg.Domainstate (word_index_to_byte_offset index) - - let unknown_reg_class = -1 - - let reg_class_lossy t = - match t with - | Local { reg_class; _ } -> reg_class - | Incoming _ | Outgoing _ | Domainstate _ -> unknown_reg_class end type t = @@ -120,7 +113,10 @@ end = struct | Reg.Reg idx -> Some (Reg idx) | Reg.Stack stack -> Some - (Stack (Stack.of_stack_loc ~reg_class:(Proc.register_class reg) stack)) + (Stack + (Stack.of_stack_loc + ~stack_class:(Proc.stack_slot_class reg.Reg.typ) + stack)) let of_reg_exn reg = of_reg reg |> Option.get @@ -131,13 +127,8 @@ end = struct | Reg idx -> Reg.Reg idx | Stack stack -> Reg.Stack (Stack.to_stack_loc_lossy stack) - let reg_class_lossy t = - match t with Reg _ -> -1 | Stack stack -> Stack.reg_class_lossy stack - - let print ppf t = - Printmach.loc ~reg_class:(reg_class_lossy t) - ~unknown:(fun _ -> assert false) - ppf (to_loc_lossy t) + let print typ ppf t = + Printmach.loc ~unknown:(fun _ -> assert false) ppf (to_loc_lossy t) typ let compare (t1 : t) (t2 : t) : int = (* CR-someday azewierzejew: Implement proper comparison. *) @@ -210,6 +201,8 @@ module Register : sig val print : Format.formatter -> t -> unit + val typ : t -> Cmm.machtype_component + module Set : Set.S with type elt = t module Map : Map.S with type key = t @@ -240,10 +233,12 @@ end = struct loc = Reg_id.to_loc_lossy t.reg_id } + let typ (t : t) = t.for_print.typ + let print (ppf : Format.formatter) (t : t) : unit = match t.reg_id with | Preassigned { location } -> - Format.fprintf ppf "R[%a]" Location.print location + Format.fprintf ppf "R[%a]" (Location.print t.for_print.typ) location | Named _ -> Printmach.reg ppf (to_dummy_reg t) let compare (t1 : t) (t2 : t) : int = Reg_id.compare t1.reg_id t2.reg_id @@ -454,7 +449,10 @@ end = struct | Preassigned { location = prev_loc }, Some new_loc -> Regalloc_utils.fatal "%s: changed preassigned register's location from %a to %a" context - Location.print prev_loc Location.print new_loc) + (Location.print (Register.typ reg_desc)) + prev_loc + (Location.print loc_reg.Reg.typ) + new_loc) reg_arr loc_arr; () @@ -773,7 +771,9 @@ end = struct type t = Register.t * Location.t let print ppf (r, l) = - Format.fprintf ppf "%a=%a" Register.print r Location.print l + Format.fprintf ppf "%a=%a" Register.print r + (Location.print (Register.typ r)) + l end exception Verification_failed of string @@ -872,7 +872,7 @@ end = struct then ( Format.fprintf Format.str_formatter "Unsatisfiable equations when removing result equations.\n\ - Existing equation has to agree one 0 or 2 sides (cannot on exactly \ + Existing equation has to agree on 0 or 2 sides (cannot be exactly \ 1) with the removed equation.\n\ Existing equation %a.\n\ Removed equation: %a." Equation.print (eq_reg, eq_loc) Equation.print @@ -913,9 +913,10 @@ end = struct | None -> () | Some regs -> assert (not (Register.Set.is_empty regs)); + let typ = Register.Set.choose regs |> Register.typ in Format.fprintf Format.str_formatter - "Destroying a location %a in which a live registers %a is stored" - Location.print destroyed_loc + "Destroying a location %a in which live registers %a are stored" + (Location.print typ) destroyed_loc (Format.pp_print_seq ~pp_sep:(fun ppf () -> Format.fprintf ppf ", ") Register.print) @@ -964,9 +965,9 @@ module type Description_value = sig end let print_reg_as_loc ppf reg = - Printmach.loc ~reg_class:(Proc.register_class reg) + Printmach.loc ~unknown:(fun ppf -> Format.fprintf ppf "") - ppf reg.Reg.loc + ppf reg.Reg.loc reg.Reg.typ module Domain : Cfg_dataflow.Domain_S with type t = Equation_set.t = struct (** This type corresponds to the set of equations in the dataflow from the @@ -1125,7 +1126,7 @@ module Transfer (Desc_val : Description_value) : equations |> Equation_set.verify_destroyed_locations ~destroyed: - (Location.of_regs_exn Proc.destroyed_at_raise) + (Location.of_regs_exn (Proc.destroyed_at_raise ())) |> Result.map_error (fun message -> Printf.sprintf "While verifying locations destroyed at raise: %s" @@ -1306,8 +1307,8 @@ end = struct Format.fprintf ppf "Function argument locations: %a\n" (Format.pp_print_seq ~pp_sep:(fun ppf () -> Format.pp_print_string ppf ", ") - Location.print) - (Array.to_seq loc_fun_args); + (fun ppf (reg, loc) -> Location.print (Register.typ reg) ppf loc)) + (Array.to_seq (Array.combine reg_fun_args loc_fun_args)); () end diff --git a/backend/reloadgen.ml b/backend/reloadgen.ml index d5ca5521b3d..53cb179c8f1 100644 --- a/backend/reloadgen.ml +++ b/backend/reloadgen.ml @@ -67,7 +67,7 @@ method reload_operation op arg res = begin match arg.(0), res.(0) with {loc = Stack s1}, {loc = Stack s2} -> if s1 = s2 - && Proc.register_class arg.(0) = Proc.register_class res.(0) then + && Proc.stack_slot_class arg.(0).typ = Proc.stack_slot_class res.(0).typ then (* nothing will be emitted later, not necessary to apply constraints *) (arg, res) diff --git a/backend/selectgen.ml b/backend/selectgen.ml index cc33b0ed374..7e513560f36 100644 --- a/backend/selectgen.ml +++ b/backend/selectgen.ml @@ -199,6 +199,7 @@ let oper_result_type = function begin match c with | Word_val -> typ_val | Single | Double -> typ_float + | Onetwentyeight -> typ_vec128 | _ -> typ_int end | Calloc _ -> typ_val @@ -236,6 +237,7 @@ let size_component = function | Val | Addr -> Arch.size_addr | Int -> Arch.size_int | Float -> Arch.size_float + | Vec128 -> Arch.size_vec128 let size_machtype mty = let size = ref 0 in @@ -250,6 +252,7 @@ let size_expr (env:environment) exp = | Cconst_symbol _ -> Arch.size_addr | Cconst_float _ -> Arch.size_float + | Cconst_vec128 _ -> Arch.size_vec128 | Cvar id -> begin try V.Map.find id localenv @@ -474,6 +477,7 @@ method is_simple_expr = function | Cconst_natint _ -> true | Cconst_float _ -> true | Cconst_symbol _ -> true + | Cconst_vec128 _ -> true | Cvar _ -> true | Ctuple el -> List.for_all self#is_simple_expr el | Clet(_id, arg, body) | Clet_mut(_id, _, arg, body) -> @@ -519,7 +523,7 @@ method is_simple_expr = function method effects_of exp = let module EC = Effect_and_coeffect in match exp with - | Cconst_int _ | Cconst_natint _ | Cconst_float _ | Cconst_symbol _ + | Cconst_int _ | Cconst_natint _ | Cconst_float _ | Cconst_symbol _ | Cconst_vec128 _ | Cvar _ -> EC.none | Ctuple el -> EC.join_list_map el self#effects_of | Clet (_id, arg, body) | Clet_mut (_id, _, arg, body) -> @@ -853,6 +857,9 @@ method emit_expr_aux (env:environment) exp : | Cconst_float (n, _dbg) -> let r = self#regs_for typ_float in ret (self#insert_op env (Iconst_float (Int64.bits_of_float n)) [||] r) + | Cconst_vec128 (bits, _dbg) -> + let r = self#regs_for typ_vec128 in + ret (self#insert_op env (Iconst_vec128 bits) [||] r) | Cconst_symbol (n, _dbg) -> (* Cconst_symbol _ evaluates to a statically-allocated address, so its value fits in a typ_int register and is never changed by the GC. @@ -1372,7 +1379,11 @@ method emit_stores env data regs_addr = Istore(_, _, _) -> for i = 0 to Array.length regs - 1 do let r = regs.(i) in - let kind = if r.typ = Float then Double else Word_val in + let kind = match r.typ with + | Float -> Double + | Vec128 -> Onetwentyeight + | Val | Addr | Int -> Word_val + in self#insert env (Iop(Istore(kind, !a, false))) (Array.append [|r|] regs_addr) [||]; @@ -1613,7 +1624,7 @@ method emit_tail (env:environment) exp = self#emit_tail { env with regions } e end | Cop _ - | Cconst_int _ | Cconst_natint _ | Cconst_float _ | Cconst_symbol _ + | Cconst_int _ | Cconst_natint _ | Cconst_float _ | Cconst_symbol _ | Cconst_vec128 _ | Cvar _ | Cassign _ | Ctuple _ @@ -1660,7 +1671,7 @@ method emit_fundecl ~future_funcnames f = fun_codegen_options = f.Cmm.fun_codegen_options; fun_dbg = f.Cmm.fun_dbg; fun_poll = f.Cmm.fun_poll; - fun_num_stack_slots = Array.make Proc.num_register_classes 0; + fun_num_stack_slots = Array.make Proc.num_stack_slot_classes 0; fun_contains_calls = !contains_calls; } diff --git a/backend/x86_ast.mli b/backend/x86_ast.mli index 52b1c3b37cb..08b91732545 100644 --- a/backend/x86_ast.mli +++ b/backend/x86_ast.mli @@ -57,7 +57,8 @@ type constant = type data_type = | NONE | REAL4 | REAL8 (* floating point values *) - | BYTE | WORD | DWORD | QWORD | OWORD (* integer values *) + | BYTE | WORD | DWORD | QWORD (* integer values *) + | VEC128 (* vector values (float & integer) *) | NEAR | PROC type reg64 = @@ -67,7 +68,6 @@ type reg64 = type reg8h = | AH | BH | CH | DH - type regf = XMM of int type arch = X64 | X86 @@ -145,6 +145,7 @@ type instruction = | MINSD of arg * arg | MOV of arg * arg | MOVAPD of arg * arg + | MOVUPD of arg * arg | MOVD of arg * arg | MOVQ of arg * arg | MOVLPD of arg * arg diff --git a/backend/x86_binary_emitter.ml b/backend/x86_binary_emitter.ml index 15b11cf3adc..09ab0ed0d22 100644 --- a/backend/x86_binary_emitter.ml +++ b/backend/x86_binary_emitter.ml @@ -434,8 +434,8 @@ let emit_mod_rm_reg b rex opcodes rm reg = emit_rex b (rex lor rex_of_reg16 reg16 lor rexr_reg reg lor rexb_rm rm); buf_opcodes b opcodes; buf_int8 b (mod_rm_reg 0b11 rm reg) - | Regf rm -> - let rm = rd_of_regf rm in + | Regf regf -> + let rm = rd_of_regf regf in emit_rex b (rex lor rexr_reg reg lor rexb_rm rm); buf_opcodes b opcodes; buf_int8 b (mod_rm_reg 0b11 rm reg) @@ -569,6 +569,16 @@ let emit_movapd b dst src = emit_mod_rm_reg b 0 [ 0x0f; 0x29 ] rm (rd_of_regf reg) | _ -> assert false +let emit_movupd b dst src = + match (dst, src) with + | Regf reg, ((Regf _ | Mem _ | Mem64_RIP _) as rm) -> + buf_int8 b 0x66; + emit_mod_rm_reg b 0 [ 0x0f; 0x10 ] rm (rd_of_regf reg) + | ((Mem _ | Mem64_RIP _) as rm), Regf reg -> + buf_int8 b 0x66; + emit_mod_rm_reg b 0 [ 0x0f; 0x11 ] rm (rd_of_regf reg) + | _ -> assert false + let emit_movd b ~dst ~src = match (dst, src) with | Regf reg, ((Reg32 _ | Mem _ | Mem64_RIP _) as rm) -> @@ -1441,6 +1451,7 @@ let assemble_instr b loc = function | MINSD (src, dst) -> emit_minsd b ~dst ~src | MOV (src, dst) -> emit_MOV b dst src | MOVAPD (src, dst) -> emit_movapd b dst src + | MOVUPD (src, dst) -> emit_movupd b dst src | MOVD (src, dst) -> emit_movd b ~dst ~src | MOVQ (src, dst) -> emit_movq b ~dst ~src | MOVLPD (src, dst) -> emit_movlpd b dst src diff --git a/backend/x86_dsl.ml b/backend/x86_dsl.ml index 0e39a6d1eb7..f0b39857c1d 100644 --- a/backend/x86_dsl.ml +++ b/backend/x86_dsl.ml @@ -156,6 +156,7 @@ module I = struct let minsd x y = emit (MINSD (x,y)) let mov x y = emit (MOV (x, y)) let movapd x y = emit (MOVAPD (x, y)) + let movupd x y = emit (MOVUPD (x, y)) let movd x y = emit (MOVD (x, y)) let movq x y = emit (MOVQ (x, y)) let movsd x y = emit (MOVSD (x, y)) diff --git a/backend/x86_dsl.mli b/backend/x86_dsl.mli index bfc9e4b1b8d..6dd82de113a 100644 --- a/backend/x86_dsl.mli +++ b/backend/x86_dsl.mli @@ -151,6 +151,7 @@ module I : sig val minsd: arg -> arg -> unit val mov: arg -> arg -> unit val movapd: arg -> arg -> unit + val movupd: arg -> arg -> unit val movd: arg -> arg -> unit val movq: arg -> arg -> unit val movsd: arg -> arg -> unit diff --git a/backend/x86_gas.ml b/backend/x86_gas.ml index e88e5efd708..f4c44076ecd 100644 --- a/backend/x86_gas.ml +++ b/backend/x86_gas.ml @@ -98,8 +98,8 @@ let suf arg = | DWORD | REAL8 -> "l" | QWORD -> "q" | REAL4 -> "s" - | NONE -> "" - | OWORD | NEAR | PROC -> assert false + | VEC128 | NONE -> "" + | NEAR | PROC -> assert false let i0 b s = bprintf b "\t%s" s let i1 b s x = bprintf b "\t%s\t%a" s arg x @@ -164,6 +164,7 @@ let print_instr b = function i2 b "movabsq" arg1 arg2 | MOV (arg1, arg2) -> i2_s b "mov" arg1 arg2 | MOVAPD (arg1, arg2) -> i2 b "movapd" arg1 arg2 + | MOVUPD (arg1, arg2) -> i2 b "movupd" arg1 arg2 | MOVD (arg1, arg2) -> i2 b "movd" arg1 arg2 | MOVQ (arg1, arg2) -> i2 b "movq" arg1 arg2 | MOVLPD (arg1, arg2) -> i2 b "movlpd" arg1 arg2 diff --git a/backend/x86_masm.ml b/backend/x86_masm.ml index ae78851ec58..5c681950aa2 100644 --- a/backend/x86_masm.ml +++ b/backend/x86_masm.ml @@ -19,8 +19,8 @@ open X86_proc let bprintf = Printf.bprintf let string_of_datatype = function + | VEC128 -> "XMMWORD" | QWORD -> "QWORD" - | OWORD -> "OWORD" | NONE -> assert false | REAL4 -> "REAL4" | REAL8 -> "REAL8" @@ -32,8 +32,8 @@ let string_of_datatype = function let string_of_datatype_ptr = function + | VEC128 -> "XMMWORD PTR " | QWORD -> "QWORD PTR " - | OWORD -> "OWORD PTR " | NONE -> "" | REAL4 -> "REAL4 PTR " | REAL8 -> "REAL8 PTR " @@ -161,6 +161,7 @@ let print_instr b = function i2 b "mov" arg1 (Reg32 r) | MOV (arg1, arg2) -> i2 b "mov" arg1 arg2 | MOVAPD (arg1, arg2) -> i2 b "movapd" arg1 arg2 + | MOVUPD (arg1, arg2) -> i2 b "movupd" arg1 arg2 | MOVD (arg1, arg2) -> i2 b "movd" arg1 arg2 | MOVQ (arg1, arg2) -> i2 b "movq" arg1 arg2 | MOVLPD (arg1, arg2) -> i2 b "movlpd" arg1 arg2 diff --git a/file_formats/cmx_format.mli b/file_formats/cmx_format.mli index 5d7fabc45fd..ef686fab74c 100644 --- a/file_formats/cmx_format.mli +++ b/file_formats/cmx_format.mli @@ -42,7 +42,7 @@ type export_info_raw = | Flambda2_raw of Flambda2_cmx.Flambda_cmx_format.raw option (* Declare machtype here to avoid depending on [Cmm]. *) -type machtype_component = Val | Addr | Int | Float +type machtype_component = Val | Addr | Int | Float | Vec128 type machtype = machtype_component array type apply_fn := machtype list * machtype * Lambda.alloc_mode diff --git a/middle_end/clambda.ml b/middle_end/clambda.ml index c0c09a5c410..4bd7fd68e57 100644 --- a/middle_end/clambda.ml +++ b/middle_end/clambda.ml @@ -32,6 +32,7 @@ type ustructured_constant = | Uconst_int32 of int32 | Uconst_int64 of int64 | Uconst_nativeint of nativeint + | Uconst_vec128 of { high : int64; low : int64 } | Uconst_block of int * uconstant list | Uconst_float_array of float list | Uconst_string of string @@ -212,6 +213,7 @@ let rank_structured_constant = function | Uconst_float_array _ -> 5 | Uconst_string _ -> 6 | Uconst_closure _ -> 7 + | Uconst_vec128 _ -> 8 let compare_structured_constants c1 c2 = match c1, c2 with @@ -227,6 +229,9 @@ let compare_structured_constants c1 c2 = | Uconst_string s1, Uconst_string s2 -> String.compare s1 s2 | Uconst_closure (_,lbl1,_), Uconst_closure (_,lbl2,_) -> String.compare lbl1 lbl2 + | Uconst_vec128 {high = l0; low = l1}, Uconst_vec128 {high = r0; low = r1} -> + let cmp = Int64.compare l0 r0 in + if cmp = 0 then Int64.compare l1 r1 else cmp | _, _ -> (* no overflow possible here *) rank_structured_constant c1 - rank_structured_constant c2 diff --git a/middle_end/clambda.mli b/middle_end/clambda.mli index 9e1409e5721..42250fb7010 100644 --- a/middle_end/clambda.mli +++ b/middle_end/clambda.mli @@ -32,6 +32,7 @@ type ustructured_constant = | Uconst_int32 of int32 | Uconst_int64 of int64 | Uconst_nativeint of nativeint + | Uconst_vec128 of { high : int64; low : int64 } | Uconst_block of int * uconstant list | Uconst_float_array of float list | Uconst_string of string diff --git a/middle_end/clambda_primitives.ml b/middle_end/clambda_primitives.ml index 92725a7a771..a550d96726e 100644 --- a/middle_end/clambda_primitives.ml +++ b/middle_end/clambda_primitives.ml @@ -158,18 +158,23 @@ and value_kind = Lambda.value_kind = non_consts : (int * value_kind list) list; } | Parrayval of array_kind + | Pboxedvectorval of boxed_vector and layout = Lambda.layout = | Ptop | Pvalue of value_kind | Punboxed_float | Punboxed_int of boxed_integer + | Punboxed_vector of boxed_vector | Pbottom and block_shape = Lambda.block_shape and boxed_integer = Primitive.boxed_integer = Pnativeint | Pint32 | Pint64 +and boxed_vector = Primitive.boxed_vector = + | Pvec128 + and bigarray_kind = Lambda.bigarray_kind = Pbigarray_unknown | Pbigarray_float32 | Pbigarray_float64 diff --git a/middle_end/clambda_primitives.mli b/middle_end/clambda_primitives.mli index b65e674ee76..11ecd5bd378 100644 --- a/middle_end/clambda_primitives.mli +++ b/middle_end/clambda_primitives.mli @@ -161,18 +161,24 @@ and value_kind = Lambda.value_kind = non_consts : (int * value_kind list) list; } | Parrayval of array_kind + | Pboxedvectorval of boxed_vector and layout = Lambda.layout = | Ptop | Pvalue of value_kind | Punboxed_float | Punboxed_int of boxed_integer + | Punboxed_vector of boxed_vector | Pbottom and block_shape = Lambda.block_shape + and boxed_integer = Primitive.boxed_integer = Pnativeint | Pint32 | Pint64 +and boxed_vector = Primitive.boxed_vector = + | Pvec128 + and bigarray_kind = Lambda.bigarray_kind = Pbigarray_unknown | Pbigarray_float32 | Pbigarray_float64 diff --git a/middle_end/closure/closure.ml b/middle_end/closure/closure.ml index 95f7588333d..df139603f61 100644 --- a/middle_end/closure/closure.ml +++ b/middle_end/closure/closure.ml @@ -64,7 +64,9 @@ let is_gc_ignorable kind = | Punboxed_float -> true | Punboxed_int _ -> true | Pvalue Pintval -> true - | Pvalue (Pgenval | Pfloatval | Pboxedintval _ | Pvariant _ | Parrayval _) -> false + | Punboxed_vector _ -> true + | Pvalue (Pgenval | Pfloatval | Pboxedintval _ | Pboxedvectorval _ + | Pvariant _ | Parrayval _) -> false let split_closure_fv kinds fv = List.fold_right (fun id (not_scanned, scanned) -> @@ -1757,7 +1759,7 @@ let collect_exported_structured_constants a = and structured_constant = function | Uconst_block (_, ul) -> List.iter const ul | Uconst_float _ | Uconst_int32 _ - | Uconst_int64 _ | Uconst_nativeint _ + | Uconst_int64 _ | Uconst_nativeint _ | Uconst_vec128 _ | Uconst_float_array _ | Uconst_string _ -> () | Uconst_closure _ -> assert false (* Cannot be generated *) and ulam = function diff --git a/middle_end/flambda/closure_offsets.ml b/middle_end/flambda/closure_offsets.ml index cfb1791786d..d771cdd3e54 100644 --- a/middle_end/flambda/closure_offsets.ml +++ b/middle_end/flambda/closure_offsets.ml @@ -79,6 +79,7 @@ let add_closure_offsets and not stored in a closure." | Punboxed_float -> true | Punboxed_int _ -> true + | Punboxed_vector _ -> true | Pvalue Pintval -> true | Pvalue _ -> false) free_vars diff --git a/middle_end/flambda/flambda_to_clambda.ml b/middle_end/flambda/flambda_to_clambda.ml index a2d6c2a23f2..b2613464916 100644 --- a/middle_end/flambda/flambda_to_clambda.ml +++ b/middle_end/flambda/flambda_to_clambda.ml @@ -712,6 +712,7 @@ and to_clambda_set_of_closures t env and not stored in a closure." | Punboxed_float -> true | Punboxed_int _ -> true + | Punboxed_vector _ -> true | Pvalue Pintval -> true | Pvalue _ -> false) free_vars diff --git a/middle_end/flambda2/from_lambda/closure_conversion.ml b/middle_end/flambda2/from_lambda/closure_conversion.ml index 728991b2ca3..b835937e8c1 100644 --- a/middle_end/flambda2/from_lambda/closure_conversion.ml +++ b/middle_end/flambda2/from_lambda/closure_conversion.ml @@ -206,7 +206,7 @@ let find_value_approximation env simple = match Reg_width_const.descr const with | Tagged_immediate i -> Value_approximation.Value_int i | Naked_immediate _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ - | Naked_nativeint _ -> + | Naked_vec128 _ | Naked_nativeint _ -> Value_approximation.Value_unknown) let find_value_approximation_through_symbol acc env simple = @@ -448,6 +448,8 @@ let close_c_call acc env ~loc ~let_bound_ids_with_kinds Some (P.Box_number (Naked_int32, Alloc_mode.For_allocations.heap)) | _, Unboxed_integer Pint64 -> Some (P.Box_number (Naked_int64, Alloc_mode.For_allocations.heap)) + | _, Unboxed_vector Pvec128 -> + Some (P.Box_number (Naked_vec128, Alloc_mode.For_allocations.heap)) | _, Untagged_int -> Some P.Tag_immediate in let return_continuation, needs_wrapper = @@ -473,6 +475,7 @@ let close_c_call acc env ~loc ~let_bound_ids_with_kinds | Unboxed_integer Pint32 -> K.naked_int32 | Unboxed_integer Pint64 -> K.naked_int64 | Untagged_int -> K.naked_immediate + | Unboxed_vector Pvec128 -> K.naked_vec128 in let param_arity = List.map kind_of_primitive_native_repr prim_native_repr_args @@ -559,6 +562,7 @@ let close_c_call acc env ~loc ~let_bound_ids_with_kinds | _, Unboxed_integer Pint32 -> Some (P.Unbox_number Naked_int32) | _, Unboxed_integer Pint64 -> Some (P.Unbox_number Naked_int64) | _, Untagged_int -> Some P.Untag_immediate + | _, Unboxed_vector Pvec128 -> Some (P.Unbox_number Naked_vec128) in match unbox_arg with | None -> fun args acc -> call (arg :: args) acc diff --git a/middle_end/flambda2/from_lambda/closure_conversion_aux.ml b/middle_end/flambda2/from_lambda/closure_conversion_aux.ml index 32008def512..11821118b77 100644 --- a/middle_end/flambda2/from_lambda/closure_conversion_aux.ml +++ b/middle_end/flambda2/from_lambda/closure_conversion_aux.ml @@ -468,9 +468,9 @@ module Acc = struct Block_approximation (fields, Alloc_mode.For_types.unknown ()) else Value_unknown | Set_of_closures _ | Boxed_float _ | Boxed_int32 _ | Boxed_int64 _ - | Boxed_nativeint _ | Immutable_float_block _ | Immutable_float_array _ - | Immutable_value_array _ | Empty_array | Mutable_string _ - | Immutable_string _ -> + | Boxed_vec128 _ | Boxed_nativeint _ | Immutable_float_block _ + | Immutable_float_array _ | Immutable_value_array _ | Empty_array + | Mutable_string _ | Immutable_string _ -> Value_unknown in let symbol_approximations = diff --git a/middle_end/flambda2/from_lambda/lambda_to_flambda.ml b/middle_end/flambda2/from_lambda/lambda_to_flambda.ml index b495613bb54..004cb76f423 100644 --- a/middle_end/flambda2/from_lambda/lambda_to_flambda.ml +++ b/middle_end/flambda2/from_lambda/lambda_to_flambda.ml @@ -1197,7 +1197,7 @@ let rec cps acc env ccenv (lam : L.lambda) (k : cps_continuation) let id = Ident.create_local name in let result_layout = L.primitive_result_layout prim in (match result_layout with - | Pvalue _ | Punboxed_float | Punboxed_int _ -> () + | Pvalue _ | Punboxed_float | Punboxed_int _ | Punboxed_vector _ -> () | Ptop | Pbottom -> Misc.fatal_errorf "Invalid result layout %a for primitive %a" Printlambda.layout result_layout Printlambda.primitive prim); diff --git a/middle_end/flambda2/from_lambda/lambda_to_flambda_primitives.ml b/middle_end/flambda2/from_lambda/lambda_to_flambda_primitives.ml index 00fc7108122..cd2c1c99c4a 100644 --- a/middle_end/flambda2/from_lambda/lambda_to_flambda_primitives.ml +++ b/middle_end/flambda2/from_lambda/lambda_to_flambda_primitives.ml @@ -429,6 +429,8 @@ let bigarray_box_or_tag_raw_value_to_read kind alloc_mode = fun arg -> H.Unary (Box_number (Naked_int64, alloc_mode), Prim arg) | Naked_number Naked_nativeint -> fun arg -> H.Unary (Box_number (Naked_nativeint, alloc_mode), Prim arg) + | Naked_number Naked_vec128 -> + fun arg -> H.Unary (Box_number (Naked_vec128, alloc_mode), Prim arg) | Region -> error "a region expression" | Rec_info -> error "recursion info" @@ -449,6 +451,8 @@ let bigarray_unbox_or_untag_value_to_store kind = fun arg -> H.Prim (Unary (Unbox_number Naked_int64, arg)) | Naked_number Naked_nativeint -> fun arg -> H.Prim (Unary (Unbox_number Naked_nativeint, arg)) + | Naked_number Naked_vec128 -> + fun arg -> H.Prim (Unary (Unbox_number Naked_vec128, arg)) | Region -> error "a region expression" | Rec_info -> error "recursion info" diff --git a/middle_end/flambda2/identifiers/int_ids.ml b/middle_end/flambda2/identifiers/int_ids.ml index 7b708fa4684..f1c6d4f6fe3 100644 --- a/middle_end/flambda2/identifiers/int_ids.ml +++ b/middle_end/flambda2/identifiers/int_ids.ml @@ -48,6 +48,7 @@ module Const_data = struct | Naked_int32 of Int32.t | Naked_int64 of Int64.t | Naked_nativeint of Targetint_32_64.t + | Naked_vec128 of Numeric_types.Vec128_by_bit_pattern.t let flags = const_flags @@ -86,6 +87,11 @@ module Const_data = struct Flambda_colours.naked_number Targetint_32_64.print n Flambda_colours.pop + | Naked_vec128 v -> + Format.fprintf ppf "%t#%a%t" + Flambda_colours.naked_number + Numeric_types.Vec128_by_bit_pattern.print v + Flambda_colours.pop let compare t1 t2 = match t1, t2 with @@ -97,6 +103,8 @@ module Const_data = struct | Naked_int32 n1, Naked_int32 n2 -> Int32.compare n1 n2 | Naked_int64 n1, Naked_int64 n2 -> Int64.compare n1 n2 | Naked_nativeint n1, Naked_nativeint n2 -> Targetint_32_64.compare n1 n2 + | Naked_vec128 v1, Naked_vec128 v2 -> + Numeric_types.Vec128_by_bit_pattern.compare v1 v2 | Naked_immediate _, _ -> -1 | _, Naked_immediate _ -> 1 | Tagged_immediate _, _ -> -1 @@ -107,6 +115,8 @@ module Const_data = struct | _, Naked_int32 _ -> 1 | Naked_int64 _, _ -> -1 | _, Naked_int64 _ -> 1 + | Naked_vec128 _, _ -> -1 + | _, Naked_vec128 _ -> 1 let equal t1 t2 = if t1 == t2 @@ -121,8 +131,11 @@ module Const_data = struct | Naked_int32 n1, Naked_int32 n2 -> Int32.equal n1 n2 | Naked_int64 n1, Naked_int64 n2 -> Int64.equal n1 n2 | Naked_nativeint n1, Naked_nativeint n2 -> Targetint_32_64.equal n1 n2 + | Naked_vec128 v1, Naked_vec128 v2 -> + Numeric_types.Vec128_by_bit_pattern.equal v1 v2 | ( ( Naked_immediate _ | Tagged_immediate _ | Naked_float _ - | Naked_int32 _ | Naked_int64 _ | Naked_nativeint _ ), + | Naked_vec128 _ | Naked_int32 _ | Naked_int64 _ | Naked_nativeint _ + ), _ ) -> false @@ -134,6 +147,7 @@ module Const_data = struct | Naked_int32 n -> Hashtbl.hash n | Naked_int64 n -> Hashtbl.hash n | Naked_nativeint n -> Targetint_32_64.hash n + | Naked_vec128 v -> Numeric_types.Vec128_by_bit_pattern.hash v end) end @@ -264,6 +278,8 @@ module Const = struct let naked_nativeint i = create (Naked_nativeint i) + let naked_vec128 i = create (Naked_vec128 i) + let const_true = tagged_immediate Targetint_31_63.bool_true let const_false = tagged_immediate Targetint_31_63.bool_false diff --git a/middle_end/flambda2/identifiers/int_ids.mli b/middle_end/flambda2/identifiers/int_ids.mli index 5d8b3b374d2..b33d3123adc 100644 --- a/middle_end/flambda2/identifiers/int_ids.mli +++ b/middle_end/flambda2/identifiers/int_ids.mli @@ -60,6 +60,8 @@ module Const : sig val naked_nativeint : Targetint_32_64.t -> t + val naked_vec128 : Numeric_types.Vec128_by_bit_pattern.t -> t + module Descr : sig type t = private | Naked_immediate of Targetint_31_63.t @@ -68,6 +70,7 @@ module Const : sig | Naked_int32 of Int32.t | Naked_int64 of Int64.t | Naked_nativeint of Targetint_32_64.t + | Naked_vec128 of Numeric_types.Vec128_by_bit_pattern.t include Container_types.S with type t := t end diff --git a/middle_end/flambda2/identifiers/reg_width_const.ml b/middle_end/flambda2/identifiers/reg_width_const.ml index e61472864f9..5f4311314f1 100644 --- a/middle_end/flambda2/identifiers/reg_width_const.ml +++ b/middle_end/flambda2/identifiers/reg_width_const.ml @@ -24,3 +24,4 @@ let of_descr (descr : Descr.t) = | Naked_int32 i -> naked_int32 i | Naked_int64 i -> naked_int64 i | Naked_nativeint i -> naked_nativeint i + | Naked_vec128 v -> naked_vec128 v diff --git a/middle_end/flambda2/kinds/flambda_kind.ml b/middle_end/flambda2/kinds/flambda_kind.ml index a014ec6e7f8..0669bd8cb47 100644 --- a/middle_end/flambda2/kinds/flambda_kind.ml +++ b/middle_end/flambda2/kinds/flambda_kind.ml @@ -21,6 +21,7 @@ module Naked_number_kind = struct | Naked_int32 | Naked_int64 | Naked_nativeint + | Naked_vec128 let print ppf t = match t with @@ -29,6 +30,7 @@ module Naked_number_kind = struct | Naked_int32 -> Format.pp_print_string ppf "Naked_int32" | Naked_int64 -> Format.pp_print_string ppf "Naked_int64" | Naked_nativeint -> Format.pp_print_string ppf "Naked_nativeint" + | Naked_vec128 -> Format.pp_print_string ppf "Naked_vec128" end type t = @@ -53,6 +55,8 @@ let naked_int64 = Naked_number Naked_int64 let naked_nativeint = Naked_number Naked_nativeint +let naked_vec128 = Naked_number Naked_vec128 + let region = Region let rec_info = Rec_info @@ -66,6 +70,7 @@ let to_lambda (t : t) : Lambda.layout = | Naked_number Naked_int32 -> Punboxed_int Pint32 | Naked_number Naked_int64 -> Punboxed_int Pint64 | Naked_number Naked_nativeint -> Punboxed_int Pnativeint + | Naked_number Naked_vec128 -> Punboxed_vector Pvec128 | Region -> Misc.fatal_error "Can't convert kind [Region] to lambda layout" | Rec_info -> Misc.fatal_error "Can't convert kind [Rec_info] to lambda layout" @@ -105,6 +110,9 @@ include Container_types.Make (struct | Naked_nativeint -> Format.fprintf ppf "%t@<1>\u{2115}@<1>\u{2115}%t" colour Flambda_colours.pop + | Naked_vec128 -> + Format.fprintf ppf "%t@<1>\u{2115}@<1>\u{1d54d}128%t" colour + Flambda_colours.pop else Format.fprintf ppf "(Naked_number %a)" Naked_number_kind.print naked_number_kind @@ -127,7 +135,9 @@ let is_naked_float t = match t with | Naked_number Naked_float -> true | Value - | Naked_number (Naked_immediate | Naked_int32 | Naked_int64 | Naked_nativeint) + | Naked_number + ( Naked_immediate | Naked_int32 | Naked_int64 | Naked_nativeint + | Naked_vec128 ) | Region | Rec_info -> false @@ -227,6 +237,7 @@ module Boxable_number = struct | Naked_int32 | Naked_int64 | Naked_nativeint + | Naked_vec128 let unboxed_kind t : kind = match t with @@ -234,10 +245,11 @@ module Boxable_number = struct | Naked_int32 -> Naked_number Naked_int32 | Naked_int64 -> Naked_number Naked_int64 | Naked_nativeint -> Naked_number Naked_nativeint + | Naked_vec128 -> Naked_number Naked_vec128 let primitive_kind t : Primitive.boxed_integer = match t with - | Naked_float -> assert false + | Naked_vec128 | Naked_float -> assert false | Naked_int32 -> Pint32 | Naked_int64 -> Pint64 | Naked_nativeint -> Pnativeint @@ -251,6 +263,7 @@ module Boxable_number = struct | Naked_int32 -> Format.pp_print_string ppf "Naked_int32" | Naked_int64 -> Format.pp_print_string ppf "Naked_int64" | Naked_nativeint -> Format.pp_print_string ppf "Naked_nativeint" + | Naked_vec128 -> Format.pp_print_string ppf "Naked_vec128" let compare = Stdlib.compare @@ -265,6 +278,7 @@ module Boxable_number = struct | Naked_int32 -> Format.pp_print_string ppf "naked_int32" | Naked_int64 -> Format.pp_print_string ppf "naked_int64" | Naked_nativeint -> Format.pp_print_string ppf "naked_nativeint" + | Naked_vec128 -> Format.pp_print_string ppf "naked_vec128" let print_lowercase_short ppf t = match t with @@ -272,6 +286,7 @@ module Boxable_number = struct | Naked_int32 -> Format.pp_print_string ppf "int32" | Naked_int64 -> Format.pp_print_string ppf "int64" | Naked_nativeint -> Format.pp_print_string ppf "nativeint" + | Naked_vec128 -> Format.pp_print_string ppf "vec128" end module With_subkind = struct @@ -282,6 +297,7 @@ module With_subkind = struct | Boxed_int32 | Boxed_int64 | Boxed_nativeint + | Boxed_vec128 | Tagged_immediate | Variant of { consts : Targetint_31_63.Set.t; @@ -306,6 +322,7 @@ module With_subkind = struct | Boxed_int32, Boxed_int32 | Boxed_int64, Boxed_int64 | Boxed_nativeint, Boxed_nativeint + | Boxed_vec128, Boxed_vec128 | Tagged_immediate, Tagged_immediate | Float_array, Float_array | Immediate_array, Immediate_array @@ -353,8 +370,8 @@ module With_subkind = struct true (* All other combinations are incompatible: *) | ( ( Anything | Boxed_float | Boxed_int32 | Boxed_int64 | Boxed_nativeint - | Tagged_immediate | Variant _ | Float_block _ | Float_array - | Immediate_array | Value_array | Generic_array ), + | Boxed_vec128 | Tagged_immediate | Variant _ | Float_block _ + | Float_array | Immediate_array | Value_array | Generic_array ), _ ) -> false @@ -380,6 +397,9 @@ module With_subkind = struct | Boxed_nativeint -> Format.fprintf ppf "%t=boxed_@<1>\u{2115}@<1>\u{2115}%t" colour Flambda_colours.pop + | Boxed_vec128 -> + Format.fprintf ppf "%t=boxed_@<1>\u{2115}@<1>\u{1d54d}128%t" colour + Flambda_colours.pop | Variant { consts; non_consts } -> let print_field ppf { kind = _; subkind } = print ppf subkind in Format.fprintf ppf "%t=Variant((consts (%a))@ (non_consts (%a)))%t" @@ -420,7 +440,7 @@ module With_subkind = struct | Naked_number _ | Region | Rec_info -> ( match subkind with | Anything -> () - | Boxed_float | Boxed_int32 | Boxed_int64 | Boxed_nativeint + | Boxed_float | Boxed_int32 | Boxed_int64 | Boxed_nativeint | Boxed_vec128 | Tagged_immediate | Variant _ | Float_block _ | Float_array | Immediate_array | Value_array | Generic_array -> Misc.fatal_errorf "Subkind %a is not valid for kind %a" Subkind.print @@ -449,6 +469,8 @@ module With_subkind = struct let naked_nativeint = create naked_nativeint Anything + let naked_vec128 = create naked_vec128 Anything + let region = create region Anything let boxed_float = create value Boxed_float @@ -459,6 +481,8 @@ module With_subkind = struct let boxed_nativeint = create value Boxed_nativeint + let boxed_vec128 = create value Boxed_vec128 + let tagged_immediate = create value Tagged_immediate let rec_info = create rec_info Anything @@ -495,6 +519,7 @@ module With_subkind = struct | Naked_int32 -> naked_int32 | Naked_int64 -> naked_int64 | Naked_nativeint -> naked_nativeint + | Naked_vec128 -> naked_vec128 let rec from_lambda_value_kind (vk : Lambda.value_kind) = match vk with @@ -503,6 +528,7 @@ module With_subkind = struct | Pboxedintval Pint32 -> boxed_int32 | Pboxedintval Pint64 -> boxed_int64 | Pboxedintval Pnativeint -> boxed_nativeint + | Pboxedvectorval Pvec128 -> boxed_vec128 | Pintval -> tagged_immediate | Pvariant { consts; non_consts } -> ( match consts, non_consts with @@ -544,6 +570,7 @@ module With_subkind = struct | Punboxed_int Pint32 -> naked_int32 | Punboxed_int Pint64 -> naked_int64 | Punboxed_int Pnativeint -> naked_nativeint + | Punboxed_vector Pvec128 -> naked_vec128 include Container_types.Make (struct type nonrec t = t @@ -555,8 +582,8 @@ module With_subkind = struct Format.fprintf ppf "@[%a%a@]" print kind Subkind.print subkind | ( (Naked_number _ | Region | Rec_info), ( Boxed_float | Boxed_int32 | Boxed_int64 | Boxed_nativeint - | Tagged_immediate | Variant _ | Float_block _ | Float_array - | Immediate_array | Value_array | Generic_array ) ) -> + | Boxed_vec128 | Tagged_immediate | Variant _ | Float_block _ + | Float_array | Immediate_array | Value_array | Generic_array ) ) -> assert false (* see [create] *) @@ -574,7 +601,7 @@ module With_subkind = struct let has_useful_subkind_info (t : t) = match t.subkind with | Anything -> false - | Boxed_float | Boxed_int32 | Boxed_int64 | Boxed_nativeint + | Boxed_float | Boxed_int32 | Boxed_int64 | Boxed_nativeint | Boxed_vec128 | Tagged_immediate | Variant _ | Float_block _ | Float_array | Immediate_array | Value_array | Generic_array -> true diff --git a/middle_end/flambda2/kinds/flambda_kind.mli b/middle_end/flambda2/kinds/flambda_kind.mli index 78658a4de9c..8b0451160ff 100644 --- a/middle_end/flambda2/kinds/flambda_kind.mli +++ b/middle_end/flambda2/kinds/flambda_kind.mli @@ -23,6 +23,7 @@ module Naked_number_kind : sig | Naked_int32 | Naked_int64 | Naked_nativeint + | Naked_vec128 val print : Format.formatter -> t -> unit end @@ -56,6 +57,8 @@ val naked_int64 : t val naked_nativeint : t +val naked_vec128 : t + val region : t val rec_info : t @@ -112,6 +115,7 @@ module Boxable_number : sig | Naked_int32 | Naked_int64 | Naked_nativeint + | Naked_vec128 val unboxed_kind : t -> kind @@ -134,6 +138,7 @@ module With_subkind : sig | Boxed_int32 | Boxed_int64 | Boxed_nativeint + | Boxed_vec128 | Tagged_immediate | Variant of { consts : Targetint_31_63.Set.t; @@ -172,6 +177,8 @@ module With_subkind : sig val naked_nativeint : t + val naked_vec128 : t + val region : t val boxed_float : t @@ -182,6 +189,8 @@ module With_subkind : sig val boxed_nativeint : t + val boxed_vec128 : t + val tagged_immediate : t val rec_info : t diff --git a/middle_end/flambda2/numbers/numeric_types.ml b/middle_end/flambda2/numbers/numeric_types.ml index 4894ebadf43..c6ef3377661 100644 --- a/middle_end/flambda2/numbers/numeric_types.ml +++ b/middle_end/flambda2/numbers/numeric_types.ml @@ -250,3 +250,65 @@ module Int64 = struct let cross_product = Pair.create_from_cross_product end + +module type Vector_width = sig + val size_in_int64s : int +end + +module Vector_by_bit_pattern (Width : Vector_width) = struct + module T0 = struct + type t = Int64.t Array.t + + let rec compare l r i = + if i = Width.size_in_int64s + then 0 + else + let cmp = Int64.compare l.(i) r.(i) in + if cmp = 0 then compare l r (i + 1) else cmp + + let compare l r = compare l r 0 + + let equal = Array.for_all2 Int64.equal + + let hash v = Hashtbl.hash v + + let print ppf t = + Format.pp_print_list + ~pp_sep:(fun ppf () -> Format.pp_print_char ppf ':') + (fun ppf i64 -> Format.fprintf ppf "%016Lx" i64) + ppf (Array.to_list t) + end + + include T0 + module Self = Container_types.Make (T0) + include Self + + let zero = Array.init Width.size_in_int64s (fun _ -> 0L) + + let to_int64_array t = t + + let of_int64_array t = + if not (Array.length t = Width.size_in_int64s) + then + Misc.fatal_error + "Vector_by_bit_pattern.of_int64_array: wrong length array"; + t +end + +module Vec128_by_bit_pattern = struct + include Vector_by_bit_pattern (struct + let size_in_int64s = 2 + end) + + type bits = + { high : int64; + low : int64 + } + + let to_bits t = + match to_int64_array t with + | [| high; low |] -> { high; low } + | _ -> Misc.fatal_error "Vec128.to_bits: wrong size vector" + + let of_bits { high; low } = of_int64_array [| high; low |] +end diff --git a/middle_end/flambda2/numbers/numeric_types.mli b/middle_end/flambda2/numbers/numeric_types.mli index 7db0b4e9558..2ccbd662930 100644 --- a/middle_end/flambda2/numbers/numeric_types.mli +++ b/middle_end/flambda2/numbers/numeric_types.mli @@ -134,3 +134,21 @@ module Int64 : sig val cross_product : Set.t -> Set.t -> Pair.Set.t end + +module Vec128_by_bit_pattern : sig + (** 128-bit value whose comparison and equality relations are lexicographically + ordered by bit pattern. *) + + include Container_types.S + + val zero : t + + type bits = + { high : int64; + low : int64 + } + + val to_bits : t -> bits + + val of_bits : bits -> t +end diff --git a/middle_end/flambda2/parser/fexpr.ml b/middle_end/flambda2/parser/fexpr.ml index 03395f4e2ef..1fd0b889764 100644 --- a/middle_end/flambda2/parser/fexpr.ml +++ b/middle_end/flambda2/parser/fexpr.ml @@ -57,6 +57,7 @@ type const = | Naked_float of float | Naked_int32 of int32 | Naked_int64 of int64 + | Naked_vec128 of Numeric_types.Vec128_by_bit_pattern.bits | Naked_nativeint of targetint type field_of_block = @@ -89,6 +90,7 @@ type static_data = | Boxed_int32 of int32 or_variable | Boxed_int64 of int64 or_variable | Boxed_nativeint of targetint or_variable + | Boxed_vec128 of Numeric_types.Vec128_by_bit_pattern.bits or_variable | Immutable_float_block of float or_variable list | Immutable_float_array of float or_variable list | Immutable_value_array of field_of_block list @@ -104,6 +106,7 @@ type subkind = | Boxed_int32 | Boxed_int64 | Boxed_nativeint + | Boxed_vec128 | Tagged_immediate | Variant of { consts : targetint list; @@ -178,6 +181,7 @@ type box_kind = Flambda_kind.Boxable_number.t = | Naked_int32 | Naked_int64 | Naked_nativeint + | Naked_vec128 type generic_array_specialisation = | No_specialisation diff --git a/middle_end/flambda2/parser/fexpr_to_flambda.ml b/middle_end/flambda2/parser/fexpr_to_flambda.ml index cce7facc6dd..7d13c088030 100644 --- a/middle_end/flambda2/parser/fexpr_to_flambda.ml +++ b/middle_end/flambda2/parser/fexpr_to_flambda.ml @@ -238,6 +238,9 @@ let targetint (i : Fexpr.targetint) : Targetint_32_64.t = let targetint_31_63 (i : Fexpr.targetint) : Targetint_31_63.t = Targetint_31_63.of_int64 i +let vec128 bits : Numeric_types.Vec128_by_bit_pattern.t = + Numeric_types.Vec128_by_bit_pattern.of_bits bits + let tag_scannable (tag : Fexpr.tag_scannable) : Tag.Scannable.t = Tag.Scannable.create_exn tag @@ -252,6 +255,7 @@ let rec subkind : Fexpr.subkind -> Flambda_kind.With_subkind.Subkind.t = | Boxed_int32 -> Boxed_int32 | Boxed_int64 -> Boxed_int64 | Boxed_nativeint -> Boxed_nativeint + | Boxed_vec128 -> Boxed_vec128 | Tagged_immediate -> Tagged_immediate | Variant { consts; non_consts } -> let consts = @@ -293,6 +297,7 @@ let const (c : Fexpr.const) : Reg_width_const.t = | Naked_int32 i -> Reg_width_const.naked_int32 i | Naked_int64 i -> Reg_width_const.naked_int64 i | Naked_nativeint i -> Reg_width_const.naked_nativeint (i |> targetint) + | Naked_vec128 bits -> Reg_width_const.naked_vec128 (bits |> vec128) let rec rec_info env (ri : Fexpr.rec_info) : Rec_info_expr.t = let module US = Rec_info_expr.Unrolling_state in @@ -758,6 +763,8 @@ let rec expr env (e : Fexpr.expr) : Flambda.Expr.t = static_const (SC.boxed_int64 (or_variable Fun.id env i)) | Boxed_nativeint i -> static_const (SC.boxed_nativeint (or_variable targetint env i)) + | Boxed_vec128 i -> + static_const (SC.boxed_vec128 (or_variable vec128 env i)) | Immutable_float_block elements -> static_const (SC.immutable_float_block diff --git a/middle_end/flambda2/parser/flambda_parser.mli b/middle_end/flambda2/parser/flambda_parser.mli index 97852cedadb..f1f876171ef 100644 --- a/middle_end/flambda2/parser/flambda_parser.mli +++ b/middle_end/flambda2/parser/flambda_parser.mli @@ -1,7 +1,7 @@ (* The type of tokens. *) -type token = +type token = | TILDEMINUS | TILDE | SYMBOL of (Fexpr.compilation_unit option * string) @@ -185,20 +185,20 @@ val flambda_unit: (Lexing.lexbuf -> token) -> Lexing.lexbuf -> (Fexpr.flambda_un val expect_test_spec: (Lexing.lexbuf -> token) -> Lexing.lexbuf -> (Fexpr.expect_test_spec) module MenhirInterpreter : sig - + (* The incremental API. *) - + include CamlinternalMenhirLib.IncrementalEngine.INCREMENTAL_ENGINE with type token = token - + end (* The entry point(s) to the incremental API. *) module Incremental : sig - + val flambda_unit: Lexing.position -> (Fexpr.flambda_unit) MenhirInterpreter.checkpoint - + val expect_test_spec: Lexing.position -> (Fexpr.expect_test_spec) MenhirInterpreter.checkpoint - + end diff --git a/middle_end/flambda2/parser/flambda_to_fexpr.ml b/middle_end/flambda2/parser/flambda_to_fexpr.ml index 844ae908008..f1725cb35b7 100644 --- a/middle_end/flambda2/parser/flambda_to_fexpr.ml +++ b/middle_end/flambda2/parser/flambda_to_fexpr.ml @@ -354,6 +354,8 @@ let name env n = let float f = f |> Numeric_types.Float_by_bit_pattern.to_float +let vec128 v = v |> Numeric_types.Vec128_by_bit_pattern.to_bits + let targetint i = i |> Targetint_32_64.to_int64 let const c : Fexpr.const = @@ -367,6 +369,8 @@ let const c : Fexpr.const = | Naked_float f -> Naked_float (f |> float) | Naked_int32 i -> Naked_int32 i | Naked_int64 i -> Naked_int64 i + | Naked_vec128 i -> + Naked_vec128 (Numeric_types.Vec128_by_bit_pattern.to_bits i) | Naked_nativeint i -> Naked_nativeint (i |> targetint) let depth_or_infinity (d : int Or_infinity.t) : Fexpr.rec_info = @@ -421,6 +425,7 @@ let rec subkind (k : Flambda_kind.With_subkind.Subkind.t) : Fexpr.subkind = | Boxed_int32 -> Boxed_int32 | Boxed_int64 -> Boxed_int64 | Boxed_nativeint -> Boxed_nativeint + | Boxed_vec128 -> Boxed_vec128 | Tagged_immediate -> Tagged_immediate | Variant { consts; non_consts } -> variant_subkind consts non_consts | Float_array -> Float_array @@ -689,6 +694,7 @@ let static_const env (sc : Static_const.t) : Fexpr.static_data = | Boxed_int32 i -> Boxed_int32 (or_variable Fun.id env i) | Boxed_int64 i -> Boxed_int64 (or_variable Fun.id env i) | Boxed_nativeint i -> Boxed_nativeint (or_variable targetint env i) + | Boxed_vec128 i -> Boxed_vec128 (or_variable vec128 env i) | Immutable_float_block elements -> Immutable_float_block (List.map (or_variable float env) elements) | Immutable_float_array elements -> diff --git a/middle_end/flambda2/parser/print_fexpr.ml b/middle_end/flambda2/parser/print_fexpr.ml index 316cbb68cdc..6371ccae309 100644 --- a/middle_end/flambda2/parser/print_fexpr.ml +++ b/middle_end/flambda2/parser/print_fexpr.ml @@ -132,6 +132,7 @@ let naked_number_kind ppf (nnk : Flambda_kind.Naked_number_kind.t) = | Naked_int32 -> "int32" | Naked_int64 -> "int64" | Naked_nativeint -> "nativeint" + | Naked_vec128 -> "vec128" let rec subkind ppf (k : subkind) = let str s = Format.pp_print_string ppf s in @@ -142,6 +143,7 @@ let rec subkind ppf (k : subkind) = | Boxed_int32 -> str "int32 boxed" | Boxed_int64 -> str "int64 boxed" | Boxed_nativeint -> str "nativeint boxed" + | Boxed_vec128 -> str "vec128 boxed" | Variant { consts; non_consts } -> variant_subkind ppf consts non_consts | Tagged_immediate -> str "imm tagged" | Float_array -> str "float array" @@ -255,6 +257,7 @@ let const ppf (c : Fexpr.const) = | Naked_int32 i -> Format.fprintf ppf "%lil" i | Naked_int64 i -> Format.fprintf ppf "%LiL" i | Naked_nativeint i -> Format.fprintf ppf "%Lin" i + | Naked_vec128 { high; low } -> Format.fprintf ppf "%016Lx:%016Lx" high low let rec simple ppf : simple -> unit = function | Symbol s -> symbol ppf s @@ -319,10 +322,13 @@ let static_data ppf : static_data -> unit = function | Boxed_int32 (Const i) -> Format.fprintf ppf "%lil" i | Boxed_int64 (Const i) -> Format.fprintf ppf "%LiL" i | Boxed_nativeint (Const i) -> Format.fprintf ppf "%Lin" i + | Boxed_vec128 (Const { high; low }) -> + Format.fprintf ppf "%016Lx:%016Lx" high low | Boxed_float (Var v) -> boxed_variable ppf v ~kind:"float" | Boxed_int32 (Var v) -> boxed_variable ppf v ~kind:"int32" | Boxed_int64 (Var v) -> boxed_variable ppf v ~kind:"int64" | Boxed_nativeint (Var v) -> boxed_variable ppf v ~kind:"nativeint" + | Boxed_vec128 (Var v) -> boxed_variable ppf v ~kind:"vec128" | Immutable_float_block elements -> Format.fprintf ppf "Float_block (%a)" (pp_comma_list float_or_variable) @@ -495,6 +501,7 @@ let unop ppf u = | Naked_int32 -> print verb_not_imm "int32" | Naked_int64 -> print verb_not_imm "int64" | Naked_nativeint -> print verb_not_imm "nativeint" + | Naked_vec128 -> print verb_not_imm "vec128" in match (u : unop) with | Array_length -> str "%array_length" diff --git a/middle_end/flambda2/simplify/expr_builder.ml b/middle_end/flambda2/simplify/expr_builder.ml index bd9e39ce827..354ea9cc71b 100644 --- a/middle_end/flambda2/simplify/expr_builder.ml +++ b/middle_end/flambda2/simplify/expr_builder.ml @@ -537,7 +537,7 @@ let create_let_symbols uacc lifted_constant ~body = | Naked_number Naked_float -> Naked_floats { size = Unknown } | Naked_number ( Naked_immediate | Naked_nativeint | Naked_int32 - | Naked_int64 ) + | Naked_vec128 | Naked_int64 ) | Region | Rec_info -> Misc.fatal_errorf "Unexpected kind %a for symbol projection: %a" diff --git a/middle_end/flambda2/simplify/lifting/reification.ml b/middle_end/flambda2/simplify/lifting/reification.ml index 5cc00438b79..59b4ebb3a38 100644 --- a/middle_end/flambda2/simplify/lifting/reification.ml +++ b/middle_end/flambda2/simplify/lifting/reification.ml @@ -37,7 +37,7 @@ let create_static_const dacc dbg (to_lift : T.to_lift) : RSC.t = match Reg_width_const.descr const with | Tagged_immediate imm -> F.Tagged_immediate imm | Naked_immediate _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ - | Naked_nativeint _ -> + | Naked_vec128 _ | Naked_nativeint _ -> Misc.fatal_errorf "Expected a constant of kind [Value] but got %a (dbg %a)" Reg_width_const.print const Debuginfo.print_compact dbg)) @@ -54,6 +54,7 @@ let create_static_const dacc dbg (to_lift : T.to_lift) : RSC.t = | Boxed_int32 i -> RSC.create_boxed_int32 art (Const i) | Boxed_int64 i -> RSC.create_boxed_int64 art (Const i) | Boxed_nativeint i -> RSC.create_boxed_nativeint art (Const i) + | Boxed_vec128 v -> RSC.create_boxed_vec128 art (Const v) | Immutable_float_array { fields } -> let fields = List.map (fun f -> Or_variable.Const f) fields in RSC.create_immutable_float_array art fields diff --git a/middle_end/flambda2/simplify/rebuilt_static_const.ml b/middle_end/flambda2/simplify/rebuilt_static_const.ml index 3925d8bf9b0..9c7f7dc948b 100644 --- a/middle_end/flambda2/simplify/rebuilt_static_const.ml +++ b/middle_end/flambda2/simplify/rebuilt_static_const.ml @@ -126,6 +126,11 @@ let create_boxed_nativeint are_rebuilding or_var = then Block_not_rebuilt { free_names = Or_variable.free_names or_var } else create_normal_non_code (SC.boxed_nativeint or_var) +let create_boxed_vec128 are_rebuilding or_var = + if ART.do_not_rebuild_terms are_rebuilding + then Block_not_rebuilt { free_names = Or_variable.free_names or_var } + else create_normal_non_code (SC.boxed_vec128 or_var) + let create_immutable_float_block are_rebuilding fields = if ART.do_not_rebuild_terms are_rebuilding then @@ -190,7 +195,7 @@ let map_set_of_closures t ~f = (SC.set_of_closures set_of_closures); free_names = Set_of_closures.free_names set_of_closures } - | Block _ | Boxed_float _ | Boxed_int32 _ | Boxed_int64 _ + | Block _ | Boxed_float _ | Boxed_int32 _ | Boxed_int64 _ | Boxed_vec128 _ | Boxed_nativeint _ | Immutable_float_block _ | Immutable_float_array _ | Immutable_value_array _ | Empty_array | Mutable_string _ | Immutable_string _ -> diff --git a/middle_end/flambda2/simplify/rebuilt_static_const.mli b/middle_end/flambda2/simplify/rebuilt_static_const.mli index 03f487f5ebe..790f4796d87 100644 --- a/middle_end/flambda2/simplify/rebuilt_static_const.mli +++ b/middle_end/flambda2/simplify/rebuilt_static_const.mli @@ -59,6 +59,11 @@ val create_boxed_int64 : Are_rebuilding_terms.t -> Int64.t Or_variable.t -> t val create_boxed_nativeint : Are_rebuilding_terms.t -> Targetint_32_64.t Or_variable.t -> t +val create_boxed_vec128 : + Are_rebuilding_terms.t -> + Numeric_types.Vec128_by_bit_pattern.t Or_variable.t -> + t + val create_immutable_float_block : Are_rebuilding_terms.t -> Numeric_types.Float_by_bit_pattern.t Or_variable.t list -> diff --git a/middle_end/flambda2/simplify/simplify_binary_primitive.ml b/middle_end/flambda2/simplify/simplify_binary_primitive.ml index 10a95033e41..627fafea315 100644 --- a/middle_end/flambda2/simplify/simplify_binary_primitive.ml +++ b/middle_end/flambda2/simplify/simplify_binary_primitive.ml @@ -923,7 +923,7 @@ let simplify_immutable_block_load access_kind ~min_name_mode dacc ~original_term ~projection_bound_to:result_var ~kind:Flambda_kind.With_subkind.tagged_immediate | Naked_immediate _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ - | Naked_nativeint _ -> + | Naked_nativeint _ | Naked_vec128 _ -> Misc.fatal_errorf "Kind error for [Block_load] of %a at index %a" Simple.print arg1 Simple.print arg2) ~name:(fun _ ~coercion:_ -> dacc) diff --git a/middle_end/flambda2/simplify/simplify_extcall.ml b/middle_end/flambda2/simplify/simplify_extcall.ml index e8560098315..ba9f27d189f 100644 --- a/middle_end/flambda2/simplify/simplify_extcall.ml +++ b/middle_end/flambda2/simplify/simplify_extcall.ml @@ -111,7 +111,10 @@ let simplify_comparison ~dbg ~dacc ~cont ~tagged_prim ~float_prim | Proved (Boxed _), Proved Tagged_immediate | ( Proved (Boxed - (_, (Naked_float | Naked_int32 | Naked_int64 | Naked_nativeint), _)), + ( _, + ( Naked_float | Naked_int32 | Naked_int64 | Naked_nativeint + | Naked_vec128 ), + _ )), Proved (Boxed _) ) (* One or two of the arguments is not known *) | Unknown, Unknown diff --git a/middle_end/flambda2/simplify/simplify_static_const.ml b/middle_end/flambda2/simplify/simplify_static_const.ml index 99afdb19e80..d84d4ff423c 100644 --- a/middle_end/flambda2/simplify/simplify_static_const.ml +++ b/middle_end/flambda2/simplify/simplify_static_const.ml @@ -37,7 +37,7 @@ let simplify_field_of_block dacc (field : Field_of_static_block.t) = match Reg_width_const.descr const with | Tagged_immediate imm -> Field_of_static_block.Tagged_immediate imm, ty | Naked_immediate _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ - | Naked_nativeint _ -> + | Naked_nativeint _ | Naked_vec128 _ -> (* CR mshinwell: This should be "invalid" and propagate up *) field, ty) @@ -125,6 +125,17 @@ let simplify_static_const_of_kind_value dacc (static_const : Static_const.t) (DA.are_rebuilding_terms dacc) or_var, dacc ) + | Boxed_vec128 or_var -> + let or_var, ty = + simplify_or_variable dacc + (fun f -> T.this_boxed_vec128 f Alloc_mode.For_types.heap) + or_var K.value + in + let dacc = bind_result_sym ty in + ( Rebuilt_static_const.create_boxed_vec128 + (DA.are_rebuilding_terms dacc) + or_var, + dacc ) | Immutable_float_block fields -> let fields_with_tys = List.map diff --git a/middle_end/flambda2/simplify/simplify_switch_expr.ml b/middle_end/flambda2/simplify/simplify_switch_expr.ml index 0ef1827bd8d..4e9e7a16716 100644 --- a/middle_end/flambda2/simplify/simplify_switch_expr.ml +++ b/middle_end/flambda2/simplify/simplify_switch_expr.ml @@ -180,7 +180,7 @@ let rebuild_arm uacc arm (action, use_id, arity, env_at_use) maybe_mergeable ~mergeable_arms ~identity_arms ~not_arms else maybe_mergeable ~mergeable_arms ~identity_arms ~not_arms | Naked_immediate _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ - | Naked_nativeint _ -> + | Naked_vec128 _ | Naked_nativeint _ -> maybe_mergeable ~mergeable_arms ~identity_arms ~not_arms in Simple.pattern_match arg ~const ~name:(fun _ ~coercion:_ -> diff --git a/middle_end/flambda2/simplify/simplify_unary_primitive.ml b/middle_end/flambda2/simplify/simplify_unary_primitive.ml index 765d0694079..3c07c9019b3 100644 --- a/middle_end/flambda2/simplify/simplify_unary_primitive.ml +++ b/middle_end/flambda2/simplify/simplify_unary_primitive.ml @@ -110,6 +110,10 @@ let simplify_unbox_number (boxable_number_kind : K.Boxable_number.t) dacc ( T.boxed_nativeint_alias_to ~naked_nativeint:result_var' (Alloc_mode.For_types.unknown ()), K.naked_nativeint ) + | Naked_vec128 -> + ( T.boxed_vec128_alias_to ~naked_vec128:result_var' + (Alloc_mode.For_types.unknown ()), + K.naked_vec128 ) in let alloc_mode = T.prove_alloc_mode_of_boxed_number (DA.typing_env dacc) boxed_number_ty @@ -165,6 +169,7 @@ let simplify_box_number (boxable_number_kind : K.Boxable_number.t) alloc_mode | Naked_int32 -> T.box_int32 naked_number_ty alloc_mode | Naked_int64 -> T.box_int64 naked_number_ty alloc_mode | Naked_nativeint -> T.box_nativeint naked_number_ty alloc_mode + | Naked_vec128 -> T.box_vec128 naked_number_ty alloc_mode in let dacc = DA.add_variable dacc result_var ty in SPR.create original_term ~try_reify:true dacc @@ -569,6 +574,7 @@ let simplify_obj_dup dbg dacc ~original_term ~arg ~arg_ty ~result_var = | Naked_int32 -> T.box_int32 | Naked_int64 -> T.box_int64 | Naked_nativeint -> T.box_nativeint + | Naked_vec128 -> T.box_vec128 in let ty = boxer contents_ty Alloc_mode.For_types.heap in let dacc = DA.add_variable dacc result_var ty in diff --git a/middle_end/flambda2/simplify/unboxing/build_unboxing_denv.ml b/middle_end/flambda2/simplify/unboxing/build_unboxing_denv.ml index 343c2bed584..8ab1b7511fd 100644 --- a/middle_end/flambda2/simplify/unboxing/build_unboxing_denv.ml +++ b/middle_end/flambda2/simplify/unboxing/build_unboxing_denv.ml @@ -130,7 +130,8 @@ let rec denv_of_decision denv ~param_var (decision : U.decision) : DE.t = Unbox ( Unique_tag_and_size _ | Variant _ | Closure_single_entry _ | Number - ( (Naked_float | Naked_int32 | Naked_int64 | Naked_nativeint), + ( ( Naked_float | Naked_int32 | Naked_int64 + | Naked_nativeint | Naked_vec128 ), _ ) ); is_int = _ } -> @@ -195,3 +196,8 @@ let rec denv_of_decision denv ~param_var (decision : U.decision) : DE.t = in denv_of_number_decision K.naked_nativeint shape param_var naked_nativeint denv + | Unbox (Number (Naked_vec128, { param = naked_vec128; args = _ })) -> + let shape = + T.boxed_vec128_alias_to ~naked_vec128 (Alloc_mode.For_types.unknown ()) + in + denv_of_number_decision K.naked_vec128 shape param_var naked_vec128 denv diff --git a/middle_end/flambda2/simplify/unboxing/is_unboxing_beneficial.ml b/middle_end/flambda2/simplify/unboxing/is_unboxing_beneficial.ml index c30062be732..0f1827e02c4 100644 --- a/middle_end/flambda2/simplify/unboxing/is_unboxing_beneficial.ml +++ b/middle_end/flambda2/simplify/unboxing/is_unboxing_beneficial.ml @@ -79,8 +79,9 @@ let rec filter_non_beneficial_decisions decision : U.decision = decision | Unbox (Number - ((Naked_float | Naked_int32 | Naked_int64 | Naked_nativeint), epa)) as - decision -> + ( ( Naked_float | Naked_int32 | Naked_int64 | Naked_nativeint + | Naked_vec128 ), + epa )) as decision -> if is_unboxing_beneficial_for_epa epa then decision else Do_not_unbox Not_beneficial diff --git a/middle_end/flambda2/simplify/unboxing/optimistic_unboxing_decision.ml b/middle_end/flambda2/simplify/unboxing/optimistic_unboxing_decision.ml index 11ecfe8e663..70f0e8a2a08 100644 --- a/middle_end/flambda2/simplify/unboxing/optimistic_unboxing_decision.ml +++ b/middle_end/flambda2/simplify/unboxing/optimistic_unboxing_decision.ml @@ -54,7 +54,8 @@ let deciders = Unboxers.Float.decider; Unboxers.Int32.decider; Unboxers.Int64.decider; - Unboxers.Nativeint.decider ] + Unboxers.Nativeint.decider; + Unboxers.Vec128.decider ] let rec make_optimistic_decision ~depth ~recursive tenv ~param_type : U.decision = diff --git a/middle_end/flambda2/simplify/unboxing/unboxers.ml b/middle_end/flambda2/simplify/unboxing/unboxers.ml index de4b628eec5..fddb5846084 100644 --- a/middle_end/flambda2/simplify/unboxing/unboxers.ml +++ b/middle_end/flambda2/simplify/unboxing/unboxers.ml @@ -123,6 +123,24 @@ module Nativeint = struct } end +module Vec128 = struct + let decider = + { param_name = "unboxed_vec128"; + kind = K.Naked_number_kind.Naked_vec128; + prove_is_a_boxed_number = T.prove_is_a_boxed_vec128 + } + + let unboxing_prim simple = P.(Unary (Unbox_number Naked_vec128, simple)) + + let unboxer = + { var_name = "unboxed_vec128"; + invalid_const = + Const.naked_vec128 Numeric_types.Vec128_by_bit_pattern.zero; + unboxing_prim; + prove_simple = T.meet_boxed_vec128_containing_simple + } +end + module Field = struct let unboxing_prim bak ~block ~index = let field_const = Simple.const (Const.tagged_immediate index) in diff --git a/middle_end/flambda2/simplify/unboxing/unboxers.mli b/middle_end/flambda2/simplify/unboxing/unboxers.mli index bfa61f223c4..096a5530c0c 100644 --- a/middle_end/flambda2/simplify/unboxing/unboxers.mli +++ b/middle_end/flambda2/simplify/unboxing/unboxers.mli @@ -48,6 +48,8 @@ module Int64 : Number_S module Nativeint : Number_S +module Vec128 : Number_S + module Field : sig val unboxing_prim : P.Block_access_kind.t -> block:Simple.t -> index:Targetint_31_63.t -> P.t diff --git a/middle_end/flambda2/simplify/unboxing/unboxing_epa.ml b/middle_end/flambda2/simplify/unboxing/unboxing_epa.ml index ad9dbc3052f..cccf90e9ae9 100644 --- a/middle_end/flambda2/simplify/unboxing/unboxing_epa.ml +++ b/middle_end/flambda2/simplify/unboxing/unboxing_epa.ml @@ -159,8 +159,9 @@ let extra_args_for_const_ctor_of_variant Unbox ( Unique_tag_and_size _ | Variant _ | Closure_single_entry _ | Number - ((Naked_float | Naked_int32 | Naked_int64 | Naked_nativeint), _) - ); + ( ( Naked_float | Naked_int32 | Naked_int64 | Naked_nativeint + | Naked_vec128 ), + _ ) ); is_int = _ } -> Misc.fatal_errorf @@ -241,6 +242,9 @@ and compute_extra_args_for_one_decision_and_use_aux ~(pass : U.pass) rewrite_id | Unbox (Number (Naked_immediate, epa)) -> compute_extra_arg_for_number Naked_immediate Unboxers.Immediate.unboxer epa rewrite_id ~typing_env_at_use arg_being_unboxed + | Unbox (Number (Naked_vec128, epa)) -> + compute_extra_arg_for_number Naked_vec128 Unboxers.Vec128.unboxer epa + rewrite_id ~typing_env_at_use arg_being_unboxed and compute_extra_args_for_block ~pass rewrite_id ~typing_env_at_use arg_being_unboxed tag fields : U.decision = @@ -460,7 +464,7 @@ let add_extra_params_and_args extra_params_and_args decision = Unbox ( Unique_tag_and_size _ | Variant _ | Closure_single_entry _ | Number - ( ( Naked_float | Naked_int32 | Naked_int64 + ( ( Naked_float | Naked_int32 | Naked_int64 | Naked_vec128 | Naked_nativeint ), _ ) ); is_int = _ diff --git a/middle_end/flambda2/terms/code_size.ml b/middle_end/flambda2/terms/code_size.ml index 4c3e8eb612a..4abf165f9d5 100644 --- a/middle_end/flambda2/terms/code_size.ml +++ b/middle_end/flambda2/terms/code_size.ml @@ -120,14 +120,14 @@ let arith_conversion_size src dst = let unbox_number kind = match (kind : Flambda_kind.Boxable_number.t) with - | Naked_float -> 1 (* 1 load *) + | Naked_float | Naked_vec128 -> 1 (* 1 load *) | Naked_int64 when arch32 -> 4 (* 2 Cadda + 2 loads *) | Naked_int32 | Naked_int64 | Naked_nativeint -> 2 (* Cadda + load *) let box_number kind = match (kind : Flambda_kind.Boxable_number.t) with - | Naked_float -> alloc_size (* 1 alloc *) + | Naked_float | Naked_vec128 -> alloc_size (* 1 alloc *) | Naked_int32 when not arch32 -> 1 + alloc_size (* shift/sextend + alloc *) | Naked_int32 | Naked_int64 | Naked_nativeint -> alloc_size (* alloc *) diff --git a/middle_end/flambda2/terms/flambda.ml b/middle_end/flambda2/terms/flambda.ml index b64f66d73e2..f0920c58a0f 100644 --- a/middle_end/flambda2/terms/flambda.ml +++ b/middle_end/flambda2/terms/flambda.ml @@ -1379,6 +1379,9 @@ module Named = struct Simple.const (Reg_width_const.naked_int64 Int64.zero) | Naked_number Naked_nativeint -> Simple.const (Reg_width_const.naked_nativeint Targetint_32_64.zero) + | Naked_number Naked_vec128 -> + Simple.const + (Reg_width_const.naked_vec128 Numeric_types.Vec128_by_bit_pattern.zero) | Region -> Misc.fatal_error "[Region] kind not expected here" | Rec_info -> Misc.fatal_error "[Rec_info] kind not expected here" in @@ -1410,7 +1413,7 @@ module Named = struct | Deleted_code | Static_const ( Block _ | Boxed_float _ | Boxed_int32 _ | Boxed_int64 _ - | Boxed_nativeint _ | Immutable_float_block _ + | Boxed_vec128 _ | Boxed_nativeint _ | Immutable_float_block _ | Immutable_float_array _ | Mutable_string _ | Immutable_string _ | Empty_array | Immutable_value_array _ ) -> diff --git a/middle_end/flambda2/terms/static_const.ml b/middle_end/flambda2/terms/static_const.ml index 2a02b89f1b8..21be9fc8236 100644 --- a/middle_end/flambda2/terms/static_const.ml +++ b/middle_end/flambda2/terms/static_const.ml @@ -23,6 +23,7 @@ type t = | Boxed_int32 of Int32.t Or_variable.t | Boxed_int64 of Int64.t Or_variable.t | Boxed_nativeint of Targetint_32_64.t Or_variable.t + | Boxed_vec128 of Numeric_types.Vec128_by_bit_pattern.t Or_variable.t | Immutable_float_block of Numeric_types.Float_by_bit_pattern.t Or_variable.t list | Immutable_float_array of @@ -44,6 +45,8 @@ let boxed_int64 or_var = Boxed_int64 or_var let boxed_nativeint or_var = Boxed_nativeint or_var +let boxed_vec128 or_var = Boxed_vec128 or_var + let immutable_float_block fields = Immutable_float_block fields let immutable_float_array fields = @@ -96,6 +99,11 @@ let [@ocamlformat "disable"] print ppf t = Flambda_colours.static_part Flambda_colours.pop (Or_variable.print Targetint_32_64.print) or_var + | Boxed_vec128 or_var -> + fprintf ppf "@[(%tBoxed_vec128%t@ %a)@]" + Flambda_colours.static_part + Flambda_colours.pop + (Or_variable.print Numeric_types.Vec128_by_bit_pattern.print) or_var | Immutable_float_block fields -> fprintf ppf "@[(%tImmutable_float_block%t@ @[[| %a |]@])@]" Flambda_colours.static_part @@ -161,6 +169,9 @@ include Container_types.Make (struct Or_variable.compare Numeric_types.Int64.compare or_var1 or_var2 | Boxed_nativeint or_var1, Boxed_nativeint or_var2 -> Or_variable.compare Targetint_32_64.compare or_var1 or_var2 + | Boxed_vec128 or_var1, Boxed_vec128 or_var2 -> + Or_variable.compare Numeric_types.Vec128_by_bit_pattern.compare or_var1 + or_var2 | Immutable_float_block fields1, Immutable_float_array fields2 -> Misc.Stdlib.List.compare (Or_variable.compare Numeric_types.Float_by_bit_pattern.compare) @@ -188,6 +199,8 @@ include Container_types.Make (struct | _, Boxed_int64 _ -> 1 | Boxed_nativeint _, _ -> -1 | _, Boxed_nativeint _ -> 1 + | Boxed_vec128 _, _ -> -1 + | _, Boxed_vec128 _ -> 1 | Immutable_float_block _, _ -> -1 | _, Immutable_float_block _ -> 1 | Immutable_float_array _, _ -> -1 @@ -218,6 +231,7 @@ let free_names t = | Boxed_int32 or_var -> Or_variable.free_names or_var | Boxed_int64 or_var -> Or_variable.free_names or_var | Boxed_nativeint or_var -> Or_variable.free_names or_var + | Boxed_vec128 or_var -> Or_variable.free_names or_var | Mutable_string { initial_value = _ } | Immutable_string _ | Empty_array -> Name_occurrences.empty | Immutable_float_block fields | Immutable_float_array fields -> @@ -256,6 +270,9 @@ let apply_renaming t renaming = | Boxed_nativeint or_var -> let or_var' = Or_variable.apply_renaming or_var renaming in if or_var == or_var' then t else Boxed_nativeint or_var' + | Boxed_vec128 or_var -> + let or_var' = Or_variable.apply_renaming or_var renaming in + if or_var == or_var' then t else Boxed_vec128 or_var' | Mutable_string { initial_value = _ } | Immutable_string _ -> t | Immutable_float_block fields -> let fields' = @@ -303,12 +320,14 @@ let ids_for_export t = | Boxed_float (Var (var, _dbg)) | Boxed_int32 (Var (var, _dbg)) | Boxed_int64 (Var (var, _dbg)) - | Boxed_nativeint (Var (var, _dbg)) -> + | Boxed_nativeint (Var (var, _dbg)) + | Boxed_vec128 (Var (var, _dbg)) -> Ids_for_export.add_variable Ids_for_export.empty var | Boxed_float (Const _) | Boxed_int32 (Const _) | Boxed_int64 (Const _) | Boxed_nativeint (Const _) + | Boxed_vec128 (Const _) | Mutable_string { initial_value = _ } | Immutable_string _ -> Ids_for_export.empty @@ -332,8 +351,9 @@ let ids_for_export t = let is_block t = match t with | Block _ | Boxed_float _ | Boxed_int32 _ | Boxed_int64 _ | Boxed_nativeint _ - | Immutable_float_block _ | Immutable_float_array _ | Immutable_string _ - | Mutable_string _ | Empty_array | Immutable_value_array _ -> + | Boxed_vec128 _ | Immutable_float_block _ | Immutable_float_array _ + | Immutable_string _ | Mutable_string _ | Empty_array + | Immutable_value_array _ -> true | Set_of_closures _ -> false @@ -341,8 +361,9 @@ let is_set_of_closures t = match t with | Set_of_closures _ -> true | Block _ | Boxed_float _ | Boxed_int32 _ | Boxed_int64 _ | Boxed_nativeint _ - | Immutable_float_block _ | Immutable_float_array _ | Immutable_string _ - | Mutable_string _ | Empty_array | Immutable_value_array _ -> + | Boxed_vec128 _ | Immutable_float_block _ | Immutable_float_array _ + | Immutable_string _ | Mutable_string _ | Empty_array + | Immutable_value_array _ -> false let is_fully_static t = free_names t |> Name_occurrences.no_variables @@ -351,8 +372,9 @@ let can_share0 t = match t with | Block (_, Immutable, _) | Set_of_closures _ | Boxed_float _ | Boxed_int32 _ | Boxed_int64 _ - | Boxed_nativeint _ | Immutable_float_block _ | Immutable_float_array _ - | Immutable_string _ | Empty_array | Immutable_value_array _ -> + | Boxed_vec128 _ | Boxed_nativeint _ | Immutable_float_block _ + | Immutable_float_array _ | Immutable_string _ | Empty_array + | Immutable_value_array _ -> true | Block (_, (Mutable | Immutable_unique), _) | Mutable_string _ -> false @@ -362,8 +384,9 @@ let must_be_set_of_closures t = match t with | Set_of_closures set -> set | Block _ | Boxed_float _ | Boxed_int32 _ | Boxed_int64 _ | Boxed_nativeint _ - | Immutable_float_block _ | Immutable_float_array _ | Empty_array - | Immutable_value_array _ | Immutable_string _ | Mutable_string _ -> + | Boxed_vec128 _ | Immutable_float_block _ | Immutable_float_array _ + | Empty_array | Immutable_value_array _ | Immutable_string _ + | Mutable_string _ -> Misc.fatal_errorf "Not a set of closures:@ %a" print t let match_against_bound_static_pattern t (pat : Bound_static.Pattern.t) @@ -386,14 +409,14 @@ let match_against_bound_static_pattern t (pat : Bound_static.Pattern.t) Misc.fatal_errorf "Mismatch on declared function slots:@ %a@ =@ %a" Bound_static.Pattern.print pat print t; set_of_closures_callback ~closure_symbols set_of_closures - | ( ( Block _ | Boxed_float _ | Boxed_int32 _ | Boxed_int64 _ + | ( ( Block _ | Boxed_float _ | Boxed_int32 _ | Boxed_int64 _ | Boxed_vec128 _ | Boxed_nativeint _ | Immutable_float_block _ | Immutable_float_array _ | Immutable_value_array _ | Empty_array | Immutable_string _ | Mutable_string _ ), Block_like symbol ) -> block_like_callback symbol t | Set_of_closures _, (Block_like _ | Code _) - | ( ( Block _ | Boxed_float _ | Boxed_int32 _ | Boxed_int64 _ + | ( ( Block _ | Boxed_float _ | Boxed_int32 _ | Boxed_int64 _ | Boxed_vec128 _ | Boxed_nativeint _ | Immutable_float_block _ | Immutable_float_array _ | Immutable_value_array _ | Empty_array | Immutable_string _ | Mutable_string _ ), diff --git a/middle_end/flambda2/terms/static_const.mli b/middle_end/flambda2/terms/static_const.mli index 17950a648b3..89cf4bd78c1 100644 --- a/middle_end/flambda2/terms/static_const.mli +++ b/middle_end/flambda2/terms/static_const.mli @@ -29,6 +29,7 @@ type t = private | Boxed_int32 of Int32.t Or_variable.t | Boxed_int64 of Int64.t Or_variable.t | Boxed_nativeint of Targetint_32_64.t Or_variable.t + | Boxed_vec128 of Numeric_types.Vec128_by_bit_pattern.t Or_variable.t | Immutable_float_block of Numeric_types.Float_by_bit_pattern.t Or_variable.t list | Immutable_float_array of @@ -62,6 +63,8 @@ val boxed_int64 : Int64.t Or_variable.t -> t val boxed_nativeint : Targetint_32_64.t Or_variable.t -> t +val boxed_vec128 : Numeric_types.Vec128_by_bit_pattern.t Or_variable.t -> t + val immutable_float_block : Numeric_types.Float_by_bit_pattern.t Or_variable.t list -> t diff --git a/middle_end/flambda2/to_cmm/to_cmm_env.ml b/middle_end/flambda2/to_cmm/to_cmm_env.ml index 36b67dbd886..4f774228e71 100644 --- a/middle_end/flambda2/to_cmm/to_cmm_env.ml +++ b/middle_end/flambda2/to_cmm/to_cmm_env.ml @@ -394,8 +394,8 @@ let splittable_primitive dbg prim args = Splittable_prim { dbg; prim; args } let is_cmm_simple cmm = match (cmm : Cmm.expression) with - | Cconst_int _ | Cconst_natint _ | Cconst_float _ | Cconst_symbol _ | Cvar _ - -> + | Cconst_int _ | Cconst_natint _ | Cconst_float _ | Cconst_vec128 _ + | Cconst_symbol _ | Cvar _ -> true | Clet _ | Clet_mut _ | Cphantom_let _ | Cassign _ | Ctuple _ | Cop _ | Csequence _ | Cifthenelse _ | Cswitch _ | Ccatch _ | Cexit _ | Ctrywith _ diff --git a/middle_end/flambda2/to_cmm/to_cmm_expr.ml b/middle_end/flambda2/to_cmm/to_cmm_expr.ml index fa5c7051fc0..6e2deec94e0 100644 --- a/middle_end/flambda2/to_cmm/to_cmm_expr.ml +++ b/middle_end/flambda2/to_cmm/to_cmm_expr.ml @@ -186,7 +186,8 @@ let translate_apply0 ~dbg_with_inlined:dbg env res apply = match Flambda_kind.With_subkind.kind kind with | Naked_number Naked_int32 -> C.sign_extend_32 | Naked_number - (Naked_float | Naked_immediate | Naked_int64 | Naked_nativeint) + ( Naked_float | Naked_immediate | Naked_int64 | Naked_nativeint + | Naked_vec128 ) | Value | Rec_info | Region -> fun _dbg cmm -> cmm) | _ -> @@ -629,6 +630,7 @@ and let_cont_exn_handler env res k body vars handler free_vars_of_handler | Naked_number (Naked_immediate | Naked_int32 | Naked_int64 | Naked_nativeint) -> C.int ~dbg 0 + | Naked_number Naked_vec128 -> C.vec128 ~dbg { high = 0L; low = 0L } | Region | Rec_info -> Misc.fatal_errorf "No dummy value available for kind %a" K.With_subkind.print kind diff --git a/middle_end/flambda2/to_cmm/to_cmm_primitive.ml b/middle_end/flambda2/to_cmm/to_cmm_primitive.ml index 705e1f45616..165d166309a 100644 --- a/middle_end/flambda2/to_cmm/to_cmm_primitive.ml +++ b/middle_end/flambda2/to_cmm/to_cmm_primitive.ml @@ -50,6 +50,7 @@ let value_slot_offset env value_slot = let unbox_number ~dbg kind arg = match (kind : K.Boxable_number.t) with | Naked_float -> C.unbox_float dbg arg + | Naked_vec128 -> C.unbox_vector dbg Pvec128 arg | Naked_int32 | Naked_int64 | Naked_nativeint -> let primitive_kind = K.Boxable_number.primitive_kind kind in C.unbox_int dbg primitive_kind arg @@ -58,6 +59,7 @@ let box_number ~dbg kind alloc_mode arg = let alloc_mode = Alloc_mode.For_allocations.to_lambda alloc_mode in match (kind : K.Boxable_number.t) with | Naked_float -> C.box_float dbg alloc_mode arg + | Naked_vec128 -> C.box_vector dbg Pvec128 alloc_mode arg | Naked_int32 | Naked_int64 | Naked_nativeint -> let primitive_kind = K.Boxable_number.primitive_kind kind in C.box_int_gen dbg primitive_kind alloc_mode arg diff --git a/middle_end/flambda2/to_cmm/to_cmm_result.ml b/middle_end/flambda2/to_cmm/to_cmm_result.ml index eac1d30a973..2347830a71e 100644 --- a/middle_end/flambda2/to_cmm/to_cmm_result.ml +++ b/middle_end/flambda2/to_cmm/to_cmm_result.ml @@ -95,7 +95,7 @@ let check_for_module_symbol t symbol = let defines_a_symbol data = match (data : Cmm.data_item) with | Cdefine_symbol _ -> true - | Cint8 _ | Cint16 _ | Cint32 _ | Cint _ | Csingle _ | Cdouble _ + | Cint8 _ | Cint16 _ | Cint32 _ | Cint _ | Csingle _ | Cdouble _ | Cvec128 _ | Csymbol_address _ | Cstring _ | Cskip _ | Calign _ -> false diff --git a/middle_end/flambda2/to_cmm/to_cmm_shared.ml b/middle_end/flambda2/to_cmm/to_cmm_shared.ml index 9b6a239d885..c2311f6cd4f 100644 --- a/middle_end/flambda2/to_cmm/to_cmm_shared.ml +++ b/middle_end/flambda2/to_cmm/to_cmm_shared.ml @@ -36,6 +36,7 @@ let exttype_of_kind (k : Flambda_kind.t) : Cmm.exttype = match Targetint_32_64.num_bits with | Thirty_two -> XInt32 | Sixty_four -> XInt64) + | Naked_number Naked_vec128 -> XVec128 | Region -> Misc.fatal_error "[Region] kind not expected here" | Rec_info -> Misc.fatal_error "[Rec_info] kind not expected here" @@ -45,11 +46,12 @@ let machtype_of_kind (kind : Flambda_kind.With_subkind.t) = match Flambda_kind.With_subkind.subkind kind with | Tagged_immediate -> Cmm.typ_int | Anything | Boxed_float | Boxed_int32 | Boxed_int64 | Boxed_nativeint - | Variant _ | Float_block _ | Float_array | Immediate_array | Value_array - | Generic_array -> + | Boxed_vec128 | Variant _ | Float_block _ | Float_array | Immediate_array + | Value_array | Generic_array -> Cmm.typ_val) | Naked_number Naked_float -> Cmm.typ_float | Naked_number Naked_int64 -> typ_int64 + | Naked_number Naked_vec128 -> Cmm.typ_vec128 | Naked_number (Naked_immediate | Naked_int32 | Naked_nativeint) -> Cmm.typ_int | Region | Rec_info -> assert false @@ -60,11 +62,12 @@ let extended_machtype_of_kind (kind : Flambda_kind.With_subkind.t) = match Flambda_kind.With_subkind.subkind kind with | Tagged_immediate -> Extended_machtype.typ_tagged_int | Anything | Boxed_float | Boxed_int32 | Boxed_int64 | Boxed_nativeint - | Variant _ | Float_block _ | Float_array | Immediate_array | Value_array - | Generic_array -> + | Boxed_vec128 | Variant _ | Float_block _ | Float_array | Immediate_array + | Value_array | Generic_array -> Extended_machtype.typ_val) | Naked_number Naked_float -> Extended_machtype.typ_float | Naked_number Naked_int64 -> Extended_machtype.typ_int64 + | Naked_number Naked_vec128 -> Extended_machtype.typ_vec128 | Naked_number (Naked_immediate | Naked_int32 | Naked_nativeint) -> Extended_machtype.typ_any_int | Region | Rec_info -> assert false @@ -76,13 +79,14 @@ let memory_chunk_of_kind (kind : Flambda_kind.With_subkind.t) : Cmm.memory_chunk match Flambda_kind.With_subkind.subkind kind with | Tagged_immediate -> Word_int | Anything | Boxed_float | Boxed_int32 | Boxed_int64 | Boxed_nativeint - | Variant _ | Float_block _ | Float_array | Immediate_array | Value_array - | Generic_array -> + | Boxed_vec128 | Variant _ | Float_block _ | Float_array | Immediate_array + | Value_array | Generic_array -> Word_val) | Naked_number (Naked_int32 | Naked_int64 | Naked_nativeint | Naked_immediate) -> Word_int | Naked_number Naked_float -> Double + | Naked_number Naked_vec128 -> Onetwentyeight | Region | Rec_info -> Misc.fatal_errorf "Bad kind %a for [memory_chunk_of_kind]" Flambda_kind.With_subkind.print kind @@ -131,6 +135,11 @@ let const ~dbg cst = | Naked_float f -> float ~dbg (Numeric_types.Float_by_bit_pattern.to_float f) | Naked_int32 i -> int32 ~dbg i | Naked_int64 i -> int64 ~dbg i + | Naked_vec128 i -> + let { Numeric_types.Vec128_by_bit_pattern.high; low } = + Numeric_types.Vec128_by_bit_pattern.to_bits i + in + vec128 ~dbg { high; low } | Naked_nativeint t -> targetint ~dbg t let simple ?consider_inlining_effectful_expressions ~dbg env res s = @@ -163,7 +172,14 @@ let const_static cst = (tag_targetint (Targetint_31_63.to_targetint i))) ] | Naked_float f -> [cfloat (Numeric_types.Float_by_bit_pattern.to_float f)] | Naked_int32 i -> [cint (Nativeint.of_int32 i)] + (* We don't compile flambda-backend in 32-bit mode, so nativeint is 64 + bits. *) | Naked_int64 i -> [cint (Int64.to_nativeint i)] + | Naked_vec128 v -> + let { Numeric_types.Vec128_by_bit_pattern.high; low } = + Numeric_types.Vec128_by_bit_pattern.to_bits v + in + [cvec128 { high; low }] | Naked_nativeint t -> [cint (nativeint_of_targetint t)] let simple_static res s = diff --git a/middle_end/flambda2/to_cmm/to_cmm_static.ml b/middle_end/flambda2/to_cmm/to_cmm_static.ml index 468e87adf22..cfcc47862b7 100644 --- a/middle_end/flambda2/to_cmm/to_cmm_static.ml +++ b/middle_end/flambda2/to_cmm/to_cmm_static.ml @@ -158,6 +158,20 @@ let static_const0 env res ~updates (bound_static : Bound_static.Pattern.t) ~emit:C.emit_nativeint_constant ~transl ~structured v res updates in env, res, updates + | Block_like symbol, Boxed_vec128 v -> + let default = Numeric_types.Vec128_by_bit_pattern.zero in + let transl v = + let { Numeric_types.Vec128_by_bit_pattern.high; low } = + Numeric_types.Vec128_by_bit_pattern.to_bits v + in + { Cmm.high; low } + in + let structured { Cmm.high; low } = Clambda.Uconst_vec128 { high; low } in + let res, env, updates = + static_boxed_number ~kind:Onetwentyeight ~env ~symbol ~default + ~emit:C.emit_vec128_constant ~transl ~structured v res updates + in + env, res, updates | Block_like s, (Immutable_float_block fields | Immutable_float_array fields) -> let aux = @@ -197,7 +211,7 @@ let static_const0 env res ~updates (bound_static : Bound_static.Pattern.t) "[Set_of_closures] values cannot be bound by [Block_like] bindings:@ %a" SC.print static_const | ( (Code _ | Set_of_closures _), - ( Block _ | Boxed_float _ | Boxed_int32 _ | Boxed_int64 _ + ( Block _ | Boxed_float _ | Boxed_int32 _ | Boxed_int64 _ | Boxed_vec128 _ | Boxed_nativeint _ | Immutable_float_block _ | Immutable_float_array _ | Immutable_value_array _ | Empty_array | Mutable_string _ | Immutable_string _ ) ) -> diff --git a/middle_end/flambda2/types/env/typing_env.ml b/middle_end/flambda2/types/env/typing_env.ml index 5c7f86841d1..94c691d3032 100644 --- a/middle_end/flambda2/types/env/typing_env.ml +++ b/middle_end/flambda2/types/env/typing_env.ml @@ -1308,14 +1308,14 @@ end = struct match Reg_width_const.descr const with | Tagged_immediate i -> VA.Value_int i | Naked_immediate _ | Naked_float _ | Naked_int32 _ - | Naked_int64 _ | Naked_nativeint _ -> + | Naked_vec128 _ | Naked_int64 _ | Naked_nativeint _ -> VA.Value_unknown) ~var:(fun _ ~coercion:_ -> VA.Value_unknown) ~symbol:(fun symbol ~coercion:_ -> VA.Value_symbol symbol) | Ok (No_alias head) -> ( match head with | Mutable_block _ | Boxed_float _ | Boxed_int32 _ | Boxed_int64 _ - | Boxed_nativeint _ | String _ | Array _ -> + | Boxed_vec128 _ | Boxed_nativeint _ | String _ | Array _ -> Value_unknown | Closures { by_function_slot; alloc_mode = _ } -> ( match TG.Row_like_for_closures.get_singleton by_function_slot with @@ -1357,7 +1357,7 @@ end = struct Block_approximation (Array.of_list fields, alloc_mode) else Value_unknown)) | Naked_immediate _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ - | Naked_nativeint _ | Rec_info _ | Region _ -> + | Naked_vec128 _ | Naked_nativeint _ | Rec_info _ | Region _ -> assert false in let symbol_ty, _binding_time_and_mode = diff --git a/middle_end/flambda2/types/expand_head.ml b/middle_end/flambda2/types/expand_head.ml index a7c8091f312..17ff71a9bfd 100644 --- a/middle_end/flambda2/types/expand_head.ml +++ b/middle_end/flambda2/types/expand_head.ml @@ -37,6 +37,8 @@ module Expanded_type : sig val create_naked_nativeint : Type_grammar.head_of_kind_naked_nativeint -> t + val create_naked_vec128 : Type_grammar.head_of_kind_naked_vec128 -> t + val create_rec_info : Type_grammar.head_of_kind_rec_info -> t val create_region : Type_grammar.head_of_kind_region -> t @@ -62,6 +64,7 @@ module Expanded_type : sig | Naked_int32 of Type_grammar.head_of_kind_naked_int32 | Naked_int64 of Type_grammar.head_of_kind_naked_int64 | Naked_nativeint of Type_grammar.head_of_kind_naked_nativeint + | Naked_vec128 of Type_grammar.head_of_kind_naked_vec128 | Rec_info of Type_grammar.head_of_kind_rec_info | Region of Type_grammar.head_of_kind_region @@ -79,6 +82,8 @@ module Expanded_type : sig Type_grammar.head_of_kind_naked_int64 Or_unknown_or_bottom.t | Naked_nativeint of Type_grammar.head_of_kind_naked_nativeint Or_unknown_or_bottom.t + | Naked_vec128 of + Type_grammar.head_of_kind_naked_vec128 Or_unknown_or_bottom.t | Rec_info of Type_grammar.head_of_kind_rec_info Or_unknown_or_bottom.t | Region of Type_grammar.head_of_kind_region Or_unknown_or_bottom.t @@ -91,6 +96,7 @@ end = struct | Naked_int32 of TG.head_of_kind_naked_int32 | Naked_int64 of TG.head_of_kind_naked_int64 | Naked_nativeint of TG.head_of_kind_naked_nativeint + | Naked_vec128 of TG.head_of_kind_naked_vec128 | Rec_info of TG.head_of_kind_rec_info | Region of TG.head_of_kind_region @@ -118,6 +124,9 @@ end = struct let create_naked_nativeint head = { kind = K.naked_nativeint; descr = Ok (Naked_nativeint head) } + let create_naked_vec128 head = + { kind = K.naked_vec128; descr = Ok (Naked_vec128 head) } + let create_rec_info head = { kind = K.rec_info; descr = Ok (Rec_info head) } let create_region head = { kind = K.region; descr = Ok (Region head) } @@ -165,6 +174,15 @@ end = struct match TG.apply_coercion_head_of_kind_naked_float head coercion with | Bottom -> create_bottom K.naked_float | Ok head -> create_naked_float head)) + | Naked_vec128 Unknown -> create_unknown K.naked_vec128 + | Naked_vec128 Bottom -> create_bottom K.naked_vec128 + | Naked_vec128 (Ok (No_alias head)) -> ( + match coercion with + | None -> create_naked_vec128 head + | Some coercion -> ( + match TG.apply_coercion_head_of_kind_naked_vec128 head coercion with + | Bottom -> create_bottom K.naked_vec128 + | Ok head -> create_naked_vec128 head)) | Naked_int32 Unknown -> create_unknown K.naked_int32 | Naked_int32 Bottom -> create_bottom K.naked_int32 | Naked_int32 (Ok (No_alias head)) -> ( @@ -213,6 +231,7 @@ end = struct | Value (Ok (Equals _)) | Naked_immediate (Ok (Equals _)) | Naked_float (Ok (Equals _)) + | Naked_vec128 (Ok (Equals _)) | Naked_int32 (Ok (Equals _)) | Naked_int64 (Ok (Equals _)) | Naked_nativeint (Ok (Equals _)) @@ -232,6 +251,7 @@ end = struct | Naked_int32 head -> TG.create_from_head_naked_int32 head | Naked_int64 head -> TG.create_from_head_naked_int64 head | Naked_nativeint head -> TG.create_from_head_naked_nativeint head + | Naked_vec128 head -> TG.create_from_head_naked_vec128 head | Rec_info head -> TG.create_from_head_rec_info head | Region head -> TG.create_from_head_region head) @@ -247,6 +267,8 @@ end = struct Type_grammar.head_of_kind_naked_int64 Or_unknown_or_bottom.t | Naked_nativeint of Type_grammar.head_of_kind_naked_nativeint Or_unknown_or_bottom.t + | Naked_vec128 of + Type_grammar.head_of_kind_naked_vec128 Or_unknown_or_bottom.t | Rec_info of Type_grammar.head_of_kind_rec_info Or_unknown_or_bottom.t | Region of Type_grammar.head_of_kind_region Or_unknown_or_bottom.t @@ -260,6 +282,7 @@ end = struct | Naked_number Naked_int32 -> Naked_int32 Unknown | Naked_number Naked_int64 -> Naked_int64 Unknown | Naked_number Naked_nativeint -> Naked_nativeint Unknown + | Naked_number Naked_vec128 -> Naked_vec128 Unknown | Rec_info -> Rec_info Unknown | Region -> Region Unknown) | Bottom -> ( @@ -270,6 +293,7 @@ end = struct | Naked_number Naked_int32 -> Naked_int32 Bottom | Naked_number Naked_int64 -> Naked_int64 Bottom | Naked_number Naked_nativeint -> Naked_nativeint Bottom + | Naked_number Naked_vec128 -> Naked_vec128 Bottom | Rec_info -> Rec_info Bottom | Region -> Region Bottom) | Ok (Value head) -> Value (Ok head) @@ -278,6 +302,7 @@ end = struct | Ok (Naked_int32 head) -> Naked_int32 (Ok head) | Ok (Naked_int64 head) -> Naked_int64 (Ok head) | Ok (Naked_nativeint head) -> Naked_nativeint (Ok head) + | Ok (Naked_vec128 head) -> Naked_vec128 (Ok head) | Ok (Rec_info head) -> Rec_info (Ok head) | Ok (Region head) -> Region (Ok head) end @@ -312,7 +337,9 @@ let expand_head_of_alias_type env kind | Naked_int64 i -> ET.create_naked_int64 (TG.Head_of_kind_naked_int64.create i) | Naked_nativeint i -> - ET.create_naked_nativeint (TG.Head_of_kind_naked_nativeint.create i)) + ET.create_naked_nativeint (TG.Head_of_kind_naked_nativeint.create i) + | Naked_vec128 i -> + ET.create_naked_vec128 (TG.Head_of_kind_naked_vec128.create i)) ~name let expand_head0 env ty ~known_canonical_simple_at_in_types_mode = diff --git a/middle_end/flambda2/types/expand_head.mli b/middle_end/flambda2/types/expand_head.mli index 7e88deb019e..ccc1714fe7b 100644 --- a/middle_end/flambda2/types/expand_head.mli +++ b/middle_end/flambda2/types/expand_head.mli @@ -32,6 +32,8 @@ module Expanded_type : sig val create_naked_nativeint : Type_grammar.head_of_kind_naked_nativeint -> t + val create_naked_vec128 : Type_grammar.head_of_kind_naked_vec128 -> t + val create_rec_info : Type_grammar.head_of_kind_rec_info -> t val create_region : Type_grammar.head_of_kind_region -> t @@ -57,6 +59,7 @@ module Expanded_type : sig | Naked_int32 of Type_grammar.head_of_kind_naked_int32 | Naked_int64 of Type_grammar.head_of_kind_naked_int64 | Naked_nativeint of Type_grammar.head_of_kind_naked_nativeint + | Naked_vec128 of Type_grammar.head_of_kind_naked_vec128 | Rec_info of Type_grammar.head_of_kind_rec_info | Region of Type_grammar.head_of_kind_region @@ -74,6 +77,8 @@ module Expanded_type : sig Type_grammar.head_of_kind_naked_int64 Or_unknown_or_bottom.t | Naked_nativeint of Type_grammar.head_of_kind_naked_nativeint Or_unknown_or_bottom.t + | Naked_vec128 of + Type_grammar.head_of_kind_naked_vec128 Or_unknown_or_bottom.t | Rec_info of Type_grammar.head_of_kind_rec_info Or_unknown_or_bottom.t | Region of Type_grammar.head_of_kind_region Or_unknown_or_bottom.t diff --git a/middle_end/flambda2/types/flambda2_types.mli b/middle_end/flambda2/types/flambda2_types.mli index 5301e1c546a..8406b81f3f9 100644 --- a/middle_end/flambda2/types/flambda2_types.mli +++ b/middle_end/flambda2/types/flambda2_types.mli @@ -365,6 +365,9 @@ val this_boxed_int64 : Numeric_types.Int64.t -> Alloc_mode.For_types.t -> t val this_boxed_nativeint : Targetint_32_64.t -> Alloc_mode.For_types.t -> t +val this_boxed_vec128 : + Numeric_types.Vec128_by_bit_pattern.t -> Alloc_mode.For_types.t -> t + val these_tagged_immediates : Targetint_31_63.Set.t -> t val these_boxed_floats : @@ -412,6 +415,9 @@ val boxed_int64_alias_to : naked_int64:Variable.t -> Alloc_mode.For_types.t -> t val boxed_nativeint_alias_to : naked_nativeint:Variable.t -> Alloc_mode.For_types.t -> t +val boxed_vec128_alias_to : + naked_vec128:Variable.t -> Alloc_mode.For_types.t -> t + val box_float : t -> Alloc_mode.For_types.t -> t val box_int32 : t -> Alloc_mode.For_types.t -> t @@ -420,6 +426,8 @@ val box_int64 : t -> Alloc_mode.For_types.t -> t val box_nativeint : t -> Alloc_mode.For_types.t -> t +val box_vec128 : t -> Alloc_mode.For_types.t -> t + val tagged_immediate_alias_to : naked_immediate:Variable.t -> t val tag_immediate : t -> t @@ -599,6 +607,8 @@ val prove_is_a_boxed_int64 : Typing_env.t -> t -> unit proof_of_property val prove_is_a_boxed_nativeint : Typing_env.t -> t -> unit proof_of_property +val prove_is_a_boxed_vec128 : Typing_env.t -> t -> unit proof_of_property + val prove_is_or_is_not_a_boxed_float : Typing_env.t -> t -> bool proof_of_property @@ -675,6 +685,9 @@ val meet_boxed_int64_containing_simple : val meet_boxed_nativeint_containing_simple : Typing_env.t -> min_name_mode:Name_mode.t -> t -> Simple.t meet_shortcut +val meet_boxed_vec128_containing_simple : + Typing_env.t -> min_name_mode:Name_mode.t -> t -> Simple.t meet_shortcut + val meet_block_field_simple : Typing_env.t -> min_name_mode:Name_mode.t -> @@ -714,6 +727,7 @@ type to_lift = private | Boxed_int32 of Numeric_types.Int32.t | Boxed_int64 of Numeric_types.Int64.t | Boxed_nativeint of Targetint_32_64.t + | Boxed_vec128 of Numeric_types.Vec128_by_bit_pattern.t | Immutable_float_array of { fields : Numeric_types.Float_by_bit_pattern.t list } | Immutable_value_array of { fields : Simple.t list } diff --git a/middle_end/flambda2/types/grammar/more_type_creators.ml b/middle_end/flambda2/types/grammar/more_type_creators.ml index 298f030fa7b..f2f1ef746db 100644 --- a/middle_end/flambda2/types/grammar/more_type_creators.ml +++ b/middle_end/flambda2/types/grammar/more_type_creators.ml @@ -26,6 +26,7 @@ let unknown (kind : K.t) = | Naked_number Naked_int32 -> TG.any_naked_int32 | Naked_number Naked_int64 -> TG.any_naked_int64 | Naked_number Naked_nativeint -> TG.any_naked_nativeint + | Naked_number Naked_vec128 -> TG.any_naked_vec128 | Rec_info -> TG.any_rec_info | Region -> TG.any_region @@ -39,6 +40,7 @@ let bottom (kind : K.t) = | Naked_number Naked_int32 -> TG.bottom_naked_int32 | Naked_number Naked_int64 -> TG.bottom_naked_int64 | Naked_number Naked_nativeint -> TG.bottom_naked_nativeint + | Naked_number Naked_vec128 -> TG.bottom_naked_vec128 | Rec_info -> TG.bottom_rec_info | Region -> TG.bottom_region @@ -87,6 +89,9 @@ let this_boxed_int64 i alloc_mode = let this_boxed_nativeint i alloc_mode = TG.box_nativeint (TG.this_naked_nativeint i) alloc_mode +let this_boxed_vec128 i alloc_mode = + TG.box_vec128 (TG.this_naked_vec128 i) alloc_mode + let these_boxed_floats fs alloc_mode = TG.box_float (these_naked_floats fs) alloc_mode @@ -111,6 +116,9 @@ let any_boxed_int64 = let any_boxed_nativeint = TG.box_nativeint TG.any_naked_nativeint (Alloc_mode.For_types.unknown ()) +let any_boxed_vec128 = + TG.box_vec128 TG.any_naked_vec128 (Alloc_mode.For_types.unknown ()) + let any_block = TG.create_variant ~is_unique:false ~immediates:(Known TG.bottom_naked_immediate) ~blocks:Unknown @@ -255,6 +263,7 @@ let type_for_const const = | Naked_int32 n -> TG.this_naked_int32 n | Naked_int64 n -> TG.this_naked_int64 n | Naked_nativeint n -> TG.this_naked_nativeint n + | Naked_vec128 n -> TG.this_naked_vec128 n let kind_for_const const = TG.kind (type_for_const const) @@ -290,12 +299,14 @@ let rec unknown_with_subkind ?(alloc_mode = Alloc_mode.For_types.unknown ()) | Naked_number Naked_int32 -> TG.any_naked_int32 | Naked_number Naked_int64 -> TG.any_naked_int64 | Naked_number Naked_nativeint -> TG.any_naked_nativeint + | Naked_number Naked_vec128 -> TG.any_naked_vec128 | Rec_info -> TG.any_rec_info | Region -> TG.any_region) | Boxed_float -> any_boxed_float | Boxed_int32 -> any_boxed_int32 | Boxed_int64 -> any_boxed_int64 | Boxed_nativeint -> any_boxed_nativeint + | Boxed_vec128 -> any_boxed_vec128 | Tagged_immediate -> any_tagged_immediate | Variant { consts; non_consts } -> let const_ctors = these_naked_immediates consts in diff --git a/middle_end/flambda2/types/grammar/more_type_creators.mli b/middle_end/flambda2/types/grammar/more_type_creators.mli index 46e6df57046..988630953fc 100644 --- a/middle_end/flambda2/types/grammar/more_type_creators.mli +++ b/middle_end/flambda2/types/grammar/more_type_creators.mli @@ -58,6 +58,11 @@ val this_boxed_int64 : int64 -> Alloc_mode.For_types.t -> Type_grammar.t val this_boxed_nativeint : Targetint_32_64.t -> Alloc_mode.For_types.t -> Type_grammar.t +val this_boxed_vec128 : + Numeric_types.Vec128_by_bit_pattern.t -> + Alloc_mode.For_types.t -> + Type_grammar.t + val these_boxed_floats : Numeric_types.Float_by_bit_pattern.Set.t -> Alloc_mode.For_types.t -> diff --git a/middle_end/flambda2/types/grammar/type_grammar.ml b/middle_end/flambda2/types/grammar/type_grammar.ml index 99f80cf67be..2f88f31cf42 100644 --- a/middle_end/flambda2/types/grammar/type_grammar.ml +++ b/middle_end/flambda2/types/grammar/type_grammar.ml @@ -16,6 +16,7 @@ module K = Flambda_kind module Float = Numeric_types.Float_by_bit_pattern +module Vec128 = Numeric_types.Vec128_by_bit_pattern module Int32 = Numeric_types.Int32 module Int64 = Numeric_types.Int64 module RWC = Reg_width_const @@ -44,6 +45,7 @@ type t = | Naked_int32 of head_of_kind_naked_int32 TD.t | Naked_int64 of head_of_kind_naked_int64 TD.t | Naked_nativeint of head_of_kind_naked_nativeint TD.t + | Naked_vec128 of head_of_kind_naked_vec128 TD.t | Rec_info of head_of_kind_rec_info TD.t | Region of head_of_kind_region TD.t @@ -58,6 +60,7 @@ and head_of_kind_value = | Boxed_int32 of t * Alloc_mode.For_types.t | Boxed_int64 of t * Alloc_mode.For_types.t | Boxed_nativeint of t * Alloc_mode.For_types.t + | Boxed_vec128 of t * Alloc_mode.For_types.t | Closures of { by_function_slot : row_like_for_closures; alloc_mode : Alloc_mode.For_types.t @@ -110,6 +113,8 @@ and head_of_kind_naked_int64 = Int64.Set.t and head_of_kind_naked_nativeint = Targetint_32_64.Set.t +and head_of_kind_naked_vec128 = Vec128.Set.t + and head_of_kind_rec_info = Rec_info_expr.t and head_of_kind_region = unit @@ -219,6 +224,9 @@ let rec free_names0 ~follow_value_slots t = | Naked_nativeint ty -> type_descr_free_names ~free_names_head:free_names_head_of_kind_naked_nativeint ty + | Naked_vec128 ty -> + type_descr_free_names ~free_names_head:free_names_head_of_kind_naked_vec128 + ty | Rec_info ty -> type_descr_free_names ~free_names_head:free_names_head_of_kind_rec_info ty | Region ty -> @@ -237,6 +245,7 @@ and free_names_head_of_kind_value0 ~follow_value_slots head = | Boxed_int32 (ty, _alloc_mode) -> free_names0 ~follow_value_slots ty | Boxed_int64 (ty, _alloc_mode) -> free_names0 ~follow_value_slots ty | Boxed_nativeint (ty, _alloc_mode) -> free_names0 ~follow_value_slots ty + | Boxed_vec128 (ty, _alloc_mode) -> free_names0 ~follow_value_slots ty | Closures { by_function_slot; alloc_mode = _ } -> free_names_row_like_for_closures ~follow_value_slots by_function_slot | String _ -> Name_occurrences.empty @@ -271,6 +280,8 @@ and free_names_head_of_kind_naked_int64 _ = Name_occurrences.empty and free_names_head_of_kind_naked_nativeint _ = Name_occurrences.empty +and free_names_head_of_kind_naked_vec128 _ = Name_occurrences.empty + and free_names_head_of_kind_rec_info head = Rec_info_expr.free_names_in_types head @@ -451,6 +462,13 @@ let rec apply_renaming t renaming = ~free_names_head:free_names_head_of_kind_naked_nativeint ty renaming in if ty == ty' then t else Naked_nativeint ty' + | Naked_vec128 ty -> + let ty' = + TD.apply_renaming + ~apply_renaming_head:apply_renaming_head_of_kind_naked_vec128 + ~free_names_head:free_names_head_of_kind_naked_vec128 ty renaming + in + if ty == ty' then t else Naked_vec128 ty' | Rec_info ty -> let ty' = TD.apply_renaming @@ -493,6 +511,9 @@ and apply_renaming_head_of_kind_value head renaming = | Boxed_nativeint (ty, alloc_mode) -> let ty' = apply_renaming ty renaming in if ty == ty' then head else Boxed_nativeint (ty', alloc_mode) + | Boxed_vec128 (ty, alloc_mode) -> + let ty' = apply_renaming ty renaming in + if ty == ty' then head else Boxed_vec128 (ty', alloc_mode) | Closures { by_function_slot; alloc_mode } -> let by_function_slot' = apply_renaming_row_like_for_closures by_function_slot renaming @@ -554,6 +575,8 @@ and apply_renaming_head_of_kind_naked_int64 head _ = head and apply_renaming_head_of_kind_naked_nativeint head _ = head +and apply_renaming_head_of_kind_naked_vec128 head _ = head + and apply_renaming_head_of_kind_rec_info head renaming = Rec_info_expr.apply_renaming head renaming @@ -712,6 +735,10 @@ let rec print ppf t = Format.fprintf ppf "@[(Naked_nativeint@ %a)@]" (TD.print ~print_head:print_head_of_kind_naked_nativeint) ty + | Naked_vec128 ty -> + Format.fprintf ppf "@[(Naked_vec128@ %a)@]" + (TD.print ~print_head:print_head_of_kind_naked_vec128) + ty | Rec_info ty -> Format.fprintf ppf "@[(Rec_info@ %a)@]" (TD.print ~print_head:print_head_of_kind_rec_info) @@ -747,6 +774,9 @@ and print_head_of_kind_value ppf head = | Boxed_nativeint (ty, alloc_mode) -> Format.fprintf ppf "@[(Boxed_nativeint@ %a@ %a)@]" Alloc_mode.For_types.print alloc_mode print ty + | Boxed_vec128 (ty, alloc_mode) -> + Format.fprintf ppf "@[(Boxed_vec128@ %a@ %a)@]" + Alloc_mode.For_types.print alloc_mode print ty | Closures { by_function_slot; alloc_mode } -> print_row_like_for_closures alloc_mode ppf by_function_slot | String str_infos -> @@ -797,6 +827,9 @@ and print_head_of_kind_naked_nativeint ppf head = Format.fprintf ppf "@[(Naked_nativeint@ (%a))@]" Targetint_32_64.Set.print head +and print_head_of_kind_naked_vec128 ppf head = + Format.fprintf ppf "@[(Naked_vec128@ (%a))@]" Vec128.Set.print head + and print_head_of_kind_rec_info ppf head = Rec_info_expr.print ppf head and print_head_of_kind_region ppf () = Format.pp_print_string ppf "Region" @@ -925,6 +958,9 @@ let rec ids_for_export t = | Naked_nativeint ty -> TD.ids_for_export ~ids_for_export_head:ids_for_export_head_of_kind_naked_nativeint ty + | Naked_vec128 ty -> + TD.ids_for_export + ~ids_for_export_head:ids_for_export_head_of_kind_naked_vec128 ty | Rec_info ty -> TD.ids_for_export ~ids_for_export_head:ids_for_export_head_of_kind_rec_info ty @@ -942,6 +978,7 @@ and ids_for_export_head_of_kind_value head = | Boxed_int32 (t, _alloc_mode) -> ids_for_export t | Boxed_int64 (t, _alloc_mode) -> ids_for_export t | Boxed_nativeint (t, _alloc_mode) -> ids_for_export t + | Boxed_vec128 (t, _alloc_mode) -> ids_for_export t | Closures { by_function_slot; alloc_mode = _ } -> ids_for_export_row_like_for_closures by_function_slot | String _ -> Ids_for_export.empty @@ -972,6 +1009,8 @@ and ids_for_export_head_of_kind_naked_int32 _ = Ids_for_export.empty and ids_for_export_head_of_kind_naked_int64 _ = Ids_for_export.empty +and ids_for_export_head_of_kind_naked_vec128 _ = Ids_for_export.empty + and ids_for_export_head_of_kind_naked_nativeint _ = Ids_for_export.empty and ids_for_export_head_of_kind_rec_info head = @@ -1124,6 +1163,13 @@ let rec apply_coercion t coercion : t Or_bottom.t = coercion ty in if ty == ty' then t else Naked_nativeint ty' + | Naked_vec128 ty -> + let<+ ty' = + TD.apply_coercion + ~apply_coercion_head:apply_coercion_head_of_kind_naked_vec128 coercion + ty + in + if ty == ty' then t else Naked_vec128 ty' | Rec_info ty -> let<+ ty' = TD.apply_coercion @@ -1157,7 +1203,8 @@ and apply_coercion_head_of_kind_value head coercion : _ Or_bottom.t = to have a [Boxed_float] wrapper that would lift a float coercion to a value coercion. *) if Coercion.is_id coercion then Ok head else Bottom - | Boxed_int32 _ | Boxed_int64 _ | Boxed_nativeint _ | String _ -> + | Boxed_int32 _ | Boxed_int64 _ | Boxed_nativeint _ | Boxed_vec128 _ + | String _ -> (* Similarly, we don't have lifted coercions for these. *) if Coercion.is_id coercion then Ok head else Bottom | Array @@ -1193,6 +1240,9 @@ and apply_coercion_head_of_kind_naked_int64 head coercion : _ Or_bottom.t = and apply_coercion_head_of_kind_naked_nativeint head coercion : _ Or_bottom.t = if Coercion.is_id coercion then Ok head else Bottom +and apply_coercion_head_of_kind_naked_vec128 head coercion : _ Or_bottom.t = + if Coercion.is_id coercion then Ok head else Bottom + and apply_coercion_head_of_kind_rec_info head coercion : _ Or_bottom.t = (* Currently no coercion has an effect on a depth variable and [Rec_info_expr.t] does not contain any other variety of name. *) @@ -1440,6 +1490,14 @@ let rec remove_unused_value_slots_and_shortcut_aliases t ~used_value_slots remove_unused_value_slots_and_shortcut_aliases_head_of_kind_naked_nativeint in if ty == ty' then t else Naked_nativeint ty' + | Naked_vec128 ty -> + let ty' = + TD.remove_unused_value_slots_and_shortcut_aliases ty ~used_value_slots + ~canonicalise + ~remove_unused_value_slots_and_shortcut_aliases_head: + remove_unused_value_slots_and_shortcut_aliases_head_of_kind_naked_vec128 + in + if ty == ty' then t else Naked_vec128 ty' | Rec_info ty -> let ty' = TD.remove_unused_value_slots_and_shortcut_aliases ty ~used_value_slots @@ -1499,6 +1557,12 @@ and remove_unused_value_slots_and_shortcut_aliases_head_of_kind_value head ~canonicalise in if ty == ty' then head else Boxed_nativeint (ty', alloc_mode) + | Boxed_vec128 (ty, alloc_mode) -> + let ty' = + remove_unused_value_slots_and_shortcut_aliases ty ~used_value_slots + ~canonicalise + in + if ty == ty' then head else Boxed_vec128 (ty', alloc_mode) | Closures { by_function_slot; alloc_mode } -> let by_function_slot' = remove_unused_value_slots_and_shortcut_aliases_row_like_for_closures @@ -1586,6 +1650,10 @@ and remove_unused_value_slots_and_shortcut_aliases_head_of_kind_naked_nativeint head ~used_value_slots:_ ~canonicalise:_ = head +and remove_unused_value_slots_and_shortcut_aliases_head_of_kind_naked_vec128 + head ~used_value_slots:_ ~canonicalise:_ = + head + and remove_unused_value_slots_and_shortcut_aliases_head_of_kind_rec_info head ~used_value_slots:_ ~canonicalise:_ = head @@ -1778,7 +1846,7 @@ let rec project_variables_out ~to_project ~expand t = match apply_coercion (expand var) coercion with | Value ty -> ty | ( Naked_immediate _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ - | Naked_nativeint _ | Rec_info _ | Region _ ) as ty -> + | Naked_vec128 _ | Naked_nativeint _ | Rec_info _ | Region _ ) as ty -> Misc.fatal_errorf "Wrong kind while expanding %a: expecting [Value], got type %a" Variable.print var print ty @@ -1795,7 +1863,7 @@ let rec project_variables_out ~to_project ~expand t = match apply_coercion (expand var) coercion with | Naked_immediate ty -> ty | ( Value _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ - | Naked_nativeint _ | Rec_info _ | Region _ ) as ty -> + | Naked_vec128 _ | Naked_nativeint _ | Rec_info _ | Region _ ) as ty -> Misc.fatal_errorf "Wrong kind while expanding %a: expecting [Naked_immediate], got \ type %a" @@ -1814,7 +1882,7 @@ let rec project_variables_out ~to_project ~expand t = match apply_coercion (expand var) coercion with | Naked_float ty -> ty | ( Value _ | Naked_immediate _ | Naked_int32 _ | Naked_int64 _ - | Naked_nativeint _ | Rec_info _ | Region _ ) as ty -> + | Naked_vec128 _ | Naked_nativeint _ | Rec_info _ | Region _ ) as ty -> Misc.fatal_errorf "Wrong kind while expanding %a: expecting [Naked_float], got type %a" Variable.print var print ty @@ -1832,7 +1900,7 @@ let rec project_variables_out ~to_project ~expand t = match apply_coercion (expand var) coercion with | Naked_int32 ty -> ty | ( Value _ | Naked_immediate _ | Naked_float _ | Naked_int64 _ - | Naked_nativeint _ | Rec_info _ | Region _ ) as ty -> + | Naked_vec128 _ | Naked_nativeint _ | Rec_info _ | Region _ ) as ty -> Misc.fatal_errorf "Wrong kind while expanding %a: expecting [Naked_int32], got type %a" Variable.print var print ty @@ -1850,7 +1918,7 @@ let rec project_variables_out ~to_project ~expand t = match apply_coercion (expand var) coercion with | Naked_int64 ty -> ty | ( Value _ | Naked_immediate _ | Naked_float _ | Naked_int32 _ - | Naked_nativeint _ | Rec_info _ | Region _ ) as ty -> + | Naked_vec128 _ | Naked_nativeint _ | Rec_info _ | Region _ ) as ty -> Misc.fatal_errorf "Wrong kind while expanding %a: expecting [Naked_int64], got type %a" Variable.print var print ty @@ -1868,7 +1936,7 @@ let rec project_variables_out ~to_project ~expand t = match apply_coercion (expand var) coercion with | Naked_nativeint ty -> ty | ( Value _ | Naked_immediate _ | Naked_float _ | Naked_int32 _ - | Naked_int64 _ | Rec_info _ | Region _ ) as ty -> + | Naked_vec128 _ | Naked_int64 _ | Rec_info _ | Region _ ) as ty -> Misc.fatal_errorf "Wrong kind while expanding %a: expecting [Naked_nativeint], got \ type %a" @@ -1882,12 +1950,31 @@ let rec project_variables_out ~to_project ~expand t = ty in if ty == ty' then t else Naked_nativeint ty' + | Naked_vec128 ty -> + let expand_with_coercion var ~coercion = + match apply_coercion (expand var) coercion with + | Naked_vec128 ty -> ty + | ( Value _ | Naked_immediate _ | Naked_float _ | Naked_int32 _ + | Naked_nativeint _ | Naked_int64 _ | Rec_info _ | Region _ ) as ty -> + Misc.fatal_errorf + "Wrong kind while expanding %a: expecting [Naked_vec128], got type %a" + Variable.print var print ty + in + let ty' = + TD.project_variables_out + ~free_names_head:free_names_head_of_kind_naked_vec128 ~to_project + ~expand:expand_with_coercion + ~project_head:(project_head_of_kind_naked_vec128 ~to_project ~expand) + ty + in + if ty == ty' then t else Naked_vec128 ty' | Rec_info ty -> let expand_with_coercion var ~coercion = match apply_coercion (expand var) coercion with | Rec_info ty -> ty | ( Value _ | Naked_immediate _ | Naked_float _ | Naked_int32 _ - | Naked_int64 _ | Naked_nativeint _ | Region _ ) as ty -> + | Naked_vec128 _ | Naked_int64 _ | Naked_nativeint _ | Region _ ) as ty + -> Misc.fatal_errorf "Wrong kind while expanding %a: expecting [Rec_info], got type %a" Variable.print var print ty @@ -1904,7 +1991,8 @@ let rec project_variables_out ~to_project ~expand t = match apply_coercion (expand var) coercion with | Region ty -> ty | ( Value _ | Naked_immediate _ | Naked_float _ | Naked_int32 _ - | Naked_int64 _ | Naked_nativeint _ | Rec_info _ ) as ty -> + | Naked_vec128 _ | Naked_int64 _ | Naked_nativeint _ | Rec_info _ ) as + ty -> Misc.fatal_errorf "Wrong kind while expanding %a: expecting [Region], got type %a" Variable.print var print ty @@ -1944,6 +2032,9 @@ and project_head_of_kind_value ~to_project ~expand head = | Boxed_nativeint (ty, alloc_mode) -> let ty' = project_variables_out ~to_project ~expand ty in if ty == ty' then head else Boxed_nativeint (ty', alloc_mode) + | Boxed_vec128 (ty, alloc_mode) -> + let ty' = project_variables_out ~to_project ~expand ty in + if ty == ty' then head else Boxed_vec128 (ty', alloc_mode) | Closures { by_function_slot; alloc_mode } -> let by_function_slot' = project_row_like_for_closures ~to_project ~expand by_function_slot @@ -2005,6 +2096,8 @@ and project_head_of_kind_naked_int64 ~to_project:_ ~expand:_ head = head and project_head_of_kind_naked_nativeint ~to_project:_ ~expand:_ head = head +and project_head_of_kind_naked_vec128 ~to_project:_ ~expand:_ head = head + and project_head_of_kind_rec_info ~to_project ~expand:_ head = match (head : head_of_kind_rec_info) with | Const _ | Succ _ | Unroll_to _ -> head @@ -2176,6 +2269,7 @@ let kind t = | Naked_int32 _ -> K.naked_int32 | Naked_int64 _ -> K.naked_int64 | Naked_nativeint _ -> K.naked_nativeint + | Naked_vec128 _ -> K.naked_vec128 | Rec_info _ -> K.rec_info | Region _ -> K.region @@ -2383,6 +2477,7 @@ module Row_like_for_blocks = struct | Naked_number Naked_int32 | Naked_number Naked_int64 | Naked_number Naked_nativeint + | Naked_number Naked_vec128 | Region | Rec_info -> Misc.fatal_errorf "Bad kind %a for fields" Flambda_kind.print field_kind) @@ -2404,6 +2499,7 @@ module Row_like_for_blocks = struct | Naked_number Naked_int32 | Naked_number Naked_int64 | Naked_number Naked_nativeint + | Naked_number Naked_vec128 | Region | Rec_info -> Misc.fatal_errorf "Bad kind %a for fields" Flambda_kind.print field_kind) @@ -2628,6 +2724,7 @@ let get_alias_exn t = | Naked_int32 ty -> TD.get_alias_exn ty | Naked_int64 ty -> TD.get_alias_exn ty | Naked_nativeint ty -> TD.get_alias_exn ty + | Naked_vec128 ty -> TD.get_alias_exn ty | Rec_info ty -> TD.get_alias_exn ty | Region ty -> TD.get_alias_exn ty @@ -2642,6 +2739,7 @@ let is_obviously_bottom t = | Naked_int32 ty -> TD.is_obviously_bottom ty | Naked_int64 ty -> TD.is_obviously_bottom ty | Naked_nativeint ty -> TD.is_obviously_bottom ty + | Naked_vec128 ty -> TD.is_obviously_bottom ty | Rec_info ty -> TD.is_obviously_bottom ty | Region ty -> TD.is_obviously_bottom ty @@ -2653,6 +2751,7 @@ let is_obviously_unknown t = | Naked_int32 ty -> TD.is_obviously_unknown ty | Naked_int64 ty -> TD.is_obviously_unknown ty | Naked_nativeint ty -> TD.is_obviously_unknown ty + | Naked_vec128 ty -> TD.is_obviously_unknown ty | Rec_info ty -> TD.is_obviously_unknown ty | Region ty -> TD.is_obviously_unknown ty @@ -2664,6 +2763,7 @@ let alias_type_of (kind : K.t) name : t = | Naked_number Naked_int32 -> Naked_int32 (TD.create_equals name) | Naked_number Naked_int64 -> Naked_int64 (TD.create_equals name) | Naked_number Naked_nativeint -> Naked_nativeint (TD.create_equals name) + | Naked_number Naked_vec128 -> Naked_vec128 (TD.create_equals name) | Rec_info -> Rec_info (TD.create_equals name) | Region -> Region (TD.create_equals name) @@ -2679,6 +2779,8 @@ let bottom_naked_int64 = Naked_int64 TD.bottom let bottom_naked_nativeint = Naked_nativeint TD.bottom +let bottom_naked_vec128 = Naked_vec128 TD.bottom + let bottom_rec_info = Rec_info TD.bottom let bottom_region = Region TD.bottom @@ -2695,6 +2797,8 @@ let any_naked_int64 = Naked_int64 TD.unknown let any_naked_nativeint = Naked_nativeint TD.unknown +let any_naked_vec128 = Naked_vec128 TD.unknown + let any_region = Region TD.unknown let any_rec_info = Rec_info TD.unknown @@ -2714,6 +2818,9 @@ let this_naked_int64 i : t = let this_naked_nativeint i : t = Naked_nativeint (TD.create_equals (Simple.const (RWC.naked_nativeint i))) +let this_naked_vec128 i : t = + Naked_vec128 (TD.create_equals (Simple.const (RWC.naked_vec128 i))) + let these_naked_immediates is = match Targetint_31_63.Set.get_singleton is with | Some i -> this_naked_immediate i @@ -2757,21 +2864,21 @@ let these_naked_nativeints is = let box_float (t : t) alloc_mode : t = match t with | Naked_float _ -> Value (TD.create (Boxed_float (t, alloc_mode))) - | Value _ | Naked_immediate _ | Naked_int32 _ | Naked_int64 _ + | Value _ | Naked_immediate _ | Naked_int32 _ | Naked_int64 _ | Naked_vec128 _ | Naked_nativeint _ | Rec_info _ | Region _ -> Misc.fatal_errorf "Type of wrong kind for [box_float]: %a" print t let box_int32 (t : t) alloc_mode : t = match t with | Naked_int32 _ -> Value (TD.create (Boxed_int32 (t, alloc_mode))) - | Value _ | Naked_immediate _ | Naked_float _ | Naked_int64 _ + | Value _ | Naked_immediate _ | Naked_float _ | Naked_int64 _ | Naked_vec128 _ | Naked_nativeint _ | Rec_info _ | Region _ -> Misc.fatal_errorf "Type of wrong kind for [box_int32]: %a" print t let box_int64 (t : t) alloc_mode : t = match t with | Naked_int64 _ -> Value (TD.create (Boxed_int64 (t, alloc_mode))) - | Value _ | Naked_immediate _ | Naked_float _ | Naked_int32 _ + | Value _ | Naked_immediate _ | Naked_float _ | Naked_int32 _ | Naked_vec128 _ | Naked_nativeint _ | Rec_info _ | Region _ -> Misc.fatal_errorf "Type of wrong kind for [box_int64]: %a" print t @@ -2779,9 +2886,16 @@ let box_nativeint (t : t) alloc_mode : t = match t with | Naked_nativeint _ -> Value (TD.create (Boxed_nativeint (t, alloc_mode))) | Value _ | Naked_immediate _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ - | Rec_info _ | Region _ -> + | Naked_vec128 _ | Rec_info _ | Region _ -> Misc.fatal_errorf "Type of wrong kind for [box_nativeint]: %a" print t +let box_vec128 (t : t) alloc_mode : t = + match t with + | Naked_vec128 _ -> Value (TD.create (Boxed_vec128 (t, alloc_mode))) + | Value _ | Naked_immediate _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ + | Naked_nativeint _ | Rec_info _ | Region _ -> + Misc.fatal_errorf "Type of wrong kind for [box_vec128]: %a" print t + let this_tagged_immediate imm : t = Value (TD.create_equals (Simple.const (RWC.tagged_immediate imm))) @@ -2796,7 +2910,7 @@ let tag_immediate t : t = blocks = Known Row_like_for_blocks.bottom })) | Value _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ | Naked_nativeint _ - | Rec_info _ | Region _ -> + | Naked_vec128 _ | Rec_info _ | Region _ -> Misc.fatal_errorf "Type of wrong kind for [tag_immediate]: %a" print t let tagged_immediate_alias_to ~naked_immediate : t = @@ -2822,6 +2936,9 @@ let boxed_nativeint_alias_to ~naked_nativeint = box_nativeint (Naked_nativeint (TD.create_equals (Simple.var naked_nativeint))) +let boxed_vec128_alias_to ~naked_vec128 = + box_vec128 (Naked_vec128 (TD.create_equals (Simple.var naked_vec128))) + let this_immutable_string str = let size = Targetint_31_63.of_int (String.length str) in let string_info = @@ -2874,6 +2991,8 @@ module Descr = struct | Naked_int64 of head_of_kind_naked_int64 TD.Descr.t Or_unknown_or_bottom.t | Naked_nativeint of head_of_kind_naked_nativeint TD.Descr.t Or_unknown_or_bottom.t + | Naked_vec128 of + head_of_kind_naked_vec128 TD.Descr.t Or_unknown_or_bottom.t | Rec_info of head_of_kind_rec_info TD.Descr.t Or_unknown_or_bottom.t | Region of head_of_kind_region TD.Descr.t Or_unknown_or_bottom.t end @@ -2886,6 +3005,7 @@ let descr t : Descr.t = | Naked_int32 ty -> Naked_int32 (TD.descr ty) | Naked_int64 ty -> Naked_int64 (TD.descr ty) | Naked_nativeint ty -> Naked_nativeint (TD.descr ty) + | Naked_vec128 ty -> Naked_vec128 (TD.descr ty) | Rec_info ty -> Rec_info (TD.descr ty) | Region ty -> Region (TD.descr ty) @@ -2901,6 +3021,8 @@ let create_from_head_naked_int64 head = Naked_int64 (TD.create head) let create_from_head_naked_nativeint head = Naked_nativeint (TD.create head) +let create_from_head_naked_vec128 head = Naked_vec128 (TD.create head) + let create_from_head_rec_info head = Rec_info (TD.create head) let create_from_head_region head = Region (TD.create head) @@ -2921,6 +3043,8 @@ module Head_of_kind_value = struct let create_boxed_nativeint ty alloc_mode = Boxed_nativeint (ty, alloc_mode) + let create_boxed_vec128 ty alloc_mode = Boxed_vec128 (ty, alloc_mode) + let create_tagged_immediate imm : t = Variant { is_unique = false; @@ -2993,6 +3117,8 @@ module Head_of_kind_naked_int32 = Make_head_of_kind_naked_number (Int32) module Head_of_kind_naked_int64 = Make_head_of_kind_naked_number (Int64) module Head_of_kind_naked_nativeint = Make_head_of_kind_naked_number (Targetint_32_64) +module Head_of_kind_naked_vec128 = + Make_head_of_kind_naked_number (Numeric_types.Vec128_by_bit_pattern) let rec recover_some_aliases t = match t with @@ -3003,7 +3129,8 @@ let rec recover_some_aliases t = | Ok (No_alias ( Mutable_block _ | Boxed_float _ | Boxed_int32 _ | Boxed_int64 _ - | Boxed_nativeint _ | String _ | Closures _ | Array _ )) -> + | Boxed_vec128 _ | Boxed_nativeint _ | String _ | Closures _ | Array _ + )) -> t | Ok (No_alias (Variant { immediates; blocks; is_unique = _ })) -> ( match blocks with @@ -3027,13 +3154,13 @@ let rec recover_some_aliases t = match Reg_width_const.descr const with | Naked_immediate i -> this_tagged_immediate i | Tagged_immediate _ | Naked_float _ | Naked_int32 _ - | Naked_int64 _ | Naked_nativeint _ -> + | Naked_int64 _ | Naked_nativeint _ | Naked_vec128 _ -> Misc.fatal_errorf "Immediates case returned wrong kind of constant:@ %a" Reg_width_const.print const) | Unknown | Bottom | Ok (No_alias _) -> t) | Value _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ - | Naked_nativeint _ | Rec_info _ | Region _ -> + | Naked_vec128 _ | Naked_nativeint _ | Rec_info _ | Region _ -> Misc.fatal_errorf "Immediates case returned wrong kind:@ %a" print t' ())))) | Naked_immediate ty -> ( @@ -3072,4 +3199,11 @@ let rec recover_some_aliases t = match Targetint_32_64.Set.get_singleton is with | Some f -> this_naked_nativeint f | None -> t)) + | Naked_vec128 ty -> ( + match TD.descr ty with + | Unknown | Bottom | Ok (Equals _) -> t + | Ok (No_alias is) -> ( + match Vec128.Set.get_singleton is with + | Some f -> this_naked_vec128 f + | None -> t)) | Rec_info _ | Region _ -> t diff --git a/middle_end/flambda2/types/grammar/type_grammar.mli b/middle_end/flambda2/types/grammar/type_grammar.mli index 42c4a01da1e..7ee96a5bbe3 100644 --- a/middle_end/flambda2/types/grammar/type_grammar.mli +++ b/middle_end/flambda2/types/grammar/type_grammar.mli @@ -36,6 +36,7 @@ type t = private | Naked_int32 of head_of_kind_naked_int32 Type_descr.t | Naked_int64 of head_of_kind_naked_int64 Type_descr.t | Naked_nativeint of head_of_kind_naked_nativeint Type_descr.t + | Naked_vec128 of head_of_kind_naked_vec128 Type_descr.t | Rec_info of head_of_kind_rec_info Type_descr.t | Region of head_of_kind_region Type_descr.t @@ -51,6 +52,7 @@ and head_of_kind_value = private | Boxed_int32 of t * Alloc_mode.For_types.t | Boxed_int64 of t * Alloc_mode.For_types.t | Boxed_nativeint of t * Alloc_mode.For_types.t + | Boxed_vec128 of t * Alloc_mode.For_types.t | Closures of { by_function_slot : row_like_for_closures; alloc_mode : Alloc_mode.For_types.t @@ -80,6 +82,9 @@ and head_of_kind_naked_int64 = private Numeric_types.Int64.Set.t and head_of_kind_naked_nativeint = private Targetint_32_64.Set.t +and head_of_kind_naked_vec128 = private + Numeric_types.Vec128_by_bit_pattern.Set.t + and head_of_kind_rec_info = Rec_info_expr.t and head_of_kind_region = unit @@ -183,6 +188,8 @@ val bottom_naked_int64 : t val bottom_naked_nativeint : t +val bottom_naked_vec128 : t + val bottom_rec_info : t val bottom_region : t @@ -199,6 +206,8 @@ val any_naked_int64 : t val any_naked_nativeint : t +val any_naked_vec128 : t + val any_region : t val any_rec_info : t @@ -217,6 +226,8 @@ val this_naked_int64 : Numeric_types.Int64.t -> t val this_naked_nativeint : Targetint_32_64.t -> t +val this_naked_vec128 : Numeric_types.Vec128_by_bit_pattern.t -> t + val these_naked_immediates : Targetint_31_63.Set.t -> t val these_naked_floats : Numeric_types.Float_by_bit_pattern.Set.t -> t @@ -236,6 +247,9 @@ val boxed_int64_alias_to : naked_int64:Variable.t -> Alloc_mode.For_types.t -> t val boxed_nativeint_alias_to : naked_nativeint:Variable.t -> Alloc_mode.For_types.t -> t +val boxed_vec128_alias_to : + naked_vec128:Variable.t -> Alloc_mode.For_types.t -> t + val box_float : t -> Alloc_mode.For_types.t -> t val box_int32 : t -> Alloc_mode.For_types.t -> t @@ -244,6 +258,8 @@ val box_int64 : t -> Alloc_mode.For_types.t -> t val box_nativeint : t -> Alloc_mode.For_types.t -> t +val box_vec128 : t -> Alloc_mode.For_types.t -> t + val tagged_immediate_alias_to : naked_immediate:Variable.t -> t val tag_immediate : t -> t @@ -498,6 +514,8 @@ module Descr : sig head_of_kind_naked_int64 Type_descr.Descr.t Or_unknown_or_bottom.t | Naked_nativeint of head_of_kind_naked_nativeint Type_descr.Descr.t Or_unknown_or_bottom.t + | Naked_vec128 of + head_of_kind_naked_vec128 Type_descr.Descr.t Or_unknown_or_bottom.t | Rec_info of head_of_kind_rec_info Type_descr.Descr.t Or_unknown_or_bottom.t | Region of head_of_kind_region Type_descr.Descr.t Or_unknown_or_bottom.t @@ -517,6 +535,8 @@ val create_from_head_naked_int64 : head_of_kind_naked_int64 -> t val create_from_head_naked_nativeint : head_of_kind_naked_nativeint -> t +val create_from_head_naked_vec128 : head_of_kind_naked_vec128 -> t + val create_from_head_rec_info : head_of_kind_rec_info -> t val create_from_head_region : head_of_kind_region -> t @@ -543,6 +563,11 @@ val apply_coercion_head_of_kind_naked_nativeint : Coercion.t -> head_of_kind_naked_nativeint Or_bottom.t +val apply_coercion_head_of_kind_naked_vec128 : + head_of_kind_naked_vec128 -> + Coercion.t -> + head_of_kind_naked_vec128 Or_bottom.t + val apply_coercion_head_of_kind_rec_info : head_of_kind_rec_info -> Coercion.t -> head_of_kind_rec_info Or_bottom.t @@ -570,6 +595,8 @@ module Head_of_kind_value : sig val create_boxed_nativeint : flambda_type -> Alloc_mode.For_types.t -> t + val create_boxed_vec128 : flambda_type -> Alloc_mode.For_types.t -> t + val create_tagged_immediate : Targetint_31_63.t -> t val create_closures : Row_like_for_closures.t -> Alloc_mode.For_types.t -> t @@ -636,4 +663,10 @@ module Head_of_kind_naked_nativeint : with type n = Targetint_32_64.t with type n_set = Targetint_32_64.Set.t +module Head_of_kind_naked_vec128 : + Head_of_kind_naked_number_intf + with type t = head_of_kind_naked_vec128 + with type n = Numeric_types.Vec128_by_bit_pattern.t + with type n_set = Numeric_types.Vec128_by_bit_pattern.Set.t + val recover_some_aliases : t -> t diff --git a/middle_end/flambda2/types/meet_and_join.ml b/middle_end/flambda2/types/meet_and_join.ml index cdb3aa1c099..bbcaf44bc3b 100644 --- a/middle_end/flambda2/types/meet_and_join.ml +++ b/middle_end/flambda2/types/meet_and_join.ml @@ -309,6 +309,11 @@ and meet_expanded_head0 env (descr1 : ET.descr) (descr2 : ET.descr) : meet_head_of_kind_naked_nativeint env head1 head2 in ET.create_naked_nativeint head, env_extension + | Naked_vec128 head1, Naked_vec128 head2 -> + let<+ head, env_extension = + meet_head_of_kind_naked_vec128 env head1 head2 + in + ET.create_naked_vec128 head, env_extension | Rec_info head1, Rec_info head2 -> let<+ head, env_extension = meet_head_of_kind_rec_info env head1 head2 in ET.create_rec_info head, env_extension @@ -316,7 +321,8 @@ and meet_expanded_head0 env (descr1 : ET.descr) (descr2 : ET.descr) : let<+ head, env_extension = meet_head_of_kind_region env head1 head2 in ET.create_region head, env_extension | ( ( Value _ | Naked_immediate _ | Naked_float _ | Naked_int32 _ - | Naked_int64 _ | Naked_nativeint _ | Rec_info _ | Region _ ), + | Naked_vec128 _ | Naked_int64 _ | Naked_nativeint _ | Rec_info _ + | Region _ ), _ ) -> assert false @@ -363,6 +369,10 @@ and meet_head_of_kind_value env (head1 : TG.head_of_kind_value) let<* n, env_extension = meet env n1 n2 in let<+ alloc_mode = meet_alloc_mode alloc_mode1 alloc_mode2 in TG.Head_of_kind_value.create_boxed_nativeint n alloc_mode, env_extension + | Boxed_vec128 (n1, alloc_mode1), Boxed_vec128 (n2, alloc_mode2) -> + let<* n, env_extension = meet env n1 n2 in + let<+ alloc_mode = meet_alloc_mode alloc_mode1 alloc_mode2 in + TG.Head_of_kind_value.create_boxed_vec128 n alloc_mode, env_extension | ( Closures { by_function_slot = by_function_slot1; alloc_mode = alloc_mode1 }, Closures { by_function_slot = by_function_slot2; alloc_mode = alloc_mode2 } ) -> @@ -404,7 +414,8 @@ and meet_head_of_kind_value env (head1 : TG.head_of_kind_value) contents alloc_mode, env_extension ) | ( ( Variant _ | Mutable_block _ | Boxed_float _ | Boxed_int32 _ - | Boxed_int64 _ | Boxed_nativeint _ | Closures _ | String _ | Array _ ), + | Boxed_vec128 _ | Boxed_int64 _ | Boxed_nativeint _ | Closures _ + | String _ | Array _ ), _ ) -> (* This assumes that all the different constructors are incompatible. This could break very hard for dubious uses of Obj. *) @@ -580,6 +591,10 @@ and meet_head_of_kind_naked_nativeint _env t1 t2 : _ Or_bottom.t = let<+ head = TG.Head_of_kind_naked_nativeint.inter t1 t2 in head, TEE.empty +and meet_head_of_kind_naked_vec128 _env t1 t2 : _ Or_bottom.t = + let<+ head = TG.Head_of_kind_naked_vec128.inter t1 t2 in + head, TEE.empty + and meet_head_of_kind_rec_info _env t1 _t2 : _ Or_bottom.t = (* CR-someday lmaurer: This could be doing things like discovering two depth variables are equal *) @@ -1128,6 +1143,9 @@ and join_expanded_head env kind (expanded1 : ET.t) (expanded2 : ET.t) : ET.t = | Naked_nativeint head1, Naked_nativeint head2 -> let>+ head = join_head_of_kind_naked_nativeint env head1 head2 in ET.create_naked_nativeint head + | Naked_vec128 head1, Naked_vec128 head2 -> + let>+ head = join_head_of_kind_naked_vec128 env head1 head2 in + ET.create_naked_vec128 head | Rec_info head1, Rec_info head2 -> let>+ head = join_head_of_kind_rec_info env head1 head2 in ET.create_rec_info head @@ -1135,7 +1153,8 @@ and join_expanded_head env kind (expanded1 : ET.t) (expanded2 : ET.t) : ET.t = let>+ head = join_head_of_kind_region env head1 head2 in ET.create_region head | ( ( Value _ | Naked_immediate _ | Naked_float _ | Naked_int32 _ - | Naked_int64 _ | Naked_nativeint _ | Rec_info _ | Region _ ), + | Naked_vec128 _ | Naked_int64 _ | Naked_nativeint _ | Rec_info _ + | Region _ ), _ ) -> assert false in @@ -1176,6 +1195,10 @@ and join_head_of_kind_value env (head1 : TG.head_of_kind_value) let>+ n = join env n1 n2 in let alloc_mode = join_alloc_mode alloc_mode1 alloc_mode2 in TG.Head_of_kind_value.create_boxed_nativeint n alloc_mode + | Boxed_vec128 (n1, alloc_mode1), Boxed_vec128 (n2, alloc_mode2) -> + let>+ n = join env n1 n2 in + let alloc_mode = join_alloc_mode alloc_mode1 alloc_mode2 in + TG.Head_of_kind_value.create_boxed_vec128 n alloc_mode | ( Closures { by_function_slot = by_function_slot1; alloc_mode = alloc_mode1 }, Closures { by_function_slot = by_function_slot2; alloc_mode = alloc_mode2 } ) -> @@ -1209,7 +1232,8 @@ and join_head_of_kind_value env (head1 : TG.head_of_kind_value) TG.Head_of_kind_value.create_array_with_contents ~element_kind ~length contents alloc_mode | ( ( Variant _ | Mutable_block _ | Boxed_float _ | Boxed_int32 _ - | Boxed_int64 _ | Boxed_nativeint _ | Closures _ | String _ | Array _ ), + | Boxed_vec128 _ | Boxed_int64 _ | Boxed_nativeint _ | Closures _ + | String _ | Array _ ), _ ) -> Unknown @@ -1315,6 +1339,9 @@ and join_head_of_kind_naked_int64 _env t1 t2 : _ Or_unknown.t = and join_head_of_kind_naked_nativeint _env t1 t2 : _ Or_unknown.t = Known (TG.Head_of_kind_naked_nativeint.union t1 t2) +and join_head_of_kind_naked_vec128 _env t1 t2 : _ Or_unknown.t = + Known (TG.Head_of_kind_naked_vec128.union t1 t2) + and join_head_of_kind_rec_info _env t1 t2 : _ Or_unknown.t = if Rec_info_expr.equal t1 t2 then Known t1 else Unknown diff --git a/middle_end/flambda2/types/provers.ml b/middle_end/flambda2/types/provers.ml index 570f142dc01..707a3aea834 100644 --- a/middle_end/flambda2/types/provers.ml +++ b/middle_end/flambda2/types/provers.ml @@ -96,12 +96,13 @@ let prove_is_int_generic env t : bool generic_proof = | Value (Ok ( Mutable_block _ | Boxed_float _ | Boxed_int32 _ | Boxed_int64 _ - | Boxed_nativeint _ | Closures _ | String _ | Array _ )) -> + | Boxed_vec128 _ | Boxed_nativeint _ | Closures _ | String _ | Array _ + )) -> Proved false | Value Unknown -> Unknown | Value Bottom -> Invalid | Naked_immediate _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ - | Naked_nativeint _ | Rec_info _ | Region _ -> + | Naked_nativeint _ | Naked_vec128 _ | Rec_info _ | Region _ -> wrong_kind "Value" t let prove_is_int env t = as_property (prove_is_int_generic env t) @@ -130,8 +131,9 @@ let prove_get_tag_generic env t : Tag.Set.t generic_proof = | Known tags -> if Tag.Set.is_empty tags then Invalid else Proved tags ))) | Value - (Ok (Boxed_float _ | Boxed_int32 _ | Boxed_int64 _ | Boxed_nativeint _)) - -> + (Ok + ( Boxed_float _ | Boxed_int32 _ | Boxed_int64 _ | Boxed_nativeint _ + | Boxed_vec128 _ )) -> Unknown | Value (Ok (Mutable_block _)) -> Unknown | Value (Ok (Closures _)) -> Unknown @@ -140,7 +142,7 @@ let prove_get_tag_generic env t : Tag.Set.t generic_proof = | Value Unknown -> Unknown | Value Bottom -> Invalid | Naked_immediate _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ - | Naked_nativeint _ | Rec_info _ | Region _ -> + | Naked_vec128 _ | Naked_nativeint _ | Rec_info _ | Region _ -> wrong_kind "Value" t let prove_get_tag env t = as_property (prove_get_tag_generic env t) @@ -172,7 +174,7 @@ let prove_naked_immediates_generic env t : Targetint_31_63.Set.t generic_proof = | Naked_immediate Unknown -> Unknown | Naked_immediate Bottom -> Invalid | Value _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ | Naked_nativeint _ - | Rec_info _ | Region _ -> + | Naked_vec128 _ | Rec_info _ | Region _ -> wrong_kind "Naked_immediate" t let meet_naked_immediates env t = @@ -196,7 +198,7 @@ let prove_equals_tagged_immediates env t : _ proof_of_property = else Unknown) | Value (Ok _ | Unknown | Bottom) -> Unknown | Naked_immediate _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ - | Naked_nativeint _ | Rec_info _ | Region _ -> + | Naked_vec128 _ | Naked_nativeint _ | Rec_info _ | Region _ -> wrong_kind "Value" t let meet_equals_tagged_immediates env t : _ meet_shortcut = @@ -208,12 +210,13 @@ let meet_equals_tagged_immediates env t : _ meet_shortcut = | Value (Ok ( Mutable_block _ | Boxed_float _ | Boxed_int32 _ | Boxed_int64 _ - | Boxed_nativeint _ | Closures _ | String _ | Array _ )) -> + | Boxed_vec128 _ | Boxed_nativeint _ | Closures _ | String _ | Array _ + )) -> Invalid | Value Unknown -> Need_meet | Value Bottom | Naked_immediate _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ - | Naked_nativeint _ | Rec_info _ | Region _ -> + | Naked_vec128 _ | Naked_nativeint _ | Rec_info _ | Region _ -> Invalid let meet_equals_single_tagged_immediate env t : _ meet_shortcut = @@ -230,6 +233,7 @@ type _ meet_naked_number_kind = | Int32 : Int32.Set.t meet_naked_number_kind | Int64 : Int64.Set.t meet_naked_number_kind | Nativeint : Targetint_32_64.Set.t meet_naked_number_kind + | Vec128 : Numeric_types.Vec128_by_bit_pattern.Set.t meet_naked_number_kind let[@inline] meet_naked_number (type a) (kind : a meet_naked_number_kind) env t : a meet_shortcut = @@ -249,6 +253,7 @@ let[@inline] meet_naked_number (type a) (kind : a meet_naked_number_kind) env t | Int32 -> "Naked_int32" | Int64 -> "Naked_int64" | Nativeint -> "Naked_nativeint" + | Vec128 -> "Naked_vec128" in wrong_kind kind_string t in @@ -286,6 +291,14 @@ let[@inline] meet_naked_number (type a) (kind : a meet_naked_number_kind) env t (is :> Targetint_32_64.Set.t)) ~is_empty:Targetint_32_64.Set.is_empty | _ -> wrong_kind ()) + | Naked_vec128 vs -> ( + match kind with + | Vec128 -> + head_to_proof vs + (fun (fs : TG.head_of_kind_naked_vec128) -> + (fs :> Numeric_types.Vec128_by_bit_pattern.Set.t)) + ~is_empty:Numeric_types.Vec128_by_bit_pattern.Set.is_empty + | _ -> wrong_kind ()) let meet_naked_floats = meet_naked_number Float @@ -295,6 +308,8 @@ let meet_naked_int64s = meet_naked_number Int64 let meet_naked_nativeints = meet_naked_number Nativeint +let meet_naked_vec128s = meet_naked_number Vec128 + type variant_like_proof = { const_ctors : Targetint_31_63.Set.t Or_unknown.t; non_const_ctors_with_sizes : Targetint_31_63.t Tag.Scannable.Map.t @@ -342,12 +357,12 @@ let prove_variant_like_generic env t : variant_like_proof generic_proof = | Value (Ok ( Closures _ | Boxed_float _ | Boxed_int32 _ | Boxed_int64 _ - | Boxed_nativeint _ | String _ )) -> + | Boxed_vec128 _ | Boxed_nativeint _ | String _ )) -> Invalid | Value Unknown -> Unknown | Value Bottom -> Invalid | Naked_immediate _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ - | Naked_nativeint _ | Rec_info _ | Region _ -> + | Naked_vec128 _ | Naked_nativeint _ | Rec_info _ | Region _ -> wrong_kind "Value" t let meet_variant_like env t = @@ -380,10 +395,12 @@ let prove_is_a_boxed_or_tagged_number env t : Proved (Boxed (alloc_mode, Naked_int64, contents_ty)) | Value (Ok (Boxed_nativeint (contents_ty, alloc_mode))) -> Proved (Boxed (alloc_mode, Naked_nativeint, contents_ty)) + | Value (Ok (Boxed_vec128 (contents_ty, alloc_mode))) -> + Proved (Boxed (alloc_mode, Naked_vec128, contents_ty)) | Value (Bottom | Ok (Mutable_block _ | Closures _ | String _ | Array _)) -> Unknown | Naked_immediate _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ - | Naked_nativeint _ | Rec_info _ | Region _ -> + | Naked_vec128 _ | Naked_nativeint _ | Rec_info _ | Region _ -> wrong_kind "Value" t let prove_is_a_tagged_immediate env t : _ proof_of_property = @@ -398,7 +415,7 @@ let prove_is_a_boxed_float env t : _ proof_of_property = | Value (Ok (Boxed_float _)) -> Proved () | Value _ -> Unknown | Naked_immediate _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ - | Naked_nativeint _ | Rec_info _ | Region _ -> + | Naked_vec128 _ | Naked_nativeint _ | Rec_info _ | Region _ -> wrong_kind "Value" t let prove_is_or_is_not_a_boxed_float env t : _ proof_of_property = @@ -408,7 +425,7 @@ let prove_is_or_is_not_a_boxed_float env t : _ proof_of_property = | Value (Ok (Boxed_float _)) -> Proved true | Value (Ok _) -> Proved false | Naked_immediate _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ - | Naked_nativeint _ | Rec_info _ | Region _ -> + | Naked_vec128 _ | Naked_nativeint _ | Rec_info _ | Region _ -> wrong_kind "Value" t let prove_is_a_boxed_int32 env t : _ proof_of_property = @@ -417,7 +434,7 @@ let prove_is_a_boxed_int32 env t : _ proof_of_property = | Value (Ok (Boxed_int32 _)) -> Proved () | Value _ -> Unknown | Naked_immediate _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ - | Naked_nativeint _ | Rec_info _ | Region _ -> + | Naked_vec128 _ | Naked_nativeint _ | Rec_info _ | Region _ -> wrong_kind "Value" t let prove_is_a_boxed_int64 env t : _ proof_of_property = @@ -426,7 +443,7 @@ let prove_is_a_boxed_int64 env t : _ proof_of_property = | Value (Ok (Boxed_int64 _)) -> Proved () | Value _ -> Unknown | Naked_immediate _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ - | Naked_nativeint _ | Rec_info _ | Region _ -> + | Naked_vec128 _ | Naked_nativeint _ | Rec_info _ | Region _ -> wrong_kind "Value" t let prove_is_a_boxed_nativeint env t : _ proof_of_property = @@ -435,7 +452,16 @@ let prove_is_a_boxed_nativeint env t : _ proof_of_property = | Value (Ok (Boxed_nativeint _)) -> Proved () | Value _ -> Unknown | Naked_immediate _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ - | Naked_nativeint _ | Rec_info _ | Region _ -> + | Naked_vec128 _ | Naked_nativeint _ | Rec_info _ | Region _ -> + wrong_kind "Value" t + +let prove_is_a_boxed_vec128 env t : _ proof_of_property = + match expand_head env t with + | Value Unknown -> Unknown + | Value (Ok (Boxed_vec128 _)) -> Proved () + | Value _ -> Unknown + | Naked_immediate _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ + | Naked_vec128 _ | Naked_nativeint _ | Rec_info _ | Region _ -> wrong_kind "Value" t let prove_unique_tag_and_size0 env t : @@ -460,7 +486,7 @@ let prove_unique_tag_and_size0 env t : -> Unknown | Naked_immediate _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ - | Naked_nativeint _ | Rec_info _ | Region _ -> + | Naked_vec128 _ | Naked_nativeint _ | Rec_info _ | Region _ -> wrong_kind "Value" t let prove_unique_tag_and_size env t : @@ -505,20 +531,21 @@ let meet_is_flat_float_array env t : bool meet_shortcut = | Value -> Known_result false | Naked_number Naked_float -> Known_result true | Naked_number - (Naked_immediate | Naked_int32 | Naked_int64 | Naked_nativeint) + ( Naked_immediate | Naked_int32 | Naked_int64 | Naked_nativeint + | Naked_vec128 ) | Region | Rec_info -> Misc.fatal_errorf "Wrong element kind for array: %a" K.With_subkind.print element_kind) | Value (Ok ( Boxed_float _ | Boxed_int32 _ | Boxed_int64 _ | Boxed_nativeint _ - | Closures _ | String _ )) -> + | Boxed_vec128 _ | Closures _ | String _ )) -> Invalid | Value (Ok (Variant _ | Mutable_block _)) -> (* In case of untyped code using array primitives on regular blocks *) Need_meet | Naked_immediate _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ - | Naked_nativeint _ | Rec_info _ | Region _ -> + | Naked_vec128 _ | Naked_nativeint _ | Rec_info _ | Region _ -> Misc.fatal_errorf "Kind error: expected [Value]:@ %a" TG.print t let prove_is_immediates_array env t : unit proof_of_property = @@ -533,16 +560,17 @@ let prove_is_immediates_array env t : unit proof_of_property = match K.With_subkind.subkind element_kind with | Tagged_immediate -> Proved () | Anything | Boxed_float | Boxed_int32 | Boxed_int64 | Boxed_nativeint - | Variant _ | Float_block _ | Float_array | Immediate_array | Value_array - | Generic_array -> + | Boxed_vec128 | Variant _ | Float_block _ | Float_array | Immediate_array + | Value_array | Generic_array -> Unknown) | Value (Ok ( Variant _ | Mutable_block _ | Boxed_float _ | Boxed_int32 _ - | Boxed_int64 _ | Boxed_nativeint _ | Closures _ | String _ )) -> + | Boxed_vec128 _ | Boxed_int64 _ | Boxed_nativeint _ | Closures _ + | String _ )) -> Unknown | Naked_immediate _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ - | Naked_nativeint _ | Rec_info _ | Region _ -> + | Naked_vec128 _ | Naked_nativeint _ | Rec_info _ | Region _ -> wrong_kind "Value" t let prove_single_closures_entry_generic env t : _ generic_proof = @@ -566,12 +594,13 @@ let prove_single_closures_entry_generic env t : _ generic_proof = | Value (Ok ( Variant _ | Mutable_block _ | Boxed_float _ | Boxed_int32 _ - | Boxed_int64 _ | Boxed_nativeint _ | String _ | Array _ )) -> + | Boxed_vec128 _ | Boxed_int64 _ | Boxed_nativeint _ | String _ + | Array _ )) -> Invalid | Value Unknown -> Unknown | Value Bottom -> Invalid | Naked_immediate _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ - | Naked_nativeint _ | Rec_info _ | Region _ -> + | Naked_vec128 _ | Naked_nativeint _ | Rec_info _ | Region _ -> wrong_kind "Value" t let meet_single_closures_entry env t = @@ -588,7 +617,7 @@ let meet_is_immutable_array env t : _ meet_shortcut = | Unknown -> Need_meet) | Value (Ok _) | Naked_immediate _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ - | Naked_nativeint _ | Rec_info _ | Region _ -> + | Naked_vec128 _ | Naked_nativeint _ | Rec_info _ | Region _ -> Invalid let prove_single_closures_entry env t = @@ -601,7 +630,7 @@ let meet_strings env t : String_info.Set.t meet_shortcut = | Value Unknown -> Need_meet | Value Bottom -> Invalid | Naked_immediate _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ - | Naked_nativeint _ | Rec_info _ | Region _ -> + | Naked_vec128 _ | Naked_nativeint _ | Rec_info _ | Region _ -> Misc.fatal_errorf "Kind error: expected [Value]:@ %a" TG.print t let prove_strings env t : _ proof_of_property = @@ -612,7 +641,7 @@ let prove_strings env t : _ proof_of_property = Proved (Alloc_mode.For_types.heap, strs) | Value (Ok _ | Unknown | Bottom) -> Unknown | Naked_immediate _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ - | Naked_nativeint _ | Rec_info _ | Region _ -> + | Naked_vec128 _ | Naked_nativeint _ | Rec_info _ | Region _ -> Misc.fatal_errorf "Kind error: expected [Value]:@ %a" TG.print t type tagging_proof_kind = @@ -655,7 +684,7 @@ let[@inline always] inspect_tagging_of_simple proof_kind env ~min_name_mode t : | Meet, _ -> inspect_immediates ()) | Value _ -> Unknown | Naked_immediate _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ - | Naked_nativeint _ | Rec_info _ | Region _ -> + | Naked_vec128 _ | Naked_nativeint _ | Rec_info _ | Region _ -> wrong_kind "Value" t let prove_tagging_of_simple env ~min_name_mode t = @@ -679,7 +708,7 @@ let[@inline always] meet_boxed_number_containing_simple | Value Unknown -> Need_meet | Value Bottom -> Invalid | Naked_immediate _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ - | Naked_nativeint _ | Rec_info _ | Region _ -> + | Naked_vec128 _ | Naked_nativeint _ | Rec_info _ | Region _ -> Misc.fatal_errorf "Kind error: expected [Value]:@ %a" TG.print t let meet_boxed_float_containing_simple = @@ -688,7 +717,7 @@ let meet_boxed_float_containing_simple = match ty_value with | Boxed_float (ty, _) -> Some ty | Variant _ | Mutable_block _ | Boxed_int32 _ | Boxed_int64 _ - | Boxed_nativeint _ | Closures _ | String _ | Array _ -> + | Boxed_vec128 _ | Boxed_nativeint _ | Closures _ | String _ | Array _ -> None) let meet_boxed_int32_containing_simple = @@ -697,7 +726,7 @@ let meet_boxed_int32_containing_simple = match ty_value with | Boxed_int32 (ty, _) -> Some ty | Variant _ | Mutable_block _ | Boxed_float _ | Boxed_int64 _ - | Boxed_nativeint _ | Closures _ | String _ | Array _ -> + | Boxed_vec128 _ | Boxed_nativeint _ | Closures _ | String _ | Array _ -> None) let meet_boxed_int64_containing_simple = @@ -706,7 +735,7 @@ let meet_boxed_int64_containing_simple = match ty_value with | Boxed_int64 (ty, _) -> Some ty | Variant _ | Mutable_block _ | Boxed_float _ | Boxed_int32 _ - | Boxed_nativeint _ | Closures _ | String _ | Array _ -> + | Boxed_vec128 _ | Boxed_nativeint _ | Closures _ | String _ | Array _ -> None) let meet_boxed_nativeint_containing_simple = @@ -715,7 +744,16 @@ let meet_boxed_nativeint_containing_simple = match ty_value with | Boxed_nativeint (ty, _) -> Some ty | Variant _ | Mutable_block _ | Boxed_float _ | Boxed_int32 _ - | Boxed_int64 _ | Closures _ | String _ | Array _ -> + | Boxed_vec128 _ | Boxed_int64 _ | Closures _ | String _ | Array _ -> + None) + +let meet_boxed_vec128_containing_simple = + meet_boxed_number_containing_simple + ~contents_of_boxed_number:(fun (ty_value : TG.head_of_kind_value) -> + match ty_value with + | Boxed_vec128 (ty, _) -> Some ty + | Variant _ | Mutable_block _ | Boxed_float _ | Boxed_int32 _ + | Boxed_nativeint _ | Boxed_int64 _ | Closures _ | String _ | Array _ -> None) let meet_block_field_simple env ~min_name_mode ~field_kind t field_index : @@ -751,7 +789,7 @@ let meet_block_field_simple env ~min_name_mode ~field_kind t field_index : | Value Unknown -> Need_meet | Value Bottom -> Invalid | Naked_immediate _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ - | Naked_nativeint _ | Rec_info _ | Region _ -> + | Naked_vec128 _ | Naked_nativeint _ | Rec_info _ | Region _ -> wrong_kind "Value" t let meet_project_function_slot_simple env ~min_name_mode t function_slot : @@ -773,7 +811,7 @@ let meet_project_function_slot_simple env ~min_name_mode t function_slot : | Value Unknown -> Need_meet | Value Bottom -> Invalid | Naked_immediate _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ - | Naked_nativeint _ | Rec_info _ | Region _ -> + | Naked_vec128 _ | Naked_nativeint _ | Rec_info _ | Region _ -> wrong_kind "Value" t let meet_project_value_slot_simple env ~min_name_mode t value_slot : @@ -800,7 +838,7 @@ let meet_project_value_slot_simple env ~min_name_mode t value_slot : | Value Unknown -> Need_meet | Value Bottom -> Invalid | Naked_immediate _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ - | Naked_nativeint _ | Rec_info _ | Region _ -> + | Naked_vec128 _ | Naked_nativeint _ | Rec_info _ | Region _ -> wrong_kind "Value" t let meet_rec_info env t : Rec_info_expr.t meet_shortcut = @@ -809,7 +847,7 @@ let meet_rec_info env t : Rec_info_expr.t meet_shortcut = | Rec_info Unknown -> Need_meet | Rec_info Bottom -> Invalid | Value _ | Naked_immediate _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ - | Naked_nativeint _ | Region _ -> + | Naked_vec128 _ | Naked_nativeint _ | Region _ -> wrong_kind "Rec_info" t let prove_alloc_mode_of_boxed_number env t : @@ -818,13 +856,14 @@ let prove_alloc_mode_of_boxed_number env t : | Value (Ok (Boxed_float (_, alloc_mode))) | Value (Ok (Boxed_int32 (_, alloc_mode))) | Value (Ok (Boxed_int64 (_, alloc_mode))) - | Value (Ok (Boxed_nativeint (_, alloc_mode))) -> + | Value (Ok (Boxed_nativeint (_, alloc_mode))) + | Value (Ok (Boxed_vec128 (_, alloc_mode))) -> Proved alloc_mode | Value (Ok (Variant _ | Mutable_block _ | String _ | Array _ | Closures _)) | Value (Unknown | Bottom) -> Unknown | Naked_immediate _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ - | Naked_nativeint _ | Rec_info _ | Region _ -> + | Naked_vec128 _ | Naked_nativeint _ | Rec_info _ | Region _ -> wrong_kind "Value" t let never_holds_locally_allocated_values env var : _ proof_of_property = @@ -846,6 +885,7 @@ let never_holds_locally_allocated_values env var : _ proof_of_property = | Value (Ok (Boxed_int32 (_, alloc_mode))) | Value (Ok (Boxed_int64 (_, alloc_mode))) | Value (Ok (Boxed_nativeint (_, alloc_mode))) + | Value (Ok (Boxed_vec128 (_, alloc_mode))) | Value (Ok (Mutable_block { alloc_mode })) | Value (Ok (Closures { alloc_mode; _ })) | Value (Ok (Array { alloc_mode; _ })) -> ( @@ -856,7 +896,7 @@ let never_holds_locally_allocated_values env var : _ proof_of_property = | Value Unknown -> Unknown | Value Bottom -> Unknown | Naked_immediate _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ - | Naked_nativeint _ | Rec_info _ | Region _ -> + | Naked_vec128 _ | Naked_nativeint _ | Rec_info _ | Region _ -> Proved ()) let prove_physical_equality env t1 t2 = @@ -874,17 +914,24 @@ let prove_physical_equality env t1 t2 = | Naked_nativeint (Ok s1), Naked_nativeint (Ok s2) -> let module IS = Targetint_32_64.Set in IS.is_empty (IS.inter (s1 :> IS.t) (s2 :> IS.t)) - | _, _ -> false + | Naked_vec128 (Ok s1), Naked_vec128 (Ok s2) -> + let module IS = Numeric_types.Vec128_by_bit_pattern.Set in + IS.is_empty (IS.inter (s1 :> IS.t) (s2 :> IS.t)) + | ( ( Naked_float _ | Naked_int32 _ | Naked_int64 _ | Naked_nativeint _ + | Naked_vec128 _ | Value _ | Naked_immediate _ | Region _ | Rec_info _ + ), + _ ) -> + false in let check_heads () : _ proof_of_property = match expand_head env t1, expand_head env t2 with | ( ( Naked_immediate _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ - | Naked_nativeint _ | Rec_info _ | Region _ ), + | Naked_vec128 _ | Naked_nativeint _ | Rec_info _ | Region _ ), _ ) -> wrong_kind "Value" t1 | ( _, ( Naked_immediate _ | Naked_float _ | Naked_int32 _ | Naked_int64 _ - | Naked_nativeint _ | Rec_info _ | Region _ ) ) -> + | Naked_vec128 _ | Naked_nativeint _ | Rec_info _ | Region _ ) ) -> wrong_kind "Value" t2 | Value (Unknown | Bottom), _ | _, Value (Unknown | Bottom) -> Unknown | Value (Ok head1), Value (Ok head2) -> ( @@ -901,6 +948,8 @@ let prove_physical_equality env t1 t2 = if incompatible_naked_numbers t1 t2 then Proved false else Unknown | Boxed_nativeint (t1, _), Boxed_nativeint (t2, _) -> if incompatible_naked_numbers t1 t2 then Proved false else Unknown + | Boxed_vec128 (t1, _), Boxed_vec128 (t2, _) -> + if incompatible_naked_numbers t1 t2 then Proved false else Unknown | Closures _, Closures _ -> Unknown | String s1, String s2 -> let module SS = String_info.Set in @@ -908,9 +957,11 @@ let prove_physical_equality env t1 t2 = (* Immediates and allocated values -> Proved false *) | ( Variant { immediates = _; blocks = Known blocks; is_unique = _ }, ( Mutable_block _ | Boxed_float _ | Boxed_int32 _ | Boxed_int64 _ - | Boxed_nativeint _ | Closures _ | String _ | Array _ ) ) + | Boxed_vec128 _ | Boxed_nativeint _ | Closures _ | String _ | Array _ + ) ) | ( ( Mutable_block _ | Boxed_float _ | Boxed_int32 _ | Boxed_int64 _ - | Boxed_nativeint _ | Closures _ | String _ | Array _ ), + | Boxed_vec128 _ | Boxed_nativeint _ | Closures _ | String _ | Array _ + ), Variant { immediates = _; blocks = Known blocks; is_unique = _ } ) when TG.Row_like_for_blocks.is_bottom blocks -> Proved false @@ -997,22 +1048,30 @@ let prove_physical_equality env t1 t2 = (* Boxed numbers with non-numbers or different kinds -> Proved *) | ( Boxed_float _, ( Variant _ | Mutable_block _ | Boxed_int32 _ | Boxed_int64 _ - | Boxed_nativeint _ | Closures _ | String _ | Array _ ) ) + | Boxed_vec128 _ | Boxed_nativeint _ | Closures _ | String _ | Array _ + ) ) | ( ( Variant _ | Mutable_block _ | Boxed_int32 _ | Boxed_int64 _ - | Boxed_nativeint _ | Closures _ | String _ | Array _ ), + | Boxed_vec128 _ | Boxed_nativeint _ | Closures _ | String _ | Array _ + ), Boxed_float _ ) | ( Boxed_int32 _, ( Variant _ | Mutable_block _ | Boxed_int64 _ | Boxed_nativeint _ - | Closures _ | String _ | Array _ ) ) + | Boxed_vec128 _ | Closures _ | String _ | Array _ ) ) | ( ( Variant _ | Mutable_block _ | Boxed_int64 _ | Boxed_nativeint _ - | Closures _ | String _ | Array _ ), + | Boxed_vec128 _ | Closures _ | String _ | Array _ ), Boxed_int32 _ ) | ( Boxed_int64 _, + ( Variant _ | Mutable_block _ | Boxed_nativeint _ | Closures _ + | Boxed_vec128 _ | String _ | Array _ ) ) + | ( ( Variant _ | Mutable_block _ | Boxed_nativeint _ | Closures _ + | Boxed_vec128 _ | String _ | Array _ ), + Boxed_int64 _ ) + | ( Boxed_vec128 _, ( Variant _ | Mutable_block _ | Boxed_nativeint _ | Closures _ | String _ | Array _ ) ) | ( ( Variant _ | Mutable_block _ | Boxed_nativeint _ | Closures _ | String _ | Array _ ), - Boxed_int64 _ ) + Boxed_vec128 _ ) | ( Boxed_nativeint _, (Variant _ | Mutable_block _ | Closures _ | String _ | Array _) ) | ( (Variant _ | Mutable_block _ | Closures _ | String _ | Array _), diff --git a/middle_end/flambda2/types/provers.mli b/middle_end/flambda2/types/provers.mli index 1525fdc0302..66e843fbe14 100644 --- a/middle_end/flambda2/types/provers.mli +++ b/middle_end/flambda2/types/provers.mli @@ -62,6 +62,11 @@ val meet_naked_int64s : val meet_naked_nativeints : Typing_env.t -> Type_grammar.t -> Targetint_32_64.Set.t meet_shortcut +val meet_naked_vec128s : + Typing_env.t -> + Type_grammar.t -> + Numeric_types.Vec128_by_bit_pattern.Set.t meet_shortcut + type variant_like_proof = private { const_ctors : Targetint_31_63.Set.t Or_unknown.t; non_const_ctors_with_sizes : Targetint_31_63.t Tag.Scannable.Map.t @@ -96,6 +101,9 @@ val prove_is_a_boxed_int64 : val prove_is_a_boxed_nativeint : Typing_env.t -> Type_grammar.t -> unit proof_of_property +val prove_is_a_boxed_vec128 : + Typing_env.t -> Type_grammar.t -> unit proof_of_property + val prove_is_or_is_not_a_boxed_float : Typing_env.t -> Type_grammar.t -> bool proof_of_property @@ -190,6 +198,12 @@ val meet_boxed_nativeint_containing_simple : Type_grammar.t -> Simple.t meet_shortcut +val meet_boxed_vec128_containing_simple : + Typing_env.t -> + min_name_mode:Name_mode.t -> + Type_grammar.t -> + Simple.t meet_shortcut + val meet_block_field_simple : Typing_env.t -> min_name_mode:Name_mode.t -> diff --git a/middle_end/flambda2/types/reify.ml b/middle_end/flambda2/types/reify.ml index 40f14314581..f9ca741ffbc 100644 --- a/middle_end/flambda2/types/reify.ml +++ b/middle_end/flambda2/types/reify.ml @@ -30,6 +30,7 @@ type to_lift = | Boxed_int32 of Int32.t | Boxed_int64 of Int64.t | Boxed_nativeint of Targetint_32_64.t + | Boxed_vec128 of Numeric_types.Vec128_by_bit_pattern.t | Immutable_float_array of { fields : Float.t list } | Immutable_value_array of { fields : Simple.t list } | Empty_array @@ -58,7 +59,7 @@ let try_to_reify_fields env ~var_allowed alloc_mode ~field_types = match Reg_width_const.descr const with | Tagged_immediate _imm -> Some simple | Naked_immediate _ | Naked_float _ | Naked_int32 _ - | Naked_int64 _ | Naked_nativeint _ -> + | Naked_vec128 _ | Naked_int64 _ | Naked_nativeint _ -> (* This should never happen, as we should have got a kind error instead *) None) @@ -315,6 +316,13 @@ let reify ~allowed_if_free_vars_defined_in ~var_is_defined_at_toplevel match Targetint_32_64.Set.get_singleton (ns :> Targetint_32_64.Set.t) with | None -> try_canonical_simple () | Some n -> Simple (Simple.const (Reg_width_const.naked_nativeint n))) + | Naked_vec128 (Ok ns) -> ( + match + Numeric_types.Vec128_by_bit_pattern.Set.get_singleton + (ns :> Numeric_types.Vec128_by_bit_pattern.Set.t) + with + | None -> try_canonical_simple () + | Some n -> Simple (Simple.const (Reg_width_const.naked_vec128 n))) (* CR-someday mshinwell: These could lift at toplevel when [ty_naked_float] is an alias type. That would require checking the alloc mode. *) | Value (Ok (Boxed_float (ty_naked_float, _alloc_mode))) -> ( @@ -353,6 +361,14 @@ let reify ~allowed_if_free_vars_defined_in ~var_is_defined_at_toplevel match Targetint_32_64.Set.get_singleton ns with | None -> try_canonical_simple () | Some n -> Lift (Boxed_nativeint n))) + | Value (Ok (Boxed_vec128 (ty_naked_vec128, _alloc_mode))) -> ( + match Provers.meet_naked_vec128s env ty_naked_vec128 with + | Need_meet -> try_canonical_simple () + | Invalid -> Invalid + | Known_result ns -> ( + match Numeric_types.Vec128_by_bit_pattern.Set.get_singleton ns with + | None -> try_canonical_simple () + | Some n -> Lift (Boxed_vec128 n))) | Value (Ok (Array @@ -417,7 +433,8 @@ let reify ~allowed_if_free_vars_defined_in ~var_is_defined_at_toplevel | Ok fields_rev -> Lift (Immutable_float_array { fields = List.rev fields_rev })) | Naked_number - (Naked_immediate | Naked_int32 | Naked_int64 | Naked_nativeint) + ( Naked_immediate | Naked_int32 | Naked_int64 | Naked_nativeint + | Naked_vec128 ) | Region | Rec_info -> Misc.fatal_errorf "Unexpected kind %a in immutable array case when reifying type:@ \ @@ -429,6 +446,7 @@ let reify ~allowed_if_free_vars_defined_in ~var_is_defined_at_toplevel | Naked_int32 Bottom | Naked_int64 Bottom | Naked_nativeint Bottom + | Naked_vec128 Bottom | Rec_info Bottom | Region Bottom -> Invalid @@ -438,6 +456,7 @@ let reify ~allowed_if_free_vars_defined_in ~var_is_defined_at_toplevel | Naked_float Unknown | Naked_int32 Unknown | Naked_int64 Unknown + | Naked_vec128 Unknown | Naked_nativeint Unknown | Rec_info Unknown | Region (Unknown | Ok _) diff --git a/middle_end/flambda2/types/reify.mli b/middle_end/flambda2/types/reify.mli index 98281f3c23a..708285d0a45 100644 --- a/middle_end/flambda2/types/reify.mli +++ b/middle_end/flambda2/types/reify.mli @@ -27,6 +27,7 @@ type to_lift = private | Boxed_int32 of Numeric_types.Int32.t | Boxed_int64 of Numeric_types.Int64.t | Boxed_nativeint of Targetint_32_64.t + | Boxed_vec128 of Numeric_types.Vec128_by_bit_pattern.t | Immutable_float_array of { fields : Numeric_types.Float_by_bit_pattern.t list } | Immutable_value_array of { fields : Simple.t list } diff --git a/middle_end/printclambda.ml b/middle_end/printclambda.ml index 984c31715c9..923fcaf112f 100644 --- a/middle_end/printclambda.ml +++ b/middle_end/printclambda.ml @@ -37,6 +37,7 @@ let rec value_kind0 ppf kind = | Pboxedintval Pnativeint -> Format.pp_print_string ppf ":nativeint" | Pboxedintval Pint32 -> Format.pp_print_string ppf ":int32" | Pboxedintval Pint64 -> Format.pp_print_string ppf ":int64" + | Pboxedvectorval Pvec128 -> Format.pp_print_string ppf ":vec128" | Pvariant { consts; non_consts } -> Format.fprintf ppf "@[[(consts (%a))@ (non_consts (%a))]@]" (Format.pp_print_list ~pp_sep:Format.pp_print_space Format.pp_print_int) @@ -60,12 +61,14 @@ let layout (layout : Lambda.layout) = | Punboxed_int Pint32 -> ":unboxed_int32" | Punboxed_int Pint64 -> ":unboxed_int64" | Punboxed_int Pnativeint -> ":unboxed_nativeint" + | Punboxed_vector Pvec128 -> ":unboxed_vec128" let rec structured_constant ppf = function | Uconst_float x -> fprintf ppf "%F" x | Uconst_int32 x -> fprintf ppf "%ldl" x | Uconst_int64 x -> fprintf ppf "%LdL" x | Uconst_nativeint x -> fprintf ppf "%ndn" x + | Uconst_vec128 {high; low} -> fprintf ppf "%016Lx:%016Lx" high low | Uconst_block (tag, l) -> fprintf ppf "block(%i" tag; List.iter (fun u -> fprintf ppf ",%a" uconstant u) l; diff --git a/ocaml/Makefile.common-jst b/ocaml/Makefile.common-jst index a9f46fe2315..acd7a7689db 100644 --- a/ocaml/Makefile.common-jst +++ b/ocaml/Makefile.common-jst @@ -198,6 +198,9 @@ install_for_test: _install # replace backend-specific testsuite/tests/asmgen with their new versions rm _runtest/testsuite/tests/asmgen/* cp -a testsuite/tests/asmgen/* _runtest/testsuite/tests/asmgen/ + # replace backend-specific testsuite/tests/unboxed-primitive-args with their new versions + rm _runtest/testsuite/tests/unboxed-primitive-args/* + cp -a testsuite/tests/unboxed-primitive-args/* _runtest/testsuite/tests/unboxed-primitive-args/ cp $(ocamldir)/Makefile.* _runtest/ diff --git a/ocaml/asmcomp/amd64/emit.mlp b/ocaml/asmcomp/amd64/emit.mlp index 39ccf590568..c69a07418b5 100644 --- a/ocaml/asmcomp/amd64/emit.mlp +++ b/ocaml/asmcomp/amd64/emit.mlp @@ -1210,7 +1210,7 @@ let probe_env p = env.stack_offset <- p.probe_stack_offset; (* Account for the return address that is now pushed on the stack. *) env.stack_offset <- env.stack_offset + 8; - env + env let emit_probe_handler_wrapper p = let wrap_label = probe_handler_wrapper_name p.probe_label in diff --git a/ocaml/asmcomp/cmm_helpers.ml b/ocaml/asmcomp/cmm_helpers.ml index 5f6edb8e368..46a559aed81 100644 --- a/ocaml/asmcomp/cmm_helpers.ml +++ b/ocaml/asmcomp/cmm_helpers.ml @@ -919,6 +919,8 @@ module Extended_machtype = struct (* Only 64-bit architectures, so this is always [typ_int] *) typ_any_int | Pvalue Pintval -> typ_tagged_int + | Punboxed_vector _ -> + Misc.fatal_error "SIMD vectors are not yet suppored in the upstream compiler build." | Pvalue _ -> typ_val end @@ -3283,5 +3285,8 @@ let kind_of_layout (layout : Lambda.layout) = | Pvalue (Pboxedintval bi) -> Boxed_integer bi | Pvalue (Pgenval | Pintval | Pvariant _ | Parrayval _) | Ptop | Pbottom | Punboxed_float | Punboxed_int _ -> Any + | Pvalue (Pboxedvectorval _) + | Punboxed_vector _ -> + Misc.fatal_error "SIMD vectors are not yet suppored in the upstream compiler build." let make_tuple l = match l with [e] -> e | _ -> Ctuple l diff --git a/ocaml/asmcomp/cmmgen.ml b/ocaml/asmcomp/cmmgen.ml index cb576e08247..a6b7dc5b20e 100644 --- a/ocaml/asmcomp/cmmgen.ml +++ b/ocaml/asmcomp/cmmgen.ml @@ -124,6 +124,8 @@ let get_field env layout ptr n dbg = | Pvalue Pintval | Punboxed_int _ -> Word_int | Pvalue _ -> Word_val | Punboxed_float -> Double + | Punboxed_vector _ -> + Misc.fatal_error "SIMD vectors are not yet suppored in the upstream compiler build." | Ptop -> Misc.fatal_errorf "get_field with Ptop: %a" Debuginfo.print_compact dbg | Pbottom -> @@ -842,6 +844,8 @@ and transl_ccall env prim args dbg = | Pint32 -> XInt32 | Pint64 -> XInt64 in (xty, transl_unbox_int dbg env bi arg) + | Unboxed_vector _ -> + Misc.fatal_error "SIMD vectors are not yet suppored in the upstream compiler build." | Untagged_int -> (XInt, untag_int (transl env arg) dbg) in @@ -871,6 +875,8 @@ and transl_ccall env prim args dbg = ([|Int; Int|], box_int dbg Pint64 alloc_heap) | _, Unboxed_integer bi -> (typ_int, box_int dbg bi alloc_heap) | _, Untagged_int -> (typ_int, (fun i -> tag_int i dbg)) + | _, Unboxed_vector _ -> + Misc.fatal_error "SIMD vectors are not yet suppored in the upstream compiler build." in let typ_args, args = transl_args prim.prim_native_repr_args args in wrap_result @@ -1244,6 +1250,8 @@ and transl_let_value env str (kind : Lambda.value_kind) id exp transl_body = Boxed (Boxed_float (alloc_heap, dbg), false) | Mutable, Pboxedintval bi -> Boxed (Boxed_integer (bi, alloc_heap, dbg), false) + | _, Pboxedvectorval _ -> + Misc.fatal_error "SIMD vectors are not yet suppored in the upstream compiler build." | _, (Pfloatval | Pboxedintval _) -> (* It would be safe to always unbox in this case, but we do it only if this indeed allows us to get rid of @@ -1290,6 +1298,8 @@ and transl_let env str (layout : Lambda.layout) id exp transl_body = there may be constant closures inside that need lifting out. *) let _cbody : expression = transl_body env in cexp + | Punboxed_vector _ -> + Misc.fatal_error "SIMD vectors are not yet suppored in the upstream compiler build." | Punboxed_float | Punboxed_int _ -> begin let cexp = transl env exp in let cbody = transl_body env in diff --git a/ocaml/boot/ocamlc b/ocaml/boot/ocamlc index b0ee3819b10..fcfac0201e4 100755 Binary files a/ocaml/boot/ocamlc and b/ocaml/boot/ocamlc differ diff --git a/ocaml/boot/ocamllex b/ocaml/boot/ocamllex index 6da593d2da4..da767d2e389 100755 Binary files a/ocaml/boot/ocamllex and b/ocaml/boot/ocamllex differ diff --git a/ocaml/lambda/lambda.ml b/ocaml/lambda/lambda.ml index 48f210df8a7..6a7932f739a 100644 --- a/ocaml/lambda/lambda.ml +++ b/ocaml/lambda/lambda.ml @@ -260,12 +260,14 @@ and value_kind = non_consts : (int * value_kind list) list; } | Parrayval of array_kind + | Pboxedvectorval of boxed_vector and layout = | Ptop | Pvalue of value_kind | Punboxed_float | Punboxed_int of boxed_integer + | Punboxed_vector of boxed_vector | Pbottom and block_shape = @@ -289,6 +291,9 @@ and array_set_kind = and boxed_integer = Primitive.boxed_integer = Pnativeint | Pint32 | Pint64 +and boxed_vector = Primitive.boxed_vector = + | Pvec128 + and bigarray_kind = Pbigarray_unknown | Pbigarray_float32 | Pbigarray_float64 @@ -310,6 +315,8 @@ and raise_kind = let equal_boxed_integer = Primitive.equal_boxed_integer +let equal_boxed_vector = Primitive.equal_boxed_vector + let equal_primitive = (* Should be implemented like [equal_value_kind] of [equal_boxed_integer], i.e. by matching over the various constructors but the type has more @@ -321,6 +328,8 @@ let rec equal_value_kind x y = | Pgenval, Pgenval -> true | Pfloatval, Pfloatval -> true | Pboxedintval bi1, Pboxedintval bi2 -> equal_boxed_integer bi1 bi2 + | Pboxedvectorval bi1, Pboxedvectorval bi2 -> + equal_boxed_vector bi1 bi2 | Pintval, Pintval -> true | Parrayval elt_kind1, Parrayval elt_kind2 -> elt_kind1 = elt_kind2 | Pvariant { consts = consts1; non_consts = non_consts1; }, @@ -337,7 +346,7 @@ let rec equal_value_kind x y = && List.for_all2 equal_value_kind fields1 fields2) non_consts1 non_consts2 | (Pgenval | Pfloatval | Pboxedintval _ | Pintval | Pvariant _ - | Parrayval _), _ -> false + | Parrayval _ | Pboxedvectorval _), _ -> false let equal_layout x y = match x, y with @@ -354,9 +363,10 @@ let compatible_layout x y = | Punboxed_float, Punboxed_float -> true | Punboxed_int bi1, Punboxed_int bi2 -> equal_boxed_integer bi1 bi2 + | Punboxed_vector bi1, Punboxed_vector bi2 -> equal_boxed_vector bi1 bi2 | Ptop, Ptop -> true | Ptop, _ | _, Ptop -> false - | (Pvalue _ | Punboxed_float | Punboxed_int _), _ -> false + | (Pvalue _ | Punboxed_float | Punboxed_int _ | Punboxed_vector _), _ -> false let must_be_value layout = match layout with @@ -640,6 +650,8 @@ let layout_functor = Pvalue Pgenval let layout_boxed_float = Pvalue Pfloatval let layout_string = Pvalue Pgenval let layout_boxedint bi = Pvalue (Pboxedintval bi) + +let layout_boxed_vector vi = Pvalue (Pboxedvectorval vi) let layout_lazy = Pvalue Pgenval let layout_lazy_contents = Pvalue Pgenval let layout_any_value = Pvalue Pgenval @@ -1442,6 +1454,7 @@ let primitive_result_layout (p : primitive) = | Pbox_float _ -> layout_boxed_float | Punbox_float -> Punboxed_float | Pccall { prim_native_repr_res = _, Untagged_int; _} -> layout_int + | Pccall { prim_native_repr_res = _, Unboxed_vector v; _} -> layout_boxed_vector v | Pccall { prim_native_repr_res = _, Unboxed_float; _} -> layout_boxed_float | Pccall { prim_native_repr_res = _, Same_as_ocaml_repr s; _} -> begin match s with diff --git a/ocaml/lambda/lambda.mli b/ocaml/lambda/lambda.mli index 09ea1eb336d..b6236bfb775 100644 --- a/ocaml/lambda/lambda.mli +++ b/ocaml/lambda/lambda.mli @@ -235,6 +235,7 @@ and value_kind = expected to be significant. *) } | Parrayval of array_kind + | Pboxedvectorval of boxed_vector (* Because we check for and error on void in the translation to lambda, we don't need a constructor for it here. *) @@ -243,6 +244,7 @@ and layout = | Pvalue of value_kind | Punboxed_float | Punboxed_int of boxed_integer + | Punboxed_vector of boxed_vector | Pbottom and block_shape = @@ -251,6 +253,9 @@ and block_shape = and boxed_integer = Primitive.boxed_integer = Pnativeint | Pint32 | Pint64 +and boxed_vector = Primitive.boxed_vector = + | Pvec128 + and bigarray_kind = Pbigarray_unknown | Pbigarray_float32 | Pbigarray_float64 @@ -280,6 +285,8 @@ val compatible_layout : layout -> layout -> bool val equal_boxed_integer : boxed_integer -> boxed_integer -> bool +val equal_boxed_vector : boxed_vector -> boxed_vector -> bool + val must_be_value : layout -> value_kind type structured_constant = diff --git a/ocaml/lambda/printlambda.ml b/ocaml/lambda/printlambda.ml index 9f6b1aaac15..427120e9415 100644 --- a/ocaml/lambda/printlambda.ml +++ b/ocaml/lambda/printlambda.ml @@ -85,6 +85,9 @@ let boxed_integer_name = function | Pint32 -> "int32" | Pint64 -> "int64" +let boxed_vector_name = function + | Pvec128 -> "vec128" + let variant_kind print_contents ppf ~consts ~non_consts = fprintf ppf "@[[(consts (%a))@ (non_consts (%a))]@]" (Format.pp_print_list ~pp_sep:Format.pp_print_space Format.pp_print_int) @@ -105,6 +108,7 @@ let rec value_kind ppf = function | Pfloatval -> fprintf ppf "[float]" | Parrayval elt_kind -> fprintf ppf "[%sarray]" (array_kind elt_kind) | Pboxedintval bi -> fprintf ppf "[%s]" (boxed_integer_name bi) + | Pboxedvectorval bv -> fprintf ppf "[%s]" (boxed_vector_name bv) | Pvariant { consts; non_consts; } -> variant_kind value_kind' ppf ~consts ~non_consts @@ -114,6 +118,7 @@ and value_kind' ppf = function | Pfloatval -> fprintf ppf "[float]" | Parrayval elt_kind -> fprintf ppf "[%sarray]" (array_kind elt_kind) | Pboxedintval bi -> fprintf ppf "[%s]" (boxed_integer_name bi) + | Pboxedvectorval bv -> fprintf ppf "[%s]" (boxed_vector_name bv) | Pvariant { consts; non_consts; } -> variant_kind value_kind' ppf ~consts ~non_consts @@ -124,6 +129,7 @@ let layout ppf layout = | Pbottom -> fprintf ppf "[bottom]" | Punboxed_float -> fprintf ppf "[unboxed_float]" | Punboxed_int bi -> fprintf ppf "[unboxed_%s]" (boxed_integer_name bi) + | Punboxed_vector bi -> fprintf ppf "[unboxed_%s]" (boxed_vector_name bi) let return_kind ppf (mode, kind) = let smode = alloc_mode mode in @@ -135,10 +141,13 @@ let return_kind ppf (mode, kind) = | Pvalue (Parrayval elt_kind) -> fprintf ppf ": %s%sarray@ " smode (array_kind elt_kind) | Pvalue (Pboxedintval bi) -> fprintf ppf ": %s%s@ " smode (boxed_integer_name bi) + | Pvalue (Pboxedvectorval bv) -> + fprintf ppf ": %s%s@ " smode (boxed_vector_name bv) | Pvalue (Pvariant { consts; non_consts; }) -> variant_kind value_kind' ppf ~consts ~non_consts | Punboxed_float -> fprintf ppf ": unboxed_float@ " | Punboxed_int bi -> fprintf ppf ": unboxed_%s@ " (boxed_integer_name bi) + | Punboxed_vector bi -> fprintf ppf ": unboxed_%s@ " (boxed_vector_name bi) | Ptop -> fprintf ppf ": top@ " | Pbottom -> fprintf ppf ": bottom@ " @@ -148,6 +157,7 @@ let field_kind ppf = function | Pfloatval -> pp_print_string ppf "float" | Parrayval elt_kind -> fprintf ppf "%s-array" (array_kind elt_kind) | Pboxedintval bi -> pp_print_string ppf (boxed_integer_name bi) + | Pboxedvectorval bv -> pp_print_string ppf (boxed_vector_name bv) | Pvariant { consts; non_consts; } -> fprintf ppf "@[[(consts (%a))@ (non_consts (%a))]@]" (Format.pp_print_list ~pp_sep:Format.pp_print_space Format.pp_print_int) diff --git a/ocaml/middle_end/clambda_primitives.ml b/ocaml/middle_end/clambda_primitives.ml index 92725a7a771..b2f2b908ead 100644 --- a/ocaml/middle_end/clambda_primitives.ml +++ b/ocaml/middle_end/clambda_primitives.ml @@ -158,18 +158,23 @@ and value_kind = Lambda.value_kind = non_consts : (int * value_kind list) list; } | Parrayval of array_kind + | Pboxedvectorval of boxed_vector and layout = Lambda.layout = | Ptop | Pvalue of value_kind | Punboxed_float | Punboxed_int of boxed_integer + | Punboxed_vector of boxed_vector | Pbottom and block_shape = Lambda.block_shape and boxed_integer = Primitive.boxed_integer = Pnativeint | Pint32 | Pint64 +and boxed_vector = Primitive.boxed_vector = + | Pvec128 + and bigarray_kind = Lambda.bigarray_kind = Pbigarray_unknown | Pbigarray_float32 | Pbigarray_float64 diff --git a/ocaml/middle_end/clambda_primitives.mli b/ocaml/middle_end/clambda_primitives.mli index b65e674ee76..06bd54fd94c 100644 --- a/ocaml/middle_end/clambda_primitives.mli +++ b/ocaml/middle_end/clambda_primitives.mli @@ -161,18 +161,23 @@ and value_kind = Lambda.value_kind = non_consts : (int * value_kind list) list; } | Parrayval of array_kind + | Pboxedvectorval of boxed_vector and layout = Lambda.layout = | Ptop | Pvalue of value_kind | Punboxed_float | Punboxed_int of boxed_integer + | Punboxed_vector of boxed_vector | Pbottom and block_shape = Lambda.block_shape and boxed_integer = Primitive.boxed_integer = Pnativeint | Pint32 | Pint64 +and boxed_vector = Primitive.boxed_vector = + | Pvec128 + and bigarray_kind = Lambda.bigarray_kind = Pbigarray_unknown | Pbigarray_float32 | Pbigarray_float64 diff --git a/ocaml/middle_end/closure/closure.ml b/ocaml/middle_end/closure/closure.ml index fd9b3615375..7d009724b70 100644 --- a/ocaml/middle_end/closure/closure.ml +++ b/ocaml/middle_end/closure/closure.ml @@ -63,8 +63,10 @@ let is_gc_ignorable kind = | Pbottom -> Misc.fatal_error "[Pbottom] should not be stored in a closure." | Punboxed_float -> true | Punboxed_int _ -> true + | Punboxed_vector _ -> true | Pvalue Pintval -> true - | Pvalue (Pgenval | Pfloatval | Pboxedintval _ | Pvariant _ | Parrayval _) -> false + | Pvalue (Pgenval | Pfloatval | Pboxedintval _ | Pvariant _ | Parrayval _ | + Pboxedvectorval _) -> false let split_closure_fv kinds fv = List.fold_right (fun id (not_scanned, scanned) -> diff --git a/ocaml/middle_end/flambda/closure_offsets.ml b/ocaml/middle_end/flambda/closure_offsets.ml index cfb1791786d..d771cdd3e54 100644 --- a/ocaml/middle_end/flambda/closure_offsets.ml +++ b/ocaml/middle_end/flambda/closure_offsets.ml @@ -79,6 +79,7 @@ let add_closure_offsets and not stored in a closure." | Punboxed_float -> true | Punboxed_int _ -> true + | Punboxed_vector _ -> true | Pvalue Pintval -> true | Pvalue _ -> false) free_vars diff --git a/ocaml/middle_end/flambda/flambda_to_clambda.ml b/ocaml/middle_end/flambda/flambda_to_clambda.ml index 7a337694c12..10abe813aba 100644 --- a/ocaml/middle_end/flambda/flambda_to_clambda.ml +++ b/ocaml/middle_end/flambda/flambda_to_clambda.ml @@ -710,6 +710,7 @@ and to_clambda_set_of_closures t env and not stored in a closure." | Punboxed_float -> true | Punboxed_int _ -> true + | Punboxed_vector _ -> true | Pvalue Pintval -> true | Pvalue _ -> false) free_vars diff --git a/ocaml/middle_end/printclambda.ml b/ocaml/middle_end/printclambda.ml index 46056115b85..0919908b4eb 100644 --- a/ocaml/middle_end/printclambda.ml +++ b/ocaml/middle_end/printclambda.ml @@ -37,6 +37,7 @@ let rec value_kind0 ppf kind = | Pboxedintval Pnativeint -> Format.pp_print_string ppf ":nativeint" | Pboxedintval Pint32 -> Format.pp_print_string ppf ":int32" | Pboxedintval Pint64 -> Format.pp_print_string ppf ":int64" + | Pboxedvectorval Pvec128 -> Format.pp_print_string ppf ":vec128" | Pvariant { consts; non_consts } -> Format.fprintf ppf "@[[(consts (%a))@ (non_consts (%a))]@]" (Format.pp_print_list ~pp_sep:Format.pp_print_space Format.pp_print_int) @@ -60,6 +61,7 @@ let layout (layout : Lambda.layout) = | Punboxed_int Pint32 -> ":unboxed_int32" | Punboxed_int Pint64 -> ":unboxed_int64" | Punboxed_int Pnativeint -> ":unboxed_nativeint" + | Punboxed_vector Pvec128 -> ":unboxed_vec128" let rec structured_constant ppf = function | Uconst_float x -> fprintf ppf "%F" x diff --git a/ocaml/runtime/amd64.S b/ocaml/runtime/amd64.S index 15b8e8734dd..aeac005207c 100644 --- a/ocaml/runtime/amd64.S +++ b/ocaml/runtime/amd64.S @@ -347,23 +347,23 @@ LBL(caml_call_gc): /* Save young_ptr */ movq %r15, Caml_state(young_ptr) /* Save floating-point registers */ - subq $(16*8), %rsp; CFI_ADJUST (16*8); - movsd %xmm0, 0*8(%rsp) - movsd %xmm1, 1*8(%rsp) - movsd %xmm2, 2*8(%rsp) - movsd %xmm3, 3*8(%rsp) - movsd %xmm4, 4*8(%rsp) - movsd %xmm5, 5*8(%rsp) - movsd %xmm6, 6*8(%rsp) - movsd %xmm7, 7*8(%rsp) - movsd %xmm8, 8*8(%rsp) - movsd %xmm9, 9*8(%rsp) - movsd %xmm10, 10*8(%rsp) - movsd %xmm11, 11*8(%rsp) - movsd %xmm12, 12*8(%rsp) - movsd %xmm13, 13*8(%rsp) - movsd %xmm14, 14*8(%rsp) - movsd %xmm15, 15*8(%rsp) + subq $(16*16), %rsp; CFI_ADJUST (16*16); + movupd %xmm0, 0*16(%rsp) + movupd %xmm1, 1*16(%rsp) + movupd %xmm2, 2*16(%rsp) + movupd %xmm3, 3*16(%rsp) + movupd %xmm4, 4*16(%rsp) + movupd %xmm5, 5*16(%rsp) + movupd %xmm6, 6*16(%rsp) + movupd %xmm7, 7*16(%rsp) + movupd %xmm8, 8*16(%rsp) + movupd %xmm9, 9*16(%rsp) + movupd %xmm10, 10*16(%rsp) + movupd %xmm11, 11*16(%rsp) + movupd %xmm12, 12*16(%rsp) + movupd %xmm13, 13*16(%rsp) + movupd %xmm14, 14*16(%rsp) + movupd %xmm15, 15*16(%rsp) /* Call the garbage collector */ PREPARE_FOR_C_CALL call GCALL(caml_garbage_collection) @@ -371,23 +371,23 @@ LBL(caml_call_gc): /* Restore young_ptr */ movq Caml_state(young_ptr), %r15 /* Restore all regs used by the code generator */ - movsd 0*8(%rsp), %xmm0 - movsd 1*8(%rsp), %xmm1 - movsd 2*8(%rsp), %xmm2 - movsd 3*8(%rsp), %xmm3 - movsd 4*8(%rsp), %xmm4 - movsd 5*8(%rsp), %xmm5 - movsd 6*8(%rsp), %xmm6 - movsd 7*8(%rsp), %xmm7 - movsd 8*8(%rsp), %xmm8 - movsd 9*8(%rsp), %xmm9 - movsd 10*8(%rsp), %xmm10 - movsd 11*8(%rsp), %xmm11 - movsd 12*8(%rsp), %xmm12 - movsd 13*8(%rsp), %xmm13 - movsd 14*8(%rsp), %xmm14 - movsd 15*8(%rsp), %xmm15 - addq $(16*8), %rsp; CFI_ADJUST(-16*8) + movupd 0*16(%rsp), %xmm0 + movupd 1*16(%rsp), %xmm1 + movupd 2*16(%rsp), %xmm2 + movupd 3*16(%rsp), %xmm3 + movupd 4*16(%rsp), %xmm4 + movupd 5*16(%rsp), %xmm5 + movupd 6*16(%rsp), %xmm6 + movupd 7*16(%rsp), %xmm7 + movupd 8*16(%rsp), %xmm8 + movupd 9*16(%rsp), %xmm9 + movupd 10*16(%rsp), %xmm10 + movupd 11*16(%rsp), %xmm11 + movupd 12*16(%rsp), %xmm12 + movupd 13*16(%rsp), %xmm13 + movupd 14*16(%rsp), %xmm14 + movupd 15*16(%rsp), %xmm15 + addq $(16*16), %rsp; CFI_ADJUST(-16*16) popq %rax; CFI_ADJUST(-8) popq %rbx; CFI_ADJUST(-8) popq %rdi; CFI_ADJUST(-8) @@ -476,23 +476,23 @@ FUNCTION(G(caml_call_local_realloc)) /* Save young_ptr */ movq %r15, Caml_state(young_ptr) /* Save floating-point registers */ - subq $(16*8), %rsp; CFI_ADJUST (16*8); - movsd %xmm0, 0*8(%rsp) - movsd %xmm1, 1*8(%rsp) - movsd %xmm2, 2*8(%rsp) - movsd %xmm3, 3*8(%rsp) - movsd %xmm4, 4*8(%rsp) - movsd %xmm5, 5*8(%rsp) - movsd %xmm6, 6*8(%rsp) - movsd %xmm7, 7*8(%rsp) - movsd %xmm8, 8*8(%rsp) - movsd %xmm9, 9*8(%rsp) - movsd %xmm10, 10*8(%rsp) - movsd %xmm11, 11*8(%rsp) - movsd %xmm12, 12*8(%rsp) - movsd %xmm13, 13*8(%rsp) - movsd %xmm14, 14*8(%rsp) - movsd %xmm15, 15*8(%rsp) + subq $(16*16), %rsp; CFI_ADJUST (16*16); + movupd %xmm0, 0*16(%rsp) + movupd %xmm1, 1*16(%rsp) + movupd %xmm2, 2*16(%rsp) + movupd %xmm3, 3*16(%rsp) + movupd %xmm4, 4*16(%rsp) + movupd %xmm5, 5*16(%rsp) + movupd %xmm6, 6*16(%rsp) + movupd %xmm7, 7*16(%rsp) + movupd %xmm8, 8*16(%rsp) + movupd %xmm9, 9*16(%rsp) + movupd %xmm10, 10*16(%rsp) + movupd %xmm11, 11*16(%rsp) + movupd %xmm12, 12*16(%rsp) + movupd %xmm13, 13*16(%rsp) + movupd %xmm14, 14*16(%rsp) + movupd %xmm15, 15*16(%rsp) /* Call the garbage collector */ PREPARE_FOR_C_CALL call GCALL(caml_local_realloc) @@ -500,23 +500,23 @@ FUNCTION(G(caml_call_local_realloc)) /* Restore young_ptr */ movq Caml_state(young_ptr), %r15 /* Restore all regs used by the code generator */ - movsd 0*8(%rsp), %xmm0 - movsd 1*8(%rsp), %xmm1 - movsd 2*8(%rsp), %xmm2 - movsd 3*8(%rsp), %xmm3 - movsd 4*8(%rsp), %xmm4 - movsd 5*8(%rsp), %xmm5 - movsd 6*8(%rsp), %xmm6 - movsd 7*8(%rsp), %xmm7 - movsd 8*8(%rsp), %xmm8 - movsd 9*8(%rsp), %xmm9 - movsd 10*8(%rsp), %xmm10 - movsd 11*8(%rsp), %xmm11 - movsd 12*8(%rsp), %xmm12 - movsd 13*8(%rsp), %xmm13 - movsd 14*8(%rsp), %xmm14 - movsd 15*8(%rsp), %xmm15 - addq $(16*8), %rsp; CFI_ADJUST(-16*8) + movupd 0*16(%rsp), %xmm0 + movupd 1*16(%rsp), %xmm1 + movupd 2*16(%rsp), %xmm2 + movupd 3*16(%rsp), %xmm3 + movupd 4*16(%rsp), %xmm4 + movupd 5*16(%rsp), %xmm5 + movupd 6*16(%rsp), %xmm6 + movupd 7*16(%rsp), %xmm7 + movupd 8*16(%rsp), %xmm8 + movupd 9*16(%rsp), %xmm9 + movupd 10*16(%rsp), %xmm10 + movupd 11*16(%rsp), %xmm11 + movupd 12*16(%rsp), %xmm12 + movupd 13*16(%rsp), %xmm13 + movupd 14*16(%rsp), %xmm14 + movupd 15*16(%rsp), %xmm15 + addq $(16*16), %rsp; CFI_ADJUST(-16*16) popq %rax; CFI_ADJUST(-8) popq %rbx; CFI_ADJUST(-8) popq %rdi; CFI_ADJUST(-8) diff --git a/ocaml/runtime/amd64nt.asm b/ocaml/runtime/amd64nt.asm index 9185d06787d..3a90239cb47 100644 --- a/ocaml/runtime/amd64nt.asm +++ b/ocaml/runtime/amd64nt.asm @@ -69,45 +69,45 @@ caml_call_gc: push rax Store_gc_regs rsp ; Save floating-point registers - sub rsp, 16*8 - movsd QWORD PTR [rsp + 0*8], xmm0 - movsd QWORD PTR [rsp + 1*8], xmm1 - movsd QWORD PTR [rsp + 2*8], xmm2 - movsd QWORD PTR [rsp + 3*8], xmm3 - movsd QWORD PTR [rsp + 4*8], xmm4 - movsd QWORD PTR [rsp + 5*8], xmm5 - movsd QWORD PTR [rsp + 6*8], xmm6 - movsd QWORD PTR [rsp + 7*8], xmm7 - movsd QWORD PTR [rsp + 8*8], xmm8 - movsd QWORD PTR [rsp + 9*8], xmm9 - movsd QWORD PTR [rsp + 10*8], xmm10 - movsd QWORD PTR [rsp + 11*8], xmm11 - movsd QWORD PTR [rsp + 12*8], xmm12 - movsd QWORD PTR [rsp + 13*8], xmm13 - movsd QWORD PTR [rsp + 14*8], xmm14 - movsd QWORD PTR [rsp + 15*8], xmm15 + sub rsp, 16*16 + movupd QWORD PTR [rsp + 0*16], xmm0 + movupd QWORD PTR [rsp + 1*16], xmm1 + movupd QWORD PTR [rsp + 2*16], xmm2 + movupd QWORD PTR [rsp + 3*16], xmm3 + movupd QWORD PTR [rsp + 4*16], xmm4 + movupd QWORD PTR [rsp + 5*16], xmm5 + movupd QWORD PTR [rsp + 6*16], xmm6 + movupd QWORD PTR [rsp + 7*16], xmm7 + movupd QWORD PTR [rsp + 8*16], xmm8 + movupd QWORD PTR [rsp + 9*16], xmm9 + movupd QWORD PTR [rsp + 10*16], xmm10 + movupd QWORD PTR [rsp + 11*16], xmm11 + movupd QWORD PTR [rsp + 12*16], xmm12 + movupd QWORD PTR [rsp + 13*16], xmm13 + movupd QWORD PTR [rsp + 14*16], xmm14 + movupd QWORD PTR [rsp + 15*16], xmm15 ; Call the garbage collector sub rsp, 32 ; PR#5008: bottom 32 bytes are reserved for callee call caml_garbage_collection add rsp, 32 ; PR#5008 ; Restore all regs used by the code generator - movsd xmm0, QWORD PTR [rsp + 0*8] - movsd xmm1, QWORD PTR [rsp + 1*8] - movsd xmm2, QWORD PTR [rsp + 2*8] - movsd xmm3, QWORD PTR [rsp + 3*8] - movsd xmm4, QWORD PTR [rsp + 4*8] - movsd xmm5, QWORD PTR [rsp + 5*8] - movsd xmm6, QWORD PTR [rsp + 6*8] - movsd xmm7, QWORD PTR [rsp + 7*8] - movsd xmm8, QWORD PTR [rsp + 8*8] - movsd xmm9, QWORD PTR [rsp + 9*8] - movsd xmm10, QWORD PTR [rsp + 10*8] - movsd xmm11, QWORD PTR [rsp + 11*8] - movsd xmm12, QWORD PTR [rsp + 12*8] - movsd xmm13, QWORD PTR [rsp + 13*8] - movsd xmm14, QWORD PTR [rsp + 14*8] - movsd xmm15, QWORD PTR [rsp + 15*8] - add rsp, 16*8 + movupd xmm0, QWORD PTR [rsp + 0*16] + movupd xmm1, QWORD PTR [rsp + 1*16] + movupd xmm2, QWORD PTR [rsp + 2*16] + movupd xmm3, QWORD PTR [rsp + 3*16] + movupd xmm4, QWORD PTR [rsp + 4*16] + movupd xmm5, QWORD PTR [rsp + 5*16] + movupd xmm6, QWORD PTR [rsp + 6*16] + movupd xmm7, QWORD PTR [rsp + 7*16] + movupd xmm8, QWORD PTR [rsp + 8*16] + movupd xmm9, QWORD PTR [rsp + 9*16] + movupd xmm10, QWORD PTR [rsp + 10*16] + movupd xmm11, QWORD PTR [rsp + 11*16] + movupd xmm12, QWORD PTR [rsp + 12*16] + movupd xmm13, QWORD PTR [rsp + 13*16] + movupd xmm14, QWORD PTR [rsp + 14*16] + movupd xmm15, QWORD PTR [rsp + 15*16] + add rsp, 16*16 pop rax pop rbx pop rdi diff --git a/ocaml/testsuite/tests/typing-unboxed/test.ml b/ocaml/testsuite/tests/typing-unboxed/test.ml index 6841b4957d5..7f8ad66716c 100644 --- a/ocaml/testsuite/tests/typing-unboxed/test.ml +++ b/ocaml/testsuite/tests/typing-unboxed/test.ml @@ -635,7 +635,7 @@ Line 1, characters 14-17: 1 | external h : (int [@unboxed]) -> float = "h" "h_nat";; ^^^ Error: Don't know how to unbox this type. - Only float, int32, int64 and nativeint can be unboxed. + Only float, int32, int64, nativeint, and vec128 can be unboxed. |}] (* Bad: unboxing the function type *) @@ -645,7 +645,7 @@ Line 1, characters 13-25: 1 | external i : int -> float [@unboxed] = "i" "i_nat";; ^^^^^^^^^^^^ Error: Don't know how to unbox this type. - Only float, int32, int64 and nativeint can be unboxed. + Only float, int32, int64, nativeint, and vec128 can be unboxed. |}] (* Bad: unboxing a "deep" sub-type. *) diff --git a/ocaml/typing/predef.ml b/ocaml/typing/predef.ml index 192ae714a2e..a578cb921cd 100644 --- a/ocaml/typing/predef.ml +++ b/ocaml/typing/predef.ml @@ -44,6 +44,7 @@ and ident_option = ident_create "option" and ident_nativeint = ident_create "nativeint" and ident_int32 = ident_create "int32" and ident_int64 = ident_create "int64" +and ident_vec128 = ident_create "vec128" and ident_lazy_t = ident_create "lazy_t" and ident_string = ident_create "string" and ident_extension_constructor = ident_create "extension_constructor" @@ -66,6 +67,7 @@ and path_option = Pident ident_option and path_nativeint = Pident ident_nativeint and path_int32 = Pident ident_int32 and path_int64 = Pident ident_int64 +and path_vec128 = Pident ident_vec128 and path_lazy_t = Pident ident_lazy_t and path_string = Pident ident_string and path_extension_constructor = Pident ident_extension_constructor @@ -86,6 +88,7 @@ and type_option t = newgenty (Tconstr(path_option, [t], ref Mnil)) and type_nativeint = newgenty (Tconstr(path_nativeint, [], ref Mnil)) and type_int32 = newgenty (Tconstr(path_int32, [], ref Mnil)) and type_int64 = newgenty (Tconstr(path_int64, [], ref Mnil)) +and type_vec128 = newgenty (Tconstr(path_vec128, [], ref Mnil)) and type_lazy_t t = newgenty (Tconstr(path_lazy_t, [t], ref Mnil)) and type_string = newgenty (Tconstr(path_string, [], ref Mnil)) and type_extension_constructor = @@ -261,6 +264,7 @@ let common_initial_env add_type add_extension empty_env = |> add_type ident_unit ~kind:(variant [cstr ident_void []] [| [| |] |]) ~layout:(Layout.immediate ~why:Enumeration) + |> add_type ident_vec128 (* Predefined exceptions - alphabetical order *) |> add_extension ident_assert_failure [newgenty (Ttuple[type_string; type_int; type_int])] diff --git a/ocaml/typing/predef.mli b/ocaml/typing/predef.mli index 77656a3227c..ec53f19b386 100644 --- a/ocaml/typing/predef.mli +++ b/ocaml/typing/predef.mli @@ -33,6 +33,7 @@ val type_option: type_expr -> type_expr val type_nativeint: type_expr val type_int32: type_expr val type_int64: type_expr +val type_vec128: type_expr val type_lazy_t: type_expr -> type_expr val type_extension_constructor:type_expr val type_floatarray:type_expr @@ -52,6 +53,7 @@ val path_option: Path.t val path_nativeint: Path.t val path_int32: Path.t val path_int64: Path.t +val path_vec128: Path.t val path_lazy_t: Path.t val path_extension_constructor: Path.t val path_floatarray: Path.t diff --git a/ocaml/typing/primitive.ml b/ocaml/typing/primitive.ml index 34804aaee3b..a3661a17289 100644 --- a/ocaml/typing/primitive.ml +++ b/ocaml/typing/primitive.ml @@ -21,9 +21,12 @@ open Layouts type boxed_integer = Pnativeint | Pint32 | Pint64 +type boxed_vector = Pvec128 + type native_repr = | Same_as_ocaml_repr of Layouts.Sort.const | Unboxed_float + | Unboxed_vector of boxed_vector | Unboxed_integer of boxed_integer | Untagged_int @@ -58,6 +61,7 @@ exception Error of Location.t * error let is_ocaml_repr = function | _, Same_as_ocaml_repr _ -> true | _, Unboxed_float + | _, Unboxed_vector _ | _, Unboxed_integer _ | _, Untagged_int -> false @@ -65,12 +69,14 @@ let is_unboxed = function | _, Same_as_ocaml_repr _ | _, Untagged_int -> false | _, Unboxed_float + | _, Unboxed_vector _ | _, Unboxed_integer _ -> true let is_untagged = function | _, Untagged_int -> true | _, Same_as_ocaml_repr _ | _, Unboxed_float + | _, Unboxed_vector _ | _, Unboxed_integer _ -> false let rec make_native_repr_args arity x = @@ -257,6 +263,7 @@ let print p osig_val_decl = (match repr with | Same_as_ocaml_repr _ -> [] | Unboxed_float + | Unboxed_vector _ | Unboxed_integer _ -> if all_unboxed then [] else [oattr_unboxed] | Untagged_int -> if all_untagged then [] else [oattr_untagged]) in @@ -286,20 +293,27 @@ let equal_boxed_integer bi1 bi2 = | (Pnativeint | Pint32 | Pint64), _ -> false +let equal_boxed_vector bi1 bi2 = + match bi1, bi2 with + | Pvec128, Pvec128 -> true + let equal_native_repr nr1 nr2 = match nr1, nr2 with | Same_as_ocaml_repr s1, Same_as_ocaml_repr s2 -> Sort.equal_const s1 s2 | Same_as_ocaml_repr _, - (Unboxed_float | Unboxed_integer _ | Untagged_int) -> false + (Unboxed_float | Unboxed_integer _ | Untagged_int | Unboxed_vector _) -> false | Unboxed_float, Unboxed_float -> true | Unboxed_float, - (Same_as_ocaml_repr _ | Unboxed_integer _ | Untagged_int) -> false + (Same_as_ocaml_repr _ | Unboxed_integer _ | Untagged_int | Unboxed_vector _) -> false + | Unboxed_vector vi1, Unboxed_vector vi2 -> equal_boxed_vector vi1 vi2 + | Unboxed_vector _, + (Same_as_ocaml_repr _ | Unboxed_float | Untagged_int | Unboxed_integer _) -> false | Unboxed_integer bi1, Unboxed_integer bi2 -> equal_boxed_integer bi1 bi2 | Unboxed_integer _, - (Same_as_ocaml_repr _ | Unboxed_float | Untagged_int) -> false + (Same_as_ocaml_repr _ | Unboxed_float | Untagged_int | Unboxed_vector _) -> false | Untagged_int, Untagged_int -> true | Untagged_int, - (Same_as_ocaml_repr _ | Unboxed_float | Unboxed_integer _) -> false + (Same_as_ocaml_repr _ | Unboxed_float | Unboxed_integer _ | Unboxed_vector _) -> false let equal_effects ef1 ef2 = match ef1, ef2 with @@ -323,7 +337,7 @@ let native_name_is_external p = let sort_of_native_repr = function | Same_as_ocaml_repr s -> s - | (Unboxed_float | Unboxed_integer _ | Untagged_int) -> Sort.Value + | (Unboxed_float | Unboxed_integer _ | Untagged_int | Unboxed_vector _) -> Sort.Value let report_error ppf err = match err with diff --git a/ocaml/typing/primitive.mli b/ocaml/typing/primitive.mli index a52e550c5fe..98c15b2158d 100644 --- a/ocaml/typing/primitive.mli +++ b/ocaml/typing/primitive.mli @@ -17,11 +17,14 @@ type boxed_integer = Pnativeint | Pint32 | Pint64 +type boxed_vector = Pvec128 + (* Representation of arguments/result for the native code version of a primitive *) type native_repr = | Same_as_ocaml_repr of Layouts.Sort.const | Unboxed_float + | Unboxed_vector of boxed_vector | Unboxed_integer of boxed_integer | Untagged_int @@ -88,6 +91,7 @@ val native_name: description -> string val byte_name: description -> string val equal_boxed_integer : boxed_integer -> boxed_integer -> bool +val equal_boxed_vector : boxed_vector -> boxed_vector -> bool val equal_native_repr : native_repr -> native_repr -> bool val equal_effects : effects -> effects -> bool val equal_coeffects : coeffects -> coeffects -> bool diff --git a/ocaml/typing/typedecl.ml b/ocaml/typing/typedecl.ml index 48a5ad061c2..acf2f7ca6bc 100644 --- a/ocaml/typing/typedecl.ml +++ b/ocaml/typing/typedecl.ml @@ -1862,6 +1862,8 @@ let native_repr_of_type env kind ty = Some (Unboxed_integer Pint64) | Unboxed, Tconstr (path, _, _) when Path.same path Predef.path_nativeint -> Some (Unboxed_integer Pnativeint) + | Unboxed, Tconstr (path, _, _) when Path.same path Predef.path_vec128 -> + Some (Unboxed_vector Pvec128) | _ -> None @@ -2502,7 +2504,7 @@ let report_error ppf = function fprintf ppf "Too many [@@unboxed]/[@@untagged] attributes" | Cannot_unbox_or_untag_type Unboxed -> fprintf ppf "@[Don't know how to unbox this type.@ \ - Only float, int32, int64 and nativeint can be unboxed.@]" + Only float, int32, int64, nativeint, and vec128 can be unboxed.@]" | Cannot_unbox_or_untag_type Untagged -> fprintf ppf "@[Don't know how to untag this type.@ \ Only int can be untagged.@]" diff --git a/ocaml/typing/typeopt.ml b/ocaml/typing/typeopt.ml index 48eb70aebee..eacf781818d 100644 --- a/ocaml/typing/typeopt.ml +++ b/ocaml/typing/typeopt.ml @@ -284,6 +284,8 @@ let rec value_kind env ~loc ~visited ~depth ~num_nodes_visited ty num_nodes_visited, (Pboxedintval Pint64) | Tconstr(p, _, _) when Path.same p Predef.path_nativeint -> num_nodes_visited, (Pboxedintval Pnativeint) + | Tconstr(p, _, _) when Path.same p Predef.path_vec128 -> + num_nodes_visited, (Pboxedvectorval Pvec128) | Tconstr(p, _, _) when (Path.same p Predef.path_array || Path.same p Predef.path_floatarray) -> @@ -569,7 +571,9 @@ let layout_union l1 l2 = | Punboxed_float, Punboxed_float -> Punboxed_float | Punboxed_int bi1, Punboxed_int bi2 -> if equal_boxed_integer bi1 bi2 then l1 else Ptop - | (Ptop | Pvalue _ | Punboxed_float | Punboxed_int _), _ -> + | Punboxed_vector bi1, Punboxed_vector bi2 -> + if equal_boxed_vector bi1 bi2 then l1 else Ptop + | (Ptop | Pvalue _ | Punboxed_float | Punboxed_int _ | Punboxed_vector _), _ -> Ptop (* Error report *) diff --git a/tests/backend/regalloc_validator/check_regalloc_validation.ml b/tests/backend/regalloc_validator/check_regalloc_validation.ml index 5764c1d2a63..cb63fe81b90 100644 --- a/tests/backend/regalloc_validator/check_regalloc_validation.ml +++ b/tests/backend/regalloc_validator/check_regalloc_validation.ml @@ -90,7 +90,7 @@ module Cfg_desc = struct let cfg = Cfg.create ~fun_name:"foo" ~fun_args:(Array.copy fun_args) ~fun_dbg:[] ~fun_fast:false ~fun_contains_calls - ~fun_num_stack_slots:(Array.make Proc.num_register_classes 0) + ~fun_num_stack_slots:(Array.make Proc.num_stack_slot_classes 0) in List.iter (fun (block : Block.t) -> @@ -120,10 +120,10 @@ module Cfg_desc = struct count. *) let update_stack_slots i = let update_slot (r : Reg.t) = - match r.loc, Proc.register_class r with - | Stack (Local idx), reg_class -> - cfg.fun_num_stack_slots.(reg_class) - <- max cfg.fun_num_stack_slots.(reg_class) (idx + 1) + match r.loc, Proc.stack_slot_class r.typ with + | Stack (Local idx), stack_class -> + cfg.fun_num_stack_slots.(stack_class) + <- max cfg.fun_num_stack_slots.(stack_class) (idx + 1) | _ -> () in Array.iter update_slot i.arg; @@ -168,7 +168,7 @@ let entry_label = Cfg.create ~fun_name:"foo" ~fun_args:[| Proc.phys_reg 0 |] ~fun_dbg:[] ~fun_fast:false ~fun_contains_calls:false - ~fun_num_stack_slots:(Array.make Proc.num_register_classes 0) + ~fun_num_stack_slots:(Array.make Proc.num_stack_slot_classes 0) in Label.Tbl.add cfg.Cfg.blocks (Cfg.entry_label cfg) { start = Cfg.entry_label cfg; @@ -559,7 +559,7 @@ let () = cfg, cfg) ~exp_std:"fatal exception raised when validating description" ~exp_err: - ">> Fatal error: instruction 20 has a register (V/37) with an unknown \ + ">> Fatal error: instruction 20 has a register (V/53) with an unknown \ location" let () = @@ -1024,7 +1024,7 @@ let test_loop ~loop_loc_first n = ~exp_std: "Validation failed: Bad equations at entry point, reason: Unsatisfiable \ equations when removing result equations.\n\ - Existing equation has to agree one 0 or 2 sides (cannot on exactly 1) \ + Existing equation has to agree on 0 or 2 sides (cannot be exactly 1) \ with the removed equation.\n\ Existing equation R[%rdi]=%rbx.\n\ Removed equation: R[%rbx]=%rbx.\n\ diff --git a/tests/simd/dune b/tests/simd/dune new file mode 100644 index 00000000000..142f04bf0db --- /dev/null +++ b/tests/simd/dune @@ -0,0 +1,22 @@ +(executable + (name simd) + (modules simd) + (foreign_stubs (language c) (names stubs) (flags -msse4.2)) + (ocamlopt_flags (:standard -fsimd))) + +(rule + (enabled_if + (= %{context_name} "main")) + (target simd.output) + (deps simd.exe) + (action + (with-outputs-to + simd.output + (run ./simd.exe)))) + +(rule + (alias runtest) + (enabled_if + (= %{context_name} "main")) + (action + (diff simd.expected simd.output))) diff --git a/tests/simd/simd.expected b/tests/simd/simd.expected new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/simd/simd.ml b/tests/simd/simd.ml new file mode 100644 index 00000000000..e59e824193c --- /dev/null +++ b/tests/simd/simd.ml @@ -0,0 +1,300 @@ +open Stdlib + +external vec128_of_int64s : int64 -> int64 -> vec128 = "" "vec128_of_int64s" [@@noalloc] [@@unboxed] +external vec128_low_int64 : vec128 -> int64 = "" "vec128_low_int64" [@@noalloc] [@@unboxed] +external vec128_high_int64 : vec128 -> int64 = "" "vec128_high_int64" [@@noalloc] [@@unboxed] + +let eq l r = if l <> r then Printf.printf "%Ld <> %Ld\n" l r + +let[@inline never] check v l h = + let vl, vh = vec128_low_int64 v, vec128_high_int64 v in + eq vl l; + eq vh h +;; + +let[@inline never] combine v0 v1 = + let l0, h0 = vec128_low_int64 v0, vec128_high_int64 v0 in + let l1, h1 = vec128_low_int64 v1, vec128_high_int64 v1 in + vec128_of_int64s (Int64.add l0 l1) (Int64.add h0 h1) +;; + +let[@inline never] combine_with_floats v0 f0 v1 f1 = + let l0, h0 = vec128_low_int64 v0, vec128_high_int64 v0 in + let l1, h1 = vec128_low_int64 v1, vec128_high_int64 v1 in + let l, h = Int64.add l0 l1, Int64.add h0 h1 in + let l = Int64.add (Int64.of_float f0) l in + let h = Int64.add (Int64.of_float f1) h in + vec128_of_int64s l h +;; + +(* Identity *) +let () = + let v = vec128_of_int64s 1L 2L in + let v = Sys.opaque_identity v in + check v 1L 2L +;; + +(* Identity fn *) +let () = + let v = vec128_of_int64s 1L 2L in + let[@inline never] id v = v in + let v = id v in + check v 1L 2L +;; + +(* Pass to function *) +let () = + let v0 = vec128_of_int64s 1L 2L in + let v1 = vec128_of_int64s 3L 4L in + let v = combine v0 v1 in + check v 4L 6L +;; + +(* Pass to function (inlined) *) +let () = + let v0 = vec128_of_int64s 1L 2L in + let v1 = vec128_of_int64s 3L 4L in + let v = (combine[@inlined hint]) v0 v1 in + check v 4L 6L +;; + +(* Pass to function with floats *) +let () = + let v0 = vec128_of_int64s 1L 2L in + let v1 = vec128_of_int64s 3L 4L in + let f0 = Sys.opaque_identity 5. in + let v = combine_with_floats v0 f0 v1 6. in + check v 9L 12L +;; + +(* Pass to function with floats (inlined) *) +let () = + let v0 = vec128_of_int64s 1L 2L in + let v1 = vec128_of_int64s 3L 4L in + let v = (combine_with_floats[@inlined hint]) v0 5. v1 6. in + check v 9L 12L +;; + +(* Capture in closure *) +let () = + let v0 = vec128_of_int64s 1L 2L in + let v1 = vec128_of_int64s 3L 4L in + let f = combine v0 in + let f = Sys.opaque_identity f in + let v = f v1 in + check v 4L 6L +;; + +(* Capture vectors and floats in a closure *) +let () = + let[@inline never] f v0 v1 f0 v2 f1 v3 = + combine (combine_with_floats v0 f0 v1 f1) (combine v2 v3) + in + let v0 = vec128_of_int64s 1L 2L in + let v1 = vec128_of_int64s 3L 4L in + let v2 = vec128_of_int64s 4L 5L in + let v3 = vec128_of_int64s 6L 7L in + let f = f v0 v1 7. v2 in + let f = Sys.opaque_identity f in + let v = f 8. v3 in + check v 21L 26L +;; + +(* Capture vectors and floats in a closure (inlined) *) +let () = + let[@inline always] f v0 v1 f0 v2 f1 v3 = + (combine[@inlined hint]) + ((combine_with_floats[@inlined hint]) v0 f0 v1 f1) + ((combine[@inlined hint]) v2 v3) + in + let v0 = vec128_of_int64s 1L 2L in + let v1 = vec128_of_int64s 3L 4L in + let v2 = vec128_of_int64s 4L 5L in + let v3 = vec128_of_int64s 6L 7L in + let f = f v0 v1 7. v2 in + let v = f 8. v3 in + check v 21L 26L +;; + +(* Store in record *) +type record = { a : vec128 + ; mutable b : vec128 + ; c : float } + +let () = + let record = { a = vec128_of_int64s 1L 2L; b = vec128_of_int64s 3L 4L; c = 5. } in + check record.a 1L 2L; + check record.b 3L 4L; + let record = Sys.opaque_identity record in + record.b <- vec128_of_int64s 5L 6L; + check record.a 1L 2L; + check record.b 5L 6L; + let v = combine_with_floats record.a record.c record.b 6. in + check v 11L 14L +;; + +(* Store in variant *) +type variant = A of vec128 | B of vec128 | C of float + +let () = + let variant = A (vec128_of_int64s 1L 2L) in + let variant = Sys.opaque_identity variant in + match variant with + | A v -> check v 1L 2L + | _ -> print_endline "fail"; + let variant = ref (C 5.) in + let variant = Sys.opaque_identity variant in + variant := B (vec128_of_int64s 3L 4L); + match !variant with + | B v -> check v 3L 4L + | _ -> print_endline "fail" +;; + +(* Pass boxed vectors to an external *) +external boxed_combine : vec128 -> vec128 -> vec128 = "" "boxed_combine" [@@noalloc] + +let () = + let v0 = vec128_of_int64s 1L 2L in + let v1 = vec128_of_int64s 3L 4L in + let v = boxed_combine v0 v1 in + check v 4L 6L +;; + +(* Pass lots of vectors to an external *) +external lots_of_vectors : + vec128 -> vec128 -> vec128 -> vec128 -> vec128 -> vec128 -> vec128 -> vec128 -> + vec128 -> vec128 -> vec128 -> vec128 -> vec128 -> vec128 -> vec128 -> vec128 -> + vec128 + = "" "lots_of_vectors" [@@noalloc] [@@unboxed] + +let () = + let v0 = vec128_of_int64s 1L 2L in + let v1 = vec128_of_int64s 3L 4L in + let v2 = vec128_of_int64s 5L 6L in + let v3 = vec128_of_int64s 7L 8L in + let v4 = vec128_of_int64s 9L 10L in + let v5 = vec128_of_int64s 11L 12L in + let v6 = vec128_of_int64s 13L 14L in + let v7 = vec128_of_int64s 15L 16L in + let v8 = vec128_of_int64s 17L 18L in + let v9 = vec128_of_int64s 19L 20L in + let v10 = vec128_of_int64s 21L 22L in + let v11 = vec128_of_int64s 23L 24L in + let v12 = vec128_of_int64s 25L 26L in + let v13 = vec128_of_int64s 27L 28L in + let v14 = vec128_of_int64s 29L 30L in + let v15 = vec128_of_int64s 31L 32L in + let v = lots_of_vectors v0 v1 v2 v3 v4 v5 v6 v7 v8 v9 v10 v11 v12 v13 v14 v15 in + check v 256L 272L +;; + +(* Pass mixed floats/vectors to an external *) +external vectors_and_floats : + vec128 -> float -> vec128 -> float -> vec128 -> float -> vec128 -> float -> + float -> vec128 -> vec128 -> float -> float -> vec128 -> vec128 -> float -> + float -> float -> vec128 -> vec128 -> vec128 -> float -> float -> float -> + vec128 + = "" "vectors_and_floats" [@@noalloc] [@@unboxed] + +let () = + let v0 = vec128_of_int64s 1L 2L in + let v1 = vec128_of_int64s 3L 4L in + let v2 = vec128_of_int64s 5L 6L in + let v3 = vec128_of_int64s 7L 8L in + let v4 = vec128_of_int64s 9L 10L in + let v5 = vec128_of_int64s 11L 12L in + let v6 = vec128_of_int64s 13L 14L in + let v7 = vec128_of_int64s 15L 16L in + let v8 = vec128_of_int64s 17L 18L in + let v9 = vec128_of_int64s 19L 20L in + let v10 = vec128_of_int64s 21L 22L in + let v = vectors_and_floats v0 23. v1 24. v2 25. v3 26. 27. v4 v5 28. 29. v6 v7 30. 31. 32. v8 v9 v10 33. 34. 35. in + check v 377L 253L +;; + +(* Pass mixed ints/floats/vectors to an external *) +external vectors_and_floats_and_ints : + vec128 -> float -> vec128 -> int64 -> vec128 -> float -> vec128 -> int64 -> + int64 -> vec128 -> vec128 -> float -> float -> vec128 -> vec128 -> int64 -> + int64 -> float -> vec128 -> vec128 -> vec128 -> int64 -> int64 -> float -> + vec128 + = "" "vectors_and_floats_and_ints" [@@noalloc] [@@unboxed] + +let () = + let v0 = vec128_of_int64s 1L 2L in + let v1 = vec128_of_int64s 3L 4L in + let v2 = vec128_of_int64s 5L 6L in + let v3 = vec128_of_int64s 7L 8L in + let v4 = vec128_of_int64s 9L 10L in + let v5 = vec128_of_int64s 11L 12L in + let v6 = vec128_of_int64s 13L 14L in + let v7 = vec128_of_int64s 15L 16L in + let v8 = vec128_of_int64s 17L 18L in + let v9 = vec128_of_int64s 19L 20L in + let v10 = vec128_of_int64s 21L 22L in + let v = vectors_and_floats_and_ints v0 23. v1 24L v2 25. v3 26L 27L v4 v5 28. 29. v6 v7 30L 31L 32. v8 v9 v10 33L 34L 35. in + check v 377L 253L +;; + +(* Vectors live across a probe handler *) +let () = + let v0 = vec128_of_int64s 1L 2L in + let v1 = vec128_of_int64s 3L 4L in + let v2 = vec128_of_int64s 5L 6L in + let v3 = vec128_of_int64s 7L 8L in + let v4 = vec128_of_int64s 9L 10L in + let v5 = vec128_of_int64s 11L 12L in + let v6 = vec128_of_int64s 13L 14L in + let v7 = vec128_of_int64s 15L 16L in + let v8 = vec128_of_int64s 17L 18L in + let v9 = vec128_of_int64s 19L 20L in + let v10 = vec128_of_int64s 21L 22L in + let v11 = vec128_of_int64s 23L 24L in + let v12 = vec128_of_int64s 25L 26L in + let v13 = vec128_of_int64s 27L 28L in + let v14 = vec128_of_int64s 29L 30L in + let v15 = vec128_of_int64s 31L 32L in + [%probe "hello" ~enabled_at_init:true ( + let xxx = vec128_of_int64s 0L 0L in + check xxx 0L 0L)]; + let v = lots_of_vectors v0 v1 v2 v3 v4 v5 v6 v7 v8 v9 v10 v11 v12 v13 v14 v15 in + check v 256L 272L +;; + +let () = + let v0 = vec128_of_int64s 1L 2L in + let v1 = vec128_of_int64s 3L 4L in + let v2 = vec128_of_int64s 5L 6L in + let v3 = vec128_of_int64s 7L 8L in + let v4 = vec128_of_int64s 9L 10L in + let v5 = vec128_of_int64s 11L 12L in + let v6 = vec128_of_int64s 13L 14L in + let v7 = vec128_of_int64s 15L 16L in + let v8 = vec128_of_int64s 17L 18L in + let v9 = vec128_of_int64s 19L 20L in + let v10 = vec128_of_int64s 21L 22L in + [%probe "hello" ~enabled_at_init:true ( + let xxx = vec128_of_int64s 0L 0L in + check xxx 0L 0L)]; + let v = vectors_and_floats v0 23. v1 24. v2 25. v3 26. 27. v4 v5 28. 29. v6 v7 30. 31. 32. v8 v9 v10 33. 34. 35. in + check v 377L 253L +;; + +let () = + let v0 = vec128_of_int64s 1L 2L in + let v1 = vec128_of_int64s 3L 4L in + let v2 = vec128_of_int64s 5L 6L in + let v3 = vec128_of_int64s 7L 8L in + let v4 = vec128_of_int64s 9L 10L in + let v5 = vec128_of_int64s 11L 12L in + let v6 = vec128_of_int64s 13L 14L in + let v7 = vec128_of_int64s 15L 16L in + let v8 = vec128_of_int64s 17L 18L in + let v9 = vec128_of_int64s 19L 20L in + let v10 = vec128_of_int64s 21L 22L in + [%probe "hello" ~enabled_at_init:true ( + let xxx = vec128_of_int64s 0L 0L in + check xxx 0L 0L)]; + let v = vectors_and_floats_and_ints v0 23. v1 24L v2 25. v3 26L 27L v4 v5 28. 29. v6 v7 30L 31L 32. v8 v9 v10 33L 34L 35. in + check v 377L 253L +;; diff --git a/tests/simd/stubs.c b/tests/simd/stubs.c new file mode 100644 index 00000000000..edcb7692cfd --- /dev/null +++ b/tests/simd/stubs.c @@ -0,0 +1,103 @@ + +#include +#include +#include +#include +#include + +int64_t vec128_low_int64(__m128i v) +{ + return _mm_extract_epi64(v, 0); +} + +int64_t vec128_high_int64(__m128i v) +{ + return _mm_extract_epi64(v, 1); +} + +__m128i vec128_of_int64s(int64_t low, int64_t high) +{ + return _mm_set_epi64x(high, low); +} + +CAMLprim value boxed_combine(value v0, value v1) +{ + CAMLparam2(v0, v1); + CAMLlocal1(res); + + __m128i l = _mm_loadu_si128((__m128i*)v0); + __m128i r = _mm_loadu_si128((__m128i*)v1); + __m128i result = _mm_add_epi64(l, r); + res = caml_alloc_small(2, Abstract_tag); + _mm_storeu_si128((__m128i*)res, result); + + CAMLreturn(res); +} + +__m128i lots_of_vectors( + __m128i v0, __m128i v1, __m128i v2, __m128i v3, + __m128i v4, __m128i v5, __m128i v6, __m128i v7, + __m128i v8, __m128i v9, __m128i v10, __m128i v11, + __m128i v12, __m128i v13, __m128i v14, __m128i v15) +{ + __m128i x0 = _mm_add_epi64(v0, v1); + __m128i x1 = _mm_add_epi64(v2, v3); + __m128i x2 = _mm_add_epi64(v4, v5); + __m128i x3 = _mm_add_epi64(v6, v7); + __m128i x4 = _mm_add_epi64(v8, v9); + __m128i x5 = _mm_add_epi64(v10, v11); + __m128i x6 = _mm_add_epi64(v12, v13); + __m128i x7 = _mm_add_epi64(v14, v15); + __m128i y0 = _mm_add_epi64(x0, x1); + __m128i y1 = _mm_add_epi64(x2, x3); + __m128i y2 = _mm_add_epi64(x4, x5); + __m128i y3 = _mm_add_epi64(x6, x7); + __m128i z0 = _mm_add_epi64(y0, y1); + __m128i z1 = _mm_add_epi64(y2, y3); + return _mm_add_epi64(z0, z1); +} + +__m128i vectors_and_floats( + __m128i v0, double f0, __m128i v1, double f1, + __m128i v2, double f2, __m128i v3, double f3, + double f4, __m128i v4, __m128i v5, double f5, + double f6, __m128i v6, __m128i v7, double f7, + double f8, double f9, __m128i v8, __m128i v9, + __m128i v10, double f10, double f11, double f12) +{ + __m128i x0 = _mm_add_epi64(v0, v1); + __m128i x1 = _mm_add_epi64(v2, v3); + __m128i x2 = _mm_add_epi64(v4, v5); + __m128i x3 = _mm_add_epi64(v6, v7); + __m128i x4 = _mm_add_epi64(v8, v9); + __m128i y0 = _mm_add_epi64(x0, x1); + __m128i y1 = _mm_add_epi64(x2, x3); + __m128i y2 = _mm_add_epi64(v10, x4); + __m128i z0 = _mm_add_epi64(y0, y1); + __m128i z = _mm_add_epi64(z0, y2); + double f = f0 + f1 + f2 + f3 + f4 + f5 + f6 + f7 + f8 + f9 + f10 + f11 + f12; + return vec128_of_int64s((int64_t)f, vec128_low_int64(z) + vec128_high_int64(z)); +} + +__m128i vectors_and_floats_and_ints( + __m128i v0, double f0, __m128i v1, int64_t i0, + __m128i v2, double f1, __m128i v3, int64_t i1, + int64_t i2, __m128i v4, __m128i v5, double f2, + double f3, __m128i v6, __m128i v7, int64_t i3, + int64_t i4, double f4, __m128i v8, __m128i v9, + __m128i v10, int64_t i5, int64_t i6, double f5) +{ + __m128i x0 = _mm_add_epi64(v0, v1); + __m128i x1 = _mm_add_epi64(v2, v3); + __m128i x2 = _mm_add_epi64(v4, v5); + __m128i x3 = _mm_add_epi64(v6, v7); + __m128i x4 = _mm_add_epi64(v8, v9); + __m128i y0 = _mm_add_epi64(x0, x1); + __m128i y1 = _mm_add_epi64(x2, x3); + __m128i y2 = _mm_add_epi64(v10, x4); + __m128i z0 = _mm_add_epi64(y0, y1); + __m128i z = _mm_add_epi64(z0, y2); + double f = f0 + f1 + f2 + f3 + f4 + f5; + int64_t i = i0 + i1 + i2 + i3 + i4 + i5 + i6; + return vec128_of_int64s((int64_t)f + i, vec128_low_int64(z) + vec128_high_int64(z)); +} diff --git a/testsuite/tests/unboxed-primitive-args/README b/testsuite/tests/unboxed-primitive-args/README new file mode 100644 index 00000000000..4bd7601e511 --- /dev/null +++ b/testsuite/tests/unboxed-primitive-args/README @@ -0,0 +1,26 @@ +This directory contains tests to check that OCaml values are correctly +passed between OCaml and C when a primitive takes some or all of its +arguments unboxed/untagged and/or return its result unboxed/untagged. + +To test one primitive we do: +- write all its argument and expected result in buffer A +- call the C external using arguments read from buffer A +- the C function write all the arguments it receive into buffer B +- the C function read the result from buffer A and returns it +- on the OCaml side we write the received result into buffer B +- the test is successful if A and B have the same contents + +Between each call, we call a function with 128 value arguments set to +0 and a function with 32 unboxed float arguments set to 0., just to +clean-up the registers and stacks in case garbage would make a test +succeed. We don't pass more floats as it doesn't build on arm32. + +We construct the set of primitives to test as follow: +- all combination of unboxed int32/int64/float arguments for functions + taking up to 6 arguments (with more than 6 ocamlopt takes a really + long time to compile the test files) +- a bunch of manual tests for the rest and specific patterns. + The list is [Gen_test.manual_tests] + +We test the set of primitives a thousand times, with different random +data each time. diff --git a/testsuite/tests/unboxed-primitive-args/common.ml b/testsuite/tests/unboxed-primitive-args/common.ml new file mode 100644 index 00000000000..03b1f0543e2 --- /dev/null +++ b/testsuite/tests/unboxed-primitive-args/common.ml @@ -0,0 +1,304 @@ +open StdLabels + +open Bigarray + +type 'a typ = + | Int : int typ + | Int32 : int32 typ + | Int64 : int64 typ + | Nativeint : nativeint typ + | Float : float typ + | Vec128 : vec128 typ + +type 'a proto = + | Ret : 'a typ -> 'a proto + | Abs : 'a typ * 'b proto -> ('a -> 'b) proto + +let ( ** ) x y = Abs (x, y) + +(* This form is easier to process programmatically. We don't expose it as + ocamlopt takes a really really long time to compile a constant list + of these. *) +type simplified_test = Test : string * 'a * 'a proto -> simplified_test + +type test = + | T1 : string * ('a -> 'b) * 'a typ * 'b typ -> test + | T2 : string * ('a -> 'b -> 'c) * 'a typ * 'b typ * 'c typ -> test + | T3 : string * ('a -> 'b -> 'c -> 'd) * + 'a typ * 'b typ * 'c typ * 'd typ -> test + | T4 : string * ('a -> 'b -> 'c -> 'd -> 'e) * + 'a typ * 'b typ * 'c typ * 'd typ * 'e typ -> test + | T5 : string * ('a -> 'b -> 'c -> 'd -> 'e -> 'f) * + 'a typ * 'b typ * 'c typ * 'd typ * 'e typ * 'f typ -> test + | T6 : string * ('a -> 'b -> 'c -> 'd -> 'e -> 'f -> 'g) * + 'a typ * 'b typ * 'c typ * 'd typ * 'e typ * 'f typ * 'g typ -> test + | T : string * 'a * 'a proto -> test + +let expand_test = function + | T1 (s, fn, a, b) -> Test (s, fn, a ** Ret b) + | T2 (s, fn, a, b, c) -> Test (s, fn, a ** b ** Ret c) + | T3 (s, fn, a, b, c, d) -> Test (s, fn, a ** b ** c ** Ret d) + | T4 (s, fn, a, b, c, d, e) -> Test (s, fn, a ** b ** c ** d ** Ret e) + | T5 (s, fn, a, b, c, d, e, f) -> + Test (s, fn, a ** b ** c ** d ** e ** Ret f) + | T6 (s, fn, a, b, c, d, e, f, g) -> + Test (s, fn, a ** b ** c ** d ** e ** f ** Ret g) + | T (s, fn, p) -> Test (s, fn, p) + +external vec128_of_int64s : int64 -> int64 -> vec128 = "" "vec128_of_int64s" [@@noalloc] [@@unboxed] +external vec128_low_int64 : vec128 -> int64 = "" "vec128_low_int64" [@@noalloc] [@@unboxed] +external vec128_high_int64 : vec128 -> int64 = "" "vec128_high_int64" [@@noalloc] [@@unboxed] + +let string_of : type a. a typ -> a -> string = function + | Int -> Int.to_string + | Int32 -> Printf.sprintf "%ldl" + | Int64 -> Printf.sprintf "%LdL" + | Nativeint -> Printf.sprintf "%ndn" + | Float -> + fun f -> Printf.sprintf "float_of_bits 0x%LxL" (Int64.bits_of_float f) + | Vec128 -> + fun v -> Printf.sprintf "vec128 %016Lx:%016Lx" (vec128_high_int64 v) (vec128_low_int64 v) + +let rec arity : type a. a proto -> int = function + | Ret _ -> 0 + | Abs (_, p) -> 1 + arity p + +module Buffer = struct + type t = (char, int8_unsigned_elt, c_layout) Array1.t + + let arg_size = 16 + + let create ~arity : t = + Array1.create char c_layout ((arity + 1) * arg_size) + + let clear (t : t) = Array1.fill t '\000' + + let length : t -> int = Array1.dim + + external init_c_side : ocaml_buffer:t -> c_buffer:t -> unit + = "test_set_buffers" + + external get_int32 : t -> int -> int32 = "%caml_bigstring_get32" + external get_int64 : t -> int -> int64 = "%caml_bigstring_get64" + external set_int32 : t -> int -> int32 -> unit = "%caml_bigstring_set32" + external set_int64 : t -> int -> int64 -> unit = "%caml_bigstring_set64" + + let get_vec128 buf ~arg = + let low, high = get_int64 buf (arg * arg_size), get_int64 buf (arg * arg_size + 8) in + vec128_of_int64s low high + + let set_vec128 buf ~arg x = + set_int64 buf (arg * arg_size) (vec128_low_int64 x); + set_int64 buf ((arg * arg_size) + 8) (vec128_high_int64 x) + + let get_int32 t ~arg = get_int32 t (arg * arg_size) + let get_int64 t ~arg = get_int64 t (arg * arg_size) + let set_int32 t ~arg x = set_int32 t (arg * arg_size) x + let set_int64 t ~arg x = set_int64 t (arg * arg_size) x + + let get_nativeint, set_nativeint = + match Sys.word_size with + | 32 -> ((fun t ~arg -> get_int32 t ~arg |> Nativeint.of_int32), + (fun t ~arg x -> set_int32 t ~arg (Nativeint.to_int32 x))) + | 64 -> ((fun t ~arg -> get_int64 t ~arg |> Int64.to_nativeint), + (fun t ~arg x -> set_int64 t ~arg (Int64.of_nativeint x))) + | n -> Printf.ksprintf failwith "unknown word size (%d)" n + + let get_int = + if Sys.word_size = 32 then + fun buf ~arg -> get_int32 buf ~arg |> Int32.to_int + else + fun buf ~arg -> get_int64 buf ~arg |> Int64.to_int + + let set_int = + if Sys.word_size = 32 then + fun buf ~arg x -> set_int32 buf ~arg (Int32.of_int x) + else + fun buf ~arg x -> set_int64 buf ~arg (Int64.of_int x) + + let get_float buf ~arg = get_int64 buf ~arg |> Int64.float_of_bits + let set_float buf ~arg x = set_int64 buf ~arg (Int64.bits_of_float x) + + let get : type a. a typ -> t -> arg:int -> a = function + | Int -> get_int + | Int32 -> get_int32 + | Int64 -> get_int64 + | Nativeint -> get_nativeint + | Float -> get_float + | Vec128 -> get_vec128 + + let set : type a. a typ -> t -> arg:int -> a -> unit = function + | Int -> set_int + | Int32 -> set_int32 + | Int64 -> set_int64 + | Nativeint -> set_nativeint + | Float -> set_float + | Vec128 -> set_vec128 + + (* This is almost a memcpy except that we use get/set which should + ensure that the values in [dst] don't overflow. *) + let copy_args ~src ~dst proto = + let rec loop : type a. a proto -> int -> unit = fun proto arg -> + match proto with + | Ret typ -> + set typ dst ~arg (get typ src ~arg) + | Abs (typ, rest) -> + set typ dst ~arg (get typ src ~arg); + loop rest (arg + 1) + in + loop proto 0 +end + +let exec proto f ~ocaml_buffer ~c_buffer = + let rec loop : type a. a proto -> a -> int -> unit = fun proto f arg -> + match proto with + | Ret typ -> + Buffer.set typ c_buffer ~arg f + | Abs (typ, rest) -> + let x = Buffer.get typ ocaml_buffer ~arg in + loop rest (f x) (arg + 1) + in + loop proto f 0 + +let strings_of_test_instance name proto buffer = + let rec loop : type a. a proto -> int -> string list -> string list * string = + fun proto arg acc -> + match proto with + | Ret typ -> + (List.rev acc, string_of typ (Buffer.get typ buffer ~arg)) + | Abs (typ, rest) -> + let s = string_of typ (Buffer.get typ buffer ~arg) in + loop rest (arg + 1) (s :: acc) + in + loop proto 0 [] + +let typ_size : type a. a typ -> int = function + | Int -> Sys.word_size / 8 + | Int32 -> 4 + | Int64 -> 8 + | Nativeint -> Sys.word_size / 8 + | Float -> 8 + | Vec128 -> 16 + +let rec sizes : type a. a proto -> int list = function + | Ret typ -> [typ_size typ] + | Abs (typ, rest) -> typ_size typ :: sizes rest + +let print_hex ~sizes ~arity buffer = + let printf = Printf.printf in + printf "("; + for i = 0 to arity do + if i = arity then + printf ") -> " + else if i > 0 then + printf ", "; + for ofs = i * Buffer.arg_size to i * Buffer.arg_size + sizes.(i) - 1 do + printf "%02x" (Char.code buffer.{ofs}); + done; + done + +let printed_mismatches = ref 0 + +let print_mismatch name proto ~ocaml_buffer ~c_buffer = + let printf = Printf.printf in + printf "Mismatch for %s\n" name; + let o_args, o_res = strings_of_test_instance name proto ocaml_buffer in + let c_args, c_res = strings_of_test_instance name proto c_buffer in + let o_args, c_args = + (* Align arguments *) + List.map2 o_args c_args ~f:(fun a b -> + let len_a = String.length a and len_b = String.length b in + let len = max len_a len_b in + (Printf.sprintf "%*s" len a, + Printf.sprintf "%*s" len b)) + |> List.split + in + printf "ocaml side : (%s) -> %s\n" (String.concat ~sep:", " o_args) o_res; + printf "c side : (%s) -> %s\n" (String.concat ~sep:", " c_args) c_res; + let sizes = sizes proto |> Array.of_list in + let arity = arity proto in + printf "ocaml side : "; print_hex ~sizes ~arity ocaml_buffer; printf "\n"; + printf "c side : "; print_hex ~sizes ~arity c_buffer; printf "\n"; + incr printed_mismatches; + if !printed_mismatches >= 1000 then begin + printf "Output truncated at 1000 failures."; + exit 0 + end + +external cleanup_normal + : int -> int -> int -> int -> int -> int -> int -> int + -> int -> int -> int -> int -> int -> int -> int -> int + -> int -> int -> int -> int -> int -> int -> int -> int + -> int -> int -> int -> int -> int -> int -> int -> int + -> int -> int -> int -> int -> int -> int -> int -> int + -> int -> int -> int -> int -> int -> int -> int -> int + -> int -> int -> int -> int -> int -> int -> int -> int + -> int -> int -> int -> int -> int -> int -> int -> int + -> int -> int -> int -> int -> int -> int -> int -> int + -> int -> int -> int -> int -> int -> int -> int -> int + -> int -> int -> int -> int -> int -> int -> int -> int + -> int -> int -> int -> int -> int -> int -> int -> int + -> int -> int -> int -> int -> int -> int -> int -> int + -> int -> int -> int -> int -> int -> int -> int -> int + -> int -> int -> int -> int -> int -> int -> int -> int + -> int -> int -> int -> int -> int -> int -> int -> int + -> int = "" "test_cleanup_normal" [@@noalloc] + +external cleanup_float + : float -> float -> float -> float -> float -> float -> float -> float + -> float -> float -> float -> float -> float -> float -> float -> float + -> float -> float -> float -> float -> float -> float -> float -> float + -> float -> float -> float -> float -> float -> float -> float -> float + -> float = "" "test_cleanup_float" [@@noalloc] [@@unboxed] + +let cleanup_args_and_stack () = + let _ : int = + cleanup_normal + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + in + let _ : float = + cleanup_float + 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. + 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. + in + () + +let run_test ~random_data ~ocaml_buffer ~c_buffer (Test (name, f, proto)) = + Buffer.clear ocaml_buffer; + Buffer.clear c_buffer; + Buffer.copy_args ~src:random_data ~dst:ocaml_buffer proto; + cleanup_args_and_stack (); + exec proto f ~ocaml_buffer ~c_buffer; + let success = ocaml_buffer = c_buffer in + if not success then print_mismatch name proto ~ocaml_buffer ~c_buffer; + success + +let run_tests tests = + let tests = List.map tests ~f:expand_test in + let max_args = + List.fold_left tests ~init:0 ~f:(fun acc (Test (_, _, p)) -> + max acc (arity p)) + in + + let ocaml_buffer = Buffer.create ~arity:max_args + and c_buffer = Buffer.create ~arity:max_args in + Buffer.init_c_side ~ocaml_buffer ~c_buffer; + + let random_data = Buffer.create ~arity:max_args in + let new_random_data () = + for i = 0 to Buffer.length random_data - 1 do + random_data.{i} <- char_of_int (Random.int 256) + done + in + + let failure = ref false in + for i = 1 to 1000 do + new_random_data (); + List.iter tests ~f:(fun test -> + if not (run_test ~random_data ~ocaml_buffer ~c_buffer test) then + failure := true) + done; + exit (if !failure then 1 else 0) diff --git a/testsuite/tests/unboxed-primitive-args/common.mli b/testsuite/tests/unboxed-primitive-args/common.mli new file mode 100644 index 00000000000..362ef4f916b --- /dev/null +++ b/testsuite/tests/unboxed-primitive-args/common.mli @@ -0,0 +1,30 @@ +(** Type of arguments/result *) +type 'a typ = + | Int : int typ + | Int32 : int32 typ + | Int64 : int64 typ + | Nativeint : nativeint typ + | Float : float typ + | Vec128 : vec128 typ + +type 'a proto = + | Ret : 'a typ -> 'a proto + | Abs : 'a typ * 'b proto -> ('a -> 'b) proto + +(** Same as [Abs]. We choose this operator for its associativity. *) +val ( ** ) : 'a typ -> 'b proto -> ('a -> 'b) proto + +type test = + | T1 : string * ('a -> 'b) * 'a typ * 'b typ -> test + | T2 : string * ('a -> 'b -> 'c) * 'a typ * 'b typ * 'c typ -> test + | T3 : string * ('a -> 'b -> 'c -> 'd) * + 'a typ * 'b typ * 'c typ * 'd typ -> test + | T4 : string * ('a -> 'b -> 'c -> 'd -> 'e) * + 'a typ * 'b typ * 'c typ * 'd typ * 'e typ -> test + | T5 : string * ('a -> 'b -> 'c -> 'd -> 'e -> 'f) * + 'a typ * 'b typ * 'c typ * 'd typ * 'e typ * 'f typ -> test + | T6 : string * ('a -> 'b -> 'c -> 'd -> 'e -> 'f -> 'g) * + 'a typ * 'b typ * 'c typ * 'd typ * 'e typ * 'f typ * 'g typ -> test + | T : string * 'a * 'a proto -> test + +val run_tests : test list -> unit diff --git a/testsuite/tests/unboxed-primitive-args/gen_test.ml b/testsuite/tests/unboxed-primitive-args/gen_test.ml new file mode 100644 index 00000000000..80e11178296 --- /dev/null +++ b/testsuite/tests/unboxed-primitive-args/gen_test.ml @@ -0,0 +1,249 @@ +(* This programs generate stubs with various prototype combinations *) + +open StdLabels + +type boxed_integer = Pnativeint | Pint32 | Pint64 + +type boxed_vector = Pvec128 + +type native_repr = + | Same_as_ocaml_repr + | Unboxed_float + | Unboxed_integer of boxed_integer + | Untagged_int + | Unboxed_vector of boxed_vector + +(* Generate primitives with up to this number of arguments *) +let test_all_combination_up_to_n_args = 6 + +(* Generate primitives using all combination of these argument + representations. No need to test all combination of other + representations: regarding the calling convention + [Same_as_ocaml_repr], [Untagged_int] and + [Unboxed_integer Pnativeint] are all the same, and are the + same as [Unboxed_integer Pint]. + + We have specific tests for the other representations and for the + result representation in [manual_tests]. +*) +let test_all_args_combination_of = + [ Unboxed_float + ; Unboxed_integer Pint32 + ; Unboxed_integer Pint64 + ; Unboxed_vector Pvec128 + ] + +let code_of_repr = function + | Same_as_ocaml_repr -> "v" (* for "value" *) + | Unboxed_float -> "f" + | Unboxed_integer Pint32 -> "l" + | Unboxed_integer Pint64 -> "L" + | Unboxed_integer Pnativeint -> "n" + | Untagged_int -> "i" + | Unboxed_vector Pvec128 -> "x" (* for "xmm" *) + +let repr_of_code = function + | 'v' -> Same_as_ocaml_repr + | 'f' -> Unboxed_float + | 'l' -> Unboxed_integer Pint32 + | 'L' -> Unboxed_integer Pint64 + | 'n' -> Unboxed_integer Pnativeint + | 'i' -> Untagged_int + | 'x' -> Unboxed_vector Pvec128 + | _ -> assert false + +let manual_tests = + [ "v_v" + ; "f_f" + ; "l_l" + ; "L_L" + ; "n_n" + ; "i_i" + ; "x_x" + ; "f_fffff" + ; "f_ffffff" + ; "f_fffffff" + ; "f_fffffffffffffffff" + ; "x_xxxxx" + ; "x_xxxxxx" + ; "x_xxxxxxx" + ; "x_xxxxxxxxxxxxxxxxx" + ; "v_iiiiiiiiiiiiiiiii" + ; "v_lllllllllllllllll" + ; "v_LLLLLLLLLLLLLLLLL" + ; "v_iLiLiLiLiLiLiLiLi" + ; "v_LiLiLiLiLiLiLiLiL" + ; "v_flflflflflflflflflflflflflflflflflfl" + ; "v_fLfLfLfLfLfLfLfLfLfLfLfLfLfLfLfLfLfL" + ; "v_xfxfxfxfxfxfxfxfx" + ; "v_fxfxfxfxfxfxfxfxf" + ; "v_lfxlfxlfxlfxlfxlfx" + ; "v_lflxlxlflflxlxlflx" + ; "v_llllllfffffflxxllxx" + ] + +let ocaml_type_of_repr = function + (* Doesn't really matters what we choose for this case *) + | Same_as_ocaml_repr -> "int" + | Unboxed_float -> "(float [@unboxed])" + | Unboxed_integer Pint32 -> "(int32 [@unboxed])" + | Unboxed_integer Pint64 -> "(int64 [@unboxed])" + | Unboxed_integer Pnativeint -> "(nativeint [@unboxed])" + | Untagged_int -> "(int [@untagged])" + | Unboxed_vector Pvec128 -> "(vec128 [@unboxed])" + +let ocaml_type_gadt_of_repr = function + (* Doesn't really matters what we choose for this case *) + | Same_as_ocaml_repr -> "Int" + | Unboxed_float -> "Float" + | Unboxed_integer Pint32 -> "Int32" + | Unboxed_integer Pint64 -> "Int64" + | Unboxed_integer Pnativeint -> "Nativeint" + | Untagged_int -> "Int" + | Unboxed_vector Pvec128 -> "Vec128" + +let c_type_of_repr = function + | Same_as_ocaml_repr -> "value" + | Unboxed_float -> "double" + | Unboxed_integer Pint32 -> "int32_t" + | Unboxed_integer Pint64 -> "int64_t" + | Unboxed_integer Pnativeint -> "intnat" + | Untagged_int -> "intnat" + | Unboxed_vector Pvec128 -> "__m128i" + +type proto = + { params : native_repr list + ; return : native_repr + } + +let rec explode s = + let rec loop i acc = + if i < 0 then + acc + else + loop (i - 1) (s.[i] :: acc) + in + loop (String.length s - 1) [] + +let proto_of_str s = + Scanf.sscanf s "%c_%s" (fun return params -> + { params = List.map (explode params) ~f:repr_of_code + ; return = repr_of_code return + }) + +let function_name_of_proto proto = + Printf.sprintf "test_%s_%s" (code_of_repr proto.return) + (String.concat ~sep:"" (List.map proto.params ~f:code_of_repr)) + +let ocaml_type_gadt_of_proto proto = + Printf.sprintf "%s ** Ret %s" + (String.concat ~sep:" ** " + (List.map proto.params ~f:ocaml_type_gadt_of_repr)) + (ocaml_type_gadt_of_repr proto.return) + +let ocaml_type_of_proto proto = + String.concat ~sep:" -> " + (List.map proto.params ~f:ocaml_type_of_repr + @ [ocaml_type_of_repr proto.return]) + +let c_args_of_proto proto = + String.concat ~sep:", " + (List.mapi proto.params ~f:(fun i p -> + Printf.sprintf "%s x%d" (c_type_of_repr p) i)) + +let manual_protos = List.map manual_tests ~f:proto_of_str + +let iter_protos ~f = + let iter_for_arity arity = + let rec loop params to_gen = + List.iter test_all_args_combination_of ~f:(fun repr -> + let params = repr :: params in + let to_gen = to_gen - 1 in + if to_gen = 0 then + f { params = List.rev params + ; return = Same_as_ocaml_repr + } + else + loop params to_gen) + in + loop [] arity + in + let rec iter_arities arity = + if arity <= test_all_combination_up_to_n_args then begin + iter_for_arity arity; + iter_arities (arity + 1) + end + in + List.iter manual_protos ~f; + iter_arities 1 + +let pr fmt = Printf.ksprintf (fun s -> print_string s; print_char '\n') fmt + +let generate_ml () = + pr "open Common"; + pr ""; + iter_protos ~f:(fun proto -> + let name = function_name_of_proto proto in + pr "external %s : %s = \"\" %S [@@noalloc]" + name (ocaml_type_of_proto proto) name; + ); + pr ""; + pr "let tests = []"; + iter_protos ~f:(fun proto -> + let name = function_name_of_proto proto in + let arity = List.length proto.params in + if arity <= 6 then + pr "let tests = T%d (%S, %s, %s, %s) :: tests" + arity name name + (List.map proto.params ~f:ocaml_type_gadt_of_repr + |> String.concat ~sep:", ") + (ocaml_type_gadt_of_repr proto.return) + else + pr "let tests = T (%S, %s, %s) :: tests" + name name (ocaml_type_gadt_of_proto proto)); + pr ""; + pr "let () = run_tests (List.rev tests)" + +let generate_stubs () = + pr "#include "; + pr "#include "; + pr "#include \"test_common.h\""; + iter_protos ~f:(fun proto -> + let name = function_name_of_proto proto in + pr ""; + pr "%s %s(%s)" + (c_type_of_repr proto.return) + name + (c_args_of_proto proto); + pr "{"; + List.iteri proto.params ~f:(fun i p -> + pr " %(%d%d%);" + (match p with + | Same_as_ocaml_repr -> "set_intnat(%d, Long_val(x%d))" + | Unboxed_float -> "set_double(%d, x%d)" + | Unboxed_integer Pint32 -> "set_int32(%d, x%d)" + | Unboxed_integer Pint64 -> "set_int64(%d, x%d)" + | Unboxed_integer Pnativeint -> "set_intnat(%d, x%d)" + | Untagged_int -> "set_intnat(%d, x%d)" + | Unboxed_vector Pvec128 -> "set_vec128(%d, x%d)") + i i); + pr " return %(%d%);" + (match proto.return with + | Same_as_ocaml_repr -> "Val_long(get_intnat(%d))" + | Unboxed_float -> "get_double(%d)" + | Unboxed_integer Pint32 -> "get_int32(%d)" + | Unboxed_integer Pint64 -> "get_int64(%d)" + | Unboxed_integer Pnativeint -> "get_intnat(%d)" + | Untagged_int -> "get_intnat(%d)" + | Unboxed_vector Pvec128 -> "get_vec128(%d)") + (List.length proto.params); + pr "}" + ) + +let () = + match Sys.argv with + | [|_; "ml"|] -> generate_ml () + | [|_; "c" |] -> generate_stubs () + | _ -> + prerr_endline "Usage: ocaml gen_test.ml {ml|c}"; + exit 2 diff --git a/testsuite/tests/unboxed-primitive-args/test.ml b/testsuite/tests/unboxed-primitive-args/test.ml new file mode 100644 index 00000000000..a7b75037919 --- /dev/null +++ b/testsuite/tests/unboxed-primitive-args/test.ml @@ -0,0 +1,24 @@ +(* TEST + +readonly_files = "common.mli common.ml test_common.c test_common.h" + +* setup-ocamlopt.opt-build-env +** ocaml +test_file = "${test_source_directory}/gen_test.ml" +ocaml_script_as_argument = "true" +arguments = "c" +compiler_output = "stubs.c" +*** ocaml +arguments = "ml" +compiler_output = "main.ml" +**** script +script = "${cc} -msse4.2 -c test_common.c -I ../../../../../../../../runtime" +***** script +script = "${cc} -msse4.2 -c stubs.c -I ../../../../../../../../runtime" +****** ocamlopt.opt +ocamlopt_flags = "-fsimd" +all_modules = "test_common.o stubs.o common.mli common.ml main.ml" +******* run +******** check-program-output + +*) diff --git a/testsuite/tests/unboxed-primitive-args/test.reference b/testsuite/tests/unboxed-primitive-args/test.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/testsuite/tests/unboxed-primitive-args/test_common.c b/testsuite/tests/unboxed-primitive-args/test_common.c new file mode 100644 index 00000000000..9f453c354ce --- /dev/null +++ b/testsuite/tests/unboxed-primitive-args/test_common.c @@ -0,0 +1,54 @@ +/**************************************************************************/ +/* */ +/* OCaml */ +/* */ +/* Jeremie Dimino, Jane Street Europe */ +/* */ +/* Copyright 2015 Institut National de Recherche en Informatique et */ +/* en Automatique. */ +/* */ +/* All rights reserved. This file is distributed under the terms of */ +/* the GNU Lesser General Public License version 2.1, with the */ +/* special exception on linking described in the file LICENSE. */ +/* */ +/**************************************************************************/ + +#include +#include +#include +#include + +char *ocaml_buffer; +char *c_buffer; + +value test_set_buffers(value v_ocaml_buffer, value v_c_buffer) +{ + ocaml_buffer = Caml_ba_data_val(v_ocaml_buffer); + c_buffer = Caml_ba_data_val(v_c_buffer); + return Val_unit; +} + +value test_cleanup_normal(void) +{ + return Val_int(0); +} + +double test_cleanup_float(void) +{ + return 0.; +} + +int64_t vec128_low_int64(__m128i v) +{ + return _mm_extract_epi64(v, 0); +} + +int64_t vec128_high_int64(__m128i v) +{ + return _mm_extract_epi64(v, 1); +} + +__m128i vec128_of_int64s(int64_t low, int64_t high) +{ + return _mm_set_epi64x(high, low); +} diff --git a/testsuite/tests/unboxed-primitive-args/test_common.h b/testsuite/tests/unboxed-primitive-args/test_common.h new file mode 100644 index 00000000000..fb3c08c86be --- /dev/null +++ b/testsuite/tests/unboxed-primitive-args/test_common.h @@ -0,0 +1,50 @@ +/**************************************************************************/ +/* */ +/* OCaml */ +/* */ +/* Jeremie Dimino, Jane Street Europe */ +/* */ +/* Copyright 2015 Institut National de Recherche en Informatique et */ +/* en Automatique. */ +/* */ +/* All rights reserved. This file is distributed under the terms of */ +/* the GNU Lesser General Public License version 2.1, with the */ +/* special exception on linking described in the file LICENSE. */ +/* */ +/**************************************************************************/ + +#ifndef __TEST_COMMON_H +#define __TEST_COMMON_H + +#include + +/* Where the OCaml side stores the arguments and result for a test + case. The C function will read the result it is supposed to return + from this buffer. + + Argument [n] is stored at [n * 8] and the result is stored at + [arity * 8]. +*/ +extern char *ocaml_buffer; + +/* Where the C function stores the arguments it receive for a test + case. The OCaml side will store the result from the C function in + this buffer. At the of a test case, both these buffers must be + equal. */ +extern char *c_buffer; + +#define STRIDE 16 + +#define get_intnat(n) *(intnat*)(ocaml_buffer+((n)*STRIDE)) +#define get_int32(n) *(int32_t*)(ocaml_buffer+((n)*STRIDE)) +#define get_int64(n) *(int64_t*)(ocaml_buffer+((n)*STRIDE)) +#define get_double(n) *(double*)(ocaml_buffer+((n)*STRIDE)) +#define get_vec128(n) _mm_loadu_si128((__m128i*)(ocaml_buffer+((n)*STRIDE))) + +#define set_intnat(n, x) *(intnat*)(c_buffer+((n)*STRIDE)) = (x) +#define set_int32(n, x) *(int32_t*)(c_buffer+((n)*STRIDE)) = (x) +#define set_int64(n, x) *(int64_t*)(c_buffer+((n)*STRIDE)) = (x) +#define set_double(n, x) *(double*)(c_buffer+((n)*STRIDE)) = (x) +#define set_vec128(n, x) _mm_storeu_si128((__m128i*)(c_buffer+((n)*STRIDE)), (x)) + +#endif /* __TEST_COMMON_H */