Skip to content

Commit 6348826

Browse files
mshinwellEkdohibs
andauthored
flambda-backend: Rc_close_at_apply should only close the current region early (#2736)
Co-authored-by: Nathanaëlle Courant <nathanaelle.courant@ocamlpro.com>
1 parent 0f30fe9 commit 6348826

File tree

2 files changed

+77
-0
lines changed

2 files changed

+77
-0
lines changed

lambda/lambda.mli

+20
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,26 @@ type region_close =
8686
| Rc_nontail (* do not close region, must not TCO *)
8787
| Rc_close_at_apply (* close region and tail call *)
8888

89+
(** Notes about applytail (as emitted by Printlambda) a.k.a. Rc_close_at_apply:
90+
91+
applytail / Rc_close_at_apply means that a call occurs in tail position of
92+
the nearest enclosing region, and should be compiled by closing that region
93+
(and only that region) just before control is transferred to the call.
94+
95+
In the raw lambda generated by Translcore, this can occur only in tail
96+
position of a function, so these do happen to be tail calls. But after
97+
inlining (by Simplif or otherwise), this may no longer be the case.
98+
99+
In particular, this code pattern:
100+
101+
(function (region (.... (region ... (applytail f)))))
102+
103+
means that the applytail is in tail position of the inner region and closes
104+
that one. It just so happens to be at the end of the outer function as well,
105+
but it does not mean that it's a tail call of that function. (It's not a
106+
tail call because the outer region needs to end there.)
107+
*)
108+
89109
(* CR layouts v5: When we add more blocks of non-scannable values, consider
90110
whether some of the primitives specific to ufloat records
91111
([Pmakeufloatblock], [Pufloatfield], and [Psetufloatfield]) can/should be
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
(* TEST
2+
stack-allocation;
3+
flambda2;
4+
{
5+
ocamlopt_flags="-Oclassic";
6+
native;
7+
}{
8+
ocamlopt_flags="-O3";
9+
native;
10+
}
11+
*)
12+
13+
external local_stack_offset : unit -> int = "caml_local_stack_offset"
14+
15+
external ignore : ('a[@local_opt]) -> unit = "%ignore"
16+
external opaque_identity : ('a[@local_opt]) -> ('a[@local_opt]) = "%opaque"
17+
18+
external ( +. )
19+
: (float[@local_opt])
20+
-> (float[@local_opt])
21+
-> (float[@local_opt])
22+
= "%addfloat"
23+
24+
let locally_allocate =
25+
let f = 1.0 in
26+
fun [@inline never] () -> ignore (opaque_identity (f +. 1.0) : float)
27+
;;
28+
29+
let saved_stack_offset = ref 0
30+
31+
let foo list =
32+
(* The Simplif local function optimization will transform [run] and
33+
[wrapper] into continuations. *)
34+
let[@inline never][@local] run f = f () in
35+
let[@inline never][@local] wrapper f =
36+
match Sys.opaque_identity true with
37+
| true ->
38+
(* Make a local allocation *)
39+
ignore (opaque_identity (opaque_identity 1.0 +. 1.0));
40+
(* This tail call gets moved into non-tail position after the above
41+
inlining transformation. As such it's important that its
42+
[Rc_close_at_apply] semantics is respected carefully: only the
43+
current region, and not the parent one, must be closed (since [f]
44+
is locally allocated in that parent region). *)
45+
f ()
46+
| false -> run (fun () -> f ()) [@nontail]
47+
in
48+
let f = fun _ ->
49+
assert (local_stack_offset () = !saved_stack_offset);
50+
locally_allocate ();
51+
Stdlib.List.hd list
52+
in
53+
saved_stack_offset := local_stack_offset ();
54+
wrapper f [@nontail]
55+
;;
56+
57+
let _ : int = foo [ 1 ]

0 commit comments

Comments
 (0)