Skip to content

Commit 22f170a

Browse files
authored
flambda-backend: Unboxed float type parsing in layouts_alpha (#1467)
* Add lexing and parsing for float# * Gate new parsing on layouts_alpha * Add alias of float# to float, and write some tests * Respond to review comments * Fix post-rebase * Add maturity to error message * bootstrap (I think unrelated to this PR, but I'm not positive) * improve layouts error message slightly
1 parent 740de2a commit 22f170a

19 files changed

+8281
-6456
lines changed

boot/menhir/parser.ml

+7,814-6,435
Large diffs are not rendered by default.

boot/menhir/parser.mli

+1
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ type token =
8282
| INCLUDE
8383
| IN
8484
| IF
85+
| HASH_SUFFIX
8586
| HASHOP of (string)
8687
| HASH
8788
| GREATERRBRACKET

boot/ocamlc

25 KB
Binary file not shown.

boot/ocamllex

0 Bytes
Binary file not shown.

parsing/jane_syntax_parsing.ml

+34-9
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,10 @@ module Error = struct
334334
| Malformed_embedding of
335335
Embedding_syntax.t * Embedded_name.t * malformed_embedding
336336
| Unknown_extension of Embedding_syntax.t * Erasability.t * string
337-
| Disabled_extension : _ Language_extension.t -> error
337+
| Disabled_extension :
338+
{ ext : _ Language_extension.t
339+
; maturity : Language_extension.maturity option
340+
} -> error
338341
| Wrong_syntactic_category of Feature.t * string
339342
| Misnamed_embedding of
340343
Misnamed_embedding_error.t * string * Embedding_syntax.t
@@ -347,9 +350,16 @@ end
347350

348351
open Error
349352

350-
let assert_extension_enabled ~loc ext setting =
353+
let assert_extension_enabled
354+
(type a) ~loc (ext : a Language_extension.t) (setting : a)
355+
=
351356
if not (Language_extension.is_at_least ext setting) then
352-
raise (Error(loc, Disabled_extension ext))
357+
let maturity : Language_extension.maturity option =
358+
match ext with
359+
| Layouts -> Some (setting : Language_extension.maturity)
360+
| _ -> None
361+
in
362+
raise (Error(loc, Disabled_extension { ext; maturity }))
353363
;;
354364

355365
let report_error ~loc = function
@@ -371,11 +381,25 @@ let report_error ~loc = function
371381
name
372382
Embedded_name.pp_a_term (what, embedded_name)
373383
(Embedding_syntax.name what)
374-
| Disabled_extension ext ->
375-
Location.errorf
376-
~loc
377-
"The extension \"%s\" is disabled and cannot be used"
378-
(Language_extension.to_string ext)
384+
| Disabled_extension { ext; maturity } -> begin
385+
(* CR layouts: The [maturity] special case is a bit ad-hoc, but the
386+
layouts error message would be much worse without it. It also
387+
would be nice to mention the language construct in the error message.
388+
*)
389+
match maturity with
390+
| None ->
391+
Location.errorf
392+
~loc
393+
"The extension \"%s\" is disabled and cannot be used"
394+
(Language_extension.to_string ext)
395+
| Some maturity ->
396+
Location.errorf
397+
~loc
398+
"This construct requires the %s version of the extension \"%s\", \
399+
which is disabled and cannot be used"
400+
(Language_extension.maturity_to_string maturity)
401+
(Language_extension.to_string ext)
402+
end
379403
| Wrong_syntactic_category(feat, cat) ->
380404
Location.errorf
381405
~loc
@@ -837,7 +861,8 @@ module AST = struct
837861
raise_error (Wrong_syntactic_category(feat, AST.plural))
838862
end
839863
| Error err -> raise_error begin match err with
840-
| Disabled_extension ext -> Disabled_extension ext
864+
| Disabled_extension ext ->
865+
Disabled_extension { ext; maturity = None }
841866
| Unknown_extension name ->
842867
Unknown_extension (AST.embedding_syntax, erasability, name)
843868
end

parsing/lexer.mll

+129
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,103 @@ let is_in_string = ref false
128128
let in_string () = !is_in_string
129129
let print_warnings = ref true
130130

131+
type deferred_token =
132+
{ token : token
133+
; start_pos : Lexing.position
134+
; end_pos : Lexing.position
135+
}
136+
137+
(* This queue will only ever have 0 or 1 elements in it. We use it
138+
instead of an [option ref] for its convenient interface.
139+
*)
140+
let deferred_tokens : deferred_token Queue.t = Queue.create ()
141+
142+
(* Effectively splits the text in the lexer's current "window" (defined below)
143+
into two halves. The current call to the lexer will return the first half of
144+
the text in the window, and the next call to the lexer will return the second
145+
half (of length [len]) of the text in the window.
146+
147+
"window" refers to the text matched by a production of the lexer. It spans
148+
from [lexer.lex_start_p] to [lexer.lex_curr_p].
149+
150+
The function accomplishes this splitting by doing two things:
151+
- It sets the current window of the lexbuf to only account for the
152+
first half of the text. (The first half is of length: |text|-len.)
153+
- It enqueues a token into [deferred_tokens] such that, the next time the
154+
lexer is called, it will return the specified [token] *and* set the window
155+
of the lexbuf to account for the second half of the text. (The second half
156+
is of length: |text|.)
157+
158+
This business with setting the window of the lexbuf is only so that error
159+
messages point at the right place in the program text.
160+
*)
161+
let enqueue_token_from_end_of_lexbuf_window (lexbuf : Lexing.lexbuf) token ~len =
162+
let suffix_end = lexbuf.lex_curr_p in
163+
let suffix_start =
164+
{ suffix_end with pos_cnum = suffix_end.pos_cnum - len }
165+
in
166+
lexbuf.lex_curr_p <- suffix_start;
167+
Queue.add
168+
{ token; start_pos = suffix_start; end_pos = suffix_end }
169+
deferred_tokens
170+
171+
(* Note [Lexing hack for float#]
172+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
173+
This note describes a non-backward-compatible Jane Street--internal change to
174+
the lexer.
175+
176+
We want the lexer to lex [float#] differently than [float #]. [float#] is the
177+
new syntax for the unboxed float type. It veers close to the syntax for the
178+
type of all objects belonging to a class [c], which is [#c]. The way we
179+
navigate this veering is by producing the following tokens for these source
180+
program examples, where LIDENT(s) is an LIDENT with text [s].
181+
182+
float#c ==> LIDENT(float) HASH_SUFFIX LIDENT(c)
183+
float# c ==> LIDENT(float) HASH_SUFFIX LIDENT(c)
184+
float # c ==> LIDENT(float) HASH LIDENT(c)
185+
float #c ==> LIDENT(float) HASH LIDENT(c)
186+
187+
(A) The parser interprets [LIDENT(float) HASH_SUFFIX LIDENT(c)] as
188+
"the type constructor [c] applied to the unboxed float type."
189+
(B) The parser interprets [LIDENT(float) HASH LIDENT(c)] as
190+
"the type constructor [#c] applied to the usual boxed float type."
191+
192+
This is not a backward-compatible change. In upstream ocaml, the lexer
193+
produces [LIDENT(float) HASH LIDENT(c)] for all the above source programs.
194+
195+
But, this isn't problematic: everybody puts a space before '#c' to mean (B).
196+
No existing code writes things like [float#c] or indeed [float# c].
197+
198+
We accomplish this hack by setting some global mutable state upon seeing
199+
an identifier immediately followed by a hash. When that state is set, we
200+
will produce [HASH_SUFFIX] the next time the lexer is called. This is
201+
done in [enqueue_hash_suffix_from_end_of_lexbuf_window].
202+
203+
Note [Lexing hack for hash operators]
204+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
205+
To complicate the above story, we don't want to treat the # in the
206+
below program as HASH_SUFFIX:
207+
208+
x#~#y
209+
210+
We instead want:
211+
212+
x#~#y ==> LIDENT(x) HASHOP(#~#) LIDENT(y)
213+
214+
This is to allow for infix hash operators. We add an additional hack, in
215+
the style of Note [Lexing hack for float#], where the lexer consumes [x#~#]
216+
all at once, but produces LIDENT(x) from the current call to the lexer and
217+
HASHOP(#~#) from the next call to the lexer. This is done in
218+
[enqueue_hashop_from_end_of_lexbuf_window].
219+
*)
220+
221+
let enqueue_hash_suffix_from_end_of_lexbuf_window lexbuf =
222+
enqueue_token_from_end_of_lexbuf_window lexbuf HASH_SUFFIX ~len:1
223+
224+
let enqueue_hashop_from_end_of_lexbuf_window lexbuf ~hashop =
225+
enqueue_token_from_end_of_lexbuf_window lexbuf (HASHOP hashop)
226+
~len:(String.length hashop)
227+
131228
(* Escaped chars are interpreted in strings unless they are in comments. *)
132229
let store_escaped_char lexbuf c =
133230
if in_comment () then store_lexeme lexbuf else store_string_char c
@@ -419,8 +516,33 @@ rule token = parse
419516
| "?" (lowercase_latin1 identchar_latin1 * as name) ':'
420517
{ warn_latin1 lexbuf;
421518
OPTLABEL name }
519+
(* Lowercase identifiers are split into 3 cases, and the order matters
520+
(longest to shortest).
521+
*)
522+
| (lowercase identchar * as name) ('#' symbolchar_or_hash+ as hashop)
523+
(* See Note [Lexing hack for hash operators] *)
524+
{ enqueue_hashop_from_end_of_lexbuf_window lexbuf ~hashop;
525+
lookup_keyword name }
526+
| (lowercase identchar * as name) '#'
527+
(* See Note [Lexing hack for float#] *)
528+
{ enqueue_hash_suffix_from_end_of_lexbuf_window lexbuf;
529+
lookup_keyword name }
422530
| lowercase identchar * as name
423531
{ lookup_keyword name }
532+
(* Lowercase latin1 identifiers are split into 3 cases, and the order matters
533+
(longest to shortest).
534+
*)
535+
| (lowercase_latin1 identchar_latin1 * as name)
536+
('#' symbolchar_or_hash+ as hashop)
537+
(* See Note [Lexing hack for hash operators] *)
538+
{ warn_latin1 lexbuf;
539+
enqueue_hashop_from_end_of_lexbuf_window lexbuf ~hashop;
540+
LIDENT name }
541+
| (lowercase_latin1 identchar_latin1 * as name) '#'
542+
(* See Note [Lexing hack for float#] *)
543+
{ warn_latin1 lexbuf;
544+
enqueue_hash_suffix_from_end_of_lexbuf_window lexbuf;
545+
LIDENT name }
424546
| lowercase_latin1 identchar_latin1 * as name
425547
{ warn_latin1 lexbuf; LIDENT name }
426548
| uppercase identchar * as name
@@ -775,6 +897,13 @@ and skip_hash_bang = parse
775897
| "" { () }
776898
777899
{
900+
let token lexbuf =
901+
match Queue.take_opt deferred_tokens with
902+
| None -> token lexbuf
903+
| Some { token; start_pos; end_pos } ->
904+
lexbuf.lex_start_p <- start_pos;
905+
lexbuf.lex_curr_p <- end_pos;
906+
token
778907
779908
let token_with_comments lexbuf =
780909
match !preprocessor with

parsing/parser.mly

+38-11
Original file line numberDiff line numberDiff line change
@@ -812,6 +812,16 @@ let unboxed_float sloc sign (f, m) =
812812
assert_unboxed_literals ~loc:(make_loc sloc);
813813
Pconst_float (with_sign sign f, m)
814814

815+
(* Unboxed float type *)
816+
817+
let assert_unboxed_float_type ~loc =
818+
Language_extension.(
819+
Jane_syntax_parsing.assert_extension_enabled ~loc Layouts Alpha)
820+
821+
let unboxed_float_type sloc tys =
822+
assert_unboxed_float_type ~loc:(make_loc sloc);
823+
Ptyp_constr (mkloc (Lident "float#") (make_loc sloc), tys)
824+
815825
(* Jane syntax *)
816826

817827
let mkexp_jane_syntax
@@ -946,6 +956,7 @@ let mkpat_jane_syntax
946956
%token SEMI ";"
947957
%token SEMISEMI ";;"
948958
%token HASH "#"
959+
%token HASH_SUFFIX "# "
949960
%token <string> HASHOP "##" (* just an example *)
950961
%token SIG "sig"
951962
%token STAR "*"
@@ -1030,7 +1041,7 @@ The precedences must be listed from low to high.
10301041
%nonassoc prec_constant_constructor /* cf. simple_expr (C versus C x) */
10311042
%nonassoc prec_constr_appl /* above AS BAR COLONCOLON COMMA */
10321043
%nonassoc below_HASH
1033-
%nonassoc HASH /* simple_expr/toplevel_directive */
1044+
%nonassoc HASH HASH_SUFFIX /* simple_expr/toplevel_directive */
10341045
%left HASHOP
10351046
%nonassoc below_DOT
10361047
%nonassoc DOT DOTOP
@@ -2743,6 +2754,11 @@ comprehension_clause:
27432754
{ $1 }
27442755
;
27452756

2757+
%inline hash:
2758+
| HASH { () }
2759+
| HASH_SUFFIX { () }
2760+
;
2761+
27462762
%inline simple_expr_:
27472763
| mkrhs(val_longident)
27482764
{ Pexp_ident ($1) }
@@ -2771,7 +2787,7 @@ comprehension_clause:
27712787
Pexp_open(od, mkexp ~loc:$sloc (Pexp_override $4)) }
27722788
| mod_longident DOT LBRACELESS object_expr_content error
27732789
{ unclosed "{<" $loc($3) ">}" $loc($5) }
2774-
| simple_expr HASH mkrhs(label)
2790+
| simple_expr hash mkrhs(label)
27752791
{ Pexp_send($1, $3) }
27762792
| simple_expr op(HASHOP) simple_expr
27772793
{ mkinfix $1 $2 $3 }
@@ -3170,7 +3186,7 @@ simple_pattern_not_ident:
31703186
{ Ppat_construct($1, None) }
31713187
| name_tag
31723188
{ Ppat_variant($1, None) }
3173-
| HASH mkrhs(type_longident)
3189+
| hash mkrhs(type_longident)
31743190
{ Ppat_type ($2) }
31753191
| mkrhs(mod_longident) DOT simple_delimited_pattern
31763192
{ Ppat_open($1, $3) }
@@ -3836,7 +3852,18 @@ atomic_type:
38363852
{ Ptyp_any }
38373853
| tys = actual_type_parameters
38383854
tid = mkrhs(type_longident)
3839-
{ Ptyp_constr(tid, tys) }
3855+
HASH_SUFFIX
3856+
{ match tid.txt with
3857+
| Lident "float" ->
3858+
let ident_start = fst $loc(tid) in
3859+
let hash_end = snd $loc($3) in
3860+
unboxed_float_type (ident_start, hash_end) tys
3861+
| _ ->
3862+
not_expecting $sloc "Unboxed type other than float#"
3863+
}
3864+
| tys = actual_type_parameters
3865+
tid = mkrhs(type_longident)
3866+
{ Ptyp_constr(tid, tys) } %prec below_HASH
38403867
| LESS meth_list GREATER
38413868
{ let (f, c) = $2 in Ptyp_object (f, c) }
38423869
| LESS GREATER
@@ -3972,19 +3999,19 @@ constant:
39723999
| FLOAT { let (f, m) = $1 in Pconst_float (f, m) }
39734000
(* The unboxed literals have to be composed of multiple lexemes so we can
39744001
handle line number directives properly *)
3975-
| HASH INT { unboxed_int $sloc $loc($2) Positive $2 }
3976-
| HASH FLOAT { unboxed_float $sloc Positive $2 }
4002+
| hash INT { unboxed_int $sloc $loc($2) Positive $2 }
4003+
| hash FLOAT { unboxed_float $sloc Positive $2 }
39774004
;
39784005
signed_constant:
39794006
constant { $1 }
39804007
| MINUS INT { let (n, m) = $2 in Pconst_integer("-" ^ n, m) }
39814008
| MINUS FLOAT { let (f, m) = $2 in Pconst_float("-" ^ f, m) }
3982-
| MINUS HASH INT { unboxed_int $sloc $loc($3) Negative $3 }
3983-
| MINUS HASH FLOAT { unboxed_float $sloc Negative $3 }
4009+
| MINUS hash INT { unboxed_int $sloc $loc($3) Negative $3 }
4010+
| MINUS hash FLOAT { unboxed_float $sloc Negative $3 }
39844011
| PLUS INT { let (n, m) = $2 in Pconst_integer (n, m) }
39854012
| PLUS FLOAT { let (f, m) = $2 in Pconst_float(f, m) }
3986-
| PLUS HASH INT { unboxed_int $sloc $loc($3) Positive $3 }
3987-
| PLUS HASH FLOAT { unboxed_float $sloc Negative $3 }
4013+
| PLUS hash INT { unboxed_int $sloc $loc($3) Positive $3 }
4014+
| PLUS hash FLOAT { unboxed_float $sloc Negative $3 }
39884015
;
39894016

39904017
/* Identifiers and long identifiers */
@@ -4112,7 +4139,7 @@ any_longident:
41124139
/* Toplevel directives */
41134140
41144141
toplevel_directive:
4115-
HASH dir = mkrhs(ident)
4142+
hash dir = mkrhs(ident)
41164143
arg = ioption(mk_directive_arg(toplevel_directive_argument))
41174144
{ mk_directive ~loc:$sloc dir arg }
41184145
;

parsing/pprintast.ml

+1-1
Original file line numberDiff line numberDiff line change
@@ -413,7 +413,7 @@ and core_type1 ctxt f x =
413413
(list core_field_type ~sep:";") l
414414
field_var o (* Cf #7200 *)
415415
| Ptyp_class (li, l) -> (*FIXME*)
416-
pp f "@[<hov2>%a#%a@]"
416+
pp f "@[<hov2>%a@;#%a@]"
417417
(list (core_type ctxt) ~sep:"," ~first:"(" ~last:")") l
418418
longident_loc li
419419
| Ptyp_package (lid, cstrs) ->

testsuite/tests/typing-layouts/parsing.compilers.reference

+12
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,16 @@ Line 2, characters 11-15:
1111
2 | type ('a : valu) t0 = 'a list;;
1212
^^^^
1313
Error: Syntax error: layout expected.
14+
Line 2, characters 9-15:
15+
2 | type t = float#;;
16+
^^^^^^
17+
Error: This construct requires the alpha version of the extension "layouts", which is disabled and cannot be used
18+
Line 2, characters 9-13:
19+
2 | type t = int#;;
20+
^^^^
21+
Error: Syntax error: Unboxed type other than float# not expected.
22+
Line 2, characters 9-17:
23+
2 | type t = Float.t#;;
24+
^^^^^^^^
25+
Error: Syntax error: Unboxed type other than float# not expected.
1426

testsuite/tests/typing-layouts/parsing.ml

+6
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,9 @@ type ('a : immediate) t0 = 'a list;;
99
type ('a : void) t0 = 'a list;;
1010

1111
type ('a : valu) t0 = 'a list;;
12+
13+
type t = float#;;
14+
15+
type t = int#;;
16+
17+
type t = Float.t#;;

0 commit comments

Comments
 (0)