Skip to content

Lex unboxed float and int literals as single lexemes #1469

New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

Merged
merged 2 commits into from
Jun 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12,424 changes: 6,037 additions & 6,387 deletions ocaml/boot/menhir/parser.ml

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions ocaml/boot/menhir/parser.mli
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@ type token =
| IN
| IF
| HASH_SUFFIX
| HASH_INT of (string * char option)
| HASH_FLOAT of (string * char option)
| HASHOP of (string)
| HASH
| GREATERRBRACKET
Expand Down
112 changes: 95 additions & 17 deletions ocaml/parsing/lexer.mll
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,13 @@ let is_in_string = ref false
let in_string () = !is_in_string
let print_warnings = ref true

let at_beginning_of_line pos = (pos.pos_cnum = pos.pos_bol)

(* See the comment on the [directive] lexer. *)
type directive_lexing_already_consumed =
| Hash
| Hash_and_line_num of { line_num : string }

type deferred_token =
{ token : token
; start_pos : Lexing.position
Expand Down Expand Up @@ -267,6 +274,25 @@ let wrap_comment_lexer comment lexbuf =
let error lexbuf e = raise (Error(e, Location.curr lexbuf))
let error_loc loc e = raise (Error(e, loc))

let directive_error
(lexbuf : Lexing.lexbuf) explanation ~directive ~already_consumed
=
let directive_prefix =
match already_consumed with
| Hash -> "#"
| Hash_and_line_num { line_num } -> "#" ^ line_num
in
(* Set the lexbuf's current window to extend to the start of
the directive so the error message's location is more accurate.
*)
lexbuf.lex_start_p <-
{ lexbuf.lex_start_p with
pos_cnum =
lexbuf.lex_start_p.pos_cnum - String.length directive_prefix
};
error lexbuf
(Invalid_directive (directive_prefix ^ directive, Some explanation))

(* to translate escape sequences *)

let digit_value c =
Expand Down Expand Up @@ -384,6 +410,18 @@ let add_docstring_comment ds =

let comments () = List.rev !comment_list

let float ~maybe_hash lit modifier =
match maybe_hash with
| "#" -> HASH_FLOAT (lit, modifier)
| "" -> FLOAT (lit, modifier)
| unexpected -> fatal_error ("expected # or empty string: " ^ unexpected)

let int ~maybe_hash lit modifier =
match maybe_hash with
| "#" -> HASH_INT (lit, modifier)
| "" -> INT (lit, modifier)
| unexpected -> fatal_error ("expected # or empty string: " ^ unexpected)

(* Error report *)

open Format
Expand Down Expand Up @@ -549,14 +587,28 @@ rule token = parse
{ UIDENT name } (* No capitalized keywords *)
| uppercase_latin1 identchar_latin1 * as name
{ warn_latin1 lexbuf; UIDENT name }
| int_literal as lit { INT (lit, None) }
| (int_literal as lit) (literal_modifier as modif)
{ INT (lit, Some modif) }
| float_literal | hex_float_literal as lit
{ FLOAT (lit, None) }
| (float_literal | hex_float_literal as lit) (literal_modifier as modif)
{ FLOAT (lit, Some modif) }
| (float_literal | hex_float_literal | int_literal) identchar+ as invalid
(* This matches either an integer literal or a directive. If the text "#2"
appears at the beginning of a line that lexes as a directive, then it
should be treated as a directive and not an unboxed int. This is acceptable
because "#2" isn't a valid unboxed int anyway because it lacks a suffix;
the parser rejects unboxed-ints-lacking-suffixes with a more descriptive
error message.
*)
| ('#'? as maybe_hash) (int_literal as lit)
{ if at_beginning_of_line lexbuf.lex_start_p && maybe_hash = "#" then
try directive (Hash_and_line_num { line_num = lit }) lexbuf
with Failure _ -> int ~maybe_hash lit None
else int ~maybe_hash lit None
}
| ('#'? as maybe_hash) (int_literal as lit) (literal_modifier as modif)
{ int ~maybe_hash lit (Some modif) }
| ('#'? as maybe_hash)
(float_literal | hex_float_literal as lit)
{ float ~maybe_hash lit None }
| ('#'? as maybe_hash)
(float_literal | hex_float_literal as lit) (literal_modifier as modif)
{ float ~maybe_hash lit (Some modif) }
| '#'? (float_literal | hex_float_literal | int_literal) identchar+ as invalid
{ error lexbuf (Invalid_literal invalid) }
| "\""
{ let s, loc = wrap_string_lexer string lexbuf in
Expand Down Expand Up @@ -641,10 +693,9 @@ rule token = parse
STAR
}
| "#"
{ let at_beginning_of_line pos = (pos.pos_cnum = pos.pos_bol) in
if not (at_beginning_of_line lexbuf.lex_start_p)
{ if not (at_beginning_of_line lexbuf.lex_start_p)
then HASH
else try directive lexbuf with Failure _ -> HASH
else try directive Hash lexbuf with Failure _ -> HASH
}
| "&" { AMPERSAND }
| "&&" { AMPERAMPER }
Expand Down Expand Up @@ -721,16 +772,43 @@ rule token = parse
| (_ as illegal_char)
{ error lexbuf (Illegal_character illegal_char) }

and directive = parse
| ([' ' '\t']* (['0'-'9']+ as num) [' ' '\t']*
("\"" ([^ '\010' '\013' '\"' ] * as name) "\"") as directive)
(* An example of a directive is:

#4 "filename.ml"

Here, 4 is the line number and filename.ml is the file name. The '#' must
appear in column 0.

The [directive] lexer is called when some portion of the start of
the line was already consumed, either just the '#' or the '#4'. That's
indicated by the [already_consumed] argument. The caller is responsible
for checking that the '#' appears in column 0.

The [directive] lexer always attempts to read the line number from the
lexbuf. It expects to receive a line number from exactly one source (either
the lexbuf or the [already_consumed] argument, but not both) and will fail if
this isn't the case.
*)
and directive already_consumed = parse
| ([' ' '\t']* (['0'-'9']+? as line_num_opt) [' ' '\t']*
("\"" ([^ '\010' '\013' '\"' ] * as name) "\"") as directive)
[^ '\010' '\013'] *
{
{ let num =
match already_consumed, line_num_opt with
| Hash_and_line_num { line_num }, "" -> line_num
| Hash, "" ->
directive_error lexbuf "expected line number"
~already_consumed ~directive
| Hash_and_line_num _, _ ->
directive_error lexbuf "expected just one line number"
~already_consumed ~directive
| Hash, num -> num
in
match int_of_string num with
| exception _ ->
(* PR#7165 *)
let explanation = "line number out of range" in
error lexbuf (Invalid_directive ("#" ^ directive, Some explanation))
directive_error lexbuf "line number out of range"
~already_consumed ~directive
| line_num ->
(* Documentation says that the line number should be
positive, but we have never guarded against this and it
Expand Down
30 changes: 15 additions & 15 deletions ocaml/parsing/parser.mly
Original file line number Diff line number Diff line change
Expand Up @@ -804,7 +804,7 @@ let unboxed_int sloc int_loc sign (n, m) =
Pconst_integer (with_sign sign n, m)
| None ->
if Language_extension.is_enabled unboxed_literals_extension then
expecting int_loc "integer literal with type-specifying suffix"
expecting int_loc "unboxed integer literal with type-specifying suffix"
else
not_expecting sloc "line number directive"

Expand Down Expand Up @@ -882,7 +882,8 @@ let mkpat_jane_syntax
%token EXCLAVE "exclave_"
%token EXTERNAL "external"
%token FALSE "false"
%token <string * char option> FLOAT "42.0" (* just an example *)
%token <string * char option> FLOAT "42.0" (* just an example *)
%token <string * char option> HASH_FLOAT "#42.0" (* just an example *)
%token FOR "for"
%token FUN "fun"
%token FUNCTION "function"
Expand All @@ -904,7 +905,8 @@ let mkpat_jane_syntax
%token <string> ANDOP "and*" (* just an example *)
%token INHERIT "inherit"
%token INITIALIZER "initializer"
%token <string * char option> INT "42" (* just an example *)
%token <string * char option> INT "42" (* just an example *)
%token <string * char option> HASH_INT "#42l" (* just an example *)
%token <string> LABEL "~label:" (* just an example *)
%token LAZY "lazy"
%token LBRACE "{"
Expand Down Expand Up @@ -1046,7 +1048,7 @@ The precedences must be listed from low to high.
%nonassoc below_DOT
%nonassoc DOT DOTOP
/* Finally, the first tokens of simple_expr are above everything else. */
%nonassoc BACKQUOTE BANG BEGIN CHAR FALSE FLOAT INT OBJECT
%nonassoc BACKQUOTE BANG BEGIN CHAR FALSE FLOAT HASH_FLOAT INT HASH_INT OBJECT
LBRACE LBRACELESS LBRACKET LBRACKETBAR LBRACKETCOLON LIDENT LPAREN
NEW PREFIXOP STRING TRUE UIDENT
LBRACKETPERCENT QUOTED_STRING_EXPR
Expand Down Expand Up @@ -2108,7 +2110,7 @@ formal_class_parameters:
(* Class expressions. *)

class_expr:
class_simple_expr %prec below_HASH
class_simple_expr
{ $1 }
| FUN attributes class_fun_def
{ wrap_class_attrs ~loc:$sloc $3 $2 }
Expand All @@ -2121,7 +2123,7 @@ class_expr:
| class_expr attribute
{ Cl.attr $1 $2 }
| mkclass(
class_simple_expr nonempty_llist(labeled_simple_expr) %prec below_HASH
class_simple_expr nonempty_llist(labeled_simple_expr)
{ Pcl_apply($1, $2) }
| extension
{ Pcl_extension $1 }
Expand Down Expand Up @@ -2601,7 +2603,7 @@ expr:
{ Pexp_lazy $3, $2 }
;
%inline expr_:
| simple_expr nonempty_llist(labeled_simple_expr) %prec below_HASH
| simple_expr nonempty_llist(labeled_simple_expr)
{ Pexp_apply($1, $2) }
| expr_comma_list %prec below_COMMA
{ Pexp_tuple($1) }
Expand Down Expand Up @@ -3997,21 +3999,19 @@ constant:
| CHAR { Pconst_char $1 }
| STRING { let (s, strloc, d) = $1 in Pconst_string (s, strloc, d) }
| FLOAT { let (f, m) = $1 in Pconst_float (f, m) }
(* The unboxed literals have to be composed of multiple lexemes so we can
handle line number directives properly *)
| hash INT { unboxed_int $sloc $loc($2) Positive $2 }
| hash FLOAT { unboxed_float $sloc Positive $2 }
| HASH_INT { unboxed_int $sloc $sloc Positive $1 }
| HASH_FLOAT { unboxed_float $sloc Positive $1 }
;
signed_constant:
constant { $1 }
| MINUS INT { let (n, m) = $2 in Pconst_integer("-" ^ n, m) }
| MINUS FLOAT { let (f, m) = $2 in Pconst_float("-" ^ f, m) }
| MINUS hash INT { unboxed_int $sloc $loc($3) Negative $3 }
| MINUS hash FLOAT { unboxed_float $sloc Negative $3 }
| MINUS HASH_INT { unboxed_int $sloc $loc($2) Negative $2 }
| MINUS HASH_FLOAT { unboxed_float $sloc Negative $2 }
| PLUS INT { let (n, m) = $2 in Pconst_integer (n, m) }
| PLUS FLOAT { let (f, m) = $2 in Pconst_float(f, m) }
| PLUS hash INT { unboxed_int $sloc $loc($3) Positive $3 }
| PLUS hash FLOAT { unboxed_float $sloc Negative $3 }
| PLUS HASH_INT { unboxed_int $sloc $loc($2) Positive $2 }
| PLUS HASH_FLOAT { unboxed_float $sloc Negative $2 }
;

/* Identifiers and long identifiers */
Expand Down
4 changes: 2 additions & 2 deletions ocaml/testsuite/tests/parsing/pr7165.compilers.reference
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
File "pr7165.ml", line 12, characters 1-23:
File "pr7165.ml", line 12, characters 0-23:
12 | #9342101923012312312 ""
^^^^^^^^^^^^^^^^^^^^^^
^^^^^^^^^^^^^^^^^^^^^^^
Error: Invalid lexer directive "#9342101923012312312 \"\"": line number out of range
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@

Line 6, characters 45-47:
Line 6, characters 44-47:
6 | let unboxed_integers_must_have_a_modifier = #42
^^
Error: Syntax error: integer literal with type-specifying suffix expected.
^^^
Error: Syntax error: unboxed integer literal with type-specifying suffix expected.

27 changes: 27 additions & 0 deletions ocaml/testsuite/tests/typing-layouts/literals.ml
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,30 @@ let five_point_three_seven_five_in_floating_hexponent = #0xa.cp-1
[%%expect{|
val five_point_three_seven_five_in_floating_hexponent : float = 5.375
|}]

(* Unboxed literals at the beginning of the line aren't directives. *)
let f _ _ = ();;
let () = f
#2.
#2L
;;

let () = f
#2. #2.
;;

[%%expect{|
val f : 'a -> 'b -> unit = <fun>
|}];;

let () = f
(* This lexes as a directive. #2 is not a valid unboxed int literal
anyway, as it lacks a suffix.
*)
#2 "literals.ml"
()
()
;;

[%%expect{|
|}];;
4 changes: 2 additions & 2 deletions ocaml/testsuite/tests/typing-layouts/unboxed_floats.ml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ val id : float# -> float# = <fun>

let add (x : float#) (y : float#) = x +. y;;

add #4.0 (#5.0);;
add #4.0 #5.0;;
[%%expect {|
val add : float# -> float# -> float = <fun>
- : float = 9.
Expand All @@ -26,7 +26,7 @@ val add : float# -> float# -> float = <fun>
let apply (f : float# -> float# -> float#) (x : float#) (y : float#) =
f x y;;

apply add (#4.0) (#5.0);;
apply add #4.0 #5.0;;
[%%expect {|
val apply : (float# -> float# -> float#) -> float# -> float# -> float# =
<fun>
Expand Down