@@ -128,6 +128,103 @@ let is_in_string = ref false
128
128
(* Whether the lexer is currently inside a string literal; reads the
   [is_in_string] flag defined above. *)
let in_string = fun () -> !is_in_string

(* Controls whether lexer warnings (e.g. latin1 identifiers) are emitted. *)
let print_warnings = ref true
(* A token to be returned by the *next* call to the lexer, together with
   the positions the lexbuf window should be set to when it is returned. *)
type deferred_token =
  { token : token
  ; start_pos : Lexing.position
  ; end_pos : Lexing.position
  }

(* This queue will only ever have 0 or 1 elements in it. We use it
   instead of an [option ref] for its convenient interface.
*)
let deferred_tokens : deferred_token Queue.t = Queue.create ()
142
(* Effectively splits the text in the lexer's current "window" (defined below)
   into two halves. The current call to the lexer will return the first half of
   the text in the window, and the next call to the lexer will return the second
   half (of length [len]) of the text in the window.

   "window" refers to the text matched by a production of the lexer. It spans
   from [lexer.lex_start_p] to [lexer.lex_curr_p].

   The function accomplishes this splitting by doing two things:
    - It sets the current window of the lexbuf to only account for the
      first half of the text. (The first half is of length: |text|-len.)
    - It enqueues a token into [deferred_tokens] such that, the next time the
      lexer is called, it will return the specified [token] *and* set the window
      of the lexbuf to account for the second half of the text. (The second half
      is of length: |len|.)

   This business with setting the window of the lexbuf is only so that error
   messages point at the right place in the program text.
*)
let enqueue_token_from_end_of_lexbuf_window (lexbuf : Lexing.lexbuf) token ~len =
  let suffix_end = lexbuf.lex_curr_p in
  (* The deferred token starts [len] characters before the end of the
     current window. *)
  let suffix_start =
    { suffix_end with pos_cnum = suffix_end.pos_cnum - len }
  in
  (* Shrink the current window so this call reports only the first half;
     the mangled source read [< -] here, which is not the assignment
     operator — restored to [<-]. *)
  lexbuf.lex_curr_p <- suffix_start;
  Queue.add
    { token; start_pos = suffix_start; end_pos = suffix_end }
    deferred_tokens
171
(* Note [Lexing hack for float#]
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   This note describes a non-backward-compatible Jane Street--internal change to
   the lexer.

   We want the lexer to lex [float#] differently than [float #]. [float#] is the
   new syntax for the unboxed float type. It veers close to the syntax for the
   type of all objects belonging to a class [c], which is [#c]. The way we
   navigate this veering is by producing the following tokens for these source
   program examples, where LIDENT(s) is an LIDENT with text [s].

   float#c ==> LIDENT(float) HASH_SUFFIX LIDENT(c)
   float# c ==> LIDENT(float) HASH_SUFFIX LIDENT(c)
   float # c ==> LIDENT(float) HASH LIDENT(c)
   float #c ==> LIDENT(float) HASH LIDENT(c)

   (A) The parser interprets [LIDENT(float) HASH_SUFFIX LIDENT(c)] as
       "the type constructor [c] applied to the unboxed float type."
   (B) The parser interprets [LIDENT(float) HASH LIDENT(c)] as
       "the type constructor [#c] applied to the usual boxed float type."

   This is not a backward-compatible change. In upstream ocaml, the lexer
   produces [LIDENT(float) HASH LIDENT(c)] for all the above source programs.

   But, this isn't problematic: everybody puts a space before '#c' to mean (B).
   No existing code writes things like [float#c] or indeed [float# c].

   We accomplish this hack by setting some global mutable state upon seeing
   an identifier immediately followed by a hash. When that state is set, we
   will produce [HASH_SUFFIX] the next time the lexer is called. This is
   done in [enqueue_hash_suffix_from_end_of_lexbuf_window].

   Note [Lexing hack for hash operators]
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   To complicate the above story, we don't want to treat the # in the
   below program as HASH_SUFFIX:

   x#~#y

   We instead want:

   x#~#y ==> LIDENT(x) HASHOP(#~#) LIDENT(y)

   This is to allow for infix hash operators. We add an additional hack, in
   the style of Note [Lexing hack for float#], where the lexer consumes [x#~#]
   all at once, but produces LIDENT(x) from the current call to the lexer and
   HASHOP(#~#) from the next call to the lexer. This is done in
   [enqueue_hashop_from_end_of_lexbuf_window].
*)

(* Defer a [HASH_SUFFIX] token covering the final '#' (length 1) of the
   current lexbuf window. See Note [Lexing hack for float#]. *)
let enqueue_hash_suffix_from_end_of_lexbuf_window lexbuf =
  enqueue_token_from_end_of_lexbuf_window lexbuf HASH_SUFFIX ~len:1

(* Defer a [HASHOP hashop] token covering the trailing operator text of the
   current lexbuf window. See Note [Lexing hack for hash operators]. *)
let enqueue_hashop_from_end_of_lexbuf_window lexbuf ~hashop =
  enqueue_token_from_end_of_lexbuf_window lexbuf (HASHOP hashop)
    ~len:(String.length hashop)
131
228
(* Escaped chars are interpreted in strings unless they are in comments. *)
(* In a comment the raw lexeme is preserved verbatim; in a string the
   already-decoded character [c] is stored instead. *)
let store_escaped_char lexbuf c =
  if in_comment () then store_lexeme lexbuf else store_string_char c
@@ -419,8 +516,33 @@ rule token = parse
419
516
| " ?" (lowercase_latin1 identchar_latin1 * as name) ':'
420
517
{ warn_latin1 lexbuf;
421
518
OPTLABEL name }
519
+ (* Lowercase identifiers are split into 3 cases, and the order matters
520
+ (longest to shortest).
521
+ *)
522
+ | (lowercase identchar * as name) ('#' symbolchar_or_hash+ as hashop)
523
+ (* See Note [Lexing hack for hash operators] *)
524
+ { enqueue_hashop_from_end_of_lexbuf_window lexbuf ~hashop ;
525
+ lookup_keyword name }
526
+ | (lowercase identchar * as name) '#'
527
+ (* See Note [Lexing hack for float#] *)
528
+ { enqueue_hash_suffix_from_end_of_lexbuf_window lexbuf;
529
+ lookup_keyword name }
422
530
| lowercase identchar * as name
423
531
{ lookup_keyword name }
532
+ (* Lowercase latin1 identifiers are split into 3 cases, and the order matters
533
+ (longest to shortest).
534
+ *)
535
+ | (lowercase_latin1 identchar_latin1 * as name)
536
+ ('#' symbolchar_or_hash+ as hashop)
537
+ (* See Note [Lexing hack for hash operators] *)
538
+ { warn_latin1 lexbuf;
539
+ enqueue_hashop_from_end_of_lexbuf_window lexbuf ~hashop ;
540
+ LIDENT name }
541
+ | (lowercase_latin1 identchar_latin1 * as name) '#'
542
+ (* See Note [Lexing hack for float#] *)
543
+ { warn_latin1 lexbuf;
544
+ enqueue_hash_suffix_from_end_of_lexbuf_window lexbuf;
545
+ LIDENT name }
424
546
| lowercase_latin1 identchar_latin1 * as name
425
547
{ warn_latin1 lexbuf; LIDENT name }
426
548
| uppercase identchar * as name
@@ -775,6 +897,13 @@ and skip_hash_bang = parse
775
897
| "" { () }
776
898
777
899
{
  (* Wraps — and deliberately shadows — the lexer-generated [token] entry
     point (the unqualified [token] in the [None] branch refers to the
     generated rule, since this [let] is not [rec]). If a token was deferred
     by [enqueue_token_from_end_of_lexbuf_window], return it now and set the
     lexbuf window to the positions recorded for it, so error messages point
     at the right program text; otherwise run the generated rule. *)
  let token lexbuf =
    match Queue.take_opt deferred_tokens with
    | None -> token lexbuf
    | Some { token; start_pos; end_pos } ->
      lexbuf.lex_start_p <- start_pos;
      lexbuf.lex_curr_p <- end_pos;
      token
778
907
779
908
let token_with_comments lexbuf =
780
909
match !preprocessor with
0 commit comments