From 6b89938da755ed3226c259b03b6a194b09284f9c Mon Sep 17 00:00:00 2001
From: Sylvie <35663410+Rangi42@users.noreply.github.com>
Date: Sun, 22 Sep 2024 19:26:25 -0400
Subject: [PATCH] Avoid treating labels and macros differently in column 1
 (#1515)

Fixes #1512
---
 man/rgbasm.5                                 |  1 -
 src/asm/lexer.cpp                            | 14 +++++++++-
 src/asm/parser.y                             | 16 -----------
 test/asm/lexer-hack.asm                      | 28 ++++++++++++++++++++
 test/asm/lexer-hack.err                      |  9 +++++++
 test/asm/lexer-hack.out                      |  6 +++++
 test/asm/syntax-error-after-syntax-error.asm | 21 ++++++++-------
 test/asm/syntax-error-after-syntax-error.err | 16 +++++------
 test/asm/syntax-error-after-syntax-error.out |  3 +--
 9 files changed, 75 insertions(+), 39 deletions(-)
 create mode 100644 test/asm/lexer-hack.asm
 create mode 100644 test/asm/lexer-hack.err
 create mode 100644 test/asm/lexer-hack.out

diff --git a/man/rgbasm.5 b/man/rgbasm.5
index c2b7469e9..a133e8503 100644
--- a/man/rgbasm.5
+++ b/man/rgbasm.5
@@ -41,7 +41,6 @@ or
 Labels tie a name to a specific location within a section (see
 .Sx Labels
 below).
-They must come first in the line.
 .Pp
 Instructions are assembled into Game Boy opcodes.
 Multiple instructions on one line can be separated by double colons
diff --git a/src/asm/lexer.cpp b/src/asm/lexer.cpp
index 71472a86e..1b57bb8d7 100644
--- a/src/asm/lexer.cpp
+++ b/src/asm/lexer.cpp
@@ -1849,7 +1849,19 @@ static Token yylex_NORMAL() {
 					}
 				}
 
-				if (token.type == T_(ID) && (lexerState->atLineStart || peek() == ':'))
+				// This is a "lexer hack"! We need it to distinguish between label definitions
+				// (which start with `LABEL`) and macro invocations (which start with `ID`).
+				//
+				// If we had one `IDENTIFIER` token, the parser would need to perform "lookahead"
+				// to determine which rule applies. But since macros need to enter "raw" mode to
+				// parse their arguments, which may not even be valid tokens in "normal" mode, we
+				// cannot use lookahead to check for the presence of a `COLON`.
+				//
+				// Instead, we have separate `ID` and `LABEL` tokens, lexing as a `LABEL` if a ':'
+				// character *immediately* follows the identifier. Thus, at the beginning of a line,
+				// "Label:" and "mac:" are treated as label definitions, but "Label :" and "mac :"
+				// are treated as macro invocations.
+				if (token.type == T_(ID) && peek() == ':')
 					token.type = T_(LABEL);
 
 				return token;
diff --git a/src/asm/parser.y b/src/asm/parser.y
index 83d6bdaa8..3de4922d2 100644
--- a/src/asm/parser.y
+++ b/src/asm/parser.y
@@ -433,22 +433,6 @@ line:
 		fstk_StopRept();
 		yyerrok;
 	}
-	// Hint about unindented macros parsed as labels
-	| LABEL error {
-		lexer_SetMode(LEXER_NORMAL);
-		lexer_ToggleStringExpansion(true);
-	} endofline {
-		Symbol *macro = sym_FindExactSymbol($1);
-
-		if (macro && macro->type == SYM_MACRO)
-			fprintf(
-			    stderr,
-			    "    To invoke `%s` as a macro it must be indented\n",
-			    $1.c_str()
-			);
-		fstk_StopRept();
-		yyerrok;
-	}
 ;
 
 endofline: NEWLINE | EOB;
diff --git a/test/asm/lexer-hack.asm b/test/asm/lexer-hack.asm
new file mode 100644
index 000000000..83e2d41b9
--- /dev/null
+++ b/test/asm/lexer-hack.asm
@@ -0,0 +1,28 @@
+MACRO mac
+	println "got {d:_NARG} args: \#"
+ENDM
+
+; indented, these were always macro invocations
+	mac
+	mac ro
+	mac : ld a, 1
+
+; in column 1, we historically treated these as labels
+mac
+mac ro
+mac : ld b, 2
+
+SECTION "test", ROM0
+
+; a colon makes these into labels
+	Label1: ld c, 3
+Label2: ld d, 4
+
+; a macro invocation when already defined as a label
+	Label1 args
+; and a label definition when already defined as a macro
+mac: ld e, 5
+
+; the space before the colon matters!
+	undef :
+undef :
diff --git a/test/asm/lexer-hack.err b/test/asm/lexer-hack.err
new file mode 100644
index 000000000..1b3bbd4fd
--- /dev/null
+++ b/test/asm/lexer-hack.err
@@ -0,0 +1,9 @@
+error: lexer-hack.asm(22):
+    "Label1" is not a macro
+error: lexer-hack.asm(24):
+    'mac' already defined at lexer-hack.asm(1)
+error: lexer-hack.asm(27):
+    Macro "undef" not defined
+error: lexer-hack.asm(28):
+    Macro "undef" not defined
+error: Assembly aborted (4 errors)!
diff --git a/test/asm/lexer-hack.out b/test/asm/lexer-hack.out
new file mode 100644
index 000000000..a1eb8d39b
--- /dev/null
+++ b/test/asm/lexer-hack.out
@@ -0,0 +1,6 @@
+got 0 args: 
+got 1 args: ro
+got 2 args: : ld a,1
+got 0 args: 
+got 1 args: ro
+got 2 args: : ld b,2
diff --git a/test/asm/syntax-error-after-syntax-error.asm b/test/asm/syntax-error-after-syntax-error.asm
index 59ba1e439..1e2e2161b 100644
--- a/test/asm/syntax-error-after-syntax-error.asm
+++ b/test/asm/syntax-error-after-syntax-error.asm
@@ -1,10 +1,11 @@
-MACRO mac
-	println "got {d:_NARG} args"
-ENDM
-	mac
-	mac 42
-notmac
-mac
-mac 42
-mac::
-mac ::
+def x = 1 ; so far so good...
+def n equ 2 + * / ^ 3 ; oops
+def s equs "no closing quote, lol
+section "test", rom0 ; good again
+ld a, 42 ; keep going...
+ld xor, ret ; oh no :(
+label1: ; yes...
+label2:: ; yes...
+label3::: ; no!
+halt stop abort ; please
+println "finally!"
diff --git a/test/asm/syntax-error-after-syntax-error.err b/test/asm/syntax-error-after-syntax-error.err
index 1712c3ee5..b5262ed9e 100644
--- a/test/asm/syntax-error-after-syntax-error.err
+++ b/test/asm/syntax-error-after-syntax-error.err
@@ -1,13 +1,11 @@
+error: syntax-error-after-syntax-error.asm(2):
+    syntax error, unexpected *
+error: syntax-error-after-syntax-error.asm(3):
+    Unterminated string
 error: syntax-error-after-syntax-error.asm(6):
-    syntax error, unexpected newline, expecting : or ::
-error: syntax-error-after-syntax-error.asm(7):
-    syntax error, unexpected newline, expecting : or ::
-    To invoke `mac` as a macro it must be indented
-error: syntax-error-after-syntax-error.asm(8):
-    syntax error, unexpected number, expecting : or ::
-    To invoke `mac` as a macro it must be indented
+    syntax error, unexpected xor
 error: syntax-error-after-syntax-error.asm(9):
-    'mac' already defined at syntax-error-after-syntax-error.asm(1)
+    syntax error, unexpected :
 error: syntax-error-after-syntax-error.asm(10):
-    'mac' already defined at syntax-error-after-syntax-error.asm(1)
+    syntax error, unexpected stop, expecting newline or end of buffer or ::
 error: Assembly aborted (5 errors)!
diff --git a/test/asm/syntax-error-after-syntax-error.out b/test/asm/syntax-error-after-syntax-error.out
index 23732f2e8..78c793bf3 100644
--- a/test/asm/syntax-error-after-syntax-error.out
+++ b/test/asm/syntax-error-after-syntax-error.out
@@ -1,2 +1 @@
-got 0 args
-got 1 args
+finally!