@@ -21,7 +21,6 @@ import {
21
21
KeywordSyntaxKind ,
22
22
LanguageFeatureMinimumTarget ,
23
23
LanguageVariant ,
24
- lastOrUndefined ,
25
24
LineAndCharacter ,
26
25
MapLike ,
27
26
parsePseudoBigInt ,
@@ -1614,7 +1613,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
1614
1613
isRegularExpression && shouldEmitInvalidEscapeError && escapedValue >= 0xD800 && escapedValue <= 0xDBFF &&
1615
1614
pos + 6 < end && text . substring ( pos , pos + 2 ) === "\\u" && charCodeUnchecked ( pos + 2 ) !== CharacterCodes . openBrace
1616
1615
) {
1617
- // For regular expressions in Unicode mode, \u HexLeadSurrogate \u HexTrailSurrogate is treated as a single character
1616
+ // For regular expressions in any Unicode mode, \u HexLeadSurrogate \u HexTrailSurrogate is treated as a single character
1618
1617
// for the purpose of determining whether a character class range is out of order
1619
1618
// https://tc39.es/ecma262/#prod-RegExpUnicodeEscapeSequence
1620
1619
const nextStart = pos ;
@@ -2429,7 +2428,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2429
2428
pos = startOfRegExpBody ;
2430
2429
let inEscape = false ;
2431
2430
// Although nested character classes are allowed in Unicode Sets mode,
2432
- // an unescaped slash is nevertheless invalid even in a character class in Unicode mode.
2431
+ // an unescaped slash is nevertheless invalid even in a character class in any Unicode mode.
2433
2432
// Additionally, parsing nested character classes will misinterpret regexes like `/[[]/`
2434
2433
// as unterminated, consuming characters beyond the slash. (This even applies to `/[[]/v`,
2435
2434
// which should be parsed as a well-terminated regex with an incomplete character class.)
@@ -2438,13 +2437,8 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2438
2437
while ( true ) {
2439
2438
// If we reach the end of a file, or hit a newline, then this is an unterminated
2440
2439
// regex. Report error and return what we have so far.
2441
- if ( pos >= end ) {
2442
- tokenFlags |= TokenFlags . Unterminated ;
2443
- break ;
2444
- }
2445
-
2446
- const ch = charCodeUnchecked ( pos ) ;
2447
- if ( isLineBreak ( ch ) ) {
2440
+ const ch = charCodeChecked ( pos ) ;
2441
+ if ( ch === CharacterCodes . EOF || isLineBreak ( ch ) ) {
2448
2442
tokenFlags |= TokenFlags . Unterminated ;
2449
2443
break ;
2450
2444
}
@@ -2477,7 +2471,8 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2477
2471
pos = startOfRegExpBody ;
2478
2472
inEscape = false ;
2479
2473
let characterClassDepth = 0 ;
2480
- const bracketStack : CharacterCodes [ ] = [ ] ;
2474
+ let inDecimalQuantifier = false ;
2475
+ let groupDepth = 0 ;
2481
2476
while ( pos < endOfRegExpBody ) {
2482
2477
const ch = charCodeUnchecked ( pos ) ;
2483
2478
if ( inEscape ) {
@@ -2493,18 +2488,23 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2493
2488
characterClassDepth -- ;
2494
2489
}
2495
2490
else if ( ! characterClassDepth ) {
2496
- if ( ch === CharacterCodes . openParen ) {
2497
- bracketStack . push ( CharacterCodes . closeParen ) ;
2491
+ if ( ch === CharacterCodes . openBrace ) {
2492
+ inDecimalQuantifier = true ;
2498
2493
}
2499
- else if ( ch === CharacterCodes . openBrace ) {
2500
- bracketStack . push ( CharacterCodes . closeBrace ) ;
2494
+ else if ( ch === CharacterCodes . closeBrace && inDecimalQuantifier ) {
2495
+ inDecimalQuantifier = false ;
2501
2496
}
2502
- else if ( ch === lastOrUndefined ( bracketStack ) ) {
2503
- bracketStack . pop ( ) ;
2504
- }
2505
- else if ( ch === CharacterCodes . closeParen || ch === CharacterCodes . closeBracket || ch === CharacterCodes . closeBrace ) {
2506
- // We encountered an unbalanced bracket outside a character class. Treat this position as the end of regex.
2507
- break ;
2497
+ else if ( ! inDecimalQuantifier ) {
2498
+ if ( ch === CharacterCodes . openParen ) {
2499
+ groupDepth ++ ;
2500
+ }
2501
+ else if ( ch === CharacterCodes . closeParen && groupDepth ) {
2502
+ groupDepth -- ;
2503
+ }
2504
+ else if ( ch === CharacterCodes . closeParen || ch === CharacterCodes . closeBracket || ch === CharacterCodes . closeBrace ) {
2505
+ // We encountered an unbalanced bracket outside a character class. Treat this position as the end of regex.
2506
+ break ;
2507
+ }
2508
2508
}
2509
2509
}
2510
2510
pos ++ ;
@@ -2517,9 +2517,9 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2517
2517
// Consume the slash character
2518
2518
pos ++ ;
2519
2519
let regExpFlags = RegularExpressionFlags . None ;
2520
- while ( pos < end ) {
2521
- const ch = codePointUnchecked ( pos ) ;
2522
- if ( ! isIdentifierPart ( ch , languageVersion ) ) {
2520
+ while ( true ) {
2521
+ const ch = codePointChecked ( pos ) ;
2522
+ if ( ch === CharacterCodes . EOF || ! isIdentifierPart ( ch , languageVersion ) ) {
2523
2523
break ;
2524
2524
}
2525
2525
if ( reportErrors ) {
@@ -2530,7 +2530,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2530
2530
else if ( regExpFlags & flag ) {
2531
2531
error ( Diagnostics . Duplicate_regular_expression_flag , pos , 1 ) ;
2532
2532
}
2533
- else if ( ( ( regExpFlags | flag ) & RegularExpressionFlags . UnicodeMode ) === RegularExpressionFlags . UnicodeMode ) {
2533
+ else if ( ( ( regExpFlags | flag ) & RegularExpressionFlags . AnyUnicodeMode ) === RegularExpressionFlags . AnyUnicodeMode ) {
2534
2534
error ( Diagnostics . The_Unicode_u_flag_and_the_Unicode_Sets_v_flag_cannot_be_set_simultaneously , pos , 1 ) ;
2535
2535
}
2536
2536
else {
@@ -2560,9 +2560,9 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2560
2560
/** Grammar parameter */
2561
2561
var unicodeSetsMode = ! ! ( regExpFlags & RegularExpressionFlags . UnicodeSets ) ;
2562
2562
/** Grammar parameter */
2563
- var unicodeMode = ! ! ( regExpFlags & RegularExpressionFlags . UnicodeMode ) ;
2563
+ var anyUnicodeMode = ! ! ( regExpFlags & RegularExpressionFlags . AnyUnicodeMode ) ;
2564
2564
2565
- if ( unicodeMode ) {
2565
+ if ( anyUnicodeMode ) {
2566
2566
// Annex B treats any unicode mode as the strict syntax.
2567
2567
annexB = false ;
2568
2568
}
@@ -2719,7 +2719,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2719
2719
error ( Diagnostics . Incomplete_quantifier_Digit_expected , digitsStart , 0 ) ;
2720
2720
}
2721
2721
else {
2722
- if ( unicodeMode ) {
2722
+ if ( anyUnicodeMode ) {
2723
2723
error ( Diagnostics . Unexpected_0_Did_you_mean_to_escape_it_with_backslash , start , 1 , String . fromCharCode ( ch ) ) ;
2724
2724
}
2725
2725
isPreviousTermQuantifiable = true ;
@@ -2731,7 +2731,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2731
2731
}
2732
2732
}
2733
2733
else if ( ! min ) {
2734
- if ( unicodeMode ) {
2734
+ if ( anyUnicodeMode ) {
2735
2735
error ( Diagnostics . Unexpected_0_Did_you_mean_to_escape_it_with_backslash , start , 1 , String . fromCharCode ( ch ) ) ;
2736
2736
}
2737
2737
isPreviousTermQuantifiable = true ;
@@ -2775,7 +2775,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2775
2775
// falls through
2776
2776
case CharacterCodes . closeBracket :
2777
2777
case CharacterCodes . closeBrace :
2778
- if ( unicodeMode || ch === CharacterCodes . closeParen ) {
2778
+ if ( anyUnicodeMode || ch === CharacterCodes . closeParen ) {
2779
2779
error ( Diagnostics . Unexpected_0_Did_you_mean_to_escape_it_with_backslash , pos , 1 , String . fromCharCode ( ch ) ) ;
2780
2780
}
2781
2781
pos ++ ;
@@ -2832,7 +2832,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2832
2832
scanGroupName ( /*isReference*/ true ) ;
2833
2833
scanExpectedChar ( CharacterCodes . greaterThan ) ;
2834
2834
}
2835
- else if ( unicodeMode ) {
2835
+ else if ( anyUnicodeMode ) {
2836
2836
error ( Diagnostics . k_must_be_followed_by_a_capturing_group_name_enclosed_in_angle_brackets , pos - 2 , 2 ) ;
2837
2837
}
2838
2838
break ;
@@ -2875,14 +2875,17 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2875
2875
Debug . assertEqual ( charCodeUnchecked ( pos - 1 ) , CharacterCodes . backslash ) ;
2876
2876
let ch = charCodeChecked ( pos ) ;
2877
2877
switch ( ch ) {
2878
+ case CharacterCodes . EOF :
2879
+ error ( Diagnostics . Undetermined_character_escape , pos - 1 , 1 ) ;
2880
+ return "\\" ;
2878
2881
case CharacterCodes . c :
2879
2882
pos ++ ;
2880
2883
ch = charCodeChecked ( pos ) ;
2881
2884
if ( isASCIILetter ( ch ) ) {
2882
2885
pos ++ ;
2883
2886
return String . fromCharCode ( ch & 0x1f ) ;
2884
2887
}
2885
- if ( unicodeMode ) {
2888
+ if ( anyUnicodeMode ) {
2886
2889
error ( Diagnostics . c_must_be_followed_by_an_ASCII_letter , pos - 2 , 2 ) ;
2887
2890
}
2888
2891
else if ( atomEscape && annexB ) {
@@ -2913,12 +2916,8 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2913
2916
pos ++ ;
2914
2917
return String . fromCharCode ( ch ) ;
2915
2918
default :
2916
- if ( pos >= end ) {
2917
- error ( Diagnostics . Undetermined_character_escape , pos - 1 , 1 ) ;
2918
- return "\\" ;
2919
- }
2920
2919
pos -- ;
2921
- return scanEscapeSequence ( /*shouldEmitInvalidEscapeError*/ unicodeMode , /*isRegularExpression*/ annexB ? "annex-b" : true ) ;
2920
+ return scanEscapeSequence ( /*shouldEmitInvalidEscapeError*/ anyUnicodeMode , /*isRegularExpression*/ annexB ? "annex-b" : true ) ;
2922
2921
}
2923
2922
}
2924
2923
@@ -3464,11 +3463,11 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
3464
3463
}
3465
3464
}
3466
3465
scanExpectedChar ( CharacterCodes . closeBrace ) ;
3467
- if ( ! unicodeMode ) {
3466
+ if ( ! anyUnicodeMode ) {
3468
3467
error ( Diagnostics . Unicode_property_value_expressions_are_only_available_when_the_Unicode_u_flag_or_the_Unicode_Sets_v_flag_is_set , start , pos - start ) ;
3469
3468
}
3470
3469
}
3471
- else if ( unicodeMode ) {
3470
+ else if ( anyUnicodeMode ) {
3472
3471
error ( Diagnostics . _0_must_be_followed_by_a_Unicode_property_value_expression_enclosed_in_braces , pos - 2 , 2 , String . fromCharCode ( ch ) ) ;
3473
3472
}
3474
3473
return true ;
@@ -3490,7 +3489,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
3490
3489
}
3491
3490
3492
3491
/**
 * Consumes one source character starting at `pos` and returns the consumed text.
 *
 * When the Unicode grammar flag is set (`unicodeMode`, renamed to `anyUnicodeMode`
 * by this change) the step width is `charSize(charCodeChecked(pos))`; otherwise
 * the scanner always advances by exactly 1.
 * `pos` is advanced by that width before returning.
 *
 * @returns The substring of `text` that was just stepped over, or `""` when the
 * computed width is not positive.
 * NOTE(review): a zero width presumably corresponds to end of input - confirm
 * against `charSize` / `charCodeChecked`, which are defined outside this hunk.
 */
function scanSourceCharacter ( ) : string {
3493
- const size = unicodeMode ? charSize ( charCodeChecked ( pos ) ) : 1 ;
3492
+ const size = anyUnicodeMode ? charSize ( charCodeChecked ( pos ) ) : 1 ;
3494
3493
pos += size ;
3495
3494
return size > 0 ? text . substring ( pos - size , pos ) : "" ;
3496
3495
}
0 commit comments