diff --git a/src/vs/editor/standalone/common/monarch/monarchCommon.ts b/src/vs/editor/standalone/common/monarch/monarchCommon.ts index 757e208bade77..6dbf14dc803b7 100644 --- a/src/vs/editor/standalone/common/monarch/monarchCommon.ts +++ b/src/vs/editor/standalone/common/monarch/monarchCommon.ts @@ -24,6 +24,7 @@ export interface ILexerMin { languageId: string; noThrow: boolean; ignoreCase: boolean; + unicode: boolean; usesEmbedded: boolean; defaultToken: string; stateNames: { [stateName: string]: any; }; @@ -34,6 +35,7 @@ export interface ILexer extends ILexerMin { maxStack: number; start: string | null; ignoreCase: boolean; + unicode: boolean; tokenPostfix: string; tokenizer: { [stateName: string]: IRule[]; }; diff --git a/src/vs/editor/standalone/common/monarch/monarchCompile.ts b/src/vs/editor/standalone/common/monarch/monarchCompile.ts index 2c98c6ba429be..289d045aba052 100644 --- a/src/vs/editor/standalone/common/monarch/monarchCompile.ts +++ b/src/vs/editor/standalone/common/monarch/monarchCompile.ts @@ -79,7 +79,7 @@ function createKeywordMatcher(arr: string[], caseInsensitive: boolean = false): // Lexer helpers /** - * Compiles a regular expression string, adding the 'i' flag if 'ignoreCase' is set. + * Compiles a regular expression string, adding the 'i' flag if 'ignoreCase' is set, and the 'u' flag if 'unicode' is set. * Also replaces @\w+ or sequences with the content of the specified attribute */ function compileRegExp(lexer: monarchCommon.ILexerMin, str: string): RegExp { @@ -103,7 +103,8 @@ function compileRegExp(lexer: monarchCommon.ILexerMin, str: string): RegExp { }); } - return new RegExp(str, (lexer.ignoreCase ? 'i' : '')); + let flags = (lexer.ignoreCase ? 'i' : '') + (lexer.unicode ? 'u' : ''); + return new RegExp(str, flags); } /** @@ -400,6 +401,7 @@ export function compile(languageId: string, json: IMonarchLanguage): monarchComm // Set standard fields: be defensive about types lexer.start = (typeof json.start === 'string' ? 
json.start : null); lexer.ignoreCase = bool(json.ignoreCase, false); + lexer.unicode = bool(json.unicode, false); lexer.tokenPostfix = string(json.tokenPostfix, '.' + lexer.languageId); lexer.defaultToken = string(json.defaultToken, 'source'); @@ -410,6 +412,7 @@ export function compile(languageId: string, json: IMonarchLanguage): monarchComm let lexerMin: monarchCommon.ILexerMin = json; lexerMin.languageId = languageId; lexerMin.ignoreCase = lexer.ignoreCase; + lexerMin.unicode = lexer.unicode; lexerMin.noThrow = lexer.noThrow; lexerMin.usesEmbedded = lexer.usesEmbedded; lexerMin.stateNames = json.tokenizer; diff --git a/src/vs/editor/standalone/common/monarch/monarchLexer.ts b/src/vs/editor/standalone/common/monarch/monarchLexer.ts index e4ddf8778d46f..f120b5383d7f9 100644 --- a/src/vs/editor/standalone/common/monarch/monarchLexer.ts +++ b/src/vs/editor/standalone/common/monarch/monarchLexer.ts @@ -497,7 +497,8 @@ export class MonarchTokenizer implements modes.ITokenizationSupport { let regex = rule.regex; let regexSource = rule.regex.source; if (regexSource.substr(0, 4) === '^(?:' && regexSource.substr(regexSource.length - 1, 1) === ')') { - regex = new RegExp(regexSource.substr(4, regexSource.length - 5), regex.ignoreCase ? 'i' : ''); + let flags = (regex.ignoreCase ? 'i' : '') + (regex.unicode ? 'u' : ''); + regex = new RegExp(regexSource.substr(4, regexSource.length - 5), flags); } let result = line.search(regex); diff --git a/src/vs/editor/standalone/common/monarch/monarchTypes.ts b/src/vs/editor/standalone/common/monarch/monarchTypes.ts index cd0fd1079924a..19936be8dc825 100644 --- a/src/vs/editor/standalone/common/monarch/monarchTypes.ts +++ b/src/vs/editor/standalone/common/monarch/monarchTypes.ts @@ -21,6 +21,10 @@ export interface IMonarchLanguage { * is the language case insensitive? */ ignoreCase?: boolean; + /** + * is the language unicode-aware? 
(e.g., /\u{1D306}/) + */ + unicode?: boolean; /** * if no match in the tokenizer assign this token class (default 'source') */ diff --git a/src/vs/monaco.d.ts b/src/vs/monaco.d.ts index ccfb97e62f1b9..e4e0752901d42 100644 --- a/src/vs/monaco.d.ts +++ b/src/vs/monaco.d.ts @@ -6286,6 +6286,10 @@ declare namespace monaco.languages { * is the language case insensitive? */ ignoreCase?: boolean; + /** + * is the language unicode-aware? (e.g., /\u{1D306}/) + */ + unicode?: boolean; /** * if no match in the tokenizer assign this token class (default 'source') */