Skip to content
This repository has been archived by the owner on Nov 5, 2021. It is now read-only.

[clojure] Improve the regular expressions for various symbols #56

Merged
6 commits merged on Jan 10, 2019
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/clojure/clojure.contribution.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ const _monaco: typeof monaco =

registerLanguage({
id: 'clojure',
extensions: ['.clj', '.clojure'],
extensions: ['.clj', '.cljs', '.cljc', '.edn'],
aliases: ['clojure', 'Clojure'],
loader: () => _monaco.Promise.wrap(import('./clojure')),
});
97 changes: 90 additions & 7 deletions src/clojure/clojure.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -754,13 +754,72 @@ testTokenization('clojure', [
], 'string'),

// strings
createTestCases([
'\"I\'m a little teapot.\"',
'\"I\'m a \\\"little\\\" teapot.\"',
'\"I\'m', // this is
'a little', // a multi-line
'teapot.\"' // string
], 'string'),
[
{
line: '"I\'m a little teapot."',
tokens: [
{startIndex: 0, type: 'string.clj'},
]
},
{
line: '"I\'m a \\"little\\" teapot."',
tokens: [
{startIndex: 0, type: 'string.clj'},
{startIndex: 7, type: 'string.escape.clj'},
{startIndex: 9, type: 'string.clj'},
{startIndex: 15, type: 'string.escape.clj'},
{startIndex: 17, type: 'string.clj'},
]
}
],

// multi-line strings
[
{
line: '"I\'m',
tokens: [
{startIndex: 0, type: 'string.clj'},
]
},
{
line: '\\"a little\\"',
tokens: [
{startIndex: 0, type: 'string.escape.clj'},
{startIndex: 2, type: 'string.clj'},
{startIndex: 10, type: 'string.escape.clj'},
]
},
{
line: 'teapot."',
tokens: [
{startIndex: 0, type: 'string.clj'},
]
}
],

// strings with other escapes in them (\" \' \\ \b \f \n \r \t)
[{
line: '"the escape \\" \\\' \\\\ \\b \\f \\n \\r \\t characters"',
tokens: [
{startIndex: 0, type: 'string.clj'},
{startIndex: 12, type: 'string.escape.clj'},
{startIndex: 14, type: 'string.clj'},
{startIndex: 15, type: 'string.escape.clj'},
{startIndex: 17, type: 'string.clj'},
{startIndex: 18, type: 'string.escape.clj'},
{startIndex: 20, type: 'string.clj'},
{startIndex: 21, type: 'string.escape.clj'},
{startIndex: 23, type: 'string.clj'},
{startIndex: 24, type: 'string.escape.clj'},
{startIndex: 26, type: 'string.clj'},
{startIndex: 27, type: 'string.escape.clj'},
{startIndex: 29, type: 'string.clj'},
{startIndex: 30, type: 'string.escape.clj'},
{startIndex: 32, type: 'string.clj'},
{startIndex: 33, type: 'string.escape.clj'},
{startIndex: 35, type: 'string.clj'},
]
}],

// comments
createTestCases([
Expand Down Expand Up @@ -792,6 +851,30 @@ testTokenization('clojure', [
{startIndex: 0, type: 'comment.clj'},
],
},
{
line: '(comments foo bar)',
tokens: [
{startIndex: 0, type: 'delimiter.parenthesis.clj'},
{startIndex: 1, type: 'identifier.clj'},
{startIndex: 9, type: 'white.clj'},
{startIndex: 10, type: 'identifier.clj'},
{startIndex: 13, type: 'white.clj'},
{startIndex: 14, type: 'identifier.clj'},
{startIndex: 17, type: 'delimiter.parenthesis.clj'},
]
},
{
line: '(comment6 foo bar)',
tokens: [
{startIndex: 0, type: 'delimiter.parenthesis.clj'},
{startIndex: 1, type: 'identifier.clj'},
{startIndex: 9, type: 'white.clj'},
{startIndex: 10, type: 'identifier.clj'},
{startIndex: 13, type: 'white.clj'},
{startIndex: 14, type: 'identifier.clj'},
{startIndex: 17, type: 'delimiter.parenthesis.clj'},
]
},
{
line: '(comment foo',
tokens: [
Expand Down
68 changes: 38 additions & 30 deletions src/clojure/clojure.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,20 +13,24 @@ export const conf: IRichLanguageConfiguration = {
lineComment: ';;',
},

brackets: [['(', ')'], ['[', ']'], ['{', '}']],
brackets: [
['[', ']'],
['(', ')'],
['{', '}']
],

autoClosingPairs: [
{open: '(', close: ')'},
{open: '[', close: ']'},
{open: '{', close: '}'},
{open: '"', close: '"'},
{open: '(', close: ')'},
{open: '{', close: '}'},
],

surroundingPairs: [
{open: '(', close: ')'},
{open: '[', close: ']'},
{open: '{', close: '}'},
{open: '"', close: '"'},
{open: '(', close: ')'},
{open: '{', close: '}'},
],
};

Expand All @@ -36,11 +40,26 @@ export const language = <ILanguage>{
tokenPostfix: '.clj',

brackets: [
{open: '[', close: ']', token: 'delimiter.square'},
{open: '(', close: ')', token: 'delimiter.parenthesis'},
{open: '{', close: '}', token: 'delimiter.curly'},
{open: '[', close: ']', token: 'delimiter.square'},
],

constants: ['true', 'false', 'nil'],

// delimiters: /[\\\[\]\s"#'(),;@^`{}~]|$/,

numbers: /^(?:[+\-]?\d+(?:(?:N|(?:[eE][+\-]?\d+))|(?:\.?\d*(?:M|(?:[eE][+\-]?\d+))?)|\/\d+|[xX][0-9a-fA-F]+|r[0-9a-zA-Z]+)?(?=[\\\[\]\s"#'(),;@^`{}~]|$))/,

characters: /^(?:\\(?:backspace|formfeed|newline|return|space|tab|o[0-7]{3}|u[0-9A-Fa-f]{4}|x[0-9A-Fa-f]{4}|.)?(?=[\\\[\]\s"(),;@^`{}~]|$))/,

escapes: /^\\(?:["'\\bfnrt]|x[0-9A-Fa-f]{1,4}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})/,

// simple-namespace := /^[^\\\/\[\]\d\s"#'(),;@^`{}~][^\\\[\]\s"(),;@^`{}~]*/
// simple-symbol := /^(?:\/|[^\\\/\[\]\d\s"#'(),;@^`{}~][^\\\[\]\s"(),;@^`{}~]*)/
// qualified-symbol := (<simple-namespace>(<.><simple-namespace>)*</>)?<simple-symbol>
qualifiedSymbols: /^(?:(?:[^\\\/\[\]\d\s"#'(),;@^`{}~][^\\\[\]\s"(),;@^`{}~]*(?:\.[^\\\/\[\]\d\s"#'(),;@^`{}~][^\\\[\]\s"(),;@^`{}~]*)*\/)?(?:\/|[^\\\/\[\]\d\s"#'(),;@^`{}~][^\\\[\]\s"(),;@^`{}~]*)*(?=[\\\[\]\s"(),;@^`{}~]|$))/,

specialForms: [
'.',
'catch',
Expand Down Expand Up @@ -712,14 +731,6 @@ export const language = <ILanguage>{
'zipmap',
],

constants: ['true', 'false', 'nil'],

symbolCharacter: /[!#'*+\-.\/:<=>?_\w\xa1-\uffff]/,

numbers: /[+\-]?\d+(?:(?:N|(?:[eE][+\-]?\d+))|(?:\.?\d*(?:M|(?:[eE][+\-]?\d+))?)|\/\d+|[xX][0-9a-fA-F]+|r[0-9a-zA-Z]+)?/,

characters: /\\(?:backspace|formfeed|newline|return|space|tab|x[0-9A-Fa-f]{4}|u[0-9A-Fa-f]{4}|o[0-7]{3}|@symbolCharacter|[\\"()\[\]{}])/,

tokenizer: {
root: [
// whitespaces and comments
Expand All @@ -743,42 +754,39 @@ export const language = <ILanguage>{
// reader macro characters
[/[#'@^`~]/, 'meta'],

// keywords
[/:@symbolCharacter+/, 'constant'],

// symbols
[/@symbolCharacter+/, {
cases: {
'@specialForms': 'keyword',
'@coreSymbols': 'keyword',
'@constants': 'constant',
'@default': 'identifier',
[/@qualifiedSymbols/, {
cases: {
'^:.+$': 'constant', // Clojure keywords (e.g., `:foo/bar`)
'@specialForms': 'keyword',
'@coreSymbols': 'keyword',
'@constants': 'constant',
'@default': 'identifier',
},
},
},
],

],

whitespace: [
[/\s+/, 'white'],
[/;.*$/, 'comment'],
[/\(comment/, 'comment', '@comment'],
[/\(comment\b/, 'comment', '@comment'],
],

comment: [
[/\(/, 'comment', '@push'],
[/\)/, 'comment', '@pop'],
[/[^)]/, 'comment'],
[/[^()]/, 'comment'],
],

string: [
[/"/, 'string', '@multiLineString'],
],

multiLineString: [
[/[^\\"]+/, 'string'],
[/@characters/, 'string'],
[/"/, 'string', '@pop']
[/"/, 'string', '@popall'],
[/@escapes/, 'string.escape'],
[/./, 'string']
],
},
};