// Character codes of every character the comment tokenizer treats specially.
const newline = '\n'.charCodeAt(0);
const space = ' '.charCodeAt(0);
const feed = '\f'.charCodeAt(0);
const tab = '\t'.charCodeAt(0);
const cr = '\r'.charCodeAt(0);
const hash = '#'.charCodeAt(0);
const backslash = '\\'.charCodeAt(0);
const slash = '/'.charCodeAt(0);
const openCurly = '{'.charCodeAt(0);
const closeCurly = '}'.charCodeAt(0);
const asterisk = '*'.charCodeAt(0);

// Finds the first character that ends a word: whitespace (including form
// feed), punctuation, a backslash, the start of `/*`, the start of `*/`, or
// the start of `#{`. The `*/` alternative is required so that a terminator
// glued to a word (`/*foo*/`, `/** x **/`) is not swallowed into that word.
// The regex is global and is only ever used after resetting `lastIndex`.
const wordEnd = /[ \n\t\r\f\(\)\{\},:;@!'"\\]|\/(?=\*)|\*(?=\/)|#(?=\{)/g;

/**
 * Tokenizes the body of a CSS-style comment (the text after the opening
 * marker), stopping just before the closing `*` `/` pair or at end of input.
 *
 * Everything that is not whitespace, a newline, a `}` or an interpolant is
 * emitted as a `word` token. `#{` emits `startInterpolant` and hands over to
 * `tokenizeInterpolant` (provided by this module's import of
 * `./tokenize-interpolant`).
 *
 * Token shapes:
 *   ['space', text]
 *   ['newline', '\n', line, column]
 *   ['endInterpolant', '}', line, column]
 *   ['startInterpolant', '#{', line, column]
 *   ['word', text, startLine, startColumn, endLine, endColumn]
 *
 * @param {Object} input - object whose `css` property yields the source text
 *   through `valueOf()`.
 * @param {number} [l=1] - line number of the starting position.
 * @param {number} [p=0] - index in the source text at which to start.
 * @param {number} [o=-1] - index of the last newline before `p`; columns are
 *   reported as `index - o`. The default matches the previous behaviour,
 *   which always measured columns from the start of the text.
 * @returns {{tokens: Array, line: number, pos: number, offset: number}}
 *   `pos` is the index of the character immediately before the closing
 *   `*` `/` pair (or the text length when the comment is unterminated);
 *   the callers in this patch assign it to their own cursor and then
 *   advance by one.
 */
export default function tokenize(input, l, p, o) {
    const css = input.css.valueOf();
    const length = css.length;

    let tokens = [];
    let code, next, escape;

    let offset = o == null ? -1 : o;
    let line = l || 1;
    let pos = p || 0;

    loop:
    while (pos < length) {
        code = css.charCodeAt(pos);

        // The only place line/offset are advanced: each newline is visited
        // exactly once by this loop.
        if (code === newline) {
            offset = pos;
            line += 1;
        }

        switch (code) {
        case space:
        case tab:
        case cr:
        case feed:
            // Collect the whole whitespace run. A newline ends the run and is
            // deliberately NOT counted here; the next iteration of the outer
            // loop counts it. Counting it in both places made every
            // "whitespace + newline" sequence (e.g. CRLF) advance `line` by 2.
            next = pos;
            do {
                next += 1;
                code = css.charCodeAt(next);
            } while (code === space ||
                     code === tab ||
                     code === cr ||
                     code === feed);

            tokens.push(['space', css.slice(pos, next)]);
            pos = next - 1;
            break;

        case newline:
            tokens.push(['newline', '\n', line, pos - offset]);
            break;

        case closeCurly:
            tokens.push(['endInterpolant', '}', line, pos - offset]);
            break;

        case backslash:
            // Swallow a run of backslashes; an odd count leaves the last one
            // escaping the character that follows.
            next = pos;
            escape = true;
            while (css.charCodeAt(next + 1) === backslash) {
                next += 1;
                escape = !escape;
            }
            code = css.charCodeAt(next + 1);
            // Only take the escaped character when one exists, so a trailing
            // backslash cannot push the cursor past the end of the text.
            if (escape &&
                next + 1 < length &&
                code !== slash &&
                code !== space &&
                code !== newline &&
                code !== tab &&
                code !== cr &&
                code !== feed) {
                next += 1;
            }
            tokens.push(['word', css.slice(pos, next + 1),
                line, pos - offset,
                line, next - offset
            ]);
            pos = next;
            break;

        default:
            // Closing marker: step back one character and stop, leaving the
            // marker itself for the caller.
            if (code === asterisk && css.charCodeAt(pos + 1) === slash) {
                pos -= 1;
                break loop;
            }

            if (code === hash && css.charCodeAt(pos + 1) === openCurly) {
                tokens.push(['startInterpolant', '#{', line, pos + 1 - offset]);

                // Resume from wherever the interpolant tokenizer stopped.
                const interpolant = tokenizeInterpolant(input, line, pos + 2);
                tokens = tokens.concat(interpolant.tokens);
                pos = interpolant.pos;
                break;
            }

            // Plain word: runs up to (not including) the next delimiter, or
            // to the end of the text when there is none (`lastIndex` is reset
            // to 0 by a failed match).
            wordEnd.lastIndex = pos + 1;
            wordEnd.test(css);
            if (wordEnd.lastIndex === 0) {
                next = css.length - 1;
            } else {
                next = wordEnd.lastIndex - 2;
            }

            tokens.push(['word', css.slice(pos, next + 1),
                line, pos - offset,
                line, next - offset
            ]);

            pos = next;
            break;
        }

        pos++;
    }

    return { tokens, line, pos, offset };
}
index b1478aa..2533c91 100644 --- a/lib/tokenize-interpolant.js +++ b/lib/tokenize-interpolant.js @@ -1,5 +1,6 @@ import Input from './input'; import tokenizeString from './tokenize-string'; +import tokenizeComment from './tokenize-comment'; import tokenizeInterpolant from './tokenize-interpolant'; let singleQuote = "'".charCodeAt(0), @@ -88,10 +89,6 @@ export default function tokenize(input, l, p) { tokens.push(['-', '-', line, pos - offset]); break; - case asterisk: - tokens.push(['*', '*', line, pos - offset]); - break; - case decComb: tokens.push(['>', '>', line, pos - offset]); break; @@ -184,7 +181,20 @@ export default function tokenize(input, l, p) { if ( code === slash && css.charCodeAt(pos + 1) === asterisk ) { inComment = true; tokens.push(['startComment', '/*', line, pos + 1 - offset]); - pos += 2; + next = pos + 1; + + let { tokens: t, line: l, pos: p, offset: o } = tokenizeComment(input, line, next + 1); + tokens = tokens.concat(t); + next = p; + line = l; + offset = o; + + pos = next; + break; + } + + if ( code === asterisk && css.charCodeAt(pos + 1) !== slash) { + tokens.push(['*', '*', line, pos - offset]); break; } diff --git a/lib/tokenize.js b/lib/tokenize.js index 86a8da8..434ef35 100644 --- a/lib/tokenize.js +++ b/lib/tokenize.js @@ -1,5 +1,6 @@ import Input from './input'; import tokenizeString from './tokenize-string'; +import tokenizeComment from './tokenize-comment'; import tokenizeInterpolant from './tokenize-interpolant'; let singleQuote = "'".charCodeAt(0), @@ -87,10 +88,6 @@ export default function tokenize(input, l, p) { tokens.push(['-', '-', line, pos - offset]); break; - case asterisk: - tokens.push(['*', '*', line, pos - offset]); - break; - case decComb: tokens.push(['>', '>', line, pos - offset]); break; @@ -183,7 +180,20 @@ export default function tokenize(input, l, p) { if ( code === slash && css.charCodeAt(pos + 1) === asterisk ) { inComment = true; tokens.push(['startComment', '/*', line, pos + 1 - offset]); - pos += 2; + 
var scss = require('..');
var fs = require('fs');
var path = require('path');
var assert = require('chai').assert;

/**
 * Reads a fixture file from the `fixture` directory next to this test.
 *
 * @param {string} name - file name inside the fixture directory.
 * @returns {Buffer} raw file contents (no encoding is passed to readFileSync).
 */
var fixture = function(name) {
    return fs.readFileSync(
        path.join(__dirname, 'fixture', name)
    );
};

// Each case tokenizes one fixture and compares the full token stream.
// Arguments follow Chai's (actual, expected) order so that a failure report
// labels the two sides correctly.
describe('Comment', function() {
    it('should tokenize a simple comment', function() {
        assert.deepEqual(
            scss.tokenize(fixture('simple-comment.scss')),
            [
                ['startComment', '/*', 1, 2],
                ['space', ' '],
                ['word', 'my', 1, 4, 1, 5],
                ['space', ' '],
                ['word', 'comment', 1, 7, 1, 13],
                ['space', ' '],
                ['endComment', '*/', 1, 16],
            ]
        );
    });

    it('should tokenize a multiline comment', function() {
        assert.deepEqual(
            scss.tokenize(fixture('multiline-comment.scss')),
            [
                ['startComment', '/*', 1, 2],
                ['newline', '\n', 2, 0],
                ['word', 'my', 2, 1, 2, 2],
                ['space', ' '],
                ['word', 'comment', 2, 4, 2, 10],
                ['newline', '\n', 3, 0],
                ['endComment', '*/', 3, 2],
            ]
        );
    });

    it('should tokenize a docblock comment', function() {
        assert.deepEqual(
            scss.tokenize(fixture('docblock-comment.scss')),
            [
                ['startComment', '/*', 1, 2],
                ['word', '*', 1, 3, 1, 3],
                ['newline', '\n', 2, 0],
                ['space', ' '],
                ['word', '*', 2, 2, 2, 2],
                ['space', ' '],
                ['word', 'line', 2, 4, 2, 7],
                ['space', ' '],
                ['word', '1', 2, 9, 2, 9],
                ['newline', '\n', 3, 0],
                ['space', ' '],
                ['word', '*', 3, 2, 3, 2],
                ['newline', '\n', 4, 0],
                ['space', ' '],
                ['word', '*', 4, 2, 4, 2],
                ['space', ' '],
                ['word', 'line', 4, 4, 4, 7],
                ['space', ' '],
                ['word', '2', 4, 9, 4, 9],
                ['newline', '\n', 5, 0],
                ['space', ' '],
                ['endComment', '*/', 5, 3],
            ]
        );
    });
});