Implement proper comment tokenizing
CSS-style comments (`/* */`) allow interpolants (`#{$foo}`) and
disallow everything else. To dodge a bunch of edge cases I've
implemented a sub-tokenizer for comments that emits every
non-interpolant, non-whitespace token as a word token.
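
For example, a comment containing an interpolant now tokenizes roughly like this (a sketch: the comment-level tokens match the tests below, but the inner `$foo` tokens are elided and the entry point is assumed to be the package index):

    var scss = require('..');

    scss.tokenize('/* note #{$foo} */');
    // => [ ['startComment', '/*', 1, 2],
    //      ['space', ' '],
    //      ['word', 'note', 1, 4, 1, 7],
    //      ['space', ' '],
    //      ['startInterpolant', '#{', 1, 10],
    //      ... tokens for `$foo` ...
    //      ['endInterpolant', '}', 1, 15],
    //      ['space', ' '],
    //      ['endComment', '*/', 1, 18] ]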
xzyfer committed Jan 28, 2017
1 parent a87c091 commit 7969c0d
Showing 7 changed files with 246 additions and 10 deletions.
135 changes: 135 additions & 0 deletions lib/tokenize-comment.js
@@ -0,0 +1,135 @@
import Input from './input';
import tokenizeString from './tokenize-string';
import tokenizeInterpolant from './tokenize-interpolant';

let newline = '\n'.charCodeAt(0),
    space = ' '.charCodeAt(0),
    feed = '\f'.charCodeAt(0),
    tab = '\t'.charCodeAt(0),
    cr = '\r'.charCodeAt(0),
    hash = '#'.charCodeAt(0),
    backslash = '\\'.charCodeAt(0),
    slash = '/'.charCodeAt(0),
    openCurly = '{'.charCodeAt(0),
    closeCurly = '}'.charCodeAt(0),
    asterisk = '*'.charCodeAt(0),
    wordEnd = /[ \n\t\r\(\)\{\},:;@!'"\\]|\/(?=\*)|#(?={)/g;

export default function tokenize(input, l, p) {
    let tokens = [];
    let css = input.css.valueOf();

    let code, next, lines, last, content, escape,
        nextLine, nextOffset, escaped, escapePos,
        inInterpolant, inComment, inString;

    let length = css.length;
    let offset = -1;
    let line = l || 1;
    let pos = p || 0;

    loop:
    while ( pos < length ) {
        code = css.charCodeAt(pos);

        if ( code === newline ) {
            offset = pos;
            line += 1;
        }

        switch ( code ) {
            case space:
            case tab:
            case cr:
            case feed:
                next = pos;
                do {
                    next += 1;
                    code = css.charCodeAt(next);
                    if ( code === newline ) {
                        offset = next;
                        line += 1;
                    }
                } while ( code === space ||
                          code === tab ||
                          code === cr ||
                          code === feed );

                tokens.push(['space', css.slice(pos, next)]);
                pos = next - 1;
                break;

            case newline:
                tokens.push(['newline', '\n', line, pos - offset]);
                break;

            case closeCurly:
                tokens.push(['endInterpolant', '}', line, pos - offset]);
                break;

            case backslash:
                next = pos;
                escape = true;
                while ( css.charCodeAt(next + 1) === backslash ) {
                    next += 1;
                    escape = !escape;
                }
                code = css.charCodeAt(next + 1);
                if ( escape && (code !== slash &&
                                code !== space &&
                                code !== newline &&
                                code !== tab &&
                                code !== cr &&
                                code !== feed ) ) {
                    next += 1;
                }
                tokens.push(['word', css.slice(pos, next + 1),
                    line, pos - offset,
                    line, next - offset
                ]);
                pos = next;
                break;

            default:

                if ( code === asterisk && css.charCodeAt(pos + 1) === slash ) {
                    next = pos;
                    pos = next - 1;
                    break loop;
                }

                if ( code === hash && css.charCodeAt(pos + 1) === openCurly ) {
                    tokens.push(['startInterpolant', '#{', line, pos + 1 - offset]);
                    next = pos + 1;

                    let { tokens: t, pos: p } = tokenizeInterpolant(input, line, next + 1);
                    tokens = tokens.concat(t);
                    next = p;

                    pos = next;
                    break;
                }

                wordEnd.lastIndex = pos + 1;
                wordEnd.test(css);
                if ( wordEnd.lastIndex === 0 ) {
                    next = css.length - 1;
                } else {
                    next = wordEnd.lastIndex - 2;
                }

                tokens.push(['word', css.slice(pos, next + 1),
                    line, pos - offset,
                    line, next - offset
                ]);

                pos = next;

                break;
        }

        pos++;
    }

    return { tokens, line, pos, offset };
}
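
Note that the sub-tokenizer never consumes the closing `*/` itself: on seeing `*` followed by `/` it backs `pos` up one position and breaks out of the loop, so the caller can emit `endComment`. A minimal sketch of calling it directly (assuming `Input` takes a CSS string, as with the other tokenizers):

    import Input from './input';
    import tokenizeComment from './tokenize-comment';

    let input = new Input('/* hi */');
    // Start just past the opening `/*` (pos 2); the sub-tokenizer stops
    // at the `*` of the closing `*/` and hands control back.
    let { tokens, pos } = tokenizeComment(input, 1, 2);
    // tokens => [ ['space', ' '], ['word', 'hi', 1, 4, 1, 5], ['space', ' '] ]
    // pos    => 5, the last index consumed before the closing `*/`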
20 changes: 15 additions & 5 deletions lib/tokenize-interpolant.js
@@ -1,5 +1,6 @@
import Input from './input';
import tokenizeString from './tokenize-string';
import tokenizeComment from './tokenize-comment';
import tokenizeInterpolant from './tokenize-interpolant';

let singleQuote = "'".charCodeAt(0),
@@ -88,10 +89,6 @@ export default function tokenize(input, l, p) {
                tokens.push(['-', '-', line, pos - offset]);
                break;

            case asterisk:
                tokens.push(['*', '*', line, pos - offset]);
                break;

            case decComb:
                tokens.push(['>', '>', line, pos - offset]);
                break;
@@ -184,7 +181,20 @@ export default function tokenize(input, l, p) {
                if ( code === slash && css.charCodeAt(pos + 1) === asterisk ) {
                    inComment = true;
                    tokens.push(['startComment', '/*', line, pos + 1 - offset]);
                    pos += 2;
                    next = pos + 1;

                    let { tokens: t, line: l, pos: p, offset: o } = tokenizeComment(input, line, next + 1);
                    tokens = tokens.concat(t);
                    next = p;
                    line = l;
                    offset = o;

                    pos = next;
                    break;
                }

                if ( code === asterisk && css.charCodeAt(pos + 1) !== slash ) {
                    tokens.push(['*', '*', line, pos - offset]);
                    break;
                }

20 changes: 15 additions & 5 deletions lib/tokenize.js
@@ -1,5 +1,6 @@
import Input from './input';
import tokenizeString from './tokenize-string';
import tokenizeComment from './tokenize-comment';
import tokenizeInterpolant from './tokenize-interpolant';

let singleQuote = "'".charCodeAt(0),
@@ -87,10 +88,6 @@ export default function tokenize(input, l, p) {
                tokens.push(['-', '-', line, pos - offset]);
                break;

            case asterisk:
                tokens.push(['*', '*', line, pos - offset]);
                break;

            case decComb:
                tokens.push(['>', '>', line, pos - offset]);
                break;
@@ -183,7 +180,20 @@ export default function tokenize(input, l, p) {
                if ( code === slash && css.charCodeAt(pos + 1) === asterisk ) {
                    inComment = true;
                    tokens.push(['startComment', '/*', line, pos + 1 - offset]);
                    pos += 2;
                    next = pos + 1;

                    let { tokens: t, line: l, pos: p, offset: o } = tokenizeComment(input, line, next + 1);
                    tokens = tokens.concat(t);
                    next = p;
                    line = l;
                    offset = o;

                    pos = next;
                    break;
                }

                if ( code === asterisk && css.charCodeAt(pos + 1) !== slash ) {
                    tokens.push(['*', '*', line, pos - offset]);
                    break;
                }

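Both call sites thread the sub-tokenizer's state back into the parent the same way. Since a comment can span multiple lines, `line` and `offset` have to be carried back as well, not just `pos`; an annotated restatement of the hand-off above:

    let { tokens: t, line: l, pos: p, offset: o } = tokenizeComment(input, line, next + 1);
    tokens = tokens.concat(t); // splice the comment's inner tokens in
    line = l;                  // comments may span lines...
    offset = o;                // ...so line/offset must carry back
    pos = p;                   // resume just before the closing `*/`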
72 changes: 72 additions & 0 deletions test/comment.js
@@ -0,0 +1,72 @@
var scss = require('..');
var fs = require('fs');
var path = require('path');
var assert = require('chai').assert;

var fixture = function(name) {
    return fs.readFileSync(
        path.join(__dirname, 'fixture', name)
    );
};

describe('Comment', function() {
    it('should tokenize a simple comment', function() {
        assert.deepEqual(
            [
                ['startComment', '/*', 1, 2],
                ['space', ' '],
                ['word', 'my', 1, 4, 1, 5],
                ['space', ' '],
                ['word', 'comment', 1, 7, 1, 13],
                ['space', ' '],
                ['endComment', '*/', 1, 16],
            ],
            scss.tokenize(fixture('simple-comment.scss'))
        );
    });

    it('should tokenize a multiline comment', function() {
        assert.deepEqual(
            [
                ['startComment', '/*', 1, 2],
                ['newline', '\n', 2, 0],
                ['word', 'my', 2, 1, 2, 2],
                ['space', ' '],
                ['word', 'comment', 2, 4, 2, 10],
                ['newline', '\n', 3, 0],
                ['endComment', '*/', 3, 2],
            ],
            scss.tokenize(fixture('multiline-comment.scss'))
        );
    });

    it('should tokenize a docblock comment', function() {
        assert.deepEqual(
            [
                ['startComment', '/*', 1, 2],
                ['word', '*', 1, 3, 1, 3],
                ['newline', '\n', 2, 0],
                ['space', ' '],
                ['word', '*', 2, 2, 2, 2],
                ['space', ' '],
                ['word', 'line', 2, 4, 2, 7],
                ['space', ' '],
                ['word', '1', 2, 9, 2, 9],
                ['newline', '\n', 3, 0],
                ['space', ' '],
                ['word', '*', 3, 2, 3, 2],
                ['newline', '\n', 4, 0],
                ['space', ' '],
                ['word', '*', 4, 2, 4, 2],
                ['space', ' '],
                ['word', 'line', 4, 4, 4, 7],
                ['space', ' '],
                ['word', '2', 4, 9, 4, 9],
                ['newline', '\n', 5, 0],
                ['space', ' '],
                ['endComment', '*/', 5, 3],
            ],
            scss.tokenize(fixture('docblock-comment.scss'))
        );
    });
});
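
The docblock case is the interesting one: lone asterisks inside a comment come out as plain word tokens, not `*` operator tokens. Tracing the tokenizer by hand on a tiny input (same `require('..')` entry point as the tests):

    var scss = require('..');

    scss.tokenize('/* * */');
    // => [ ['startComment', '/*', 1, 2],
    //      ['space', ' '],
    //      ['word', '*', 1, 4, 1, 4],
    //      ['space', ' '],
    //      ['endComment', '*/', 1, 7] ]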
5 changes: 5 additions & 0 deletions test/fixture/docblock-comment.scss
@@ -0,0 +1,5 @@
/**
 * line 1
 *
 * line 2
 */
3 changes: 3 additions & 0 deletions test/fixture/multiline-comment.scss
@@ -0,0 +1,3 @@
/*
my comment
*/
1 change: 1 addition & 0 deletions test/fixture/simple-comment.scss
@@ -0,0 +1 @@
/* my comment */
