From aee620a20e85e651073ad8e6468d10a032f0eca8 Mon Sep 17 00:00:00 2001 From: Alex Kocharin Date: Wed, 30 Dec 2020 13:34:10 +0300 Subject: [PATCH] Throw an error if block sequence/mapping indent contains a tab fix https://github.com/nodeca/js-yaml/issues/80 --- CHANGELOG.md | 1 + lib/loader.js | 25 ++++++++++++++++ lib/snippet.js | 2 +- test/issues/0080.js | 73 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 100 insertions(+), 1 deletion(-) create mode 100644 test/issues/0080.js diff --git a/CHANGELOG.md b/CHANGELOG.md index e2cd11d9..b000a7bc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -49,6 +49,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 (previously usage of custom non-ascii tags may have led to invalid YAML that can't be parsed). - Anchors now work correctly with empty nodes, #301. - Fix incorrect parsing of invalid block mapping syntax, #418. +- Throw an error if block sequence/mapping indent contains a tab, #80. ## [3.14.1] - 2020-12-07 diff --git a/lib/loader.js b/lib/loader.js index b6dad5b3..39f13f56 100644 --- a/lib/loader.js +++ b/lib/loader.js @@ -150,6 +150,10 @@ function State(input, options) { this.lineStart = 0; this.lineIndent = 0; + // position of first leading tab in the current line, + // used to make sure there are no tabs in the indentation + this.firstTabInLine = -1; + this.documents = []; /* @@ -389,6 +393,7 @@ function readLineBreak(state) { state.line += 1; state.lineStart = state.position; + state.firstTabInLine = -1; } function skipSeparationSpace(state, allowComments, checkIndent) { @@ -397,6 +402,9 @@ function skipSeparationSpace(state, allowComments, checkIndent) { while (ch !== 0) { while (is_WHITE_SPACE(ch)) { + if (ch === 0x09/* Tab */ && state.firstTabInLine === -1) { + state.firstTabInLine = state.position; + } ch = state.input.charCodeAt(++state.position); } @@ -959,6 +967,10 @@ function readBlockSequence(state, nodeIndent) { detected = false, ch; + // there is a leading tab before this token, so it can't be a block sequence/mapping; + // it can still be flow sequence/mapping or a scalar + if (state.firstTabInLine !== -1) return false; + if (state.anchor !== null) { state.anchorMap[state.anchor] = _result; } @@ -966,6 +978,10 @@ function readBlockSequence(state, nodeIndent) { ch = state.input.charCodeAt(state.position); while (ch !== 0) { + if (state.firstTabInLine !== -1) { + state.position = state.firstTabInLine; + throwError(state, 'tab characters must not be used in indentation'); + } if (ch !== 0x2D/* - */) { break; @@ -1030,6 +1046,10 @@ function readBlockMapping(state, nodeIndent, flowIndent) { detected = false, ch; + // there is a leading tab before this token, so it can't be a block sequence/mapping; + // it can still be flow sequence/mapping or a scalar + if (state.firstTabInLine !== -1) return false; + if (state.anchor !== null) { state.anchorMap[state.anchor] = _result; } @@ -1037,6 +1057,11 @@ function readBlockMapping(state, nodeIndent, flowIndent) { ch = state.input.charCodeAt(state.position); while (ch !== 0) { + if (!atExplicitKey && state.firstTabInLine !== -1) { + state.position = state.firstTabInLine; + throwError(state, 'tab characters must not be used in indentation'); + } + following = state.input.charCodeAt(state.position + 1); _line = state.line; // Save the current line. diff --git a/lib/snippet.js b/lib/snippet.js index c3669229..00e2133c 100644 --- a/lib/snippet.js +++ b/lib/snippet.js @@ -21,7 +21,7 @@ function getLine(buffer, lineStart, lineEnd, position, maxLineLength) { } return { - str: head + buffer.slice(lineStart, lineEnd) + tail, + str: head + buffer.slice(lineStart, lineEnd).replace(/\t/g, '→') + tail, pos: position - lineStart + head.length // relative position }; } diff --git a/test/issues/0080.js b/test/issues/0080.js new file mode 100644 index 00000000..a0bf3361 --- /dev/null +++ b/test/issues/0080.js @@ -0,0 +1,73 @@ +'use strict'; + + +const assert = require('assert'); +const yaml = require('../../'); + + +it('should throw when tabs are used as indentation', function () { + assert.throws(() => yaml.load(` + \tfoo: 1 + bar: 2 +`), /end of the stream or a document separator is expected/); + + assert.throws(() => yaml.load(` + foo: 1 + \tbar: 2 +`), /tab characters must not be used/); + + assert.throws(() => yaml.load(` + \t- foo + - bar +`), /end of the stream or a document separator is expected/); + + assert.throws(() => yaml.load(` + - foo + \t- bar +`), /tab characters must not be used/); +}); + + +it('should allow tabs inside separation spaces', function () { + assert.deepStrictEqual(yaml.load(` + foo\t \t:\t \t1\t \t +\t \t \t + bar \t : \t 2 \t +`), { foo: 1, bar: 2 }); + + assert.deepStrictEqual(yaml.load(` + -\t \tfoo\t \t +\t \t \t + - \t bar \t +`), [ 'foo', 'bar' ]); + + assert.deepStrictEqual(yaml.load(` +\t{\tfoo\t:\t1\t,\tbar\t:\t2\t}\t +`), { foo: 1, bar: 2 }); + + assert.deepStrictEqual(yaml.load(` +\t[\tfoo\t,\tbar\t]\t +`), [ 'foo', 'bar' ]); + + assert.deepStrictEqual(yaml.load(` +foo: # string indent = 1 + \t \t1 + \t 2 + \t \t3 +`), { foo: '1 2 3' }); +}); + + +it('should throw when tabs are used as indentation in strings', function () { + assert.throws(() => yaml.load(` +foo: + bar: | + \tbaz +`), /tab characters must not be used/); + + assert.deepStrictEqual(yaml.load(` +foo: + bar: | + \tbaz +`), { foo: { bar: '\tbaz\n' } }); +});