From f8db2ff55a223359f902ed1e8fad6d1e5c9805fa Mon Sep 17 00:00:00 2001 From: Khafra Date: Thu, 24 Oct 2024 18:26:09 -0400 Subject: [PATCH 1/2] fix filename* parsing --- lib/web/fetch/body.js | 6 +- lib/web/fetch/formdata-parser.js | 113 +++++++++++++++++++------------ 2 files changed, 71 insertions(+), 48 deletions(-) diff --git a/lib/web/fetch/body.js b/lib/web/fetch/body.js index 99355d32e78..b092b3c83db 100644 --- a/lib/web/fetch/body.js +++ b/lib/web/fetch/body.js @@ -364,12 +364,8 @@ function bodyMixinMethods (instance, getInternalState) { switch (mimeType.essence) { case 'multipart/form-data': { // 1. ... [long step] - const parsed = multipartFormDataParser(value, mimeType) - // 2. If that fails for some reason, then throw a TypeError. - if (parsed === 'failure') { - throw new TypeError('Failed to parse body as FormData.') - } + const parsed = multipartFormDataParser(value, mimeType) // 3. Return a new FormData object, appending each entry, // resulting from the parsing operation, to its entry list. diff --git a/lib/web/fetch/formdata-parser.js b/lib/web/fetch/formdata-parser.js index 4bbc2c909fe..f43b5fd98e5 100644 --- a/lib/web/fetch/formdata-parser.js +++ b/lib/web/fetch/formdata-parser.js @@ -11,7 +11,7 @@ const { File: NodeFile } = require('node:buffer') const File = globalThis.File ?? NodeFile const formDataNameBuffer = Buffer.from('form-data; name="') -const filenameBuffer = Buffer.from('; filename') +const filenameBuffer = Buffer.from('filename') const dd = Buffer.from('--') const ddcrlf = Buffer.from('--\r\n') @@ -75,7 +75,7 @@ function multipartFormDataParser (input, mimeType) { // Otherwise, let boundary be the result of UTF-8 decoding mimeType’s // parameters["boundary"]. if (boundaryString === undefined) { - return 'failure' + throw parsingError('missing boundary in content-type header') } const boundary = Buffer.from(`--${boundaryString}`, 'utf8') @@ -111,7 +111,7 @@ function multipartFormDataParser (input, mimeType) { if (input.subarray(position.position, position.position + boundary.length).equals(boundary)) { position.position += boundary.length } else { - return 'failure' + throw parsingError('expected a value starting with -- and the boundary') } // 5.2. If position points to the sequence of bytes 0x2D 0x2D 0x0D 0x0A @@ -127,7 +127,7 @@ function multipartFormDataParser (input, mimeType) { // 5.3. If position does not point to a sequence of bytes starting with 0x0D // 0x0A (CR LF), return failure. if (input[position.position] !== 0x0d || input[position.position + 1] !== 0x0a) { - return 'failure' + throw parsingError('expected CRLF') } // 5.4. Advance position by 2. (This skips past the newline.) @@ -138,10 +138,6 @@ function multipartFormDataParser (input, mimeType) { // is not failure. Otherwise, return failure. const result = parseMultipartFormDataHeaders(input, position) - if (result === 'failure') { - return 'failure' - } - let { name, filename, contentType, encoding } = result // 5.6. Advance position by 2. (This skips past the empty line that marks @@ -157,7 +153,7 @@ function multipartFormDataParser (input, mimeType) { const boundaryIndex = input.indexOf(boundary.subarray(2), position.position) if (boundaryIndex === -1) { - return 'failure' + throw parsingError('expected boundary after body') } body = input.subarray(position.position, boundaryIndex - 4) @@ -174,7 +170,7 @@ function multipartFormDataParser (input, mimeType) { // 5.9. If position does not point to a sequence of bytes starting with // 0x0D 0x0A (CR LF), return failure. Otherwise, advance position by 2. if (input[position.position] !== 0x0d || input[position.position + 1] !== 0x0a) { - return 'failure' + throw parsingError('expected CRLF') } else { position.position += 2 } @@ -230,7 +226,7 @@ function parseMultipartFormDataHeaders (input, position) { if (input[position.position] === 0x0d && input[position.position + 1] === 0x0a) { // 2.1.1. If name is null, return failure. if (name === null) { - return 'failure' + throw parsingError('header name is null') } // 2.1.2. Return name, filename and contentType. @@ -250,12 +246,12 @@ function parseMultipartFormDataHeaders (input, position) { // 2.4. If header name does not match the field-name token production, return failure. if (!HTTP_TOKEN_CODEPOINTS.test(headerName.toString())) { - return 'failure' + throw parsingError('header name does not match the field-name token production') } // 2.5. If the byte at position is not 0x3A (:), return failure. if (input[position.position] !== 0x3a) { - return 'failure' + throw parsingError('expected :') } // 2.6. Advance position by 1. @@ -278,7 +274,7 @@ function parseMultipartFormDataHeaders (input, position) { // 2. If position does not point to a sequence of bytes starting with // `form-data; name="`, return failure. if (!bufferStartsWith(input, formDataNameBuffer, position)) { - return 'failure' + throw parsingError('expected form-data; name=" for content-disposition header') } // 3. Advance position so it points at the byte after the next 0x22 (") @@ -290,34 +286,61 @@ function parseMultipartFormDataHeaders (input, position) { // failure. name = parseMultipartFormDataName(input, position) - if (name === null) { - return 'failure' - } - // 5. If position points to a sequence of bytes starting with `; filename="`: - if (bufferStartsWith(input, filenameBuffer, position)) { - // Note: undici also handles filename* - let check = position.position + filenameBuffer.length - - if (input[check] === 0x2a) { - position.position += 1 - check += 1 - } - - if (input[check] !== 0x3d || input[check + 1] !== 0x22) { // =" - return 'failure' - } - - // 1. Advance position so it points at the byte after the next 0x22 (") byte - // (the one in the sequence of bytes matched above). - position.position += 12 - - // 2. Set filename to the result of parsing a multipart/form-data name given - // input and position, if the result is not failure. Otherwise, return failure. - filename = parseMultipartFormDataName(input, position) - - if (filename === null) { - return 'failure' + if (input[position.position] === 0x3b /* ; */ && input[position.position + 1] === 0x20 /* ' ' */) { + const at = { position: position.position + 2 } + + if (bufferStartsWith(input, filenameBuffer, at)) { + if (input[at.position + 8] === 0x2a /* '*' */) { + at.position += 10 // skip past filename*= + + // Remove leading http tab and spaces. See RFC for examples. + // https://datatracker.ietf.org/doc/html/rfc6266#section-5 + collectASequenceOfBytes( + (char) => char === 0x20 || char === 0x09, + input, + at + ) + + const headerValue = collectASequenceOfBytes( + (char) => char !== 0x20 && char !== 0x0d && char !== 0x0a, // ' ' or CRLF + input, + at + ) + + if ( + (headerValue[0] !== 0x75 && headerValue[0] !== 0x55) || // u or U + (headerValue[1] !== 0x74 && headerValue[1] !== 0x54) || // t or T + (headerValue[2] !== 0x66 && headerValue[2] !== 0x46) || // f or F + headerValue[3] !== 0x2d || // - + headerValue[4] !== 0x38 // 8 + ) { + throw parsingError('unknown encoding, expected utf-8\'\'') + } + + // skip utf-8'' + filename = decodeURIComponent(new TextDecoder().decode(headerValue.subarray(7))) + + position.position = at.position + } else { + // 1. Advance position so it points at the byte after the next 0x22 (") byte + // (the one in the sequence of bytes matched above). + position.position += 11 + + // Remove leading http tab and spaces. See RFC for examples. + // https://datatracker.ietf.org/doc/html/rfc6266#section-5 + collectASequenceOfBytes( + (char) => char === 0x20 || char === 0x09, + input, + position + ) + + position.position++ // skip past " after removing whitespace + + // 2. Set filename to the result of parsing a multipart/form-data name given + // input and position, if the result is not failure. Otherwise, return failure. + filename = parseMultipartFormDataName(input, position) + } } } @@ -367,7 +390,7 @@ function parseMultipartFormDataHeaders (input, position) { // 2.9. If position does not point to a sequence of bytes starting with 0x0D 0x0A // (CR LF), return failure. Otherwise, advance position by 2 (past the newline). if (input[position.position] !== 0x0d && input[position.position + 1] !== 0x0a) { - return 'failure' + throw parsingError('expected CRLF') } else { position.position += 2 } @@ -393,7 +416,7 @@ function parseMultipartFormDataName (input, position) { // 3. If the byte at position is not 0x22 ("), return failure. Otherwise, advance position by 1. if (input[position.position] !== 0x22) { - return null // name could be 'failure' + throw parsingError('expected "') } else { position.position++ } @@ -468,6 +491,10 @@ function bufferStartsWith (buffer, start, position) { return true } +function parsingError (cause) { + return new TypeError('Failed to parse body as FormData.', { cause: new TypeError(cause) }) +} + module.exports = { multipartFormDataParser, validateBoundary From 5a8776867b1a8978c441ebb547a4effc1eb2ce40 Mon Sep 17 00:00:00 2001 From: Khafra Date: Fri, 25 Oct 2024 09:30:08 -0400 Subject: [PATCH 2/2] fixup --- test/busboy/issue-3760.js | 59 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 test/busboy/issue-3760.js diff --git a/test/busboy/issue-3760.js b/test/busboy/issue-3760.js new file mode 100644 index 00000000000..d37852c8fac --- /dev/null +++ b/test/busboy/issue-3760.js @@ -0,0 +1,59 @@ +'use strict' + +const { test } = require('node:test') +const assert = require('node:assert') +const { Response } = require('../..') + +// https://github.com/nodejs/undici/issues/3760 +test('filename* parameter is parsed properly', async (t) => { + const response = new Response([ + '--83d82e0d-9ced-44c0-ac79-4e66a827415b\r\n' + + 'Content-Type: text/plain\r\n' + + 'Content-Disposition: form-data; name="file"; filename*=UTF-8\'\'%e2%82%ac%20rates\r\n' + + '\r\n' + + 'testabc\r\n' + + '--83d82e0d-9ced-44c0-ac79-4e66a827415b--\r\n' + + '\r\n' + ].join(''), { + headers: { + 'content-type': 'multipart/form-data; boundary="83d82e0d-9ced-44c0-ac79-4e66a827415b"' + } + }) + + const fd = await response.formData() + assert.deepEqual(fd.get('file').name, '€ rates') +}) + +test('whitespace after filename[*]= is ignored', async () => { + for (const response of [ + new Response([ + '--83d82e0d-9ced-44c0-ac79-4e66a827415b\r\n' + + 'Content-Type: text/plain\r\n' + + 'Content-Disposition: form-data; name="file"; filename*= utf-8\'\'hello\r\n' + + '\r\n' + + 'testabc\r\n' + + '--83d82e0d-9ced-44c0-ac79-4e66a827415b--\r\n' + + '\r\n' + ].join(''), { + headers: { + 'content-type': 'multipart/form-data; boundary="83d82e0d-9ced-44c0-ac79-4e66a827415b"' + } + }), + new Response([ + '--83d82e0d-9ced-44c0-ac79-4e66a827415b\r\n' + + 'Content-Type: text/plain\r\n' + + 'Content-Disposition: form-data; name="file"; filename= "hello"\r\n' + + '\r\n' + + 'testabc\r\n' + + '--83d82e0d-9ced-44c0-ac79-4e66a827415b--\r\n' + + '\r\n' + ].join(''), { + headers: { + 'content-type': 'multipart/form-data; boundary="83d82e0d-9ced-44c0-ac79-4e66a827415b"' + } + }) + ]) { + const fd = await response.formData() + assert.deepEqual(fd.get('file').name, 'hello') + } +})