Skip to content

Commit

Permalink
fix filename* parsing (#3768)
Browse files Browse the repository at this point in the history
* fix filename* parsing

* fixup
  • Loading branch information
KhafraDev authored Oct 25, 2024
1 parent 02c61d2 commit 211cc27
Show file tree
Hide file tree
Showing 3 changed files with 130 additions and 48 deletions.
6 changes: 1 addition & 5 deletions lib/web/fetch/body.js
Original file line number Diff line number Diff line change
Expand Up @@ -364,12 +364,8 @@ function bodyMixinMethods (instance, getInternalState) {
switch (mimeType.essence) {
case 'multipart/form-data': {
// 1. ... [long step]
const parsed = multipartFormDataParser(value, mimeType)

// 2. If that fails for some reason, then throw a TypeError.
if (parsed === 'failure') {
throw new TypeError('Failed to parse body as FormData.')
}
const parsed = multipartFormDataParser(value, mimeType)

// 3. Return a new FormData object, appending each entry,
// resulting from the parsing operation, to its entry list.
Expand Down
113 changes: 70 additions & 43 deletions lib/web/fetch/formdata-parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ const { File: NodeFile } = require('node:buffer')
const File = globalThis.File ?? NodeFile

const formDataNameBuffer = Buffer.from('form-data; name="')
const filenameBuffer = Buffer.from('; filename')
const filenameBuffer = Buffer.from('filename')
const dd = Buffer.from('--')
const ddcrlf = Buffer.from('--\r\n')

Expand Down Expand Up @@ -75,7 +75,7 @@ function multipartFormDataParser (input, mimeType) {
// Otherwise, let boundary be the result of UTF-8 decoding mimeType’s
// parameters["boundary"].
if (boundaryString === undefined) {
return 'failure'
throw parsingError('missing boundary in content-type header')
}

const boundary = Buffer.from(`--${boundaryString}`, 'utf8')
Expand Down Expand Up @@ -111,7 +111,7 @@ function multipartFormDataParser (input, mimeType) {
if (input.subarray(position.position, position.position + boundary.length).equals(boundary)) {
position.position += boundary.length
} else {
return 'failure'
throw parsingError('expected a value starting with -- and the boundary')
}

// 5.2. If position points to the sequence of bytes 0x2D 0x2D 0x0D 0x0A
Expand All @@ -127,7 +127,7 @@ function multipartFormDataParser (input, mimeType) {
// 5.3. If position does not point to a sequence of bytes starting with 0x0D
// 0x0A (CR LF), return failure.
if (input[position.position] !== 0x0d || input[position.position + 1] !== 0x0a) {
return 'failure'
throw parsingError('expected CRLF')
}

// 5.4. Advance position by 2. (This skips past the newline.)
Expand All @@ -138,10 +138,6 @@ function multipartFormDataParser (input, mimeType) {
// is not failure. Otherwise, return failure.
const result = parseMultipartFormDataHeaders(input, position)

if (result === 'failure') {
return 'failure'
}

let { name, filename, contentType, encoding } = result

// 5.6. Advance position by 2. (This skips past the empty line that marks
Expand All @@ -157,7 +153,7 @@ function multipartFormDataParser (input, mimeType) {
const boundaryIndex = input.indexOf(boundary.subarray(2), position.position)

if (boundaryIndex === -1) {
return 'failure'
throw parsingError('expected boundary after body')
}

body = input.subarray(position.position, boundaryIndex - 4)
Expand All @@ -174,7 +170,7 @@ function multipartFormDataParser (input, mimeType) {
// 5.9. If position does not point to a sequence of bytes starting with
// 0x0D 0x0A (CR LF), return failure. Otherwise, advance position by 2.
if (input[position.position] !== 0x0d || input[position.position + 1] !== 0x0a) {
return 'failure'
throw parsingError('expected CRLF')
} else {
position.position += 2
}
Expand Down Expand Up @@ -230,7 +226,7 @@ function parseMultipartFormDataHeaders (input, position) {
if (input[position.position] === 0x0d && input[position.position + 1] === 0x0a) {
// 2.1.1. If name is null, return failure.
if (name === null) {
return 'failure'
throw parsingError('header name is null')
}

// 2.1.2. Return name, filename and contentType.
Expand All @@ -250,12 +246,12 @@ function parseMultipartFormDataHeaders (input, position) {

// 2.4. If header name does not match the field-name token production, return failure.
if (!HTTP_TOKEN_CODEPOINTS.test(headerName.toString())) {
return 'failure'
throw parsingError('header name does not match the field-name token production')
}

// 2.5. If the byte at position is not 0x3A (:), return failure.
if (input[position.position] !== 0x3a) {
return 'failure'
throw parsingError('expected :')
}

// 2.6. Advance position by 1.
Expand All @@ -278,7 +274,7 @@ function parseMultipartFormDataHeaders (input, position) {
// 2. If position does not point to a sequence of bytes starting with
// `form-data; name="`, return failure.
if (!bufferStartsWith(input, formDataNameBuffer, position)) {
return 'failure'
throw parsingError('expected form-data; name=" for content-disposition header')
}

// 3. Advance position so it points at the byte after the next 0x22 (")
Expand All @@ -290,34 +286,61 @@ function parseMultipartFormDataHeaders (input, position) {
// failure.
name = parseMultipartFormDataName(input, position)

if (name === null) {
return 'failure'
}

// 5. If position points to a sequence of bytes starting with `; filename="`:
if (bufferStartsWith(input, filenameBuffer, position)) {
// Note: undici also handles filename*
let check = position.position + filenameBuffer.length

if (input[check] === 0x2a) {
position.position += 1
check += 1
}

if (input[check] !== 0x3d || input[check + 1] !== 0x22) { // ="
return 'failure'
}

// 1. Advance position so it points at the byte after the next 0x22 (") byte
// (the one in the sequence of bytes matched above).
position.position += 12

// 2. Set filename to the result of parsing a multipart/form-data name given
// input and position, if the result is not failure. Otherwise, return failure.
filename = parseMultipartFormDataName(input, position)

if (filename === null) {
return 'failure'
if (input[position.position] === 0x3b /* ; */ && input[position.position + 1] === 0x20 /* ' ' */) {
const at = { position: position.position + 2 }

if (bufferStartsWith(input, filenameBuffer, at)) {
if (input[at.position + 8] === 0x2a /* '*' */) {
at.position += 10 // skip past filename*=

// Remove leading http tab and spaces. See RFC for examples.
// https://datatracker.ietf.org/doc/html/rfc6266#section-5
collectASequenceOfBytes(
(char) => char === 0x20 || char === 0x09,
input,
at
)

const headerValue = collectASequenceOfBytes(
(char) => char !== 0x20 && char !== 0x0d && char !== 0x0a, // ' ' or CRLF
input,
at
)

if (
(headerValue[0] !== 0x75 && headerValue[0] !== 0x55) || // u or U
(headerValue[1] !== 0x74 && headerValue[1] !== 0x54) || // t or T
(headerValue[2] !== 0x66 && headerValue[2] !== 0x46) || // f or F
headerValue[3] !== 0x2d || // -
headerValue[4] !== 0x38 // 8
) {
throw parsingError('unknown encoding, expected utf-8\'\'')
}

// skip utf-8''
filename = decodeURIComponent(new TextDecoder().decode(headerValue.subarray(7)))

position.position = at.position
} else {
// 1. Advance position so it points at the byte after the next 0x22 (") byte
// (the one in the sequence of bytes matched above).
position.position += 11

// Remove leading http tab and spaces. See RFC for examples.
// https://datatracker.ietf.org/doc/html/rfc6266#section-5
collectASequenceOfBytes(
(char) => char === 0x20 || char === 0x09,
input,
position
)

position.position++ // skip past " after removing whitespace

// 2. Set filename to the result of parsing a multipart/form-data name given
// input and position, if the result is not failure. Otherwise, return failure.
filename = parseMultipartFormDataName(input, position)
}
}
}

Expand Down Expand Up @@ -367,7 +390,7 @@ function parseMultipartFormDataHeaders (input, position) {
// 2.9. If position does not point to a sequence of bytes starting with 0x0D 0x0A
// (CR LF), return failure. Otherwise, advance position by 2 (past the newline).
if (input[position.position] !== 0x0d && input[position.position + 1] !== 0x0a) {
return 'failure'
throw parsingError('expected CRLF')
} else {
position.position += 2
}
Expand All @@ -393,7 +416,7 @@ function parseMultipartFormDataName (input, position) {

// 3. If the byte at position is not 0x22 ("), return failure. Otherwise, advance position by 1.
if (input[position.position] !== 0x22) {
return null // name could be 'failure'
throw parsingError('expected "')
} else {
position.position++
}
Expand Down Expand Up @@ -468,6 +491,10 @@ function bufferStartsWith (buffer, start, position) {
return true
}

function parsingError (cause) {
return new TypeError('Failed to parse body as FormData.', { cause: new TypeError(cause) })
}

module.exports = {
multipartFormDataParser,
validateBoundary
Expand Down
59 changes: 59 additions & 0 deletions test/busboy/issue-3760.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
'use strict'

const { test } = require('node:test')
const assert = require('node:assert')
const { Response } = require('../..')

// https://github.com/nodejs/undici/issues/3760
test('filename* parameter is parsed properly', async (t) => {
const response = new Response([
'--83d82e0d-9ced-44c0-ac79-4e66a827415b\r\n' +
'Content-Type: text/plain\r\n' +
'Content-Disposition: form-data; name="file"; filename*=UTF-8\'\'%e2%82%ac%20rates\r\n' +
'\r\n' +
'testabc\r\n' +
'--83d82e0d-9ced-44c0-ac79-4e66a827415b--\r\n' +
'\r\n'
].join(''), {
headers: {
'content-type': 'multipart/form-data; boundary="83d82e0d-9ced-44c0-ac79-4e66a827415b"'
}
})

const fd = await response.formData()
assert.deepEqual(fd.get('file').name, '€ rates')
})

test('whitespace after filename[*]= is ignored', async () => {
for (const response of [
new Response([
'--83d82e0d-9ced-44c0-ac79-4e66a827415b\r\n' +
'Content-Type: text/plain\r\n' +
'Content-Disposition: form-data; name="file"; filename*= utf-8\'\'hello\r\n' +
'\r\n' +
'testabc\r\n' +
'--83d82e0d-9ced-44c0-ac79-4e66a827415b--\r\n' +
'\r\n'
].join(''), {
headers: {
'content-type': 'multipart/form-data; boundary="83d82e0d-9ced-44c0-ac79-4e66a827415b"'
}
}),
new Response([
'--83d82e0d-9ced-44c0-ac79-4e66a827415b\r\n' +
'Content-Type: text/plain\r\n' +
'Content-Disposition: form-data; name="file"; filename= "hello"\r\n' +
'\r\n' +
'testabc\r\n' +
'--83d82e0d-9ced-44c0-ac79-4e66a827415b--\r\n' +
'\r\n'
].join(''), {
headers: {
'content-type': 'multipart/form-data; boundary="83d82e0d-9ced-44c0-ac79-4e66a827415b"'
}
})
]) {
const fd = await response.formData()
assert.deepEqual(fd.get('file').name, 'hello')
}
})

0 comments on commit 211cc27

Please # to comment.