fix filename* parsing (#3768)

* fix filename* parsing * fixup
nodejs · Oct 25, 2024 · 211cc27 · 211cc27
1 parent 02c61d2
commit 211cc27
Show file tree

Hide file tree

Showing 3 changed files with 130 additions and 48 deletions.
diff --git a/lib/web/fetch/body.js b/lib/web/fetch/body.js
@@ -364,12 +364,8 @@ function bodyMixinMethods (instance, getInternalState) {
           switch (mimeType.essence) {
             case 'multipart/form-data': {
               // 1. ... [long step]
-              const parsed = multipartFormDataParser(value, mimeType)
-
               // 2. If that fails for some reason, then throw a TypeError.
-              if (parsed === 'failure') {
-                throw new TypeError('Failed to parse body as FormData.')
-              }
+              const parsed = multipartFormDataParser(value, mimeType)
 
               // 3. Return a new FormData object, appending each entry,
               //    resulting from the parsing operation, to its entry list.

diff --git a/lib/web/fetch/formdata-parser.js b/lib/web/fetch/formdata-parser.js
@@ -11,7 +11,7 @@ const { File: NodeFile } = require('node:buffer')
 const File = globalThis.File ?? NodeFile
 
 const formDataNameBuffer = Buffer.from('form-data; name="')
-const filenameBuffer = Buffer.from('; filename')
+const filenameBuffer = Buffer.from('filename')
 const dd = Buffer.from('--')
 const ddcrlf = Buffer.from('--\r\n')
 
@@ -75,7 +75,7 @@ function multipartFormDataParser (input, mimeType) {
   //    Otherwise, let boundary be the result of UTF-8 decoding mimeType’s
   //    parameters["boundary"].
   if (boundaryString === undefined) {
-    return 'failure'
+    throw parsingError('missing boundary in content-type header')
   }
 
   const boundary = Buffer.from(`--${boundaryString}`, 'utf8')
@@ -111,7 +111,7 @@ function multipartFormDataParser (input, mimeType) {
     if (input.subarray(position.position, position.position + boundary.length).equals(boundary)) {
       position.position += boundary.length
     } else {
-      return 'failure'
+      throw parsingError('expected a value starting with -- and the boundary')
     }
 
     // 5.2. If position points to the sequence of bytes 0x2D 0x2D 0x0D 0x0A
@@ -127,7 +127,7 @@ function multipartFormDataParser (input, mimeType) {
     // 5.3. If position does not point to a sequence of bytes starting with 0x0D
     //      0x0A (CR LF), return failure.
     if (input[position.position] !== 0x0d || input[position.position + 1] !== 0x0a) {
-      return 'failure'
+      throw parsingError('expected CRLF')
     }
 
     // 5.4. Advance position by 2. (This skips past the newline.)
@@ -138,10 +138,6 @@ function multipartFormDataParser (input, mimeType) {
     //      is not failure. Otherwise, return failure.
     const result = parseMultipartFormDataHeaders(input, position)
 
-    if (result === 'failure') {
-      return 'failure'
-    }
-
     let { name, filename, contentType, encoding } = result
 
     // 5.6. Advance position by 2. (This skips past the empty line that marks
@@ -157,7 +153,7 @@ function multipartFormDataParser (input, mimeType) {
       const boundaryIndex = input.indexOf(boundary.subarray(2), position.position)
 
       if (boundaryIndex === -1) {
-        return 'failure'
+        throw parsingError('expected boundary after body')
       }
 
       body = input.subarray(position.position, boundaryIndex - 4)
@@ -174,7 +170,7 @@ function multipartFormDataParser (input, mimeType) {
     // 5.9. If position does not point to a sequence of bytes starting with
     //      0x0D 0x0A (CR LF), return failure. Otherwise, advance position by 2.
     if (input[position.position] !== 0x0d || input[position.position + 1] !== 0x0a) {
-      return 'failure'
+      throw parsingError('expected CRLF')
     } else {
       position.position += 2
     }
@@ -230,7 +226,7 @@ function parseMultipartFormDataHeaders (input, position) {
     if (input[position.position] === 0x0d && input[position.position + 1] === 0x0a) {
       // 2.1.1. If name is null, return failure.
       if (name === null) {
-        return 'failure'
+        throw parsingError('header name is null')
       }
 
       // 2.1.2. Return name, filename and contentType.
@@ -250,12 +246,12 @@ function parseMultipartFormDataHeaders (input, position) {
 
     // 2.4. If header name does not match the field-name token production, return failure.
     if (!HTTP_TOKEN_CODEPOINTS.test(headerName.toString())) {
-      return 'failure'
+      throw parsingError('header name does not match the field-name token production')
     }
 
     // 2.5. If the byte at position is not 0x3A (:), return failure.
     if (input[position.position] !== 0x3a) {
-      return 'failure'
+      throw parsingError('expected :')
     }
 
     // 2.6. Advance position by 1.
@@ -278,7 +274,7 @@ function parseMultipartFormDataHeaders (input, position) {
         // 2. If position does not point to a sequence of bytes starting with
         //    `form-data; name="`, return failure.
         if (!bufferStartsWith(input, formDataNameBuffer, position)) {
-          return 'failure'
+          throw parsingError('expected form-data; name=" for content-disposition header')
         }
 
         // 3. Advance position so it points at the byte after the next 0x22 (")
@@ -290,34 +286,61 @@ function parseMultipartFormDataHeaders (input, position) {
         //    failure.
         name = parseMultipartFormDataName(input, position)
 
-        if (name === null) {
-          return 'failure'
-        }
-
         // 5. If position points to a sequence of bytes starting with `; filename="`:
-        if (bufferStartsWith(input, filenameBuffer, position)) {
-          // Note: undici also handles filename*
-          let check = position.position + filenameBuffer.length
-
-          if (input[check] === 0x2a) {
-            position.position += 1
-            check += 1
-          }
-
-          if (input[check] !== 0x3d || input[check + 1] !== 0x22) { // ="
-            return 'failure'
-          }
-
-          // 1. Advance position so it points at the byte after the next 0x22 (") byte
-          //    (the one in the sequence of bytes matched above).
-          position.position += 12
-
-          // 2. Set filename to the result of parsing a multipart/form-data name given
-          //    input and position, if the result is not failure. Otherwise, return failure.
-          filename = parseMultipartFormDataName(input, position)
-
-          if (filename === null) {
-            return 'failure'
+        if (input[position.position] === 0x3b /* ; */ && input[position.position + 1] === 0x20 /* ' ' */) {
+          const at = { position: position.position + 2 }
+
+          if (bufferStartsWith(input, filenameBuffer, at)) {
+            if (input[at.position + 8] === 0x2a /* '*' */) {
+              at.position += 10 // skip past filename*=
+
+              // Remove leading http tab and spaces. See RFC for examples.
+              // https://datatracker.ietf.org/doc/html/rfc6266#section-5
+              collectASequenceOfBytes(
+                (char) => char === 0x20 || char === 0x09,
+                input,
+                at
+              )
+
+              const headerValue = collectASequenceOfBytes(
+                (char) => char !== 0x20 && char !== 0x0d && char !== 0x0a, // ' ' or CRLF
+                input,
+                at
+              )
+
+              if (
+                (headerValue[0] !== 0x75 && headerValue[0] !== 0x55) || // u or U
+                (headerValue[1] !== 0x74 && headerValue[1] !== 0x54) || // t or T
+                (headerValue[2] !== 0x66 && headerValue[2] !== 0x46) || // f or F
+                headerValue[3] !== 0x2d || // -
+                headerValue[4] !== 0x38 // 8
+              ) {
+                throw parsingError('unknown encoding, expected utf-8\'\'')
+              }
+
+              // skip utf-8''
+              filename = decodeURIComponent(new TextDecoder().decode(headerValue.subarray(7)))
+
+              position.position = at.position
+            } else {
+              // 1. Advance position so it points at the byte after the next 0x22 (") byte
+              //    (the one in the sequence of bytes matched above).
+              position.position += 11
+
+              // Remove leading http tab and spaces. See RFC for examples.
+              // https://datatracker.ietf.org/doc/html/rfc6266#section-5
+              collectASequenceOfBytes(
+                (char) => char === 0x20 || char === 0x09,
+                input,
+                position
+              )
+
+              position.position++ // skip past " after removing whitespace
+
+              // 2. Set filename to the result of parsing a multipart/form-data name given
+              //    input and position, if the result is not failure. Otherwise, return failure.
+              filename = parseMultipartFormDataName(input, position)
+            }
           }
         }
 
@@ -367,7 +390,7 @@ function parseMultipartFormDataHeaders (input, position) {
     // 2.9. If position does not point to a sequence of bytes starting with 0x0D 0x0A
     //      (CR LF), return failure. Otherwise, advance position by 2 (past the newline).
     if (input[position.position] !== 0x0d && input[position.position + 1] !== 0x0a) {
-      return 'failure'
+      throw parsingError('expected CRLF')
     } else {
       position.position += 2
     }
@@ -393,7 +416,7 @@ function parseMultipartFormDataName (input, position) {
 
   // 3. If the byte at position is not 0x22 ("), return failure. Otherwise, advance position by 1.
   if (input[position.position] !== 0x22) {
-    return null // name could be 'failure'
+    throw parsingError('expected "')
   } else {
     position.position++
   }
@@ -468,6 +491,10 @@ function bufferStartsWith (buffer, start, position) {
   return true
 }
 
+function parsingError (cause) {
+  return new TypeError('Failed to parse body as FormData.', { cause: new TypeError(cause) })
+}
+
 module.exports = {
   multipartFormDataParser,
   validateBoundary

diff --git a/test/busboy/issue-3760.js b/test/busboy/issue-3760.js
@@ -0,0 +1,59 @@
+'use strict'
+
+const { test } = require('node:test')
+const assert = require('node:assert')
+const { Response } = require('../..')
+
+// https://github.com/nodejs/undici/issues/3760
+test('filename* parameter is parsed properly', async (t) => {
+  const response = new Response([
+    '--83d82e0d-9ced-44c0-ac79-4e66a827415b\r\n' +
+    'Content-Type: text/plain\r\n' +
+    'Content-Disposition: form-data; name="file"; filename*=UTF-8\'\'%e2%82%ac%20rates\r\n' +
+    '\r\n' +
+    'testabc\r\n' +
+    '--83d82e0d-9ced-44c0-ac79-4e66a827415b--\r\n' +
+    '\r\n'
+  ].join(''), {
+    headers: {
+      'content-type': 'multipart/form-data; boundary="83d82e0d-9ced-44c0-ac79-4e66a827415b"'
+    }
+  })
+
+  const fd = await response.formData()
+  assert.deepEqual(fd.get('file').name, '€ rates')
+})
+
+test('whitespace after filename[*]= is ignored', async () => {
+  for (const response of [
+    new Response([
+      '--83d82e0d-9ced-44c0-ac79-4e66a827415b\r\n' +
+      'Content-Type: text/plain\r\n' +
+      'Content-Disposition: form-data; name="file"; filename*=          utf-8\'\'hello\r\n' +
+      '\r\n' +
+      'testabc\r\n' +
+      '--83d82e0d-9ced-44c0-ac79-4e66a827415b--\r\n' +
+      '\r\n'
+    ].join(''), {
+      headers: {
+        'content-type': 'multipart/form-data; boundary="83d82e0d-9ced-44c0-ac79-4e66a827415b"'
+      }
+    }),
+    new Response([
+      '--83d82e0d-9ced-44c0-ac79-4e66a827415b\r\n' +
+      'Content-Type: text/plain\r\n' +
+      'Content-Disposition: form-data; name="file"; filename=        "hello"\r\n' +
+      '\r\n' +
+      'testabc\r\n' +
+      '--83d82e0d-9ced-44c0-ac79-4e66a827415b--\r\n' +
+      '\r\n'
+    ].join(''), {
+      headers: {
+        'content-type': 'multipart/form-data; boundary="83d82e0d-9ced-44c0-ac79-4e66a827415b"'
+      }
+    })
+  ]) {
+    const fd = await response.formData()
+    assert.deepEqual(fd.get('file').name, 'hello')
+  }
+})