Remove accidental stripping of non-printable characters

Continuation/follow-up for: GHSA-m5ff-3wj3-8ph4 which showcased the initial problem with the way that waitress was dealing with whitespace in headers. Additional testing by ZeddYu Lu also led to other potential issues with non-printable ascii characters that are stripped by default by Python's string.strip() function
Pylons · Jan 2, 2020 · ddb65b4 · ddb65b4
1 parent cd83598
commit ddb65b4
Show file tree

Hide file tree

Showing 2 changed files with 43 additions and 3 deletions.
diff --git a/waitress/parser.py b/waitress/parser.py
@@ -213,13 +213,15 @@ def parse_header(self, header_plus):
             if not header:
                 raise ParsingError("Invalid header")
 
-            key, value = header.group('name', 'value')
+            key, value = header.group("name", "value")
 
             if b"_" in key:
                 # TODO(xistence): Should we drop this request instead?
                 continue
 
-            value = value.strip()
+            # Only strip off whitespace that is considered valid whitespace by
+            # RFC7230, don't strip the rest
+            value = value.strip(b" \t")
             key1 = tostr(key.upper().replace(b"-", b"_"))
             # If a header already exists, we append subsequent values
             # seperated by a comma. Applications already need to handle
@@ -257,7 +259,16 @@ def parse_header(self, header_plus):
             # here
             te = headers.pop("TRANSFER_ENCODING", "")
 
-            encodings = [encoding.strip().lower() for encoding in te.split(",") if encoding]
+            # NB: We can not just call bare strip() here because it will also
+            # remove other non-printable characters that we explicitly do not
+            # want removed so that if someone attempts to smuggle a request
+            # with these characters we don't fall prey to it.
+            #
+            # For example \x85 is stripped by default, but it is not considered
+            # valid whitespace to be stripped by RFC7230.
+            encodings = [
+                encoding.strip(" \t").lower() for encoding in te.split(",") if encoding
+            ]
 
             for encoding in encodings:
                 # Out of the transfer-codings listed in

diff --git a/waitress/tests/test_parser.py b/waitress/tests/test_parser.py
@@ -236,6 +236,35 @@ def test_parse_header_transfer_encoding_invalid_multiple(self):
         else:  # pragma: nocover
             self.assertTrue(False)
 
+    def test_parse_header_transfer_encoding_invalid_whitespace(self):
+        from waitress.parser import TransferEncodingNotImplemented
+
+        data = b"GET /foobar HTTP/1.1\r\nTransfer-Encoding:\x85chunked\r\n"
+
+        try:
+            self.parser.parse_header(data)
+        except TransferEncodingNotImplemented as e:
+            self.assertIn("Transfer-Encoding requested is not supported.", e.args[0])
+        else:  # pragma: nocover
+            self.assertTrue(False)
+
+    def test_parse_header_transfer_encoding_invalid_unicode(self):
+        from waitress.parser import TransferEncodingNotImplemented
+
+        # This is the binary encoding for the UTF-8 character
+        # https://www.compart.com/en/unicode/U+212A "unicode character "K""
+        # which if waitress were to accidentally do the wrong thing get
+        # lowercased to just the ascii "k" due to unicode collisions during
+        # transformation
+        data = b"GET /foobar HTTP/1.1\r\nTransfer-Encoding: chun\xe2\x84\xaaed\r\n"
+
+        try:
+            self.parser.parse_header(data)
+        except TransferEncodingNotImplemented as e:
+            self.assertIn("Transfer-Encoding requested is not supported.", e.args[0])
+        else:  # pragma: nocover
+            self.assertTrue(False)
+
     def test_parse_header_11_expect_continue(self):
         data = b"GET /foobar HTTP/1.1\r\nexpect: 100-continue\r\n"
         self.parser.parse_header(data)