From ddb65b489d01d696afa1695b75fdd5df3e4ffdf8 Mon Sep 17 00:00:00 2001 From: Bert JW Regeer Date: Thu, 2 Jan 2020 12:16:00 -0800 Subject: [PATCH] Remove accidental stripping of non-printable characters Continuation/follow-up for: https://github.com/Pylons/waitress/security/advisories/GHSA-m5ff-3wj3-8ph4 which showcased the initial problem with the way that waitress was dealing with whitespace in headers. Additional testing by ZeddYu Lu also led to other potential issues with non-printable ascii characters that are stripped by default by Python's string.strip() function --- waitress/parser.py | 17 ++++++++++++++--- waitress/tests/test_parser.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/waitress/parser.py b/waitress/parser.py index 8b07dd6a..fef8a3da 100644 --- a/waitress/parser.py +++ b/waitress/parser.py @@ -213,13 +213,15 @@ def parse_header(self, header_plus): if not header: raise ParsingError("Invalid header") - key, value = header.group('name', 'value') + key, value = header.group("name", "value") if b"_" in key: # TODO(xistence): Should we drop this request instead? continue - value = value.strip() + # Only strip off whitespace that is considered valid whitespace by + # RFC7230, don't strip the rest + value = value.strip(b" \t") key1 = tostr(key.upper().replace(b"-", b"_")) # If a header already exists, we append subsequent values # seperated by a comma. Applications already need to handle @@ -257,7 +259,16 @@ def parse_header(self, header_plus): # here te = headers.pop("TRANSFER_ENCODING", "") - encodings = [encoding.strip().lower() for encoding in te.split(",") if encoding] + # NB: We can not just call bare strip() here because it will also + # remove other non-printable characters that we explicitly do not + # want removed so that if someone attempts to smuggle a request + # with these characters we don't fall prey to it. + # + # For example \x85 is stripped by default, but it is not considered + # valid whitespace to be stripped by RFC7230. + encodings = [ + encoding.strip(" \t").lower() for encoding in te.split(",") if encoding + ] for encoding in encodings: # Out of the transfer-codings listed in diff --git a/waitress/tests/test_parser.py b/waitress/tests/test_parser.py index 8d42600b..5373fd52 100644 --- a/waitress/tests/test_parser.py +++ b/waitress/tests/test_parser.py @@ -236,6 +236,35 @@ def test_parse_header_transfer_encoding_invalid_multiple(self): else: # pragma: nocover self.assertTrue(False) + def test_parse_header_transfer_encoding_invalid_whitespace(self): + from waitress.parser import TransferEncodingNotImplemented + + data = b"GET /foobar HTTP/1.1\r\nTransfer-Encoding:\x85chunked\r\n" + + try: + self.parser.parse_header(data) + except TransferEncodingNotImplemented as e: + self.assertIn("Transfer-Encoding requested is not supported.", e.args[0]) + else: # pragma: nocover + self.assertTrue(False) + + def test_parse_header_transfer_encoding_invalid_unicode(self): + from waitress.parser import TransferEncodingNotImplemented + + # This is the binary encoding for the UTF-8 character + # https://www.compart.com/en/unicode/U+212A "unicode character "K"" + # which if waitress were to accidentally do the wrong thing get + # lowercased to just the ascii "k" due to unicode collisions during + # transformation + data = b"GET /foobar HTTP/1.1\r\nTransfer-Encoding: chun\xe2\x84\xaaed\r\n" + + try: + self.parser.parse_header(data) + except TransferEncodingNotImplemented as e: + self.assertIn("Transfer-Encoding requested is not supported.", e.args[0]) + else: # pragma: nocover + self.assertTrue(False) + def test_parse_header_11_expect_continue(self): data = b"GET /foobar HTTP/1.1\r\nexpect: 100-continue\r\n" self.parser.parse_header(data)