Skip to content

Commit

Permalink
Improve performance of sub-authority splitting in URL
Browse files Browse the repository at this point in the history
  • Loading branch information
sethmlarson committed May 26, 2021
1 parent 2698537 commit 2d4a3fe
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 3 deletions.
8 changes: 5 additions & 3 deletions src/urllib3/util/url.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,12 @@
BRACELESS_IPV6_ADDRZ_RE = re.compile("^" + IPV6_ADDRZ_PAT[2:-2] + "$")
ZONE_ID_RE = re.compile("(" + ZONE_ID_PAT + r")\]$")

- SUBAUTHORITY_PAT = (u"^(?:(.*)@)?(%s|%s|%s)(?::([0-9]{0,5}))?$") % (
+ _HOST_PORT_PAT = ("^(%s|%s|%s)(?::([0-9]{0,5}))?$") % (
REG_NAME_PAT,
IPV4_PAT,
IPV6_ADDRZ_PAT,
)
- SUBAUTHORITY_RE = re.compile(SUBAUTHORITY_PAT, re.UNICODE | re.DOTALL)
+ _HOST_PORT_RE = re.compile(_HOST_PORT_PAT, re.UNICODE | re.DOTALL)

UNRESERVED_CHARS = set(
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._-~"
Expand Down Expand Up @@ -365,7 +365,9 @@ def parse_url(url):
scheme = scheme.lower()

if authority:
- auth, host, port = SUBAUTHORITY_RE.match(authority).groups()
+ auth, _, host_port = authority.rpartition("@")
+ auth = auth or None
+ host, port = _HOST_PORT_RE.match(host_port).groups()
if auth and normalize_uri:
auth = _encode_invalid_chars(auth, USERINFO_CHARS)
if port == "":
Expand Down
10 changes: 10 additions & 0 deletions test/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,16 @@ def test_netloc(self, url, expected_netloc):
fragment="hash",
),
),
# Tons of '@' causing backtracking
("https://" + ("@" * 10000) + "[", False),
(
"https://user:" + ("@" * 10000) + "example.com",
Url(
scheme="https",
auth="user:" + ("%40" * 9999),
host="example.com",
),
),
]

@pytest.mark.parametrize("url, expected_url", url_vulnerabilities)
Expand Down

0 comments on commit 2d4a3fe

Please sign in to comment.