Skip to content

Commit

Permalink
Fix catastrophic performance in lines_with_leading_tabs_expanded() (#4278)
Browse files Browse the repository at this point in the history
  • Loading branch information
JelleZijlstra authored Mar 15, 2024
1 parent 7b5a657 commit f000936
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 13 deletions.
13 changes: 12 additions & 1 deletion CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,14 @@

<!-- Include any especially major or disruptive changes here -->

This release is a milestone: it fixes Black's first CVE security vulnerability. If you
run Black on untrusted input, or if you habitually put thousands of leading tab
characters in your docstrings, you are strongly encouraged to upgrade immediately to fix
[CVE-2024-21503](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2024-21503).

This release also fixes a bug in Black's AST safety check that allowed Black to make
incorrect changes to certain f-strings that are valid in Python 3.12 and higher.

### Stable style

<!-- Changes that affect Black's stable style -->
Expand Down Expand Up @@ -36,7 +44,10 @@

### Performance

<!-- Changes that improve Black's performance. -->
- Fix catastrophic performance on docstrings that contain large numbers of leading tab
characters. This fixes
[CVE-2024-21503](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2024-21503).
(#4278)

### Output

Expand Down
18 changes: 6 additions & 12 deletions src/black/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
STRING_PREFIX_RE: Final = re.compile(
r"^([" + STRING_PREFIX_CHARS + r"]*)(.*)$", re.DOTALL
)
FIRST_NON_WHITESPACE_RE: Final = re.compile(r"\s*\t+\s*(\S)")
UNICODE_ESCAPE_RE: Final = re.compile(
r"(?P<backslashes>\\+)(?P<body>"
r"(u(?P<u>[a-fA-F0-9]{4}))" # Character with 16-bit hex value xxxx
Expand Down Expand Up @@ -51,18 +50,13 @@ def lines_with_leading_tabs_expanded(s: str) -> List[str]:
"""
lines = []
for line in s.splitlines():
# Find the index of the first non-whitespace character after a string of
# whitespace that includes at least one tab
match = FIRST_NON_WHITESPACE_RE.match(line)
if match:
first_non_whitespace_idx = match.start(1)

lines.append(
line[:first_non_whitespace_idx].expandtabs()
+ line[first_non_whitespace_idx:]
)
else:
stripped_line = line.lstrip()
if not stripped_line or stripped_line == line:
lines.append(line)
else:
prefix_length = len(line) - len(stripped_line)
prefix = line[:prefix_length].expandtabs()
lines.append(prefix + stripped_line)
if s.endswith("\n"):
lines.append("")
return lines
Expand Down
12 changes: 12 additions & 0 deletions tests/test_black.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
from black.output import color_diff, diff
from black.parsing import ASTSafetyError
from black.report import Report
from black.strings import lines_with_leading_tabs_expanded

# Import other test classes
from tests.util import (
Expand Down Expand Up @@ -2041,6 +2042,17 @@ def test_line_ranges_in_pyproject_toml(self) -> None:
b"Cannot use line-ranges in the pyproject.toml file." in result.stderr_bytes
)

def test_lines_with_leading_tabs_expanded(self) -> None:
    # Regression test for CVE-2024-21503. The main point of the first check
    # is that a very long run of tabs is processed in a reasonable time; a
    # line made up solely of tabs is expected to come back unchanged.
    all_tabs = "\t" * 10_000
    assert lines_with_leading_tabs_expanded(all_tabs) == [all_tabs]

    # Each leading tab is expected to expand to eight spaces, while a line
    # indented with spaces only is expected to pass through untouched.
    indent = " " * 8
    cases = (
        ("\tx", [indent + "x"]),
        ("\t\tx", [indent + indent + "x"]),
        ("\tx\n y", [indent + "x", " y"]),
    )
    for text, expected in cases:
        assert lines_with_leading_tabs_expanded(text) == expected


class TestCaching:
def test_get_cache_dir(
Expand Down

0 comments on commit f000936

Please sign in to comment.