-
-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Ignore quantifiers when splitting comma-separated regexes #7241
New issue
Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “#”, you agree to our terms of service and privacy statement. Weβll occasionally send you account related emails.
Already on GitHub? # to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
When parsing comma-separated lists of regular expressions in the config, ignore | ||
commas that are inside braces since those indicate quantiers, not dilineation | ||
between expressions. | ||
|
||
Closes #7229 |
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
|
@@ -14,6 +14,7 @@ | |||
HAS_ISORT_5, | ||||
IsortDriver, | ||||
_check_csv, | ||||
_check_regexp_csv, | ||||
_format_option_value, | ||||
_splitstrip, | ||||
_unquote, | ||||
|
@@ -34,6 +35,7 @@ | |||
"HAS_ISORT_5", | ||||
"IsortDriver", | ||||
"_check_csv", | ||||
"_check_regexp_csv", | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since this is private, this should not be in
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The PR currently uses it on the module, so it has to stay, unless we refactor. It was a reasonable choice for the PR author to adhere to the existing muddled pattern. I'm inclined to merge as is. |
||||
"_format_option_value", | ||||
"_splitstrip", | ||||
"_unquote", | ||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,7 +21,8 @@ | |
import textwrap | ||
import tokenize | ||
import warnings | ||
from collections.abc import Sequence | ||
from collections import deque | ||
from collections.abc import Iterable, Sequence | ||
from io import BufferedReader, BytesIO | ||
from typing import ( | ||
TYPE_CHECKING, | ||
|
@@ -328,6 +329,29 @@ def _check_csv(value: list[str] | tuple[str] | str) -> Sequence[str]: | |
return _splitstrip(value) | ||
|
||
|
||
def _check_regexp_csv(value: list[str] | tuple[str] | str) -> Iterable[str]: | ||
if isinstance(value, (list, tuple)): | ||
yield from value | ||
else: | ||
# None is a sentinel value here | ||
regexps: deque[deque[str] | None] = deque([None]) | ||
open_braces = False | ||
for char in value: | ||
if char == "{": | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Depending on the result of the discussion we should warn the user of deprecation here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. wouldn't it be on line 346? That's where we split regular expressions. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You're right, my bad. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I know a year ago we were floating the idea of deprecations, but we're not ready to commit to a new plan IMO. |
||
open_braces = True | ||
elif char == "}" and open_braces: | ||
open_braces = False | ||
|
||
if char == "," and not open_braces: | ||
regexps.append(None) | ||
elif regexps[-1] is None: | ||
regexps.pop() | ||
regexps.append(deque([char])) | ||
else: | ||
regexps[-1].append(char) | ||
yield from ("".join(regexp).strip() for regexp in regexps if regexp is not None) | ||
|
||
|
||
def _comment(string: str) -> str: | ||
"""Return string as a comment.""" | ||
lines = [line.strip() for line in string.splitlines()] | ||
|
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
|
@@ -5,6 +5,8 @@ | |||
from __future__ import annotations | ||||
|
||||
import os | ||||
import re | ||||
import timeit | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||
from pathlib import Path | ||||
|
||||
import pytest | ||||
|
@@ -111,6 +113,31 @@ def test_unknown_py_version(capsys: CaptureFixture) -> None: | |||
assert "the-newest has an invalid format, should be a version string." in output.err | ||||
|
||||
|
||||
CSV_REGEX_COMMA_CASES = [ | ||||
("foo", ["foo"]), | ||||
("foo,bar", ["foo", "bar"]), | ||||
("foo, bar", ["foo", "bar"]), | ||||
("foo, bar{1,3}", ["foo", "bar{1,3}"]), | ||||
] | ||||
|
||||
|
||||
@pytest.mark.parametrize("in_string,expected", CSV_REGEX_COMMA_CASES) | ||||
def test_csv_regex_comma_in_quantifier(in_string, expected) -> None: | ||||
"""Check that we correctly parse a comma-separated regex when there are one | ||||
or more commas within quantifier expressions. | ||||
""" | ||||
|
||||
def _template_run(in_string): | ||||
r = Run( | ||||
[str(EMPTY_MODULE), rf"--bad-names-rgx={in_string}"], | ||||
exit=False, | ||||
) | ||||
return r.linter.config.bad_names_rgxs | ||||
|
||||
assert _template_run(in_string) == [re.compile(regex) for regex in expected] | ||||
|
||||
|
||||
|
||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||
def test_short_verbose(capsys: CaptureFixture) -> None: | ||||
"""Check that we correctly handle the -v flag.""" | ||||
Run([str(EMPTY_MODULE), "-v"], exit=False) | ||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.