Skip to content

Commit

Permalink
[refurb] Count codepoints not bytes for `slice-to-remove-prefix-or-su…
Browse files Browse the repository at this point in the history
…ffix (FURB188)` (#13631)
  • Loading branch information
dylwil3 authored Oct 7, 2024
1 parent 27ac34d commit 14ee5db
Show file tree
Hide file tree
Showing 4 changed files with 114 additions and 7 deletions.
30 changes: 29 additions & 1 deletion crates/ruff_linter/resources/test/fixtures/refurb/FURB188.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,4 +169,32 @@ def ignore_step():
text = "!x!y!z"
if text.startswith("!"):
text = text[1::2]
print(text)
print(text)

def handle_unicode():
# should be skipped! ("ř" is a single code point, but the slice drops two characters)
text = "řetězec"
if text.startswith("ř"):
text = text[2:]

# should be linted ("ř" is a single code point and the slice drops exactly one character)
# with fix `text = text.removeprefix("ř")`
text = "řetězec"
if text.startswith("ř"):
text = text[1:]


def handle_surrogates():
# should be linted (both cases: the slice start equals the prefix length counted in code points)
text = "\ud800\udc00heythere"
if text.startswith("\ud800\udc00"):
text = text[2:]
text = "\U00010000heythere"
if text.startswith("\U00010000"):
text = text[1:]

# should not be linted (the prefix is two code points, but the slice drops only one)
text = "\ud800\udc00heythere"
if text.startswith("\ud800\udc00"):
text = text[1:]

Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast as ast;
use ruff_python_semantic::SemanticModel;
use ruff_source_file::Locator;
use ruff_text_size::{Ranged, TextLen};
use ruff_text_size::Ranged;

/// ## What it does
/// Checks for the removal of a prefix or suffix from a string by assigning
Expand Down Expand Up @@ -334,8 +334,9 @@ fn affix_matches_slice_bound(data: &RemoveAffixData, semantic: &SemanticModel) -
}),
) => num
.as_int()
.and_then(ast::Int::as_u32) // Only support prefix removal for size at most `u32::MAX`
.is_some_and(|x| x == string_val.to_str().text_len().to_u32()),
// Only support prefix removal for size at most `usize::MAX`
.and_then(ast::Int::as_usize)
.is_some_and(|x| x == string_val.chars().count()),
(
AffixKind::StartsWith,
ast::Expr::Call(ast::ExprCall {
Expand Down Expand Up @@ -369,8 +370,8 @@ fn affix_matches_slice_bound(data: &RemoveAffixData, semantic: &SemanticModel) -
// Only support prefix removal for size at most `usize::MAX`
value
.as_int()
.and_then(ast::Int::as_u32)
.is_some_and(|x| x == string_val.to_str().text_len().to_u32())
.and_then(ast::Int::as_usize)
.is_some_and(|x| x == string_val.chars().count())
},
),
(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -250,4 +250,74 @@ FURB188.py:162:5: FURB188 [*] Prefer `removeprefix` over conditionally replacing
162 |+ text = text.removeprefix("!")
164 163 | print(text)
165 164 |
166 165 |
166 165 |

FURB188.py:183:5: FURB188 [*] Prefer `removeprefix` over conditionally replacing with slice.
|
181 | # with fix `text = text.removeprefix("ř")`
182 | text = "řetězec"
183 | if text.startswith("ř"):
| _____^
184 | | text = text[1:]
| |_______________________^ FURB188
|
= help: Use removeprefix instead of assignment conditional upon startswith.

Safe fix
180 180 | # should be linted
181 181 | # with fix `text = text.removeprefix("ř")`
182 182 | text = "řetězec"
183 |- if text.startswith("ř"):
184 |- text = text[1:]
183 |+ text = text.removeprefix("ř")
185 184 |
186 185 |
187 186 | def handle_surrogates():

FURB188.py:190:5: FURB188 [*] Prefer `removeprefix` over conditionally replacing with slice.
|
188 | # should be linted
189 | text = "\ud800\udc00heythere"
190 | if text.startswith("\ud800\udc00"):
| _____^
191 | | text = text[2:]
| |_______________________^ FURB188
192 | text = "\U00010000heythere"
193 | if text.startswith("\U00010000"):
|
= help: Use removeprefix instead of assignment conditional upon startswith.

Safe fix
187 187 | def handle_surrogates():
188 188 | # should be linted
189 189 | text = "\ud800\udc00heythere"
190 |- if text.startswith("\ud800\udc00"):
191 |- text = text[2:]
190 |+ text = text.removeprefix("\ud800\udc00")
192 191 | text = "\U00010000heythere"
193 192 | if text.startswith("\U00010000"):
194 193 | text = text[1:]

FURB188.py:193:5: FURB188 [*] Prefer `removeprefix` over conditionally replacing with slice.
|
191 | text = text[2:]
192 | text = "\U00010000heythere"
193 | if text.startswith("\U00010000"):
| _____^
194 | | text = text[1:]
| |_______________________^ FURB188
195 |
196 | # should not be linted
|
= help: Use removeprefix instead of assignment conditional upon startswith.

Safe fix
190 190 | if text.startswith("\ud800\udc00"):
191 191 | text = text[2:]
192 192 | text = "\U00010000heythere"
193 |- if text.startswith("\U00010000"):
194 |- text = text[1:]
193 |+ text = text.removeprefix("\U00010000")
195 194 |
196 195 | # should not be linted
197 196 | text = "\ud800\udc00heythere"
8 changes: 8 additions & 0 deletions crates/ruff_python_ast/src/int.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,14 @@ impl Int {
}
}

/// Return the [`Int`] as a usize, if it can be represented as that data type.
///
/// Only the `Number::Small` representation is converted; a `Number::Big`
/// value always yields `None`.
pub fn as_usize(&self) -> Option<usize> {
    if let Number::Small(small) = &self.0 {
        usize::try_from(*small).ok()
    } else {
        None
    }
}

/// Return the [`Int`] as an i8, if it can be represented as that data type.
pub fn as_i8(&self) -> Option<i8> {
match &self.0 {
Expand Down

0 comments on commit 14ee5db

Please sign in to comment.