From 14ee5dbfde5c7bd07cd6d6fefee529cfd8291c73 Mon Sep 17 00:00:00 2001 From: Dylan <53534755+dylwil3@users.noreply.github.com> Date: Mon, 7 Oct 2024 09:13:28 -0500 Subject: [PATCH] [refurb] Count codepoints not bytes for `slice-to-remove-prefix-or-suffix (FURB188)` (#13631) --- .../resources/test/fixtures/refurb/FURB188.py | 30 +++++++- .../rules/slice_to_remove_prefix_or_suffix.rs | 11 +-- ...es__refurb__tests__FURB188_FURB188.py.snap | 72 ++++++++++++++++++- crates/ruff_python_ast/src/int.rs | 8 +++ 4 files changed, 114 insertions(+), 7 deletions(-) diff --git a/crates/ruff_linter/resources/test/fixtures/refurb/FURB188.py b/crates/ruff_linter/resources/test/fixtures/refurb/FURB188.py index 45a39257f3255..45935595183a3 100644 --- a/crates/ruff_linter/resources/test/fixtures/refurb/FURB188.py +++ b/crates/ruff_linter/resources/test/fixtures/refurb/FURB188.py @@ -169,4 +169,32 @@ def ignore_step(): text = "!x!y!z" if text.startswith("!"): text = text[1::2] - print(text) \ No newline at end of file + print(text) + +def handle_unicode(): + # should be skipped! 
+ text = "řetězec" + if text.startswith("ř"): + text = text[2:] + + # should be linted + # with fix `text = text.removeprefix("ř")` + text = "řetězec" + if text.startswith("ř"): + text = text[1:] + + +def handle_surrogates(): + # should be linted + text = "\ud800\udc00heythere" + if text.startswith("\ud800\udc00"): + text = text[2:] + text = "\U00010000heythere" + if text.startswith("\U00010000"): + text = text[1:] + + # should not be linted + text = "\ud800\udc00heythere" + if text.startswith("\ud800\udc00"): + text = text[1:] + \ No newline at end of file diff --git a/crates/ruff_linter/src/rules/refurb/rules/slice_to_remove_prefix_or_suffix.rs b/crates/ruff_linter/src/rules/refurb/rules/slice_to_remove_prefix_or_suffix.rs index e61cb1dc13696..44443032f242f 100644 --- a/crates/ruff_linter/src/rules/refurb/rules/slice_to_remove_prefix_or_suffix.rs +++ b/crates/ruff_linter/src/rules/refurb/rules/slice_to_remove_prefix_or_suffix.rs @@ -4,7 +4,7 @@ use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast as ast; use ruff_python_semantic::SemanticModel; use ruff_source_file::Locator; -use ruff_text_size::{Ranged, TextLen}; +use ruff_text_size::Ranged; /// ## What it does /// Checks for the removal of a prefix or suffix from a string by assigning @@ -334,8 +334,9 @@ fn affix_matches_slice_bound(data: &RemoveAffixData, semantic: &SemanticModel) - }), ) => num .as_int() - .and_then(ast::Int::as_u32) // Only support prefix removal for size at most `u32::MAX` - .is_some_and(|x| x == string_val.to_str().text_len().to_u32()), + // Only support prefix removal for size at most `usize::MAX` + .and_then(ast::Int::as_usize) + .is_some_and(|x| x == string_val.chars().count()), ( AffixKind::StartsWith, ast::Expr::Call(ast::ExprCall { @@ -369,8 +370,8 @@ fn affix_matches_slice_bound(data: &RemoveAffixData, semantic: &SemanticModel) - // Only support prefix removal for size at most `u32::MAX` value .as_int() - .and_then(ast::Int::as_u32) - .is_some_and(|x| x == 
string_val.to_str().text_len().to_u32()) + .and_then(ast::Int::as_usize) + .is_some_and(|x| x == string_val.chars().count()) }, ), ( diff --git a/crates/ruff_linter/src/rules/refurb/snapshots/ruff_linter__rules__refurb__tests__FURB188_FURB188.py.snap b/crates/ruff_linter/src/rules/refurb/snapshots/ruff_linter__rules__refurb__tests__FURB188_FURB188.py.snap index 89a0c17633e70..ddcc3676ac252 100644 --- a/crates/ruff_linter/src/rules/refurb/snapshots/ruff_linter__rules__refurb__tests__FURB188_FURB188.py.snap +++ b/crates/ruff_linter/src/rules/refurb/snapshots/ruff_linter__rules__refurb__tests__FURB188_FURB188.py.snap @@ -250,4 +250,74 @@ FURB188.py:162:5: FURB188 [*] Prefer `removeprefix` over conditionally replacing 162 |+ text = text.removeprefix("!") 164 163 | print(text) 165 164 | -166 165 | +166 165 | + +FURB188.py:183:5: FURB188 [*] Prefer `removeprefix` over conditionally replacing with slice. + | +181 | # with fix `text = text.removeprefix("ř")` +182 | text = "řetězec" +183 | if text.startswith("ř"): + | _____^ +184 | | text = text[1:] + | |_______________________^ FURB188 + | + = help: Use removeprefix instead of assignment conditional upon startswith. + +ℹ Safe fix +180 180 | # should be linted +181 181 | # with fix `text = text.removeprefix("ř")` +182 182 | text = "řetězec" +183 |- if text.startswith("ř"): +184 |- text = text[1:] + 183 |+ text = text.removeprefix("ř") +185 184 | +186 185 | +187 186 | def handle_surrogates(): + +FURB188.py:190:5: FURB188 [*] Prefer `removeprefix` over conditionally replacing with slice. + | +188 | # should be linted +189 | text = "\ud800\udc00heythere" +190 | if text.startswith("\ud800\udc00"): + | _____^ +191 | | text = text[2:] + | |_______________________^ FURB188 +192 | text = "\U00010000heythere" +193 | if text.startswith("\U00010000"): + | + = help: Use removeprefix instead of assignment conditional upon startswith. 
+ +ℹ Safe fix +187 187 | def handle_surrogates(): +188 188 | # should be linted +189 189 | text = "\ud800\udc00heythere" +190 |- if text.startswith("\ud800\udc00"): +191 |- text = text[2:] + 190 |+ text = text.removeprefix("\ud800\udc00") +192 191 | text = "\U00010000heythere" +193 192 | if text.startswith("\U00010000"): +194 193 | text = text[1:] + +FURB188.py:193:5: FURB188 [*] Prefer `removeprefix` over conditionally replacing with slice. + | +191 | text = text[2:] +192 | text = "\U00010000heythere" +193 | if text.startswith("\U00010000"): + | _____^ +194 | | text = text[1:] + | |_______________________^ FURB188 +195 | +196 | # should not be linted + | + = help: Use removeprefix instead of assignment conditional upon startswith. + +ℹ Safe fix +190 190 | if text.startswith("\ud800\udc00"): +191 191 | text = text[2:] +192 192 | text = "\U00010000heythere" +193 |- if text.startswith("\U00010000"): +194 |- text = text[1:] + 193 |+ text = text.removeprefix("\U00010000") +195 194 | +196 195 | # should not be linted +197 196 | text = "\ud800\udc00heythere" diff --git a/crates/ruff_python_ast/src/int.rs b/crates/ruff_python_ast/src/int.rs index 08f3d39119545..bbcf1b0b2a349 100644 --- a/crates/ruff_python_ast/src/int.rs +++ b/crates/ruff_python_ast/src/int.rs @@ -96,6 +96,14 @@ impl Int { } } + /// Return the [`Int`] as a usize, if it can be represented as that data type. + pub fn as_usize(&self) -> Option<usize> { + match &self.0 { + Number::Small(small) => usize::try_from(*small).ok(), + Number::Big(_) => None, + } + } + /// Return the [`Int`] as an i8, if it can be represented as that data type. pub fn as_i8(&self) -> Option<i8> { match &self.0 {