From a25b5d6087bba1509f96654c583efcd7796452cd Mon Sep 17 00:00:00 2001 From: dosisod <39638017+dosisod@users.noreply.github.com> Date: Thu, 16 Nov 2023 18:45:46 -0800 Subject: [PATCH] Add `simplify-hashlib-ctor` check: Also bump version for new release. --- docs/checks.md | 28 ++++++++ pyproject.toml | 2 +- refurb/checks/hashlib/simplify_ctor.py | 88 ++++++++++++++++++++++++++ refurb/checks/hashlib/use_hexdigest.py | 4 ++ test/data/err_182.py | 87 +++++++++++++++++++++++++ test/data/err_182.txt | 16 +++++ 6 files changed, 224 insertions(+), 1 deletion(-) create mode 100644 refurb/checks/hashlib/simplify_ctor.py create mode 100644 test/data/err_182.py create mode 100644 test/data/err_182.txt diff --git a/docs/checks.md b/docs/checks.md index e18cf9e..a63bab5 100644 --- a/docs/checks.md +++ b/docs/checks.md @@ -2124,11 +2124,39 @@ Use `.hexdigest()` to get a hex digest from a hash. Bad: ```python +from hashlib import sha512 + hashed = sha512(b"some data").digest().hex() ``` Good: ```python +from hashlib import sha512 + hashed = sha512(b"some data").hexdigest() +``` + +## FURB182: `simplify-hashlib-ctor` + +Categories: `hashlib` `readability` + +You can pass data into `hashlib` constructors, so instead of creating a +hash object and immediately updating it, pass the data directly. 
+ +Bad: + +```python +from hashlib import sha512 + +h = sha512() +h.update(b"data") +``` + +Good: + +```python +from hashlib import sha512 + +h = sha512(b"data") ``` \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index a25ffdf..c0ead03 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "refurb" -version = "1.23.0" +version = "1.24.0" description = "A tool for refurbish and modernize Python codebases" authors = ["dosisod"] license = "GPL-3.0-only" diff --git a/refurb/checks/hashlib/simplify_ctor.py b/refurb/checks/hashlib/simplify_ctor.py new file mode 100644 index 0000000..390bc46 --- /dev/null +++ b/refurb/checks/hashlib/simplify_ctor.py @@ -0,0 +1,88 @@ +from dataclasses import dataclass +from typing import cast + +from mypy.nodes import ( + AssignmentStmt, + Block, + CallExpr, + ExpressionStmt, + MemberExpr, + MypyFile, + NameExpr, + RefExpr, + Statement, +) + +from refurb.checks.common import check_block_like, stringify +from refurb.checks.hashlib.use_hexdigest import HASHLIB_ALGOS +from refurb.error import Error + + +@dataclass +class ErrorInfo(Error): + """ + You can pass data into `hashlib` constructors, so instead of creating a + hash object and immediately updating it, pass the data directly. 
+ + Bad: + + ``` + from hashlib import sha512 + + h = sha512() + h.update(b"data") + ``` + + Good: + + ``` + from hashlib import sha512 + + h = sha512(b"data") + ``` + """ + + name = "simplify-hashlib-ctor" + categories = ("hashlib", "readability") + code = 182 + + +def check(node: Block | MypyFile, errors: list[Error]) -> None: + check_block_like(check_stmts, node, errors) + + +def check_stmts(stmts: list[Statement], errors: list[Error]) -> None: + assignment: AssignmentStmt | None = None + var: RefExpr | None = None + + for stmt in stmts: + match stmt: + case AssignmentStmt( + lvalues=[NameExpr() as lhs], + rvalue=CallExpr(callee=RefExpr(fullname=fn), args=[]), + ) if fn in HASHLIB_ALGOS: + assignment = stmt + var = lhs + + case ExpressionStmt( + expr=CallExpr( + callee=MemberExpr( + expr=RefExpr(fullname=fullname, name=lhs), # type: ignore + name="update", + ), + args=[arg], + ) + ) if assignment and var and var.fullname == fullname: + func_name = stringify(cast(CallExpr, assignment.rvalue).callee) + + data = stringify(arg) + + old = f"{lhs} = {func_name}(); {lhs}.update({data})" + new = f"{lhs} = {func_name}({data})" + + msg = f"Replace `{old}` with `{new}`" + + errors.append(ErrorInfo.from_node(assignment, msg)) + + case _: + assignment = None diff --git a/refurb/checks/hashlib/use_hexdigest.py b/refurb/checks/hashlib/use_hexdigest.py index c6489d0..81cab5f 100644 --- a/refurb/checks/hashlib/use_hexdigest.py +++ b/refurb/checks/hashlib/use_hexdigest.py @@ -14,12 +14,16 @@ class ErrorInfo(Error): Bad: ``` + from hashlib import sha512 + hashed = sha512(b"some data").digest().hex() ``` Good: ``` + from hashlib import sha512 + hashed = sha512(b"some data").hexdigest() ``` """ diff --git a/test/data/err_182.py b/test/data/err_182.py new file mode 100644 index 0000000..340d43e --- /dev/null +++ b/test/data/err_182.py @@ -0,0 +1,87 @@ +import hashlib +from hashlib import ( + blake2b, + blake2s, + md5, + sha1, + sha3_224, + sha3_256, + sha3_384, + sha3_512, + sha224, 
+) +from hashlib import sha256 +from hashlib import sha256 as hash_algo +from hashlib import sha384, sha512, shake_128, shake_256 + +# these will match + +h1 = blake2b() +h1.update(b"data") + +h2 = blake2s() +h2.update(b"data") + +h3 = md5() +h3.update(b"data") + +h4 = sha1() +h4.update(b"data") + +h5 = sha224() +h5.update(b"data") + +h6 = sha256() +h6.update(b"data") + +h7 = sha384() +h7.update(b"data") + +h8 = sha3_224() +h8.update(b"data") + +h9 = sha3_256() +h9.update(b"data") + +h10 = sha3_384() +h10.update(b"data") + +h11 = sha3_512() +h11.update(b"data") + +h12 = sha512() +h12.update(b"data") + +h13 = shake_128() +h13.update(b"data") + +h14 = shake_256() +h14.update(b"data") + +h15 = hashlib.sha256() +h15.update(b"data") + +h16 = hash_algo() +h16.update(b"data") + + +# these will not + +h17 = sha256() +h17.digest() + +h18 = sha256(b"data") +h18.update(b"more data") +h18.digest() + +h19 = sha256() +pass +h19.digest() + +class Hash: + def update(self, data: bytes) -> None: + return None + + +h20 = Hash() +h20.update(b"data") diff --git a/test/data/err_182.txt b/test/data/err_182.txt new file mode 100644 index 0000000..6ccec2d --- /dev/null +++ b/test/data/err_182.txt @@ -0,0 +1,16 @@ +test/data/err_182.py:19:1 [FURB182]: Replace `h1 = blake2b(); h1.update(b'data')` with `h1 = blake2b(b'data')` +test/data/err_182.py:22:1 [FURB182]: Replace `h2 = blake2s(); h2.update(b'data')` with `h2 = blake2s(b'data')` +test/data/err_182.py:25:1 [FURB182]: Replace `h3 = md5(); h3.update(b'data')` with `h3 = md5(b'data')` +test/data/err_182.py:28:1 [FURB182]: Replace `h4 = sha1(); h4.update(b'data')` with `h4 = sha1(b'data')` +test/data/err_182.py:31:1 [FURB182]: Replace `h5 = sha224(); h5.update(b'data')` with `h5 = sha224(b'data')` +test/data/err_182.py:34:1 [FURB182]: Replace `h6 = sha256(); h6.update(b'data')` with `h6 = sha256(b'data')` +test/data/err_182.py:37:1 [FURB182]: Replace `h7 = sha384(); h7.update(b'data')` with `h7 = sha384(b'data')` +test/data/err_182.py:40:1 
[FURB182]: Replace `h8 = sha3_224(); h8.update(b'data')` with `h8 = sha3_224(b'data')` +test/data/err_182.py:43:1 [FURB182]: Replace `h9 = sha3_256(); h9.update(b'data')` with `h9 = sha3_256(b'data')` +test/data/err_182.py:46:1 [FURB182]: Replace `h10 = sha3_384(); h10.update(b'data')` with `h10 = sha3_384(b'data')` +test/data/err_182.py:49:1 [FURB182]: Replace `h11 = sha3_512(); h11.update(b'data')` with `h11 = sha3_512(b'data')` +test/data/err_182.py:52:1 [FURB182]: Replace `h12 = sha512(); h12.update(b'data')` with `h12 = sha512(b'data')` +test/data/err_182.py:55:1 [FURB182]: Replace `h13 = shake_128(); h13.update(b'data')` with `h13 = shake_128(b'data')` +test/data/err_182.py:58:1 [FURB182]: Replace `h14 = shake_256(); h14.update(b'data')` with `h14 = shake_256(b'data')` +test/data/err_182.py:61:1 [FURB182]: Replace `h15 = hashlib.sha256(); h15.update(b'data')` with `h15 = hashlib.sha256(b'data')` +test/data/err_182.py:64:1 [FURB182]: Replace `h16 = hash_algo(); h16.update(b'data')` with `h16 = hash_algo(b'data')`