Skip to content

Commit

Permalink
Strict inequality separator and new pattern (#684)
Browse files Browse the repository at this point in the history
* Strict inequality (!==) in JS separator

* Use thrifty=True by default in console tool

* testfix

* optimization

* [skip actions] [work] 2025-02-21T16:54:52+02:00

* [skip actions] [work] 2025-02-21T17:40:15+02:00

* rollback

* Tavily API Key pattern
  • Loading branch information
babenek authored Feb 24, 2025
1 parent 2211238 commit f618f3c
Show file tree
Hide file tree
Showing 16 changed files with 240 additions and 12 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ Full documentation can be found here: <https://credsweeper.readthedocs.io/>

### Main Requirements

- Python 3.8, 3.9, 3.10, 3.11, 3.12
- Python 3.9, 3.10, 3.11, 3.12

### Installation

Expand Down
4 changes: 2 additions & 2 deletions credsweeper/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,8 +205,8 @@ def get_arguments() -> Namespace:
metavar="POSITIVE_INT")
parser.add_argument("--thrifty",
help="clear objects after scan to reduce memory consumption",
action="store_const",
const=True)
action=BooleanOptionalAction,
default=True)
parser.add_argument("--skip_ignored",
help="parse .gitignore files and skip credentials from ignored objects",
dest="skip_ignored",
Expand Down
2 changes: 1 addition & 1 deletion credsweeper/common/keyword_pattern.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class KeywordPattern:
r"(&(quot|apos);|%[0-9a-f]{2}|[`'\"])*" \
r")" # <variable>
separator = r"(\s|\\{1,8}[tnr])*\]?(\s|\\{1,8}[tnr])*" \
r"(?P<separator>:(\s[a-z]{3,9}[?]?\s)?=|:|=(>|&gt;|\\u0026gt;)|!=|===|==|=|%3d)" \
r"(?P<separator>:(\s[a-z]{3,9}[?]?\s)?=|:|=(>|&gt;|\\u0026gt;)|!==|!=|===|==|=|%3d)" \
r"(\s|\\{1,8}[tnr])*"
# might be curly, square or parenthesis with words before
wrap = r"(?P<wrap>(" \
Expand Down
2 changes: 1 addition & 1 deletion credsweeper/filters/value_allowlist_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class ValueAllowlistCheck(Filter):
r"\$\{(\*|[0-9]+|[a-z_].*)\}", #
r"\$[0-9]+(\s|$)", #
r"\$\$[a-z_]+(\^%[0-9a-z_]+)?", #
r"#\{.*\}", #
r"#\{.+\}", # Ruby: String Interpolation
r"\{\{.+\}\}", #
r".*@@@hl@@@(암호|비번|PW|PASS)@@@endhl@@@", #
]
Expand Down
3 changes: 3 additions & 0 deletions credsweeper/filters/value_blocklist_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,11 @@ class ValueBlocklistCheck(Filter):
"true",
"false",
"null",
"none",
"bearer",
"string",
"value",
"undefined",
]

def __init__(self, config: Config = None) -> None:
Expand Down
16 changes: 16 additions & 0 deletions credsweeper/rules/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1391,6 +1391,22 @@
- code
- doc

- name: Tavily API Key
severity: high
confidence: strong
type: pattern
values:
- (?:(?<![0-9A-Za-z_-])|\\[0abfnrtv]|(%|\\x)[0-9A-Fa-f]{2}|\\[0-7]{3}|\\[Uu]([0-9A-Fa-f]{4}){1,2}|\x1B\[[0-9;]{0,80}m)(?P<value>tvly-[0-9A-Za-z_-]{32,40})(?![0-9A-Za-z_-])
min_line_len: 37
filter_type:
- ValuePatternCheck(5)
- ValueEntropyBase64Check
required_substrings:
- tvly-
target:
- code
- doc

- name: Discord Bot Token
severity: high
confidence: strong
Expand Down
28 changes: 27 additions & 1 deletion credsweeper/utils/hop_stat.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,37 @@ class HopStat:
')': '0',
'_': '-',
'+': '=',
'Q': 'q',
'W': 'w',
'E': 'e',
'R': 'r',
'T': 't',
'Y': 'y',
'U': 'u',
'I': 'i',
'O': 'o',
'P': 'p',
'{': '[',
'}': ']',
'|': '\\',
'A': 'a',
'S': 's',
'D': 'd',
'F': 'f',
'G': 'g',
'H': 'h',
'J': 'j',
'K': 'k',
'L': 'l',
':': ';',
'"': "'",
'Z': 'z',
'X': 'x',
'C': 'c',
'V': 'v',
'B': 'b',
'N': 'n',
'M': 'm',
'<': ',',
'>': '.',
'?': '/',
Expand Down Expand Up @@ -75,7 +101,7 @@ def stat(self, value: str) -> Tuple[float, float]:
"""
hops = []
value = value.lower().translate(HopStat.TRANSLATION)
value = value.translate(HopStat.TRANSLATION)
for a, b in zip(value[:-1], value[1:]):
hop = self.__hop_dict.get((a, b))
if hop is None:
Expand Down
6 changes: 4 additions & 2 deletions docs/source/guide.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ Get all argument list:
[--ml_threshold FLOAT_OR_STR]
[--ml_batch_size POSITIVE_INT] [--ml_config PATH]
[--ml_model PATH] [--ml_providers STR]
[--jobs POSITIVE_INT] [--thrifty]
[--jobs POSITIVE_INT] [--thrifty | --no-thrifty]
[--skip_ignored] [--error | --no-error]
[--save-json [PATH]] [--save-xlsx [PATH]]
[--stdout | --no-stdout] [--color | --no-color]
Expand Down Expand Up @@ -72,7 +72,9 @@ Get all argument list:
(CPUExecutionProvider is used by default)
--jobs POSITIVE_INT, -j POSITIVE_INT
number of parallel processes to use (default: 1)
--thrifty clear objects after scan to reduce memory consumption
--thrifty, --no-thrifty
clear objects after scan to reduce memory consumption
(default: True)
--skip_ignored parse .gitignore files and skip credentials from
ignored objects
--error, --no-error produce error code if credentials are found (default:
Expand Down
6 changes: 3 additions & 3 deletions tests/__init__.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
from pathlib import Path

# total number of files in test samples
SAMPLES_FILES_COUNT = 147
SAMPLES_FILES_COUNT = 148

# the lowest value of ML threshold is used to display possible lowest values
NEGLIGIBLE_ML_THRESHOLD = 0.0001

# credentials count after scan with negligible ML threshold
SAMPLES_CRED_COUNT = 470
SAMPLES_CRED_COUNT = 472
SAMPLES_CRED_LINE_COUNT = SAMPLES_CRED_COUNT + 19

# Number of filtered credentials with ML
Expand All @@ -17,7 +17,7 @@
SAMPLES_POST_CRED_COUNT = SAMPLES_CRED_COUNT - ML_FILTERED

# with option --doc
SAMPLES_IN_DOC = 656
SAMPLES_IN_DOC = 657

# archived credentials that are not found without --depth
SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 87
Expand Down
50 changes: 50 additions & 0 deletions tests/data/depth_3.json
Original file line number Diff line number Diff line change
Expand Up @@ -12673,6 +12673,31 @@
}
]
},
{
"rule": "Tavily API Key",
"severity": "high",
"confidence": "strong",
"ml_probability": null,
"line_data_list": [
{
"line": "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 # TP",
"line_num": 1,
"path": "./tests/samples/tvly",
"info": "FILE|RAW",
"value": "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",
"value_start": 0,
"value_end": 37,
"variable": null,
"variable_start": -2,
"variable_end": -2,
"entropy_validation": {
"iterator": "BASE64STDPAD_CHARS",
"entropy": 4.703528274549062,
"valid": true
}
}
]
},
{
"rule": "Twilio Credentials",
"severity": "high",
Expand Down Expand Up @@ -12898,6 +12923,31 @@
}
]
},
{
"rule": "Password",
"severity": "medium",
"confidence": "moderate",
"ml_probability": 0.857,
"line_data_list": [
{
"line": "if (password !== \"PaS5w0rD2#\"){",
"line_num": 21,
"path": "./tests/samples/url_cred.js",
"info": "FILE|RAW",
"value": "PaS5w0rD2#",
"value_start": 18,
"value_end": 28,
"variable": "password",
"variable_start": 4,
"variable_end": 12,
"entropy_validation": {
"iterator": "BASE64STDPAD_CHARS",
"entropy": 2.989735285398626,
"valid": false
}
}
]
},
{
"rule": "UUID",
"severity": "info",
Expand Down
25 changes: 25 additions & 0 deletions tests/data/doc.json
Original file line number Diff line number Diff line change
Expand Up @@ -18122,6 +18122,31 @@
}
]
},
{
"rule": "Tavily API Key",
"severity": "high",
"confidence": "strong",
"ml_probability": null,
"line_data_list": [
{
"line": "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 # TP",
"line_num": 1,
"path": "./tests/samples/tvly",
"info": "FILE|RAW",
"value": "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",
"value_start": 0,
"value_end": 37,
"variable": null,
"variable_start": -2,
"variable_end": -2,
"entropy_validation": {
"iterator": "BASE64STDPAD_CHARS",
"entropy": 4.703528274549062,
"valid": true
}
}
]
},
{
"rule": "Twilio Credentials",
"severity": "high",
Expand Down
50 changes: 50 additions & 0 deletions tests/data/ml_threshold.json
Original file line number Diff line number Diff line change
Expand Up @@ -11597,6 +11597,31 @@
}
]
},
{
"rule": "Tavily API Key",
"severity": "high",
"confidence": "strong",
"ml_probability": null,
"line_data_list": [
{
"line": "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 # TP",
"line_num": 1,
"path": "./tests/samples/tvly",
"info": "",
"value": "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",
"value_start": 0,
"value_end": 37,
"variable": null,
"variable_start": -2,
"variable_end": -2,
"entropy_validation": {
"iterator": "BASE64STDPAD_CHARS",
"entropy": 4.703528274549062,
"valid": true
}
}
]
},
{
"rule": "Twilio Credentials",
"severity": "high",
Expand Down Expand Up @@ -11847,6 +11872,31 @@
}
]
},
{
"rule": "Password",
"severity": "medium",
"confidence": "moderate",
"ml_probability": 0.857,
"line_data_list": [
{
"line": "if (password !== \"PaS5w0rD2#\"){",
"line_num": 21,
"path": "./tests/samples/url_cred.js",
"info": "",
"value": "PaS5w0rD2#",
"value_start": 18,
"value_end": 28,
"variable": "password",
"variable_start": 4,
"variable_end": 12,
"entropy_validation": {
"iterator": "BASE64STDPAD_CHARS",
"entropy": 2.989735285398626,
"valid": false
}
}
]
},
{
"rule": "UUID",
"severity": "info",
Expand Down
50 changes: 50 additions & 0 deletions tests/data/output.json
Original file line number Diff line number Diff line change
Expand Up @@ -9372,6 +9372,31 @@
}
]
},
{
"rule": "Tavily API Key",
"severity": "high",
"confidence": "strong",
"ml_probability": null,
"line_data_list": [
{
"line": "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 # TP",
"line_num": 1,
"path": "./tests/samples/tvly",
"info": "",
"value": "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",
"value_start": 0,
"value_end": 37,
"variable": null,
"variable_start": -2,
"variable_end": -2,
"entropy_validation": {
"iterator": "BASE64STDPAD_CHARS",
"entropy": 4.703528274549062,
"valid": true
}
}
]
},
{
"rule": "Twilio Credentials",
"severity": "high",
Expand Down Expand Up @@ -9597,6 +9622,31 @@
}
]
},
{
"rule": "Password",
"severity": "medium",
"confidence": "moderate",
"ml_probability": 0.857,
"line_data_list": [
{
"line": "if (password !== \"PaS5w0rD2#\"){",
"line_num": 21,
"path": "./tests/samples/url_cred.js",
"info": "",
"value": "PaS5w0rD2#",
"value_start": 18,
"value_end": 28,
"variable": "password",
"variable_start": 4,
"variable_end": 12,
"entropy_validation": {
"iterator": "BASE64STDPAD_CHARS",
"entropy": 2.989735285398626,
"valid": false
}
}
]
},
{
"rule": "UUID",
"severity": "info",
Expand Down
2 changes: 2 additions & 0 deletions tests/samples/tvly
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
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 # TP
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 # FP
4 changes: 4 additions & 0 deletions tests/samples/url_cred.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,7 @@ email_as_login = "smtps://example@gmail.com:FnD83JZs@smtp.gmail.com:465";
*/

url3d = "https://localhost.com/013948?26timestamp%3D1395782596%26token%3Dh1d3Me4ch534d801sl3jdk%26version%3D3.14%26si";

if (password !== "PaS5w0rD2#"){
// Strict inequality (!==)
}
Loading

0 comments on commit f618f3c

Please sign in to comment.