Strict inequality separator and new pattern (#684)

* Strict inequality (!==) in JS separator
* Use thrifty=True by default in console tool
* testfix
* optimization
* [skip actions] [work] 2025-02-21T16:54:52+02:00
* [skip actions] [work] 2025-02-21T17:40:15+02:00
* rollback
* Tavily API Key pattern
Samsung · Feb 24, 2025 · f618f3c
1 parent 2211238
commit f618f3c
Show file tree

Hide file tree

Showing 16 changed files with 240 additions and 12 deletions.
diff --git a/README.md b/README.md
@@ -46,7 +46,7 @@ Full documentation can be found here: <https://credsweeper.readthedocs.io/>
 
 ### Main Requirements
 
-- Python 3.8, 3.9, 3.10, 3.11, 3.12
+- Python 3.9, 3.10, 3.11, 3.12
 
 ### Installation
 

diff --git a/credsweeper/__main__.py b/credsweeper/__main__.py
@@ -205,8 +205,8 @@ def get_arguments() -> Namespace:
                         metavar="POSITIVE_INT")
     parser.add_argument("--thrifty",
                         help="clear objects after scan to reduce memory consumption",
-                        action="store_const",
-                        const=True)
+                        action=BooleanOptionalAction,
+                        default=True)
     parser.add_argument("--skip_ignored",
                         help="parse .gitignore files and skip credentials from ignored objects",
                         dest="skip_ignored",

diff --git a/credsweeper/common/keyword_pattern.py b/credsweeper/common/keyword_pattern.py
@@ -13,7 +13,7 @@ class KeywordPattern:
                 r"(&(quot|apos);|%[0-9a-f]{2}|[`'\"])*" \
                 r")"  # <variable>
     separator = r"(\s|\\{1,8}[tnr])*\]?(\s|\\{1,8}[tnr])*" \
-                r"(?P<separator>:(\s[a-z]{3,9}[?]?\s)?=|:|=(>|&gt;|\\u0026gt;)|!=|===|==|=|%3d)" \
+                r"(?P<separator>:(\s[a-z]{3,9}[?]?\s)?=|:|=(>|&gt;|\\u0026gt;)|!==|!=|===|==|=|%3d)" \
                 r"(\s|\\{1,8}[tnr])*"
     # might be curly, square or parenthesis with words before
     wrap = r"(?P<wrap>(" \

diff --git a/credsweeper/filters/value_allowlist_check.py b/credsweeper/filters/value_allowlist_check.py
@@ -16,7 +16,7 @@ class ValueAllowlistCheck(Filter):
         r"\$\{(\*|[0-9]+|[a-z_].*)\}",  #
         r"\$[0-9]+(\s|$)",  #
         r"\$\$[a-z_]+(\^%[0-9a-z_]+)?",  #
-        r"#\{.*\}",  #
+        r"#\{.+\}",  # Ruby: String Interpolation
         r"\{\{.+\}\}",  #
         r".*@@@hl@@@(암호|비번|PW|PASS)@@@endhl@@@",  #
     ]

diff --git a/credsweeper/filters/value_blocklist_check.py b/credsweeper/filters/value_blocklist_check.py
@@ -11,8 +11,11 @@ class ValueBlocklistCheck(Filter):
         "true",
         "false",
         "null",
+        "none",
         "bearer",
         "string",
+        "value",
+        "undefined",
     ]
 
     def __init__(self, config: Config = None) -> None:

diff --git a/credsweeper/rules/config.yaml b/credsweeper/rules/config.yaml
@@ -1391,6 +1391,22 @@
     - code
     - doc
 
+- name: Tavily API Key
+  severity: high
+  confidence: strong
+  type: pattern
+  values:
+    - (?:(?<![0-9A-Za-z_-])|\\[0abfnrtv]|(%|\\x)[0-9A-Fa-f]{2}|\\[0-7]{3}|\\[Uu]([0-9A-Fa-f]{4}){1,2}|\x1B\[[0-9;]{0,80}m)(?P<value>tvly-[0-9A-Za-z_-]{32,40})(?![0-9A-Za-z_-])
+  min_line_len: 37
+  filter_type:
+    - ValuePatternCheck(5)
+    - ValueEntropyBase64Check
+  required_substrings:
+    - tvly-
+  target:
+    - code
+    - doc
+
 - name: Discord Bot Token
   severity: high
   confidence: strong

diff --git a/credsweeper/utils/hop_stat.py b/credsweeper/utils/hop_stat.py
@@ -25,11 +25,37 @@ class HopStat:
         ')': '0',
         '_': '-',
         '+': '=',
+        'Q': 'q',
+        'W': 'w',
+        'E': 'e',
+        'R': 'r',
+        'T': 't',
+        'Y': 'y',
+        'U': 'u',
+        'I': 'i',
+        'O': 'o',
+        'P': 'p',
         '{': '[',
         '}': ']',
         '|': '\\',
+        'A': 'a',
+        'S': 's',
+        'D': 'd',
+        'F': 'f',
+        'G': 'g',
+        'H': 'h',
+        'J': 'j',
+        'K': 'k',
+        'L': 'l',
         ':': ';',
         '"': "'",
+        'Z': 'z',
+        'X': 'x',
+        'C': 'c',
+        'V': 'v',
+        'B': 'b',
+        'N': 'n',
+        'M': 'm',
         '<': ',',
         '>': '.',
         '?': '/',
@@ -75,7 +101,7 @@ def stat(self, value: str) -> Tuple[float, float]:
 
         """
         hops = []
-        value = value.lower().translate(HopStat.TRANSLATION)
+        value = value.translate(HopStat.TRANSLATION)
         for a, b in zip(value[:-1], value[1:]):
             hop = self.__hop_dict.get((a, b))
             if hop is None:

diff --git a/docs/source/guide.rst b/docs/source/guide.rst
@@ -22,7 +22,7 @@ Get all argument list:
                                  [--ml_threshold FLOAT_OR_STR]
                                  [--ml_batch_size POSITIVE_INT] [--ml_config PATH]
                                  [--ml_model PATH] [--ml_providers STR]
-                                 [--jobs POSITIVE_INT] [--thrifty]
+                                 [--jobs POSITIVE_INT] [--thrifty | --no-thrifty]
                                  [--skip_ignored] [--error | --no-error]
                                  [--save-json [PATH]] [--save-xlsx [PATH]]
                                  [--stdout | --no-stdout] [--color | --no-color]
@@ -72,7 +72,9 @@ Get all argument list:
                             (CPUExecutionProvider is used by default)
       --jobs POSITIVE_INT, -j POSITIVE_INT
                             number of parallel processes to use (default: 1)
-      --thrifty             clear objects after scan to reduce memory consumption
+      --thrifty, --no-thrifty
+                            clear objects after scan to reduce memory consumption
+                            (default: True)
       --skip_ignored        parse .gitignore files and skip credentials from
                             ignored objects
       --error, --no-error   produce error code if credentials are found (default:

diff --git a/tests/__init__.py b/tests/__init__.py
@@ -1,13 +1,13 @@
 from pathlib import Path
 
 # total number of files in test samples
-SAMPLES_FILES_COUNT = 147
+SAMPLES_FILES_COUNT = 148
 
 # the lowest value of ML threshold is used to display possible lowest values
 NEGLIGIBLE_ML_THRESHOLD = 0.0001
 
 # credentials count after scan with negligible ML threshold
-SAMPLES_CRED_COUNT = 470
+SAMPLES_CRED_COUNT = 472
 SAMPLES_CRED_LINE_COUNT = SAMPLES_CRED_COUNT + 19
 
 # Number of filtered credentials with ML
@@ -17,7 +17,7 @@
 SAMPLES_POST_CRED_COUNT = SAMPLES_CRED_COUNT - ML_FILTERED
 
 # with option --doc
-SAMPLES_IN_DOC = 656
+SAMPLES_IN_DOC = 657
 
 # archived credentials that are not found without --depth
 SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 87

diff --git a/tests/data/depth_3.json b/tests/data/depth_3.json
@@ -12673,6 +12673,31 @@
             }
         ]
     },
+    {
+        "rule": "Tavily API Key",
+        "severity": "high",
+        "confidence": "strong",
+        "ml_probability": null,
+        "line_data_list": [
+            {
+                "line": "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  # TP",
+                "line_num": 1,
+                "path": "./tests/samples/tvly",
+                "info": "FILE|RAW",
+                "value": "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",
+                "value_start": 0,
+                "value_end": 37,
+                "variable": null,
+                "variable_start": -2,
+                "variable_end": -2,
+                "entropy_validation": {
+                    "iterator": "BASE64STDPAD_CHARS",
+                    "entropy": 4.703528274549062,
+                    "valid": true
+                }
+            }
+        ]
+    },
     {
         "rule": "Twilio Credentials",
         "severity": "high",
@@ -12898,6 +12923,31 @@
             }
         ]
     },
+    {
+        "rule": "Password",
+        "severity": "medium",
+        "confidence": "moderate",
+        "ml_probability": 0.857,
+        "line_data_list": [
+            {
+                "line": "if (password !== \"PaS5w0rD2#\"){",
+                "line_num": 21,
+                "path": "./tests/samples/url_cred.js",
+                "info": "FILE|RAW",
+                "value": "PaS5w0rD2#",
+                "value_start": 18,
+                "value_end": 28,
+                "variable": "password",
+                "variable_start": 4,
+                "variable_end": 12,
+                "entropy_validation": {
+                    "iterator": "BASE64STDPAD_CHARS",
+                    "entropy": 2.989735285398626,
+                    "valid": false
+                }
+            }
+        ]
+    },
     {
         "rule": "UUID",
         "severity": "info",

diff --git a/tests/data/doc.json b/tests/data/doc.json
@@ -18122,6 +18122,31 @@
             }
         ]
     },
+    {
+        "rule": "Tavily API Key",
+        "severity": "high",
+        "confidence": "strong",
+        "ml_probability": null,
+        "line_data_list": [
+            {
+                "line": "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  # TP",
+                "line_num": 1,
+                "path": "./tests/samples/tvly",
+                "info": "FILE|RAW",
+                "value": "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",
+                "value_start": 0,
+                "value_end": 37,
+                "variable": null,
+                "variable_start": -2,
+                "variable_end": -2,
+                "entropy_validation": {
+                    "iterator": "BASE64STDPAD_CHARS",
+                    "entropy": 4.703528274549062,
+                    "valid": true
+                }
+            }
+        ]
+    },
     {
         "rule": "Twilio Credentials",
         "severity": "high",

diff --git a/tests/data/ml_threshold.json b/tests/data/ml_threshold.json
@@ -11597,6 +11597,31 @@
             }
         ]
     },
+    {
+        "rule": "Tavily API Key",
+        "severity": "high",
+        "confidence": "strong",
+        "ml_probability": null,
+        "line_data_list": [
+            {
+                "line": "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  # TP",
+                "line_num": 1,
+                "path": "./tests/samples/tvly",
+                "info": "",
+                "value": "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",
+                "value_start": 0,
+                "value_end": 37,
+                "variable": null,
+                "variable_start": -2,
+                "variable_end": -2,
+                "entropy_validation": {
+                    "iterator": "BASE64STDPAD_CHARS",
+                    "entropy": 4.703528274549062,
+                    "valid": true
+                }
+            }
+        ]
+    },
     {
         "rule": "Twilio Credentials",
         "severity": "high",
@@ -11847,6 +11872,31 @@
             }
         ]
     },
+    {
+        "rule": "Password",
+        "severity": "medium",
+        "confidence": "moderate",
+        "ml_probability": 0.857,
+        "line_data_list": [
+            {
+                "line": "if (password !== \"PaS5w0rD2#\"){",
+                "line_num": 21,
+                "path": "./tests/samples/url_cred.js",
+                "info": "",
+                "value": "PaS5w0rD2#",
+                "value_start": 18,
+                "value_end": 28,
+                "variable": "password",
+                "variable_start": 4,
+                "variable_end": 12,
+                "entropy_validation": {
+                    "iterator": "BASE64STDPAD_CHARS",
+                    "entropy": 2.989735285398626,
+                    "valid": false
+                }
+            }
+        ]
+    },
     {
         "rule": "UUID",
         "severity": "info",

diff --git a/tests/data/output.json b/tests/data/output.json
@@ -9372,6 +9372,31 @@
             }
         ]
     },
+    {
+        "rule": "Tavily API Key",
+        "severity": "high",
+        "confidence": "strong",
+        "ml_probability": null,
+        "line_data_list": [
+            {
+                "line": "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  # TP",
+                "line_num": 1,
+                "path": "./tests/samples/tvly",
+                "info": "",
+                "value": "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",
+                "value_start": 0,
+                "value_end": 37,
+                "variable": null,
+                "variable_start": -2,
+                "variable_end": -2,
+                "entropy_validation": {
+                    "iterator": "BASE64STDPAD_CHARS",
+                    "entropy": 4.703528274549062,
+                    "valid": true
+                }
+            }
+        ]
+    },
     {
         "rule": "Twilio Credentials",
         "severity": "high",
@@ -9597,6 +9622,31 @@
             }
         ]
     },
+    {
+        "rule": "Password",
+        "severity": "medium",
+        "confidence": "moderate",
+        "ml_probability": 0.857,
+        "line_data_list": [
+            {
+                "line": "if (password !== \"PaS5w0rD2#\"){",
+                "line_num": 21,
+                "path": "./tests/samples/url_cred.js",
+                "info": "",
+                "value": "PaS5w0rD2#",
+                "value_start": 18,
+                "value_end": 28,
+                "variable": "password",
+                "variable_start": 4,
+                "variable_end": 12,
+                "entropy_validation": {
+                    "iterator": "BASE64STDPAD_CHARS",
+                    "entropy": 2.989735285398626,
+                    "valid": false
+                }
+            }
+        ]
+    },
     {
         "rule": "UUID",
         "severity": "info",

diff --git a/tests/samples/tvly b/tests/samples/tvly
@@ -0,0 +1,2 @@
+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  # TP
+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  # FP
diff --git a/tests/samples/url_cred.js b/tests/samples/url_cred.js
@@ -17,3 +17,7 @@ email_as_login = "smtps://example@gmail.com:FnD83JZs@smtp.gmail.com:465";
 */
 
 url3d = "https://localhost.com/013948?26timestamp%3D1395782596%26token%3Dh1d3Me4ch534d801sl3jdk%26version%3D3.14%26si";
+
+if (password !== "PaS5w0rD2#"){
+// Strict inequality (!==)
+}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		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 # TP
		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 # FP