From 643314e7f85a67b0f8eeb0d4960dadd4eb697b36 Mon Sep 17 00:00:00 2001 From: Kanstantinas Piatrashka Date: Mon, 11 Oct 2021 21:15:54 +0300 Subject: [PATCH 1/4] Add support for custom formatting strings --- pywhat/printer.py | 41 +++++++++++++++++++++++++++++ pywhat/what.py | 42 +++++++++++++++++++++++++++++- tests/test_click.py | 63 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 145 insertions(+), 1 deletion(-) diff --git a/pywhat/printer.py b/pywhat/printer.py index fe56936..b474ca0 100644 --- a/pywhat/printer.py +++ b/pywhat/printer.py @@ -1,5 +1,6 @@ import json import os +import re from rich.console import Console from rich.table import Table @@ -180,6 +181,46 @@ def print_raw(self, text: dict, text_input) -> str: return output_str + def format_print(self, text: dict, format_str: str): + if text["Regexes"]: + output = [] + format_list = [] + + # Split format_str so that format_list's item will either be r'\\' or something else + start = 0 + while (i := format_str.find(r"\\", start)) != -1: + if format_str[start:i]: + format_list.append(format_str[start:i]) + format_list.append("\\") + start = i + 2 + format_list.append(format_str[start:]) + + for key, value in text["Regexes"].items(): + for i in value: + temp = "" + for s in format_list: + formats = { + "%m": i["Matched"], + "%n": i["Regex Pattern"]["Name"], + "%d": i["Regex Pattern"]["Description"], + "%e": i["Regex Pattern"].get("Exploit"), + "%r": str(i["Regex Pattern"]["Rarity"]), + "%l": i["Regex Pattern"]["URL"] + i["Matched"] + if i["Regex Pattern"]["URL"] is not None + else None, + "%t": ", ".join(i["Regex Pattern"]["Tags"]), + } + for format, value in formats.items(): + value = str() if value is None else value + s = re.sub(r"(? 
bool: if "File Signatures" in text and text["File Signatures"]: # loops files diff --git a/pywhat/what.py b/pywhat/what.py index b04fba8..787d774 100644 --- a/pywhat/what.py +++ b/pywhat/what.py @@ -118,7 +118,7 @@ def get_text(ctx, opts, value): @click.option( "--format", required=False, - help="--format json for json output. --format pretty for a pretty table output.", + help="Format output according to specified rules.", ) def main(**kwargs): """ @@ -184,6 +184,44 @@ def main(**kwargs): Refer to the Filtration section for more information. + Formatting the output: + + --format format_str + + format_str can be equal to: + + pretty - Output data in the table + + json - Output data in json format + + CUSTOM_STRING - Print data in the way you want. For every match CUSTOM_STRING will be printed and '%x' (See below for possible x values) will be substituted with a match value. + + For example: + + pywhat --format '%m - %n' 'google.com htb{flag}' + + will print: + + htb{flag} - HackTheBox Flag Format + google.com - Uniform Resource Locator (URL) + + Possible '%x' values: + + %m - matched text + + %n - name of regex + + %d - description (will not output if absent) + + %e - exploit (will not output if absent) + + %r - rarity + + %l - link (will not output if absent) + + %t - tags (in 'tag1, tag2 ...' format) + + If you want to print '%' or '\' character - escape it: '\%', '\\'.
Examples: @@ -246,6 +284,8 @@ def main(**kwargs): p.print_json(identified_output) elif kwargs["format"] == "pretty": p.pretty_print(identified_output, kwargs["text_input"]) + elif kwargs["format"] is not None: + p.format_print(identified_output, kwargs["format"]) else: p.print_raw(identified_output, kwargs["text_input"]) diff --git a/tests/test_click.py b/tests/test_click.py index c8c51b5..8babb85 100644 --- a/tests/test_click.py +++ b/tests/test_click.py @@ -632,3 +632,66 @@ def test_file_fixture_sshpass(): result = runner.invoke(main, ["fixtures/file"]) assert result.exit_code == 0 assert re.findall("SSHPass Clear Password Argument", str(result.output)) + + +def test_format(): + runner = CliRunner() + result = runner.invoke( + main, ["-db", "--format", " json ", "rBPAQmwMrt7FDDPNyjwFgwSqbWZPf6SLkk"] + ) + assert result.exit_code == 0 + assert '"File Signatures":' in result.output + + +def test_format2(): + runner = CliRunner() + result = runner.invoke( + main, ["-db", "--format", " pretty ", "rBPAQmwMrt7FDDPNyjwFgwSqbWZPf6SLkk"] + ) + assert result.exit_code == 0 + assert "Possible Identification" in result.output + + +def test_format3(): + runner = CliRunner() + result = runner.invoke( + main, + [ + "-db", + "--format", + r"%m 2%n %d --- -%e%r %l %t \%d", + "rBPAQmwMrt7FDDPNyjwFgwSqbWZPf6SLkk", + ], + ) + assert result.exit_code == 0 + assert ( + "rBPAQmwMrt7FDDPNyjwFgwSqbWZPf6SLkk 2Ripple (XRP) Wallet Address --- -0.3 https://xrpscan.com/account/rBPAQmwMrt7FDDPNyjwFgwSqbWZPf6SLkk Finance, Cryptocurrency Wallet, Ripple Wallet, Ripple, XRP %d" + in result.output.replace("\n", "") + ) + + +def test_format4(): + runner = CliRunner() + result = runner.invoke( + main, + [ + "-db", + "--include", + "Bug Bounty", + "--format", + r"\\%e %l %z", + "heroku00000000-0000-0000-0000-000000000000", + ], + ) + assert result.exit_code == 0 + assert ( + '\\Use the command below to verify that the API key is valid:\n $ curl -X POST https://api.heroku.com/apps -H "Accept: 
application/vnd.heroku+json; version=3" -H "Authorization: Bearer heroku00000000-0000-0000-0000-000000000000"\n %z'.split() + == result.output.split() + ) + + +def test_format5(): + runner = CliRunner() + result = runner.invoke(main, ["-db", "--format", r"%e", "thm{2}"]) + assert result.exit_code == 0 + assert len(result.output) == 0 From cad871f030bd762b50a3073bc3eec639c0724ef1 Mon Sep 17 00:00:00 2001 From: Kanstantinas Piatrashka Date: Mon, 11 Oct 2021 21:17:01 +0300 Subject: [PATCH 2/4] Add --print-tags option --- pywhat/printer.py | 17 ++++++++++++++--- pywhat/what.py | 9 +++++---- tests/test_click.py | 16 ++++++++++++++++ 3 files changed, 35 insertions(+), 7 deletions(-) diff --git a/pywhat/printer.py b/pywhat/printer.py index b474ca0..1e97057 100644 --- a/pywhat/printer.py +++ b/pywhat/printer.py @@ -11,7 +11,7 @@ def __init__(self): self.console = Console(highlight=False) self.bug_bounty_mode = False - def pretty_print(self, text: dict, text_input): + def pretty_print(self, text: dict, text_input, print_tags=False): to_out = "" if text["File Signatures"]: @@ -70,7 +70,14 @@ def pretty_print(self, text: dict, text_input): ): exploit = i["Regex Pattern"]["Exploit"] - if not description: + if print_tags: + tags = f"Tags: {', '.join(i['Regex Pattern']['Tags'])}" + if description is None: + description = tags + else: + description += "\n" + tags + + if description is None: description = "None" # FIXME this is quite messy @@ -117,7 +124,7 @@ def print_json(self, text: dict): Returns the printable object """ - def print_raw(self, text: dict, text_input) -> str: + def print_raw(self, text: dict, text_input, print_tags=False): output_str = "" if text["File Signatures"] and text["Regexes"]: @@ -171,6 +178,10 @@ def print_raw(self, text: dict, text_input) -> str: "\n[bold #D7Afff]Exploit: [/bold #D7Afff]" + i["Regex Pattern"]["Exploit"] ) + + if print_tags: + output_str += f"\n[bold #D7Afff]Tags: [/bold #D7Afff]{', '.join(i['Regex Pattern']['Tags'])}" + output_str += 
"\n\n" if output_str == "" and not self.bug_bounty_mode: diff --git a/pywhat/what.py b/pywhat/what.py index 787d774..cb6c91d 100644 --- a/pywhat/what.py +++ b/pywhat/what.py @@ -120,6 +120,7 @@ def get_text(ctx, opts, value): required=False, help="Format output according to specified rules.", ) +@click.option("-pt", "--print-tags", is_flag=True, help="Add flags to ouput") def main(**kwargs): """ pyWhat - Identify what something is. @@ -280,14 +281,14 @@ def main(**kwargs): p = printer.Printing() - if kwargs["json"] or kwargs["format"] == "json": + if kwargs["json"] or str(kwargs["format"]).strip() == "json": p.print_json(identified_output) - elif kwargs["format"] == "pretty": - p.pretty_print(identified_output, kwargs["text_input"]) + elif str(kwargs["format"]).strip() == "pretty": + p.pretty_print(identified_output, kwargs["text_input"], kwargs["print_tags"]) elif kwargs["format"] is not None: p.format_print(identified_output, kwargs["format"]) else: - p.print_raw(identified_output, kwargs["text_input"]) + p.print_raw(identified_output, kwargs["text_input"], kwargs["print_tags"]) class What_Object: diff --git a/tests/test_click.py b/tests/test_click.py index 8babb85..eb10c9f 100644 --- a/tests/test_click.py +++ b/tests/test_click.py @@ -695,3 +695,19 @@ def test_format5(): result = runner.invoke(main, ["-db", "--format", r"%e", "thm{2}"]) assert result.exit_code == 0 assert len(result.output) == 0 + + +def test_print_tags(): + runner = CliRunner() + result = runner.invoke(main, ["-db", "-pt", "thm{2}"]) + assert result.exit_code == 0 + assert "Tags: CTF Flag" in result.output + + +def test_print_tags2(): + runner = CliRunner() + result = runner.invoke( + main, ["-db", "--print-tags", "--format", "pretty", "thm{2}"] + ) + assert result.exit_code == 0 + assert "Tags: CTF Flag" in result.output From c173aae2201eca2ac8410749b2eb07979fd583b6 Mon Sep 17 00:00:00 2001 From: Kanstantinas Piatrashka Date: Mon, 11 Oct 2021 21:17:36 +0300 Subject: [PATCH 3/4] Disable 
boundaryless mode in click tests --- tests/test_click.py | 63 ++++++++++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 29 deletions(-) diff --git a/tests/test_click.py b/tests/test_click.py index eb10c9f..6c900f9 100644 --- a/tests/test_click.py +++ b/tests/test_click.py @@ -10,14 +10,14 @@ def test_nothing_found(): runner = CliRunner() - result = runner.invoke(main, [""]) + result = runner.invoke(main, ["-db", ""]) assert result.exit_code == 0 assert "Nothing found!" in result.output def test_hello_world(): runner = CliRunner() - result = runner.invoke(main, ["THM{this is a flag}"]) + result = runner.invoke(main, ["-db", "THM{this is a flag}"]) assert result.exit_code == 0 assert "THM{" in result.output @@ -47,14 +47,14 @@ def test_tag_printing(): def test_json_printing(): """Test for valid json""" runner = CliRunner() - result = runner.invoke(main, ["10.0.0.1", "--json"]) + result = runner.invoke(main, ["-db", "10.0.0.1", "--json"]) assert json.loads(result.output.replace("\n", "")) def test_json_printing2(): """Test for empty json return""" runner = CliRunner() - result = runner.invoke(main, ["", "--json"]) + result = runner.invoke(main, ["-db", "", "--json"]) assert result.output.strip("\n") == '{"File Signatures": null, "Regexes": null}' @@ -159,7 +159,7 @@ def test_file_fixture14(): def test_arg_parsing(): runner = CliRunner() - result = runner.invoke(main, ["1KFHE7w8BhaENAswwryaoccDb6qcT6DbYY"]) + result = runner.invoke(main, ["-db", "1KFHE7w8BhaENAswwryaoccDb6qcT6DbYY"]) assert result.exit_code == 0 assert re.findall("blockchain", str(result.output)) @@ -223,7 +223,7 @@ def test_file_fixture_email(): def test_file_fixture_email2(): runner = CliRunner() - result = runner.invoke(main, ["firstname+lastname@example.com"]) + result = runner.invoke(main, ["-db", "firstname+lastname@example.com"]) assert result.exit_code == 0 assert re.findall("Email", str(result.output)) @@ -251,28 +251,28 @@ def test_file_fixture_youtube_id(): def 
test_file_fixture_ip4(): runner = CliRunner() - result = runner.invoke(main, ["118.103.238.230"]) + result = runner.invoke(main, ["-db", "118.103.238.230"]) assert result.exit_code == 0 assert re.findall("Address Version 4", str(result.output)) def test_file_fixture_ip4_shodan(): runner = CliRunner() - result = runner.invoke(main, ["118.103.238.230"]) + result = runner.invoke(main, ["-db", "118.103.238.230"]) assert result.exit_code == 0 assert re.findall("shodan", str(result.output)) def test_file_fixture_ip6(): runner = CliRunner() - result = runner.invoke(main, ["2001:0db8:85a3:0000:0000:8a2e:0370:7334"]) + result = runner.invoke(main, ["-db", "2001:0db8:85a3:0000:0000:8a2e:0370:7334"]) assert result.exit_code == 0 assert re.findall("Address Version 6", str(result.output)) def test_file_fixture_ip6_shodan(): runner = CliRunner() - result = runner.invoke(main, ["2001:0db8:85a3:0000:0000:8a2e:0370:7334"]) + result = runner.invoke(main, ["-db", "2001:0db8:85a3:0000:0000:8a2e:0370:7334"]) assert result.exit_code == 0 assert re.findall("shodan", str(result.output)) @@ -294,7 +294,7 @@ def test_file_pcap(): def test_file_coords(): runner = CliRunner() - result = runner.invoke(main, ["52.6169586, -1.9779857"]) + result = runner.invoke(main, ["-db", "52.6169586, -1.9779857"]) assert result.exit_code == 0 assert re.findall("Latitude", str(result.output)) @@ -323,7 +323,7 @@ def test_file_fixture_bch(): def test_file_fixture_bch2(): runner = CliRunner() result = runner.invoke( - main, ["bitcoincash:qzlg6uvceehgzgtz6phmvy8gtdqyt6vf359at4n3lq"] + main, ["-db", "bitcoincash:qzlg6uvceehgzgtz6phmvy8gtdqyt6vf359at4n3lq"] ) assert result.exit_code == 0 assert re.findall("blockchain", str(result.output)) @@ -352,7 +352,7 @@ def test_file_fixture_xmr(): def test_file_cors(): runner = CliRunner() - result = runner.invoke(main, ["Access-Control-Allow: *"]) + result = runner.invoke(main, ["-db", "Access-Control-Allow: *"]) assert result.exit_code == 0 assert re.findall("Access", 
str(result.output)) @@ -362,7 +362,8 @@ def test_file_jwt(): result = runner.invoke( main, [ - "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c" + "-db", + "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c", ], ) assert result.exit_code == 0 @@ -371,28 +372,30 @@ def test_file_jwt(): def test_file_s3(): runner = CliRunner() - result = runner.invoke(main, ["http://s3.amazonaws.com/bucket/"]) + result = runner.invoke(main, ["-db", "http://s3.amazonaws.com/bucket/"]) assert result.exit_code == 0 assert re.findall("S3", str(result.output)) def test_file_s3_2(): runner = CliRunner() - result = runner.invoke(main, ["s3://bucket/path/key"]) + result = runner.invoke(main, ["-db", "s3://bucket/path/key"]) assert result.exit_code == 0 assert re.findall("S3", str(result.output)) def test_file_s3_3(): runner = CliRunner() - result = runner.invoke(main, ["s3://bucket/path/directory/"]) + result = runner.invoke(main, ["-db", "s3://bucket/path/directory/"]) assert result.exit_code == 0 assert re.findall("S3", str(result.output)) def test_file_arn(): runner = CliRunner() - result = runner.invoke(main, ["arn:partition:service:region:account-id:resource"]) + result = runner.invoke( + main, ["-db", "arn:partition:service:region:account-id:resource"] + ) assert result.exit_code == 0 assert re.findall("ARN", str(result.output)) @@ -400,7 +403,7 @@ def test_file_arn(): def test_file_arn2(): runner = CliRunner() result = runner.invoke( - main, ["arn:partition:service:region:account-id:resourcetype/resource"] + main, ["-db", "arn:partition:service:region:account-id:resourcetype/resource"] ) assert result.exit_code == 0 assert re.findall("ARN", str(result.output)) @@ -409,7 +412,7 @@ def test_file_arn2(): def test_file_arn3(): runner = CliRunner() result = runner.invoke( - main, 
["arn:partition:service:region:account-id:resourcetype:resource"] + main, ["-db", "arn:partition:service:region:account-id:resourcetype:resource"] ) assert result.exit_code == 0 assert re.findall("ARN", str(result.output)) @@ -417,49 +420,51 @@ def test_file_arn3(): def test_file_arn4(): runner = CliRunner() - result = runner.invoke(main, ["arn:aws:s3:::my_corporate_bucket/Development/*"]) + result = runner.invoke( + main, ["-db", "arn:aws:s3:::my_corporate_bucket/Development/*"] + ) assert result.exit_code == 0 assert re.findall("ARN", str(result.output)) def test_key_value_min_rarity_0(): runner = CliRunner() - result = runner.invoke(main, ["--rarity", "0:", "key:value"]) + result = runner.invoke(main, ["-db", "--rarity", "0:", "key:value"]) assert result.exit_code == 0 assert re.findall("Key:Value", str(result.output)) def test_key_value_min_rarity_0_1(): runner = CliRunner() - result = runner.invoke(main, ["--rarity", "0:", "key : value"]) + result = runner.invoke(main, ["-db", "--rarity", "0:", "key : value"]) assert result.exit_code == 0 assert re.findall("Key:Value", str(result.output)) def test_key_value_min_rarity_0_2(): runner = CliRunner() - result = runner.invoke(main, ["--rarity", "0:", "key: value"]) + result = runner.invoke(main, ["-db", "--rarity", "0:", "key: value"]) assert result.exit_code == 0 assert re.findall("Key:Value", str(result.output)) def test_key_value_min_rarity_0_3(): runner = CliRunner() - result = runner.invoke(main, ["--rarity", "0:", ":a:"]) + result = runner.invoke(main, ["-db", "--rarity", "0:", ":a:"]) assert result.exit_code == 0 assert not re.findall("Key:Value", str(result.output)) def test_key_value_min_rarity_0_4(): runner = CliRunner() - result = runner.invoke(main, ["--rarity", "0:", ":::::"]) + result = runner.invoke(main, ["-db", "--rarity", "0:", ":::::"]) assert result.exit_code == 0 assert not re.findall("Key:Value", str(result.output)) def test_key_value_min_rarity_0_5(): runner = CliRunner() - result = 
runner.invoke(main, ["--rarity", "0:", "a:b:c"]) + result = runner.invoke(main, ["-db", "--rarity", "0:", "a:b:c"]) assert result.exit_code == 0 assert not re.findall("a:b:c", str(result.output)) @@ -580,14 +585,14 @@ def test_file_fixture_turkish_car_plate(): def test_file_fixture_date_of_birth(): runner = CliRunner() - result = runner.invoke(main, ["fixtures/file"]) + result = runner.invoke(main, ["-db", "fixtures/file"]) assert result.exit_code == 0 assert re.findall("Date of Birth", str(result.output)) def test_file_fixture_turkish_id_number(): runner = CliRunner() - result = runner.invoke(main, ["fixtures/file"]) + result = runner.invoke(main, ["-db", "fixtures/file"]) assert result.exit_code == 0 assert re.findall("Turkish Identification Number", str(result.output)) From 079de4162bb93e154b7d9423cdc7bb41720542d8 Mon Sep 17 00:00:00 2001 From: Kanstantinas Piatrashka Date: Tue, 12 Oct 2021 16:45:32 +0300 Subject: [PATCH 4/4] Fix printer.py mypy tests and remove the walrus operator --- pywhat/printer.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/pywhat/printer.py b/pywhat/printer.py index 1e97057..904e57b 100644 --- a/pywhat/printer.py +++ b/pywhat/printer.py @@ -199,27 +199,29 @@ def format_print(self, text: dict, format_str: str): # Split format_str so that format_list's item will either be r'\\' or something else start = 0 - while (i := format_str.find(r"\\", start)) != -1: + i = format_str.find(r"\\", start) + while i != -1: if format_str[start:i]: format_list.append(format_str[start:i]) format_list.append("\\") start = i + 2 + i = format_str.find(r"\\", start) format_list.append(format_str[start:]) for key, value in text["Regexes"].items(): - for i in value: + for match in value: temp = "" for s in format_list: formats = { - "%m": i["Matched"], - "%n": i["Regex Pattern"]["Name"], - "%d": i["Regex Pattern"]["Description"], - "%e": i["Regex Pattern"].get("Exploit"), - "%r": str(i["Regex Pattern"]["Rarity"]), - 
"%l": i["Regex Pattern"]["URL"] + i["Matched"] - if i["Regex Pattern"]["URL"] is not None + "%m": match["Matched"], + "%n": match["Regex Pattern"]["Name"], + "%d": match["Regex Pattern"]["Description"], + "%e": match["Regex Pattern"].get("Exploit"), + "%r": str(match["Regex Pattern"]["Rarity"]), + "%l": match["Regex Pattern"]["URL"] + match["Matched"] + if match["Regex Pattern"]["URL"] is not None else None, - "%t": ", ".join(i["Regex Pattern"]["Tags"]), + "%t": ", ".join(match["Regex Pattern"]["Tags"]), } for format, value in formats.items(): value = str() if value is None else value