From e951d588c314f728857633a16f243f8097a9c3a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bernardas=20Ali=C5=A1auskas?= Date: Thu, 17 Jun 2021 14:50:24 +0700 Subject: [PATCH 1/6] add unix timestamps --- pywhat/Data/regex.json | 44 ++++++++++++++++++++++++++++++++++ tests/test_regex_identifier.py | 28 ++++++++++++++++++++++ 2 files changed, 72 insertions(+) diff --git a/pywhat/Data/regex.json b/pywhat/Data/regex.json index ae7ea68..2daec63 100644 --- a/pywhat/Data/regex.json +++ b/pywhat/Data/regex.json @@ -127,6 +127,28 @@ "Networking" ] }, + { + "Name": "Recent Unix Timestamp", + "Regex": "^[0-9]{10}$", + "plural_name": false, + "Description": "Seconds elapsed since unix epoch: 1970, between year 2001 and 2286", + "Rarity": 0.8, + "URL": null, + "Tags": [ + "Time" + ] + }, + { + "Name": "Recent Unix Millisecond Timestamp", + "Regex": "^[0-9]{13}$", + "plural_name": false, + "Description": "Milliseconds elapsed since unix epoch: 1970, between year 2001 and 2286", + "Rarity": 0.8, + "URL": null, + "Tags": [ + "Time" + ] + }, { "Name": "Amazon Resource Name (ARN)", "Regex": "(?i)^arn:(?P[^:\n]*):(?P[^:\n]*):(?P[^:\n]*):(?P[^:\n]*):(?P(?P[^:\/\n]*)[:\/])?(?P.*)$", @@ -462,6 +484,28 @@ "Username" ] }, + { + "Name": "Unix Timestamp", + "Regex": "^[0-9]{8,10}$", + "plural_name": false, + "Description": "Seconds elapsed since unix epoch: 1970", + "Rarity": 0.2, + "URL": null, + "Tags": [ + "Time" + ] + }, + { + "Name": "Unix Millisecond Timestamp", + "Regex": "^[0-9]{11,13}$", + "plural_name": false, + "Description": "Milliseconds elapsed since unix epoch: 1970", + "Rarity": 0.2, + "URL": null, + "Tags": [ + "Time" + ] + }, { "Name": "Key:Value Pair", "Regex": "[^:\\s]+[ ]?:[ ]?[^:\\s]+", diff --git a/tests/test_regex_identifier.py b/tests/test_regex_identifier.py index aa2b79a..4e4206b 100644 --- a/tests/test_regex_identifier.py +++ b/tests/test_regex_identifier.py @@ -1,3 +1,6 @@ +from datetime import datetime +from time import time + import pytest from pywhat import regex_identifier @@ -386,3 +389,28 @@ def test_arn4(): r = regex_identifier.RegexIdentifier() res = r.check(["arn:aws:s3:::my_corporate_bucket/Development/*"]) assert "ARN" in str(res) + + +def test_unix_timestamp(): + r = regex_identifier.RegexIdentifier() + ts_from_ymd = lambda ymd: int(datetime.strptime(ymd, '%Y-%m-%d').timestamp()) + + res = r.check([str(ts_from_ymd('2020-01-01'))]) + keys = [m['Regex Pattern']['Name'] for m in res] + assert "Unix Timestamp" in keys + assert "Recent Unix Timestamp" in keys + + res = r.check([str(ts_from_ymd('1980-01-01'))]) + keys = [m['Regex Pattern']['Name'] for m in res] + assert "Unix Timestamp" in keys + assert "Recent Unix Timestamp" not in keys + + res = r.check([str(ts_from_ymd('2020-01-01') * 1000)]) + keys = [m['Regex Pattern']['Name'] for m in res] + assert "Unix Millisecond Timestamp" in keys + assert "Recent Unix Millisecond Timestamp" in keys + + res = r.check([str(ts_from_ymd('1980-01-01') * 1000)]) + keys = [m['Regex Pattern']['Name'] for m in res] + assert "Unix Millisecond Timestamp" in keys + assert "Recent Unix Millisecond Timestamp" not in keys From ba28589774923a1a3e2ee747af0d4e3b41ffe729 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bernardas=20Ali=C5=A1auskas?= Date: Thu, 17 Jun 2021 17:42:16 +0700 Subject: [PATCH 2/6] reduce timestamp rarity to 0.5 --- pywhat/Data/regex.json | 45 +++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/pywhat/Data/regex.json b/pywhat/Data/regex.json index 2daec63..26a6563 100644 --- a/pywhat/Data/regex.json +++ b/pywhat/Data/regex.json @@ -127,28 +127,7 @@ "Networking" ] }, - { - "Name": "Recent Unix Timestamp", - "Regex": "^[0-9]{10}$", - "plural_name": false, - "Description": "Seconds elapsed since unix epoch: 1970, between year 2001 and 2286", - "Rarity": 0.8, - "URL": null, - "Tags": [ - "Time" - ] - }, - { - "Name": "Recent Unix Millisecond Timestamp", - "Regex": "^[0-9]{13}$", - "plural_name": false, - "Description": "Milliseconds elapsed since unix epoch: 1970, between year 2001 and 2286", - "Rarity": 0.8, - "URL": null, - "Tags": [ - "Time" - ] - }, + { "Name": "Amazon Resource Name (ARN)", "Regex": "(?i)^arn:(?P[^:\n]*):(?P[^:\n]*):(?P[^:\n]*):(?P[^:\n]*):(?P(?P[^:\/\n]*)[:\/])?(?P.*)$", @@ -448,6 +427,28 @@ "Identifiers", "Credentials" ] + }, + { + "Name": "Recent Unix Timestamp", + "Regex": "^[0-9]{10}$", + "plural_name": false, + "Description": "Seconds elapsed since unix epoch: 1970, between year 2001 and 2286", + "Rarity": 0.5, + "URL": null, + "Tags": [ + "Time" + ] + }, + { + "Name": "Recent Unix Millisecond Timestamp", + "Regex": "^[0-9]{13}$", + "plural_name": false, + "Description": "Milliseconds elapsed since unix epoch: 1970, between year 2001 and 2286", + "Rarity": 0.5, + "URL": null, + "Tags": [ + "Time" + ] }, { "Name": "YouTube Channel ID", From 43906e2645506c032ce658e612d04be6d1b81bba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bernardas=20Ali=C5=A1auskas?= Date: Thu, 24 Jun 2021 16:07:01 +0700 Subject: [PATCH 3/6] update test_unix_timestamp to use timestamp literals for tests --- tests/test_regex_identifier.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tests/test_regex_identifier.py b/tests/test_regex_identifier.py index 4e4206b..c99d138 100644 --- a/tests/test_regex_identifier.py +++ b/tests/test_regex_identifier.py @@ -393,24 +393,28 @@ def test_arn4(): def test_unix_timestamp(): r = regex_identifier.RegexIdentifier() - ts_from_ymd = lambda ymd: int(datetime.strptime(ymd, '%Y-%m-%d').timestamp()) - res = r.check([str(ts_from_ymd('2020-01-01'))]) + res = r.check(["1577836800"]) # 2020-01-01 keys = [m['Regex Pattern']['Name'] for m in res] assert "Unix Timestamp" in keys assert "Recent Unix Timestamp" in keys - res = r.check([str(ts_from_ymd('1980-01-01'))]) + res = r.check(["94694400"]) # 1973-01-01 keys = [m['Regex Pattern']['Name'] for m in res] assert "Unix Timestamp" in keys assert "Recent Unix Timestamp" not in keys - res = r.check([str(ts_from_ymd('2020-01-01') * 1000)]) + res = r.check(["1234567"]) # 7 numbers + keys = [m['Regex Pattern']['Name'] for m in res] + assert "Unix Timestamp" not in keys + assert "Recent Unix Timestamp" not in keys + + res = r.check(["1577836800000"]) # 2020-01-01 keys = [m['Regex Pattern']['Name'] for m in res] assert "Unix Millisecond Timestamp" in keys assert "Recent Unix Millisecond Timestamp" in keys - res = r.check([str(ts_from_ymd('1980-01-01') * 1000)]) + res = r.check(["94694400000"]) # 1973-01-01 keys = [m['Regex Pattern']['Name'] for m in res] assert "Unix Millisecond Timestamp" in keys assert "Recent Unix Millisecond Timestamp" not in keys From 51749ccf9747b827be0086814131a9acce7a63d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bernardas=20Ali=C5=A1auskas?= Date: Tue, 29 Jun 2021 14:34:09 +0700 Subject: [PATCH 4/6] formatting --- pywhat/Data/regex.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pywhat/Data/regex.json b/pywhat/Data/regex.json index caddfcb..a9ec809 100644 --- a/pywhat/Data/regex.json +++ b/pywhat/Data/regex.json @@ -178,7 +178,6 @@ "IPv6" ] }, - { "Name": "Google API Key", "Regex": "AIza[0-9A-Za-z-_]{35}", @@ -681,7 +680,7 @@ "Phone Number" ] }, - { + { "Name": "Recent Unix Timestamp", "Regex": "^[0-9]{10}$", "plural_name": false, From fbfcc3009b868587808ac0a12f26592e031c18de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bernardas=20Ali=C5=A1auskas?= Date: Tue, 29 Jun 2021 14:35:52 +0700 Subject: [PATCH 5/6] separate timestamp tests into individual functions --- tests/test_regex_identifier.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/tests/test_regex_identifier.py b/tests/test_regex_identifier.py index b49a6f0..4b4409c 100644 --- a/tests/test_regex_identifier.py +++ b/tests/test_regex_identifier.py @@ -479,29 +479,40 @@ def test_arn4(): assert "ARN" in str(res) -def test_unix_timestamp(): +def test_unix_timestamp1(): r = regex_identifier.RegexIdentifier() - res = r.check(["1577836800"]) # 2020-01-01 keys = [m['Regex Pattern']['Name'] for m in res] assert "Unix Timestamp" in keys assert "Recent Unix Timestamp" in keys + +def test_unix_timestamp2(): + r = regex_identifier.RegexIdentifier() res = r.check(["94694400"]) # 1973-01-01 keys = [m['Regex Pattern']['Name'] for m in res] assert "Unix Timestamp" in keys assert "Recent Unix Timestamp" not in keys + +def test_unix_timestamp3(): + r = regex_identifier.RegexIdentifier() res = r.check(["1234567"]) # 7 numbers keys = [m['Regex Pattern']['Name'] for m in res] assert "Unix Timestamp" not in keys assert "Recent Unix Timestamp" not in keys + +def test_unix_timestamp4(): + r = regex_identifier.RegexIdentifier() res = r.check(["1577836800000"]) # 2020-01-01 keys = [m['Regex Pattern']['Name'] for m in res] assert "Unix Millisecond Timestamp" in keys assert "Recent Unix Millisecond Timestamp" in keys + +def test_unix_timestamp5(): + r = regex_identifier.RegexIdentifier() res = r.check(["94694400000"]) # 1973-01-01 keys = [m['Regex Pattern']['Name'] for m in res] assert "Unix Millisecond Timestamp" in keys From f4ab9fb39dbd9d74702aa36f95edbe0b03066a71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bernardas=20Ali=C5=A1auskas?= Date: Tue, 29 Jun 2021 17:43:02 +0700 Subject: [PATCH 6/6] adjust first timestamp test's name --- tests/test_regex_identifier.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_regex_identifier.py b/tests/test_regex_identifier.py index 4b4409c..b1a9f36 100644 --- a/tests/test_regex_identifier.py +++ b/tests/test_regex_identifier.py @@ -479,7 +479,7 @@ def test_arn4(): assert "ARN" in str(res) -def test_unix_timestamp1(): +def test_unix_timestamp(): r = regex_identifier.RegexIdentifier() res = r.check(["1577836800"]) # 2020-01-01 keys = [m['Regex Pattern']['Name'] for m in res]