-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpic_to_ip.py
44 lines (34 loc) · 1.22 KB
/
pic_to_ip.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
'''
Author: Yann Santschi
Version: 1.0
Description: OCR extraction of IPs from pictures
Output: Deduplicated list of IPs from pictures in the folder
'''
import cv2
import easyocr
import re
import os
folder_path = r'PATH'
ipv4_pattern = r'\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b'
ip_addresses = []
def img_2_text(location):
img = cv2.imread(location)
reader = easyocr.Reader(['en'], gpu=False)
text = reader.readtext(img)
print(text)
# Loop through the list and search for IP addresses in the second element of each tuple
for item in text:
string_data = item[1] # Second element of each tuple
match = re.search(ipv4_pattern, string_data)
if match:
ip_addresses.append(match.group())
# Press the green button in the gutter to run the script.
if __name__ == '__main__':
for filename in os.listdir(folder_path):
if filename.endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff')):
file = os.path.join(folder_path, filename)
img_2_text(file)
unique_ip_addresses = list(set(ip_addresses))
with open('extracted_ips.txt', 'w') as out_file:
for ip in unique_ip_addresses:
out_file.write(ip + '\n')