LangMers_Image_Pipeline

https://cdn.mathpix.com/snip/images/jO-Dw7_tgsdshqDWoOg1vm4-b4ECuwWHYYzUIHRmRg4.original.fullsize.png
LangMers · Dec 24, 2023 · 67aff21 · 67aff21
1 parent eb184c9
commit 67aff21
Show file tree

Hide file tree

Showing 4 changed files with 400 additions and 0 deletions.
diff --git a/Lang_Scan.py b/Lang_Scan.py
@@ -0,0 +1,106 @@
+import sys
+import os
+import io
+from google.cloud import vision
+from PIL import Image
+import cv2
+import subprocess  # Added for setting metadata
+import plistlib  # Added for reading metadata
+import openai
+
+# Supported image formats: file extensions (leading dot included) accepted by
+# load_image, which lower-cases the extension before looking it up here.
+SUPPORTED_FORMATS = ['.jpg', '.jpeg', '.png', '.tiff', '.bmp']
+
+# Error handling decorator
+def handle_errors(func):
+    """Wrap ``func`` so any exception is printed and turned into ``None``.
+
+    Args:
+        func: The callable to protect.
+
+    Returns:
+        A wrapper that forwards all arguments to ``func``. When ``func``
+        raises, the wrapper prints ``Error: <exception>`` and returns ``None``,
+        so callers must treat ``None`` as "this step failed".
+    """
+    import functools  # local import keeps this block self-contained
+
+    # wraps() keeps __name__/__doc__ of the decorated function intact.
+    @functools.wraps(func)
+    def inner(*args, **kwargs):
+        try:
+            return func(*args, **kwargs)
+        except Exception as e:  # deliberate catch-all at the decorator boundary
+            print(f"Error: {e}")
+            return None
+    return inner
+
+@handle_errors
+def load_image(image_path):
+    """Open the image at ``image_path`` if its extension is supported.
+
+    Args:
+        image_path: Path to the image file. The extension is lower-cased and
+            matched against ``SUPPORTED_FORMATS``.
+
+    Returns:
+        The object returned by ``Image.open``, or ``None`` when the extension
+        is unsupported or opening fails (``handle_errors`` prints the error).
+    """
+    ext = os.path.splitext(image_path)[1].lower()
+
+    # '.bmp' is listed in SUPPORTED_FORMATS, so a separate cv2.imread() branch
+    # for it could never be reached; a single code path covers every format.
+    if ext not in SUPPORTED_FORMATS:
+        raise ValueError("Unsupported image format")
+
+    return Image.open(image_path)
+
+@handle_errors
+def get_text(image):
+    """Extract text from an image file with the Google Cloud Vision API.
+
+    Args:
+        image: Path of the image file; it is opened in binary mode and its
+            bytes are sent to the text-detection endpoint.
+
+    Returns:
+        The description of the first text annotation, or ``None`` when the
+        response has no annotations. Any failure is printed by
+        ``handle_errors`` and also yields ``None``.
+    """
+    client = vision.ImageAnnotatorClient()
+    with io.open(image, 'rb') as image_file:
+        payload = image_file.read()
+
+    response = client.text_detection(image=vision.Image(content=payload))
+    annotations = response.text_annotations
+
+    # A non-empty error message means the API rejected the request.
+    if response.error.message:
+        raise Exception(f'Google API Error: {response.error.message}')
+
+    if not annotations:
+        return None
+    return annotations[0].description
+
+@handle_errors
+def categorize(text):
+    """Use OpenAI GPT-3 to categorize text.
+
+    The API key is read from the ``OPENAI_API_KEY`` environment variable so
+    that no credential is stored in the source file.
+
+    Args:
+        text: Text to categorize; it is appended to the prompt.
+
+    Returns:
+        The stripped text of the first completion choice, or ``None`` when the
+        key is missing or the request fails (the error is printed).
+    """
+    openai_api_key = os.environ.get('OPENAI_API_KEY')
+    if not openai_api_key:
+        # Raised outside the try below: handle_errors prints it and returns None.
+        raise RuntimeError("OPENAI_API_KEY environment variable is not set")
+    openai.api_key = openai_api_key
+    try:
+        # NOTE(review): relies on the legacy Completion endpoint and the
+        # text-davinci-003 model; confirm both are still available.
+        response = openai.Completion.create(
+            engine="text-davinci-003",
+            prompt="Categorize and tag this image description: " + text,
+            max_tokens=50
+        )
+        return response.choices[0].text.strip()
+    except Exception as e:
+        print(f"Error in GPT-3 request: {e}")
+        return None
+
+def set_tags_and_comments(image_path, tags, comments):
+    """Write tags and a comment onto a file as extended attributes.
+
+    Each value is written with ``xattr -w``. The two attributes are handled
+    independently, so a failure on one does not prevent the other, and every
+    failure (including a non-zero ``xattr`` exit status) is printed.
+
+    Args:
+        image_path: File that receives the attributes.
+        tags: Value for ``com.apple.metadata:_kMDItemUserTags``.
+        comments: Value for ``com.apple.metadata:kMDItemFinderComment``.
+    """
+    # NOTE(review): Finder may expect these values encoded as a binary plist
+    # rather than plain text -- confirm against the xattr/Spotlight docs.
+    attributes = (
+        ('com.apple.metadata:_kMDItemUserTags', tags),
+        ('com.apple.metadata:kMDItemFinderComment', comments),
+    )
+    for attr_name, attr_value in attributes:
+        try:
+            # check=True turns a failing xattr call into an exception so that
+            # it is reported below instead of being ignored.
+            subprocess.run(
+                ['xattr', '-w', attr_name, attr_value, image_path],
+                check=True,
+            )
+        except Exception as e:
+            print(f"Error setting tags and comments: {e}")
+
+def main(image_path):
+    """Run the pipeline on one image: load, extract text, categorize, tag.
+
+    The function stops quietly as soon as a step yields nothing; the helper
+    that failed has already printed its error.
+
+    Args:
+        image_path: Path of the image file to process.
+    """
+    # Load image (checks the extension and that the file can be opened)
+    image = load_image(image_path)
+    if image is None:
+        return
+
+    # Extract text
+    # get_text opens its argument as a file, so it needs the path rather than
+    # the image object returned by load_image.
+    text = get_text(image_path)
+    if not text:
+        return
+
+    # Classify text
+    category = categorize(text)
+    if not category:
+        return
+
+    print(category)
+
+    # Set tags and comments
+    set_tags_and_comments(image_path, category, "Your comments here")
+
+# Script entry point: exactly one argument (the image path) is expected.
+if __name__ == "__main__":
+    if len(sys.argv) == 2:
+        main(sys.argv[1])
+    else:
+        print("Usage: python script.py <image_path>")
+
diff --git a/img_processor.py b/img_processor.py
@@ -0,0 +1,16 @@
+import logging
+import os
+from functions import ocr, categorize, process_image
+
+# Send log records of level INFO and above to the file app.log.
+logging.basicConfig(filename='app.log', level=logging.INFO)
+
+# Folder whose entries main() passes to process_image.
+# NOTE(review): looks like a placeholder path -- set it before running.
+images_folder = '/path/images'
+
+def main():
+    """Run ``process_image`` on every regular file in ``images_folder``.
+
+    Directories and other non-file entries are skipped, and each decision is
+    recorded through the logging configuration set up in this module.
+    """
+    for filename in os.listdir(images_folder):
+        image_path = os.path.join(images_folder, filename)
+        # os.listdir also returns sub-directories; only real files are processed.
+        if not os.path.isfile(image_path):
+            logging.info("Skipping non-file entry: %s", image_path)
+            continue
+        logging.info("Processing %s", image_path)
+        process_image(image_path)
+
+# Run the batch only when this file is executed as a script, not on import.
+if __name__ == '__main__':
+    main()
+
diff --git a/lang_ocr.py b/lang_ocr.py
@@ -0,0 +1,170 @@
+import sys
+import subprocess
+import openai
+from PIL import Image
+import pytesseract
+import time
+import os
+import os
+import pytesseract
+from PIL import Image
+import openai
+import subprocess
+
+# Path to image folder: listed by the batch loop further down and used by
+# rename_file as the destination directory for renamed files.
+image_dir = '/path/to/images'  
+
+# OpenAI API key (placeholder value; this name is assigned again later in
+# the module)
+openai_api_key = 'your_key'
+
+# Supported formats: extensions the batch loop accepts; the loop lower-cases
+# each file's extension before comparing it with this list.
+formats = ['.jpg', '.png'] 
+
+def ocr_image(filepath):
+    """Run OCR on image and return text
+
+    Args:
+        filepath: Path of the image file to read.
+
+    Returns:
+        The string produced by ``pytesseract.image_to_string``.
+    """
+    # The context manager releases the file handle as soon as OCR is done.
+    with Image.open(filepath) as image:
+        return pytesseract.image_to_string(image)
+
+def clean_text(text):
+    """Clean up text for filename
+
+    Surrounding whitespace is removed, every remaining run of whitespace
+    (spaces, tabs, line breaks) becomes one underscore, and any character
+    other than word characters, ``.`` and ``-`` is dropped.
+
+    Args:
+        text: Raw text, e.g. OCR output.
+
+    Returns:
+        The sanitized string; empty when nothing usable remains.
+    """
+    import re  # local import: the module does not import re at the top
+
+    # Multi-line OCR text must not put line breaks into a file name.
+    text = re.sub(r'\s+', '_', text.strip())
+    # Path separators and similar characters are unsafe in a file name.
+    text = re.sub(r'[^\w.-]', '', text)
+    return text
+
+def categorize(text):
+    """Use GPT-3 to categorize text"""
+    # Placeholder: no request is made here, so the function returns None.
+    # A second definition of categorize appears later in this module; the
+    # batch loop that runs before it still calls this placeholder.
+    # Call OpenAI API
+    ...
+
+def rename_file(filepath, text):
+    """Rename an image into ``image_dir`` using a name derived from ``text``.
+
+    Args:
+        filepath: Current path of the file.
+        text: Text the new name is built from (cleaned with ``clean_text``
+            and limited to 50 characters).
+
+    Returns:
+        The new path, or ``None`` when ``text`` yields no usable name and the
+        file is left where it is.
+    """
+    stem = clean_text(text)[:50]
+    if not stem:
+        # An empty stem would leave a bare extension as the file name.
+        return None
+
+    # Keep the file's real extension instead of labelling everything '.jpg'.
+    ext = os.path.splitext(filepath)[1] or '.jpg'
+
+    new_path = os.path.join(image_dir, stem + ext)
+    counter = 1
+    # Pick a free name so an existing file with the same text is not replaced.
+    while os.path.exists(new_path) and new_path != filepath:
+        new_path = os.path.join(image_dir, f"{stem}_{counter}{ext}")
+        counter += 1
+
+    os.rename(filepath, new_path)
+    return new_path
+
+def add_metadata(filepath, text, category):
+    """Add tags and comments""" 
+    # Placeholder: the values are prepared below but nothing is written to
+    # the file yet, so the function returns None without side effects.
+    tags = category  # the category is meant to become the tags
+    comments = text  # the text is meant to become the comment
+
+    # Call xattr to set tags and comments
+    ...
+
+# Batch pass over image_dir: OCR, categorize, tag and rename each supported
+# image. The directory check keeps a missing folder from aborting the import.
+text = ''  # most recent OCR result; stays empty when nothing was processed
+batch_entries = os.listdir(image_dir) if os.path.isdir(image_dir) else []
+
+for filename in batch_entries:
+
+    filepath = os.path.join(image_dir, filename)
+
+    # Everything below applies to supported images only; other entries have no
+    # OCR text of their own and must not be categorized, tagged or renamed.
+    if os.path.splitext(filename)[1].lower() not in formats:
+        continue
+
+    text = ocr_image(filepath)
+
+    category = categorize(text)
+
+    # Tag before renaming: rename_file moves the file away from `filepath`.
+    add_metadata(filepath, text, category)
+
+    rename_file(filepath, text)
+
+# Remove special characters, whitespace etc
+new_name = clean_text(text)
+
+# Limit length
+new_name = new_name[:50]
+# Path to the folder where screenshots are saved
+screenshot_folder = '/Users/david/Desktop/Screenshots_Automate'
+
+# Supported image formats
+SUPPORTED_FORMATS = ['.jpg', '.jpeg', '.png', '.tiff', '.bmp']
+
+# OpenAI API key: taken from the environment so no secret lives in the source
+openai_api_key = os.environ.get('OPENAI_API_KEY', '')
+
+# Error handling decorator
+def handle_errors(func):
+    """Wrap ``func`` so any exception is printed and turned into ``None``.
+
+    Args:
+        func: The callable to protect.
+
+    Returns:
+        A wrapper that forwards all arguments to ``func``. When ``func``
+        raises, the wrapper prints ``Error: <exception>`` and returns ``None``,
+        so callers must treat ``None`` as "this step failed".
+    """
+    import functools  # local import keeps this block self-contained
+
+    # wraps() keeps __name__/__doc__ of the decorated function intact.
+    @functools.wraps(func)
+    def inner(*args, **kwargs):
+        try:
+            return func(*args, **kwargs)
+        except Exception as e:  # deliberate catch-all at the decorator boundary
+            print(f"Error: {e}")
+            return None
+    return inner
+
+@handle_errors
+def load_image(image_path):
+    """Open ``image_path`` with PIL when its extension is supported.
+
+    The extension is compared lower-cased against ``SUPPORTED_FORMATS``. An
+    unsupported extension raises ``ValueError`` inside the function, which
+    ``handle_errors`` prints and converts into a ``None`` result.
+    """
+    extension = os.path.splitext(image_path)[1].lower()
+
+    # Guard clause: reject unknown formats before touching the file.
+    if extension not in SUPPORTED_FORMATS:
+        raise ValueError("Unsupported image format")
+
+    return Image.open(image_path)
+
+@handle_errors
+def get_text_with_ocr(image):
+    """Use Tesseract OCR to extract text from an image.
+
+    Args:
+        image: Image handed to ``pytesseract.image_to_string``.
+
+    Returns:
+        The recognized text, unchanged. When the result is empty or contains
+        only whitespace, an exception is raised inside the function, which
+        ``handle_errors`` prints and converts into ``None``.
+    """
+    text = pytesseract.image_to_string(image)
+
+    # OCR output for an image without text may still hold whitespace (line
+    # breaks, a trailing form feed), which a plain truthiness test would accept.
+    if not text or not text.strip():
+        raise Exception("No text found in the image")
+
+    return text
+
+@handle_errors
+def categorize(text):
+    """Ask the OpenAI completion endpoint to categorize and tag ``text``.
+
+    The module-level ``openai_api_key`` is installed on the client first.
+    Returns the stripped text of the first choice, or ``None`` after printing
+    the error when anything in the request fails.
+    """
+    openai.api_key = openai_api_key
+    try:
+        create_completion = openai.Completion.create
+        request = {
+            "engine": "text-davinci-003",
+            "prompt": "Categorize and tag this image description: " + text,
+            "max_tokens": 50,
+        }
+        completion = create_completion(**request)
+        first_choice = completion.choices[0]
+        return first_choice.text.strip()
+    except Exception as err:
+        print(f"Error in GPT-3 request: {err}")
+        return None
+
+def set_tags_and_comments(image_path, tags, comments):
+    """Write tags and a comment onto a file as extended attributes.
+
+    Each value is written with ``xattr -w``. The two attributes are handled
+    independently, so a failure on one does not prevent the other, and every
+    failure (including a non-zero ``xattr`` exit status) is printed.
+
+    Args:
+        image_path: File that receives the attributes.
+        tags: Value for ``com.apple.metadata:_kMDItemUserTags``.
+        comments: Value for ``com.apple.metadata:kMDItemFinderComment``.
+    """
+    # NOTE(review): Finder may expect these values encoded as a binary plist
+    # rather than plain text -- confirm against the xattr/Spotlight docs.
+    attributes = (
+        ('com.apple.metadata:_kMDItemUserTags', tags),
+        ('com.apple.metadata:kMDItemFinderComment', comments),
+    )
+    for attr_name, attr_value in attributes:
+        try:
+            # check=True turns a failing xattr call into an exception so that
+            # it is reported below instead of being ignored.
+            subprocess.run(
+                ['xattr', '-w', attr_name, attr_value, image_path],
+                check=True,
+            )
+        except Exception as e:
+            print(f"Error setting tags and comments: {e}")
+
+def process_screenshot(file_path):
+    """Load, OCR, categorize and tag a single screenshot.
+
+    The function stops quietly as soon as a step yields nothing; the helper
+    that failed has already printed its error.
+
+    Args:
+        file_path: Path of the screenshot to process.
+    """
+    # Load image
+    image = load_image(file_path)
+    if image is None:
+        return
+
+    # Extract text using OCR
+    text = get_text_with_ocr(image)
+    # monitor_folder calls this in an endless loop, so the file handle is
+    # released as soon as OCR no longer needs it.
+    image.close()
+    if not text:
+        return
+
+    # Classify text
+    category = categorize(text)
+    if not category:
+        return
+
+    # Set tags and comments (the category is the tag; the raw OCR text must
+    # not overwrite it afterwards)
+    set_tags_and_comments(file_path, category, "Your comments here")
+
+def monitor_folder():
+    """Poll ``screenshot_folder`` forever and process each new screenshot.
+
+    Every supported image file found is passed to ``process_screenshot`` and
+    then moved into ``image_dir`` under its own file name, so it is not picked
+    up again on the next poll. A file that cannot be moved is remembered and
+    skipped afterwards instead of being re-processed every second.
+
+    This function never returns.
+    """
+    extensions = tuple(SUPPORTED_FORMATS)  # constant across polls
+    unmovable = set()
+    while True:
+        for file_name in os.listdir(screenshot_folder):
+            file_path = os.path.join(screenshot_folder, file_name)
+            if file_path in unmovable:
+                continue
+            if os.path.isfile(file_path) and file_name.lower().endswith(extensions):
+                process_screenshot(file_path)
+                try:
+                    # Move exactly the file that was just processed.
+                    os.rename(file_path, os.path.join(image_dir, file_name))
+                except OSError as e:
+                    print(f"Error moving {file_path}: {e}")
+                    unmovable.add(file_path)
+        time.sleep(1)  # Adjust the sleep time as needed
+
+# Start the polling loop only when this file is executed as a script.
+if __name__ == "__main__":
+    monitor_folder()