-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcomics_transcript_extractor.py
40 lines (30 loc) · 1.69 KB
/
comics_transcript_extractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
from bs4 import BeautifulSoup
import image_describer as ImageDescriber
def get_transcript(comics_file, keys_obj):
soup = None
with open(comics_file, "r", encoding='utf8') as html_file:
soup = BeautifulSoup(html_file, 'lxml')
comic_content_div = soup.findAll("div", {"class": "col-md-9 col-lg-9 col-sm-8 contLft"})[0]
title = get_title(comic_content_div)
scenes = get_scenes(comic_content_div, keys_obj)
transcript = "Hi, I am gonna help you read this comics story: " + title + ". Here's what will happen: For each scene, I am gonna call out what I see in the image, and then I will transcribe the scene. Rest, I am gonna leave to your imagination. Since, we are going to read the comics, imagine each charater I call out as a cartoon. So, let's get started. " + scenes
return transcript
def get_title(comic_content_div):
return comic_content_div.find("h2").text
def get_scenes(comic_content_div, keys_obj):
scenes = comic_content_div.findAll("div", {"class": "col-md-6 col-lg-6 col-sm-6 comicWrap"})
scene_transcript_list = []
scene_index = 1
for scene in scenes:
image_src = "Comics/" + scene.find("img")['src']
image_decription = ImageDescriber.get_image_description(
image_src,
keys_obj['custom_vision_api']['url'],
keys_obj['custom_vision_api']['prediction_key'])
image_decription = 'Scene ' + str(scene_index) + ': I see, ' + image_decription + '.'
print(image_decription)
text = scene.text
complete_text = image_decription + " " + text.strip()
scene_transcript_list.append(complete_text)
scene_index += 1
return '\n'.join(scene_transcript_list)