-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathEditorials.py
49 lines (38 loc) · 1.43 KB
/
Editorials.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
from lib.Titles_With_Links import *
from lib.Dict_To_PDF import *
# Extracting Editorials
def extract_editorials():
    """Scrape the text of every editorial listed in ``title_to_link``.

    For each key of ``title_to_link`` the matching Hindustan Times page is
    opened in a Chrome WebDriver session, and the text of the summary
    heading (``h2.sortDec``) followed by every paragraph inside the
    ``div.detail`` container is collected.

    NOTE(review): ``Service``, ``webdriver``, ``BeautifulSoup`` and
    ``title_to_link`` are not defined in this file; they are presumably
    provided by the star imports at the top - confirm.

    Returns:
        dict: maps each key of ``title_to_link`` to its scraped text. If a
        page lacks the summary heading or the detail container, that part
        is simply omitted (the entry may be an empty string).
    """
    # Raw string: the value is unchanged, but the literal no longer relies
    # on "\w" and "\c" being tolerated as invalid escape sequences.
    driver_path = r"C:\webdrivers\chromedriver"
    service = Service(driver_path)
    driver = webdriver.Chrome(service=service)
    title_to_story = {}
    try:
        for title in title_to_link:
            driver.get("https://www.hindustantimes.com" + title_to_link[title])
            soup = BeautifulSoup(driver.page_source, "html.parser")
            pieces = []

            # Short description of the story.
            summary = soup.find("h2", attrs={"class": "sortDec"})
            if summary is not None:
                pieces.append(summary.text + " ")

            # Detailed story: paragraphs are concatenated without separators.
            detail = soup.find("div", attrs={"class": "detail"})
            if detail is not None:
                pieces.extend(para.text for para in detail.find_all("p"))

            title_to_story[title] = "".join(pieces)
    finally:
        # Always shut the browser down, even if a page fails to load or parse.
        driver.quit()
    return title_to_story
if __name__ == "__main__":
    # Interactive menu: keep prompting until the user enters "3" (Exit).
    menu_prompt = "Enter Your Option: \n 1. All the Editorials in a Single PDF \n 2. All the Editorials in Different PDF'S \n 3. Exit \n Enter Your Option Here: "
    while (choice := input(menu_prompt)) != "3":
        if choice == "1":
            # Titles_With_Links() runs before every export; presumably it
            # (re)builds title_to_link - confirm in lib.Titles_With_Links.
            Titles_With_Links()
            dict_to_sing_pdf(extract_editorials())
        elif choice == "2":
            Titles_With_Links()
            dict_to_mul_pdf(extract_editorials())
        else:
            # Anything other than "1", "2" or "3" re-displays the menu.
            print("Please choose from the following options")