style: format code with Autopep8, Black, isort, Ruff Formatter and Yapf #2

Open · wants to merge 1 commit into base: main
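
The PR itself carries no formatter configuration, so how the tools were invoked is not shown. As a rough sketch only (not the authors' actual setup), the two kinds of change visible in the hunks below, import regrouping and single-to-double quote normalization, can be reproduced with isort's and Black's public Python APIs under default settings:

import black
import isort

src = "PATH = os.getenv('PATH_OF_DIR')\n"
src = isort.code(src)  # sort/group imports (a no-op on this one-liner)
src = black.format_str(src, mode=black.Mode())  # normalizes quotes to double
print(src)  # PATH = os.getenv("PATH_OF_DIR")
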
19 changes: 9 additions & 10 deletions moviedata/__init__.py
@@ -1,18 +1,17 @@
-
-from tqdm import tqdm
-import pandas as pd
 import os
-from dotenv import load_dotenv
-from pprint import pprint
 import threading
+from pprint import pprint
 from typing import *
-from bs4 import *
-import requests
 
-load_dotenv()
-PATH = os.getenv('PATH_OF_DIR')
+import pandas as pd
+import requests
+from bs4 import *
+from dotenv import load_dotenv
+from tqdm import tqdm
 
+from moviedata.analysis import *
 from moviedata.helper_functions import *
 from moviedata.scraper import *
-from moviedata.analysis import *
 
+load_dotenv()
+PATH = os.getenv("PATH_OF_DIR")
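
The regrouped imports above follow isort's default section order: standard library, then third-party, then first-party, each block separated by a blank line and sorted alphabetically within itself. A quick way to check how isort classifies a module (the FIRSTPARTY result for moviedata assumes isort is run from this repo's root):

import isort

print(isort.place_module("os"))         # STDLIB
print(isort.place_module("pandas"))     # THIRDPARTY
print(isort.place_module("moviedata"))  # FIRSTPARTY when run inside this repo

One thing worth checking in review: isort also floats the moviedata wildcard imports above load_dotenv() and the PATH assignment. Since helper_functions does `from moviedata import *`, it now imports before PATH exists and will no longer bind it, so methods that reference PATH may raise NameError at runtime. This is an inference from the reordered lines, not a tested result.
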
29 changes: 16 additions & 13 deletions moviedata/helper_functions.py
@@ -1,6 +1,7 @@
-from moviedata import *
 import base64
 
+from moviedata import *
+
 
 def encode(message: str) -> bytes:
     """
@@ -16,8 +17,7 @@ def get_page(url: str, cookies: dict) -> BeautifulSoup:
     headers = {
         "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
     }
-    req = requests.get(
-        url, headers=headers, cookies=cookies)
+    req = requests.get(url, headers=headers, cookies=cookies)
     return BeautifulSoup(req.content, "html.parser")
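
For context, get_page wraps requests.get with a desktop User-Agent and returns the parsed soup. A hypothetical call, with a placeholder URL and an empty cookie jar that are not from the PR:

from moviedata.helper_functions import get_page

soup = get_page("https://www.imdb.com/chart/moviemeter/", cookies={})
print(soup.title.text if soup.title else "no <title> found")
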


@@ -26,27 +26,30 @@ def __init__(self, data: Dict, name: str) -> None:
         self.data = pd.DataFrame(data)
         self.name = name
         print(PATH)
-        self.file_path = f'{PATH}/data/{self.name}/{self.name}'
-        if name not in os.listdir(f'{PATH}/data/'):
-            os.mkdir(f'{PATH}/data/{name}')
+        self.file_path = f"{PATH}/data/{self.name}/{self.name}"
+        if name not in os.listdir(f"{PATH}/data/"):
+            os.mkdir(f"{PATH}/data/{name}")
 
     def save_csv(self) -> bool:
-        self.data.to_csv(
-            self.file_path + '.csv', index=False)
+        self.data.to_csv(self.file_path + ".csv", index=False)
         return True
 
     def save_json(self) -> bool:
-        self.data.to_json(self.file_path + '.json')
+        self.data.to_json(self.file_path + ".json")
         return True
 
 
 def get_reviews_details(reviews: List[BeautifulSoup]) -> List:
     review_details = []
     for review in reviews:
-        review_url = review.find('a').attrs['href']
-        review_three_elems = review.find(
-            'span', class_="three-Elements").find_all('span')
-        review_count, review_type = review_three_elems[0].text, review_three_elems[1].text
+        review_url = review.find("a").attrs["href"]
+        review_three_elems = review.find("span", class_="three-Elements").find_all(
+            "span"
+        )
+        review_count, review_type = (
+            review_three_elems[0].text,
+            review_three_elems[1].text,
+        )
         review_details.append([review_url, review_count, review_type])
     return review_details
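
get_reviews_details pulls, from each review element, the link href plus the two inner spans of a span.three-Elements node. A minimal self-contained illustration; the sample markup is an assumption reverse-engineered from the selectors above, not IMDb's real HTML:

from bs4 import BeautifulSoup

from moviedata.helper_functions import get_reviews_details

sample = (
    '<div class="review">'
    '<a href="/review/rw123/">review</a>'
    '<span class="three-Elements"><span>1.2K</span><span>User reviews</span></span>'
    "</div>"
)
soup = BeautifulSoup(sample, "html.parser")
print(get_reviews_details(soup.find_all("div", class_="review")))
# expected: [['/review/rw123/', '1.2K', 'User reviews']]
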

47 changes: 34 additions & 13 deletions moviedata/scraper/imdb/imdb_movies_basic_details_scraper.py
@@ -1,6 +1,6 @@
 from moviedata import *
-from moviedata.scraper.imdb.imdb_scraper import *
 from moviedata.scraper.imdb.imdb_movie_details_scraper import *
+from moviedata.scraper.imdb.imdb_scraper import *
 
 
 class IMDBMoviesBasicDetailsScraper(IMDBScraper):
@@ -16,7 +16,7 @@ def __init__(self, specific_url: str = "/chart/moviemeter/?ref_=nv_mv_mpm"):
             "movieYear": [],
             "movieTime": [],
             "movieAvgRating": [],
-            "movieRatingCount": []
+            "movieRatingCount": [],
         }
 
     def movies_basic_details(self):
@@ -35,22 +35,43 @@ def movies_basic_details(self):
                 "span", class_="sc-b189961a-8 kLaxqf cli-title-metadata-item"
             )
             movie_year = movie_timestamp[0].text if movie_timestamp else None
-            movie_time = movie_timestamp[1].text if len(
-                movie_timestamp) > 1 else None
+            movie_time = movie_timestamp[1].text if len(movie_timestamp) > 1 else None
             movie_avg_rating_element = movie.find(
                 "span",
                 class_="ipc-rating-star ipc-rating-star--base ipc-rating-star--imdb ratingGroup--imdb-rating",
             )
-            movie_avg_rating = movie_avg_rating_element.contents[1].strip(
-            ) if movie_avg_rating_element else None
+            movie_avg_rating = (
+                movie_avg_rating_element.contents[1].strip()
+                if movie_avg_rating_element
+                else None
+            )
             movie_rating_count_element = movie.find(
-                "span", class_="ipc-rating-star--voteCount")
-            movie_rating_count = movie_rating_count_element.text.strip().replace(
-                "(", "").replace(")", "") if movie_rating_count_element else None
-            self.movies_basic_details_dict = add_to_dict(self.movies_basic_details_dict, [encode(
-                movie_title), movie_img, movie_more_url, movie_title, movie_year, movie_time, movie_avg_rating, movie_rating_count])
-            movie_thread = threading.Thread(target=self.imdb_movie_details.movie_further_details, args=(
-                encode(movie_title), movie_more_url))
+                "span", class_="ipc-rating-star--voteCount"
+            )
+            movie_rating_count = (
+                movie_rating_count_element.text.strip()
+                .replace("(", "")
+                .replace(")", "")
+                if movie_rating_count_element
+                else None
+            )
+            self.movies_basic_details_dict = add_to_dict(
+                self.movies_basic_details_dict,
+                [
+                    encode(movie_title),
+                    movie_img,
+                    movie_more_url,
+                    movie_title,
+                    movie_year,
+                    movie_time,
+                    movie_avg_rating,
+                    movie_rating_count,
+                ],
+            )
+            movie_thread = threading.Thread(
+                target=self.imdb_movie_details.movie_further_details,
+                args=(encode(movie_title), movie_more_url),
+            )
             movie_threads.append(movie_thread)
             movie_thread.start()
             # break
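
The loop above starts one thread per movie to fetch further details and collects the handles in movie_threads. The natural follow-up, which the visible diff truncates before reaching, would be to join them so all detail fetches finish before the results are used; whether the method actually does this below the fold is an assumption:

for movie_thread in movie_threads:
    movie_thread.join()  # wait for every detail-fetch thread to complete
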