From 675e9dd2a3ad26da72614d04bd6014bcb157a34d Mon Sep 17 00:00:00 2001 From: Pavlo Pohorieltsev <667strets@gmail.com> Date: Sun, 3 Sep 2023 17:33:18 +0300 Subject: [PATCH] [FR]: Teacher's schedule #8 --- ontu_parser/classes/dataclasses.py | 477 ++++++++++++++++++++--------- ontu_parser/classes/enums.py | 11 +- ontu_parser/classes/parser.py | 164 ++++++---- ontu_parser/classes/sender.py | 99 +++--- 4 files changed, 477 insertions(+), 274 deletions(-) diff --git a/ontu_parser/classes/dataclasses.py b/ontu_parser/classes/dataclasses.py index 03088d6..d5e0ff7 100644 --- a/ontu_parser/classes/dataclasses.py +++ b/ontu_parser/classes/dataclasses.py @@ -2,6 +2,7 @@ Contains classes needed to get data Like Faculty or Group, provides methods to get names, ids, etc. """ +from urllib.parse import parse_qsl from attrs import define from bs4.element import Tag @@ -11,6 +12,7 @@ class BaseTag(BaseClass): """Base Tag Class for parsing BS4 tags from responses""" + @classmethod def from_tag(cls, tag): """Checks tag and returns initialized object""" @@ -25,29 +27,27 @@ def _check_tag(tag: Tag): @define class Faculty(BaseTag): """Describes faculty from BS4 tag""" + faculty_tag: Tag @staticmethod def _check_tag(tag): - attrs = getattr(tag, 'attrs', None) - span = getattr(tag, 'span', None) + attrs = getattr(tag, "attrs", None) + span = getattr(tag, "span", None) required_properties = [attrs, span] if not all(required_properties): raise ValueError(f"Invalid tag: {tag}, has no attrs", tag) - required = ['data-id'] + required = ["data-id"] for requirement in required: if requirement not in attrs: raise ValueError( f"Invalid tag: {tag}, doesn't have attrs: {required}", tag, - required + required, ) - span_string = getattr(span, 'string', None) + span_string = getattr(span, "string", None) if span_string is None: - raise ValueError( - f"Invalid tag: {tag}, `span` has no string", - tag - ) + raise ValueError(f"Invalid tag: {tag}, `span` has no string", tag) @classmethod def from_tag(cls, tag): @@ -56,11 +56,11 @@ def from_tag(cls, tag): def get_faculty_picture(self): """Returns relative link to picture (if present)""" - return self.faculty_tag.attrs.get('data-cover', None) + return self.faculty_tag.attrs.get("data-cover", None) def get_faculty_id(self): """Returns temporary id of faculty (for later use in search)""" - return self.faculty_tag.attrs['data-id'] + return self.faculty_tag.attrs["data-id"] def get_faculty_name(self): """Returns name of the faculty""" @@ -73,35 +73,28 @@ class Group(BaseTag): group_tag: Tag - _icon_tag_filter = {'attrs': {'class': 'icon'}} - _text_tag_filter = {'attrs': {'class': 'branding-bar'}} + _icon_tag_filter = {"attrs": {"class": "icon"}} + _text_tag_filter = {"attrs": {"class": "branding-bar"}} @staticmethod def _check_tag(tag): - attrs = getattr(tag, 'attrs', None) - required = ['data-id'] + attrs = getattr(tag, "attrs", None) + required = ["data-id"] for requirement in required: if requirement not in attrs: raise ValueError( f"Invalid tag: {tag}, doesn't have attrs: {required}", tag, - required + required, ) # Children requiremenets - icon = tag.find( - **Group._icon_tag_filter - ) - text = tag.find( - **Group._text_tag_filter - ) + icon = tag.find(**Group._icon_tag_filter) + text = tag.find(**Group._text_tag_filter) required = [icon, text] if not all(required): - raise ValueError( - f"Invalid tag: {tag} doesn't have suitable children", - tag - ) + raise ValueError(f"Invalid tag: {tag} doesn't have suitable children", tag) @classmethod def from_tag(cls, tag): @@ -111,16 +104,12 @@ def from_tag(cls, tag): @property def text(self): """Returns text tag from group tag""" - return self.group_tag.find( - **self._text_tag_filter - ) + return self.group_tag.find(**self._text_tag_filter) @property def icon(self): """Returns icon tag from group tag""" - return self.group_tag.find( - **self._icon_tag_filter - ) + return self.group_tag.find(**self._icon_tag_filter) def get_group_id(self): """Returns (temporary) id of this group""" @@ -141,16 +130,16 @@ def get_group_icon(self): # Hardcoding this attrs = self.icon.attrs.copy() # Feels bad :( - attrs.pop('icon') + attrs.pop("icon") return attrs[0] -class BaseLesson(BaseTag): +class BaseStudentsLesson(BaseTag): """ - Describes lesson from bs4 tag + Describes lesson from bs4 tag - Note: Lesson is a concrete even with date and teacher - Pair on the other hand just states at which time lesson will happen + Note: Lesson is a concrete even with date and teacher + Pair on the other hand just states at which time lesson will happen """ lesson_tag: Tag @@ -175,11 +164,12 @@ def parse_tag(self): ) -class RegularLesson(BaseLesson): +class StudentsRegularLesson(BaseStudentsLesson): """ - This class should be used to parse lesson from bs4 tag - If you are getting schedule for current week + This class should be used to parse lesson from bs4 tag + If you are getting schedule for current week """ + @classmethod def from_tag(cls, tag): obj = cls() @@ -190,66 +180,41 @@ def from_tag(cls, tag): def parse_tag(self): lesson_top = self.lesson_tag.parent - predm_element = lesson_top.find( - name='span', - attrs={ - 'class': 'predm' - } - ) + predm_element = lesson_top.find(name="span", attrs={"class": "predm"}) self.lesson_name = { - 'short': predm_element.text, - 'full': predm_element.attrs.get('title', "Not Set") + "short": predm_element.text, + "full": predm_element.attrs.get("title", "Not Set"), } - prp_element = lesson_top.find( - name='span', - attrs={ - 'class': 'prp' - } - ) + prp_element = lesson_top.find(name="span", attrs={"class": "prp"}) self.teacher = { - 'short': prp_element.text.replace('\xa0', ' '), # Why... - 'full': prp_element.attrs.get('title', "Not Set") + "short": prp_element.text.replace("\xa0", " "), # Why... + "full": prp_element.attrs.get("title", "Not Set"), } # Card tag consists of two children # First states type of content # Other - content itself - card_tag = lesson_top.find( - name='div', - attrs={ - 'class': 'card' - } - ) + card_tag = lesson_top.find(name="div", attrs={"class": "card"}) if card_tag: - card_content = card_tag.find( - name='div', - attrs={ - 'class': 'card-content' - } - ) + card_content = card_tag.find(name="div", attrs={"class": "card-content"}) if card_content: - self.lesson_info = card_content.text.replace('\t', '').strip() + self.lesson_info = card_content.text.replace("\t", "").strip() - auditorium_tag = lesson_top.find( - name='a', - attrs={ - 'class': 'fg-blue' - } - ) + auditorium_tag = lesson_top.find(name="a", attrs={"class": "fg-blue"}) if auditorium_tag: self.auditorium = auditorium_tag.text -class Pair(BaseTag): +class StudentsPair(BaseTag): """ - Describes pair from bs4 tag + Describes pair from bs4 tag - Note: Pair describes when certain Lesson will happen + Note: Pair describes when certain Lesson will happen """ pair_tag: Tag - lessons: list[BaseLesson] = [] + lessons: list[BaseStudentsLesson] = [] pair_no: int = None _subgroup_id: int = 0 @@ -273,24 +238,16 @@ def from_tag(cls, tag, subgroup_id=0): def set_pair_number(self): """This method gets pair number for better identification""" - pair_no_tag = self.pair_tag.find( - attrs={ - 'class': 'lesson' - } - ) + pair_no_tag = self.pair_tag.find(attrs={"class": "lesson"}) self.pair_no = int(pair_no_tag.text) def get_pair_tag_for_subgroup(self): """ - This method returns tag for this pair accounting for subgroup - Currently opening a page for subgroup (like KN-321[a]) opens - a page for both subgroups (or a group), thus we have to get a correct cell + This method returns tag for this pair accounting for subgroup + Currently opening a page for subgroup (like KN-321[a]) opens + a page for both subgroups (or a group), thus we have to get a correct cell """ - pair_no_tag = self.pair_tag.find( - attrs={ - 'class': 'lesson' - } - ) + pair_no_tag = self.pair_tag.find(attrs={"class": "lesson"}) skip = 1 + self._subgroup_id pair_tag = None for _ in range(skip): @@ -304,19 +261,10 @@ def get_pair_tag_for_subgroup(self): def get_lessons(pair: Tag): """Parses lessons for this pair""" # All time 'days' have s with dates in them - all_dates = pair.find_all( - name='span', - attrs={ - 'class': 'fg-blue' - } - ) + all_dates = pair.find_all(name="span", attrs={"class": "fg-blue"}) # There is at least one tag with this class if # there are lessons - lesson = pair.find( - attrs={ - 'class': 'predm' - } - ) + lesson = pair.find(attrs={"class": "predm"}) lessons = [] if not any([len(all_dates), lesson]): return lessons @@ -324,31 +272,18 @@ def get_lessons(pair: Tag): # This means we are dealing with 'all time' records # Which have multiple lessons per pair for lesson in all_dates: - lessons.append( - RegularLesson.from_tag( - lesson - ) - ) + lessons.append(StudentsRegularLesson.from_tag(lesson)) return lessons # This means we are dealing with single week records - lessons.append( - RegularLesson.from_tag( - lesson - ) - ) + lessons.append(StudentsRegularLesson.from_tag(lesson)) return lessons -class Schedule(BaseTag): - """Describes schedule from HTML table""" +class BaseSchedule(BaseTag): + """Describes schedule from BS4 tag""" - schedule_table: Tag - subgroups: list[str] = [] - subgroup_id: int = 0 - _schedule_data: dict[str, list[Pair]] = {} - _subgroup: str = '' - - _splitter_class: str = 'bg-darkCyan' + schedule_tag: Tag + _schedule_data: dict[str, list["StudentsPair | TeachersPair"]] = {} @property def week(self): @@ -356,19 +291,29 @@ def week(self): self._get_week() return self._schedule_data + def _get_week(self): + raise NotImplementedError("`_get_week` was not implemented") + + +class StudentsSchedule(BaseSchedule): + """Describes schedule from HTML table""" + + subgroups: list[str] = [] + subgroup_id: int = 0 + _subgroup: str = "" + + _splitter_class: str = "bg-darkCyan" + @staticmethod def _check_tag(tag): - if tag.name != 'table': - raise ValueError( - f"Invalid tag: {tag}. Should be table", - tag - ) + if tag.name != "table": + raise ValueError(f"Invalid tag: {tag}. Should be table", tag) @classmethod def from_tag(cls, tag, subgroup=None): cls._check_tag(tag) obj = cls() - obj.schedule_table = tag + obj.schedule_tag = tag obj._subgroup = subgroup obj._get_subgroup_id() @@ -387,19 +332,15 @@ def _get_subgroup_id(self): def _parse_subgroups(self): """This method prepares subgroups for later use""" sub_groups_list = [] - table_head = self.schedule_table.thead - head_rows = table_head.find_all( - name='tr' - ) + table_head = self.schedule_tag.thead + head_rows = table_head.find_all(name="tr") # Hardcoding positions! Yikes! # head_rows[0] - meta info (`Day`, `Pair` columns, Group name) # head_rows[1] - sub_groups (a/b etc) sub_groups_tag = head_rows[1] - sub_groups_tags = sub_groups_tag.find_all( - name='th' - ) + sub_groups_tags = sub_groups_tag.find_all(name="th") for sub_group in sub_groups_tags: sub_groups_list.append(sub_group.text.strip()) @@ -408,11 +349,11 @@ def _parse_subgroups(self): def _prepare_day_tag(self, day_name_tag): """ - Parses day from 'day_name_tag'* - Returns name of that day and a list of tags that represent pairs + Parses day from 'day_name_tag'* + Returns name of that day and a list of tags that represent pairs - *day_name_tag is a tag that contains name of the tag - It also has attr - class = day + *day_name_tag is a tag that contains name of the tag + It also has attr - class = day """ pair_tags = [] @@ -429,7 +370,7 @@ def _prepare_day_tag(self, day_name_tag): # We may not have next sibling # Or, as it happens RN - we may get ' ' as next tag :| break - if self._splitter_class in next_pair_tag.attrs.get('class', []): + if self._splitter_class in next_pair_tag.attrs.get("class", []): # splitter has class `_splitter_class` (like bg-darkCyan) # if we hit splitter - day has ended break @@ -441,26 +382,256 @@ def _prepare_day_tag(self, day_name_tag): def _prepare_tags(self, tags): """Parses bs4 tags to list of Pair objects""" - prepared_tags: list[Pair] = [] + prepared_tags: list[StudentsPair] = [] for tag in tags: - prepared_tags.append( - Pair.from_tag( - tag, - subgroup_id=self.subgroup_id - ) - ) + prepared_tags.append(StudentsPair.from_tag(tag, subgroup_id=self.subgroup_id)) return prepared_tags def _get_week(self): """Iteratively loops trough table to get data for all days""" - table_body = self.schedule_table.tbody - days = table_body.find_all( - attrs={ - 'class': 'day' - } - ) + table_body = self.schedule_tag.tbody + days = table_body.find_all(attrs={"class": "day"}) for day in days: day_name, tags = self._prepare_day_tag(day) prepared_days = self._prepare_tags(tags) self._schedule_data[day_name] = prepared_days return self._schedule_data + + +class TeachersLesson: + """Class to describe lesson for teachers""" + + # pylint: disable=too-few-public-methods + + name: str + groups: str + + def __init__(self, name: str, groups: list[str] | str) -> None: + self.name = name + if isinstance(groups, list): + self.groups = ", ".join(groups) + else: + self.groups = groups + + def __str__(self) -> str: + return f"Lesson: {self.name} with ({self.groups})" + + +class TeachersPair(BaseTag): + """Describes pair from bs4 tag""" + + __pair_no_not_specified = "Не вказано" + __pair_name_not_specified = "Назва не вказана" + __groups_not_specified = "Групи не вказані" + + pair_tag: Tag + pair_no: int + lesson: TeachersLesson | None + + @staticmethod + def _check_tag(tag: Tag): + pass + + def parse_tag(self): + """This method parses bs4 and stores data from it in object's fields""" + pair_no_text = self.pair_tag.attrs.get("data-title-caption", self.__pair_no_not_specified) + if pair_no_text != self.__pair_no_not_specified: + self.pair_no = int(pair_no_text.split()[0]) + else: + self.pair_no = 0 + + pair_name_tag = self.pair_tag.find(name="p", attrs={"class": "text-leader"}) + pair_name = pair_name_tag.text.strip() if pair_name_tag else None + + groups_tag = self.pair_tag.find(name="p", attrs={"class": "text-secondary"}) + # Consider splitting. e.g of content: КН-341[а], КН-342[а], КН-343[а], КН-343[б] + groups = groups_tag.text.strip() if groups_tag else None + + lesson = None + if pair_name or groups: + lesson = TeachersLesson( + name=pair_name or self.__pair_name_not_specified, + groups=groups or self.__groups_not_specified, + ) + + self.lesson = lesson + + @classmethod + def from_tag(cls, tag): + cls._check_tag(tag) + obj = cls() + obj.pair_tag = tag + obj.parse_tag() + return obj + + +class TeacherSchedule(BaseSchedule): + """Describes schedule from HTML grid""" + + @staticmethod + def _check_tag(tag: Tag): + if tag.name != "div": + raise ValueError(f"Invalid tag: {tag}. Should be div", tag) + if "grid" not in tag.attrs.get("class", []): + raise ValueError(f"Invalid tag: {tag}. Should be grid", tag) + + @classmethod + def from_tag(cls, tag): + cls._check_tag(tag) + obj = cls() + obj.schedule_tag = tag + return obj + + def _prepare_day_tag(self, day_card: "Tag"): + day_name = day_card.find(name="div", attrs={"class": "card-header"}) + if not day_name: + raise ValueError(f"Invalid tag: {day_card}. No card-header found", day_card) + day_name = day_name.text.strip() + pairs = [] + for pair in day_card.find_all(name="div", attrs={"data-role": "panel"}): + pairs.append(pair) + return day_name, pairs + + def _prepare_tags(self, tags: list["Tag"]): + prepared_tags: list[TeachersPair] = [] + for tag in tags: + prepared_tags.append(TeachersPair.from_tag(tag)) + return prepared_tags + + def _get_week(self): + all_cards = self.schedule_tag.find_all(name="div", attrs={"class": "card"}) + for card in all_cards: + day_name, tags = self._prepare_day_tag(card) + prepared_days = self._prepare_tags(tags) + self._schedule_data[day_name] = prepared_days + return self._schedule_data + + +@define +class Department(BaseTag): + """Describes department from BS4 tag""" + + department: Tag + + @staticmethod + def _check_tag(tag): + attrs = getattr(tag, "attrs", None) + span = tag.find(name="span", attrs={"class": "branding-bar"}) + required_properties = [attrs, span] + if not all(required_properties): + raise ValueError(f"Invalid tag: {tag}, has no attrs", tag) + required = ["href"] + for requirement in required: + if requirement not in attrs: + raise ValueError( + f"Invalid tag: {tag}, doesn't have attrs: {required}", + tag, + required, + ) + span_string = getattr(span, "string", None) + if span_string is None: + raise ValueError(f"Invalid tag: {tag}, `span` has no string", tag) + + @classmethod + def from_tag(cls, tag): + cls._check_tag(tag) + obj = cls(department=tag) + if not obj.department: + raise ValueError("Invalid tag", tag) + return obj + + def get_department_picture(self): + """Returns class of the picture (if present)""" + container = self.department.find(name="div", attrs={"class": "slide-front"}) + if not container: + return None + span = container.find(name="span") + if not span: + return None + return span.attrs.get("class", None) + + def get_department_link(self): + """Returns (semi?) permanent relative link to department""" + return self.department.attrs["href"] + + def get_department_id(self) -> int: + """Return id of the department""" + key_dict = dict(parse_qsl(self.get_department_link())) + return int(key_dict["dep"]) + + def get_department_name(self) -> dict[str, str]: + """Returns name of the faculty""" + name = {"short": "", "full": ""} + short_name_span = self.department.find(name="span", attrs={"class": "branding-bar"}) + full_name_span = self.department.find(name="div", attrs={"class": "slide-back"}) + name["short"] = short_name_span.text.strip() if short_name_span else "" + full_name = full_name_span.text.strip() if full_name_span else "" + name["full"] = full_name + if full_name: + name["full"] = " ".join([x.capitalize() if len(x) > 2 else x for x in full_name]) + return name + + +@define +class Teacher(BaseTag): + """Describes teacher from BS4 tag""" + + teacher: Tag + + @staticmethod + def _check_tag(tag): + attrs = getattr(tag, "attrs", None) + span = tag.find(name="span", attrs={"class": "branding-bar"}) + required_properties = [attrs, span] + if not all(required_properties): + raise ValueError(f"Invalid tag: {tag}, has no attrs", tag) + required = ["href"] + for requirement in required: + if requirement not in attrs: + raise ValueError( + f"Invalid tag: {tag}, doesn't have attrs: {required}", + tag, + required, + ) + span_string = getattr(span, "string", None) + if span_string is None: + raise ValueError(f"Invalid tag: {tag}, `span` has no string", tag) + + @classmethod + def from_tag(cls, tag): + cls._check_tag(tag) + obj = cls(teacher=tag) + if not obj.teacher: + raise ValueError("Invalid tag", tag) + return obj + + def get_teacher_picture(self): + """Returns class of the picture (if present)""" + container = self.teacher.find(name="div", attrs={"class": "slide-front"}) + if not container: + return None + span = container.find(name="span") + if not span: + return None + return span.attrs.get("class", None) + + def get_teacher_link(self): + """Returns (semi?) permanent relative link to department""" + return self.teacher.attrs["href"] + + def get_teacher_id(self) -> int: + """Return id of teacher""" + key_dict = dict(parse_qsl(self.get_teacher_link())) + return int(key_dict["teacher"]) + + def get_teacher_name(self) -> dict[str, str]: + """Returns name of the faculty""" + name = {"short": "", "full": ""} + short_name_span = self.teacher.find(name="span", attrs={"class": "branding-bar"}) + full_name_span = self.teacher.find(name="div", attrs={"class": "slide-back"}) + name["short"] = short_name_span.text.strip() if short_name_span else "" + full_name = full_name_span.text.strip() if full_name_span else "" + name["full"] = full_name + if full_name: + name["full"] = " ".join([x.capitalize() if len(x) > 2 else x for x in full_name]) + return name diff --git a/ontu_parser/classes/enums.py b/ontu_parser/classes/enums.py index 3673024..ba7e872 100644 --- a/ontu_parser/classes/enums.py +++ b/ontu_parser/classes/enums.py @@ -4,17 +4,18 @@ class RequestsEnum: """Contains information for Requests library""" + class Methods(Enum): """Contains used HTTP Methods for requests""" - GET = 'GET' - POST = 'POST' - CHOICES = [ - GET, POST - ] + GET = "GET" + POST = "POST" + + CHOICES = [GET, POST] class Codes(Enum): """Contains used HTTP response codes""" + OK = 200 @classmethod diff --git a/ontu_parser/classes/parser.py b/ontu_parser/classes/parser.py index 32a6730..cf66f43 100644 --- a/ontu_parser/classes/parser.py +++ b/ontu_parser/classes/parser.py @@ -4,30 +4,28 @@ from .base import BaseClass from .enums import RequestsEnum -from .dataclasses import Faculty, Group, Schedule +from .dataclasses import Faculty, Group, StudentsSchedule, Department, Teacher, TeacherSchedule from .sender import Sender class Parser(BaseClass): """Parser class to get information from Rozklad ONTU""" + sender: Sender = None def __init__(self, *args, **kwargs): - if isinstance(kwargs, dict) and 'kwargs' in kwargs: + if isinstance(kwargs, dict) and "kwargs" in kwargs: # Trick to unwrap kwargs - kwargs = kwargs.get('kwargs', {}) + kwargs = kwargs.get("kwargs", {}) self.sender = Sender(*args, **kwargs) def _get_page(self, response: Response): content = response.content if not content: - raise ValueError( - f'Response: {response} has no content!', - response - ) - decoded_content = content.decode('utf-8') - return BeautifulSoup(decoded_content, 'html.parser') + raise ValueError(f"Response: {response} has no content!", response) + decoded_content = content.decode("utf-8") + return BeautifulSoup(decoded_content, "html.parser") def get_faculties(self) -> list[Faculty]: """Returns a list of faculties as Faculty objects""" @@ -35,88 +33,79 @@ def get_faculties(self) -> list[Faculty]: method=RequestsEnum.method_get() # No data gives 'main' page with faculties ) faculty_page = self._get_page(faculties_response) - faculty_tags = faculty_page.find_all( - attrs={ - 'class': 'fc' # Faculties have class 'fc' - } - ) + faculty_tags = faculty_page.find_all(attrs={"class": "fc"}) # Faculties have class 'fc' faculty_entities = [] for tag in faculty_tags: - faculty_entities.append( - Faculty.from_tag( - tag - ) - ) + faculty_entities.append(Faculty.from_tag(tag)) return faculty_entities def get_groups(self, faculty_id) -> list[Group]: """Returns Group list of a faculty by faculty id""" groups_response = self.sender.send_request( method=RequestsEnum.method_post(), - data={ - 'facultyid': faculty_id - } + data={"facultyid": faculty_id}, ) groups_page = self._get_page(groups_response) - groups_tags = groups_page.find_all( - attrs={ - 'class': 'grp' - } - ) + groups_tags = groups_page.find_all(attrs={"class": "grp"}) group_entities: list[Group] = [] for tag in groups_tags: - group_entities.append( - Group.from_tag( - tag - ) - ) + group_entities.append(Group.from_tag(tag)) return group_entities - def get_schedule(self, group_id, all_time=False): + def get_schedule( + self, + group_id: int | None = None, + teacher_id: int | None = None, + all_time=False, + ): + """Returns schedule for group, or for teachers""" + if group_id: + return self._get_group_schedule(group_id, all_time=all_time).week + if teacher_id: + return self._get_teachers_schedule(teacher_id, all_time=all_time).week + raise ValueError("No group or teacher id provided!") + + def _get_group_schedule(self, group_id, all_time=False) -> StudentsSchedule: """ - Returns a schedule for a group (by id) - If all_time is False - returns schedule for current week - Else - returns schedule for whole semester + Returns a schedule for a group (by id) + If all_time is False - returns schedule for current week + Else - returns schedule for whole semester """ - request_data = { - 'groupid': group_id - } + request_data = {"groupid": group_id} if all_time: - request_data['show_all'] = 1 + request_data["show_all"] = 1 schedule_response = self.sender.send_request( method=RequestsEnum.method_post(), - data=request_data - ) - schedule_page = self._get_page( - schedule_response + data=request_data, ) + schedule_page = self._get_page(schedule_response) - breadcrumbs = schedule_page.find( - attrs={ - 'class': 'breadcrumbs' - } - ) - group_breadcrumbs = breadcrumbs.find_all( - attrs={ - 'class': 'page-link' - } - ) + breadcrumbs = schedule_page.find(attrs={"class": "breadcrumbs"}) + group_breadcrumbs = breadcrumbs.find_all(attrs={"class": "page-link"}) - table = schedule_page.find( - attrs={ - 'class': 'table' - } - ) + table = schedule_page.find(attrs={"class": "table"}) group_name = group_breadcrumbs[-1].text # I hate this, but at the same time - I love it # If it ever to become broken I'll implement this a bit thoughtfully :) - subgroup_name = group_name.split('[')[1].replace(']', '') - schedule = Schedule.from_tag( - table, - subgroup=subgroup_name - ) - week = schedule.week - return week + subgroup_name = group_name.split("[")[1].replace("]", "") + schedule = StudentsSchedule.from_tag(table, subgroup=subgroup_name) + return schedule + + def _get_teachers_schedule(self, teacher_id, all_time=False) -> TeacherSchedule: + """Returns a schedule for a teacher (by id)""" + self._check_for_teachers() + query = {"page": "teacher", "teacher": teacher_id} + if all_time: + query["page"] = "teacher_all" + query["show"] = 1 + schedule_response = self.sender.send_request(method=RequestsEnum.method_get(), query=query) + schedule_page = self._get_page(schedule_response) + + grid = schedule_page.find(name="div", attrs={"class": "grid"}) + if not grid: + raise ValueError("No grid found!") + schedule = TeacherSchedule.from_tag(grid) + return schedule def parse(self, all_time=False): """Parses information, requiring user input (CLI)""" @@ -126,7 +115,7 @@ def parse(self, all_time=False): for faculty in all_faculties: print(faculty.get_faculty_name()) - faculty_name = input('Введите название факультета: ') + faculty_name = input("Введите название факультета: ") faculty_id = None for faculty in all_faculties: if faculty.get_faculty_name() == faculty_name: @@ -139,7 +128,7 @@ def parse(self, all_time=False): for group in groups: print(group.get_group_name()) - group_name = input('Введите название группы: ') + group_name = input("Введите название группы: ") group_id = None for group in groups: if group.get_group_name() == group_name: @@ -151,3 +140,42 @@ def parse(self, all_time=False): schedule = self.get_schedule(group_id, all_time=all_time) return schedule + + def _check_for_teachers(self): + """A check, that must be rune before executing teachers methods""" + if not self.sender: + raise ValueError("Sender is not set!") + if not self.sender.for_teachers: + raise ValueError("Sender is not set for teachers!") + + def get_departments(self) -> list["Department"]: + """Returns a list of departments""" + self._check_for_teachers() + + departments_response = self.sender.send_request(method=RequestsEnum.method_get()) + departments_page = self._get_page(departments_response) + titles = departments_page.find(attrs={"class": "tiles-grid"}) + if not titles: + raise ValueError("No titles found!") + departments_tags = titles.find_all(name="a", attrs={"data-role": "tile"}) + departments = [] + for tag in departments_tags: + departments.append(Department.from_tag(tag)) + return departments + + def get_teachers_by_department(self, department_id: int) -> "list[Teacher]": + """Returns a list of teachers by department id""" + self._check_for_teachers() + + teachers_response = self.sender.send_request( + method=RequestsEnum.method_get(), query={"page": "department", "dep": department_id} + ) + teachers_page = self._get_page(teachers_response) + teachers_tags = teachers_page.find_all(attrs={"class": "tiles-grid"}) + if not teachers_tags: + raise ValueError("No teachers found!") + teachers_tags = teachers_tags[0].find_all(name="a", attrs={"data-role": "tile"}) + teachers = [] + for tag in teachers_tags: + teachers.append(Teacher.from_tag(tag)) + return teachers diff --git a/ontu_parser/classes/sender.py b/ontu_parser/classes/sender.py index 90c254f..ce4a3aa 100644 --- a/ontu_parser/classes/sender.py +++ b/ontu_parser/classes/sender.py @@ -1,6 +1,7 @@ """Module for sending operations""" import time from datetime import datetime +from urllib.parse import urlencode import requests from selenium import webdriver from selenium.webdriver import FirefoxOptions @@ -8,8 +9,10 @@ from .base import BaseClass from .enums import RequestsEnum + class TTLValue(BaseClass): """Describes value with some time to live (like authorization token)""" + _ttl: int = 3600 # Time To Live (in seconds) _value: object = None @@ -21,8 +24,8 @@ def __init__(self, *args, **kwargs): def is_valid(self): """ - Checks wether value is still valid - True if TTLValue was issued less seconds before than Time To Live + Checks wether value is still valid + True if TTLValue was issued less seconds before than Time To Live """ seconds_passed = (datetime.now() - self.issued_at).seconds if seconds_passed < self._ttl: @@ -60,9 +63,7 @@ def get_cookie(self): # link = self.sender.link notbot = self.sender.notbot.value - return self.set_value( - notbot - ) + return self.set_value(notbot) class NotBot(TTLValue): @@ -73,12 +74,12 @@ class NotBot(TTLValue): @classmethod def create(cls, **browser_settings): """ - Creates NotBot with certain browser_settings - Refer to webdriver.Firefox arguments and docs for more info + Creates NotBot with certain browser_settings + Refer to webdriver.Firefox arguments and docs for more info """ - if isinstance(browser_settings, dict) and 'browser_settings' in browser_settings: + if isinstance(browser_settings, dict) and "browser_settings" in browser_settings: # I'm not sure if this is the right way of multi-passing kwargs :| - browser_settings = browser_settings.get('browser_settings', {}) + browser_settings = browser_settings.get("browser_settings", {}) obj = cls() obj._browser_kwargs = browser_settings @@ -97,28 +98,26 @@ def value(self) -> dict: raise RuntimeError("Could not get notbot") return notbot - def __make_request( - self, - driver: webdriver.Firefox) -> tuple[str | None, str | None]: + def __make_request(self, driver: webdriver.Firefox) -> tuple[str | None, str | None]: """Returns notbot and pow-result (also PHPSESSID) cookies value""" - driver.get('https://rozklad.ontu.edu.ua/guest_n.php') + driver.get("https://rozklad.ontu.edu.ua/guest_n.php") notbot: str | None = None pow_result: str | None = None phpsesid: str | None = None cookies = driver.get_cookies() if cookies: for cookie in cookies: - if cookie['name'] == 'notbot': - notbot = cookie['value'] - if cookie['name'] == 'pow-result': - pow_result = cookie['value'] - if cookie['name'] == 'PHPSESSID': - phpsesid = cookie['value'] + if cookie["name"] == "notbot": + notbot = cookie["value"] + if cookie["name"] == "pow-result": + pow_result = cookie["value"] + if cookie["name"] == "PHPSESSID": + phpsesid = cookie["value"] return (notbot, pow_result, phpsesid) def get_notbot(self): """Gets notbot by making webdriver request (emulates JS)""" - options = self._browser_kwargs.pop('options', None) + options = self._browser_kwargs.pop("options", None) if not options: options = FirefoxOptions() options.add_argument("--headless") @@ -126,82 +125,86 @@ def get_notbot(self): driver = webdriver.Firefox( options=options, # desired_capabilities=desired_capabilities, - **self._browser_kwargs + **self._browser_kwargs, ) i = 0 while True: print("Making request to get cookies") - notbot, pow_result, phpsesid = self.__make_request( - driver=driver - ) + notbot, pow_result, phpsesid = self.__make_request(driver=driver) if all([notbot, pow_result, phpsesid]): break print(f"Sleeping for {i ** 2} seconds") - time.sleep(i ** 2) + time.sleep(i**2) i += 1 driver.close() - return self.set_value( - { - "notbot": notbot, - "pow-result": pow_result, - "PHPSESSID": phpsesid - } - ) + return self.set_value({"notbot": notbot, "pow-result": pow_result, "PHPSESSID": phpsesid}) class Sender(BaseClass): """Describes sender with link, notbot and cookies to send requests""" - link: str = 'https://rozklad.ontu.edu.ua/guest_n.php' + + default_link: str = "https://rozklad.ontu.edu.ua/guest_n.php" + teachers_link: str = "https://rozklad.ontu.edu.ua/departments_all.php" notbot: NotBot = None cookies: Cookies = None + for_teachers: bool = False + + @property + def link(self): + """Returns link to send requests to""" + if self.for_teachers: + return self.teachers_link + return self.default_link def __init__(self, *args, **kwargs): - notbot_kwargs = kwargs.get('notbot', {}) + notbot_kwargs = kwargs.get("notbot", {}) + for_teachers = kwargs.get("for_teachers", False) + self.for_teachers = for_teachers self.notbot = NotBot.create(**notbot_kwargs) self.cookies = Cookies(self) _responses: list[requests.Response] = [] - def send_request(self, method: str, data: (dict | None) = None): + def send_request(self, method: str, data: (dict | None) = None, query: (dict | None) = None): """Sends request with method and some data, if needed""" session = requests.Session() if method not in RequestsEnum.Methods.CHOICES.value: raise ValueError( - f'arg. `method` should be one of: {RequestsEnum.Methods.CHOICES.value}', + f"arg. `method` should be one of: {RequestsEnum.Methods.CHOICES.value}", method, ) + link = self.link + if query: + link += "?" + urlencode(query) + try: # Just in case, I guess for item, key in self.cookies.value.items(): - session.cookies.set( - item, - key, - path='/' - ) + session.cookies.set(item, key, path="/") response: requests.Response = session.request( method=method, - url=self.link, + url=link, data=data, headers={ # They are really getting on my nerves at this point TBH "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)", - } + }, ) except Exception as exception: raise ValueError( - f'could not get response from {self.link}, got exception: {exception}', - self.link, - exception + f"could not get response from {link}, got exception: {exception}", + link, + exception, ) from exception if response.status_code != RequestsEnum.code_ok(): raise ValueError( - 'server returned non OK response', + "server returned non OK response", response.status_code, response, - response.content + response.content, ) # Keep responses for a little while self._responses.append(response)