diff --git a/src/ferry_planner/schedule.py b/src/ferry_planner/schedule.py index c4d7081..0f19fea 100644 --- a/src/ferry_planner/schedule.py +++ b/src/ferry_planner/schedule.py @@ -94,7 +94,7 @@ def redirect(cls, redirect_url: str) -> "HtmlParseResult": return result @classmethod - def from_sailings(cls, sailings: Sequence[FerrySailing], notes: Sequence[str]) -> "HtmlParseResult": + def from_sailings(cls, sailings: Iterable[FerrySailing], notes: Iterable[str]) -> "HtmlParseResult": result = HtmlParseResult() result.sailings = tuple(sailings) result.notes = tuple(notes) @@ -117,7 +117,7 @@ class ScheduleDB: def __init__( self, *, - ferry_connections: Sequence[FerryConnection] | set[FerryConnection] | frozenset[FerryConnection], + ferry_connections: Iterable[FerryConnection] | set[FerryConnection] | frozenset[FerryConnection], base_url: str, cache_dir: Path, cache_ahead_days: int, @@ -321,11 +321,15 @@ def parse_schedule_html(response: httpx.Response, date: datetime) -> HtmlParseRe soup = BeautifulSoup(markup=html, features="html.parser") table_tag = soup.find("table", id="dailyScheduleTableOnward") daterange_tag = soup.find("div", id="dateRangeModal") # for seasonal - rows: Sequence[Tag] = [] + rows = [] if table_tag and isinstance(table_tag, Tag) and table_tag.tbody: rows = table_tag.tbody.find_all("tr") elif daterange_tag and isinstance(daterange_tag, Tag): - hrefs = [a["href"] for a in daterange_tag.find_all("a")] + hrefs = [ + a.attrs["href"] + for a in daterange_tag.find_all("a") + if isinstance(a, Tag) and isinstance(a.attrs["href"], str) + ] index = ScheduleParser.get_seasonal_schedule_daterange_index(hrefs, date) if index < 0: msg = f"date {date} is out of seasonal schedules range" @@ -347,7 +351,7 @@ def parse_schedule_html(response: httpx.Response, date: datetime) -> HtmlParseRe return HtmlParseResult.from_sailings(sailings, notes) @staticmethod - def parse_sailings_from_html_rows(rows: Sequence[Tag], date: datetime) -> Sequence[FerrySailing]: + def parse_sailings_from_html_rows(rows: Iterable[Tag], date: datetime) -> Sequence[FerrySailing]: sailing_row_min_td_count = 3 sailings = [] for row in rows: @@ -411,22 +415,27 @@ def parse_sailing_comments(comments: str) -> list[str]: @staticmethod def get_seasonal_schedule_rows(url: str, soup: BeautifulSoup, date: datetime) -> Sequence[Tag]: - rows: Sequence[Tag] = [] + rows = [] form = soup.find("form", id="seasonalSchedulesForm") if not isinstance(form, Tag): msg = "'seasonalSchedulesForm' not found" raise ScheduleParseError(msg, url=url) weekday = WEEKDAY_NAMES[date.weekday()] for thead in form.find_all("thead"): - if thead.text.lower().strip().startswith(weekday): + if thead.get_text().lower().strip().startswith(weekday): rows = [ - x for x in itertools.takewhile(lambda t: t.name != "thead", thead.next_siblings) if x.name == "tr" + x + for x in itertools.takewhile( + lambda t: isinstance(t, Tag) and t.name != "thead", + thead.next_siblings, + ) + if isinstance(x, Tag) and x.name == "tr" ] break return rows @staticmethod - def get_seasonal_schedule_daterange_index(hrefs: Sequence[str], date: datetime) -> int: + def get_seasonal_schedule_daterange_index(hrefs: Iterable[str], date: datetime) -> int: for i, href in enumerate(hrefs): dates = ScheduleParser.get_seasonal_schedule_daterange_from_url(href) if dates and date.date() >= dates[0].date() and date.date() <= dates[1].date(): diff --git a/uv.lock b/uv.lock index 1457773..8ccb287 100644 --- a/uv.lock +++ b/uv.lock @@ -26,14 +26,15 @@ wheels = [ [[package]] name = "beautifulsoup4" -version = "4.12.3" +version = "4.13.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "soupsieve" }, + { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b3/ca/824b1195773ce6166d388573fc106ce56d4a805bd7427b624e063596ec58/beautifulsoup4-4.12.3.tar.gz", hash = "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051", size = 581181 } +sdist = { url = "https://files.pythonhosted.org/packages/f0/3c/adaf39ce1fb4afdd21b611e3d530b183bb7759c9b673d60db0e347fd4439/beautifulsoup4-4.13.3.tar.gz", hash = "sha256:1bd32405dacc920b42b83ba01644747ed77456a65760e285fbc47633ceddaf8b", size = 619516 } wheels = [ - { url = "https://files.pythonhosted.org/packages/b1/fe/e8c672695b37eecc5cbf43e1d0638d88d66ba3a44c4d321c796f4e59167f/beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed", size = 147925 }, + { url = "https://files.pythonhosted.org/packages/f9/49/6abb616eb3cbab6a7cca303dc02fdf3836de2e0b834bf966a7f5271a34d8/beautifulsoup4-4.13.3-py3-none-any.whl", hash = "sha256:99045d7d3f08f91f0d656bc9b7efbae189426cd913d830294a15eefa0ea4df16", size = 186015 }, ] [[package]] @@ -120,7 +121,7 @@ dev = [ [package.metadata] requires-dist = [ - { name = "beautifulsoup4", specifier = "~=4.12.3" }, + { name = "beautifulsoup4", specifier = "~=4.13.1" }, { name = "fastapi", specifier = "~=0.115.7" }, { name = "httpx", specifier = "~=0.28.1" }, { name = "jinja2", specifier = "~=3.1.5" },