From 06751a0b9c6dcbf00b1954fba2b682dd45c22592 Mon Sep 17 00:00:00 2001 From: "Michael B." Date: Thu, 9 Nov 2023 17:42:22 +0100 Subject: [PATCH] Fix pandas FutureWarning: "Passing literal html to 'read_html' is deprecated" This addresses #1685 (`institutional_holders`) and also `get_earnings_dates()`. The pandas issue is tracked at https://github.com/pandas-dev/pandas/issues/53767 and the corresponding code change is at https://github.com/pandas-dev/pandas/blob/5cedf87cccd77c7b4b6aaa64bfec98b32b512f68/pandas/io/html.py#L1238 As for legacy Python 2.7 support, `io.StringIO` is available in the versions I tested; see https://docs.python.org/2/library/io.html --- yfinance/base.py | 3 ++- yfinance/scrapers/holders.py | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/yfinance/base.py b/yfinance/base.py index 21af2bb51..0db4b8d26 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -22,6 +22,7 @@ from __future__ import print_function import datetime as _datetime +from io import StringIO import json as _json import logging import time as _time @@ -2090,7 +2091,7 @@ def get_earnings_dates(self, limit=12, proxy=None) -> Optional[pd.DataFrame]: "the issue. Thank you for your patience.") try: - data = pd.read_html(data)[0] + data = pd.read_html(StringIO(data))[0] except ValueError: if page_offset == 0: # Should not fail on first page diff --git a/yfinance/scrapers/holders.py b/yfinance/scrapers/holders.py index 684278774..9c0fa397d 100644 --- a/yfinance/scrapers/holders.py +++ b/yfinance/scrapers/holders.py @@ -1,3 +1,5 @@ +from io import StringIO + import pandas as pd from yfinance.data import TickerData @@ -36,7 +38,7 @@ def _scrape(self, proxy): ticker_url = f"{self._SCRAPE_URL_}/{self._data.ticker}" try: resp = self._data.cache_get(ticker_url + '/holders', proxy=proxy) - holders = pd.read_html(resp.text) + holders = pd.read_html(StringIO(resp.text)) except Exception: holders = []