From a080c9a52e05473c99b04fa48c2a54e927c43501 Mon Sep 17 00:00:00 2001 From: Kentaro Wada Date: Fri, 26 Jan 2024 20:17:19 +0900 Subject: [PATCH 1/2] Add is_google_drive_url function --- gdown/parse_url.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/gdown/parse_url.py b/gdown/parse_url.py index c0128cc6..d2dad78d 100644 --- a/gdown/parse_url.py +++ b/gdown/parse_url.py @@ -3,6 +3,11 @@ import warnings +def is_google_drive_url(url): + parsed = urllib.parse.urlparse(url) + return parsed.hostname in ["drive.google.com", "docs.google.com"] + + def parse_url(url, warning=True): """Parse URLs especially for Google Drive links. @@ -11,7 +16,7 @@ def parse_url(url, warning=True): """ parsed = urllib.parse.urlparse(url) query = urllib.parse.parse_qs(parsed.query) - is_gdrive = parsed.hostname in ["drive.google.com", "docs.google.com"] + is_gdrive = is_google_drive_url(url=url) is_download_link = parsed.path.endswith("/uc") if not is_gdrive: From 523e081460d39eb3b4351d56e94acd654982e758 Mon Sep 17 00:00:00 2001 From: Kentaro Wada Date: Sat, 27 Jan 2024 00:00:04 +0900 Subject: [PATCH 2/2] Retry with canonicalized url if original url is not gdrive url --- gdown/download_folder.py | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/gdown/download_folder.py b/gdown/download_folder.py index b52ac8b0..7814162b 100644 --- a/gdown/download_folder.py +++ b/gdown/download_folder.py @@ -11,6 +11,7 @@ from .download import _get_session from .download import download from .exceptions import FolderContentsMaximumLimitError +from .parse_url import is_google_drive_url MAX_NUMBER_FILES = 50 @@ -99,16 +100,26 @@ def _download_and_parse_google_drive_link( return_code = True - # canonicalize the language into English - if "?" in url: - url += "&hl=en" - else: - url += "?hl=en" + for _ in range(2): + if is_google_drive_url(url): + # canonicalize the language into English + if "?" in url: + url += "&hl=en" + else: + url += "?hl=en" + + res = sess.get(url, verify=verify) + if res.status_code != 200: + return False, None - res = sess.get(url, verify=verify) + if is_google_drive_url(url): + break + + if not is_google_drive_url(res.url): + break - if res.status_code != 200: - return False, None + # need to try with canonicalized url if the original url redirects to gdrive + url = res.url gdrive_file, id_name_type_iter = _parse_google_drive_file( url=url,