Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Retry with canonicalized url if original url is not gdrive url #304

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 19 additions & 8 deletions gdown/download_folder.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from .download import _get_session
from .download import download
from .exceptions import FolderContentsMaximumLimitError
from .parse_url import is_google_drive_url

MAX_NUMBER_FILES = 50

Expand Down Expand Up @@ -99,16 +100,26 @@ def _download_and_parse_google_drive_link(

return_code = True

# canonicalize the language into English
if "?" in url:
url += "&hl=en"
else:
url += "?hl=en"
for _ in range(2):
if is_google_drive_url(url):
# canonicalize the language into English
if "?" in url:
url += "&hl=en"
else:
url += "?hl=en"

res = sess.get(url, verify=verify)
if res.status_code != 200:
return False, None

res = sess.get(url, verify=verify)
if is_google_drive_url(url):
break

if not is_google_drive_url(res.url):
break

if res.status_code != 200:
return False, None
# need to try with canonicalized url if the original url redirects to gdrive
url = res.url

gdrive_file, id_name_type_iter = _parse_google_drive_file(
url=url,
Expand Down
7 changes: 6 additions & 1 deletion gdown/parse_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
import warnings


def is_google_drive_url(url):
    """Return whether ``url`` points at a Google Drive host.

    The URL's hostname must match ``drive.google.com`` or
    ``docs.google.com`` exactly, so look-alike hosts such as
    ``drive.google.com.example.org`` and URLs that merely mention the
    domain in their path or query are rejected.

    Args:
        url: The URL string to inspect.

    Returns:
        ``True`` if the parsed hostname is one of the two Google Drive
        hosts, ``False`` otherwise (including when the URL has no network
        location, e.g. it lacks a ``scheme://`` prefix).
    """
    parsed = urllib.parse.urlparse(url)
    # ``hostname`` is already lower-cased and stripped of any port by
    # urllib, and is ``None`` when the URL has no network location.
    return parsed.hostname in ("drive.google.com", "docs.google.com")


def parse_url(url, warning=True):
"""Parse URLs especially for Google Drive links.

Expand All @@ -11,7 +16,7 @@ def parse_url(url, warning=True):
"""
parsed = urllib.parse.urlparse(url)
query = urllib.parse.parse_qs(parsed.query)
is_gdrive = parsed.hostname in ["drive.google.com", "docs.google.com"]
is_gdrive = is_google_drive_url(url=url)
is_download_link = parsed.path.endswith("/uc")

if not is_gdrive:
Expand Down
Loading