Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improving download performance when cloning based on specific branches or tags #190

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerabil
numpy; python_version < '3.8'
numpy>=1.22.2; python_version >= '3.8'
npm
requests
requests
GitPython
49 changes: 25 additions & 24 deletions src/fosslight_util/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import logging
import argparse
import shutil
import pygit2 as git
from git import Repo, GitCommandError
import bz2
import contextlib
from datetime import datetime
Expand Down Expand Up @@ -230,14 +230,10 @@ def get_github_token(git_url):


def download_git_clone(git_url, target_dir, checkout_to="", tag="", branch=""):
ref_to_checkout = decide_checkout(checkout_to, tag, branch)
msg = ""
oss_name = get_github_ossname(git_url)
oss_version = ""
github_token = get_github_token(git_url)
callbacks = None
if github_token != "":
callbacks = git.RemoteCallbacks(credentials=git.UserPass("foo", github_token)) # username is not used, so set to dummy
refs_to_checkout = decide_checkout(checkout_to, tag, branch)
clone_default_branch_flag = False
msg = ""

try:
if platform.system() != "Windows":
Expand All @@ -248,30 +244,35 @@ def download_git_clone(git_url, target_dir, checkout_to="", tag="", branch=""):
alarm.start()

Path(target_dir).mkdir(parents=True, exist_ok=True)
repo = git.clone_repository(git_url, target_dir,
bare=False, repository=None,
remote=None, callbacks=callbacks)
if refs_to_checkout != "":
try:
# gitPython uses the branch argument the same whether you check out to a branch or a tag.
repo = Repo.clone_from(git_url, target_dir, branch=refs_to_checkout)
except GitCommandError as error:
error_msg = error.args[2].decode("utf-8")
if "Remote branch " + refs_to_checkout + " not found in upstream origin" in error_msg:
# clone default branch, when non-existent branch or tag entered
repo = Repo.clone_from(git_url, target_dir)
clone_default_branch_flag = True
else:
repo = Repo.clone_from(git_url, target_dir)
clone_default_branch_flag = True

if refs_to_checkout != tag or clone_default_branch_flag:
oss_version = repo.active_branch.name
else:
oss_version = repo.git.describe('--tags')
logger.info(f"git checkout: {oss_version}")

if platform.system() != "Windows":
signal.alarm(0)
else:
del alarm
except Exception as error:
logger.warning(f"git clone - failed: {error}")
msg = str(error)
return False, msg, oss_name, oss_version
try:
if ref_to_checkout != "":
ref_list = [x for x in repo.references]
ref_to_checkout = get_ref_to_checkout(ref_to_checkout, ref_list)
logger.info(f"git checkout: {ref_to_checkout}")
repo.checkout(ref_to_checkout)
return False, msg, oss_name, refs_to_checkout

for prefix_ref in prefix_refs:
if ref_to_checkout.startswith(prefix_ref):
oss_version = ref_to_checkout[len(prefix_ref):]

except Exception as error:
logger.warning(f"git checkout to {ref_to_checkout} - failed: {error}")
return True, msg, oss_name, oss_version


Expand Down
108 changes: 104 additions & 4 deletions tests/test_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,34 @@
import os
import pytest

from fosslight_util.download import cli_download_and_extract
from fosslight_util.download import cli_download_and_extract, download_git_clone
from tests import constants


def test_download_from_github():
    """Download the example GitHub repository via the CLI helper.

    Passes when the helper reports success and the target directory
    contains at least one entry afterwards.
    """
    # given
    log_dir = "test_result/download_log/example"
    target_dir = os.path.join(constants.TEST_RESULT_DIR, "download/example")
    git_url = "https://github.com/LGE-OSS/example"

    # when
    result = cli_download_and_extract(git_url, target_dir, log_dir)
    success, _, _, _ = result

    # then
    assert success is True
    downloaded_entries = os.listdir(target_dir)
    assert len(downloaded_entries) > 0


@pytest.mark.parametrize("git_url",
["git://git.kernel.org/pub/scm/utils/kernel/kmod/kmod.git;protocol=git;branch=ci-test",
"git://git.kernel.org/pub/scm/utils/kernel/kmod/kmod.git;protocol=git;tag=v32"])
def test_download_from_github_with_branch_or_tag(git_url):
# given
target_dir = os.path.join(constants.TEST_RESULT_DIR, "download/example")
success, _, _, _ = cli_download_and_extract("https://github.com/LGE-OSS/example",
target_dir,
"test_result/download_log/example")
log_dir = "test_result/download_log/example"

# when
success, _, _, _ = cli_download_and_extract(git_url, target_dir, log_dir)

# then
assert success is True
Expand All @@ -38,3 +56,85 @@ def test_download_from_wget(project_name, project_url):
# then
assert success is True
assert len(os.listdir(target_dir)) > 0


def test_download_git_clone_with_branch():
    """Clone the kmod repository at the ``ci-test`` branch.

    Expects a successful clone, a non-empty target directory, an empty
    OSS name and a reported version equal to the requested branch.
    """
    # NOTE(review): target_dir is the same path other tests in this module
    # use -- confirm it is cleaned between tests.
    # given
    branch_name = "ci-test"
    target_dir = os.path.join(constants.TEST_RESULT_DIR, "download/example")
    git_url = "git://git.kernel.org/pub/scm/utils/kernel/kmod/kmod.git"

    # when
    result = download_git_clone(git_url, target_dir, branch=branch_name)
    success, _, oss_name, oss_version = result

    # then
    assert success is True
    cloned_entries = os.listdir(target_dir)
    assert len(cloned_entries) > 0
    assert oss_name == ''
    assert oss_version == branch_name


def test_download_git_clone_with_tag():
    """Clone the kmod repository at the ``v32`` tag.

    Expects a successful clone, a non-empty target directory, an empty
    OSS name and a reported version equal to the requested tag.
    """
    # NOTE(review): target_dir is the same path other tests in this module
    # use -- confirm it is cleaned between tests.
    # given
    tag_name = "v32"
    target_dir = os.path.join(constants.TEST_RESULT_DIR, "download/example")
    git_url = "git://git.kernel.org/pub/scm/utils/kernel/kmod/kmod.git"

    # when
    result = download_git_clone(git_url, target_dir, tag=tag_name)
    success, _, oss_name, oss_version = result

    # then
    assert success is True
    cloned_entries = os.listdir(target_dir)
    assert len(cloned_entries) > 0
    assert oss_name == ''
    assert oss_version == tag_name


def test_download_main_branch_when_any_branch_or_tag_not_entered():
    """Clone the example repository without naming a branch or tag.

    Expects a successful clone whose reported version is ``main`` and
    whose OSS name is ``LGE-OSS-example``.
    """
    # given
    expected_version = "main"
    target_dir = os.path.join(constants.TEST_RESULT_DIR, "download/example")
    git_url = "https://github.com/LGE-OSS/example"

    # when
    result = download_git_clone(git_url, target_dir)
    success, _, oss_name, oss_version = result

    # then
    assert success is True
    cloned_entries = os.listdir(target_dir)
    assert len(cloned_entries) > 0
    assert oss_name == 'LGE-OSS-example'
    assert oss_version == expected_version


def test_download_main_branch_when_non_existent_branch_entered():
    """Request a branch that does not exist in the example repository.

    Expects the clone to still succeed, with the reported version being
    ``main`` rather than the requested branch name.
    """
    # given
    expected_version = "main"
    branch_name = "non-existent-branch"
    target_dir = os.path.join(constants.TEST_RESULT_DIR, "download/example")
    git_url = "https://github.com/LGE-OSS/example"

    # when
    result = download_git_clone(git_url, target_dir, branch=branch_name)
    success, _, oss_name, oss_version = result

    # then
    assert success is True
    cloned_entries = os.listdir(target_dir)
    assert len(cloned_entries) > 0
    assert oss_name == 'LGE-OSS-example'
    assert oss_version == expected_version


def test_download_main_branch_when_non_existent_tag_entered():
    """Request a tag that does not exist in the example repository.

    Expects the clone to still succeed, with the reported version being
    ``main`` rather than the requested tag name.
    """
    # given
    expected_version = "main"
    tag_name = "non-existent-tag"
    target_dir = os.path.join(constants.TEST_RESULT_DIR, "download/example")
    git_url = "https://github.com/LGE-OSS/example"

    # when
    result = download_git_clone(git_url, target_dir, tag=tag_name)
    success, _, oss_name, oss_version = result

    # then
    assert success is True
    cloned_entries = os.listdir(target_dir)
    assert len(cloned_entries) > 0
    assert oss_name == 'LGE-OSS-example'
    assert oss_version == expected_version
1 change: 1 addition & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ exclude = .tox/*
filterwarnings = ignore::DeprecationWarning
norecursedirs = test_result/* tests/legacy


[testenv:test_run]
deps =
-r{toxinidir}/requirements-dev.txt
Expand Down
Loading