Skip to content

Commit

Permalink
Refactor scan_for_package_info to use scan_resource and benefit from timeout support #135
Browse files Browse the repository at this point in the history

Signed-off-by: Thomas Druez <tdruez@nexb.com>
  • Loading branch information
tdruez committed Apr 14, 2021
1 parent d708973 commit 1eaf35f
Show file tree
Hide file tree
Showing 2 changed files with 95 additions and 8 deletions.
42 changes: 34 additions & 8 deletions scanpipe/pipes/scancode.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,10 +122,23 @@ def get_resource_info(location):
return file_info


def scan_resource(location, scanners):
    """
    Run the given `scanners` on `location` by delegating to the scancode-toolkit
    `scan_resource` function, which adds timeout support around direct scanner
    function calls.
    Return a dict of scan `results` and a list of `errors`.
    """
    # scancode-toolkit expects a (location, rid) pair: the `rid` has no use in
    # this context, so a placeholder value of 0 is provided.
    scan_args = (location, 0)
    (
        _location,
        _rid,
        scan_errors,
        _scan_time,
        scan_results,
        _timings,
    ) = scancode_cli.scan_resource(scan_args, scanners)
    return scan_results, scan_errors


def scan_file(location):
"""
Run a license, copyright, email, and url scan functions on provided `location`,
using the scancode-toolkit scan_resource method to support timeout.
Run a license, copyright, email, and url scan on provided `location`,
using the scancode-toolkit direct API.
Return a dict of scan `results` and a list of `errors`.
"""
Expand All @@ -135,12 +148,19 @@ def scan_file(location):
Scanner("emails", scancode_api.get_emails),
Scanner("urls", scancode_api.get_urls),
]
return scan_resource(location, scanners)

# `rid` is not needed in this context, yet required in the scan_resource args
location_rid = location, 0
_, _, errors, _, results, _ = scancode_cli.scan_resource(location_rid, scanners)

return results, errors
def scan_for_package_info(location):
    """
    Scan the provided `location` for package data using the scancode-toolkit
    direct API, through the `scan_resource` wrapper.
    Return a dict of scan `results` and a list of `errors`.
    """
    package_scanner = Scanner("packages", scancode_api.get_package_info)
    return scan_resource(location, [package_scanner])


def scan_file_and_save_results(codebase_resource):
Expand Down Expand Up @@ -176,16 +196,22 @@ def scan_for_files(project):
def scan_package_and_save_results(codebase_resource):
    """
    Scan the `codebase_resource` for package and save the results in the database.
    Create project errors if any occurred during the scan.

    The package scan is run once, through `scan_for_package_info`, so that it
    goes through the `scan_resource` wrapper and its timeout support instead
    of calling the scancode-toolkit package scanner function directly.
    """
    scan_results, scan_errors = scan_for_package_info(codebase_resource.location)

    packages = scan_results.get("packages", [])
    if packages:
        for package_data in packages:
            codebase_resource.create_and_add_package(package_data)
        codebase_resource.status = "application-package"
        codebase_resource.save()

    # Errors take precedence over the package status: a resource whose scan
    # reported errors is always flagged as "scanned-with-error".
    if scan_errors:
        codebase_resource.add_errors(scan_errors)
        codebase_resource.status = "scanned-with-error"
        codebase_resource.save()


def scan_for_application_packages(project):
"""
Expand Down
61 changes: 61 additions & 0 deletions scanpipe/tests/test_pipes.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,9 +275,11 @@ def test_scanpipe_pipes_scancode_scan_file_and_save_results(self):
project=project1, path="not available"
)

self.assertEqual(0, project1.projecterrors.count())
scancode.scan_file_and_save_results(codebase_resource1)
codebase_resource1.refresh_from_db()
self.assertEqual("scanned-with-error", codebase_resource1.status)
self.assertEqual(4, project1.projecterrors.count())

copy_inputs([self.data_location / "notice.NOTICE"], project1.codebase_path)
codebase_resource2 = CodebaseResource.objects.create(
Expand All @@ -294,6 +296,29 @@ def test_scanpipe_pipes_scancode_scan_file_and_save_results(self):
]
self.assertEqual(expected, codebase_resource2.license_expressions)

def test_scanpipe_pipes_scancode_scan_file_and_save_results_timeout_error(self):
    # A timeout raised by the copyrights scanner must be stored as a single
    # project error and flag the resource as "scanned-with-error".
    project1 = Project.objects.create(name="Analysis")
    notice_file = self.data_location / "notice.NOTICE"
    copy_inputs([notice_file], project1.codebase_path)
    resource = CodebaseResource.objects.create(
        project=project1,
        path="notice.NOTICE",
    )

    # Simulate the copyrights scanner function being interrupted on timeout.
    with mock.patch(
        "scancode.api.get_copyrights", side_effect=InterruptTimeoutError
    ):
        scancode.scan_file_and_save_results(resource)

    resource.refresh_from_db()
    self.assertEqual("scanned-with-error", resource.status)
    self.assertEqual(1, project1.projecterrors.count())

    latest_error = project1.projecterrors.latest("created_date")
    self.assertEqual("CodebaseResource", latest_error.model)
    self.assertEqual("", latest_error.traceback)
    self.assertEqual(
        "ERROR: for scanner: copyrights:\n"
        "ERROR: Processing interrupted: timeout after 120 seconds.",
        latest_error.message,
    )

@mock.patch("scanpipe.pipes.scancode.scan_file")
def test_scanpipe_pipes_scancode_scan_for_files(self, mock_scan_file):
scan_results = {"license_expressions": ["mit"]}
Expand Down Expand Up @@ -324,6 +349,42 @@ def test_scanpipe_pipes_scancode_scan_for_files(self, mock_scan_file):
self.assertEqual("scanned", resource.status)
self.assertEqual(["mit"], resource.license_expressions)

def test_scanpipe_pipes_scancode_scan_for_package_info_timeout(self):
    # A timeout raised by the package scanner must be reported in the list of
    # errors returned by `scan_for_package_info`.
    input_location = str(self.data_location / "notice.NOTICE")

    # Simulate the package scanner function being interrupted on timeout.
    with mock.patch(
        "scancode.api.get_package_info", side_effect=InterruptTimeoutError
    ):
        _results, errors = scancode.scan_for_package_info(input_location)

    timeout_message = (
        "ERROR: for scanner: packages:\n"
        "ERROR: Processing interrupted: timeout after 120 seconds."
    )
    self.assertEqual([timeout_message], errors)

def test_scanpipe_pipes_scancode_scan_package_and_save_results_timeout_error(self):
    # A timeout raised by the package scanner must be stored as a single
    # project error and flag the resource as "scanned-with-error".
    project1 = Project.objects.create(name="Analysis")
    notice_file = self.data_location / "notice.NOTICE"
    copy_inputs([notice_file], project1.codebase_path)
    resource = CodebaseResource.objects.create(
        project=project1,
        path="notice.NOTICE",
    )

    # Simulate the package scanner function being interrupted on timeout.
    with mock.patch(
        "scancode.api.get_package_info", side_effect=InterruptTimeoutError
    ):
        scancode.scan_package_and_save_results(resource)

    resource.refresh_from_db()
    self.assertEqual("scanned-with-error", resource.status)
    self.assertEqual(1, project1.projecterrors.count())

    latest_error = project1.projecterrors.latest("created_date")
    self.assertEqual("CodebaseResource", latest_error.model)
    self.assertEqual("", latest_error.traceback)
    self.assertEqual(
        "ERROR: for scanner: packages:\n"
        "ERROR: Processing interrupted: timeout after 120 seconds.",
        latest_error.message,
    )

def test_scanpipe_pipes_scancode_virtual_codebase(self):
project = Project.objects.create(name="asgiref")
input_location = self.data_location / "asgiref-3.3.0_scan.json"
Expand Down

0 comments on commit 1eaf35f

Please sign in to comment.