Skip to content

Commit

Permalink
Remove the run_extractcode pipe in favor of extractcode API #312
Browse files Browse the repository at this point in the history
Signed-off-by: Thomas Druez <tdruez@nexb.com>
  • Loading branch information
tdruez committed Aug 31, 2021
1 parent 1ab5fa7 commit b24dcaf
Show file tree
Hide file tree
Showing 7 changed files with 51 additions and 49 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,17 @@

### Unreleased

- Remove the run_extractcode pipe in favor of the extractcode API.
https://github.com/nexB/scancode.io/issues/312

- The `scancode.run_scancode` pipe now uses an optimal number of available CPUs for
multiprocessing by default.
The exact number of parallel processes available to ScanCode.io can be defined
using the SCANCODEIO_PROCESSES setting.
https://github.com/nexB/scancode.io/issues/302

- Renamed the SCANCODE_DEFAULT_OPTIONS setting to SCANCODE_TOOLKIT_CLI_OPTIONS.
https://github.com/nexB/scancode.io/issues/302

- Log the outputs of run_scancode as progress indication.
https://github.com/nexB/scancode.io/issues/300
Expand Down
2 changes: 1 addition & 1 deletion docs/custom-pipelines.rst
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ ones, or remove any of them.
return (
# Original steps from the ScanCodebase pipeline
cls.copy_inputs_to_codebase_directory,
cls.run_extractcode,
cls.extract_archives,
cls.run_scancode,
cls.build_inventory_from_scan,
Expand Down
2 changes: 1 addition & 1 deletion docs/tutorial-2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ Instructions
2021-07-12 17:45:53.85 Pipeline [scan_codebase] starting
2021-07-12 17:45:53.85 Step [copy_inputs_to_codebase_directory] starting
2021-07-12 17:45:53.86 Step [copy_inputs_to_codebase_directory] completed in 0.00 seconds
2021-07-12 17:45:53.86 Step [run_extractcode] starting
2021-07-12 17:45:53.86 Step [extract_archives] starting
[...]
2021-07-12 17:46:01.61 Pipeline completed
Expand Down
26 changes: 13 additions & 13 deletions scanpipe/pipelines/scan_codebase.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,16 +40,15 @@ class ScanCodebase(Pipeline):
def steps(cls):
return (
cls.copy_inputs_to_codebase_directory,
cls.run_extractcode,
cls.extract_archives,
cls.run_scancode,
cls.build_inventory_from_scan,
cls.csv_output,
)

extractcode_options = [
"--shallow",
"--all-formats",
]
# Set to True to recursively extract archives nested inside other archives.
extract_recursively = False

scancode_options = [
"--copyright",
"--email",
Expand All @@ -67,16 +66,17 @@ def copy_inputs_to_codebase_directory(self):
"""
copy_inputs(self.project.inputs(), self.project.codebase_path)

def run_extractcode(self):
def extract_archives(self):
"""
Extracts with extractcode.
Extracts archives with extractcode.
"""
with self.save_errors(scancode.ScancodeError):
scancode.run_extractcode(
location=str(self.project.codebase_path),
options=self.extractcode_options,
raise_on_error=True,
)
extract_errors = scancode.extract_archives(
location=self.project.codebase_path,
recurse=self.extract_recursively,
)

if extract_errors:
self.add_error("\n".join(extract_errors))

def run_scancode(self):
"""
Expand Down
51 changes: 27 additions & 24 deletions scanpipe/pipes/scancode.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
from commoncode import fileutils
from commoncode.resource import VirtualCodebase
from extractcode import all_kinds
from extractcode import api as extractcode_api
from extractcode.extract import extract_file
from scancode import ScancodeError
from scancode import Scanner
Expand Down Expand Up @@ -73,7 +74,9 @@ def get_max_workers(keep_available):

def extract(location, target):
"""
Wraps the `extractcode.extract_file` to execute the extraction and return errors.
Extracts the file at `location` to the `target` and return errors.
Wraps the `extractcode.extract_file` function.
"""
errors = []

Expand All @@ -84,6 +87,29 @@ def extract(location, target):
return errors


def extract_archives(location, recurse, all_formats=True):
    """
    Extracts every archive found at `location` and returns the list of errors
    reported during the extraction.
    Wraps the `extractcode.api.extract_archives` function.
    When `recurse` is True, nested archives-in-archives are extracted recursively.
    When `all_formats` is True, all supported archives formats are extracted.
    The extraction is always requested with `replace_originals=False`.
    """
    extraction_events = extractcode_api.extract_archives(
        location,
        recurse=recurse,
        replace_originals=False,
        all_formats=all_formats,
    )

    # Only the events flagged as `done` contribute their errors to the result.
    return [
        error
        for event in extraction_events
        if event.done
        for error in event.errors
    ]


def get_resource_info(location):
"""
Returns a mapping suitable for the creation of a new CodebaseResource.
Expand Down Expand Up @@ -285,29 +311,6 @@ def scan_for_application_packages(project):
_scan_and_save(project, scan_for_package_info, save_scan_package_results)


def run_extractcode(location, options=None, raise_on_error=False):
    """
    Runs the `extractcode` executable on the content at `location`.
    Additional command line arguments for `extractcode` can be supplied through
    the optional `options` list; they are appended after the location.
    Returns the `(exitcode, output)` tuple of the executed command.
    When `raise_on_error` is enabled, a ScancodeError carrying the command output
    is raised if the exitcode is greater than 0.
    """
    executable = pipes.get_bin_executable("extractcode")
    # The location is shell-quoted before being handed to `pipes.run_command`.
    command = [executable, shlex.quote(location), *(options or [])]

    exitcode, output = pipes.run_command(command)

    failed = exitcode > 0
    if failed and raise_on_error:
        raise ScancodeError(output)

    return exitcode, output


def run_scancode(location, output_file, options, raise_on_error=False):
"""
Scans the `location` content and write the results into an `output_file`.
Expand Down
8 changes: 4 additions & 4 deletions scanpipe/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,8 +537,8 @@ def test_scanpipe_run_model_profile_method(self):
"2021-02-05 12:46:47.63 Step [copy_inputs_to_codebase_directory] starting\n"
"2021-02-05 12:46:47.63 Step [copy_inputs_to_codebase_directory]"
" completed in 0.00 seconds\n"
"2021-02-05 12:46:47.63 Step [run_extractcode] starting\n"
"2021-02-05 12:46:48.13 Step [run_extractcode] completed in 0.50 seconds\n"
"2021-02-05 12:46:47.63 Step [extract_archives] starting\n"
"2021-02-05 12:46:48.13 Step [extract_archives] completed in 0.50 seconds\n"
"2021-02-05 12:46:48.14 Step [run_scancode] starting\n"
"2021-02-05 12:46:52.59 Step [run_scancode] completed in 4.45 seconds\n"
"2021-02-05 12:46:52.59 Step [build_inventory_from_scan] starting\n"
Expand All @@ -558,7 +558,7 @@ def test_scanpipe_run_model_profile_method(self):
"build_inventory_from_scan": 0.16,
"copy_inputs_to_codebase_directory": 0.0,
"csv_output": 0.06,
"run_extractcode": 0.5,
"extract_archives": 0.5,
"run_scancode": 4.45,
}
self.assertEqual(expected, run1.profile())
Expand All @@ -569,7 +569,7 @@ def test_scanpipe_run_model_profile_method(self):

expected = (
"copy_inputs_to_codebase_directory 0.0 seconds 0.0%\n"
"run_extractcode 0.5 seconds 9.7%\n"
"extract_archives 0.5 seconds 9.7%\n"
"\x1b[41;37mrun_scancode 4.45 seconds 86.1%\x1b[m\n"
"build_inventory_from_scan 0.16 seconds 3.1%\n"
"csv_output 0.06 seconds 1.2%\n"
Expand Down
6 changes: 0 additions & 6 deletions scanpipe/tests/test_pipes.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,12 +494,6 @@ def test_scanpipe_pipes_scancode_create_codebase_resources_inject_policy(self):
}
self.assertEqual(expected, resource2.licenses[0]["policy"])

def test_scanpipe_pipes_scancode_run_extractcode(self):
    # The project name contains spaces; presumably this exercises the quoting
    # of the codebase location passed to extractcode -- confirm against
    # how `codebase_path` is derived from the project.
    project = Project.objects.create(name="name with space")
    codebase_location = str(project.codebase_path)

    result = scancode.run_extractcode(codebase_location)
    exitcode, output = result

    # A successful run exits with 0 and reports completion in its output.
    self.assertEqual(0, exitcode)
    self.assertIn("Extracting done.", output)

def test_scanpipe_pipes_scancode_run_scancode(self):
project = Project.objects.create(name="name with space")
exitcode, output = scancode.run_scancode(
Expand Down

0 comments on commit b24dcaf

Please sign in to comment.