Skip to content

Commit

Permalink
Support inputs as tarballs in root_filesystem pipelines #96
Browse files Browse the repository at this point in the history
- Improve support for unknown distros in docker and root_filesystem pipelines #97

Signed-off-by: Thomas Druez <tdruez@nexb.com>
  • Loading branch information
tdruez committed Feb 15, 2021
1 parent aa6ba17 commit eb970db
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 10 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,14 @@

### v1.1.0 (unreleased)

- Support inputs as tarballs in root_filesystem pipelines.
The input archives are now extracted with extractcode to the codebase/ directory.
https://github.com/nexB/scancode.io/issues/96

- Improve support for unknown distros in docker and root_filesystem pipelines.
The pipeline logs the distro errors on the project instead of failing.
https://github.com/nexB/scancode.io/issues/97

- Implement Pipeline registration through distribution entry points.
Pipeline can now be installed as part of external libraries.
With this change pipelines are no longer referenced by the
Expand Down
5 changes: 3 additions & 2 deletions scanpipe/pipelines/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,9 @@ def collect_and_create_system_packages(self):
"""
Collect installed system packages for each layer based on the distro.
"""
for image in self.images:
docker.scan_image_for_system_packages(self.project, image)
with self.save_errors(rootfs.DistroNotFound, rootfs.DistroNotSupported):
for image in self.images:
docker.scan_image_for_system_packages(self.project, image)

def tag_uninteresting_codebase_resources(self):
"""
Expand Down
29 changes: 25 additions & 4 deletions scanpipe/pipelines/root_filesystems.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@

import os

from extractcode.extract import extract_file

from scanpipe import pipes
from scanpipe.pipelines import Pipeline
from scanpipe.pipes import rootfs
Expand All @@ -32,6 +34,22 @@ class RootFS(Pipeline):
A pipeline to analyze a Linux root filesystem aka. rootfs.
"""

def extract_input_files_to_codebase_directory(self):
    """
    Run extractcode on every project input, targeting the codebase directory.

    The errors carried by completed extraction events are gathered across all
    inputs and, when there are any, handed to ``self.add_error`` as a single
    newline-joined message.
    """
    archives = self.project.inputs("*")
    destination = str(self.project.codebase_path)

    # Only events flagged as done contribute their errors.
    collected = [
        error
        for archive in archives
        for event in extract_file(archive, destination)
        if event.done
        for error in event.errors
    ]

    if collected:
        self.add_error("\n".join(collected))

def find_root_filesystems(self):
"""
Find the root filesystems in project codebase/.
Expand All @@ -42,11 +60,11 @@ def collect_rootfs_information(self):
"""
Collect rootfs information and store on project.
"""

rootfs_data = {}
for rfs in self.root_filesystems:
rootfs_data["name"] = os.path.basename(rfs.location)
rootfs_data["distro"] = rfs.distro.to_dict()
rootfs_data["distro"] = rfs.distro.to_dict() if rfs.distro else {}

self.project.extra_data.update({"images": rootfs_data})
self.project.save()

Expand All @@ -60,9 +78,11 @@ def collect_and_create_codebase_resources(self):
def collect_and_create_system_packages(self):
"""
Collect installed system packages for each rootfs based on the distro.
The collection of system packages is only available for known distros.
"""
for rfs in self.root_filesystems:
rootfs.scan_rootfs_for_system_packages(self.project, rfs)
with self.save_errors(rootfs.DistroNotFound, rootfs.DistroNotSupported):
for rfs in self.root_filesystems:
rootfs.scan_rootfs_for_system_packages(self.project, rfs)

def tag_uninteresting_codebase_resources(self):
"""
Expand Down Expand Up @@ -122,6 +142,7 @@ def tag_not_analyzed_codebase_resources(self):
pipes.tag_not_analyzed_codebase_resources(self.project)

steps = (
extract_input_files_to_codebase_directory,
find_root_filesystems,
collect_rootfs_information,
collect_and_create_codebase_resources,
Expand Down
6 changes: 4 additions & 2 deletions scanpipe/pipes/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,10 +107,12 @@ def scan_image_for_system_packages(project, image, detect_licenses=True):
as a CodebaseResource and relate that CodebaseResource to its
DiscoveredPackage or keep that as a missing file.
"""
distro_id = image.distro.identifier
if not image.distro:
raise rootfs.DistroNotFound(f"Distro not found.")

distro_id = image.distro.identifier
if distro_id not in rootfs.PACKAGE_GETTER_BY_DISTRO:
raise NotImplementedError(f'Distro "{distro_id}" is not supported.')
raise rootfs.DistroNotSupported(f'Distro "{distro_id}" is not supported.')

package_getter = partial(
rootfs.PACKAGE_GETTER_BY_DISTRO[distro_id],
Expand Down
14 changes: 12 additions & 2 deletions scanpipe/pipes/rootfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,14 @@
}


class DistroNotFound(Exception):
    """Raised when the scanned image or rootfs has no distro set."""


class DistroNotSupported(Exception):
    """Raised when the distro identifier has no registered package getter."""


@attr.attributes
class Resource:
rootfs_path = attr.attrib(
Expand Down Expand Up @@ -154,10 +162,12 @@ def scan_rootfs_for_system_packages(project, rootfs, detect_licenses=True):
as a CodebaseResource and relate that CodebaseResource to its
DiscoveredPackage or keep that as a missing file.
"""
distro_id = rootfs.distro.identifier
if not rootfs.distro:
raise DistroNotFound(f"Distro not found.")

distro_id = rootfs.distro.identifier
if distro_id not in PACKAGE_GETTER_BY_DISTRO:
raise NotImplementedError(f'Distro "{distro_id}" is not supported.')
raise DistroNotSupported(f'Distro "{distro_id}" is not supported.')

package_getter = partial(
PACKAGE_GETTER_BY_DISTRO[distro_id],
Expand Down

0 comments on commit eb970db

Please sign in to comment.