Skip to content

Commit

Permalink
Merge pull request #3456 from armintaenzertng/update-spdx-tools-version
Browse files Browse the repository at this point in the history
Upgrade `spdx-tools` to v0.8.1 #3455
  • Loading branch information
pombredanne authored Oct 13, 2023
2 parents 1088337 + 6e8d2f9 commit f3e52e1
Show file tree
Hide file tree
Showing 21 changed files with 948 additions and 1,082 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ v32.1.0 (next, roadmap)
of these in other summary plugins.
See https://github.com/nexB/scancode-toolkit/issues/1745

- Upgraded spdx-tools dependency to v0.8.
See https://github.com/nexB/scancode-toolkit/issues/3455


v32.0.8 - 2023-10-11
------------------------
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ requests==2.28.1
saneyaml==0.6.0
six==1.16.0
soupsieve==2.3.2.post1
spdx-tools==0.7.0rc0
spdx-tools==0.8.1
text-unidecode==1.3
toml==0.10.2
typecode==30.0.1
Expand Down
2 changes: 1 addition & 1 deletion setup-mini.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ install_requires =
pymaven_patch >= 0.2.8
requests >= 2.7.0
saneyaml >= 0.6.0
spdx_tools == 0.7.0rc0
spdx_tools == 0.8.1
text_unidecode >= 1.0
toml >= 0.10.0
urlpy
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ install_requires =
pymaven_patch >= 0.2.8
requests >= 2.7.0
saneyaml >= 0.6.0
spdx_tools == 0.7.0rc0
spdx_tools == 0.8.1
text_unidecode >= 1.0
toml >= 0.10.0
urlpy
Expand Down
219 changes: 91 additions & 128 deletions src/formattedcode/output_spdx.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,29 +9,33 @@
import os
import sys
import uuid
from datetime import datetime
from io import BytesIO
from io import StringIO

from spdx.checksum import Checksum
from spdx.checksum import ChecksumAlgorithm
from spdx.creationinfo import Tool
from spdx.document import ExtractedLicense
from spdx.document import Document
from spdx.license import License
from spdx.file import File
from spdx.package import Package
from spdx.relationship import Relationship
from spdx.utils import calc_verif_code
from spdx.utils import NoAssert
from spdx.utils import SPDXNone
from spdx.version import Version

from license_expression import Licensing
from commoncode.cliutils import OUTPUT_GROUP
from commoncode.cliutils import PluggableCommandLineOption
from commoncode.fileutils import file_name
from commoncode.fileutils import parent_directory
from commoncode.text import python_safe_name
from spdx_tools.spdx.model import SpdxNoAssertion
from spdx_tools.spdx.model import Version
from spdx_tools.spdx.model import CreationInfo
from spdx_tools.spdx.model import Actor
from spdx_tools.spdx.model import ActorType
from spdx_tools.spdx.model import Document
from spdx_tools.spdx.model import Package
from spdx_tools.spdx.model import File
from spdx_tools.spdx.model import Checksum
from spdx_tools.spdx.model import ChecksumAlgorithm
from spdx_tools.spdx.model import ExtractedLicensingInfo
from spdx_tools.spdx.model import SpdxNone
from spdx_tools.spdx.model import Relationship
from spdx_tools.spdx.model import RelationshipType
from spdx_tools.spdx.spdx_element_utils import calculate_package_verification_code

from formattedcode import FileOptionType
from licensedcode.detection import get_matches_from_detection_mappings
from plugincode.output import output_impl
Expand Down Expand Up @@ -63,50 +67,6 @@ def logger_debug(*args):
Output plugins to write scan results in SPDX format.
"""

_spdx_list_is_patched = False


def _patch_license_list():
"""
Patch the SPDX Python library license list to match the list of ScanCode
known SPDX licenses.
"""
global _spdx_list_is_patched
if not _spdx_list_is_patched:
from spdx.config import LICENSE_MAP
from licensedcode.models import load_licenses
licenses = load_licenses(with_deprecated=True)
spdx_licenses = get_licenses_by_spdx_key(licenses.values())
LICENSE_MAP.update(spdx_licenses)
_spdx_list_is_patched = True


def get_licenses_by_spdx_key(licenses):
"""
Return a mapping of {spdx_key: license object} given a ``license`` sequence
of License objects.
"""
spdx_licenses = {}
for lic in licenses:
if not (lic.spdx_license_key or lic.other_spdx_license_keys):
continue

if lic.spdx_license_key:
name = lic.name
slk = lic.spdx_license_key
spdx_licenses[slk] = name
spdx_licenses[name] = slk

for other_spdx in lic.other_spdx_license_keys:
if not (other_spdx and other_spdx.strip()):
continue
slk = other_spdx
spdx_licenses[slk] = name
spdx_licenses[name] = slk

return spdx_licenses


@output_impl
class SpdxTvOutput(OutputPlugin):

Expand Down Expand Up @@ -220,7 +180,7 @@ def write_spdx(
tool_version,
notice,
package_name='',
download_location=NoAssert(),
download_location=SpdxNoAssertion(),
as_tagvalue=True,
spdx_version = (2, 2),
with_notice_text=False,
Expand All @@ -240,37 +200,43 @@ def write_spdx(
licensing = Licensing()

as_rdf = not as_tagvalue
_patch_license_list()

ns_prefix = '_'.join(package_name.lower().split())
comment = notice + f'\nSPDX License List: {scancode_config.spdx_license_list_version}'

version_major, version_minor = scancode_config.spdx_license_list_version.split(".")
spdx_license_list_version = Version(major=version_major, minor=version_minor)

doc = Document(
version=Version(*spdx_version),
data_license=License.from_identifier('CC0-1.0'),
comment=notice,
namespace=f'http://spdx.org/spdxdocs/{ns_prefix}-{uuid.uuid4()}',
license_list_version=scancode_config.spdx_license_list_version,
name='SPDX Document created by ScanCode Toolkit'
tool_name = tool_name or 'ScanCode'
creator = Actor(ActorType.TOOL, f'{tool_name} {tool_version}')

creation_info = CreationInfo(
spdx_id="SPDXRef-DOCUMENT",
spdx_version=f"SPDX-{spdx_version[0]}.{spdx_version[1]}",
data_license='CC0-1.0',
document_comment=comment,
document_namespace=f'http://spdx.org/spdxdocs/{ns_prefix}-{uuid.uuid4()}',
license_list_version=spdx_license_list_version,
name='SPDX Document created by ScanCode Toolkit',
creators=[creator],
created=datetime.now(),
)

tool_name = tool_name or 'ScanCode'
doc.creation_info.add_creator(Tool(f'{tool_name} {tool_version}'))
doc.creation_info.set_created_now()
doc.creation_info.license_list_version = spdx_license_list_version

package_id = '001'
package = doc.package = Package(
package = Package(
name=package_name,
download_location=download_location,
spdx_id=f'SPDXRef-{package_id}',
)

doc = Document(
creation_info=creation_info,
packages=[package],
)

# Use a set of unique copyrights for the package.
package.cr_text = set()
package_copyright_texts = set()

all_files_have_no_license = True
all_files_have_no_copyright = True
Expand All @@ -285,13 +251,18 @@ def write_spdx(
# Set a relative file name as that is what we want in
# SPDX output (with explicit leading './').
name = './' + file_data.get('path')
file_entry = File(
spdx_id=f'SPDXRef-{sid}',
name=name)

if file_data.get('file_type') == 'empty':
file_entry.set_checksum(Checksum(ChecksumAlgorithm.SHA1, "da39a3ee5e6b4b0d3255bfef95601890afd80709"))
checksum = Checksum(ChecksumAlgorithm.SHA1, "da39a3ee5e6b4b0d3255bfef95601890afd80709")
else:
file_entry.set_checksum(Checksum(ChecksumAlgorithm.SHA1, file_data.get('sha1') or ''))
# FIXME: this sets the checksum of a file to the empty string hash if unknown; tracked in https://github.com/nexB/scancode-toolkit/issues/3453
checksum = Checksum(ChecksumAlgorithm.SHA1, file_data.get('sha1') or 'da39a3ee5e6b4b0d3255bfef95601890afd80709')

file_entry = File(
spdx_id=f'SPDXRef-{sid}',
name=name,
checksums=[checksum]
)

file_license_detections = file_data.get('license_detections')
license_matches = get_matches_from_detection_mappings(file_license_detections)
Expand All @@ -312,63 +283,63 @@ def write_spdx(
spdx_id = f'LicenseRef-scancode-{license_key}'
is_license_ref = spdx_id.lower().startswith('licenseref-')

if not is_license_ref:
spdx_license = License.from_identifier(spdx_id)
else:
spdx_license = ExtractedLicense(spdx_id)
spdx_license.name = file_license.short_name
spdx_license = licensing.parse(spdx_id)

if is_license_ref:
text = match.get('matched_text')
# FIXME: replace this with the licensedb URL
comment = (
f'See details at https://github.com/nexB/scancode-toolkit'
f'/blob/develop/src/licensedcode/data/licenses/{license_key}.yml\n'
f'/blob/develop/src/licensedcode/data/licenses/{license_key}.LICENSE\n'
)
spdx_license.comment = comment
text = match.get('matched_text')
# always set some text, even if we did not extract the
# matched text
if not text:
text = comment
spdx_license.text = text
doc.add_extr_lic(spdx_license)
extracted_license = ExtractedLicensingInfo(
license_id=spdx_id,
# always set some text, even if we did not extract the
# matched text
extracted_text=text if text else comment,
license_name=file_license.short_name,
comment=comment,
)
doc.extracted_licensing_info.append(extracted_license)

# Add licenses in the order they appear in the file. Maintaining
# the order might be useful for provenance purposes.
file_entry.add_lics(spdx_license)
package.add_lics_from_file(spdx_license)
file_entry.license_info_in_file.append(spdx_license)
package.license_info_from_files.append(spdx_license)

elif license_matches is None:
all_files_have_no_license = False
file_entry.add_lics(NoAssert())
file_entry.license_info_in_file.append(SpdxNoAssertion())

else:
file_entry.add_lics(SPDXNone())
file_entry.license_info_in_file.append(SpdxNone())

file_entry.conc_lics = NoAssert()
file_entry.license_concluded = SpdxNoAssertion()

file_copyrights = file_data.get('copyrights')
if file_copyrights:
all_files_have_no_copyright = False
file_entry.copyright = []
copyrights = []
for file_copyright in file_copyrights:
file_entry.copyright.append(file_copyright.get('copyright'))
copyrights.append(file_copyright.get('copyright'))

package.cr_text.update(file_entry.copyright)
package_copyright_texts.update(copyrights)

# Create a text of copyright statements in the order they appear in
# the file. Maintaining the order might be useful for provenance
# purposes.
file_entry.copyright = '\n'.join(file_entry.copyright) + '\n'
file_entry.copyright_text = '\n'.join(copyrights) + '\n'

elif file_copyrights is None:
all_files_have_no_copyright = False
file_entry.copyright = NoAssert()
file_entry.copyright_text = SpdxNoAssertion()

else:
file_entry.copyright = SPDXNone()
file_entry.copyright_text = SpdxNone()

doc.add_file(file_entry)
relationship = Relationship(f'{package.spdx_id} CONTAINS {file_entry.spdx_id}')
doc.add_relationship(relationship)
doc.files.append(file_entry)
relationship = Relationship(package.spdx_id, RelationshipType.CONTAINS, file_entry.spdx_id)
doc.relationships.append(relationship)

if not doc.files:
if as_tagvalue:
Expand All @@ -379,33 +350,29 @@ def write_spdx(
output_file.write(msg)

# Remove duplicate licenses from the list for the package.
unique_licenses = {l.identifier: l for l in package.licenses_from_files}
unique_licenses = list(unique_licenses.values())
if not len(package.licenses_from_files):
package.license_info_from_files = list(set(package.license_info_from_files))
if not package.license_info_from_files:
if all_files_have_no_license:
package.licenses_from_files = [SPDXNone()]
package.license_info_from_files = [SpdxNone()]
else:
package.licenses_from_files = [NoAssert()]
package.license_info_from_files = [SpdxNoAssertion()]
else:
# List license identifiers alphabetically for the package.
package.licenses_from_files = sorted(
unique_licenses,
key=lambda x: x.identifier,
)
package.license_info_from_files = sorted(package.license_info_from_files)

if len(package.cr_text) == 0:
if not package_copyright_texts:
if all_files_have_no_copyright:
package.cr_text = SPDXNone()
package.copyright_text = SpdxNone()
else:
package.cr_text = NoAssert()
package.copyright_text = SpdxNoAssertion()
else:
# Create a text of alphabetically sorted copyright
# statements for the package.
package.cr_text = '\n'.join(sorted(package.cr_text)) + '\n'
package.copyright_text = '\n'.join(sorted(package_copyright_texts)) + '\n'

package.verif_code = calc_verif_code(doc.files)
package.license_declared = NoAssert()
package.conc_lics = NoAssert()
package.verification_code = calculate_package_verification_code(doc.files)
package.license_declared = SpdxNoAssertion()
package.license_concluded = SpdxNoAssertion()

# The spdx-tools write_document returns either:
# - unicode for tag values
Expand All @@ -416,19 +383,15 @@ def write_spdx(
# in the other case we deal with text all the way.

if doc.files:

if as_tagvalue:
from spdx.writers.tagvalue import write_document # NOQA
elif as_rdf:
from spdx.writers.rdf import write_document # NOQA

if as_tagvalue:
from spdx_tools.spdx.writer.tagvalue.tagvalue_writer import write_document_to_stream # NOQA
spdx_output = StringIO()
elif as_rdf:
from spdx_tools.spdx.writer.rdf.rdf_writer import write_document_to_stream # NOQA
# rdf is utf-encoded bytes
spdx_output = BytesIO()

write_document(doc, spdx_output, validate=False)
write_document_to_stream(doc, spdx_output, validate=False)
result = spdx_output.getvalue()

if as_rdf:
Expand Down
Loading

0 comments on commit f3e52e1

Please # to comment.