From dec20a61247bf79e2c0f3988b76baf8fddeb8950 Mon Sep 17 00:00:00 2001 From: Ben Selwyn-Smith Date: Wed, 12 Mar 2025 11:12:20 +1000 Subject: [PATCH 1/5] chore: add back provenance asset information Signed-off-by: Ben Selwyn-Smith --- src/macaron/database/table_definitions.py | 6 +++ src/macaron/provenance/provenance_finder.py | 48 ++++++++++++------- src/macaron/provenance/provenance_verifier.py | 26 +++++----- src/macaron/slsa_analyzer/analyzer.py | 13 +++-- .../checks/provenance_available_check.py | 25 +++++++--- 5 files changed, 81 insertions(+), 37 deletions(-) diff --git a/src/macaron/database/table_definitions.py b/src/macaron/database/table_definitions.py index 2a7f1e95a..d91d55154 100644 --- a/src/macaron/database/table_definitions.py +++ b/src/macaron/database/table_definitions.py @@ -511,6 +511,12 @@ class Provenance(ORMBase): #: The provenance payload. provenance_payload: Mapped[InTotoPayload] = mapped_column(ProvenancePayload, nullable=False) + #: The name of the provenance asset. + provenance_asset_name: Mapped[str] = mapped_column(String, nullable=True) + + #: The URL of the provenance asset. + provenance_asset_url: Mapped[str] = mapped_column(String, nullable=True) + #: The verified status of the provenance. 
verified: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False) diff --git a/src/macaron/provenance/provenance_finder.py b/src/macaron/provenance/provenance_finder.py index 853a3a3cd..dee99893c 100644 --- a/src/macaron/provenance/provenance_finder.py +++ b/src/macaron/provenance/provenance_finder.py @@ -6,6 +6,7 @@ import logging import os import tempfile +from dataclasses import dataclass from functools import partial from packageurl import PackageURL @@ -30,6 +31,15 @@ logger: logging.Logger = logging.getLogger(__name__) +@dataclass(frozen=True) +class ProvenanceAsset: + """This class exists to hold a provenance payload with the original asset's name and URL.""" + + payload: InTotoPayload + name: str + url: str + + class ProvenanceFinder: """This class is used to find and retrieve provenance files from supported registries.""" @@ -44,7 +54,7 @@ def __init__(self) -> None: elif isinstance(registry, JFrogMavenRegistry): self.jfrog_registry = registry - def find_provenance(self, purl: PackageURL) -> list[InTotoPayload]: + def find_provenance(self, purl: PackageURL) -> list[ProvenanceAsset]: """Find the provenance file(s) of the passed PURL. Parameters @@ -54,8 +64,8 @@ def find_provenance(self, purl: PackageURL) -> list[InTotoPayload]: Returns ------- - list[InTotoPayload] - The provenance payload, or an empty list if not found. + list[ProvenanceAsset] + The provenance asset, or an empty list if not found. """ logger.debug("Seeking provenance of: %s", purl) @@ -88,7 +98,7 @@ def find_provenance(self, purl: PackageURL) -> list[InTotoPayload]: logger.debug("Provenance finding not supported for PURL type: %s", purl.type) return [] - def _find_provenance(self, discovery_functions: list[partial[list[InTotoPayload]]]) -> list[InTotoPayload]: + def _find_provenance(self, discovery_functions: list[partial[list[ProvenanceAsset]]]) -> list[ProvenanceAsset]: """Find the provenance file(s) using the passed discovery functions. 
Parameters @@ -99,7 +109,7 @@ def _find_provenance(self, discovery_functions: list[partial[list[InTotoPayload] Returns ------- list[InTotoPayload] - The provenance payload(s) from the first successful function, or an empty list if none were. + The provenance asset(s) from the first successful function, or an empty list if none were. """ if not discovery_functions: return [] @@ -114,7 +124,7 @@ def _find_provenance(self, discovery_functions: list[partial[list[InTotoPayload] return [] -def find_npm_provenance(purl: PackageURL, registry: NPMRegistry) -> list[InTotoPayload]: +def find_npm_provenance(purl: PackageURL, registry: NPMRegistry) -> list[ProvenanceAsset]: """Find and download the NPM based provenance for the passed PURL. Two kinds of attestation can be retrieved from npm: "Provenance" and "Publish". The "Provenance" attestation @@ -131,8 +141,8 @@ def find_npm_provenance(purl: PackageURL, registry: NPMRegistry) -> list[InTotoP Returns ------- - list[InTotoPayload] - The provenance payload(s), or an empty list if not found. + list[ProvenanceAsset] + The provenance asset(s), or an empty list if not found. 
""" if not registry.enabled: logger.debug("The npm registry is not enabled.") @@ -178,16 +188,19 @@ def find_npm_provenance(purl: PackageURL, registry: NPMRegistry) -> list[InTotoP publish_payload = load_provenance_payload(signed_download_path) except LoadIntotoAttestationError as error: logger.error("Error while loading publish attestation: %s", error) - return [provenance_payload] + return [ProvenanceAsset(provenance_payload, npm_provenance_asset.name, npm_provenance_asset.url)] - return [provenance_payload, publish_payload] + return [ + ProvenanceAsset(provenance_payload, npm_provenance_asset.name, npm_provenance_asset.url), + ProvenanceAsset(publish_payload, npm_provenance_asset.name, npm_provenance_asset.url), + ] except OSError as error: logger.error("Error while storing provenance in the temporary directory: %s", error) return [] -def find_gav_provenance(purl: PackageURL, registry: JFrogMavenRegistry) -> list[InTotoPayload]: +def find_gav_provenance(purl: PackageURL, registry: JFrogMavenRegistry) -> list[ProvenanceAsset]: """Find and download the GAV based provenance for the passed PURL. Parameters @@ -199,8 +212,8 @@ def find_gav_provenance(purl: PackageURL, registry: JFrogMavenRegistry) -> list[ Returns ------- - list[InTotoPayload] | None - The provenance payload if found, or an empty list otherwise. + list[ProvenanceAsset] + The provenance asset if found, or an empty list otherwise. 
Raises ------ @@ -269,7 +282,7 @@ def find_gav_provenance(purl: PackageURL, registry: JFrogMavenRegistry) -> list[ if not is_witness_provenance_payload(provenance_payload, witness_verifier_config.predicate_types): continue - provenances.append(provenance_payload) + provenances.append(ProvenanceAsset(provenance_payload, provenance_asset.name, provenance_asset.url)) except OSError as error: logger.error("Error while storing provenance in the temporary directory: %s", error) @@ -314,7 +327,7 @@ def find_pypi_provenance(purl: PackageURL) -> list[InTotoPayload]: def find_provenance_from_ci( analyze_ctx: AnalyzeContext, git_obj: Git | None, download_path: str -) -> InTotoPayload | None: +) -> ProvenanceAsset | None: """Try to find provenance from CI services of the repository. Note that we stop going through the CI services once we encounter a CI service @@ -409,7 +422,10 @@ def find_provenance_from_ci( download_provenances_from_ci_service(ci_info, download_path) # TODO consider how to handle multiple payloads here. 
- return ci_info["provenances"][0].payload if ci_info["provenances"] else None + if ci_info["provenances"]: + provenance = ci_info["provenances"][0] + return ProvenanceAsset(provenance.payload, provenance.asset.name, provenance.asset.url) + return None else: logger.debug("CI service not supported for provenance finding: %s", ci_service.name) diff --git a/src/macaron/provenance/provenance_verifier.py b/src/macaron/provenance/provenance_verifier.py index 174d09c6d..f366fe127 100644 --- a/src/macaron/provenance/provenance_verifier.py +++ b/src/macaron/provenance/provenance_verifier.py @@ -17,6 +17,7 @@ from macaron.config.defaults import defaults from macaron.config.global_config import global_config from macaron.provenance.provenance_extractor import ProvenancePredicate, SLSAGithubGenericBuildDefinitionV01 +from macaron.provenance.provenance_finder import ProvenanceAsset from macaron.repo_finder.commit_finder import AbstractPurlType, determine_abstract_purl_type from macaron.slsa_analyzer.analyze_context import AnalyzeContext from macaron.slsa_analyzer.asset import AssetLocator @@ -28,15 +29,15 @@ logger: logging.Logger = logging.getLogger(__name__) -def verify_provenance(purl: PackageURL, provenance: list[InTotoPayload]) -> bool: +def verify_provenance(purl: PackageURL, provenance_assets: list[ProvenanceAsset]) -> bool: """Verify the passed provenance. Parameters ---------- purl: PackageURL The PURL of the analysis target. - provenance: list[InTotoPayload] - The list of provenance. + provenance_assets: list[ProvenanceAsset] + The list of provenance assets. Returns ------- @@ -50,7 +51,7 @@ def verify_provenance(purl: PackageURL, provenance: list[InTotoPayload]) -> bool verification_function = None if purl.type == "npm": - verification_function = partial(verify_npm_provenance, purl, provenance) + verification_function = partial(verify_npm_provenance, purl, provenance_assets) # TODO other verification functions go here. 
@@ -61,31 +62,34 @@ def verify_provenance(purl: PackageURL, provenance: list[InTotoPayload]) -> bool return False -def verify_npm_provenance(purl: PackageURL, provenance: list[InTotoPayload]) -> bool: +def verify_npm_provenance(purl: PackageURL, provenance_assets: list[ProvenanceAsset]) -> bool: """Compare the unsigned payload subject digest with the signed payload digest, if available. Parameters ---------- purl: PackageURL The PURL of the analysis target. - provenance: list[InTotoPayload] - The provenances to verify. + provenance_assets: list[ProvenanceAsset] + The provenance assets to verify. Returns ------- bool True if the provenance was verified, or False otherwise. """ - if len(provenance) != 2: - logger.debug("Expected unsigned and signed provenance.") + if len(provenance_assets) != 2: + logger.debug("Expected unsigned and signed provenance assets.") return False - signed_subjects = provenance[1].statement.get("subject") + signed_provenance = provenance_assets[1].payload + unsigned_provenance = provenance_assets[0].payload + + signed_subjects = signed_provenance.statement.get("subject") if not signed_subjects: logger.debug("Missing signed subjects.") return False - unsigned_subjects = provenance[0].statement.get("subject") + unsigned_subjects = unsigned_provenance.statement.get("subject") if not unsigned_subjects: logger.debug("Missing unsigned subjects.") return False diff --git a/src/macaron/slsa_analyzer/analyzer.py b/src/macaron/slsa_analyzer/analyzer.py index e3957e875..740382299 100644 --- a/src/macaron/slsa_analyzer/analyzer.py +++ b/src/macaron/slsa_analyzer/analyzer.py @@ -357,15 +357,17 @@ def run_single( package_registries_info = self._populate_package_registry_info() provenance_is_verified = False + provenance_asset = None if not provenance_payload and parsed_purl: # Try to find the provenance file for the parsed PURL. 
provenance_finder = ProvenanceFinder() provenances = provenance_finder.find_provenance(parsed_purl) if provenances: - provenance_payload = provenances[0] + provenance_asset = provenances[0] + provenance_payload = provenance_asset.payload if provenance_payload.verified: provenance_is_verified = True - elif verify_provenance: + if verify_provenance: provenance_is_verified = provenance_verifier.verify_provenance(parsed_purl, provenances) # Try to extract the repository URL and commit digest from the Provenance, if it exists. @@ -490,10 +492,11 @@ def run_single( if not provenance_payload: # Look for provenance using the CI. with tempfile.TemporaryDirectory() as temp_dir: - provenance_payload = find_provenance_from_ci(analyze_ctx, git_obj, temp_dir) + provenance_asset = find_provenance_from_ci(analyze_ctx, git_obj, temp_dir) # If found, validate analysis target against new provenance. - if provenance_payload: + if provenance_asset: # If repository URL was not provided as input, check the one found during analysis. + provenance_payload = provenance_asset.payload if not repo_path_input and component.repository: repo_path_input = component.repository.remote_path provenance_repo_url = provenance_commit_digest = None @@ -538,6 +541,8 @@ def run_single( provenance_payload=provenance_payload, slsa_level=slsa_level, slsa_version=slsa_version, + provenance_asset_name=provenance_asset.name if provenance_asset else None, + provenance_asset_url=provenance_asset.url if provenance_asset else None, # TODO Add release tag, release digest. ) diff --git a/src/macaron/slsa_analyzer/checks/provenance_available_check.py b/src/macaron/slsa_analyzer/checks/provenance_available_check.py index 77fcf87fe..1da852955 100644 --- a/src/macaron/slsa_analyzer/checks/provenance_available_check.py +++ b/src/macaron/slsa_analyzer/checks/provenance_available_check.py @@ -74,18 +74,31 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: CheckResultData The result of the check. 
""" - available = ( - ctx.dynamic_data["provenance_info"] - and ctx.dynamic_data["provenance_info"].provenance_payload - and not ctx.dynamic_data["is_inferred_prov"] - ) + provenance_info = None + inferred = False + if ctx.dynamic_data["provenance_info"]: + provenance_info = ctx.dynamic_data["provenance_info"] + inferred = ctx.dynamic_data["is_inferred_prov"] + + if not provenance_info or not provenance_info.provenance_payload or inferred: + return CheckResultData( + result_tables=[ + ProvenanceAvailableFacts( + confidence=Confidence.HIGH, + ) + ], + result_type=CheckResultType.FAILED, + ) + return CheckResultData( result_tables=[ ProvenanceAvailableFacts( confidence=Confidence.HIGH, + asset_name=provenance_info.provenance_asset_name, + asset_url=provenance_info.provenance_asset_url, ) ], - result_type=CheckResultType.PASSED if available else CheckResultType.FAILED, + result_type=CheckResultType.PASSED, ) From bc5d11c21487f3c9e3f5b35019637375eafe45b4 Mon Sep 17 00:00:00 2001 From: Ben Selwyn-Smith Date: Mon, 28 Apr 2025 09:45:51 +1000 Subject: [PATCH 2/5] chore: prune table columns Signed-off-by: Ben Selwyn-Smith --- src/macaron/database/table_definitions.py | 3 --- src/macaron/slsa_analyzer/analyzer.py | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/macaron/database/table_definitions.py b/src/macaron/database/table_definitions.py index d91d55154..be8928ce4 100644 --- a/src/macaron/database/table_definitions.py +++ b/src/macaron/database/table_definitions.py @@ -499,9 +499,6 @@ class Provenance(ORMBase): #: The release tag commit sha. release_commit_sha: Mapped[str] = mapped_column(String, nullable=True) - #: The release tag. - release_tag: Mapped[str] = mapped_column(String, nullable=True) - #: The repository URL from the provenance. 
repository_url: Mapped[str] = mapped_column(String, nullable=True) diff --git a/src/macaron/slsa_analyzer/analyzer.py b/src/macaron/slsa_analyzer/analyzer.py index 740382299..d75b0d94e 100644 --- a/src/macaron/slsa_analyzer/analyzer.py +++ b/src/macaron/slsa_analyzer/analyzer.py @@ -543,7 +543,7 @@ def run_single( slsa_version=slsa_version, provenance_asset_name=provenance_asset.name if provenance_asset else None, provenance_asset_url=provenance_asset.url if provenance_asset else None, - # TODO Add release tag, release digest. + # TODO Add release digest. ) analyze_ctx.dynamic_data["validate_malware"] = validate_malware From 25bf1c1bbe2b895326574e406042362a0725fd5e Mon Sep 17 00:00:00 2001 From: Ben Selwyn-Smith Date: Mon, 28 Apr 2025 09:51:29 +1000 Subject: [PATCH 3/5] chore: update database diagram Signed-off-by: Ben Selwyn-Smith --- docs/source/assets/er-diagram.svg | 1592 ++++++++++++++++------------- 1 file changed, 876 insertions(+), 716 deletions(-) diff --git a/docs/source/assets/er-diagram.svg b/docs/source/assets/er-diagram.svg index e61539047..33520a9ba 100644 --- a/docs/source/assets/er-diagram.svg +++ b/docs/source/assets/er-diagram.svg @@ -1,958 +1,1118 @@ - - - - -%3 - + + + + _analysis - -_analysis - -id - [INTEGER] - NOT NULL - -analysis_time - [VARCHAR] - NOT NULL - -macaron_version - [VARCHAR] - NOT NULL + +_analysis + +id + + [INTEGER] + NOT NULL + +analysis_time + + [VARCHAR] + NOT NULL + +macaron_version + + [VARCHAR] + NOT NULL _component - -_component - -id - [INTEGER] - NOT NULL - -analysis_id - [INTEGER] - NOT NULL - -name - [VARCHAR(100)] - NOT NULL - -namespace - [VARCHAR(255)] - -purl - [VARCHAR] - NOT NULL - -qualifiers - [VARCHAR(1024)] - -subpath - [VARCHAR(200)] - -type - [VARCHAR(16)] - NOT NULL - -version - [VARCHAR(100)] + +_component + +id + + [INTEGER] + NOT NULL + +analysis_id + + [INTEGER] + NOT NULL + +name + + [VARCHAR(100)] + NOT NULL + +namespace + + [VARCHAR(255)] + +purl + + [VARCHAR] + NOT NULL + +qualifiers + + 
[VARCHAR(1024)] + +subpath + + [VARCHAR(200)] + +type + + [VARCHAR(16)] + NOT NULL + +version + + [VARCHAR(100)] _analysis--_component - -0..N -1 + +0..N +1 _check_facts - -_check_facts - -id - [INTEGER] - NOT NULL - -check_result_id - [INTEGER] - NOT NULL - -check_type - [VARCHAR] - NOT NULL - -component_id - [INTEGER] - NOT NULL - -confidence - [FLOAT] - NOT NULL + +_check_facts + +id + + [INTEGER] + NOT NULL + +check_result_id + + [INTEGER] + NOT NULL + +check_type + + [VARCHAR] + NOT NULL + +component_id + + [INTEGER] + NOT NULL + +confidence + + [FLOAT] + NOT NULL _component--_check_facts - -0..N -1 + +0..N +1 _check_result - -_check_result - -id - [INTEGER] - NOT NULL - -check_id - [VARCHAR] - NOT NULL - -component_id - [INTEGER] - NOT NULL - -passed - [BOOLEAN] - NOT NULL + +_check_result + +id + + [INTEGER] + NOT NULL + +check_id + + [VARCHAR] + NOT NULL + +component_id + + [INTEGER] + NOT NULL + +passed + + [BOOLEAN] + NOT NULL _component--_check_result - -0..N -1 + +0..N +1 _dependency - -_dependency - -child_component - [INTEGER] - NOT NULL - -parent_component - [INTEGER] - NOT NULL + +_dependency + +child_component + + [INTEGER] + NOT NULL + +parent_component + + [INTEGER] + NOT NULL _component--_dependency - -1 -1 + +1 +1 _component--_dependency - -1 -1 + +1 +1 _provenance - -_provenance - -id - [INTEGER] - NOT NULL - -commit_sha - [VARCHAR] - -component_id - [INTEGER] - NOT NULL - -provenance_payload - [VARCHAR] - NOT NULL - -release_commit_sha - [VARCHAR] - -release_tag - [VARCHAR] - -repository_url - [VARCHAR] - -slsa_level - [INTEGER] - NOT NULL - -slsa_version - [VARCHAR] - -verified - [BOOLEAN] - NOT NULL + +_provenance + +id + + [INTEGER] + NOT NULL + +commit_sha + + [VARCHAR] + +component_id + + [INTEGER] + NOT NULL + +provenance_asset_name + + [VARCHAR] + +provenance_asset_url + + [VARCHAR] + +provenance_payload + + [VARCHAR] + NOT NULL + +release_commit_sha + + [VARCHAR] + +release_tag + + [VARCHAR] + +repository_url + + [VARCHAR] + 
+slsa_level + + [INTEGER] + NOT NULL + +slsa_version + + [VARCHAR] + +verified + + [BOOLEAN] + NOT NULL _component--_provenance - -0..N -1 + +0..N +1 _provenance_subject - -_provenance_subject - -id - [INTEGER] - NOT NULL - -component_id - [INTEGER] - NOT NULL - -sha256 - [VARCHAR] - NOT NULL + +_provenance_subject + +id + + [INTEGER] + NOT NULL + +component_id + + [INTEGER] + NOT NULL + +sha256 + + [VARCHAR] + NOT NULL _component--_provenance_subject - -0..N -1 + +0..N +1 _repo_finder_metadata - -_repo_finder_metadata - -id - [INTEGER] - NOT NULL - -commit_finder_outcome - [VARCHAR(21)] - NOT NULL - -component_id - [INTEGER] - NOT NULL - -found_commit - [VARCHAR] - NOT NULL - -found_url - [VARCHAR] - NOT NULL - -repo_finder_outcome - [VARCHAR(24)] - NOT NULL + +_repo_finder_metadata + +id + + [INTEGER] + NOT NULL + +commit_finder_outcome + + [VARCHAR(21)] + NOT NULL + +component_id + + [INTEGER] + NOT NULL + +found_commit + + [VARCHAR] + NOT NULL + +found_url + + [VARCHAR] + NOT NULL + +repo_finder_outcome + + [VARCHAR(24)] + NOT NULL _component--_repo_finder_metadata - -0..N -1 + +0..N +1 _repository - -_repository - -id - [INTEGER] - NOT NULL - -branch_name - [VARCHAR] - -commit_date - [VARCHAR] - NOT NULL - -commit_sha - [VARCHAR] - NOT NULL - -complete_name - [VARCHAR] - NOT NULL - -component_id - [INTEGER] - NOT NULL - -fs_path - [VARCHAR] - NOT NULL - -full_name - [VARCHAR] - NOT NULL - -name - [VARCHAR] - NOT NULL - -owner - [VARCHAR] - -release_tag - [VARCHAR] - -remote_path - [VARCHAR] - NOT NULL - -type - [VARCHAR] - NOT NULL + +_repository + +id + + [INTEGER] + NOT NULL + +branch_name + + [VARCHAR] + +commit_date + + [VARCHAR] + NOT NULL + +commit_sha + + [VARCHAR] + NOT NULL + +complete_name + + [VARCHAR] + NOT NULL + +component_id + + [INTEGER] + NOT NULL + +fs_path + + [VARCHAR] + NOT NULL + +full_name + + [VARCHAR] + NOT NULL + +name + + [VARCHAR] + NOT NULL + +owner + + [VARCHAR] + +release_tag + + [VARCHAR] + +remote_path + + [VARCHAR] + NOT NULL 
+ +type + + [VARCHAR] + NOT NULL _component--_repository - -0..N -1 + +0..N +1 _slsa_level - -_slsa_level - -component_id - [INTEGER] - NOT NULL - -reached - [BOOLEAN] - NOT NULL - -slsa_level - [INTEGER] - NOT NULL + +_slsa_level + +component_id + + [INTEGER] + NOT NULL + +reached + + [BOOLEAN] + NOT NULL + +slsa_level + + [INTEGER] + NOT NULL _component--_slsa_level - -1 -1 + +1 +1 _slsa_requirement - -_slsa_requirement - -id - [INTEGER] - NOT NULL - -component_id - [INTEGER] - NOT NULL - -feedback - [VARCHAR] - -requirement_name - [VARCHAR(27)] - NOT NULL - -requirement_short_description - [VARCHAR] + +_slsa_requirement + +id + + [INTEGER] + NOT NULL + +component_id + + [INTEGER] + NOT NULL + +feedback + + [VARCHAR] + +requirement_name + + [VARCHAR(27)] + NOT NULL + +requirement_short_description + + [VARCHAR] _component--_slsa_requirement - -0..N -1 + +0..N +1 _artifact_pipeline_check - -_artifact_pipeline_check - -id - [INTEGER] - NOT NULL - -deploy_job - [VARCHAR] - -deploy_step - [VARCHAR] - -deploy_workflow - [VARCHAR] - -from_provenance - [BOOLEAN] - NOT NULL - -published_before_commit - [BOOLEAN] - NOT NULL - -run_deleted - [BOOLEAN] - NOT NULL - -run_url - [VARCHAR] + +_artifact_pipeline_check + +id + + [INTEGER] + NOT NULL + +deploy_job + + [VARCHAR] + +deploy_step + + [VARCHAR] + +deploy_workflow + + [VARCHAR] + +from_provenance + + [BOOLEAN] + NOT NULL + +published_before_commit + + [BOOLEAN] + NOT NULL + +run_deleted + + [BOOLEAN] + NOT NULL + +run_url + + [VARCHAR] _check_facts--_artifact_pipeline_check - -1 -1 + +1 +1 _build_as_code_check - -_build_as_code_check - -id - [INTEGER] - NOT NULL - -build_tool_name - [VARCHAR] - NOT NULL - -build_trigger - [VARCHAR] - -ci_service_name - [VARCHAR] - NOT NULL - -deploy_command - [VARCHAR] - -language - [VARCHAR] - NOT NULL - -language_distributions - [VARCHAR] - -language_url - [VARCHAR] - -language_versions - [VARCHAR] + +_build_as_code_check + +id + + [INTEGER] + NOT NULL + +build_tool_name + + [VARCHAR] 
+ NOT NULL + +build_trigger + + [VARCHAR] + +ci_service_name + + [VARCHAR] + NOT NULL + +deploy_command + + [VARCHAR] + +language + + [VARCHAR] + NOT NULL + +language_distributions + + [VARCHAR] + +language_url + + [VARCHAR] + +language_versions + + [VARCHAR] _check_facts--_build_as_code_check - -1 -1 + +1 +1 _build_script_check - -_build_script_check - -id - [INTEGER] - NOT NULL - -build_tool_command - [VARCHAR] - -build_tool_name - [VARCHAR] - NOT NULL - -build_trigger - [VARCHAR] - -ci_service_name - [VARCHAR] - NOT NULL - -language - [VARCHAR] - NOT NULL - -language_distributions - [VARCHAR] - -language_url - [VARCHAR] - -language_versions - [VARCHAR] + +_build_script_check + +id + + [INTEGER] + NOT NULL + +build_tool_command + + [VARCHAR] + +build_tool_name + + [VARCHAR] + NOT NULL + +build_trigger + + [VARCHAR] + +ci_service_name + + [VARCHAR] + NOT NULL + +language + + [VARCHAR] + NOT NULL + +language_distributions + + [VARCHAR] + +language_url + + [VARCHAR] + +language_versions + + [VARCHAR] _check_facts--_build_script_check - -1 -1 + +1 +1 _build_service_check - -_build_service_check - -id - [INTEGER] - NOT NULL - -build_command - [VARCHAR] - -build_tool_name - [VARCHAR] - NOT NULL - -build_trigger - [VARCHAR] - -ci_service_name - [VARCHAR] - NOT NULL - -language - [VARCHAR] - NOT NULL - -language_distributions - [VARCHAR] - -language_url - [VARCHAR] - -language_versions - [VARCHAR] + +_build_service_check + +id + + [INTEGER] + NOT NULL + +build_command + + [VARCHAR] + +build_tool_name + + [VARCHAR] + NOT NULL + +build_trigger + + [VARCHAR] + +ci_service_name + + [VARCHAR] + NOT NULL + +language + + [VARCHAR] + NOT NULL + +language_distributions + + [VARCHAR] + +language_url + + [VARCHAR] + +language_versions + + [VARCHAR] _check_facts--_build_service_check - -1 -1 + +1 +1 _build_tool_check - -_build_tool_check - -id - [INTEGER] - NOT NULL - -build_tool_name - [VARCHAR] - NOT NULL - -language - [VARCHAR] - NOT NULL + +_build_tool_check + +id + + [INTEGER] 
+ NOT NULL + +build_tool_name + + [VARCHAR] + NOT NULL + +language + + [VARCHAR] + NOT NULL _check_facts--_build_tool_check - -1 -1 + +1 +1 _cue_expectation - -_cue_expectation - -id - [INTEGER] - NOT NULL - -asset_url - [VARCHAR] - -description - [VARCHAR] - NOT NULL - -expectation_type - [VARCHAR] - NOT NULL - -path - [VARCHAR] - NOT NULL - -sha - [VARCHAR] - -target - [VARCHAR] - NOT NULL - -text - [VARCHAR] + +_cue_expectation + +id + + [INTEGER] + NOT NULL + +asset_url + + [VARCHAR] + +description + + [VARCHAR] + NOT NULL + +expectation_type + + [VARCHAR] + NOT NULL + +path + + [VARCHAR] + NOT NULL + +sha + + [VARCHAR] + +target + + [VARCHAR] + NOT NULL + +text + + [VARCHAR] _check_facts--_cue_expectation - -1 -1 + +1 +1 _detect_malicious_metadata_check - -_detect_malicious_metadata_check - -id - [INTEGER] - NOT NULL - -detail_information - [JSON] - NOT NULL - -known_malware - [VARCHAR] - -result - [JSON] - NOT NULL + +_detect_malicious_metadata_check + +id + + [INTEGER] + NOT NULL + +detail_information + + [JSON] + NOT NULL + +known_malware + + [VARCHAR] + +result + + [JSON] + NOT NULL _check_facts--_detect_malicious_metadata_check - -1 -1 + +1 +1 _github_actions_vulnerabilities_check - -_github_actions_vulnerabilities_check - -id - [INTEGER] - NOT NULL - -caller_workflow - [VARCHAR] - NOT NULL - -github_actions_id - [VARCHAR] - NOT NULL - -github_actions_version - [VARCHAR] - NOT NULL - -vulnerability_urls - [JSON] - NOT NULL + +_github_actions_vulnerabilities_check + +id + + [INTEGER] + NOT NULL + +caller_workflow + + [VARCHAR] + NOT NULL + +github_actions_id + + [VARCHAR] + NOT NULL + +github_actions_version + + [VARCHAR] + NOT NULL + +vulnerability_urls + + [JSON] + NOT NULL _check_facts--_github_actions_vulnerabilities_check - -1 -1 + +1 +1 _provenance_available_check - -_provenance_available_check - -id - [INTEGER] - NOT NULL - -asset_name - [VARCHAR] - -asset_url - [VARCHAR] + +_provenance_available_check + +id + + [INTEGER] + NOT NULL + +asset_name + 
+ [VARCHAR] + +asset_url + + [VARCHAR] _check_facts--_provenance_available_check - -1 -1 + +1 +1 _provenance_derived_commit_check - -_provenance_derived_commit_check - -id - [INTEGER] - NOT NULL - -commit_info - [VARCHAR] + +_provenance_derived_commit_check + +id + + [INTEGER] + NOT NULL + +commit_info + + [VARCHAR] _check_facts--_provenance_derived_commit_check - -1 -1 + +1 +1 _provenance_derived_repo_check - -_provenance_derived_repo_check - -id - [INTEGER] - NOT NULL - -repository_info - [VARCHAR] + +_provenance_derived_repo_check + +id + + [INTEGER] + NOT NULL + +repository_info + + [VARCHAR] _check_facts--_provenance_derived_repo_check - -1 -1 + +1 +1 _provenance_verified_check - -_provenance_verified_check - -id - [INTEGER] - NOT NULL - -build_level - [INTEGER] - NOT NULL - -build_type - [VARCHAR] + +_provenance_verified_check + +id + + [INTEGER] + NOT NULL + +build_level + + [INTEGER] + NOT NULL + +build_type + + [VARCHAR] _check_facts--_provenance_verified_check - -1 -1 + +1 +1 _provenance_witness_l1_check - -_provenance_witness_l1_check - -id - [INTEGER] - NOT NULL - -artifact_url - [VARCHAR] - -provenance_name - [VARCHAR] - NOT NULL - -provenance_url - [VARCHAR] + +_provenance_witness_l1_check + +id + + [INTEGER] + NOT NULL + +artifact_url + + [VARCHAR] + +provenance_name + + [VARCHAR] + NOT NULL + +provenance_url + + [VARCHAR] _check_facts--_provenance_witness_l1_check - -1 -1 + +1 +1 _scm_authenticity_check - -_scm_authenticity_check - -id - [INTEGER] - NOT NULL - -build_tool - [VARCHAR] - NOT NULL - -fork_count - [INTEGER] - -reason - [VARCHAR] - NOT NULL - -repo_link - [VARCHAR] - -stars_count - [INTEGER] - -status - [VARCHAR] - NOT NULL + +_scm_authenticity_check + +id + + [INTEGER] + NOT NULL + +build_tool + + [VARCHAR] + NOT NULL + +fork_count + + [INTEGER] + +reason + + [VARCHAR] + NOT NULL + +repo_link + + [VARCHAR] + +stars_count + + [INTEGER] + +status + + [VARCHAR] + NOT NULL _check_facts--_scm_authenticity_check - -1 -1 + +1 +1 
_trusted_builder_check - -_trusted_builder_check - -id - [INTEGER] - NOT NULL - -build_tool_name - [VARCHAR] - NOT NULL - -build_trigger - [VARCHAR] - -ci_service_name - [VARCHAR] - NOT NULL + +_trusted_builder_check + +id + + [INTEGER] + NOT NULL + +build_tool_name + + [VARCHAR] + NOT NULL + +build_trigger + + [VARCHAR] + +ci_service_name + + [VARCHAR] + NOT NULL _check_facts--_trusted_builder_check - -1 -1 + +1 +1 _vcs_check - -_vcs_check - -id - [INTEGER] - NOT NULL - -git_repo - [VARCHAR] + +_vcs_check + +id + + [INTEGER] + NOT NULL + +git_repo + + [VARCHAR] _check_facts--_vcs_check - -1 -1 + +1 +1 _check_result--_check_facts - -0..N -1 + +0..N +1 _release_artifact - -_release_artifact - -id - [INTEGER] - NOT NULL - -name - [VARCHAR] - NOT NULL - -provenance_id - [INTEGER] - -slsa_verified - [BOOLEAN] + +_release_artifact + +id + + [INTEGER] + NOT NULL + +name + + [VARCHAR] + NOT NULL + +provenance_id + + [INTEGER] + +slsa_verified + + [BOOLEAN] _provenance--_release_artifact - -0..N -{0,1} + +0..N +{0,1} _hash_digest - -_hash_digest - -id - [INTEGER] - NOT NULL - -artifact_id - [INTEGER] - NOT NULL - -digest - [VARCHAR] - NOT NULL - -digest_algorithm - [VARCHAR] - NOT NULL + +_hash_digest + +id + + [INTEGER] + NOT NULL + +artifact_id + + [INTEGER] + NOT NULL + +digest + + [VARCHAR] + NOT NULL + +digest_algorithm + + [VARCHAR] + NOT NULL _release_artifact--_hash_digest - -0..N -1 + +0..N +1 From 5531b1df5ee506660ea06009e9443ac0316e2ee3 Mon Sep 17 00:00:00 2001 From: Ben Selwyn-Smith Date: Wed, 21 May 2025 10:52:51 +1000 Subject: [PATCH 4/5] chore: add pypi attestation asset information Signed-off-by: Ben Selwyn-Smith --- src/macaron/provenance/provenance_finder.py | 12 +++++------ .../repo_finder/repo_finder_deps_dev.py | 20 ++++++++++--------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/src/macaron/provenance/provenance_finder.py b/src/macaron/provenance/provenance_finder.py index dee99893c..715204a16 100644 --- 
a/src/macaron/provenance/provenance_finder.py +++ b/src/macaron/provenance/provenance_finder.py @@ -294,7 +294,7 @@ def find_gav_provenance(purl: PackageURL, registry: JFrogMavenRegistry) -> list[ return provenances[:1] -def find_pypi_provenance(purl: PackageURL) -> list[InTotoPayload]: +def find_pypi_provenance(purl: PackageURL) -> list[ProvenanceAsset]: """Find and download the PyPI based provenance for the passed PURL. Parameters @@ -304,11 +304,11 @@ def find_pypi_provenance(purl: PackageURL) -> list[InTotoPayload]: Returns ------- - list[InTotoPayload] | None - The provenance payload if found, or an empty list otherwise. + list[ProvenanceAsset] + The provenance assets found, or an empty list otherwise. """ - attestation, verified = DepsDevRepoFinder.get_attestation(purl) - if not attestation: + attestation, url, verified = DepsDevRepoFinder.get_attestation(purl) + if not (attestation and url): return [] with tempfile.TemporaryDirectory() as temp_dir: @@ -319,7 +319,7 @@ def find_pypi_provenance(purl: PackageURL) -> list[InTotoPayload]: try: payload = load_provenance_payload(file_name) payload.verified = verified - return [payload] + return [ProvenanceAsset(payload, purl.name, url)] except LoadIntotoAttestationError as load_error: logger.error("Error while loading provenance: %s", load_error) return [] diff --git a/src/macaron/repo_finder/repo_finder_deps_dev.py b/src/macaron/repo_finder/repo_finder_deps_dev.py index 9d723c2d9..07b5e4f34 100644 --- a/src/macaron/repo_finder/repo_finder_deps_dev.py +++ b/src/macaron/repo_finder/repo_finder_deps_dev.py @@ -164,7 +164,7 @@ def get_latest_version(purl: PackageURL) -> tuple[PackageURL | None, RepoFinderI ) @staticmethod - def get_attestation(purl: PackageURL) -> tuple[dict | None, bool]: + def get_attestation(purl: PackageURL) -> tuple[dict | None, str | None, bool]: """Retrieve the attestation associated with the passed PURL. 
Parameters @@ -174,17 +174,18 @@ def get_attestation(purl: PackageURL) -> tuple[dict | None, bool]: Returns ------- - tuple[dict | None, bool] - The attestation, or None if not found, and a flag for whether it is verified. + tuple[dict | None, str | None, bool] + The attestation, or None if not found, the url of the attestation asset, + and a flag for whether the attestation is verified. """ if purl.type != "pypi": logger.debug("PURL type (%s) attestation not yet supported via deps.dev.") - return None, False + return None, None, False if not purl.version: latest_purl, _ = DepsDevRepoFinder.get_latest_version(purl) if not latest_purl: - return None, False + return None, None, False purl = latest_purl # Example of a PURL endpoint for deps.dev with '/' encoded as '%2F': @@ -194,7 +195,7 @@ def get_attestation(purl: PackageURL) -> tuple[dict | None, bool]: result = send_get_http(target_url, headers={}) if not result: - return None, False + return None, None, False attestation_keys = ["attestations"] if "version" in result: @@ -203,21 +204,22 @@ def get_attestation(purl: PackageURL) -> tuple[dict | None, bool]: result_attestations = json_extract(result, attestation_keys, list) if not result_attestations: logger.debug("No attestations in result.") - return None, False + return None, None, False if len(result_attestations) > 1: logger.debug("More than one attestation in result: %s", len(result_attestations)) attestation_url = json_extract(result_attestations, [0, "url"], str) if not attestation_url: logger.debug("No attestation reported for %s", purl) - return None, False + return None, None, False attestation_data = send_get_http(attestation_url, headers={}) if not attestation_data: - return None, False + return None, None, False return ( PyPIRegistry().extract_attestation(attestation_data), + attestation_url, json_extract(result_attestations, [0, "verified"], bool) or False, ) From 631c6f101e1a412c6b2c28edec4f974edab07e96 Mon Sep 17 00:00:00 2001 From: Ben Selwyn-Smith 
Date: Wed, 21 May 2025 10:56:36 +1000 Subject: [PATCH 5/5] chore: update test Signed-off-by: Ben Selwyn-Smith --- tests/repo_finder/test_repo_finder_deps_dev.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/repo_finder/test_repo_finder_deps_dev.py b/tests/repo_finder/test_repo_finder_deps_dev.py index 9df584fc5..1de5fae25 100644 --- a/tests/repo_finder/test_repo_finder_deps_dev.py +++ b/tests/repo_finder/test_repo_finder_deps_dev.py @@ -165,7 +165,7 @@ def test_get_attestation_failures( httpserver.expect_request(target_url).respond_with_data(data) - result, _ = DepsDevRepoFinder().get_attestation(purl) + result, _, _ = DepsDevRepoFinder().get_attestation(purl) assert not result @@ -195,6 +195,7 @@ def test_get_attestation_success(httpserver: HTTPServer, deps_dev_service_mock: """ data = data.replace("*replace_url*", attestation_url) httpserver.expect_request(target_url).respond_with_data(data) - result, verified = DepsDevRepoFinder().get_attestation(purl) + result, url, verified = DepsDevRepoFinder().get_attestation(purl) assert result + assert url == attestation_url assert verified