Skip to content

chore: store provenance asset info #975

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,592 changes: 876 additions & 716 deletions docs/source/assets/er-diagram.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
9 changes: 6 additions & 3 deletions src/macaron/database/table_definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,9 +499,6 @@ class Provenance(ORMBase):
#: The release tag commit sha.
release_commit_sha: Mapped[str] = mapped_column(String, nullable=True)

#: The release tag.
release_tag: Mapped[str] = mapped_column(String, nullable=True)

#: The repository URL from the provenance.
repository_url: Mapped[str] = mapped_column(String, nullable=True)

Expand All @@ -511,6 +508,12 @@ class Provenance(ORMBase):
#: The provenance payload.
provenance_payload: Mapped[InTotoPayload] = mapped_column(ProvenancePayload, nullable=False)

#: The name of the provenance asset.
provenance_asset_name: Mapped[str] = mapped_column(String, nullable=True)

#: The URL of the provenance asset.
provenance_asset_url: Mapped[str] = mapped_column(String, nullable=True)

#: The verified status of the provenance.
verified: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)

Expand Down
60 changes: 38 additions & 22 deletions src/macaron/provenance/provenance_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import logging
import os
import tempfile
from dataclasses import dataclass
from functools import partial

from packageurl import PackageURL
Expand All @@ -30,6 +31,15 @@
logger: logging.Logger = logging.getLogger(__name__)


@dataclass(frozen=True)
class ProvenanceAsset:
    """Hold a provenance payload together with the name and URL of the asset it was obtained from.

    Instances are immutable (``frozen=True``). The provenance finder functions return
    this type in place of a bare ``InTotoPayload`` so that the asset's name and URL
    travel alongside the payload.
    """

    #: The loaded in-toto provenance payload.
    payload: InTotoPayload
    #: The name recorded for the asset. Finders pass the asset's own name; the PyPI
    #: finder passes the package name from the PURL instead.
    name: str
    #: The URL the provenance asset was retrieved from.
    url: str


class ProvenanceFinder:
"""This class is used to find and retrieve provenance files from supported registries."""

Expand All @@ -44,7 +54,7 @@ def __init__(self) -> None:
elif isinstance(registry, JFrogMavenRegistry):
self.jfrog_registry = registry

def find_provenance(self, purl: PackageURL) -> list[InTotoPayload]:
def find_provenance(self, purl: PackageURL) -> list[ProvenanceAsset]:
"""Find the provenance file(s) of the passed PURL.

Parameters
Expand All @@ -54,8 +64,8 @@ def find_provenance(self, purl: PackageURL) -> list[InTotoPayload]:

Returns
-------
list[InTotoPayload]
The provenance payload, or an empty list if not found.
list[ProvenanceAsset]
The provenance asset, or an empty list if not found.
"""
logger.debug("Seeking provenance of: %s", purl)

Expand Down Expand Up @@ -88,7 +98,7 @@ def find_provenance(self, purl: PackageURL) -> list[InTotoPayload]:
logger.debug("Provenance finding not supported for PURL type: %s", purl.type)
return []

def _find_provenance(self, discovery_functions: list[partial[list[InTotoPayload]]]) -> list[InTotoPayload]:
def _find_provenance(self, discovery_functions: list[partial[list[ProvenanceAsset]]]) -> list[ProvenanceAsset]:
"""Find the provenance file(s) using the passed discovery functions.

Parameters
Expand All @@ -99,7 +109,7 @@ def _find_provenance(self, discovery_functions: list[partial[list[InTotoPayload]
Returns
-------
list[ProvenanceAsset]
The provenance payload(s) from the first successful function, or an empty list if none were.
The provenance asset(s) from the first successful function, or an empty list if none were.
"""
if not discovery_functions:
return []
Expand All @@ -114,7 +124,7 @@ def _find_provenance(self, discovery_functions: list[partial[list[InTotoPayload]
return []


def find_npm_provenance(purl: PackageURL, registry: NPMRegistry) -> list[InTotoPayload]:
def find_npm_provenance(purl: PackageURL, registry: NPMRegistry) -> list[ProvenanceAsset]:
"""Find and download the NPM based provenance for the passed PURL.

Two kinds of attestation can be retrieved from npm: "Provenance" and "Publish". The "Provenance" attestation
Expand All @@ -131,8 +141,8 @@ def find_npm_provenance(purl: PackageURL, registry: NPMRegistry) -> list[InTotoP

Returns
-------
list[InTotoPayload]
The provenance payload(s), or an empty list if not found.
list[ProvenanceAsset]
The provenance asset(s), or an empty list if not found.
"""
if not registry.enabled:
logger.debug("The npm registry is not enabled.")
Expand Down Expand Up @@ -178,16 +188,19 @@ def find_npm_provenance(purl: PackageURL, registry: NPMRegistry) -> list[InTotoP
publish_payload = load_provenance_payload(signed_download_path)
except LoadIntotoAttestationError as error:
logger.error("Error while loading publish attestation: %s", error)
return [provenance_payload]
return [ProvenanceAsset(provenance_payload, npm_provenance_asset.name, npm_provenance_asset.url)]

return [provenance_payload, publish_payload]
return [
ProvenanceAsset(provenance_payload, npm_provenance_asset.name, npm_provenance_asset.url),
ProvenanceAsset(publish_payload, npm_provenance_asset.name, npm_provenance_asset.url),
]

except OSError as error:
logger.error("Error while storing provenance in the temporary directory: %s", error)
return []


def find_gav_provenance(purl: PackageURL, registry: JFrogMavenRegistry) -> list[InTotoPayload]:
def find_gav_provenance(purl: PackageURL, registry: JFrogMavenRegistry) -> list[ProvenanceAsset]:
"""Find and download the GAV based provenance for the passed PURL.

Parameters
Expand All @@ -199,8 +212,8 @@ def find_gav_provenance(purl: PackageURL, registry: JFrogMavenRegistry) -> list[

Returns
-------
list[InTotoPayload] | None
The provenance payload if found, or an empty list otherwise.
list[ProvenanceAsset]
The provenance asset if found, or an empty list otherwise.

Raises
------
Expand Down Expand Up @@ -269,7 +282,7 @@ def find_gav_provenance(purl: PackageURL, registry: JFrogMavenRegistry) -> list[
if not is_witness_provenance_payload(provenance_payload, witness_verifier_config.predicate_types):
continue

provenances.append(provenance_payload)
provenances.append(ProvenanceAsset(provenance_payload, provenance_asset.name, provenance_asset.url))
except OSError as error:
logger.error("Error while storing provenance in the temporary directory: %s", error)

Expand All @@ -281,7 +294,7 @@ def find_gav_provenance(purl: PackageURL, registry: JFrogMavenRegistry) -> list[
return provenances[:1]


def find_pypi_provenance(purl: PackageURL) -> list[InTotoPayload]:
def find_pypi_provenance(purl: PackageURL) -> list[ProvenanceAsset]:
"""Find and download the PyPI based provenance for the passed PURL.

Parameters
Expand All @@ -291,11 +304,11 @@ def find_pypi_provenance(purl: PackageURL) -> list[InTotoPayload]:

Returns
-------
list[InTotoPayload] | None
The provenance payload if found, or an empty list otherwise.
list[ProvenanceAsset]
The provenance assets found, or an empty list otherwise.
"""
attestation, verified = DepsDevRepoFinder.get_attestation(purl)
if not attestation:
attestation, url, verified = DepsDevRepoFinder.get_attestation(purl)
if not (attestation and url):
return []

with tempfile.TemporaryDirectory() as temp_dir:
Expand All @@ -306,15 +319,15 @@ def find_pypi_provenance(purl: PackageURL) -> list[InTotoPayload]:
try:
payload = load_provenance_payload(file_name)
payload.verified = verified
return [payload]
return [ProvenanceAsset(payload, purl.name, url)]
except LoadIntotoAttestationError as load_error:
logger.error("Error while loading provenance: %s", load_error)
return []


def find_provenance_from_ci(
analyze_ctx: AnalyzeContext, git_obj: Git | None, download_path: str
) -> InTotoPayload | None:
) -> ProvenanceAsset | None:
"""Try to find provenance from CI services of the repository.

Note that we stop going through the CI services once we encounter a CI service
Expand Down Expand Up @@ -409,7 +422,10 @@ def find_provenance_from_ci(
download_provenances_from_ci_service(ci_info, download_path)

# TODO consider how to handle multiple payloads here.
return ci_info["provenances"][0].payload if ci_info["provenances"] else None
if ci_info["provenances"]:
provenance = ci_info["provenances"][0]
return ProvenanceAsset(provenance.payload, provenance.asset.name, provenance.asset.url)
return None

else:
logger.debug("CI service not supported for provenance finding: %s", ci_service.name)
Expand Down
26 changes: 15 additions & 11 deletions src/macaron/provenance/provenance_verifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from macaron.config.defaults import defaults
from macaron.config.global_config import global_config
from macaron.provenance.provenance_extractor import ProvenancePredicate, SLSAGithubGenericBuildDefinitionV01
from macaron.provenance.provenance_finder import ProvenanceAsset
from macaron.repo_finder.commit_finder import AbstractPurlType, determine_abstract_purl_type
from macaron.slsa_analyzer.analyze_context import AnalyzeContext
from macaron.slsa_analyzer.asset import AssetLocator
Expand All @@ -28,15 +29,15 @@
logger: logging.Logger = logging.getLogger(__name__)


def verify_provenance(purl: PackageURL, provenance: list[InTotoPayload]) -> bool:
def verify_provenance(purl: PackageURL, provenance_assets: list[ProvenanceAsset]) -> bool:
"""Verify the passed provenance.

Parameters
----------
purl: PackageURL
The PURL of the analysis target.
provenance: list[InTotoPayload]
The list of provenance.
provenance_assets: list[ProvenanceAsset]
The list of provenance assets.

Returns
-------
Expand All @@ -50,7 +51,7 @@ def verify_provenance(purl: PackageURL, provenance: list[InTotoPayload]) -> bool
verification_function = None

if purl.type == "npm":
verification_function = partial(verify_npm_provenance, purl, provenance)
verification_function = partial(verify_npm_provenance, purl, provenance_assets)

# TODO other verification functions go here.

Expand All @@ -61,31 +62,34 @@ def verify_provenance(purl: PackageURL, provenance: list[InTotoPayload]) -> bool
return False


def verify_npm_provenance(purl: PackageURL, provenance: list[InTotoPayload]) -> bool:
def verify_npm_provenance(purl: PackageURL, provenance_assets: list[ProvenanceAsset]) -> bool:
"""Compare the unsigned payload subject digest with the signed payload digest, if available.

Parameters
----------
purl: PackageURL
The PURL of the analysis target.
provenance: list[InTotoPayload]
The provenances to verify.
provenance_assets: list[ProvenanceAsset]
The provenance assets to verify.

Returns
-------
bool
True if the provenance was verified, or False otherwise.
"""
if len(provenance) != 2:
logger.debug("Expected unsigned and signed provenance.")
if len(provenance_assets) != 2:
logger.debug("Expected unsigned and signed provenance assets.")
return False

signed_subjects = provenance[1].statement.get("subject")
signed_provenance = provenance_assets[1].payload
unsigned_provenance = provenance_assets[0].payload

signed_subjects = signed_provenance.statement.get("subject")
if not signed_subjects:
logger.debug("Missing signed subjects.")
return False

unsigned_subjects = provenance[0].statement.get("subject")
unsigned_subjects = unsigned_provenance.statement.get("subject")
if not unsigned_subjects:
logger.debug("Missing unsigned subjects.")
return False
Expand Down
20 changes: 11 additions & 9 deletions src/macaron/repo_finder/repo_finder_deps_dev.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ def get_latest_version(purl: PackageURL) -> tuple[PackageURL | None, RepoFinderI
)

@staticmethod
def get_attestation(purl: PackageURL) -> tuple[dict | None, bool]:
def get_attestation(purl: PackageURL) -> tuple[dict | None, str | None, bool]:
"""Retrieve the attestation associated with the passed PURL.

Parameters
Expand All @@ -174,17 +174,18 @@ def get_attestation(purl: PackageURL) -> tuple[dict | None, bool]:

Returns
-------
tuple[dict | None, bool]
The attestation, or None if not found, and a flag for whether it is verified.
tuple[dict | None, str | None, bool]
The attestation, or None if not found, the url of the attestation asset,
and a flag for whether the attestation is verified.
"""
if purl.type != "pypi":
logger.debug("PURL type (%s) attestation not yet supported via deps.dev.")
return None, False
return None, None, False

if not purl.version:
latest_purl, _ = DepsDevRepoFinder.get_latest_version(purl)
if not latest_purl:
return None, False
return None, None, False
purl = latest_purl

# Example of a PURL endpoint for deps.dev with '/' encoded as '%2F':
Expand All @@ -194,7 +195,7 @@ def get_attestation(purl: PackageURL) -> tuple[dict | None, bool]:

result = send_get_http(target_url, headers={})
if not result:
return None, False
return None, None, False

attestation_keys = ["attestations"]
if "version" in result:
Expand All @@ -203,21 +204,22 @@ def get_attestation(purl: PackageURL) -> tuple[dict | None, bool]:
result_attestations = json_extract(result, attestation_keys, list)
if not result_attestations:
logger.debug("No attestations in result.")
return None, False
return None, None, False
if len(result_attestations) > 1:
logger.debug("More than one attestation in result: %s", len(result_attestations))

attestation_url = json_extract(result_attestations, [0, "url"], str)
if not attestation_url:
logger.debug("No attestation reported for %s", purl)
return None, False
return None, None, False

attestation_data = send_get_http(attestation_url, headers={})
if not attestation_data:
return None, False
return None, None, False

return (
PyPIRegistry().extract_attestation(attestation_data),
attestation_url,
json_extract(result_attestations, [0, "verified"], bool) or False,
)

Expand Down
15 changes: 10 additions & 5 deletions src/macaron/slsa_analyzer/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,15 +357,17 @@ def run_single(
package_registries_info = self._populate_package_registry_info()

provenance_is_verified = False
provenance_asset = None
if not provenance_payload and parsed_purl:
# Try to find the provenance file for the parsed PURL.
provenance_finder = ProvenanceFinder()
provenances = provenance_finder.find_provenance(parsed_purl)
if provenances:
provenance_payload = provenances[0]
provenance_asset = provenances[0]
provenance_payload = provenance_asset.payload
if provenance_payload.verified:
provenance_is_verified = True
elif verify_provenance:
if verify_provenance:
provenance_is_verified = provenance_verifier.verify_provenance(parsed_purl, provenances)

# Try to extract the repository URL and commit digest from the Provenance, if it exists.
Expand Down Expand Up @@ -490,10 +492,11 @@ def run_single(
if not provenance_payload:
# Look for provenance using the CI.
with tempfile.TemporaryDirectory() as temp_dir:
provenance_payload = find_provenance_from_ci(analyze_ctx, git_obj, temp_dir)
provenance_asset = find_provenance_from_ci(analyze_ctx, git_obj, temp_dir)
# If found, validate analysis target against new provenance.
if provenance_payload:
if provenance_asset:
# If repository URL was not provided as input, check the one found during analysis.
provenance_payload = provenance_asset.payload
if not repo_path_input and component.repository:
repo_path_input = component.repository.remote_path
provenance_repo_url = provenance_commit_digest = None
Expand Down Expand Up @@ -538,7 +541,9 @@ def run_single(
provenance_payload=provenance_payload,
slsa_level=slsa_level,
slsa_version=slsa_version,
# TODO Add release tag, release digest.
provenance_asset_name=provenance_asset.name if provenance_asset else None,
provenance_asset_url=provenance_asset.url if provenance_asset else None,
# TODO Add release digest.
)

analyze_ctx.dynamic_data["validate_malware"] = validate_malware
Expand Down
Loading
Loading