From 46dd014b3ad03327ce51714a4955b92583463823 Mon Sep 17 00:00:00 2001 From: Miroslav Shubernetskiy Date: Tue, 5 Nov 2024 13:26:17 -0500 Subject: [PATCH] fix: external tools producing duplicate keys Some tools work on context directories (e.g. sbom) and for non-fs artifacts (e.g. docker image), sbom tool would run both during host chalk time as well as artifact chalk time collections. As both would collect its output for the same path input, it would produce duplicate output in both report and chalk mark levels. Now any tool can run at most once and any future attempts for the same path are skipped which removes duplicate keys. --- CHANGELOG.md | 5 +++++ src/plugins/externalTool.nim | 18 +++++++++++------- tests/functional/test_plugins.py | 5 ++++- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 17238e28..dd5ac14d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,11 @@ - Docker pushing non-chalked images did not report metsys plugin keys such as `_EXIT_CODE`, `_CHALK_RUN_TIME`. ([#438](https://github.com/crashappsec/chalk/pull/438)) +- External tools for non-file artifacts (e.g. docker image) + sent duplicate keys in both report-level as well as + chalk-mark level. For example `SBOM` key with equivalent + content was duplicated twice. + ([#440](https://github.com/crashappsec/chalk/pull/440)) ### New Features diff --git a/src/plugins/externalTool.nim b/src/plugins/externalTool.nim index f0774fbf..f56e5557 100644 --- a/src/plugins/externalTool.nim +++ b/src/plugins/externalTool.nim @@ -8,11 +8,13 @@ ## This plugin uses information from the config file to set metadata ## keys. -import std/[algorithm, sequtils] +import std/[algorithm, sequtils, sets] import ".."/[config, plugin_api, util] -type PIInfo = ref object - name: string +type + AlreadyRanError = object of CatchableError + PIInfo = ref object + name: string proc ensureRunCallback[T](cb: CallbackObj, args: seq[Box]): T = let value = runCallback(cb, args) @@ -20,11 +22,11 @@ proc ensureRunCallback[T](cb: CallbackObj, args: seq[Box]): T = raise newException(ValueError, "missing implemenetation of " & $(cb)) return unpack[T](value.get()) -var toolCache = initTable[string, ChalkDict]() +var alreadyRan = initHashSet[string]() proc runOneTool(info: PIInfo, path: string): ChalkDict = let key = info.name & ":" & path - if key in toolCache: - return toolCache[key] + if key in alreadyRan: + raise newException(AlreadyRanError, "") trace("Running tool: " & info.name) result = ChalkDict() @@ -65,7 +67,7 @@ proc runOneTool(info: PIInfo, path: string): ChalkDict = d.del("info") trace(info.name & ": produced keys " & $(d.keys().toSeq())) - toolCache[key] = d + alreadyRan.incl(key) return d template toolBase(path: string) {.dirty.} = @@ -109,6 +111,8 @@ template toolBase(path: string) {.dirty.} = result.merge(data.nestWith(info.name)) if len(data) >= 0 and attrGet[bool]("tool." & info.name & ".stop_on_success"): break + except AlreadyRanError: + trace(info.name & ": already ran for " & path & ". skipping") except: error(info.name & ": " & getCurrentExceptionMsg()) diff --git a/tests/functional/test_plugins.py b/tests/functional/test_plugins.py index fb0cf5d7..88d61994 100644 --- a/tests/functional/test_plugins.py +++ b/tests/functional/test_plugins.py @@ -18,7 +18,7 @@ validate_virtual_chalk, ) from .conf import CODEOWNERS, CONFIGS, DATA, DOCKERFILES, LS_PATH, PYS -from .utils.dict import ANY +from .utils.dict import ANY, MISSING from .utils.docker import Docker from .utils.git import Git from .utils.log import get_logger @@ -869,6 +869,9 @@ def test_syft_docker(chalk_copy: Chalk, test_file: str, random_hex: str): tag=tag, ) + assert build.report.contains(sbom_data) + assert build.mark.has(SBOM=MISSING) + # artifact is the docker image artifact_info = ArtifactInfo( type="Docker Image",