Skip to content

Commit 199809e

Browse files
authored
fix: fix incorrect skip result evaluation causing false positives in PyPI malware reporting (#1031)
Resolved issue in ProbLog model where skip results were evaluated as false, causing many false positives. Rule IDs have also been added. Signed-off-by: Carl Flottmann <carl.flottmann@oracle.com>
1 parent 5f73035 commit 199809e

File tree

3 files changed

+106
-38
lines changed

3 files changed

+106
-38
lines changed

src/macaron/malware_analyzer/README.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,9 @@ When contributing an analyzer, it must meet the following requirements:
6161
- The analyzer name must be added to [heuristics.py](./pypi_heuristics/heuristics.py) file so it can be used for rule combinations in [detect_malicious_metadata_check.py](../slsa_analyzer/checks/detect_malicious_metadata_check.py)
6262
- Update the `malware_rules_problog_model` in [detect_malicious_metadata_check.py](../slsa_analyzer/checks/detect_malicious_metadata_check.py) with logical statements where the heuristic should be included. When adding new rules, please follow the following guidelines:
6363
- Provide a [confidence value](../slsa_analyzer/checks/check_result.py) using the `Confidence` enum.
64-
- Provide a name based on this confidence value (i.e. `high`, `medium`, or `low`)
65-
- If it does not already exist, make sure to assign this to the result variable (`problog_result_access`)
64+
- Ensure it is assigned to the `problog_result_access` string variable, otherwise it will not be queried and evaluated.
65+
- Assign a rule ID to the rule. This will be used to backtrack to determine if it was triggered.
66+
- Make sure to wrap pass/fail statements in `passed()` and `failed()`. Not doing so may result in undesirable behaviour, see the comments in the model for more details.
6667
- If there are commonly used combinations introduced by adding the heuristic, combine and justify them at the top of the static model (see `quickUndetailed` and `forceSetup` as current examples).
6768

6869
### Confidence Score Motivation

src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py

Lines changed: 71 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,9 @@ def validate_malware(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[boo
128128
is_malware, detail_info = sourcecode_analyzer.analyze()
129129
return is_malware, detail_info
130130

131-
def evaluate_heuristic_results(self, heuristic_results: dict[Heuristics, HeuristicResult]) -> float | None:
131+
def evaluate_heuristic_results(
132+
self, heuristic_results: dict[Heuristics, HeuristicResult]
133+
) -> tuple[float, JsonType]:
132134
"""Analyse the heuristic results to determine the maliciousness of the package.
133135
134136
Parameters
@@ -138,18 +140,19 @@ def evaluate_heuristic_results(self, heuristic_results: dict[Heuristics, Heurist
138140
139141
Returns
140142
-------
141-
float | None
142-
Returns the confidence associated with the detected malicious combination, otherwise None if no associated
143-
malicious combination was triggered.
143+
tuple[float, JsonType]
144+
Returns the confidence associated with the detected malicious combination, and associated rule IDs detailing
145+
what rules were triggered and their confidence as a dict[str, float] type.
144146
"""
145147
facts_list: list[str] = []
148+
triggered_rules: dict[str, JsonType] = {}
149+
146150
for heuristic, result in heuristic_results.items():
147-
if result == HeuristicResult.SKIP:
148-
facts_list.append(f"0.0::{heuristic.value}.")
149-
elif result == HeuristicResult.PASS:
151+
if result == HeuristicResult.PASS:
150152
facts_list.append(f"{heuristic.value} :- true.")
151-
else: # HeuristicResult.FAIL
153+
elif result == HeuristicResult.FAIL:
152154
facts_list.append(f"{heuristic.value} :- false.")
155+
# Do not define for HeuristicResult.SKIP
153156

154157
facts = "\n".join(facts_list)
155158
problog_code = f"{facts}\n\n{self.malware_rules_problog_model}"
@@ -158,10 +161,13 @@ def evaluate_heuristic_results(self, heuristic_results: dict[Heuristics, Heurist
158161
problog_model = PrologString(problog_code)
159162
problog_results: dict[Term, float] = get_evaluatable().create_from(problog_model).evaluate()
160163

161-
confidence: float | None = problog_results.get(Term(self.problog_result_access))
162-
if confidence == 0.0:
163-
return None # no rules were triggered
164-
return confidence
164+
confidence = problog_results.pop(Term(self.problog_result_access), 0.0)
165+
if confidence > 0: # a rule was triggered
166+
for term, conf in problog_results.items():
167+
if term.args:
168+
triggered_rules[str(term.args[0])] = conf
169+
170+
return confidence, triggered_rules
165171

166172
def run_heuristics(
167173
self, pypi_package_json: PyPIPackageJsonAsset
@@ -278,9 +284,10 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
278284
except HeuristicAnalyzerValueError:
279285
return CheckResultData(result_tables=[], result_type=CheckResultType.UNKNOWN)
280286

281-
confidence = self.evaluate_heuristic_results(result)
287+
confidence, triggered_rules = self.evaluate_heuristic_results(result)
288+
detail_info["triggered_rules"] = triggered_rules
282289
result_type = CheckResultType.FAILED
283-
if confidence is None:
290+
if not confidence:
284291
confidence = Confidence.HIGH
285292
result_type = CheckResultType.PASSED
286293
elif ctx.dynamic_data["validate_malware"]:
@@ -321,51 +328,79 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
321328
AnomalousVersionAnalyzer,
322329
]
323330

331+
# name used to query the result of all problog rules, so it can be accessed outside the model.
324332
problog_result_access = "result"
325333

326334
malware_rules_problog_model = f"""
327-
% Heuristic groupings
335+
% ----- Wrappers ------
336+
% When a heuristic is skipped, it is ommitted from the problog model facts definition. This means that references in this
337+
% static model must account for when they are not existent. These wrappers perform this function using the inbuilt try_call
338+
% problog function. It will try to evaluate the provided logic, and return false if it encounters an error, such as the fact
339+
% not being defined. For example, you are expecting A to pass, so we do:
340+
%
341+
% passed(A)
342+
%
343+
% If A was 'true', then this will return true, as A did pass. If A was 'false', then this will return false, as A did not pass.
344+
% If A was not defined, then this will return false, as A did not pass.
345+
% Please use these wrappers throughout the problog model for logic definitions.
346+
347+
passed(H) :- try_call(H).
348+
failed(H) :- try_call(not H).
349+
350+
% ----- Heuristic groupings -----
328351
% These are common combinations of heuristics that are used in many of the rules, thus themselves representing
329352
% certain behaviors. When changing or adding rules here, if there are frequent combinations of particular
330353
% heuristics, group them together here.
331354
332355
% Maintainer has recently joined, publishing an undetailed page with no links.
333-
quickUndetailed :- not {Heuristics.EMPTY_PROJECT_LINK.value}, not {Heuristics.CLOSER_RELEASE_JOIN_DATE.value}.
356+
quickUndetailed :- failed({Heuristics.EMPTY_PROJECT_LINK.value}), failed({Heuristics.CLOSER_RELEASE_JOIN_DATE.value}).
334357
335358
% Maintainer releases a suspicious setup.py and forces it to run by omitting a .whl file.
336-
forceSetup :- not {Heuristics.SUSPICIOUS_SETUP.value}, not {Heuristics.WHEEL_ABSENCE.value}.
359+
forceSetup :- failed({Heuristics.SUSPICIOUS_SETUP.value}), failed({Heuristics.WHEEL_ABSENCE.value}).
337360
338-
% Suspicious Combinations
361+
% ----- Suspicious Combinations -----
339362
340363
% Package released recently with little detail, forcing the setup.py to run.
341-
{Confidence.HIGH.value}::high :- quickUndetailed, forceSetup, not {Heuristics.ONE_RELEASE.value}.
342-
{Confidence.HIGH.value}::high :- quickUndetailed, forceSetup, not {Heuristics.HIGH_RELEASE_FREQUENCY.value}.
364+
{Confidence.HIGH.value}::trigger(malware_high_confidence_1) :-
365+
quickUndetailed, forceSetup, failed({Heuristics.ONE_RELEASE.value}).
366+
{Confidence.HIGH.value}::trigger(malware_high_confidence_2) :-
367+
quickUndetailed, forceSetup, failed({Heuristics.HIGH_RELEASE_FREQUENCY.value}).
343368
344369
% Package released recently with little detail, with some more refined trust markers introduced: project links,
345370
% multiple different releases, but there is no source code repository matching it and the setup is suspicious.
346-
{Confidence.HIGH.value}::high :- not {Heuristics.SOURCE_CODE_REPO.value},
347-
not {Heuristics.HIGH_RELEASE_FREQUENCY.value},
348-
not {Heuristics.CLOSER_RELEASE_JOIN_DATE.value},
349-
{Heuristics.UNCHANGED_RELEASE.value},
371+
{Confidence.HIGH.value}::trigger(malware_high_confidence_3) :-
372+
failed({Heuristics.SOURCE_CODE_REPO.value}),
373+
failed({Heuristics.HIGH_RELEASE_FREQUENCY.value}),
374+
passed({Heuristics.UNCHANGED_RELEASE.value}),
375+
failed({Heuristics.CLOSER_RELEASE_JOIN_DATE.value}),
350376
forceSetup.
351377
352378
% Package released recently with little detail, with multiple releases as a trust marker, but frequent and with
353379
% the same code.
354-
{Confidence.MEDIUM.value}::medium :- quickUndetailed,
355-
not {Heuristics.HIGH_RELEASE_FREQUENCY.value},
356-
not {Heuristics.UNCHANGED_RELEASE.value},
357-
{Heuristics.SUSPICIOUS_SETUP.value}.
380+
{Confidence.MEDIUM.value}::trigger(malware_medium_confidence_1) :-
381+
quickUndetailed,
382+
failed({Heuristics.HIGH_RELEASE_FREQUENCY.value}),
383+
failed({Heuristics.UNCHANGED_RELEASE.value}),
384+
passed({Heuristics.SUSPICIOUS_SETUP.value}).
358385
359386
% Package released recently with little detail and an anomalous version number for a single-release package.
360-
{Confidence.MEDIUM.value}::medium :- quickUndetailed,
361-
not {Heuristics.ONE_RELEASE.value},
362-
{Heuristics.WHEEL_ABSENCE.value},
363-
not {Heuristics.ANOMALOUS_VERSION.value}.
364-
365-
{problog_result_access} :- high.
366-
{problog_result_access} :- medium.
367-
387+
{Confidence.MEDIUM.value}::trigger(malware_medium_confidence_2) :-
388+
quickUndetailed,
389+
failed({Heuristics.ONE_RELEASE.value}),
390+
failed({Heuristics.ANOMALOUS_VERSION.value}).
391+
392+
% ----- Evaluation -----
393+
394+
% Aggregate result
395+
{problog_result_access} :- trigger(malware_high_confidence_1).
396+
{problog_result_access} :- trigger(malware_high_confidence_2).
397+
{problog_result_access} :- trigger(malware_high_confidence_3).
398+
{problog_result_access} :- trigger(malware_medium_confidence_2).
399+
{problog_result_access} :- trigger(malware_medium_confidence_1).
368400
query({problog_result_access}).
401+
402+
% Explainability
403+
query(trigger(_)).
369404
"""
370405

371406

tests/slsa_analyzer/checks/test_detect_malicious_metadata_check.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from pytest_httpserver import HTTPServer
1313

1414
from macaron.config.defaults import load_defaults
15+
from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics
1516
from macaron.slsa_analyzer.build_tool.base_build_tool import BaseBuildTool
1617
from macaron.slsa_analyzer.checks.check_result import CheckResultType
1718
from macaron.slsa_analyzer.checks.detect_malicious_metadata_check import DetectMaliciousMetadataCheck
@@ -98,3 +99,34 @@ def test_detect_malicious_metadata(
9899
).respond_with_json({})
99100

100101
assert check.run_check(ctx).result_type == expected
102+
103+
104+
@pytest.mark.parametrize(
105+
("combination"),
106+
[
107+
pytest.param(
108+
{
109+
# similar to rule ID malware_high_confidence_1, but SUSPICIOUS_SETUP is skipped since the file does not exist,
110+
# so the rule should not trigger.
111+
Heuristics.EMPTY_PROJECT_LINK: HeuristicResult.FAIL,
112+
Heuristics.SOURCE_CODE_REPO: HeuristicResult.SKIP,
113+
Heuristics.ONE_RELEASE: HeuristicResult.FAIL,
114+
Heuristics.HIGH_RELEASE_FREQUENCY: HeuristicResult.SKIP,
115+
Heuristics.UNCHANGED_RELEASE: HeuristicResult.SKIP,
116+
Heuristics.CLOSER_RELEASE_JOIN_DATE: HeuristicResult.FAIL,
117+
Heuristics.SUSPICIOUS_SETUP: HeuristicResult.SKIP,
118+
Heuristics.WHEEL_ABSENCE: HeuristicResult.FAIL,
119+
Heuristics.ANOMALOUS_VERSION: HeuristicResult.PASS,
120+
},
121+
id="test_skipped_evaluation",
122+
)
123+
],
124+
)
125+
def test_evaluations(combination: dict[Heuristics, HeuristicResult]) -> None:
126+
"""Test heuristic combinations to ensure they evaluate as expected."""
127+
check = DetectMaliciousMetadataCheck()
128+
129+
confidence, triggered_rules = check.evaluate_heuristic_results(combination)
130+
assert confidence == 0
131+
# Expecting this to be a dictionary, so we can ignore the type problems
132+
assert len(dict(triggered_rules)) == 0 # type: ignore[arg-type]

0 commit comments

Comments
 (0)