Skip to content

Commit

Permalink
Analyse dumps from JoeSandbox karton. Remove unused old drakrun-prod. (
Browse files Browse the repository at this point in the history
…#9)

* Analyse dumps from JoeSandbox karton. Remove unused old drakrun-prod

* fix black format

* Remove outdated filter, change analyze_dumps function signature

* fix flake8
  • Loading branch information
Konstanty Cieśliński authored Apr 7, 2021
1 parent c0eb0bd commit ab6ad4e
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 54 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@ Extracts static configuration from samples and memory dumps using the malduck en
},
{
"type": "analysis",
"kind": "drakrun-prod"
"kind": "drakrun"
},
{
"type": "analysis",
"kind": "drakrun"
"kind": "joesandbox"
}
```

Expand Down
150 changes: 99 additions & 51 deletions karton/config_extractor/config_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,18 @@
import json
import os
import re
import tempfile
import zipfile
from collections import namedtuple

from karton.core import Config, Karton, Resource, Task
from karton.core.resource import ResourceBase
from malduck.extractor import ExtractManager, ExtractorModules

from .__version__ import __version__

DumpInfo = namedtuple("DumpInfo", ("path", "base"))


class AnalysisExtractManager(ExtractManager):
"""
Expand Down Expand Up @@ -53,8 +58,8 @@ class ConfigExtractor(Karton):
"kind": "runnable",
"platform": "linux",
},
{"type": "analysis", "kind": "drakrun-prod"},
{"type": "analysis", "kind": "drakrun"},
{"type": "analysis", "kind": "joesandbox"},
]

@classmethod
Expand Down Expand Up @@ -133,52 +138,52 @@ def analyze_sample(self, sample: ResourceBase) -> None:
else:
self.log.info("Failed to get config")

# analyze a drakrun analysis
def analyze_drakrun(self, sample, path):
def analyze_dumps(self, sample, dump_infos):
"""
Analyse multiple dumps from given sample. There can be more than one
dump from which we managed to extract config from – try to find the best
candidate for each family.
"""
extractor = create_extractor(self)
dumps_path = os.path.join(path, "dumps")
dump_candidates = {}

results = {
"analysed": 0,
"crashed": 0,
}

analysis_dumps = sorted(os.listdir(dumps_path))
for i, dump in enumerate(analysis_dumps):
# catch only dumps
if re.match(r"^[a-f0-9]{4,16}_[a-f0-9]{16}$", dump):
results["analysed"] += 1
self.log.debug(
"Analyzing dump %d/%d %s", i, len(analysis_dumps), str(dump)
)
dump_path = os.path.join(dumps_path, dump)

with open(dump_path, "rb") as f:
dump_data = f.read()

if not dump_data:
self.log.warning("Dump {} is empty".format(dump))
continue

base = int(dump.split("_")[0], 16)
for i, dump_info in enumerate(dump_infos):
dump_basename = os.path.basename(dump_info.path)
results["analysed"] += 1
self.log.debug(
"Analyzing dump %d/%d %s", i, len(dump_infos), str(dump_basename)
)

try:
family = extractor.push_file(dump_path, base=base)
if family:
self.log.info("Found better %s config in %s", family, dump)
dump_candidates[family] = (dump, dump_data)
except Exception:
self.log.exception("Error while extracting from {}".format(dump))
results["crashed"] += 1
with open(dump_info.path, "rb") as f:
dump_data = f.read()

if not dump_data:
self.log.warning("Dump {} is empty".format(dump_basename))
continue

try:
family = extractor.push_file(dump_info.path, base=dump_info.base)
if family:
self.log.info("Found better %s config in %s", family, dump_basename)
dump_candidates[family] = (dump_basename, dump_data)
except Exception:
self.log.exception(
"Error while extracting from {}".format(dump_basename)
)
results["crashed"] += 1

self.log.debug("Finished analysing dump no. %d", i)
self.log.debug("Finished analysing dump no. %d", i)

self.log.info("Merging and reporting extracted configs")
for family, config in extractor.configs.items():
dump, dump_data = dump_candidates[family]
self.log.info("* (%s) %s => %s", family, dump, json.dumps(config))
parent = Resource(name=dump, content=dump_data)
dump_basename, dump_data = dump_candidates[family]
self.log.info("* (%s) %s => %s", family, dump_basename, json.dumps(config))
parent = Resource(name=dump_basename, content=dump_data)
task = Task(
{
"type": "sample",
Expand All @@ -198,37 +203,80 @@ def analyze_drakrun(self, sample, path):

self.log.info("done analysing, results: {}".format(json.dumps(results)))

def get_base_from_drakrun_dump(self, dump_name):
    """
    Extract the load address from a drakrun dump filename.

    Drakrun names its dumps <base>_<hash>, e.g. 405000_688f58c58d798ecb:
    a dump taken from address 0x405000 whose content hash is
    688f58c58d798ecb. The base is the hex field before the underscore.
    """
    base_hex, _, _ = dump_name.partition("_")
    return int(base_hex, 16)

def analyze_drakrun(self, sample, dumps):
    """
    Unpack a drakrun dumps resource and run config extraction on every
    memory dump found inside.

    Drakrun stores meta information in a separate file next to each dump;
    only names matching the <base>_<hash> pattern are actual dumps, so
    everything else is filtered out before analysis.
    """
    dump_name_re = re.compile(r"^[a-f0-9]{4,16}_[a-f0-9]{16}$")
    with dumps.extract_temporary() as tmpdir:  # type: ignore
        dumps_path = os.path.join(tmpdir, "dumps")
        dump_infos = [
            DumpInfo(
                path=os.path.join(dumps_path, fname),
                base=self.get_base_from_drakrun_dump(fname),
            )
            for fname in os.listdir(dumps_path)
            if dump_name_re.match(fname)
        ]
        self.analyze_dumps(sample, dump_infos)

def get_base_from_joesandbox_dump(self, dump_name):
    """
    Extract the load address encoded in a JoeSandbox dump filename.

    JoeSandbox dumps come in three formats:
    1) raw dumps with .sdmp extension, e.g.
       00000002.00000003.385533966.003C0000.00000004.00000001.sdmp
       (base is the 4th dot-separated field)
    2) dumps that start with 0x4d5a bytes
       2.1) unmodified with .raw.unpack extension, e.g.
            0.0.tmpi0shwswy.exe.1290000.0.raw.unpack
       2.2) modified by joesandbox engine with .unpack extension, e.g.
            0.0.tmpi0shwswy.exe.1290000.0.unpack
       (in both cases the base is the 5th dot-separated field)

    Returns the base address as an int, or None when the filename does
    not match any known format.
    """
    # Match on the actual extension instead of substring membership so a
    # middle field that happens to contain "sdmp"/"unpack" cannot select
    # the wrong branch.
    if dump_name.endswith(".sdmp"):
        return int(dump_name.split(".")[3], 16)
    # ".raw.unpack" also ends with ".unpack" and both formats keep the
    # base in the same (5th) field, so one branch covers 2.1 and 2.2.
    if dump_name.endswith(".unpack"):
        return int(dump_name.split(".")[4], 16)
    # Unknown naming scheme: return None explicitly (the original fell
    # through implicitly with the same result).
    return None

def analyze_joesandbox(self, sample, dumps):
    """
    Download a JoeSandbox "dumps.zip" resource, extract it and run
    config extraction on every dump inside.

    :param sample: original sample resource, forwarded to analyze_dumps
    :param dumps: resource holding the password-protected zip of dumps
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        dumps_zip = os.path.join(tmpdir, "dumps.zip")
        dumps.download_to_file(dumps_zip)
        dumps_path = os.path.join(tmpdir, "dumps")
        # Use a context manager so the archive handle is closed
        # deterministically (the original leaked the ZipFile until GC).
        with zipfile.ZipFile(dumps_zip) as zipf:
            # JoeSandbox archives are protected with the conventional
            # "infected" password.
            zipf.extractall(dumps_path, pwd=b"infected")
        dump_infos = []
        for fname in os.listdir(dumps_path):
            dump_path = os.path.join(dumps_path, fname)
            # base may be None for filenames in an unknown format —
            # get_base_from_joesandbox_dump documents that behavior.
            dump_base = self.get_base_from_joesandbox_dump(fname)
            dump_infos.append(DumpInfo(path=dump_path, base=dump_base))
        self.analyze_dumps(sample, dump_infos)

def process(self, task: Task) -> None:  # type: ignore
    """
    Karton entry point: dispatch an incoming task to the right analyser
    based on its headers.

    - type=sample: run extractors directly on the binary
    - type=analysis, kind=drakrun: DRAKVUF Sandbox (codename: drakmon OSS)
      analysis — extract configs from its memory dumps
    - type=analysis, kind=joesandbox: JoeSandbox analysis — extract
      configs from its dumps.zip

    NOTE(review): this span mixed leftover pre-change lines (the removed
    "drakrun-prod" branch calling the old analyze_drakrun(sample, fpath)
    signature) with the current code; the dead branch is dropped here to
    match the signatures actually defined above.
    """
    sample = task.get_resource("sample")
    headers = task.headers

    if headers["type"] == "sample":
        self.log.info("Analyzing original binary")
        self.analyze_sample(sample)
    elif headers["type"] == "analysis" and headers["kind"] == "drakrun":
        # DRAKVUF Sandbox (codename: drakmon OSS)
        sample_hash = hashlib.sha256(sample.content or b"").hexdigest()
        self.log.info(
            "Processing drakmon OSS analysis, sample: {}".format(sample_hash)
        )
        dumps = task.get_resource("dumps.zip")
        self.analyze_drakrun(sample, dumps)
    elif headers["type"] == "analysis" and headers["kind"] == "joesandbox":
        sample_hash = hashlib.sha256(sample.content or b"").hexdigest()
        self.log.info(f"Processing joesandbox analysis, sample: {sample_hash}")
        dumps = task.get_resource("dumps.zip")
        self.analyze_joesandbox(sample, dumps)

    self.log.debug("Printing gc stats")
    self.log.debug(gc.get_stats())
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
karton.core==4.0.5
karton-core==4.2.0
malduck==4.1.0

0 comments on commit ab6ad4e

Please # to comment.