Skip to content
New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

Analyse dumps from JoeSandbox karton. Remove unused old drakrun-prod. #9

Merged
merged 4 commits into from
Apr 7, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@ Extracts static configuration from samples and memory dumps using the malduck en
},
{
"type": "analysis",
"kind": "drakrun-prod"
"kind": "drakrun"
},
{
"type": "analysis",
"kind": "drakrun"
"kind": "joesandbox"
}
```

Expand Down
150 changes: 99 additions & 51 deletions karton/config_extractor/config_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,18 @@
import json
import os
import re
import tempfile
import zipfile
from collections import namedtuple

from karton.core import Config, Karton, Resource, Task
from karton.core.resource import ResourceBase
from malduck.extractor import ExtractManager, ExtractorModules

from .__version__ import __version__

DumpInfo = namedtuple("DumpInfo", ("path", "base"))


class AnalysisExtractManager(ExtractManager):
"""
Expand Down Expand Up @@ -53,8 +58,8 @@ class ConfigExtractor(Karton):
"kind": "runnable",
"platform": "linux",
},
{"type": "analysis", "kind": "drakrun-prod"},
{"type": "analysis", "kind": "drakrun"},
{"type": "analysis", "kind": "joesandbox"},
]

@classmethod
Expand Down Expand Up @@ -133,52 +138,52 @@ def analyze_sample(self, sample: ResourceBase) -> None:
else:
self.log.info("Failed to get config")

# analyze a drakrun analysis
def analyze_drakrun(self, sample, path):
def analyze_dumps(self, sample, dump_infos):
"""
Analyse multiple dumps from given sample. There can be more than one
dump from which we managed to extract config from – try to find the best
candidate for each family.
"""
extractor = create_extractor(self)
dumps_path = os.path.join(path, "dumps")
dump_candidates = {}

results = {
"analysed": 0,
"crashed": 0,
}

analysis_dumps = sorted(os.listdir(dumps_path))
for i, dump in enumerate(analysis_dumps):
# catch only dumps
if re.match(r"^[a-f0-9]{4,16}_[a-f0-9]{16}$", dump):
results["analysed"] += 1
self.log.debug(
"Analyzing dump %d/%d %s", i, len(analysis_dumps), str(dump)
)
dump_path = os.path.join(dumps_path, dump)

with open(dump_path, "rb") as f:
dump_data = f.read()

if not dump_data:
self.log.warning("Dump {} is empty".format(dump))
continue

base = int(dump.split("_")[0], 16)
for i, dump_info in enumerate(dump_infos):
dump_basename = os.path.basename(dump_info.path)
results["analysed"] += 1
self.log.debug(
"Analyzing dump %d/%d %s", i, len(dump_infos), str(dump_basename)
)

try:
family = extractor.push_file(dump_path, base=base)
if family:
self.log.info("Found better %s config in %s", family, dump)
dump_candidates[family] = (dump, dump_data)
except Exception:
self.log.exception("Error while extracting from {}".format(dump))
results["crashed"] += 1
with open(dump_info.path, "rb") as f:
dump_data = f.read()

if not dump_data:
self.log.warning("Dump {} is empty".format(dump_basename))
continue

try:
family = extractor.push_file(dump_info.path, base=dump_info.base)
if family:
self.log.info("Found better %s config in %s", family, dump_basename)
dump_candidates[family] = (dump_basename, dump_data)
except Exception:
self.log.exception(
"Error while extracting from {}".format(dump_basename)
)
results["crashed"] += 1

self.log.debug("Finished analysing dump no. %d", i)
self.log.debug("Finished analysing dump no. %d", i)

self.log.info("Merging and reporting extracted configs")
for family, config in extractor.configs.items():
dump, dump_data = dump_candidates[family]
self.log.info("* (%s) %s => %s", family, dump, json.dumps(config))
parent = Resource(name=dump, content=dump_data)
dump_basename, dump_data = dump_candidates[family]
self.log.info("* (%s) %s => %s", family, dump_basename, json.dumps(config))
parent = Resource(name=dump_basename, content=dump_data)
task = Task(
{
"type": "sample",
Expand All @@ -198,37 +203,80 @@ def analyze_drakrun(self, sample, path):

self.log.info("done analysing, results: {}".format(json.dumps(results)))

def get_base_from_drakrun_dump(self, dump_name):
"""
Drakrun dumps come in form: <base>_<hash> e.g. 405000_688f58c58d798ecb,
that can be read as a dump from address 0x405000 with a content hash
equal to 688f58c58d798ecb.
"""
return int(dump_name.split("_")[0], 16)

def analyze_drakrun(self, sample, dumps):
with dumps.extract_temporary() as tmpdir: # type: ignore
dumps_path = os.path.join(tmpdir, "dumps")
dump_infos = []
for fname in os.listdir(dumps_path):
# Drakrun stores meta information in seperate file for each dump.
# Filter it as we want to analyse only dumps.
if not re.match(r"^[a-f0-9]{4,16}_[a-f0-9]{16}$", fname):
continue
dump_path = os.path.join(dumps_path, fname)
dump_base = self.get_base_from_drakrun_dump(fname)
dump_infos.append(DumpInfo(path=dump_path, base=dump_base))
self.analyze_dumps(sample, dump_infos)

def get_base_from_joesandbox_dump(self, dump_name):
"""
JoeSandbox dumps come in three formats:
1) raw dumps with .sdmp extension, e.g.
00000002.00000003.385533966.003C0000.00000004.00000001.sdmp
2) dumps that start with 0x4d5a bytes
2.1) unmodified with .raw.unpack extension, e.g.
0.0.tmpi0shwswy.exe.1290000.0.raw.unpack
2.2) modified by joesandbox engine with .unpack extension, e.g.
0.0.tmpi0shwswy.exe.1290000.0.unpack
"""
if "sdmp" in dump_name:
return int(dump_name.split(".")[3], 16)
elif "raw.unpack" in dump_name:
return int(dump_name.split(".")[4], 16)
elif "unpack" in dump_name:
return int(dump_name.split(".")[4], 16)

def analyze_joesandbox(self, sample, dumps):
with tempfile.TemporaryDirectory() as tmpdir:
dumpsf = os.path.join(tmpdir, "dumps.zip")
dumps.download_to_file(dumpsf)
zipf = zipfile.ZipFile(dumpsf)
dumps_path = tmpdir + "/dumps"
zipf.extractall(dumps_path, pwd=b"infected")
dump_infos = []
for fname in os.listdir(dumps_path):
dump_path = os.path.join(dumps_path, fname)
dump_base = self.get_base_from_joesandbox_dump(fname)
dump_infos.append(DumpInfo(path=dump_path, base=dump_base))
self.analyze_dumps(sample, dump_infos)

def process(self, task: Task) -> None: # type: ignore
sample = task.get_resource("sample")
headers = task.headers

if headers["type"] == "sample":
self.log.info("Analyzing original binary")
self.analyze_sample(sample)
elif headers["type"] == "analysis" and headers["kind"] == "drakrun-prod":
analysis = task.get_resource("analysis")
if analysis.size > 1024 * 1024 * 128:

self.log.info("Analysis is too large, aborting")
return

with analysis.extract_temporary() as fpath: # type: ignore
with open(os.path.join(fpath, "sample.txt"), "r") as f:
sample_hash = f.read()

self.log.info(
"Processing drakmon analysis, sample: {}".format(sample_hash)
)
self.analyze_drakrun(sample, fpath)
elif headers["type"] == "analysis" and headers["kind"] == "drakrun":
# DRAKVUF Sandbox (codename: drakmon OSS)
sample_hash = hashlib.sha256(sample.content or b"").hexdigest()
self.log.info(
"Processing drakmon OSS analysis, sample: {}".format(sample_hash)
)
dumps = task.get_resource("dumps.zip")
with dumps.extract_temporary() as tmpdir: # type: ignore
self.analyze_drakrun(sample, tmpdir)
self.analyze_drakrun(sample, dumps)
elif headers["type"] == "analysis" and headers["kind"] == "joesandbox":
sample_hash = hashlib.sha256(sample.content or b"").hexdigest()
self.log.info(f"Processing joesandbox analysis, sample: {sample_hash}")
dumps = task.get_resource("dumps.zip")
self.analyze_joesandbox(sample, dumps)

self.log.debug("Printing gc stats")
self.log.debug(gc.get_stats())
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
karton.core==4.0.5
karton-core==4.2.0
malduck==4.1.0