Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Analyse dumps from JoeSandbox karton. Remove unused old drakrun-prod. #9

Merged
merged 4 commits into from
Apr 7, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@ Extracts static configuration from samples and memory dumps using the malduck en
},
{
"type": "analysis",
"kind": "drakrun-prod"
"kind": "drakrun"
},
{
"type": "analysis",
"kind": "drakrun"
"kind": "joesandbox"
}
```

Expand Down
136 changes: 89 additions & 47 deletions karton/config_extractor/config_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import json
import os
import re
import tempfile
import zipfile

from karton.core import Config, Karton, Resource, Task
from karton.core.resource import ResourceBase
Expand Down Expand Up @@ -55,6 +57,7 @@ class ConfigExtractor(Karton):
},
{"type": "analysis", "kind": "drakrun-prod"},
{"type": "analysis", "kind": "drakrun"},
{"type": "analysis", "kind": "joesandbox"},
]

@classmethod
Expand Down Expand Up @@ -133,10 +136,16 @@ def analyze_sample(self, sample: ResourceBase) -> None:
else:
self.log.info("Failed to get config")

# analyze a drakrun analysis
def analyze_drakrun(self, sample, path):
def analyze_dumps(self, sample, dumps_path, base_from_fname):
"""
Analyse multiple dumps from the given sample. There can be more than one
dump from which we managed to extract a config – try to find the best
candidate for each family. Dumps from different sources (e.g. drakrun/sandbox)
might follow different naming conventions, which is why we require the
`base_from_fname` function as an argument: given a dump file name, it
extracts the address from which the dump has been taken.
"""
extractor = create_extractor(self)
dumps_path = os.path.join(path, "dumps")
dump_candidates = {}

results = {
Expand All @@ -146,33 +155,31 @@ def analyze_drakrun(self, sample, path):

analysis_dumps = sorted(os.listdir(dumps_path))
for i, dump in enumerate(analysis_dumps):
# catch only dumps
if re.match(r"^[a-f0-9]{4,16}_[a-f0-9]{16}$", dump):
results["analysed"] += 1
self.log.debug(
"Analyzing dump %d/%d %s", i, len(analysis_dumps), str(dump)
)
dump_path = os.path.join(dumps_path, dump)

with open(dump_path, "rb") as f:
dump_data = f.read()

if not dump_data:
self.log.warning("Dump {} is empty".format(dump))
continue

base = int(dump.split("_")[0], 16)

try:
family = extractor.push_file(dump_path, base=base)
if family:
self.log.info("Found better %s config in %s", family, dump)
dump_candidates[family] = (dump, dump_data)
except Exception:
self.log.exception("Error while extracting from {}".format(dump))
results["crashed"] += 1

self.log.debug("Finished analysing dump no. %d", i)
results["analysed"] += 1
self.log.debug(
"Analyzing dump %d/%d %s", i, len(analysis_dumps), str(dump)
)
dump_path = os.path.join(dumps_path, dump)

with open(dump_path, "rb") as f:
dump_data = f.read()

if not dump_data:
self.log.warning("Dump {} is empty".format(dump))
continue

base = base_from_fname(dump)

try:
family = extractor.push_file(dump_path, base=base)
if family:
self.log.info("Found better %s config in %s", family, dump)
dump_candidates[family] = (dump, dump_data)
except Exception:
self.log.exception("Error while extracting from {}".format(dump))
results["crashed"] += 1

self.log.debug("Finished analysing dump no. %d", i)

self.log.info("Merging and reporting extracted configs")
for family, config in extractor.configs.items():
Expand All @@ -198,37 +205,72 @@ def analyze_drakrun(self, sample, path):

self.log.info("done analysing, results: {}".format(json.dumps(results)))

def get_base_from_drakrun_dump(self, dump_name):
    """
    Return the base address encoded in a drakrun dump file name.

    Drakrun dumps come in form: <base>_<hash> e.g. 405000_688f58c58d798ecb,
    that can be read as a dump from address 0x405000 with a content hash
    equal to 688f58c58d798ecb.

    :param dump_name: dump file name (not a full path)
    :return: base address parsed from the hexadecimal <base> prefix
    :raises ValueError: if the part before the first "_" is not valid hex
    """
    # Only the <base> part (everything before the first underscore) is needed.
    return int(dump_name.split("_")[0], 16)

def analyze_drakrun(self, sample, dumps):
    """
    Extract a drakrun dumps archive and analyse the memory dumps inside.

    The archive is unpacked into a temporary directory; the dumps are
    expected in its "dumps" sub-directory. Every file whose name does not
    follow the <base>_<hash> dump naming scheme is deleted before the
    directory is handed over to `analyze_dumps`.

    :param sample: sample resource, passed through to `analyze_dumps`
    :param dumps: resource holding the dumps archive; must provide
        `extract_temporary()` usable as a context manager yielding a path
    """
    with dumps.extract_temporary() as tmpdir:  # type: ignore
        dumps_path = os.path.join(tmpdir, "dumps")
        # Drakrun stores meta information in a separate file for each dump.
        # Remove those files as we want to analyse only dumps.
        for fname in os.listdir(dumps_path):
            if not re.match(r"^[a-f0-9]{4,16}_[a-f0-9]{16}$", fname):
                os.remove(os.path.join(dumps_path, fname))
        # Must run inside the `with` block: the extracted files are gone
        # once the temporary directory is cleaned up.
        self.analyze_dumps(sample, dumps_path, self.get_base_from_drakrun_dump)

def get_base_from_joesandbox_dump(self, dump_name):
    """
    Return the base address encoded in a JoeSandbox dump file name.

    JoeSandbox dumps come in three formats:
    1) raw dumps with .sdmp extension, e.g.
       00000002.00000003.385533966.003C0000.00000004.00000001.sdmp
    2) dumps that start with 0x4d5a bytes
    2.1) unmodified with .raw.unpack extension, e.g.
         0.0.tmpi0shwswy.exe.1290000.0.raw.unpack
    2.2) modified by joesandbox engine with .unpack extension, e.g.
         0.0.tmpi0shwswy.exe.1290000.0.unpack

    :param dump_name: dump file name (not a full path)
    :return: base address as int, or None when the name matches none of
        the formats above
    :raises ValueError: if the address field is not valid hexadecimal
    :raises IndexError: if the name has fewer dot-separated fields than
        its format requires
    """
    fields = dump_name.split(".")
    # Decide the format by the file extension rather than by a substring
    # search, so a sample name that happens to contain "sdmp" or "unpack"
    # cannot select the wrong field.
    if dump_name.endswith(".sdmp"):
        return int(fields[3], 16)
    if dump_name.endswith(".unpack"):
        # .raw.unpack and .unpack keep the address in the same field.
        return int(fields[4], 16)
    return None

def analyze_joesandbox(self, sample, dumps):
    """
    Download a JoeSandbox dumps archive, unpack it and analyse the dumps.

    The archive is saved as "dumps.zip" in a temporary directory and
    extracted, using the password "infected", into a "dumps" directory
    next to it. That directory is then handed over to `analyze_dumps`.

    :param sample: sample resource, passed through to `analyze_dumps`
    :param dumps: resource holding the zip archive; must provide
        `download_to_file(path)`
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        dumpsf = os.path.join(tmpdir, "dumps.zip")
        dumps.download_to_file(dumpsf)
        dumps_path = os.path.join(tmpdir, "dumps")
        # Close the archive handle before analysis and before the
        # temporary directory is removed.
        with zipfile.ZipFile(dumpsf) as zipf:
            zipf.extractall(dumps_path, pwd=b"infected")
        self.analyze_dumps(sample, dumps_path, self.get_base_from_joesandbox_dump)

def process(self, task: Task) -> None:  # type: ignore
    """
    Dispatch an incoming task to the matching analysis routine.

    The task headers select the routine:
    - type "sample": the original binary is analysed directly,
    - type "analysis", kind "drakrun": the "dumps.zip" resource is passed
      to `analyze_drakrun`,
    - type "analysis", kind "joesandbox": the "dumps.zip" resource is
      passed to `analyze_joesandbox`.

    Tasks with any other header combination only reach the gc statistics
    logging at the end.

    :param task: task carrying a "sample" resource and, for analysis
        tasks, a "dumps.zip" resource
    """
    sample = task.get_resource("sample")
    headers = task.headers

    if headers["type"] == "sample":
        self.log.info("Analyzing original binary")
        self.analyze_sample(sample)
    elif headers["type"] == "analysis" and headers["kind"] == "drakrun":
        # DRAKVUF Sandbox (codename: drakmon OSS)
        sample_hash = hashlib.sha256(sample.content or b"").hexdigest()
        self.log.info(
            "Processing drakmon OSS analysis, sample: {}".format(sample_hash)
        )
        dumps = task.get_resource("dumps.zip")
        # analyze_drakrun extracts the archive itself, so it receives the
        # resource rather than an already extracted directory path.
        self.analyze_drakrun(sample, dumps)
    elif headers["type"] == "analysis" and headers["kind"] == "joesandbox":
        sample_hash = hashlib.sha256(sample.content or b"").hexdigest()
        self.log.info(f"Processing joesandbox analysis, sample: {sample_hash}")
        dumps = task.get_resource("dumps.zip")
        self.analyze_joesandbox(sample, dumps)

    self.log.debug("Printing gc stats")
    self.log.debug(gc.get_stats())
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
karton.core==4.0.5
karton-core==4.0.5
malduck==4.1.0