Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add oleobj arg to just find external relations #772

Open
wants to merge 15 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions oletools/common/io_encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
or unusual language settings.

In such settings, output to console falls back to ASCII-only. Also open()
suddenly fails to interprete non-ASCII characters.
suddenly fails to interpret non-ASCII characters.

Therefore, at start of scripts can run :py:meth:`ensure_stdout_handles_unicode`
and when opening text files use :py:meth:`uopen` to replace :py:meth:`open`.
Expand All @@ -18,7 +18,7 @@

# === LICENSE =================================================================

# msodde is copyright (c) 2017-2018 Philippe Lagadec (http://www.decalage.info)
# io_encoding is copyright (c) 2017-2018 Philippe Lagadec (http://www.decalage.info)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
Expand Down
1 change: 0 additions & 1 deletion oletools/msodde.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,6 @@
from oletools import rtfobj
from oletools.ppt_record_parser import is_ppt
from oletools import crypto
from oletools.common.io_encoding import ensure_stdout_handles_unicode
from oletools.common.log_helper import log_helper

# -----------------------------------------------------------------------------
Expand Down
250 changes: 123 additions & 127 deletions oletools/oleobj.py

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion oletools/olevba.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,6 @@
from oletools import oleform
from oletools import rtfobj
from oletools import crypto
from oletools.common.io_encoding import ensure_stdout_handles_unicode
from oletools.common import codepages
from oletools import ftguess
from oletools.common.log_helper import log_helper
Expand Down
44 changes: 44 additions & 0 deletions tests/common/test_json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""
Test that all --json output is always valid json.

Since this test takes rather long, it is not included in regular unittest runs.
To enable it, set environment variable OLETOOLS_TEST_JSON to value "1"
"""

import os
from os.path import relpath
import json
import unittest

from tests.test_utils import DATA_BASE_DIR, call_and_capture
from tests.test_utils.testdata_reader import loop_and_extract


@unittest.skipIf(os.environ.get('OLETOOLS_TEST_JSON') != '1',
                 'Test takes pretty long, do not include in regular test runs')
class TestJson(unittest.TestCase):
    """Test that all --json output is always valid json."""

    # each entry: (tool name, base arguments before the sample path)
    TOOL_ARGS = (
        ('oleobj', ['--json', '--nodump']),
        ('olevba', ['--json']),
        ('msodde', ['--json']),
    )

    def test_all(self):
        """Check that olevba, msodde and oleobj produce valid json for ALL samples."""
        for sample_path in loop_and_extract():
            # show path relative to the data dir when possible, full path otherwise
            if sample_path.startswith(DATA_BASE_DIR):
                shown = relpath(sample_path, DATA_BASE_DIR)
            else:
                shown = sample_path
            print(f'TestJson: checking sample {shown}')

            for tool, base_args in self.TOOL_ARGS:
                output, _ = call_and_capture(tool, base_args + [sample_path],
                                             accept_nonzero_exit=True)
                # raises ValueError / JSONDecodeError if output is not valid json
                json.loads(output)


# just in case somebody calls this file as a script
if __name__ == '__main__':
unittest.main()
20 changes: 16 additions & 4 deletions tests/oleobj/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import unittest
from tempfile import mkdtemp
from shutil import rmtree
from os import listdir
from os.path import join, isfile
from hashlib import md5
from glob import glob
Expand Down Expand Up @@ -68,16 +69,16 @@ def preread_file(args):
raise ValueError('ignore_arg not as expected!')
with open(filename, 'rb') as file_handle:
data = file_handle.read()
err_stream, err_dumping, did_dump = \
err_stream, err_dumping, did_dump, found_external = \
oleobj.process_file(filename, data, output_dir=output_dir)
if did_dump and not err_stream and not err_dumping:
if did_dump and not err_stream and not err_dumping and not found_external:
return oleobj.RETURN_DID_DUMP
else:
return oleobj.RETURN_NO_DUMP # just anything else
return oleobj.RETURN_NO_DUMP # just anything else, will cause error


class TestOleObj(unittest.TestCase):
""" Tests oleobj basic feature """
"""Tests oleobj basic feature: dump embedded content."""

def setUp(self):
""" fixture start: create temp dir """
Expand Down Expand Up @@ -158,6 +159,17 @@ def test_non_streamed(self):
return self.do_test_md5(['-d', self.temp_dir], test_fun=preread_file,
only_run_every=4)

def test_nodump(self):
    """Ensure that with --nodump nothing is ever written to disc."""
    data_dir = join(DATA_BASE_DIR, 'oleobj')
    for sample_name, _, _ in SAMPLES:
        sample_path = join(data_dir, sample_name)
        # run oleobj with --nodump; return code does not matter here
        call_and_capture('oleobj',
                         ['-d', self.temp_dir, '--nodump', sample_path],
                         accept_nonzero_exit=True)
        # the output dir must stay empty for every sample
        leftovers = listdir(self.temp_dir)
        if leftovers:
            self.fail('Found file in temp dir despite "--nodump": {}'.format(leftovers))


class TestSaneFilenameCreation(unittest.TestCase):
""" Test sanitization / creation of sane filenames """
Expand Down
12 changes: 10 additions & 2 deletions tests/oleobj/test_external_links.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,20 @@ def test_external_links(self):
for dirpath, _, filenames in os.walk(BASE_DIR):
for filename in filenames:
file_path = path.join(dirpath, filename)
if not path.isfile(file_path):
continue

output, ret_val = call_and_capture('oleobj', [file_path, ],
output, ret_val = call_and_capture('oleobj', ['--nodump', file_path, ],
accept_nonzero_exit=True)
self.assertEqual(ret_val, oleobj.RETURN_DID_DUMP,
self.assertEqual(ret_val, oleobj.RETURN_FOUND_EXTERNAL,
msg='Wrong return value {} for {}. Output:\n{}'
.format(ret_val, filename, output))
found_relationship = False
for line in output.splitlines():
if line.startswith('Found relationship'):
found_relationship = True
break
self.assertTrue(found_relationship)


# just in case somebody calls this file as a script
Expand Down
46 changes: 40 additions & 6 deletions tests/test_utils/testdata_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
"""

import os, sys, zipfile
from os.path import relpath, join, isfile
from os.path import relpath, join, isfile, splitext
from contextlib import contextmanager
from tempfile import mkstemp
from tempfile import mkstemp, TemporaryDirectory, NamedTemporaryFile

from . import DATA_BASE_DIR

Expand Down Expand Up @@ -73,11 +73,10 @@ def loop_over_files(subdir=''):
and the contents of the file, with the file being unzipped first if it ends
with .zip.

:arg str subdir: Optional subdir of test data dir that caller is interested
in
"""
# create temp dir to extract files into
See also: :py:meth:`loop_and_extract`

:param str subdir: Optional subdir of test data dir that caller is interested in
"""
for base_dir, _, files in os.walk(join(DATA_BASE_DIR, subdir)):
for filename in files:
relative_path = relpath(join(base_dir, filename), DATA_BASE_DIR)
Expand All @@ -87,6 +86,41 @@ def loop_over_files(subdir=''):
yield relative_path, read(relative_path)


def loop_and_extract(subdir=''):
    """
    Yield an absolute path for every test sample, unzipping encrypted ones.

    Walks all test data (or only the given subdir) with `os.walk`. Plain
    samples are yielded at their original location inside `DATA_BASE_DIR`;
    samples stored as password-protected ``.zip`` files are extracted into a
    temporary directory first and the path of the extracted copy is yielded.

    The temp dir and files inside it are always deleted right after usage.

    See also: :py:meth:`loop_over_files`

    :param str subdir: Optional subdir of test data dir that caller is interested in
    """
    with TemporaryDirectory(prefix='oletools-test-') as scratch_dir:
        for dir_path, _, file_names in os.walk(join(DATA_BASE_DIR, subdir)):
            for file_name in file_names:
                sample_path = join(dir_path, file_name)
                if not file_name.endswith('.zip'):
                    # nothing to extract, sample can be used as-is
                    yield sample_path
                    continue

                # strip the ".zip", then split the rest into name and extension
                base_name, extension = splitext(splitext(file_name)[0])

                # temp file gets a sensible name and is removed when closed
                with zipfile.ZipFile(sample_path, 'r') as zipped, \
                        NamedTemporaryFile(dir=scratch_dir, prefix=base_name,
                                           suffix=extension) as plain_file:
                    # test samples are small, reading all at once is fine
                    plain_file.write(zipped.read(zipped.namelist()[0],
                                                 pwd=ENCRYPTED_FILES_PASSWORD))
                    plain_file.flush()
                    yield plain_file.name


@contextmanager
def decrypt_sample(relpath):
"""
Expand Down
15 changes: 4 additions & 11 deletions tests/test_utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,15 @@ def call_and_capture(module, args=None, accept_nonzero_exit=False,
Only drawback sofar: stdout and stderr are merged into one (which is
what users see on their shell as well). When testing for json-compatible
output you should `exclude_stderr` to `False` since logging ignores stderr,
so unforseen warnings (e.g. issued by pypy) would mess up your json.
so unforeseen warnings (e.g. issued by pypy) would mess up your json.

:param str module: name of module to test, e.g. `olevba`
:param args: arguments for module's main function
:param bool fail_nonzero: Raise error if command returns non-0 return code
:param bool accept_nonzero_exit: Do not raise error if command returns non-0 return code
:param bool exclude_stderr: Exclude output to `sys.stderr` from output
(e.g. if parsing output through json)
:returns: ret_code, output
:rtype: int, str
:returns: output, ret_code
:rtype: str, int
"""
# create a PYTHONPATH environment var to prefer our current code
env = os.environ.copy()
Expand All @@ -47,13 +47,6 @@ def call_and_capture(module, args=None, accept_nonzero_exit=False,
except KeyError:
env['PYTHONPATH'] = SOURCE_BASE_DIR

# hack: in python2 output encoding (sys.stdout.encoding) was None
# although sys.getdefaultencoding() and sys.getfilesystemencoding were ok
# TODO: maybe can remove this once branch
# "encoding-for-non-unicode-environments" is merged
if 'PYTHONIOENCODING' not in env:
env['PYTHONIOENCODING'] = 'utf8'

# ensure args is a tuple
my_args = tuple(args) if args else ()

Expand Down