diff --git a/oletools/common/io_encoding.py b/oletools/common/io_encoding.py
index b32d82d22..629569166 100644
--- a/oletools/common/io_encoding.py
+++ b/oletools/common/io_encoding.py
@@ -7,7 +7,7 @@ or unusual language settings.
 
 In such settings, output to console falls back to ASCII-only. Also open()
-suddenly fails to interprete non-ASCII characters.
+suddenly fails to interpret non-ASCII characters.
 
 Therefore, at start of scripts can run :py:meth:`ensure_stdout_handles_unicode`
 and when opening text files use :py:meth:`uopen` to replace :py:meth:`open`.
@@ -18,7 +18,7 @@
 
 # === LICENSE =================================================================
 
-# msodde is copyright (c) 2017-2018 Philippe Lagadec (http://www.decalage.info)
+# io_encoding is copyright (c) 2017-2018 Philippe Lagadec (http://www.decalage.info)
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
diff --git a/oletools/msodde.py b/oletools/msodde.py
index 303d97476..b32ed7ed9 100644
--- a/oletools/msodde.py
+++ b/oletools/msodde.py
@@ -74,7 +74,6 @@
 from oletools import rtfobj
 from oletools.ppt_record_parser import is_ppt
 from oletools import crypto
-from oletools.common.io_encoding import ensure_stdout_handles_unicode
 from oletools.common.log_helper import log_helper
 
 # -----------------------------------------------------------------------------
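Note on the import removals above: console and unicode handling now go through the shared log_helper module instead of ensure_stdout_handles_unicode() plus bare print(). A minimal sketch of the resulting pattern, using only the log_helper calls actually exercised by this patch ('myscript' and main() are illustrative placeholders, not part of the patch):

    import sys
    from oletools.common.log_helper import log_helper

    # module level: a silent logger; it prints nothing until logging is enabled
    log = log_helper.get_or_create_silent_logger('myscript')

    def main():
        # output goes to stdout so it can be redirected; use_json for --json mode
        log_helper.enable_logging(level='warning', use_json=False, stream=sys.stdout)
        log.print_str('result lines that previously went through print()')
        log_helper.end_logging()  # finalize output; the patch calls this after all files are processed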
diff --git a/oletools/oleobj.py b/oletools/oleobj.py
index 9f67752ea..7daca703a 100644
--- a/oletools/oleobj.py
+++ b/oletools/oleobj.py
@@ -2,8 +2,9 @@
 """
 oleobj.py
 
-oleobj is a Python script and module to parse OLE objects and files stored
-into various MS Office file formats (doc, xls, ppt, docx, xlsx, pptx, etc)
+oleobj is a Python script and module to extract OLE objects and files stored
+into various MS Office file formats (doc, xls, ppt, docx, xlsx, pptx, etc).
+It also finds external relationships in newer xml-based office file formats.
 
 Author: Philippe Lagadec - http://www.decalage.info
 License: BSD, see source code or documentation
@@ -43,7 +44,6 @@
 from __future__ import print_function
 
-import logging
 import struct
 import argparse
 import os
@@ -68,10 +68,11 @@ sys.path.insert(0, _parent_dir)
 
 from oletools.thirdparty import xglob
+from oletools import record_base
 from oletools.ppt_record_parser import (is_ppt, PptFile,
                                         PptRecordExOleVbaActiveXAtom)
-from oletools.ooxml import XmlParser
-from oletools.common.io_encoding import ensure_stdout_handles_unicode
+from oletools import ooxml
+from oletools.common.log_helper import log_helper
 
 # -----------------------------------------------------------------------------
 # CHANGELOG:
@@ -94,7 +95,6 @@
 # -----------------------------------------------------------------------------
 # TODO:
-# + setup logging (common with other oletools)
 # -----------------------------------------------------------------------------
 
 
@@ -111,64 +111,18 @@
 # === LOGGING =================================================================
 
 DEFAULT_LOG_LEVEL = "warning"
-LOG_LEVELS = {'debug': logging.DEBUG,
-              'info': logging.INFO,
-              'warning': logging.WARNING,
-              'error': logging.ERROR,
-              'critical': logging.CRITICAL,
-              'debug-olefile': logging.DEBUG}
-
-
-class NullHandler(logging.Handler):
-    """
-    Log Handler without output, to avoid printing messages if logging is not
-    configured by the main application.
-
-    Python 2.7 has logging.NullHandler, but this is necessary for 2.6:
-    see https://docs.python.org/2.6/library/logging.html section
-    configuring-logging-for-a-library
-    """
-    def emit(self, record):
-        pass
-
-
-def get_logger(name, level=logging.CRITICAL+1):
-    """
-    Create a suitable logger object for this module.
-    The goal is not to change settings of the root logger, to avoid getting
-    other modules' logs on the screen.
-    If a logger exists with same name, reuse it. (Else it would have duplicate
-    handlers and messages would be doubled.)
-    The level is set to CRITICAL+1 by default, to avoid any logging.
-    """
-    # First, test if there is already a logger with the same name, else it
-    # will generate duplicate messages (due to duplicate handlers):
-    if name in logging.Logger.manager.loggerDict:
-        # NOTE: another less intrusive but more "hackish" solution would be to
-        # use getLogger then test if its effective level is not default.
-        logger = logging.getLogger(name)
-        # make sure level is OK:
-        logger.setLevel(level)
-        return logger
-    # get a new logger:
-    logger = logging.getLogger(name)
-    # only add a NullHandler for this logger, it is up to the application
-    # to configure its own logging:
-    logger.addHandler(NullHandler())
-    logger.setLevel(level)
-    return logger
-
 # a global logger object used for debugging:
-log = get_logger('oleobj')  # pylint: disable=invalid-name
+log = log_helper.get_or_create_silent_logger("oleobj")
 
 
 def enable_logging():
-    """
-    Enable logging for this module (disabled by default).
-    This will set the module-specific logger level to NOTSET, which
-    means the main application controls the actual logging level.
-    """
-    log.setLevel(logging.NOTSET)
+    """Enable logging in this module; for use by importing scripts"""
+    log.setLevel(log_helper.NOTSET)
+    ooxml.enable_logging()
+    record_base.enable_logging()  # for ppt_record_parser
+
+    # do not enable logging for olefile, have extra logging value "debug-olefile" for that
 
 
 # === CONSTANTS ===============================================================
@@ -211,6 +165,7 @@ def enable_logging():
 RETURN_ERR_ARGS = 2    # reserve for OptionParser.parse_args
 RETURN_ERR_STREAM = 4  # error opening/parsing a stream
 RETURN_ERR_DUMP = 8    # error dumping data from stream to file
+RETURN_FOUND_EXTERNAL = 16  # found an external relationship
 
 # Not sure if they can all be "External", but just in case
 BLACKLISTED_RELATIONSHIP_TYPES = [
@@ -428,6 +383,7 @@ def parse(self, data):
             # TODO: SLACK DATA
         except (IOError, struct.error):      # no data to read actual_size
             log.debug('data is not embedded but only a link')
+            # TODO: extract that link and log it, might point to malicious content
             self.is_link = True
             self.actual_size = 0
             self.data = None
@@ -578,7 +534,7 @@ def get_sane_embedded_filenames(filename, src_path, tmp_path, max_len,
         # identify suffix. Dangerous suffixes are all short
         idx = candidate.rfind('.')
-        if idx is -1:
+        if idx == -1:
             candidates_without_suffix.append(candidate)
             continue
         elif idx < len(candidate)-5:
@@ -757,7 +713,7 @@ def find_ole(filename, data, xml_parser=None):
         # keep compatibility with 3rd-party code that calls this function
         # directly without providing an XmlParser instance
         if xml_parser is None:
-            xml_parser = XmlParser(arg_for_zip)
+            xml_parser = ooxml.XmlParser(arg_for_zip)
             # force iteration so XmlParser.iter_non_xml() returns data
             for _ in xml_parser.iter_xml():
                 pass
@@ -832,7 +788,7 @@ def find_customUI(xml_parser):
             yield customui_onload
 
 
-def process_file(filename, data, output_dir=None):
+def process_file(filename, data, output_dir=None, nodump=False):
     """
     find embedded objects in given file
 
     if data is given (from xglob for encrypted zip files), then filename is
@@ -841,12 +797,22 @@
     If output_dir is given and does not exist, it is created. If it is not
     given, data is saved to same directory as the input file.
+
+    If nodump is given as `True`, nothing is written to disk; the file is only
+    checked for presence of dump-able content or external relationships. The
+    returned flag `did_dump` is still set to `True` if something dump-worthy is
+    found.
+
+    Returns 4 bools: `(err_stream, err_dumping, did_dump, found_external)`,
+    indicating whether there was an error extracting streams, whether there was
+    an error dumping data, whether anything dump-worthy was found, and whether
+    an external relationship was found.
     """
     # sanitize filename, leave space for embedded filename part
     sane_fname = sanitize_filename(filename, max_len=MAX_FILENAME_LENGTH-5) or\
         'NONAME'
     if output_dir:
-        if not os.path.isdir(output_dir):
+        if not os.path.isdir(output_dir) and not nodump:
             log.info('creating output directory %s', output_dir)
             os.mkdir(output_dir)
@@ -855,29 +821,30 @@
         base_dir = os.path.dirname(filename)
         fname_prefix = os.path.join(base_dir, sane_fname)
 
-    # TODO: option to extract objects to files (false by default)
-    print('-'*79)
-    print('File: %r' % filename)
+    # TODO: option to extract objects to files (false by default) (solved by option nodump?)
+    log.print_str('-'*79)
+    log.print_str('File: %r' % filename)
     index = 1
 
     # do not throw errors but remember them and try continue with other streams
     err_stream = False
     err_dumping = False
     did_dump = False
+    found_external = False
 
     xml_parser = None
     if is_zipfile(filename):
         log.info('file could be an OOXML file, looking for relationships with '
                  'external links')
-        xml_parser = XmlParser(filename)
+        xml_parser = ooxml.XmlParser(filename)
         for relationship, target in find_external_relationships(xml_parser):
-            did_dump = True
-            print("Found relationship '%s' with external link %s" % (relationship, target))
+            found_external = True
+            log.print_str("Found relationship '%s' with external link %s" % (relationship, target))
             if target.startswith('mhtml:'):
-                print("Potential exploit for CVE-2021-40444")
+                log.print_str("Potential exploit for CVE-2021-40444")
         for target in find_customUI(xml_parser):
-            did_dump = True
-            print("Found customUI tag with external link or VBA macro %s (possibly exploiting CVE-2021-42292)" % target)
+            found_external = True
+            log.print_str("Found customUI tag with external link or VBA macro %s (possibly exploiting CVE-2021-42292)" % target)
 
     # look for ole files inside file (e.g. unzip docx)
     # have to finish work on every ole stream inside iteration, since handles
@@ -893,9 +860,9 @@
             stream = None
             try:
                 stream = ole.openstream(path_parts)
-                print('extract file embedded in OLE object from stream %r:'
+                log.print_str('extract file embedded in OLE object from stream %r:'
                       % stream_path)
-                print('Parsing OLE Package')
+                log.print_str('Parsing OLE Package')
                 opkg = OleNativeStream(stream)
                 # leave stream open until dumping is finished
             except Exception:
@@ -910,9 +877,9 @@
                 log.debug('Object is not embedded but only linked to '
                           '- skip')
                 continue
-            print(u'Filename = "%s"' % opkg.filename)
-            print(u'Source path = "%s"' % opkg.src_path)
-            print(u'Temp path = "%s"' % opkg.temp_path)
+            log.print_str(u'Filename = "%s"' % opkg.filename)
+            log.print_str(u'Source path = "%s"' % opkg.src_path)
+            log.print_str(u'Temp path = "%s"' % opkg.temp_path)
             for embedded_fname in get_sane_embedded_filenames(
                     opkg.filename, opkg.src_path, opkg.temp_path,
                     MAX_FILENAME_LENGTH - len(sane_fname) - 1, index):
@@ -921,67 +888,83 @@
                 break
 
             # dump
-            try:
-                print('saving to file %s' % fname)
-                with open(fname, 'wb') as writer:
-                    n_dumped = 0
-                    next_size = min(DUMP_CHUNK_SIZE, opkg.actual_size)
-                    while next_size:
-                        data = stream.read(next_size)
-                        writer.write(data)
-                        n_dumped += len(data)
-                        if len(data) != next_size:
-                            log.warning('Wanted to read {0}, got {1}'
-                                        .format(next_size, len(data)))
-                            break
-                        next_size = min(DUMP_CHUNK_SIZE,
-                                        opkg.actual_size - n_dumped)
-                did_dump = True
-            except Exception as exc:
-                log.warning('error dumping to {0} ({1})'
-                            .format(fname, exc))
-                err_dumping = True
-            finally:
+            if nodump:
+                log.debug('Skip dumping')
                 stream.close()
+                did_dump = True    # still tell caller that there's something to dump
+            else:
+                try:
+                    log.print_str('saving to file %s' % fname)
+                    with open(fname, 'wb') as writer:
+                        n_dumped = 0
+                        next_size = min(DUMP_CHUNK_SIZE, opkg.actual_size)
+                        while next_size:
+                            data = stream.read(next_size)
+                            writer.write(data)
+                            n_dumped += len(data)
+                            if len(data) != next_size:
+                                log.warning('Wanted to read {0}, got {1}'
+                                            .format(next_size, len(data)))
+                                break
+                            next_size = min(DUMP_CHUNK_SIZE,
+                                            opkg.actual_size - n_dumped)
+                    did_dump = True
+                except Exception as exc:
+                    log.warning('error dumping to {0} ({1})'
+                                .format(fname, exc))
+                    err_dumping = True
+                finally:
+                    stream.close()
             index += 1
 
-    return err_stream, err_dumping, did_dump
+    return err_stream, err_dumping, did_dump, found_external
 
 
-# === MAIN ====================================================================
+# === ARGUMENT PARSING =======================================================
+
+
+# banner to be printed at program start
+BANNER = """oleobj %s - http://decalage.info/python/oletools
+THIS IS WORK IN PROGRESS - Check updates regularly!
+Please report any issue at https://github.com/decalage2/oletools/issues
+""" % __version__
+
+
+class ArgParserWithBanner(argparse.ArgumentParser):
+    """ Print banner before showing any error """
+    def error(self, message):
+        print(BANNER)
+        super(ArgParserWithBanner, self).error(message)
 
 
-def existing_file(filename):
-    """ called by argument parser to see whether given file exists """
-    if not os.path.isfile(filename):
-        raise argparse.ArgumentTypeError('{0} is not a file.'.format(filename))
+def existing_file_or_glob(filename):
+    """ called by argument parser to see whether given file[s] exists """
+    if not os.path.isfile(filename) and not xglob.is_glob(filename):
+        raise argparse.ArgumentTypeError('{0} does not specify existing file[s]'.format(filename))
     return filename
 
 
+# === MAIN ====================================================================
+
 def main(cmd_line_args=None):
     """ main function, called when running this as script
 
     Per default (cmd_line_args=None) uses sys.argv. For testing, however, can
     provide other arguments.
     """
-    # print banner with version
-    ensure_stdout_handles_unicode()
-    print('oleobj %s - http://decalage.info/oletools' % __version__)
-    print('THIS IS WORK IN PROGRESS - Check updates regularly!')
-    print('Please report any issue at '
-          'https://github.com/decalage2/oletools/issues')
-    print('')
-
     usage = 'usage: %(prog)s [options] [filename2 ...]'
-    parser = argparse.ArgumentParser(usage=usage)
+    parser = ArgParserWithBanner(usage=usage)
     # parser.add_argument('-o', '--outfile', dest='outfile',
    #     help='output file')
     # parser.add_argument('-c', '--csv', dest='csv',
     #     help='export results to a CSV file')
     parser.add_argument("-r", action="store_true", dest="recursive",
-                        help='find files recursively in subdirectories.')
+                        help='find files recursively in subdirectories. '
+                             'Input arg must still be file or glob.')
     parser.add_argument("-d", type=str, dest="output_dir", default=None,
                         help='use specified directory to output files.')
+    parser.add_argument("--nodump", action="store_true",
+                        help="Do not dump anything, just check for external relationships")
     parser.add_argument("-z", "--zip", dest='zip_password', type=str,
                         default=None,
                         help='if the file is a zip archive, open first file '
@@ -996,11 +979,13 @@
                         default=DEFAULT_LOG_LEVEL,
                         help='logging level debug/info/warning/error/critical '
                              '(default=%(default)s)')
-    parser.add_argument('input', nargs='*', type=existing_file, metavar='FILE',
+    parser.add_argument('-j', '--json', action='store_true',
+                        help='Convert all output to json format')
+    parser.add_argument('input', nargs='*', type=existing_file_or_glob, metavar='FILE',
                         help='Office files to parse (same as -i)')
 
     # options for compatibility with ripOLE
-    parser.add_argument('-i', '--more-input', type=str, metavar='FILE',
+    parser.add_argument('-i', '--more-input', type=existing_file_or_glob, metavar='FILE',
                         help='Additional file to parse (same as positional '
                              'arguments)')
     parser.add_argument('-v', '--verbose', action='store_true',
@@ -1013,25 +998,30 @@
     if options.verbose:
         options.loglevel = 'debug'
 
-    # Print help if no arguments are passed
-    if not options.input:
-        parser.print_help()
-        return RETURN_ERR_ARGS
-
     # Setup logging to the console:
     # here we use stdout instead of stderr by default, so that the output
     # can be redirected properly.
-    logging.basicConfig(level=LOG_LEVELS[options.loglevel], stream=sys.stdout,
-                        format='%(levelname)-8s %(message)s')
-    # enable logging in the modules:
-    log.setLevel(logging.NOTSET)
     if options.loglevel == 'debug-olefile':
+        if options.json:
+            raise argparse.ArgumentTypeError('log-level "debug-olefile" cannot be combined with "--json"')
         olefile.enable_logging()
+        options.loglevel = 'debug'
+    log_helper.enable_logging(level=options.loglevel, use_json=options.json,
+                              stream=sys.stdout)
+
+    # first thing after enabling logging: print banner
+    log.print_str(BANNER)
+
+    # Print help if no arguments are passed
+    if not options.input:
+        log.print_str(parser.format_help())
+        return RETURN_ERR_ARGS
 
     # remember if there was a problem and continue with other data
     any_err_stream = False
     any_err_dumping = False
     any_did_dump = False
+    any_found_external = False
 
     for container, filename, data in \
             xglob.iter_files(options.input, recursive=options.recursive,
@@ -1040,11 +1030,15 @@
         # ignore directory names stored in zip files:
         if container and filename.endswith('/'):
             continue
-        err_stream, err_dumping, did_dump = \
-            process_file(filename, data, options.output_dir)
+        err_stream, err_dumping, did_dump, found_external = \
+            process_file(filename, data, options.output_dir, options.nodump)
         any_err_stream |= err_stream
         any_err_dumping |= err_dumping
         any_did_dump |= did_dump
+        any_found_external |= found_external
+
+    # end logging
+    log_helper.end_logging()
 
     # assemble return value
     return_val = RETURN_NO_DUMP
@@ -1054,6 +1048,8 @@
         return_val += RETURN_ERR_STREAM
     if any_err_dumping:
         return_val += RETURN_ERR_DUMP
+    if any_found_external:
+        return_val += RETURN_FOUND_EXTERNAL
 
     return return_val
diff --git a/oletools/olevba.py b/oletools/olevba.py
index 52ffd5126..dd338f47d 100644
--- a/oletools/olevba.py
+++ b/oletools/olevba.py
@@ -341,7 +341,6 @@
 from oletools import oleform
 from oletools import rtfobj
 from oletools import crypto
-from oletools.common.io_encoding import ensure_stdout_handles_unicode
 from oletools.common import codepages
 from oletools import ftguess
 from oletools.common.log_helper import log_helper
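The tests below exercise the behaviour added above. For module users, a minimal sketch of the extended process_file() interface as defined in the oleobj diff ('sample.docx' is a placeholder path, not part of the patch):

    from oletools import oleobj

    # nodump=True: report what would be dumped, but write nothing to disk
    err_stream, err_dumping, did_dump, found_external = oleobj.process_file(
        'sample.docx', data=None, output_dir=None, nodump=True)
    if found_external:
        # scripts get the same signal via the exit-code bit mask:
        # RETURN_ERR_STREAM (4), RETURN_ERR_DUMP (8), RETURN_FOUND_EXTERNAL (16)
        print('file references external content')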
diff --git a/tests/common/test_json.py b/tests/common/test_json.py
new file mode 100644
index 000000000..37da56927
--- /dev/null
+++ b/tests/common/test_json.py
@@ -0,0 +1,44 @@
+"""
+Test that all --json output is always valid json.
+
+Since this test takes rather long, it is not included in regular unittest runs.
+To enable it, set environment variable OLETOOLS_TEST_JSON to value "1".
+"""
+
+import os
+from os.path import relpath
+import json
+import unittest
+
+from tests.test_utils import DATA_BASE_DIR, call_and_capture
+from tests.test_utils.testdata_reader import loop_and_extract
+
+
+@unittest.skipIf('OLETOOLS_TEST_JSON' not in os.environ or os.environ['OLETOOLS_TEST_JSON'] != '1',
+                 'Test takes pretty long, do not include in regular test runs')
+class TestJson(unittest.TestCase):
+    """Test that all --json output is always valid json."""
+
+    def test_all(self):
+        """Check that olevba, msodde and oleobj produce valid json for ALL samples."""
+        for sample_path in loop_and_extract():
+            if sample_path.startswith(DATA_BASE_DIR):
+                print(f'TestJson: checking sample {relpath(sample_path, DATA_BASE_DIR)}')
+            else:
+                print(f'TestJson: checking sample {sample_path}')
+            output, _ = call_and_capture('oleobj', ['--json', '--nodump', sample_path],
+                                         accept_nonzero_exit=True)
+            json.loads(output)
+
+            output, _ = call_and_capture('olevba', ['--json', sample_path],
+                                         accept_nonzero_exit=True)
+            json.loads(output)
+
+            output, _ = call_and_capture('msodde', ['--json', sample_path],
+                                         accept_nonzero_exit=True)
+            json.loads(output)
+
+
+# just in case somebody calls this file as a script
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/oleobj/test_basic.py b/tests/oleobj/test_basic.py
index 3fdcab037..f9cc1d559 100644
--- a/tests/oleobj/test_basic.py
+++ b/tests/oleobj/test_basic.py
@@ -3,6 +3,7 @@
 import unittest
 from tempfile import mkdtemp
 from shutil import rmtree
+from os import listdir
 from os.path import join, isfile
 from hashlib import md5
 from glob import glob
@@ -68,16 +69,16 @@ def preread_file(args):
         raise ValueError('ignore_arg not as expected!')
     with open(filename, 'rb') as file_handle:
         data = file_handle.read()
-    err_stream, err_dumping, did_dump = \
+    err_stream, err_dumping, did_dump, found_external = \
         oleobj.process_file(filename, data, output_dir=output_dir)
-    if did_dump and not err_stream and not err_dumping:
+    if did_dump and not err_stream and not err_dumping and not found_external:
         return oleobj.RETURN_DID_DUMP
     else:
-        return oleobj.RETURN_NO_DUMP   # just anything else
+        return oleobj.RETURN_NO_DUMP   # just anything else, will cause error
 
 
 class TestOleObj(unittest.TestCase):
-    """ Tests oleobj basic feature """
+    """Tests oleobj basic feature: dump embedded content."""
 
     def setUp(self):
         """ fixture start: create temp dir """
@@ -158,6 +159,17 @@ def test_non_streamed(self):
         return self.do_test_md5(['-d', self.temp_dir], test_fun=preread_file,
                                 only_run_every=4)
 
+    def test_nodump(self):
+        """Ensure that with --nodump nothing is ever written to disk."""
+        data_dir = join(DATA_BASE_DIR, 'oleobj')
+        for sample_name, _, _ in SAMPLES:
+            args = ['-d', self.temp_dir, '--nodump', join(data_dir, sample_name)]
+            call_and_capture('oleobj', args,
+                             accept_nonzero_exit=True)
+            temp_dir_contents = listdir(self.temp_dir)
+            if temp_dir_contents:
+                self.fail('Found file in temp dir despite "--nodump": {}'.format(temp_dir_contents))
+
 
 class TestSaneFilenameCreation(unittest.TestCase):
     """ Test sanitization / creation of sane filenames """
diff --git a/tests/oleobj/test_external_links.py b/tests/oleobj/test_external_links.py
index 2b7fc5bff..05df3b12a 100644
--- a/tests/oleobj/test_external_links.py
+++ b/tests/oleobj/test_external_links.py
@@ -21,12 +21,20 @@ def test_external_links(self):
         for dirpath, _, filenames in os.walk(BASE_DIR):
             for filename in filenames:
                 file_path = path.join(dirpath, filename)
+                if not path.isfile(file_path):
+                    continue
 
-                output, ret_val = call_and_capture('oleobj', [file_path, ],
+                output, ret_val = call_and_capture('oleobj', ['--nodump', file_path, ],
                                                    accept_nonzero_exit=True)
-                self.assertEqual(ret_val, oleobj.RETURN_DID_DUMP,
+                self.assertEqual(ret_val, oleobj.RETURN_FOUND_EXTERNAL,
                                  msg='Wrong return value {} for {}. Output:\n{}'
                                  .format(ret_val, filename, output))
+                found_relationship = False
+                for line in output.splitlines():
+                    if line.startswith('Found relationship'):
+                        found_relationship = True
+                        break
+                self.assertTrue(found_relationship)
 
 
 # just in case somebody calls this file as a script
diff --git a/tests/test_utils/testdata_reader.py b/tests/test_utils/testdata_reader.py
index 5f1a6baad..81f96f030 100644
--- a/tests/test_utils/testdata_reader.py
+++ b/tests/test_utils/testdata_reader.py
@@ -7,9 +7,9 @@
 """
 
 import os, sys, zipfile
-from os.path import relpath, join, isfile
+from os.path import relpath, join, isfile, splitext
 from contextlib import contextmanager
-from tempfile import mkstemp
+from tempfile import mkstemp, TemporaryDirectory, NamedTemporaryFile
 
 from . import DATA_BASE_DIR
 
@@ -73,11 +73,10 @@ def loop_over_files(subdir=''):
     and the contents of the file, with the file being unzipped first if it
     ends with .zip.
 
-    :arg str subdir: Optional subdir of test data dir that caller is interested
-                     in
-    """
-    # create temp dir to extract files into
+    See also: :py:meth:`loop_and_extract`
 
+    :param str subdir: Optional subdir of test data dir that caller is interested in
+    """
     for base_dir, _, files in os.walk(join(DATA_BASE_DIR, subdir)):
         for filename in files:
             relative_path = relpath(join(base_dir, filename), DATA_BASE_DIR)
@@ -87,6 +86,41 @@
             yield relative_path, read(relative_path)
 
 
+def loop_and_extract(subdir=''):
+    """
+    Find all files, decrypting them to tempdir if necessary.
+
+    Does an `os.walk` through all test data or the given subdir and yields
+    the absolute path for each sample, which is either its original location
+    in `DATA_BASE_DIR` or in a temporary directory if it had to be decrypted.
+
+    The temp dir and files inside it are always deleted right after usage.
+
+    See also: :py:meth:`loop_over_files`
+
+    :param str subdir: Optional subdir of test data dir that caller is interested in
+    """
+    with TemporaryDirectory(prefix='oletools-test-') as temp_dir:
+        for base_dir, _, files in os.walk(join(DATA_BASE_DIR, subdir)):
+            for filename in files:
+                full_path = join(base_dir, filename)
+                if filename.endswith('.zip'):
+                    # remove the ".zip" and split the rest into actual name and extension
+                    actual_name, actual_extn = splitext(splitext(filename)[0])
+
+                    with zipfile.ZipFile(full_path, 'r') as zip_file:
+                        # create a temp file that has a proper file name and is deleted on closing
+                        with NamedTemporaryFile(dir=temp_dir, prefix=actual_name, suffix=actual_extn) \
+                                as temp_file:
+                            # our test samples are not big, so we can read the whole thing at once
+                            temp_file.write(zip_file.read(zip_file.namelist()[0],
+                                                          pwd=ENCRYPTED_FILES_PASSWORD))
+                            temp_file.flush()
+                            yield temp_file.name
+                else:
+                    yield full_path
+
+
 @contextmanager
 def decrypt_sample(relpath):
     """
diff --git a/tests/test_utils/utils.py b/tests/test_utils/utils.py
index 45cedc8d3..64f73b0bd 100644
--- a/tests/test_utils/utils.py
+++ b/tests/test_utils/utils.py
@@ -29,15 +29,15 @@ def call_and_capture(module, args=None, accept_nonzero_exit=False,
     Only drawback sofar: stdout and stderr are merged into one (which is what
     users see on their shell as well). When testing for json-compatible output
     you should `exclude_stderr` to `False` since logging ignores stderr,
-    so unforseen warnings (e.g. issued by pypy) would mess up your json.
+    so unforeseen warnings (e.g. issued by pypy) would mess up your json.
 
     :param str module: name of module to test, e.g. `olevba`
     :param args: arguments for module's main function
-    :param bool fail_nonzero: Raise error if command returns non-0 return code
+    :param bool accept_nonzero_exit: Do not raise error if command returns non-0 return code
     :param bool exclude_stderr: Exclude output to `sys.stderr` from output
                                 (e.g. if parsing output through json)
-    :returns: ret_code, output
-    :rtype: int, str
+    :returns: output, ret_code
+    :rtype: str, int
     """
     # create a PYTHONPATH environment var to prefer our current code
     env = os.environ.copy()
@@ -47,13 +47,6 @@
     except KeyError:
         env['PYTHONPATH'] = SOURCE_BASE_DIR
 
-    # hack: in python2 output encoding (sys.stdout.encoding) was None
-    # although sys.getdefaultencoding() and sys.getfilesystemencoding were ok
-    # TODO: maybe can remove this once branch
-    # "encoding-for-non-unicode-environments" is merged
-    if 'PYTHONIOENCODING' not in env:
-        env['PYTHONIOENCODING'] = 'utf8'
-
     # ensure args is a tuple
     my_args = tuple(args) if args else ()
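Usage note for the additions in this patch: the JSON round-trip suite is opt-in via the environment-variable gate defined in tests/common/test_json.py, so it would be run along these lines (assuming the repository root as working directory):

    OLETOOLS_TEST_JSON=1 python -m unittest tests.common.test_json

and the new oleobj flags can be combined on the command line ('sample.docx' is a placeholder name):

    python oletools/oleobj.py --nodump --json sample.docx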