diff --git a/oletools/common/io_encoding.py b/oletools/common/io_encoding.py
index b32d82d22..629569166 100644
--- a/oletools/common/io_encoding.py
+++ b/oletools/common/io_encoding.py
@@ -7,7 +7,7 @@ or unusual language settings.
 
 In such settings, output to console falls back to ASCII-only. Also open()
-suddenly fails to interprete non-ASCII characters.
+suddenly fails to interpret non-ASCII characters.
 
 Therefore, at start of scripts can run :py:meth:`ensure_stdout_handles_unicode`
 and when opening text files use :py:meth:`uopen` to replace :py:meth:`open`.
@@ -18,7 +18,7 @@
 
 # === LICENSE =================================================================
 
-# msodde is copyright (c) 2017-2018 Philippe Lagadec (http://www.decalage.info)
+# io_encoding is copyright (c) 2017-2018 Philippe Lagadec (http://www.decalage.info)
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
diff --git a/oletools/msodde.py b/oletools/msodde.py
index 303d97476..b32ed7ed9 100644
--- a/oletools/msodde.py
+++ b/oletools/msodde.py
@@ -74,7 +74,6 @@
 from oletools import rtfobj
 from oletools.ppt_record_parser import is_ppt
 from oletools import crypto
-from oletools.common.io_encoding import ensure_stdout_handles_unicode
 from oletools.common.log_helper import log_helper
 
 # -----------------------------------------------------------------------------
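Note on the import removals above: console and unicode handling now go through the shared log_helper module instead of ensure_stdout_handles_unicode() plus bare print(). A minimal sketch of the resulting pattern, using only the log_helper calls actually exercised by this patch ('myscript' and main() are illustrative placeholders, not part of the patch):

    import sys
    from oletools.common.log_helper import log_helper

    # module level: a silent logger; it prints nothing until logging is enabled
    log = log_helper.get_or_create_silent_logger('myscript')

    def main():
        # output goes to stdout so it can be redirected; use_json for --json mode
        log_helper.enable_logging(level='warning', use_json=False, stream=sys.stdout)
        log.print_str('result lines that previously went through print()')
        log_helper.end_logging()  # finalize output; the patch calls this after all files are processed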
diff --git a/oletools/oleobj.py b/oletools/oleobj.py
index 9f67752ea..7daca703a 100644
--- a/oletools/oleobj.py
+++ b/oletools/oleobj.py
@@ -2,8 +2,9 @@
 """
 oleobj.py
 
-oleobj is a Python script and module to parse OLE objects and files stored
-into various MS Office file formats (doc, xls, ppt, docx, xlsx, pptx, etc)
+oleobj is a Python script and module to extract OLE objects and files stored
+into various MS Office file formats (doc, xls, ppt, docx, xlsx, pptx, etc).
+It also finds external relationships in newer xml-based office file formats.
 
 Author: Philippe Lagadec - http://www.decalage.info
 License: BSD, see source code or documentation
@@ -43,7 +44,6 @@
 from __future__ import print_function
 
-import logging
 import struct
 import argparse
 import os
@@ -68,10 +68,11 @@ sys.path.insert(0, _parent_dir)
 
 from oletools.thirdparty import xglob
+from oletools import record_base
 from oletools.ppt_record_parser import (is_ppt, PptFile,
                                         PptRecordExOleVbaActiveXAtom)
-from oletools.ooxml import XmlParser
-from oletools.common.io_encoding import ensure_stdout_handles_unicode
+from oletools import ooxml
+from oletools.common.log_helper import log_helper
 
 # -----------------------------------------------------------------------------
 # CHANGELOG:
@@ -94,7 +95,6 @@
 # -----------------------------------------------------------------------------
 # TODO:
-# + setup logging (common with other oletools)
 # -----------------------------------------------------------------------------
 
 
@@ -111,64 +111,18 @@
 # === LOGGING =================================================================
 
 DEFAULT_LOG_LEVEL = "warning"
-LOG_LEVELS = {'debug': logging.DEBUG,
-              'info': logging.INFO,
-              'warning': logging.WARNING,
-              'error': logging.ERROR,
-              'critical': logging.CRITICAL,
-              'debug-olefile': logging.DEBUG}
-
-
-class NullHandler(logging.Handler):
-    """
-    Log Handler without output, to avoid printing messages if logging is not
-    configured by the main application.
-
-    Python 2.7 has logging.NullHandler, but this is necessary for 2.6:
-    see https://docs.python.org/2.6/library/logging.html section
-    configuring-logging-for-a-library
-    """
-    def emit(self, record):
-        pass
-
-
-def get_logger(name, level=logging.CRITICAL+1):
-    """
-    Create a suitable logger object for this module.
-    The goal is not to change settings of the root logger, to avoid getting
-    other modules' logs on the screen.
-    If a logger exists with same name, reuse it. (Else it would have duplicate
-    handlers and messages would be doubled.)
-    The level is set to CRITICAL+1 by default, to avoid any logging.
-    """
-    # First, test if there is already a logger with the same name, else it
-    # will generate duplicate messages (due to duplicate handlers):
-    if name in logging.Logger.manager.loggerDict:
-        # NOTE: another less intrusive but more "hackish" solution would be to
-        # use getLogger then test if its effective level is not default.
-        logger = logging.getLogger(name)
-        # make sure level is OK:
-        logger.setLevel(level)
-        return logger
-    # get a new logger:
-    logger = logging.getLogger(name)
-    # only add a NullHandler for this logger, it is up to the application
-    # to configure its own logging:
-    logger.addHandler(NullHandler())
-    logger.setLevel(level)
-    return logger
-
 # a global logger object used for debugging:
-log = get_logger('oleobj')  # pylint: disable=invalid-name
+log = log_helper.get_or_create_silent_logger("oleobj")
 
 
 def enable_logging():
-    """
-    Enable logging for this module (disabled by default).
-    This will set the module-specific logger level to NOTSET, which
-    means the main application controls the actual logging level.
-    """
-    log.setLevel(logging.NOTSET)
+    """Enable logging in this module; for use by importing scripts"""
+    log.setLevel(log_helper.NOTSET)
+    ooxml.enable_logging()
+    record_base.enable_logging()  # for ppt_record_parser
+
+    # do not enable logging for olefile, have extra logging value "debug-olefile" for that
 
 
 # === CONSTANTS ===============================================================
@@ -211,6 +165,7 @@ def enable_logging():
 RETURN_ERR_ARGS = 2    # reserve for OptionParser.parse_args
 RETURN_ERR_STREAM = 4  # error opening/parsing a stream
 RETURN_ERR_DUMP = 8    # error dumping data from stream to file
+RETURN_FOUND_EXTERNAL = 16  # found an external relationship
 
 # Not sure if they can all be "External", but just in case
 BLACKLISTED_RELATIONSHIP_TYPES = [
@@ -428,6 +383,7 @@ def parse(self, data):
             # TODO: SLACK DATA
         except (IOError, struct.error):      # no data to read actual_size
             log.debug('data is not embedded but only a link')
+            # TODO: extract that link and log it, might point to malicious content
             self.is_link = True
             self.actual_size = 0
             self.data = None
@@ -578,7 +534,7 @@ def get_sane_embedded_filenames(filename, src_path, tmp_path, max_len,
         # identify suffix. Dangerous suffixes are all short
         idx = candidate.rfind('.')
-        if idx is -1:
+        if idx == -1:
             candidates_without_suffix.append(candidate)
             continue
         elif idx < len(candidate)-5:
@@ -757,7 +713,7 @@ def find_ole(filename, data, xml_parser=None):
         # keep compatibility with 3rd-party code that calls this function
         # directly without providing an XmlParser instance
         if xml_parser is None:
-            xml_parser = XmlParser(arg_for_zip)
+            xml_parser = ooxml.XmlParser(arg_for_zip)
             # force iteration so XmlParser.iter_non_xml() returns data
             for _ in xml_parser.iter_xml():
                 pass
@@ -832,7 +788,7 @@ def find_customUI(xml_parser):
             yield customui_onload
 
 
-def process_file(filename, data, output_dir=None):
+def process_file(filename, data, output_dir=None, nodump=False):
     """
     find embedded objects in given file
 
     if data is given (from xglob for encrypted zip files), then filename is
@@ -841,12 +797,22 @@
     If output_dir is given and does not exist, it is created. If it is not
     given, data is saved to same directory as the input file.
+
+    If nodump is given as `True`, nothing is written to disk; the file is only
+    checked for presence of dump-able content or external relationships. The
+    returned flag `did_dump` is still set to `True` if something dump-worthy is
+    found.
+
+    Returns 4 bools: `(err_stream, err_dumping, did_dump, found_external)`,
+    indicating whether there was an error extracting streams, whether there was
+    an error dumping data, whether anything dump-worthy was found, and whether
+    an external relationship was found.
     """
     # sanitize filename, leave space for embedded filename part
     sane_fname = sanitize_filename(filename, max_len=MAX_FILENAME_LENGTH-5) or\
         'NONAME'
     if output_dir:
-        if not os.path.isdir(output_dir):
+        if not os.path.isdir(output_dir) and not nodump:
             log.info('creating output directory %s', output_dir)
             os.mkdir(output_dir)
@@ -855,29 +821,30 @@
         base_dir = os.path.dirname(filename)
         fname_prefix = os.path.join(base_dir, sane_fname)
 
-    # TODO: option to extract objects to files (false by default)
-    print('-'*79)
-    print('File: %r' % filename)
+    # TODO: option to extract objects to files (false by default) (solved by option nodump?)
+    log.print_str('-'*79)
+    log.print_str('File: %r' % filename)
     index = 1
 
     # do not throw errors but remember them and try continue with other streams
     err_stream = False
     err_dumping = False
     did_dump = False
+    found_external = False
 
     xml_parser = None
     if is_zipfile(filename):
         log.info('file could be an OOXML file, looking for relationships with '
                  'external links')
-        xml_parser = XmlParser(filename)
+        xml_parser = ooxml.XmlParser(filename)
         for relationship, target in find_external_relationships(xml_parser):
-            did_dump = True
-            print("Found relationship '%s' with external link %s" % (relationship, target))
+            found_external = True
+            log.print_str("Found relationship '%s' with external link %s" % (relationship, target))
             if target.startswith('mhtml:'):
-                print("Potential exploit for CVE-2021-40444")
+                log.print_str("Potential exploit for CVE-2021-40444")
         for target in find_customUI(xml_parser):
-            did_dump = True
-            print("Found customUI tag with external link or VBA macro %s (possibly exploiting CVE-2021-42292)" % target)
+            found_external = True
+            log.print_str("Found customUI tag with external link or VBA macro %s (possibly exploiting CVE-2021-42292)" % target)
 
     # look for ole files inside file (e.g. unzip docx)
     # have to finish work on every ole stream inside iteration, since handles
@@ -893,9 +860,9 @@
             stream = None
             try:
                 stream = ole.openstream(path_parts)
-                print('extract file embedded in OLE object from stream %r:'
+                log.print_str('extract file embedded in OLE object from stream %r:'
                       % stream_path)
-                print('Parsing OLE Package')
+                log.print_str('Parsing OLE Package')
                 opkg = OleNativeStream(stream)
                 # leave stream open until dumping is finished
             except Exception:
@@ -910,9 +877,9 @@
                 log.debug('Object is not embedded but only linked to '
                           '- skip')
                 continue
-            print(u'Filename = "%s"' % opkg.filename)
-            print(u'Source path = "%s"' % opkg.src_path)
-            print(u'Temp path = "%s"' % opkg.temp_path)
+            log.print_str(u'Filename = "%s"' % opkg.filename)
+            log.print_str(u'Source path = "%s"' % opkg.src_path)
+            log.print_str(u'Temp path = "%s"' % opkg.temp_path)
             for embedded_fname in get_sane_embedded_filenames(
                     opkg.filename, opkg.src_path, opkg.temp_path,
                     MAX_FILENAME_LENGTH - len(sane_fname) - 1, index):
@@ -921,67 +888,83 @@
                 break
 
             # dump
-            try:
-                print('saving to file %s' % fname)
-                with open(fname, 'wb') as writer:
-                    n_dumped = 0
-                    next_size = min(DUMP_CHUNK_SIZE, opkg.actual_size)
-                    while next_size:
-                        data = stream.read(next_size)
-                        writer.write(data)
-                        n_dumped += len(data)
-                        if len(data) != next_size:
-                            log.warning('Wanted to read {0}, got {1}'
-                                        .format(next_size, len(data)))
-                            break
-                        next_size = min(DUMP_CHUNK_SIZE,
-                                        opkg.actual_size - n_dumped)
-                did_dump = True
-            except Exception as exc:
-                log.warning('error dumping to {0} ({1})'
-                            .format(fname, exc))
-                err_dumping = True
-            finally:
+            if nodump:
+                log.debug('Skip dumping')
                 stream.close()
+                did_dump = True    # still tell caller that there's something to dump
+            else:
+                try:
+                    log.print_str('saving to file %s' % fname)
+                    with open(fname, 'wb') as writer:
+                        n_dumped = 0
+                        next_size = min(DUMP_CHUNK_SIZE, opkg.actual_size)
+                        while next_size:
+                            data = stream.read(next_size)
+                            writer.write(data)
+                            n_dumped += len(data)
+                            if len(data) != next_size:
+                                log.warning('Wanted to read {0}, got {1}'
+                                            .format(next_size, len(data)))
+                                break
+                            next_size = min(DUMP_CHUNK_SIZE,
+                                            opkg.actual_size - n_dumped)
+                    did_dump = True
+                except Exception as exc:
+                    log.warning('error dumping to {0} ({1})'
+                                .format(fname, exc))
+                    err_dumping = True
+                finally:
+                    stream.close()
             index += 1
 
-    return err_stream, err_dumping, did_dump
+    return err_stream, err_dumping, did_dump, found_external
 
 
-# === MAIN ====================================================================
+# === ARGUMENT PARSING =======================================================
+
+
+# banner to be printed at program start
+BANNER = """oleobj %s - http://decalage.info/python/oletools
+THIS IS WORK IN PROGRESS - Check updates regularly!
+Please report any issue at https://github.com/decalage2/oletools/issues
+""" % __version__
+
+
+class ArgParserWithBanner(argparse.ArgumentParser):
+    """ Print banner before showing any error """
+    def error(self, message):
+        print(BANNER)
+        super(ArgParserWithBanner, self).error(message)
 
 
-def existing_file(filename):
-    """ called by argument parser to see whether given file exists """
-    if not os.path.isfile(filename):
-        raise argparse.ArgumentTypeError('{0} is not a file.'.format(filename))
+def existing_file_or_glob(filename):
+    """ called by argument parser to see whether given file[s] exists """
+    if not os.path.isfile(filename) and not xglob.is_glob(filename):
+        raise argparse.ArgumentTypeError('{0} does not specify existing file[s]'.format(filename))
     return filename
 
 
+# === MAIN ====================================================================
+
 def main(cmd_line_args=None):
     """ main function, called when running this as script
 
     Per default (cmd_line_args=None) uses sys.argv. For testing, however, can
     provide other arguments.
     """
-    # print banner with version
-    ensure_stdout_handles_unicode()
-    print('oleobj %s - http://decalage.info/oletools' % __version__)
-    print('THIS IS WORK IN PROGRESS - Check updates regularly!')
-    print('Please report any issue at '
-          'https://github.com/decalage2/oletools/issues')
-    print('')
-
     usage = 'usage: %(prog)s [options] [filename2 ...]'
-    parser = argparse.ArgumentParser(usage=usage)
+    parser = ArgParserWithBanner(usage=usage)
     # parser.add_argument('-o', '--outfile', dest='outfile',
    #     help='output file')
     # parser.add_argument('-c', '--csv', dest='csv',
     #     help='export results to a CSV file')
     parser.add_argument("-r", action="store_true", dest="recursive",
-                        help='find files recursively in subdirectories.')
+                        help='find files recursively in subdirectories. '
+                             'Input arg must still be file or glob.')
     parser.add_argument("-d", type=str, dest="output_dir", default=None,
                         help='use specified directory to output files.')
+    parser.add_argument("--nodump", action="store_true",
+                        help="Do not dump anything, just check for external relationships")
     parser.add_argument("-z", "--zip", dest='zip_password', type=str,
                         default=None,
                         help='if the file is a zip archive, open first file '
@@ -996,11 +979,13 @@
                         default=DEFAULT_LOG_LEVEL,
                         help='logging level debug/info/warning/error/critical '
                              '(default=%(default)s)')
-    parser.add_argument('input', nargs='*', type=existing_file, metavar='FILE',
+    parser.add_argument('-j', '--json', action='store_true',
+                        help='Convert all output to json format')
+    parser.add_argument('input', nargs='*', type=existing_file_or_glob, metavar='FILE',
                         help='Office files to parse (same as -i)')
 
     # options for compatibility with ripOLE
-    parser.add_argument('-i', '--more-input', type=str, metavar='FILE',
+    parser.add_argument('-i', '--more-input', type=existing_file_or_glob, metavar='FILE',
                         help='Additional file to parse (same as positional '
                              'arguments)')
     parser.add_argument('-v', '--verbose', action='store_true',
@@ -1013,25 +998,30 @@
     if options.verbose:
         options.loglevel = 'debug'
 
-    # Print help if no arguments are passed
-    if not options.input:
-        parser.print_help()
-        return RETURN_ERR_ARGS
-
     # Setup logging to the console:
     # here we use stdout instead of stderr by default, so that the output
     # can be redirected properly.
-    logging.basicConfig(level=LOG_LEVELS[options.loglevel], stream=sys.stdout,
-                        format='%(levelname)-8s %(message)s')
-    # enable logging in the modules:
-    log.setLevel(logging.NOTSET)
     if options.loglevel == 'debug-olefile':
+        if options.json:
+            raise argparse.ArgumentTypeError('log-level "debug-olefile" cannot be combined with "--json"')
         olefile.enable_logging()
+        options.loglevel = 'debug'
+    log_helper.enable_logging(level=options.loglevel, use_json=options.json,
+                              stream=sys.stdout)
+
+    # first thing after enabling logging: print banner
+    log.print_str(BANNER)
+
+    # Print help if no arguments are passed
+    if not options.input:
+        log.print_str(parser.format_help())
+        return RETURN_ERR_ARGS
 
     # remember if there was a problem and continue with other data
     any_err_stream = False
     any_err_dumping = False
     any_did_dump = False
+    any_found_external = False
 
     for container, filename, data in \
             xglob.iter_files(options.input, recursive=options.recursive,
@@ -1040,11 +1030,15 @@
         # ignore directory names stored in zip files:
         if container and filename.endswith('/'):
             continue
-        err_stream, err_dumping, did_dump = \
-            process_file(filename, data, options.output_dir)
+        err_stream, err_dumping, did_dump, found_external = \
+            process_file(filename, data, options.output_dir, options.nodump)
         any_err_stream |= err_stream
         any_err_dumping |= err_dumping
         any_did_dump |= did_dump
+        any_found_external |= found_external
+
+    # end logging
+    log_helper.end_logging()
 
     # assemble return value
     return_val = RETURN_NO_DUMP
@@ -1054,6 +1048,8 @@
         return_val += RETURN_ERR_STREAM
     if any_err_dumping:
         return_val += RETURN_ERR_DUMP
+    if any_found_external:
+        return_val += RETURN_FOUND_EXTERNAL
 
     return return_val
diff --git a/oletools/olevba.py b/oletools/olevba.py
index 52ffd5126..dd338f47d 100644
--- a/oletools/olevba.py
+++ b/oletools/olevba.py
@@ -341,7 +341,6 @@
 from oletools import oleform
 from oletools import rtfobj
 from oletools import crypto
-from oletools.common.io_encoding import ensure_stdout_handles_unicode
 from oletools.common import codepages
 from oletools import ftguess
 from oletools.common.log_helper import log_helper
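The tests below exercise the behaviour added above. For module users, a minimal sketch of the extended process_file() interface as defined in the oleobj diff ('sample.docx' is a placeholder path, not part of the patch):

    from oletools import oleobj

    # nodump=True: report what would be dumped, but write nothing to disk
    err_stream, err_dumping, did_dump, found_external = oleobj.process_file(
        'sample.docx', data=None, output_dir=None, nodump=True)
    if found_external:
        # scripts get the same signal via the exit-code bit mask:
        # RETURN_ERR_STREAM (4), RETURN_ERR_DUMP (8), RETURN_FOUND_EXTERNAL (16)
        print('file references external content')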
diff --git a/tests/common/test_json.py b/tests/common/test_json.py
new file mode 100644
index 000000000..37da56927
--- /dev/null
+++ b/tests/common/test_json.py
@@ -0,0 +1,44 @@
+"""
+Test that all --json output is always valid json.
+
+Since this test takes rather long, it is not included in regular unittest runs.
+To enable it, set environment variable OLETOOLS_TEST_JSON to value "1".
+"""
+
+import os
+from os.path import relpath
+import json
+import unittest
+
+from tests.test_utils import DATA_BASE_DIR, call_and_capture
+from tests.test_utils.testdata_reader import loop_and_extract
+
+
+@unittest.skipIf('OLETOOLS_TEST_JSON' not in os.environ or os.environ['OLETOOLS_TEST_JSON'] != '1',
+                 'Test takes pretty long, do not include in regular test runs')
+class TestJson(unittest.TestCase):
+    """Test that all --json output is always valid json."""
+
+    def test_all(self):
+        """Check that olevba, msodde and oleobj produce valid json for ALL samples."""
+        for sample_path in loop_and_extract():
+            if sample_path.startswith(DATA_BASE_DIR):
+                print(f'TestJson: checking sample {relpath(sample_path, DATA_BASE_DIR)}')
+            else:
+                print(f'TestJson: checking sample {sample_path}')
+            output, _ = call_and_capture('oleobj', ['--json', '--nodump', sample_path],
+                                         accept_nonzero_exit=True)
+            json.loads(output)
+
+            output, _ = call_and_capture('olevba', ['--json', sample_path],
+                                         accept_nonzero_exit=True)
+            json.loads(output)
+
+            output, _ = call_and_capture('msodde', ['--json', sample_path],
+                                         accept_nonzero_exit=True)
+            json.loads(output)
+
+
+# just in case somebody calls this file as a script
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/oleobj/test_basic.py b/tests/oleobj/test_basic.py
index 3fdcab037..f9cc1d559 100644
--- a/tests/oleobj/test_basic.py
+++ b/tests/oleobj/test_basic.py
@@ -3,6 +3,7 @@
 import unittest
 from tempfile import mkdtemp
 from shutil import rmtree
+from os import listdir
 from os.path import join, isfile
 from hashlib import md5
 from glob import glob
@@ -68,16 +69,16 @@ def preread_file(args):
         raise ValueError('ignore_arg not as expected!')
     with open(filename, 'rb') as file_handle:
         data = file_handle.read()
-    err_stream, err_dumping, did_dump = \
+    err_stream, err_dumping, did_dump, found_external = \
         oleobj.process_file(filename, data, output_dir=output_dir)
-    if did_dump and not err_stream and not err_dumping:
+    if did_dump and not err_stream and not err_dumping and not found_external:
         return oleobj.RETURN_DID_DUMP
     else:
-        return oleobj.RETURN_NO_DUMP   # just anything else
+        return oleobj.RETURN_NO_DUMP   # just anything else, will cause error
 
 
 class TestOleObj(unittest.TestCase):
-    """ Tests oleobj basic feature """
+    """Tests oleobj basic feature: dump embedded content."""
 
     def setUp(self):
         """ fixture start: create temp dir """
@@ -158,6 +159,17 @@ def test_non_streamed(self):
         return self.do_test_md5(['-d', self.temp_dir], test_fun=preread_file,
                                 only_run_every=4)
 
+    def test_nodump(self):
+        """Ensure that with --nodump nothing is ever written to disk."""
+        data_dir = join(DATA_BASE_DIR, 'oleobj')
+        for sample_name, _, _ in SAMPLES:
+            args = ['-d', self.temp_dir, '--nodump', join(data_dir, sample_name)]
+            call_and_capture('oleobj', args,
+                             accept_nonzero_exit=True)
+            temp_dir_contents = listdir(self.temp_dir)
+            if temp_dir_contents:
+                self.fail('Found file in temp dir despite "--nodump": {}'.format(temp_dir_contents))
+
 
 class TestSaneFilenameCreation(unittest.TestCase):
     """ Test sanitization / creation of sane filenames """
diff --git a/tests/oleobj/test_external_links.py b/tests/oleobj/test_external_links.py
index 2b7fc5bff..05df3b12a 100644
--- a/tests/oleobj/test_external_links.py
+++ b/tests/oleobj/test_external_links.py
@@ -21,12 +21,20 @@ def test_external_links(self):
         for dirpath, _, filenames in os.walk(BASE_DIR):
             for filename in filenames:
                 file_path = path.join(dirpath, filename)
+                if not path.isfile(file_path):
+                    continue
 
-                output, ret_val = call_and_capture('oleobj', [file_path, ],
+                output, ret_val = call_and_capture('oleobj', ['--nodump', file_path, ],
                                                    accept_nonzero_exit=True)
-                self.assertEqual(ret_val, oleobj.RETURN_DID_DUMP,
+                self.assertEqual(ret_val, oleobj.RETURN_FOUND_EXTERNAL,
                                  msg='Wrong return value {} for {}. Output:\n{}'
                                  .format(ret_val, filename, output))
+                found_relationship = False
+                for line in output.splitlines():
+                    if line.startswith('Found relationship'):
+                        found_relationship = True
+                        break
+                self.assertTrue(found_relationship)
 
 
 # just in case somebody calls this file as a script
diff --git a/tests/test_utils/testdata_reader.py b/tests/test_utils/testdata_reader.py
index 5f1a6baad..81f96f030 100644
--- a/tests/test_utils/testdata_reader.py
+++ b/tests/test_utils/testdata_reader.py
@@ -7,9 +7,9 @@
 """
 
 import os, sys, zipfile
-from os.path import relpath, join, isfile
+from os.path import relpath, join, isfile, splitext
 from contextlib import contextmanager
-from tempfile import mkstemp
+from tempfile import mkstemp, TemporaryDirectory, NamedTemporaryFile
 
 from . import DATA_BASE_DIR
 
@@ -73,11 +73,10 @@ def loop_over_files(subdir=''):
     and the contents of the file, with the file being unzipped first if it
     ends with .zip.
 
-    :arg str subdir: Optional subdir of test data dir that caller is interested
-                     in
-    """
-    # create temp dir to extract files into
+    See also: :py:meth:`loop_and_extract`
 
+    :param str subdir: Optional subdir of test data dir that caller is interested in
+    """
     for base_dir, _, files in os.walk(join(DATA_BASE_DIR, subdir)):
         for filename in files:
             relative_path = relpath(join(base_dir, filename), DATA_BASE_DIR)
@@ -87,6 +86,41 @@
             yield relative_path, read(relative_path)
 
 
+def loop_and_extract(subdir=''):
+    """
+    Find all files, decrypting them to tempdir if necessary.
+
+    Does an `os.walk` through all test data or the given subdir and yields
+    the absolute path for each sample, which is either its original location
+    in `DATA_BASE_DIR` or in a temporary directory if it had to be decrypted.
+
+    The temp dir and files inside it are always deleted right after usage.
+
+    See also: :py:meth:`loop_over_files`
+
+    :param str subdir: Optional subdir of test data dir that caller is interested in
+    """
+    with TemporaryDirectory(prefix='oletools-test-') as temp_dir:
+        for base_dir, _, files in os.walk(join(DATA_BASE_DIR, subdir)):
+            for filename in files:
+                full_path = join(base_dir, filename)
+                if filename.endswith('.zip'):
+                    # remove the ".zip" and split the rest into actual name and extension
+                    actual_name, actual_extn = splitext(splitext(filename)[0])
+
+                    with zipfile.ZipFile(full_path, 'r') as zip_file:
+                        # create a temp file that has a proper file name and is deleted on closing
+                        with NamedTemporaryFile(dir=temp_dir, prefix=actual_name, suffix=actual_extn) \
+                                as temp_file:
+                            # our test samples are not big, so we can read the whole thing at once
+                            temp_file.write(zip_file.read(zip_file.namelist()[0],
+                                                          pwd=ENCRYPTED_FILES_PASSWORD))
+                            temp_file.flush()
+                            yield temp_file.name
+                else:
+                    yield full_path
+
+
 @contextmanager
 def decrypt_sample(relpath):
     """
diff --git a/tests/test_utils/utils.py b/tests/test_utils/utils.py
index 45cedc8d3..64f73b0bd 100644
--- a/tests/test_utils/utils.py
+++ b/tests/test_utils/utils.py
@@ -29,15 +29,15 @@ def call_and_capture(module, args=None, accept_nonzero_exit=False,
     Only drawback sofar: stdout and stderr are merged into one (which is what
     users see on their shell as well). When testing for json-compatible output
     you should `exclude_stderr` to `False` since logging ignores stderr,
-    so unforseen warnings (e.g. issued by pypy) would mess up your json.
+    so unforeseen warnings (e.g. issued by pypy) would mess up your json.
 
     :param str module: name of module to test, e.g. `olevba`
     :param args: arguments for module's main function
-    :param bool fail_nonzero: Raise error if command returns non-0 return code
+    :param bool accept_nonzero_exit: Do not raise error if command returns non-0 return code
     :param bool exclude_stderr: Exclude output to `sys.stderr` from output
                                 (e.g. if parsing output through json)
-    :returns: ret_code, output
-    :rtype: int, str
+    :returns: output, ret_code
+    :rtype: str, int
     """
     # create a PYTHONPATH environment var to prefer our current code
     env = os.environ.copy()
@@ -47,13 +47,6 @@
     except KeyError:
         env['PYTHONPATH'] = SOURCE_BASE_DIR
 
-    # hack: in python2 output encoding (sys.stdout.encoding) was None
-    # although sys.getdefaultencoding() and sys.getfilesystemencoding were ok
-    # TODO: maybe can remove this once branch
-    # "encoding-for-non-unicode-environments" is merged
-    if 'PYTHONIOENCODING' not in env:
-        env['PYTHONIOENCODING'] = 'utf8'
-
     # ensure args is a tuple
     my_args = tuple(args) if args else ()
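Usage note for the additions in this patch: the JSON round-trip suite is opt-in via the environment-variable gate defined in tests/common/test_json.py, so it would be run along these lines (assuming the repository root as working directory):

    OLETOOLS_TEST_JSON=1 python -m unittest tests.common.test_json

and the new oleobj flags can be combined on the command line ('sample.docx' is a placeholder name):

    python oletools/oleobj.py --nodump --json sample.docx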