diff --git a/oletools/olevba.py b/oletools/olevba.py index 9e0ed8df..eebf406d 100644 --- a/oletools/olevba.py +++ b/oletools/olevba.py @@ -262,7 +262,7 @@ import math import zipfile import re -import optparse +import argparse import binascii import base64 import zlib @@ -523,7 +523,7 @@ def __init__(self, stream_path, variable, expected, value): # return codes RETURN_OK = 0 RETURN_WARNINGS = 1 # (reserved, not used yet) -RETURN_WRONG_ARGS = 2 # (fixed, built into optparse) +RETURN_WRONG_ARGS = 2 # (fixed, built into argparse) RETURN_FILE_NOT_FOUND = 3 RETURN_XGLOB_ERR = 4 RETURN_OPEN_ERROR = 5 @@ -2571,7 +2571,8 @@ class VBA_Parser(object): Class to parse MS Office files, to detect VBA macros and extract VBA source code """ - def __init__(self, filename, data=None, container=None, relaxed=False, encoding=DEFAULT_API_ENCODING): + def __init__(self, filename, data=None, container=None, relaxed=False, encoding=DEFAULT_API_ENCODING, + disable_pcode=False): """ Constructor for VBA_Parser @@ -2629,6 +2630,7 @@ def __init__(self, filename, data=None, container=None, relaxed=False, encoding= self.encoding = encoding self.xlm_macros = [] #: Output from pcodedmp, disassembly of the VBA P-code + self.disable_pcode = disable_pcode self.pcodedmp_output = None #: Flag set to True/False if VBA stomping detected self.vba_stomping_detected = None @@ -2734,9 +2736,7 @@ def open_openxml(self, _file): with z.open(subfile) as file_handle: ole_data = file_handle.read() try: - self.ole_subfiles.append( - VBA_Parser(filename=subfile, data=ole_data, - relaxed=self.relaxed)) + self.append_subfile(filename=subfile, data=ole_data) except OlevbaBaseException as exc: if self.relaxed: log.info('%s is not a valid OLE file (%s)' % (subfile, exc)) @@ -2785,9 +2785,7 @@ def open_word2003xml(self, data): # TODO: handle different offsets => separate function try: ole_data = mso_file_extract(mso_data) - self.ole_subfiles.append( - VBA_Parser(filename=fname, data=ole_data, - relaxed=self.relaxed)) + self.append_subfile(filename=fname, data=ole_data) except OlevbaBaseException as exc: if self.relaxed: log.info('Error parsing subfile {0}: {1}' @@ -2832,9 +2830,7 @@ def open_flatopc(self, data): for bindata in pkgpart.iterfind(TAG_PKGBINDATA): try: ole_data = binascii.a2b_base64(bindata.text) - self.ole_subfiles.append( - VBA_Parser(filename=fname, data=ole_data, - relaxed=self.relaxed)) + self.append_subfile(filename=fname, data=ole_data) except OlevbaBaseException as exc: if self.relaxed: log.info('Error parsing subfile {0}: {1}' @@ -2905,9 +2901,7 @@ def open_mht(self, data): # TODO: check if it is actually an OLE file # TODO: get the MSO filename from content_location? - self.ole_subfiles.append( - VBA_Parser(filename=fname, data=ole_data, - relaxed=self.relaxed)) + self.append_subfile(filename=fname, data=ole_data) except OlevbaBaseException as exc: if self.relaxed: log.info('%s does not contain a valid OLE file (%s)' @@ -2946,8 +2940,7 @@ def open_ppt(self): try: ppt = ppt_parser.PptParser(self.ole_file, fast_fail=True) for vba_data in ppt.iter_vba_data(): - self.ole_subfiles.append(VBA_Parser(None, vba_data, - container='PptParser')) + self.append_subfile(None, vba_data, container='PptParser') log.info('File is PPT') self.ole_file.close() # just in case self.ole_file = None # required to make other methods look at ole_subfiles @@ -2975,6 +2968,14 @@ def open_text(self, data): # set type only if parsing succeeds self.type = TYPE_TEXT + def append_subfile(self, filename, data, container=None): + """ + Create sub-parser for given subfile/data and append to subfiles. + """ + self.ole_subfiles.append(VBA_Parser(filename, data, container, + relaxed=self.relaxed, + encoding=self.encoding, + disable_pcode=self.disable_pcode)) def find_vba_projects(self): """ @@ -3476,6 +3477,9 @@ def extract_pcode(self): :rtype: str """ # only run it once: + if self.disable_pcode: + self.pcodedmp_output = '' + return '' if self.pcodedmp_output is None: log.debug('Calling pcodedmp to extract and disassemble the VBA P-code') # import pcodedmp here to avoid circular imports: @@ -3715,7 +3719,7 @@ def colorize_keywords(self, vba_code): def process_file(self, show_decoded_strings=False, display_code=True, hide_attributes=True, vba_code_only=False, show_deobfuscated_code=False, - deobfuscate=False, pcode=False): + deobfuscate=False, show_pcode=False): """ Process a single file @@ -3727,7 +3731,7 @@ def process_file(self, show_decoded_strings=False, otherwise each module is analyzed separately (old behaviour) :param hide_attributes: bool, if True the first lines starting with "Attribute VB" are hidden (default) :param deobfuscate: bool, if True attempt to deobfuscate VBA expressions (slow) - :param pcode bool: if True, call pcodedmp to disassemble P-code and display it + :param show_pcode bool: if True, call pcodedmp to disassemble P-code and display it """ #TODO: replace print by writing to a provided output file (sys.stdout by default) # fix conflicting parameters: @@ -3797,7 +3801,7 @@ def process_file(self, show_decoded_strings=False, # display the exception with full stack trace for debugging log.info('Error parsing form: %s' % exc) log.debug('Traceback:', exc_info=True) - if pcode: + if show_pcode: print('-' * 79) print('P-CODE disassembly:') pcode = self.extract_pcode() @@ -3824,7 +3828,7 @@ def process_file(self, show_decoded_strings=False, def process_file_json(self, show_decoded_strings=False, display_code=True, hide_attributes=True, vba_code_only=False, show_deobfuscated_code=False, - deobfuscate=False): + deobfuscate=False, show_pcode=False): """ Process a single file @@ -3837,7 +3841,9 @@ def process_file_json(self, show_decoded_strings=False, :param global_analysis: bool, if True all modules are merged for a single analysis (default), otherwise each module is analyzed separately (old behaviour) :param hide_attributes: bool, if True the first lines starting with "Attribute VB" are hidden (default) + :param show_deobfuscated_code: bool, if True add deobfuscated code to result :param deobfuscate: bool, if True attempt to deobfuscate VBA expressions (slow) + :param show_pcode: bool, if True add extracted pcode to result """ #TODO: fix conflicting parameters (?) @@ -3855,6 +3861,7 @@ def process_file_json(self, show_decoded_strings=False, result['analysis'] = None result['code_deobfuscated'] = None result['do_deobfuscate'] = deobfuscate + result['show_pcode'] = show_pcode try: #TODO: handle olefile errors, when an OLE file is malformed @@ -3883,6 +3890,8 @@ def process_file_json(self, show_decoded_strings=False, deobfuscate) if show_deobfuscated_code: result['code_deobfuscated'] = self.reveal() + if show_pcode: + result['pcode'] = self.extract_pcode() result['macros'] = macros result['json_conversion_successful'] = True except Exception as exc: @@ -3945,57 +3954,84 @@ def parse_args(cmd_line_args=None): } usage = 'usage: olevba [options] [filename2 ...]' - parser = optparse.OptionParser(usage=usage) - # parser.add_option('-o', '--outfile', dest='outfile', + parser = argparse.ArgumentParser(usage=usage) + parser.add_argument('filenames', nargs='*', help='Files to analyze') + # parser.add_argument('-o', '--outfile', dest='outfile', # help='output file') - # parser.add_option('-c', '--csv', dest='csv', + # parser.add_argument('-c', '--csv', dest='csv', # help='export results to a CSV file') - parser.add_option("-r", action="store_true", dest="recursive", - help='find files recursively in subdirectories.') - parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None, - help='if the file is a zip archive, open all files from it, using the provided password.') - parser.add_option("-p", "--password", type='str', action='append', - default=[], - help='if encrypted office files are encountered, try ' - 'decryption with this password. May be repeated.') - parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*', - help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)') - # output mode; could make this even simpler with add_option(type='choice') but that would make - # cmd line interface incompatible... - modes = optparse.OptionGroup(parser, title='Output mode (mutually exclusive)') - modes.add_option("-t", '--triage', action="store_const", dest="output_mode", - const='triage', default='unspecified', - help='triage mode, display results as a summary table (default for multiple files)') - modes.add_option("-d", '--detailed', action="store_const", dest="output_mode", - const='detailed', default='unspecified', - help='detailed mode, display full results (default for single file)') - modes.add_option("-j", '--json', action="store_const", dest="output_mode", - const='json', default='unspecified', - help='json mode, detailed in json format (never default)') - parser.add_option_group(modes) - parser.add_option("-a", '--analysis', action="store_false", dest="display_code", default=True, - help='display only analysis results, not the macro source code') - parser.add_option("-c", '--code', action="store_true", dest="vba_code_only", default=False, - help='display only VBA source code, do not analyze it') - parser.add_option("--decode", action="store_true", dest="show_decoded_strings", - help='display all the obfuscated strings with their decoded content (Hex, Base64, StrReverse, Dridex, VBA).') - parser.add_option("--attr", action="store_false", dest="hide_attributes", default=True, - help='display the attribute lines at the beginning of VBA source code') - parser.add_option("--reveal", action="store_true", dest="show_deobfuscated_code", - help='display the macro source code after replacing all the obfuscated strings by their decoded content.') - parser.add_option('-l', '--loglevel', dest="loglevel", action="store", default=DEFAULT_LOG_LEVEL, - help="logging level debug/info/warning/error/critical (default=%default)") - parser.add_option('--deobf', dest="deobfuscate", action="store_true", default=False, - help="Attempt to deobfuscate VBA expressions (slow)") - parser.add_option('--relaxed', dest="relaxed", action="store_true", default=False, - help="Do not raise errors if opening of substream fails") - parser.add_option('--pcode', dest="pcode", action="store_true", default=False, - help="Disassemble and display the P-code (using pcodedmp)") - - (options, args) = parser.parse_args(cmd_line_args) + parser.add_argument("-r", action="store_true", dest="recursive", + help='find files recursively in subdirectories.') + parser.add_argument("-z", "--zip", dest='zip_password', type=str, + default=None, + help='if the file is a zip archive, open all files ' + 'from it, using the provided password.') + parser.add_argument("-p", "--password", type=str, action='append', + default=[], + help='if encrypted office files are encountered, try ' + 'decryption with this password. May be repeated.') + parser.add_argument("-f", "--zipfname", dest='zip_fname', type=str, + default='*', + help='if the file is a zip archive, file(s) to be ' + 'opened within the zip. Wildcards * and ? are ' + 'supported. (default: %(default)s)') + modes = parser.add_argument_group(title='Output mode (mutually exclusive)') + modes.add_argument("-t", '--triage', action="store_const", + dest="output_mode", const='triage', + default='unspecified', + help='triage mode, display results as a summary table ' + '(default for multiple files)') + modes.add_argument("-d", '--detailed', action="store_const", + dest="output_mode", const='detailed', + default='unspecified', + help='detailed mode, display full results (default for ' + 'single file)') + modes.add_argument("-j", '--json', action="store_const", + dest="output_mode", const='json', default='unspecified', + help='json mode, detailed in json format ' + '(never default)') + parser.add_argument("-a", '--analysis', action="store_false", + dest="display_code", default=True, + help='display only analysis results, not the macro ' + 'source code') + parser.add_argument("-c", '--code', action="store_true", + dest="vba_code_only", default=False, + help='display only VBA source code, do not analyze it') + parser.add_argument("--decode", action="store_true", + dest="show_decoded_strings", + help='display all the obfuscated strings with their ' + 'decoded content (Hex, Base64, StrReverse, ' + 'Dridex, VBA).') + parser.add_argument("--attr", action="store_false", dest="hide_attributes", + default=True, + help='display the attribute lines at the beginning of ' + 'VBA source code') + parser.add_argument("--reveal", action="store_true", + dest="show_deobfuscated_code", + help='display the macro source code after replacing ' + 'all the obfuscated strings by their decoded ' + 'content.') + parser.add_argument('-l', '--loglevel', dest="loglevel", action="store", + default=DEFAULT_LOG_LEVEL, + help='logging level debug/info/warning/error/critical ' + '(default=%(default)s)') + parser.add_argument('--deobf', dest="deobfuscate", action="store_true", + default=False, + help="Attempt to deobfuscate VBA expressions (slow)") + parser.add_argument('--relaxed', dest="relaxed", action="store_true", + default=False, + help='Do not raise errors if opening of substream ' + 'fails') + parser.add_argument('--show-pcode', dest="show_pcode", action="store_true", + default=False, + help="Show disassembled P-code (using pcodedmp)") + parser.add_argument('--no-pcode', action='store_true', + help='Disable extraction and analysis of pcode') + + options = parser.parse_args(cmd_line_args) # Print help if no arguments are passed - if len(args) == 0: + if len(options.filenames) == 0: # print banner with version python_version = '%d.%d.%d' % sys.version_info[0:3] print('olevba %s on Python %s - http://decalage.info/python/oletools' % @@ -4004,9 +4040,12 @@ def parse_args(cmd_line_args=None): parser.print_help() sys.exit(RETURN_WRONG_ARGS) + if options.show_pcode and options.no_pcode: + parser.error('You cannot combine options --no-pcode and --show-pcode') + options.loglevel = LOG_LEVELS[options.loglevel] - return options, args + return options def process_file(filename, data, container, options, crypto_nesting=0): @@ -4020,7 +4059,8 @@ def process_file(filename, data, container, options, crypto_nesting=0): try: # Open the file vba_parser = VBA_Parser_CLI(filename, data=data, container=container, - relaxed=options.relaxed) + relaxed=options.relaxed, + disable_pcode=options.no_pcode) if options.output_mode == 'detailed': # fully detailed output @@ -4028,7 +4068,7 @@ def process_file(filename, data, container, options, crypto_nesting=0): display_code=options.display_code, hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, show_deobfuscated_code=options.show_deobfuscated_code, - deobfuscate=options.deobfuscate, pcode=options.pcode) + deobfuscate=options.deobfuscate, show_pcode=options.show_pcode) elif options.output_mode == 'triage': # summarized output for triage: vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings, @@ -4039,7 +4079,7 @@ def process_file(filename, data, container, options, crypto_nesting=0): display_code=options.display_code, hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, show_deobfuscated_code=options.show_deobfuscated_code, - deobfuscate=options.deobfuscate)) + deobfuscate=options.deobfuscate, show_pcode=options.show_pcode)) else: # (should be impossible) raise ValueError('unexpected output mode: "{0}"!'.format(options.output_mode)) @@ -4103,8 +4143,6 @@ def process_file(filename, data, container, options, crypto_nesting=0): log.info('Working on decrypted file') return process_file(decrypted_file, data, container or filename, options, crypto_nesting+1) - except Exception: - raise finally: # clean up try: log.debug('Removing crypt temp file {}'.format(decrypted_file)) @@ -4123,7 +4161,7 @@ def main(cmd_line_args=None): in process_args. Per default (cmd_line_args=None), sys.argv is used. Option mainly added for unit-testing """ - options, args = parse_args(cmd_line_args) + options = parse_args(cmd_line_args) # provide info about tool and its version if options.output_mode == 'json': @@ -4145,14 +4183,12 @@ def main(cmd_line_args=None): if options.show_deobfuscated_code and not options.deobfuscate: log.debug('set --deobf because --reveal was set') options.deobfuscate = True - if options.output_mode == 'triage' and options.show_deobfuscated_code: - log.debug('ignoring option --reveal in triage output mode') # gather info on all files that must be processed # ignore directory names stored in zip files: all_input_info = tuple((container, filename, data) for container, filename, data in xglob.iter_files( - args, recursive=options.recursive, + options.filenames, recursive=options.recursive, zip_password=options.zip_password, zip_fname=options.zip_fname) if not (container and filename.endswith('/'))) @@ -4164,6 +4200,12 @@ def main(cmd_line_args=None): else: options.output_mode = 'triage' + if options.output_mode == 'triage': + if options.show_deobfuscated_code: + log.debug('ignoring option --reveal in triage output mode') + if options.show_pcode: + log.debug('ignoring option --show-pcode in triage output mode') + # Column headers for triage mode if options.output_mode == 'triage': print('%-12s %-65s' % ('Flags', 'Filename')) diff --git a/oletools/ppt_parser.py b/oletools/ppt_parser.py index fa1fd29a..93b75a4b 100644 --- a/oletools/ppt_parser.py +++ b/oletools/ppt_parser.py @@ -1615,7 +1615,7 @@ def iterative_decompress(stream, size, chunk_size=4096): decompressor = zlib.decompressobj() n_read = 0 - decomp = '' + decomp = b'' return_err = None try: