-
Notifications
You must be signed in to change notification settings - Fork 547
/
Copy patholedump.py
2521 lines (2259 loc) · 108 KB
/
oledump.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python
# Module metadata: short tool description, author, version string and the
# release date of that version (formatted as YYYY/MM/DD).
__description__ = 'Analyze OLE files (Compound Binary Files)'
__author__ = 'Didier Stevens'
__version__ = '0.0.79'
__date__ = '2025/03/04'
"""
Source code put in public domain by Didier Stevens, no Copyright
https://DidierStevens.com
Use at your own risk
# http://www.wordarticles.com/Articles/Formats/StreamCompression.php
History:
2014/08/21: start
2014/08/22: added ZIP support
2014/08/23: added stdin support
2014/08/25: added options extract and info
2014/08/26: bugfix pipe
2014/09/01: added * as selection option
2014/09/15: exception handling for import OleFileIO_PL
2014/11/12: added plugins
2014/11/15: continued plugins
2014/11/21: added pluginoptions
2014/12/14: 0.0.3: Added YARA support; added decoders
2014/12/19: 0.0.4: fixed bug when file was not OLE
2014/12/24: 0.0.5: fixed storage bug and added MacrosContainsOnlyAttributes
2014/12/25: 0.0.6: added support for ZIP containers with OLE files, like .docx
2014/12/26: added printing of filename OLE files inside ZIP
2014/12/31: suppressed printing of filename when selecting
2015/02/09: 0.0.7: added handling of .docx, ... inside ZIP file; Added option yarastrings
2015/02/10: 0.0.8: added YARACompile
2015/02/19: 0.0.9: added option -q
2015/02/23: 0.0.10: handle errors in compressed macros
2015/02/24: continue
2015/03/02: 0.0.11: added option -M
2015/03/05: added support for .xml files
2015/03/11: 0.0.12: added code pages identification
2015/03/13: Fixed oElement.firstChild.nodeValue UnicodeEncodeError bug
2015/03/19: 0.0.13: added option -c
2015/03/24: added man page
2015/03/25: added option --decompress
2015/03/26: changed --raw option
2015/04/10: 0.0.14: fixed bug SearchAndDecompressSub
2015/05/08: 0.0.15: added direct support for ActiveMime files
2015/05/13: 0.0.16: changed HeuristicDecompress with findall; renamed MacrosContainsOnlyAttributes to MacrosContainsOnlyAttributesOrOptions
2015/06/08: 0.0.17: Fix HexAsciiDump
2015/06/14: Added exit code
2015/07/26: 0.0.18: Added option --vbadecompresscorrupt
2015/09/12: added option --cut
2015/09/13: changed exit code to 2 when macros detected
2015/09/16: Rename old OleFileIO_PL to new olefile so that local copy of the module can be used
2015/09/17: added help for pip install olefile
2015/09/22: fixed os.path.isfile(filename) bug
2015/10/30: 0.0.19 added option -E and environment variable OLEDUMP_EXTRA; added MD5 to option -i
2015/11/08: 0.0.20 added man text for option -E; changed OptionsEnvironmentVariables so option takes precedence over environment variable
2015/11/09: continued -E
2015/11/12: 0.0.21 added dslsimulationdb
2015/11/17: added support for :-number in --cut option
2015/12/16: 0.0.22 some enhancements for --raw option
2015/12/22: 0.0.23 updated cut syntax
2016/06/08: 0.0.24 option -v works with option -E
2016/08/01: 0.0.25 added Magic to info
2016/10/16: decompressed.replace('\r\n', '\n'); added plugindir and decoderdir options by Remi Pointel
2016/12/11: 0.0.26 added indicator O for OLE10Native
2017/03/04: 0.0.27 added externals for YARA rules
2017/07/20: 0.0.28 added # to option -y
2017/10/14: 0.0.29 added options -t, -S; and \x00Attribut bugfix provided by Charles Smutz
2017/11/01: 0.0.30 replaced hexdump and hexasciidump with cDump
2017/11/04: added return codes -1 and 1
2017/12/13: 0.0.31 corrected man
2017/12/16: 0.0.32 added indexQuiet to cPlugin
2018/02/18: 0.0.33 added option -j
2018/05/06: 0.0.34 -s is more userfriendly
2018/07/01: 0.0.35 rename option --json to --jsonoutput
2018/07/01: fix for json output with OOXML files
2018/07/07: 0.0.36: updated to version 2 of jsonoutput
2018/08/04: 0.0.37 added option --vbadecompressskipattributes
2018/08/13: 0.0.38 changed output processing of plugins like plugin_ppt: if a plugin returns a string, that string is dumped with option -q
2018/11/25: 0.0.39 started VBA/dir parsing for modules, to display with option -i
2018/11/26: continued VBA/dir parsing for modules; added c and s selection; added selection warning; added option -A and option -T; added yara #x#
2018/11/30: added yara #r#; updated ParseCutTerm
2018/12/18: 0.0.40 added option --password
2019/02/16: 0.0.41 updated Cut
2019/03/12: 0.0.42 added warning for ZIP container without ole file; fixed selection warning
2019/07/21: 0.0.43 added option --storages, %CLSID%, stream UNICODE name
2019/11/04: fixed plugin path when compiled with pyinstaller
2019/11/05: Python 3 support
2019/11/24: changed HeuristicDecompress; Python 3 fixes
2019/12/18: 0.0.44 added option -f
2020/01/06: 0.0.45 added verbose YARACompile
2020/03/06: 0.0.46 added %CLSIDDESC% and Root Entry to --storages
2020/03/08: 0.0.47 updated man
2020/03/09: 0.0.48 Python 3 bug fix
2020/03/28: 0.0.49 -s (selection) is no longer case sensitive with letter prefixes
2020/05/21: 0.0.50 fixed typos man page
2020/07/18: 0.0.51 small fix ASCII dump: 0x7F is not printable
2020/07/25: 0.0.52 added support for pyzipper
2020/08/??: 0.0.53 added ole plugin class
2020/08/28: added support to select streams by name
2020/08/30: fixed & updated raw VBA decompression
2020/09/05: 0.0.54 added extra info parameter %MODULEINFO%
2020/09/29: bugfix for Python 2 (mro)
2020/11/08: 0.0.55 added support for -v with --jsonoutput; added ! indicator
2020/12/04: 0.0.56 Python 3 Fixes
2020/12/12: 0.0.57 refactoring Translate
2021/01/09: 0.0.58 updated man
2021/02/06: 0.0.59 small change to XML detection logic
2021/02/23: 0.0.60 small change PIP message
2021/06/20: 0.0.61 updated man
2021/08/11: 0.0.62 fix return code bug for multiple OLE files inside OOXML container
2022/02/21: 0.0.63 Python 3 fix
2022/03/04: 0.0.64 added option -u
2022/04/26: 0.0.65 added message for pyzipper
2022/05/03: 0.0.66 small refactoring
2022/05/11: 0.0.67 added PrintUserdefinedProperties
2022/06/07: 0.0.68 added extra info parameters %CTIME% %MTIME% %CTIMEHEX% %MTIMEHEX%
2022/07/22: 0.0.69 minor documentation change
2022/09/04: 0.0.70 bumping version for update to plugin(s), no changes to oledump.py
2022/11/09: 0.0.71 bumping version for update to plugin(s), no changes to oledump.py
2023/02/25: 0.0.72 added cStruct
2023/03/23: 0.0.73 updated cStruct
2023/04/01: 0.0.74 added CalculateChosenHash
2023/05/01: 0.0.75 bumping version for update to plugin(s), no changes to oledump.py
2024/05/15: 0.0.76 added cMyJSONOutput
2024/07/11: 0.0.77 bumping version for update to plugin(s), no changes to oledump.py
2024/12/24: 0.0.78 Python 3.12 fix mattew124
2025/03/04: 0.0.79 fixed URL in man page kristofbaute
Todo:
"""
import optparse
import sys
import math
import os
import binascii
import xml.dom.minidom
import zlib
import hashlib
import textwrap
import re
import string
import codecs
import json
import struct
import datetime
import collections
# Pick the in-memory stream classes for the running interpreter:
# StringIO is the text buffer, DataIO the binary buffer. Python 3 has
# distinct classes in io; on Python 2 cStringIO.StringIO serves both roles.
if sys.version_info[0] >= 3:
    from io import StringIO
    from io import BytesIO as DataIO
else:
    from cStringIO import StringIO
    from cStringIO import StringIO as DataIO

# The YARA module is optional (only needed when YARA rules are used), so a
# missing module is deliberately tolerated here.
try:
    import yara
except ImportError:
    pass

# olefile is mandatory: without it, explain how to install it and stop.
try:
    import olefile
except ImportError:
    print('This program requires module olefile.\nhttp://www.decalage.info/python/olefileio\n')
    print("You can use PIP to install olefile like this: pip install olefile\nWindows: pip is located in Python's Scripts folder.\n")
    # Use sys.exit() rather than the builtin exit(): the latter is injected by
    # the site module and is not guaranteed to exist (python -S, frozen
    # executables), which would turn this message into a NameError.
    sys.exit(-1)

# oletools is optional: fall back to an empty CLSID table when it is absent.
try:
    from oletools.common.clsid import KNOWN_CLSIDS
except ImportError:
    KNOWN_CLSIDS = {}

# Prefer pyzipper when installed; otherwise use the standard zipfile module
# under the same name.
try:
    import pyzipper as zipfile
except ImportError:
    import zipfile
# Module-level constants.

# NOTE(review): presumably the number of bytes shown per line in hex/ASCII
# dumps (the manual's dump examples show 16 bytes per line) - confirm at use site.
dumplinelength = 16
# NOTE(review): presumably the default password tried for password-protected
# ZIP containers - confirm against the --password option handling.
MALWARE_PASSWORD = 'infected'
# First 4 bytes of an OLE (Compound File Binary) file: D0 CF 11 E0.
OLEFILE_MAGIC = b'\xD0\xCF\x11\xE0'
# NOTE(review): presumably the leading signature of ActiveMime files - confirm at use site.
ACTIVEMIME_MAGIC = b'ActiveMime'
# Byte-regex character class matching a TAB (0x09) or any byte in the
# printable ASCII range 0x20-0x7E.
REGEX_STANDARD = b'[\x09\x20-\x7E]'
def PrintManual():
manual = r'''
Manual:
oledump is a tool to analyze OLE files (officially: Compound File Binary Format, CFBF). Many file formats are in fact OLE files, like Microsoft Office files, MSI files, ... Even the new Microsoft Office Open XML (OOXML) format uses OLE files for VBA macros.
oledump can analyze OLE files directly, or indirectly when they are contained in some file format (like .docm, .xml, ...).
A cheat sheet can be found here: https://www.sans.org/posters/oledump-py-quick-reference/
oledump uses 2 modules that are not part of Python 2: olefile (http://www.decalage.info/python/olefileio) and YARA.
You need to install the olefile module for this program to work.
The YARA module is not mandatory if you don't use YARA rules.
Running oledump with a spreadsheet (.xls binary format) lists all the streams found in the OLE file (an OLE file is a virtual filesystem with folders and files, known as streams), like this:
C:\Demo>oledump.py Book1.xls
1: 4096 '\\x05DocumentSummaryInformation'
2: 4096 '\\x05SummaryInformation'
3: 4096 'Workbook'
The first column is an index assigned to the stream by oledump. This index is used to select streams. The second column is the size of the stream (number of bytes inside the stream), and the last column is the name of the stream.
To select a stream for analysis, use option -s with the index (number of the stream, or a for all streams), like this:
C:\Demo>oledump.py -s 1 Book1.xls
00000000: FE FF 00 00 05 01 02 00 00 00 00 00 00 00 00 00 ................
00000010: 00 00 00 00 00 00 00 00 01 00 00 00 02 D5 CD D5 .............i-i
00000020: 9C 2E 1B 10 93 97 08 00 2B 2C F9 AE 30 00 00 00 ........+,..0...
00000030: E4 00 00 00 09 00 00 00 01 00 00 00 50 00 00 00 ............P...
00000040: 0F 00 00 00 58 00 00 00 17 00 00 00 70 00 00 00 ....X.......p...
...
When selecting a stream, its content is shown as an ASCII dump (this can also be done with option -a).
Option -x produces a hexadecimal dump instead of an ASCII dump.
C:\Demo>oledump.py -s 1 -x Book1.xls
FE FF 00 00 05 01 02 00 00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00 01 00 00 00 02 D5 CD D5
9C 2E 1B 10 93 97 08 00 2B 2C F9 AE 30 00 00 00
E4 00 00 00 09 00 00 00 01 00 00 00 50 00 00 00
0F 00 00 00 58 00 00 00 17 00 00 00 70 00 00 00
...
Option -A does an ASCII dump (like option -a), but with duplicate lines removed.
Option -S dumps the strings.
Option -d produces a raw dump of the content of the stream. This content can be redirected to a file, like this:
C:\Demo>oledump.py -s 1 -d Book1.xls > content.bin
or it can be piped into another command, like this:
C:\Demo>oledump.py -s 1 -d Book1.xls | pdfid.py -f
If the raw dump needs to be processed by a string codec, like utf16, use option -t instead of -d and provide the codec:
C:\Demo>oledump.py -s 1 -t utf16 Book1.xls
Streams can also be selected by their full name (example: -s 'VBA/ThisWorkbook').
Option -C (--cut) allows for the partial selection of a stream. Use this option to "cut out" part of the stream.
The --cut option takes an argument to specify which section of bytes to select from the stream. This argument is composed of 2 terms separated by a colon (:), like this:
termA:termB
termA and termB can be:
- nothing (an empty string)
- a positive decimal number; example: 10
- an hexadecimal number (to be preceded by 0x); example: 0x10
- a case sensitive ASCII string to search for (surrounded by square brackets and single quotes); example: ['MZ']
- a case sensitive UNICODE string to search for (surrounded by square brackets and single quotes prefixed with u); example: [u'User']
- an hexadecimal string to search for (surrounded by square brackets); example: [d0cf11e0]
If termA is nothing, then the cut section of bytes starts with the byte at position 0.
If termA is a number, then the cut section of bytes starts with the byte at the position given by the number (first byte has index 0).
If termA is a string to search for, then the cut section of bytes starts with the byte at the position where the string is first found. If the string is not found, the cut is empty (0 bytes).
If termB is nothing, then the cut section of bytes ends with the last byte.
If termB is a number, then the cut section of bytes ends with the byte at the position given by the number (first byte has index 0).
When termB is a number, it can have suffix letter l. This indicates that the number is a length (number of bytes), and not a position.
termB can also be a negative number (decimal or hexadecimal): in that case the position is counted from the end of the file. For example, :-5 selects the complete file except the last 5 bytes.
If termB is a string to search for, then the cut section of bytes ends with the last byte at the position where the string is first found. If the string is not found, the cut is empty (0 bytes).
No checks are made to assure that the position specified by termA is lower than the position specified by termB. This is left up to the user.
Search string expressions (ASCII, UNICODE and hexadecimal) can be followed by an instance (a number equal to 1 or greater) to indicate which instance needs to be taken. For example, ['ABC']2 will search for the second instance of string 'ABC'. If this instance is not found, then nothing is selected.
Search string expressions (ASCII, UNICODE and hexadecimal) can be followed by an offset (+ or - a number) to add (or subtract) an offset to the found instance. This number can be a decimal or hexadecimal (prefix 0x) value. For example, ['ABC']+3 will search for the first instance of string 'ABC' and then select the bytes after ABC (+ 3).
Finally, search string expressions (ASCII, UNICODE and hexadecimal) can be followed by an instance and an offset.
Examples:
This argument can be used to dump the first 256 bytes of a PE file located inside the stream: ['MZ']:0x100l
This argument can be used to dump the OLE file located inside the stream: [d0cf11e0]:
When this option is not used, the complete stream is selected.
When analyzing a Microsoft Office document with VBA macros, you will see output similar to this:
C:\Demo>oledump.py Book2-vba.xls
1: 109 '\\x01CompObj'
2: 276 '\\x05DocumentSummaryInformation'
3: 224 '\\x05SummaryInformation'
4: 2484 'Workbook'
5: 529 '_VBA_PROJECT_CUR/PROJECT'
6: 104 '_VBA_PROJECT_CUR/PROJECTwm'
7: M 1196 '_VBA_PROJECT_CUR/VBA/Sheet1'
8: m 977 '_VBA_PROJECT_CUR/VBA/Sheet2'
9: m 977 '_VBA_PROJECT_CUR/VBA/Sheet3'
10: m 985 '_VBA_PROJECT_CUR/VBA/ThisWorkbook'
11: 2651 '_VBA_PROJECT_CUR/VBA/_VBA_PROJECT'
12: 549 '_VBA_PROJECT_CUR/VBA/dir'
The letter M next to the index of some of the streams (streams 7, 8, 9 and 10) is a macro indicator.
If you select a macro stream, the ASCII dump will not help you much. This is because of compression. VBA macros are stored inside streams using a proprietary compression method. To decompress the VBA macros source code, you use option -v, like this:
C:\Demo>oledump.py -s 7 -v Book2-vba.xls
Attribute VB_Name = "Sheet1"
Attribute VB_Base = "0{00020820-0000-0000-C000-000000000046}"
Attribute VB_GlobalNameSpace = False
Attribute VB_Creatable = False
Attribute VB_PredeclaredId = True
Attribute VB_Exposed = True
Attribute VB_TemplateDerived = False
Attribute VB_Customizable = True
Sub Workbook_Open()
MsgBox "VBA macro"
End Sub
If the VBA macro code is only composed of Attribute or Option statements, and no other statements, then the indicator is a lower case letter m. Example:
C:\Demo>oledump.py -s 8 -v Book2-vba.xls
Attribute VB_Name = "Sheet2"
Attribute VB_Base = "0{00020820-0000-0000-C000-000000000046}"
Attribute VB_GlobalNameSpace = False
Attribute VB_Creatable = False
Attribute VB_PredeclaredId = True
Attribute VB_Exposed = True
Attribute VB_TemplateDerived = False
Attribute VB_Customizable = True
If the VBA code contains other statements than Attribute or Option statements, then the indicator is an upper case letter M.
This M/m indicator allows you to focus first on interesting VBA macros.
A ! indicator means that the stream is a VBA module, but that no VBA code was detected that starts with one or more attributes.
To decompress the macros and skip the initial attributes, use option --vbadecompressskipattributes.
When compressed VBA code is corrupted, the status indicator will be E (error).
C:\Demo>oledump.py Book2-vba.xls
1: 109 '\\x01CompObj'
2: 276 '\\x05DocumentSummaryInformation'
3: 224 '\\x05SummaryInformation'
4: 2484 'Workbook'
5: 529 '_VBA_PROJECT_CUR/PROJECT'
6: 104 '_VBA_PROJECT_CUR/PROJECTwm'
7: E 1196 '_VBA_PROJECT_CUR/VBA/Sheet1'
8: m 977 '_VBA_PROJECT_CUR/VBA/Sheet2'
9: m 977 '_VBA_PROJECT_CUR/VBA/Sheet3'
10: m 985 '_VBA_PROJECT_CUR/VBA/ThisWorkbook'
11: 2651 '_VBA_PROJECT_CUR/VBA/_VBA_PROJECT'
12: 549 '_VBA_PROJECT_CUR/VBA/dir'
To view the VBA code up until the corruption, use option --vbadecompresscorrupt.
C:\Demo>oledump.py -s 7 --vbadecompresscorrupt Book2-vba.xls
Option -i (without option -s) displays extra information for modules:
C:\Demo>oledump.py -i Book2-vba.xls
1: 107 '\\x01CompObj'
2: 256 '\\x05DocumentSummaryInformation'
3: 216 '\\x05SummaryInformation'
4: 15615 'Workbook'
5: 435 '_VBA_PROJECT_CUR/PROJECT'
6: 62 '_VBA_PROJECT_CUR/PROJECTwm'
7: m 985 813+172 '_VBA_PROJECT_CUR/VBA/Sheet1'
8: M 1767 1545+222 '_VBA_PROJECT_CUR/VBA/ThisWorkbook'
9: 2413 '_VBA_PROJECT_CUR/VBA/_VBA_PROJECT'
10: 1253 '_VBA_PROJECT_CUR/VBA/__SRP_0'
11: 106 '_VBA_PROJECT_CUR/VBA/__SRP_1'
12: 504 '_VBA_PROJECT_CUR/VBA/__SRP_2'
13: 103 '_VBA_PROJECT_CUR/VBA/__SRP_3'
14: 524 '_VBA_PROJECT_CUR/VBA/dir'
Modules can contain compiled code and source code (usually, both). In this example, stream 7 and 8 have extra information: the size of the compiled code (left of the + sign) and the size of the source code (right of the + sign).
Stream 7 is a module with size 985, the first 813 bytes are the compiled code and the last 172 bytes are the source code.
When selecting the content of modules, the index can be suffixed with c to select only the bytes of the compiled code, or with s to select only the bytes of the source code:
C:\Demo>oledump.py -s 7s Book2-vba.xls
00000000: 01 A8 B0 00 41 74 74 72 69 62 75 74 00 65 20 56 ....Attribut.e V
00000010: 42 5F 4E 61 6D 00 65 20 3D 20 22 53 68 65 40 65 B_Nam.e = "She@e
00000020: 74 31 22 0D 0A 0A E8 42 04 61 73 02 74 30 7B 30 t1"....B.as.t0{0
00000030: 30 30 C0 32 30 38 32 30 2D 00 20 04 08 0E 43 00 00.20820-. ...C.
00000040: 14 02 1C 01 24 30 30 34 36 02 7D 0D 7C 47 6C 6F ....$0046.}.|Glo
00000050: 62 61 6C 21 01 C4 53 70 61 63 01 92 46 61 08 6C bal!..Spac..Fa.l
00000060: 73 65 0C 64 43 72 65 61 10 74 61 62 6C 15 1F 50 se.dCrea.tabl..P
00000070: 72 65 20 64 65 63 6C 61 00 06 49 64 11 00 AB 54 re decla..Id...T
00000080: 72 75 0D 42 45 78 70 08 6F 73 65 14 1C 54 65 6D ru.BExp.ose..Tem
00000090: 70 00 6C 61 74 65 44 65 72 69 06 76 02 24 92 42 p.lateDeri.v.$.B
000000A0: 75 73 74 6F 6D 0C 69 7A 04 44 03 32 ustom.iz.D.2
Option -r can be used together with option -v to decompress a VBA macro stream that was extracted through some other mean than oledump. In such case, you provide the file that contains the compressed macro, instead of the OLE file.
ole files can contain streams that are not connected to the root entry. This can happen when a maldoc is cleaned by anti-virus. oledump will mark such streams as orphaned:
C:\Demo>oledump.py Book2-vba.xls
1: 114 '\\x01CompObj'
2: 107608 '\\x05DocumentSummaryInformation'
3: 52900 '\\x05SummaryInformation'
4: 11288 '1Table'
5: 131068 'Data'
6: 7726 'WordDocument'
7: 567 Orphan: 'dir'
8: 2282 Orphan: '__SRP_0'
9: 84 Orphan: '__SRP_1'
10: 3100 Orphan: '__SRP_2'
11: 188 Orphan: '__SRP_3'
12: M 9443 Orphan: 'NewMacros'
13: m 940 Orphan: 'ThisDocument'
14: 3835 Orphan: 'XVBA_PROJECT'
15: 484 Orphan: 'PROJECT'
16: 71 Orphan: 'PROJECTwm'
Microsoft Office files can contain embedded objects. They show up like this (notice stream 6 Ole10Native with indicator O):
C:\Demo>oledump.py Book1-insert-object-calc-rol3.exe.xls
1: 109 '\\x01CompObj'
2: 276 '\\x05DocumentSummaryInformation'
3: 224 '\\x05SummaryInformation'
4: 80 'MBD0004D0D1/\\x01CompObj'
5: 20 'MBD0004D0D1/\\x01Ole'
6: O 114798 'MBD0004D0D1/\\x01Ole10Native'
7: 11312 'Workbook'
To get more info about the embedded object, use option -i like this:
C:\Demo>oledump.py -s 6 -i Book1-insert-object-calc-rol3.exe.xls
String 1: calc-rol3.exe
String 2: C:\Demo\ole\CALC-R~1.EXE
String 3: C:\Demo\ole\CALC-R~1.EXE
Size embedded file: 114688
MD5 embedded file: bef425b95e45c54d649a19a7c55556a0
SHA256 embedded file: 211b63ae126411545f9177ec80114883d32f7e3c7ccf81ee4e5dd6ffe3a10e2d
To extract the embedded file, use option -e and redirect the output to a file like this:
C:\Demo>oledump.py -s 6 -e Book1-insert-object-calc-rol3.exe.xls > extracted.bin
Use option --storages to display storages (by default, oledump only lists streams). Indicator . is used for storages except for the Root Entry which has indicator R.
Option -f can be used to find embedded OLE files. This is useful, for example, in the following scenario:
AutoCAD drawing files (.dwg) can contain VBA macros. Although the .dwg file format is a proprietary format, VBA macros are stored as an embedded OLE file. The header of a DWG file contains a pointer to the embedded OLE file, but since an OLE file starts with a MAGIC sequence (D0CF11E0), you can just scan the input file for this sequence.
This can be done using option -f (--find). This option takes a value: letter l or a positive integer.
To have an overview of embedded OLE files, use option "-f l" (letter l) like this:
C:\Demo>oledump.py -f l Drawing1vba.dwg
Position of potential embedded OLE files:
1 0x00008090
This will report the position of every (potential) embedded OLE file inside the input file. Here you can see that there is one file at position 0x8090.
You can then select this file and analyze it, using -f 1 (integer 1):
C:\Demo>oledump.py -f 1 Drawing1vba.dwg
1: 374 'VBA_Project/PROJECT'
2: 38 'VBA_Project/PROJECTwm'
3: M 1255 'VBA_Project/VBA/ThisDrawing'
4: 1896 'VBA_Project/VBA/_VBA_PROJECT'
5: 315 'VBA_Project/VBA/dir'
6: 16 'VBA_Project_Version'
And then you can use option -s to select streams and analyze them.
Analyzing the content of streams (and VBA macros) can be quite challenging. To help with the analysis, oledump provides support for plugins and YARA rules.
plugins are Python programs that take the stream content as input and try to analyze it. Plugins can analyze the raw stream content or the decompressed VBA macro source code. Plugins analyze all streams, you don't need to select a particular stream.
VBA macros code in malicious documents is often obfuscated, and hard to understand. plugin_http_heuristics is a plugin for VBA macros that tries to recover the URL used to download the trojan in a malicious Office document. This URL is often obfuscated, for example by using hexadecimal or base64 strings to represent the URL. plugin_http_heuristics tries several heuristics to recover a URL.
Example:
C:\Demo>oledump.py -p plugin_http_heuristics sample.xls
1: 104 '\\x01CompObj'
2: 256 '\\x05DocumentSummaryInformation'
3: 228 '\\x05SummaryInformation'
4: 4372 'Workbook'
5: 583 '_VBA_PROJECT_CUR/PROJECT'
6: 83 '_VBA_PROJECT_CUR/PROJECTwm'
7: m 976 '_VBA_PROJECT_CUR/VBA/????1'
Plugin: HTTP Heuristics plugin
8: m 976 '_VBA_PROJECT_CUR/VBA/????2'
Plugin: HTTP Heuristics plugin
9: m 976 '_VBA_PROJECT_CUR/VBA/????3'
Plugin: HTTP Heuristics plugin
10: M 261251 '_VBA_PROJECT_CUR/VBA/????????'
Plugin: HTTP Heuristics plugin
http://???.???.???.??:8080/stat/lld.php
11: 8775 '_VBA_PROJECT_CUR/VBA/_VBA_PROJECT'
12: 1398 '_VBA_PROJECT_CUR/VBA/__SRP_0'
13: 212 '_VBA_PROJECT_CUR/VBA/__SRP_1'
14: 456 '_VBA_PROJECT_CUR/VBA/__SRP_2'
15: 385 '_VBA_PROJECT_CUR/VBA/__SRP_3'
16: 550 '_VBA_PROJECT_CUR/VBA/dir'
Option -q (quiet) only displays output from the plugins, it suppresses output from oledump. This makes it easier to spot URLs:
C:\Demo>oledump.py -p plugin_http_heuristics -q sample.xls
http://???.???.???.??:8080/stat/lld.php
When specifying plugins, you do not need to give the full path nor the .py extension (it's allowed though). If you just give the filename without a path, oledump will search for the plugin in the current directory and in the directory where oledump.py is located. You can specify more than one plugin by separating their names with a comma (,), or by using an at-file. An at-file is a text file containing the names of the plugins (one per line). If plugins are located in a different directory, you could specify it with the --plugindir option. To indicate to oledump that a text file is an at-file, you prefix it with @, like this:
oledump.py -p @all-plugins.txt sample.xls
Some plugins take options too. Use --pluginoptions to specify these options.
oledump can scan the content of the streams with YARA rules (the YARA Python module must be installed). You provide the YARA rules with option -y. You can provide one file with YARA rules, an at-file (@file containing the filenames of the YARA files) or a directory. In case of a directory, all files inside the directory are read as YARA files. Or you can provide the YARA rule with the option value (an ad hoc rule) if it starts with # (literal), #s# (string), #x# (hexadecimal string), #r# (regex string), #q# (quote), #h# (hexadecimal) or #b# (base64). Example: -y "#rule demo {strings: $a=\"demo\" condition: $a}"
Using #s#demo will instruct oledump to generate a rule to search for string demo (rule string {strings: $a = "demo" ascii wide nocase condition: $a}) and use that rule.
All streams are scanned with the provided YARA rules, you can not use option -s to select an individual stream.
Example:
C:\Demo>oledump.py -y contains_pe_file.yara Book1-insert-object-exe.xls
1: 107 '\\x01CompObj'
2: 256 '\\x05DocumentSummaryInformation'
3: 216 '\\x05SummaryInformation'
4: 76 'MBD0049DB15/\\x01CompObj'
5: O 60326 'MBD0049DB15/\\x01Ole10Native'
YARA rule: Contains_PE_File
6: 19567 'Workbook'
In this example, you use YARA rule contains_pe_file.yara to find PE files (executables) inside Microsoft Office files. The rule triggered for stream 5, because it contains an EXE file embedded as OLE object.
If you want more information about what was detected by the YARA rule, use option --yarastrings like in this example:
C:\Demo>oledump.py -y contains_pe_file.yara --yarastrings Book1-insert-object-exe.xls
1: 107 '\\x01CompObj'
2: 256 '\\x05DocumentSummaryInformation'
3: 216 '\\x05SummaryInformation'
4: 76 'MBD0049DB15/\\x01CompObj'
5: O 60326 'MBD0049DB15/\\x01Ole10Native'
YARA rule: Contains_PE_File
000064 $a:
4d5a
'MZ'
6: 19567 'Workbook'
YARA rule contains_pe_file detects PE files by finding string MZ followed by string PE at the correct offset (AddressOfNewExeHeader).
The rule looks like this:
rule Contains_PE_File
{
meta:
author = "Didier Stevens (https://DidierStevens.com)"
description = "Detect a PE file inside a byte sequence"
method = "Find string MZ followed by string PE at the correct offset (AddressOfNewExeHeader)"
strings:
$a = "MZ"
condition:
for any i in (1..#a): (uint32(@a[i] + uint32(@a[i] + 0x3C)) == 0x00004550)
}
Distributed together with oledump are the YARA rules maldoc.yara. These are YARA rules to detect shellcode, based on Frank Boldewin's shellcode detector used in OfficeMalScanner.
Two external variables are declared for use in YARA rules: streamname contains the stream name, and VBA is True when the YARA engine is given VBA source code to scan.
When looking for traces of Windows executable code (PE files, shellcode, ...) with YARA rules, one must take into account the fact that the executable code might have been encoded (for example via XOR and a key) to evade detection.
To deal with this possibility, oledump supports decoders. A decoder is another type of plugin, that will bruteforce a type of encoding on each stream. For example, decoder_xor1 will encode each stream via XOR and a key of 1 byte. So effectively, 256 different encodings of the stream will be scanned by the YARA rules. 256 encodings because: XOR key 0x00, XOR key 0x01, XOR key 0x02, ..., XOR key 0xFF
Here is an example:
C:\Demo>oledump.py -y contains_pe_file.yara -D decoder_xor1 Book1-insert-object-exe-xor14.xls
1: 107 '\\x01CompObj'
2: 256 '\\x05DocumentSummaryInformation'
3: 216 '\\x05SummaryInformation'
4: 76 'MBD0049DB15/\\x01CompObj'
5: O 60326 'MBD0049DB15/\\x01Ole10Native'
YARA rule (stream decoder: XOR 1 byte key 0x14): Contains_PE_File
6: 19567 'Workbook'
The YARA rule triggers on stream 5. It contains a PE file encoded via XORing each byte with 0x14.
You can specify decoders in exactly the same way as plugins, for example specifying more than one decoder separated by a comma ,.
If decoders are located in a different directory, you could specify it with the --decoderdir option.
C:\Demo>oledump.py -y contains_pe_file.yara -D decoder_xor1,decoder_rol1,decoder_add1 Book1-insert-object-exe-xor14.xls
1: 107 '\\x01CompObj'
2: 256 '\\x05DocumentSummaryInformation'
3: 216 '\\x05SummaryInformation'
4: 76 'MBD0049DB15/\\x01CompObj'
5: O 60326 'MBD0049DB15/\\x01Ole10Native'
YARA rule (stream decoder: XOR 1 byte key 0x14): Contains_PE_File
6: 19567 'Workbook'
Some decoders take options, to be provided with option --decoderoptions.
OLE files contain metadata. Use option -M to display it.
Example:
C:\Demo>oledump.py -M Book1.xls
Properties SummaryInformation:
codepage: 1252 ANSI Latin 1; Western European (Windows)
author: Didier Stevens
last_saved_by: Didier Stevens
create_time: 2014-08-21 09:16:10
last_saved_time: 2014-08-21 10:26:40
creating_application: Microsoft Excel
security: 0
Properties DocumentSummaryInformation:
codepage_doc: 1252 ANSI Latin 1; Western European (Windows)
scale_crop: False
company: Didier Stevens Labs
links_dirty: False
shared_doc: False
hlinks_changed: False
version: 730895
Option -c calculates extra data per stream. This data is displayed per stream. Only the MD5 hash of the content of the stream is calculated.
Example:
C:\Demo>oledump.py -c Book1.xls
1: 4096 '\\x05DocumentSummaryInformation' ff1773dce227027d410b09f8f3224a56
2: 4096 '\\x05SummaryInformation' b46068f38a3294ca9163442cb8271028
3: 4096 'Workbook' d6a5bebba74fb1adf84c4ee66b2bf8dd
If you need more data than the MD5 of each stream, use option -E (extra). This option takes a parameter describing the extra data that needs to be calculated and displayed for each stream. The following variables are defined:
%INDEX%: the index of the stream
%INDICATOR%: macro indicator
%LENGTH%: the length of the stream
%NAME%: the printable name of the stream
%MD5%: calculates MD5 hash
%SHA1%: calculates SHA1 hash
%SHA256%: calculates SHA256 hash
%ENTROPY%: calculates entropy
%HEADHEX%: display first 20 bytes of the stream as hexadecimal
%HEADASCII%: display first 20 bytes of the stream as ASCII
%TAILHEX%: display last 20 bytes of the stream as hexadecimal
%TAILASCII%: display last 20 bytes of the stream as ASCII
%HISTOGRAM%: calculates a histogram
this is the prevalence of each byte value (0x00 through 0xFF)
at least 3 numbers are displayed separated by a comma:
number of values with a prevalence > 0
minimum values with a prevalence > 0
maximum values with a prevalence > 0
each value with a prevalence > 0
%BYTESTATS%: calculates byte statistics
byte statistics are 5 numbers separated by a comma:
number of NULL bytes
number of control bytes
number of whitespace bytes
number of printable bytes
number of high bytes
%CLSID%: storage/stream class ID
%CLSIDDESC%: storage/stream class ID description
%MODULEINFO%: for module streams: size of compiled code & size of compressed code; otherwise 'N/A' (you must use option -i)
%CTIME%: creation time
%MTIME%: modification time
%CTIMEHEX%: creation time in hexadecimal
%MTIMEHEX%: modification time in hexadecimal
The parameter for -E may contain other text than the variables, which will be printed. Escape characters \\n and \\t are supported.
Example displaying the MD5 and SHA256 hash per stream, separated by a space character:
C:\Demo>oledump.py -E "%MD5% %SHA256%" Book1.xls
1: 4096 '\\x05DocumentSummaryInformation' ff1773dce227027d410b09f8f3224a56 2817c0fbe2931a562be17ed163775ea5e0b12aac203a095f51ffdbd5b27e7737
2: 4096 '\\x05SummaryInformation' b46068f38a3294ca9163442cb8271028 2c3009a215346ae5163d5776ead3102e49f6b5c4d29bd1201e9a32d3bfe52723
3: 4096 'Workbook' d6a5bebba74fb1adf84c4ee66b2bf8dd 82157e87a4e70920bf8975625f636d84101bbe8f07a998bc571eb8fa32d3a498
If the extra parameter starts with !, then it replaces the complete output line (instead of being appended to the output line).
Example:
C:\Demo>oledump.py -E "!%INDEX% %MD5%" Book1.xls
1 ff1773dce227027d410b09f8f3224a56
2 b46068f38a3294ca9163442cb8271028
3 d6a5bebba74fb1adf84c4ee66b2bf8dd
Option -v can be used together with option -c or -E to perform the calculations on the decompressed macro streams (m and M) instead of the raw macro streams.
To include extra data with each use of oledump, define environment variable OLEDUMP_EXTRA with the parameter that should be passed to -E. When environment variable OLEDUMP_EXTRA is defined, option -E can be omitted. When option -E is used together with environment variable OLEDUMP_EXTRA, the parameter of option -E is used and the environment variable is ignored.
Sometimes during the analysis of an OLE file, you might come across compressed data inside the stream. For example, an indicator of ZLIB compressed DATA is byte 0x78.
Option --decompress instructs oledump to search for compressed data inside the selected stream, and then decompress it. If this fails, the original data is displayed.
Option -u can be used to include unused data found in the last sector of a stream, after the stream data.
oledump can handle several types of files. OLE files are supported, but also the new Office Open XML standard: these are XML files inside a ZIP container, but VBA macros are still stored as OLE files inside the ZIP file. In such case, the name of the OLE file inside the ZIP file will be displayed, and the indices will be prefixed by a letter (A for the first OLE file, B for the second OLE file, ...).
Example:
C:\Demo>oledump.py Book1.xlsm
A: xl/vbaProject.bin
A1: 462 'PROJECT'
A2: 86 'PROJECTwm'
A3: M 974 'VBA/Module1'
A4: m 977 'VBA/Sheet1'
A5: m 985 'VBA/ThisWorkbook'
A6: 2559 'VBA/_VBA_PROJECT'
A7: 1111 'VBA/__SRP_0'
A8: 74 'VBA/__SRP_1'
A9: 136 'VBA/__SRP_2'
A10: 103 'VBA/__SRP_3'
A11: 566 'VBA/dir'
oledump can also handle XML files that contain OLE files stored as base64 inside XML files.
Finally, all of these file types may be stored inside a password protected ZIP file (password infected). Storing malicious files inside a password protected ZIP file is common practice amongst malware researchers. Not only does it prevent accidental infection, but it also prevents anti-virus programs from deleting the sample.
oledump supports the analysis of samples stored in password protected ZIP files (password infected). Do not store more than one sample inside a password protected ZIP file. Each sample should be in its own ZIP container.
oledump also supports input/output redirection. This way, oledump can be used in a pipe.
Say for example that the sample OLE file is GZIP compressed. oledump can not handle GZIP files directly, but you can decompress and cat it with zcat and then pipe it into oledump for analysis, like this:
zcat sample.gz | oledump.py
With option -T (--headtail), output can be truncated to the first 10 lines and last 10 lines of output.
With option -j, oledump will output the content of the ole file as a JSON object that can be piped into other tools that support this JSON format. When option -v is used together with option -j, the produced JSON object contains decompressed VBA code.
Overview of indicators:
M: Macro (attributes and code)
m: macro (attributes without code)
E: Error (code that throws an error when decompressed)
!: Unusual macro (code without attributes)
O: object (embedded file)
.: storage
R: root entry
More info: https://blog.didierstevens.com/2020/11/15/oledump-indicators/
The return codes of oledump are:
-1 when an error occurred
0 when the file is not an ole file (or does not contain an ole file)
1 when an ole file without macros was analyzed
2 when an ole file with macros was analyzed
'''
for line in manual.split('\n'):
print(textwrap.fill(line))
#Convert 2 Bytes If Python 3
def C2BIP3(string):
    """Convert *string* to bytes when running on Python 3.

    On Python 2, or when *string* already is a bytes object, the argument is
    returned unchanged. Otherwise each character is mapped to the byte with
    the same ordinal value.
    """
    runningPython2 = sys.version_info[0] <= 2
    if runningPython2 or type(string) == bytes:
        return string
    return bytes([ord(character) for character in string])
#Convert 2 String If Python 3
def C2SIP3(string):
    """Convert *string* to str when running on Python 3.

    Only a bytes object on Python 3 is converted (each byte becomes the
    character with the same ordinal value); anything else is returned
    unchanged.
    """
    runningPython3 = sys.version_info[0] > 2
    if runningPython3 and type(string) == bytes:
        return ''.join(map(chr, string))
    return string
# CIC: Call If Callable
def CIC(expression):
    """Call If Callable: return expression() for a callable, else expression itself."""
    return expression() if callable(expression) else expression
# IFF: IF Function
def IFF(expression, valueTrue, valueFalse):
    """IF Function: pick valueTrue or valueFalse depending on expression.

    The selected value is passed through CIC, so a callable is invoked and
    its result returned; the value that is not selected is never called.
    """
    selected = valueTrue if expression else valueFalse
    return CIC(selected)
def P23Ord(value):
    """Return the ordinal of value; an int is passed through unchanged.

    Indexing bytes yields an int on Python 3 and a 1-character string on
    Python 2: this helper accepts both.
    """
    return value if type(value) == int else ord(value)
def P23Chr(value):
    """Return chr(value) for an int; any other value is returned unchanged."""
    return chr(value) if type(value) == int else value
def File2String(filename):
    """Return the complete binary content of a file, or None on failure.

    Failure to open the file and failure to read it both yield None; the
    file is always closed again once it has been opened.
    """
    try:
        oFile = open(filename, 'rb')
    except:
        # best effort: any problem opening the file is reported as None
        return None
    with oFile:
        try:
            return oFile.read()
        except:
            # best effort: any problem reading the file is reported as None
            return None
class cDump():
    """Format a byte sequence as a hex dump, a hex/ASCII dump or base64 text.

    data: the bytes to dump
    prefix: text written in front of every output line
    offset: value added to the position displayed by HexAsciiDump
    dumplinelength: number of input bytes rendered per output line
    """

    def __init__(self, data, prefix='', offset=0, dumplinelength=16):
        self.data = data
        self.prefix = prefix
        self.offset = offset
        self.dumplinelength = dumplinelength

    def HexDump(self):
        """Return the data as lines of space-separated uppercase hex bytes."""
        oDumpStream = self.cDumpStream(self.prefix)
        hexDump = ''
        for i, b in enumerate(self.data):
            # a full line has been collected: emit it and start a new one
            if i % self.dumplinelength == 0 and hexDump != '':
                oDumpStream.Addline(hexDump)
                hexDump = ''
            hexDump += ('' if hexDump == '' else ' ') + '%02X' % self.C2IIP2(b)
        oDumpStream.Addline(hexDump)
        return oDumpStream.Content()

    def CombineHexAscii(self, hexDump, asciiDump):
        """Join the hex part and the ASCII part of one dump line.

        An incomplete line is padded so that its ASCII part lines up with
        the ASCII part of complete lines. Returns '' for an empty hex part.
        """
        if hexDump == '':
            return ''
        # every missing byte takes 3 columns in the hex part
        countSpaces = 3 * (self.dumplinelength - len(asciiDump))
        # the extra separator in the middle of the line is missing too
        if len(asciiDump) <= self.dumplinelength / 2:
            countSpaces += 1
        return hexDump + ' ' + (' ' * countSpaces) + asciiDump

    def HexAsciiDump(self, rle=False):
        """Return the data as lines with position, hex bytes and ASCII.

        With rle=True, a run of lines identical to the preceding line is
        replaced by a single '* <count> 0x<bytes>' line.
        """
        oDumpStream = self.cDumpStream(self.prefix)
        position = ''
        hexDump = ''
        asciiDump = ''
        previousLine = None
        countRLE = 0
        for i, b in enumerate(self.data):
            b = self.C2IIP2(b)
            if i % self.dumplinelength == 0:
                if hexDump != '':
                    line = self.CombineHexAscii(hexDump, asciiDump)
                    if not rle or line != previousLine:
                        # report the run of repeated lines that just ended
                        if countRLE > 0:
                            oDumpStream.Addline('* %d 0x%02x' % (countRLE, countRLE * self.dumplinelength))
                        oDumpStream.Addline(position + line)
                        countRLE = 0
                    else:
                        countRLE += 1
                    previousLine = line
                position = '%08X:' % (i + self.offset)
                hexDump = ''
                asciiDump = ''
            # extra separator halfway the line
            if i % self.dumplinelength == self.dumplinelength / 2:
                hexDump += ' '
            hexDump += ' %02X' % b
            asciiDump += chr(b) if 32 <= b < 127 else '.'
        if countRLE > 0:
            oDumpStream.Addline('* %d 0x%02x' % (countRLE, countRLE * self.dumplinelength))
        # the last (possibly incomplete) line is always displayed
        oDumpStream.Addline(self.CombineHexAscii(position + hexDump, asciiDump))
        return oDumpStream.Content()

    def Base64Dump(self, nowhitespace=False):
        """Return the data encoded as base64 text.

        With nowhitespace=True the encoding is returned as a single string
        without any whitespace; otherwise it is wrapped at 64 characters
        per line, each line preceded by the prefix.
        """
        encoded = binascii.b2a_base64(self.data)
        # b2a_base64 returns bytes on Python 3: convert to text, otherwise
        # it can not be combined with the (text) prefix and line ending
        if not isinstance(encoded, str):
            encoded = encoded.decode('ascii')
        # b2a_base64 always appends a newline
        encoded = encoded.strip()
        if nowhitespace:
            return encoded
        oDumpStream = self.cDumpStream(self.prefix)
        length = 64
        for i in range(0, len(encoded), length):
            oDumpStream.Addline(encoded[0+i:length+i])
        return oDumpStream.Content()

    class cDumpStream():
        """Collect output lines, each one preceded by a fixed prefix."""

        def __init__(self, prefix=''):
            self.oStringIO = StringIO()
            self.prefix = prefix

        def Addline(self, line):
            """Append prefix + line + newline; an empty line is ignored."""
            if line != '':
                self.oStringIO.write(self.prefix + line + '\n')

        def Content(self):
            """Return everything added so far as one string."""
            return self.oStringIO.getvalue()

    @staticmethod
    def C2IIP2(data):
        """Convert 2 Integer If Python 2: iterating bytes already yields integers on Python 3."""
        if sys.version_info[0] > 2:
            return data
        else:
            return P23Ord(data)
def HexDump(data):
    """Return a hex dump of data, formatted by cDump with the global dumplinelength."""
    oDump = cDump(data, dumplinelength=dumplinelength)
    return oDump.HexDump()
def HexAsciiDump(data, rle=False):
    """Return a hex/ASCII dump of data, formatted by cDump with the global dumplinelength.

    rle is handed to cDump.HexAsciiDump unchanged.
    """
    oDump = cDump(data, dumplinelength=dumplinelength)
    return oDump.HexAsciiDump(rle=rle)
def Translate(expression):
    """Return a function that decodes its argument with codec *expression*."""
    def Decode(x):
        return x.decode(expression)
    return Decode
def ExtractStringsASCII(data):
    """Return all runs of at least 4 bytes in data that match REGEX_STANDARD.

    REGEX_STANDARD is defined elsewhere in this file (a bytes pattern for a
    single character - presumably printable characters; verify there).
    """
    minimumLength = 4
    pattern = (REGEX_STANDARD + b'{%d,}') % minimumLength
    return re.findall(pattern, data)
def ExtractStringsUNICODE(data):
    """Return all runs of at least 4 REGEX_STANDARD bytes that are each followed by a NUL byte.

    The NUL bytes are removed from the returned strings.
    """
    minimumLength = 4
    pattern = (b'((' + REGEX_STANDARD + b'\x00){%d,})') % minimumLength
    extracted = []
    # findall yields (complete match, last repeated group): only the first is used
    for wideString, dummy in re.findall(pattern, data):
        extracted.append(wideString.replace(b'\x00', b''))
    return extracted
def ExtractStrings(data):
    """Return the strings found by ExtractStringsASCII followed by those found by ExtractStringsUNICODE."""
    asciiStrings = ExtractStringsASCII(data)
    unicodeStrings = ExtractStringsUNICODE(data)
    return asciiStrings + unicodeStrings
def DumpFunctionStrings(data):
    """Return the strings extracted from data as bytes, one string per line."""
    lines = []
    for extractedstring in ExtractStrings(data):
        lines.append(extractedstring + b'\n')
    return b''.join(lines)
#Fix for http://bugs.python.org/issue11395
def StdoutWriteChunked(data):
    """Write data to stdout.

    On Python 3 the data is converted to bytes and written to the binary
    stdout buffer in one go. On Python 2 it is written in chunks of 10000
    characters, flushing after each chunk; writing stops silently when a
    flush raises IOError.
    """
    if sys.version_info[0] > 2:
        sys.stdout.buffer.write(C2BIP3(data))
        return
    chunkSize = 10000
    while data != '':
        chunk, data = data[0:chunkSize], data[chunkSize:]
        sys.stdout.write(chunk)
        try:
            sys.stdout.flush()
        except IOError:
            return
def PrintableName(fname, orphan=0):
    """Return a printable (repr) form of a stream name.

    fname is joined with '/' unless orphan is 1; in that case the repr of
    fname itself is returned, preceded by 'Orphan: '.
    """
    if orphan != 1:
        return repr('/'.join(fname))
    return 'Orphan: ' + repr(fname)
def ParseTokenSequence(data):
    """Split off one token sequence: a flag byte followed by up to 8 tokens.

    Each bit of the flag byte (least significant bit first) describes one
    token: a set bit means a 2-byte token, a cleared bit a 1-byte token
    (appended as data[0], i.e. an int on Python 3). Parsing stops early
    when the data runs out.

    Returns (list of tokens, remaining data).
    """
    flags = P23Ord(data[0])
    remaining = data[1:]
    tokens = []
    for bitNumber in range(8):
        if len(remaining) == 0:
            break
        if flags & (1 << bitNumber):
            tokens.append(remaining[0:2])
            remaining = remaining[2:]
        else:
            tokens.append(remaining[0])
            remaining = remaining[1:]
    return tokens, remaining
def OffsetBits(data):
    """Return ceil(log2(len(data))), limited to the range 4 through 12.

    data must not be empty (the logarithm of 0 is undefined).
    """
    numberOfBits = int(math.ceil(math.log(len(data), 2)))
    return max(4, min(12, numberOfBits))
def Bin(number):
    """Return the binary digits of number, left-padded with '0' to at least 16 characters."""
    digits = bin(number)[2:]
    return digits.rjust(16, '0')
def DecompressChunk(compressedChunk):
    """Decompress the chunk at the start of compressedChunk.

    A chunk starts with a 2-byte little-endian header: the low 12 bits plus
    3 give the size of the chunk (header included), bit 0x8000 tells if the
    chunk data is compressed.

    Returns (decompressed text, data following the chunk), or (None, None)
    when there are fewer than 2 bytes or when a copy token is encountered
    before any text has been decompressed.
    """
    if len(compressedChunk) < 2:
        return None, None
    header = P23Ord(compressedChunk[0]) + P23Ord(compressedChunk[1]) * 0x100
    size = (header & 0x0FFF) + 3
    flagCompressed = header & 0x8000
    # the chunk data: everything after the header, up to the chunk size
    data = compressedChunk[2:2 + size - 2]
    if flagCompressed == 0:
        # chunk data is stored as is: just decode it (undecodable bytes are dropped)
        return data.decode(errors='ignore'), compressedChunk[size:]
    decompressedChunk = ''
    while len(data) != 0:
        tokens, data = ParseTokenSequence(data)
        for token in tokens:
            if type(token) == int:
                # literal token (Python 3: indexing bytes yields an int)
                decompressedChunk += chr(token)
            elif len(token) == 1:
                # literal token (1-character string)
                decompressedChunk += token
            else:
                # copy token: repeat text that has already been decompressed
                if decompressedChunk == '':
                    return None, None
                # the split between offset bits and length bits depends on
                # the amount of text decompressed so far
                numberOfOffsetBits = OffsetBits(decompressedChunk)
                copyToken = P23Ord(token[0]) + P23Ord(token[1]) * 0x100
                # high bits: how far back to start copying (minus 1)
                offset = 1 + (copyToken >> (16 - numberOfOffsetBits))
                # remaining low bits: number of characters to copy (minus 3)
                length = 3 + (((copyToken << numberOfOffsetBits) & 0xFFFF) >> numberOfOffsetBits)
                copy = decompressedChunk[-offset:]
                copy = copy[0:length]
                lengthCopy = len(copy)
                # more characters requested than available behind the start
                # position: keep repeating the copied fragment until the
                # requested length is reached
                while length > lengthCopy: #a#
                    if length - lengthCopy >= lengthCopy:
                        copy += copy[0:lengthCopy]
                        length -= lengthCopy
                    else:
                        copy += copy[0:length - lengthCopy]
                        length -= length - lengthCopy
                decompressedChunk += copy
    return decompressedChunk, compressedChunk[size:]
def Decompress(compressedData, replace=True):
    """Decompress a compressed container: a signature byte 0x01 followed by chunks.

    compressedData: the container, starting with the signature byte
    replace: when True, '\\r\\n' is replaced by '\\n' in the result

    Returns a tuple (success, text):
      (True, text) when all chunks were decompressed
      (False, None) when the data is empty or does not start with byte 0x01
      (False, partial text) when a chunk could not be decompressed
    """
    # empty data has no signature byte to inspect: report failure instead of
    # raising IndexError
    if len(compressedData) == 0 or P23Ord(compressedData[0]) != 1:
        return (False, None)
    remainder = compressedData[1:]
    decompressed = ''
    while len(remainder) != 0:
        decompressedChunk, remainder = DecompressChunk(remainder)
        if decompressedChunk is None:
            return (False, decompressed)
        decompressed += decompressedChunk
    if replace:
        return (True, decompressed.replace('\r\n', '\n'))
    return (True, decompressed)
def FindCompression(data):
    """Return the position in data of the bytes that mark compressed VBA source code, or -1 when absent."""
    marker = b'\x00Attribut\x00e '
    return data.find(marker)
def SearchAndDecompressSub(data):
    """Locate the compressed data inside data and decompress it.

    Returns the tuple produced by Decompress, or (False, '') when no
    compressed data is found.
    """
    position = FindCompression(data)
    # The container starts 3 bytes before the marker: the signature byte
    # read by Decompress and the 2-byte header read by DecompressChunk.
    # A marker without these 3 bytes in front of it (or no marker at all,
    # position -1) is treated as not found; slicing with a negative start
    # would take bytes from the end of data instead.
    if position < 3:
        return (False, '')
    compressedData = data[position - 3:]
    return Decompress(compressedData)
def SkipAttributes(text):
    """Remove the leading 'Attribute VB_... = ...' lines from text and return what is left."""
    oAttribute = re.compile('^Attribute VB_.+? = [^\n]+\n')
    oMatch = oAttribute.match(text)
    while oMatch is not None:
        # the match starts at position 0, so its end is its length
        text = text[oMatch.end():]
        oMatch = oAttribute.match(text)
    return text
def SearchAndDecompress(data, ifError='Error: unable to decompress\n', skipAttributes=False):
    """Search data for compressed source code and return it decompressed.

    ifError: value returned when decompression fails; with None, whatever
             SearchAndDecompressSub produced is returned instead
    skipAttributes: when True, the result is passed through SkipAttributes
    """
    succeeded, decompress = SearchAndDecompressSub(data)
    if not succeeded and ifError != None:
        return ifError
    if skipAttributes:
        return SkipAttributes(decompress)
    return decompress
def ReadWORD(data):
    """Read a 16-bit little-endian integer from the start of data.

    Returns (value, remaining data), or (None, None) when data holds fewer
    than 2 bytes.
    """
    if len(data) < 2:
        return None, None
    lowByte = P23Ord(data[0])
    highByte = P23Ord(data[1])
    return lowByte + highByte * 0x100, data[2:]
def ReadDWORD(data):
    """Read a 32-bit little-endian integer from the start of data.

    Returns (value, remaining data), or (None, None) when data holds fewer
    than 4 bytes.
    """
    if len(data) < 4:
        return None, None
    value = 0
    for index in range(4):
        # byte 0 is the least significant byte
        value += P23Ord(data[index]) << (8 * index)
    return value, data[4:]
def ReadNullTerminatedString(data):
    """Read the bytes in front of the first NUL byte of data.

    Returns (bytes before the NUL, bytes after the NUL), or (None, None)
    when data contains no NUL byte.
    """
    head, terminator, tail = data.partition(b'\x00')
    if not terminator:
        return None, None
    return head, tail