From c3d6861331e7de513d5c3fa2dd521e39d65c6bb6 Mon Sep 17 00:00:00 2001 From: Xi Juanjie Date: Mon, 27 Jul 2020 22:05:11 +0900 Subject: [PATCH 1/8] Add encrypted .zip support --- README.md | 5 ++++- unzipmbcs.py | 9 +++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 111e631..6e1a4e2 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,8 @@ optional arguments: -h, --help show this help message and exit -e ENCODING, --encoding ENCODING character encoding of filename in the .zip + -p PASSWORD, --password PASSWORD + password for encrypted .zip ``` @@ -35,11 +37,12 @@ optional arguments: Return the information of the files in zip archive `filename` with character `encoding` -### extractZip(filename, encoding='utf-8', filters=None) +### extractZip(filename, encoding='utf-8', filters=None, password=None) Extract files in zip archive `filename` on current directory. Assume that the file names in zip archive are encoded as `encoding`. Only the files prefixed the values of `filters` list are extracted if `filters` are provided. +Use `password` on encrypted zip archive. ### fixZipFilename(filename, enc) Fix `filename` as UNICODE string which is originally encoded as `enc`. diff --git a/unzipmbcs.py b/unzipmbcs.py index 56bf898..9c2f30a 100644 --- a/unzipmbcs.py +++ b/unzipmbcs.py @@ -59,7 +59,7 @@ def _extractFileFromZip(z, fn, ofn): f.close() -def extractZip(filename, encoding='utf-8', filters=None): +def extractZip(filename, encoding='utf-8', filters=None, password=None): """ Extract files in zip archive `filename` on current directory. Assume that the file names in zip archive are encoded as `encoding`. @@ -67,6 +67,8 @@ def extractZip(filename, encoding='utf-8', filters=None): if `filters` are provided. """ z = zipfile.ZipFile(filename, 'r') + if password: + z.setpassword(bytes(password, 'cp437')) l = z.namelist() for fn in l: if len(fn) == 0 or fn[-1] == '/': @@ -129,6 +131,9 @@ def _main(): parser.add_argument('-e', '--encoding', help='character encoding of filename in the .zip', default='utf-8') + parser.add_argument('-p', '--password', + help='password for encrypted .zip', + default=None) parser.add_argument('zipfile', help='.zip file to unzip') parser.add_argument('target', nargs='*', help='file prefix to extract') @@ -143,7 +148,7 @@ def _main(): % tuple([entry[1]] + list(entry[2][:-1]) + [entry[0]])) elif args.cmd == 'x': extractZip(args.zipfile, encoding=args.encoding, - filters=args.target) + filters=args.target, password=args.password) else: print('Unknown command:', args.cmd) From f3a521379e0ac10bf70e9db279e86053bf590f58 Mon Sep 17 00:00:00 2001 From: Joo-Won Jung Date: Sun, 17 Jul 2022 20:14:12 +0900 Subject: [PATCH 2/8] chore: add encrypted zip testcase with full-width charater password --- test/lhaplus-zkenc.zip | Bin 0 -> 555 bytes test/test_unzipmbcs.py | 35 ++++++++++++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 1 deletion(-) create mode 100644 test/lhaplus-zkenc.zip diff --git a/test/lhaplus-zkenc.zip b/test/lhaplus-zkenc.zip new file mode 100644 index 0000000000000000000000000000000000000000..ba5e6d8087ff86996029c8f4c05301f55c99e32c GIT binary patch literal 555 zcmWIWW@Zs#U}WH6@aFg!BC|AIT%3`CVLuRyFfcGQ_clg1*Y?e7PHpySj%ZHTE2$`n z5Lx_X#?3`vR&AJBzfL~g_P}D1H}P{gW5tghI(h2(zs0B9d8fNV->q)@WxfY)xXByX;oARES~r<8$#r1`2yP zFP&vEQvOo7by3$xRjz-@+TY@*XslQf-0moz>hjn*g(EHi8juVO4}k%BKJ`Jd1TX-3 zfLIuar@WlDT)(*h7>5185X}IE Date: Sun, 17 Jul 2022 20:22:23 +0900 Subject: [PATCH 3/8] feat: add exception handling when password is not supplied --- unzipmbcs.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/unzipmbcs.py b/unzipmbcs.py index b6029ad..e448904 100644 --- a/unzipmbcs.py +++ b/unzipmbcs.py @@ -53,7 +53,12 @@ def _extractFileFromZip(z, fn, ofn): extract a file `fn` in ZipFile `z` as `ofn` """ f = open(ofn, 'wb') - f.write(z.read(fn)) + try: + f.write(z.read(fn)) + except RuntimeError as e: + f.close() + os.remove(ofn) + raise e f.close() From 7a1b86fd76722b9cfdd9d5c9ba79eb7887bce113 Mon Sep 17 00:00:00 2001 From: Joo-Won Jung Date: Sun, 17 Jul 2022 20:30:09 +0900 Subject: [PATCH 4/8] feat: fix password encoding to accept MBCS password --- unzipmbcs.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/unzipmbcs.py b/unzipmbcs.py index e448904..fb93207 100644 --- a/unzipmbcs.py +++ b/unzipmbcs.py @@ -47,6 +47,10 @@ def fixZipFilename(filename, enc): raise e return result +def transcodeBytes(str, toEnc, fromEnc='utf-8'): + if sys.version_info[0] == 2: + return str.decode(fromEnc).encode(toEnc) + return bytes(str, toEnc) def _extractFileFromZip(z, fn, ofn): """ @@ -71,7 +75,7 @@ def extractZip(filename, encoding='utf-8', filters=None, password=None): """ z = zipfile.ZipFile(filename, 'r') if password: - z.setpassword(bytes(password, 'cp437')) + z.setpassword(transcodeBytes(password, encoding)) l = z.namelist() for fn in l: if len(fn) == 0 or fn[-1] == '/': From e8f14f706f9ef3b74649abf39cf0c4cb2a1a6605 Mon Sep 17 00:00:00 2001 From: Joo-Won Jung Date: Sun, 17 Jul 2022 22:08:40 +0900 Subject: [PATCH 5/8] refactor: refactor test code --- test/test_unzipmbcs.py | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/test/test_unzipmbcs.py b/test/test_unzipmbcs.py index 125cffb..e885776 100644 --- a/test/test_unzipmbcs.py +++ b/test/test_unzipmbcs.py @@ -6,6 +6,22 @@ sys.path.append(os.path.join(os.path.dirname(__file__), '..')) import unzipmbcs +def setUpModule(): + if (sys.getfilesystemencoding().lower() != 'utf-8') and (not os.environ.get('PYTHONIOENCODING')): + raise Exception('non-UTF8 filesystem. set PYTHONIOENCODING as your filesystem encoding!') + +def clearFiles(fileList): + for entry in fileList: + if (os.path.isfile(entry)): + os.remove(entry) + elif (len(os.listdir(entry)) == 0): + os.rmdir(entry) + entry = os.path.dirname(entry) + while entry != '': + if len(os.listdir(entry)) > 0: + break; + os.rmdir(entry) + entry = os.path.dirname(entry) class TestFromZip(unittest.TestCase): filename = 'NewFolder.zip' @@ -17,22 +33,10 @@ def testListZip(self): self.assertEqual(list(map(lambda x: x[0], result)), self.expected) def testExtractZip(self): - if (sys.getfilesystemencoding().lower() != 'utf-8') and (not os.environ.get('PYTHONIOENCODING')): - print('Warning: non-UTF8 filesystem.', - 'set PYTHONIOENCODING as your filesystem encoding!') - return unzipmbcs.extractZip(self.filename, self.encoding) map(lambda x: self.assertTrue(os.path.exists(x), x + ' not exist'), self.expected) - - # clean-up - files = list(self.expected) # clone the list - files.reverse() - for f in files: - if (os.path.isfile(f)): - os.remove(f) - else: - os.rmdir(f) + clearFiles(self.expected) class TestEncryptedZip(unittest.TestCase): filename = 'lhaplus-zkenc.zip' @@ -57,15 +61,7 @@ def testExtractWithPassword(self): unzipmbcs.extractZip(self.filename, self.encoding, password=self.password) map(lambda x: self.assertTrue(os.path.exists(x), x + ' not exist'), self.expected) - - # clean-up - files = list(self.expected) # clone the list - files.reverse() - for f in files: - if (os.path.isfile(f)): - os.remove(f) - else: - os.rmdir(f) + clearFiles(self.expected) if __name__ == '__main__': unittest.main() From bfa43357f2bdd3719fe83bf73c10bb9da6c7c4ea Mon Sep 17 00:00:00 2001 From: Joo-Won Jung Date: Sun, 17 Jul 2022 22:14:50 +0900 Subject: [PATCH 6/8] chore: change test zip file --- test/NewFolder.zip | Bin 412 -> 0 bytes test/test_unzipmbcs.py | 4 ++-- test/win-default.zip | Bin 0 -> 322 bytes 3 files changed, 2 insertions(+), 2 deletions(-) delete mode 100644 test/NewFolder.zip create mode 100644 test/win-default.zip diff --git a/test/NewFolder.zip b/test/NewFolder.zip deleted file mode 100644 index 473a00b167b4da8e56d6f8eab4a9f0b3a5cbd4c4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 412 zcmWIWW@h1HU|`^2n7Z1~bKjJUe#StaB@hb%@$Rn*M{n%ib!?r&&UJg%>XlTK6!UE` z*%)$TXWscJTh{q!_FU@v%l&;#=#7oZpI6E1i}LB({wV#dW*fJT*_!XE(&TS@Vi*FT zRxmK+0IdL;3I`lO7SM`gzqWkUhpFOV=me@d;jGju43rfBVi6=&$1iTY4ReZhUE$;0 z*2U)E-dSz)OMJe|Hk&cPn~{l&0k?mECINv0n8fBggnsOfg=hphDjA3oPDj{^&G8Uj c3P7q8Nf)wL0=!w-K&qI5a0ie+2jVaQ0P1Xa%>V!Z diff --git a/test/test_unzipmbcs.py b/test/test_unzipmbcs.py index e885776..b95171f 100644 --- a/test/test_unzipmbcs.py +++ b/test/test_unzipmbcs.py @@ -24,9 +24,9 @@ def clearFiles(fileList): entry = os.path.dirname(entry) class TestFromZip(unittest.TestCase): - filename = 'NewFolder.zip' + filename = 'win-default.zip' encoding = 'cp949' - expected = [u'새 텍스트 문서.txt', u'새 폴더/', u'새 폴더/한글문서.txt'] + expected = [u'똠방각하.txt', u'한글 디렉토리/새 텍스트 문서.txt'] def testListZip(self): result = unzipmbcs.listZip(self.filename, self.encoding) diff --git a/test/win-default.zip b/test/win-default.zip new file mode 100644 index 0000000000000000000000000000000000000000..b6433ef054e66fd92216f56cf5f88a99617ce057 GIT binary patch literal 322 zcmWIWW@Zs#00H&+A48bGyf;<@vO$;!hu&$g9wk6%>Sa$xf^UakPRc_BdaOfA1Qhyl$3VFe&QesSY%g{>d9 z?>hQq$2$GpUloqt*t_f4I)$C<_N)cj5gOvAYXP*Q0%*sZ<_?9oJw0#c?g{W_WD;S( r?Mk4*K%f97QQeHJ6PtS=T0lAjfS8c$0=!w-KsuO!umeah0dW`roe5_~ literal 0 HcmV?d00001 From 1b261d0fc56791509ceb20c65bca47b36d470bf4 Mon Sep 17 00:00:00 2001 From: Joo-Won Jung Date: Sun, 17 Jul 2022 22:41:51 +0900 Subject: [PATCH 7/8] chore: fix typo, license details --- LICENSE.txt | 2 +- README.md | 4 ++-- unzipmbcs.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/LICENSE.txt b/LICENSE.txt index 072ba69..2679ed0 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,4 +1,4 @@ -Copyright 2016 Joo-Won Jung +Copyright (c) 2016-2022 Joo-Won Jung Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/README.md b/README.md index 6e1a4e2..b36a470 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,6 @@ But, in non-ASCII, non-Western environment, it makes trouble due to filenames. Since ZIP format was created too old (1993), there is no standard character encoding about the file name of zip archive entries. Most of zip file entries are encoded as legacy character encoding, local charset. -In modern UNICODE based environment or global data processing environment such as Linux, this makes inconvinience, less portability, mangled file names, fail to extract the file, and so on. +In modern UNICODE based environment or global data processing environment such as Linux, this makes inconvenience, less portability, mangled file names, fail to extract the file, and so on. -This module may mitigate the inconviniences. +This module may mitigate the inconveniences. diff --git a/unzipmbcs.py b/unzipmbcs.py index fb93207..a705195 100644 --- a/unzipmbcs.py +++ b/unzipmbcs.py @@ -1,6 +1,6 @@ #! python ######################## BEGIN LICENSE BLOCK ######################## -# Copyright 2016 Joo-Won Jung +# Copyright (c) 2016-2022 Joo-Won Jung # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From ddffbc18b9658028c8dad6358bbab4da07e1316d Mon Sep 17 00:00:00 2001 From: Joo-Won Jung Date: Sun, 17 Jul 2022 22:42:54 +0900 Subject: [PATCH 8/8] chore: update version --- pyproject.toml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 62c0fc1..3fa459d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,13 +4,12 @@ build-backend = "setuptools.build_meta" [project] name = "unzipmbcs" -version = "0.1.2" +version = "0.2.0" description = "UnZip for non-UTF8 encoding such as cp949, sjis, gbk, euc-kr, euc-jp, and gb2312" readme = "README.md" -license = { file = "LICENSE.txt" } keywords= [ "unzip", "pkzip", "non-UTF8", "mbcs", - "cp949", "sjis", "shift_jis", "gbk", "gb18030" + "cp949", "sjis", "shift_jis", "gbk", "gb18030", ] authors = [ {name="Joo-Won Jung", email="sanori@gmail.com"},