|
@@ -1,32 +1,60 @@
|
|
|
#!/usr/bin/env python
|
|
|
|
|
|
-import sys
|
|
|
+# Extracts a zip archive while converting file names from Shift-JIS encoding to UTF-8.
|
|
|
+#
|
|
|
+# Example:
|
|
|
+#   python unzip-jp.py archive.zip [password]
|
|
|
+#
|
|
|
+# Creates a directory `archive` and extracts the archive there.
|
|
|
+#
|
|
|
import zipfile
|
|
|
-import argparse
|
|
|
+import sys
|
|
|
+import os
|
|
|
+import codecs
|
|
|
|
|
|
-parser = argparse.ArgumentParser(description="accept to and from encodings")
|
|
|
-parser.add_argument('-O', metavar='decode', default="shift-jis")
|
|
|
-parser.add_argument('-I', metavar='encode', default="cp437")
|
|
|
-args, files = parser.parse_known_args();
|
|
|
+# The archive path is required; the password argument is optional.
+if len(sys.argv) < 2:
+    print('No archive name.')
+    print('')
+    print('Usage: unzip-jp archive [password]')
+    # Use sys.exit rather than the bare exit() helper: exit() is injected by
+    # the site module and is missing under `python -S` or in frozen builds.
+    sys.exit(1)
|
|
|
|
|
|
-def unzip(filename, encode, decode):
|
|
|
- code = 0
|
|
|
- with zipfile.ZipFile(filename) as myzip:
|
|
|
- for info in myzip.infolist():
|
|
|
- try:
|
|
|
- info.filename = info.filename.encode(encode, 'strict').decode(decode, 'strict')
|
|
|
- myzip.extract(info)
|
|
|
- except Exception as e:
|
|
|
- code = 1
|
|
|
- print("failed to extract {0}: {1}".format(info.filename, e))
|
|
|
- return code
|
|
|
+# Path of the zip archive to extract (first command-line argument).
+name = sys.argv[1]
|
|
|
|
|
|
-def main(files, encode, decode):
|
|
|
- code = 0
|
|
|
- for file in files:
|
|
|
- if unzip(file, encode, decode) == 1:
|
|
|
- code = 1
|
|
|
- return code
|
|
|
+# Optional second argument: archive password (None when not supplied).
+password = sys.argv[2] if len(sys.argv) > 2 else None
|
|
|
|
|
|
-if __name__ == '__main__':
|
|
|
- sys.exit(main(files, args.I, args.O))
|
|
|
+# The output directory is named after the archive: its base name with the
+# extension stripped.
+directory = os.path.basename(name)
+directory = os.path.splitext(directory)[0]
+
+# Create the output directory unless something already exists at that path.
+if not os.path.exists(directory):
+    os.makedirs(directory)
|
|
|
+
|
|
|
+# Extract every entry, re-decoding legacy Shift-JIS file names to Unicode.
+with zipfile.ZipFile(name, 'r') as z:
+    if password:
+        # zipfile needs a bytes password. On Python 2 argv values are already
+        # byte strings, and calling .encode() on one triggers an implicit
+        # ASCII decode that fails for non-ASCII passwords.
+        if isinstance(password, bytes):
+            password_bytes = password
+        else:
+            password_bytes = password.encode('cp850', 'replace')
+        z.setpassword(password_bytes)
+    # Absolute extraction root, used to reject entries that would escape it.
+    root = os.path.abspath(directory)
+    for f in z.infolist():
+        if f.flag_bits & 0x800:
+            # General-purpose bit 11 set: the name is stored as UTF-8 and
+            # zipfile has already decoded it, so no Shift-JIS round trip is
+            # needed (re-encoding it to CP437 would raise UnicodeEncodeError).
+            uf = f.filename
+        else:
+            bad_filename = f.filename
+            if bytes != str:
+                # Python 3 - zipfile decoded the raw name as CP437; encode it
+                # back to recover the original bytes
+                # assume CP437 - these zip files were from Windows anyway
+                bad_filename = bytes(bad_filename, 'cp437')
+            try:
+                uf = codecs.decode(bad_filename, 'sjis')
+            except UnicodeError:
+                # Fall back to the JIS X 0213 variant for names that plain
+                # Shift-JIS cannot decode.
+                uf = codecs.decode(bad_filename, 'shift_jisx0213')
+        # need to print repr in Python 2 as we may encounter UnicodeEncodeError
+        # when printing to a Windows console
+        print(repr(uf))
+        filename = os.path.join(directory, uf)
+        # Archives are untrusted input: refuse entries (absolute paths, '..'
+        # components) that resolve outside the extraction directory.
+        target = os.path.abspath(filename)
+        if target != root and not target.startswith(root + os.sep):
+            print('Skipping entry outside extraction directory: ' + repr(uf))
+            continue
+        # create directories if necessary
+        parent = os.path.dirname(filename)
+        if not os.path.exists(parent):
+            try:
+                os.makedirs(parent)
+            except OSError:
+                # Guard against race condition: the directory may have been
+                # created between the check and makedirs. Re-raise otherwise.
+                if not os.path.isdir(parent):
+                    raise
+        # don't try to write to directories
+        if not filename.endswith('/'):
+            with open(filename, 'wb') as dest:
+                dest.write(z.read(f))
|