123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960 |
- #!/usr/bin/env python
- # Extracts a zip archive while converting file names from Shift-JIS encoding to UTF-8.
- #
- # Example:
- # python unzip-jp.py archive.zip
- #
- # Creates a directory `archive` and extracts the archive there.
- #
- import zipfile
- import sys
- import os
- import codecs
- if len(sys.argv) < 2:
- print('No archive name.')
- print('')
- print('Usage: unzip-jp archive [password]')
- exit(1)
- name = sys.argv[1]
- if len(sys.argv) > 2:
- password = sys.argv[2]
- else:
- password = None
- directory = os.path.splitext(os.path.basename(name))[0]
- if not os.path.exists(directory):
- os.makedirs(directory)
- with zipfile.ZipFile(name, 'r') as z:
- if password:
- z.setpassword(password.encode('cp850','replace'))
- for f in z.infolist():
- bad_filename = f.filename
- if bytes != str:
- # Python 3 - decode filename into bytes
- # assume CP437 - these zip files were from Windows anyway
- bad_filename = bytes(bad_filename, 'cp437')
- try:
- uf = codecs.decode(bad_filename, 'sjis')
- except:
- uf = codecs.decode(bad_filename, 'shift_jisx0213')
- # need to print repr in Python 2 as we may encounter UnicodeEncodeError
- # when printing to a Windows console
- print(repr(uf))
- filename=os.path.join(directory, uf)
- # create directories if necessary
- if not os.path.exists(os.path.dirname(filename)):
- try:
- os.makedirs(os.path.dirname(filename))
- except OSError as exc: # Guard against race condition
- if exc.errno != errno.EEXIST:
- raise
- # don't try to write to directories
- if not filename.endswith('/'):
- with open(filename, 'wb') as dest:
- dest.write(z.read(f))
|