Python归档打包解包

Published: 2016-04-28

Tags: Python

本文总阅读量

测试是否为zip归档

#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Report, for each candidate file name, whether zipfile recognises it as a
# ZIP archive.
import zipfile

for filename in ['test.txt', 'test.zip', 'test.jpg', 'test.py']:
    # A single pre-formatted argument makes print(...) produce the same
    # output on Python 2 (print statement) and Python 3 (print function).
    print('%20s  %s' % (filename, zipfile.is_zipfile(filename)))

输出:

[root@the5 py]# ls
test.jpg  test.py  test.txt  test.zip
[root@the5 py]# ./test.py
            test.txt  False
            test.zip  True
            test.jpg  False
             test.py  False
[root@the5 py]#

查询zip归档文件列表

#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Print the list of member names stored in test.zip.
import zipfile

# The context manager closes the archive handle even if namelist() raises.
with zipfile.ZipFile('test.zip', 'r') as zf:
    print(zf.namelist())

输出:

[root@the5 py]# ./zipfile_namelist.py
['test.txt']
[root@the5 py]# unzip -t test.zip
Archive:  test.zip
    testing: test.txt                 OK
No errors detected in compressed data of test.zip.

输出结果可以看出,与unzip命令得到的文件列表相同,查询更详细的归档信息可以使用如下方式

查询归档详细信息

import datetime
import zipfile

def print_info(archive_name):
    """Print the metadata of every member of a ZIP archive to stdout.

    For each member this prints its file name followed by tab-indented
    lines for the comment, modification time, creating system, ZIP
    version, compressed size and uncompressed size, then a blank line.

    Args:
        archive_name: Path of the ZIP archive to inspect.
    """
    # The context manager guarantees the archive handle is closed.
    with zipfile.ZipFile(archive_name) as zf:
        for info in zf.infolist():
            comment = info.comment
            # On Python 3 the comment is bytes; decode it so an empty
            # comment prints as nothing rather than "b''". On Python 2
            # bytes is str, so this branch is never taken.
            if isinstance(comment, bytes) and not isinstance(comment, str):
                comment = comment.decode('utf-8', 'replace')

            # Each line is pre-formatted into one string so the output is
            # identical on Python 2 and 3. The Python 2 print statement
            # writes no separator after a trailing tab, hence no space
            # follows '\t' below.
            print(info.filename)
            print('\tComment:\t%s' % comment)
            print('\tModified:\t%s' % datetime.datetime(*info.date_time))
            print('\tSystem:\t\t%s (0 = Windows, 3 = Unix)' % info.create_system)
            print('\tZIP version:\t%s' % info.create_version)
            print('\tCompressed:\t%s bytes' % info.compress_size)
            print('\tUncompressed:\t%s bytes' % info.file_size)
            print('')

if __name__ == '__main__':
    print_info('IMG_201509.zip')

输出:

[root@the5 py]# ./test3.py
     ...
IMG_20150929_203432.jpg
    Comment:
    Modified:   2015-09-29 20:34:32
    System:     0 (0 = Windows, 3 = Unix)
    ZIP version:    63
    Compressed: 5840550 bytes
    Uncompressed:   5956792 bytes

IMG_20150930_091243.jpg
    Comment:
    Modified:   2015-09-30 09:12:42
    System:     0 (0 = Windows, 3 = Unix)
    ZIP version:    63
    Compressed: 2818819 bytes
    Uncompressed:   2932052 bytes
     ...

它会打印出归档中每个文件的详细信息

获取指定文件大小

import zipfile

# Look up individual members by name. A missing member surfaces as
# KeyError, which is reported instead of aborting the loop.
with zipfile.ZipFile('example.zip') as zf:
    for filename in ['README.txt', 'notthere.txt']:
        try:
            info = zf.getinfo(filename)
        except KeyError:
            print('ERROR: Did not find %s in zip file' % filename)
        else:
            print('%s is %d bytes' % (info.filename, info.file_size))

输出:

$ python zipfile_getinfo.py

README.txt is 75 bytes
ERROR: Did not find notthere.txt in zip file

提取文件

#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Extract one member of the archive and save its bytes to img.jpg.
import zipfile

# The context manager closes the archive once extraction is done.
with zipfile.ZipFile('vivo-201509.zip') as zf:
    for filename in ['IMG_20150929_203432.jpg']:
        try:
            data = zf.read(filename)
        except KeyError:
            print('ERROR: Did not find %s in zip file' % filename)
        else:
            print(filename)
            # Binary mode: the member content is written back unmodified.
            with open('img.jpg', 'wb') as f:
                f.write(data)

创建归档

# Create a new archive containing README.txt, then show its metadata.
from zipfile_infolist import print_info
import zipfile

print('creating archive')
zf = zipfile.ZipFile('zipfile_write.zip', mode='w')
try:
    print('adding README.txt')
    zf.write('README.txt')
finally:
    # Always close so the archive is finalised even if write() fails.
    print('closing')
    zf.close()

# print('') emits the blank separator line on both Python 2 and 3; a bare
# `print` does nothing on Python 3.
print('')
print_info('zipfile_write.zip')

默认情况下,归档不会对文件进行压缩,只是有打包的作用

想要压缩,需要 zlib 模块可用,并在写入时指定 zipfile.ZIP_DEFLATED 压缩方式

# Create an archive whose member is compressed when zlib is available.
from zipfile_infolist import print_info
import zipfile
try:
    import zlib
    compression = zipfile.ZIP_DEFLATED
except ImportError:
    # Only a missing zlib should trigger the fallback to plain storage;
    # a bare `except:` would also hide unrelated errors.
    compression = zipfile.ZIP_STORED

# Human-readable names for the two compression constants used above.
modes = { zipfile.ZIP_DEFLATED: 'deflated',
          zipfile.ZIP_STORED:   'stored',
          }

print('creating archive')
zf = zipfile.ZipFile('zipfile_write_compression.zip', mode='w')
try:
    print('adding README.txt with compression mode %s' % modes[compression])
    zf.write('README.txt', compress_type=compression)
finally:
    # Always close so the archive is finalised even if write() fails.
    print('closing')
    zf.close()

# print('') emits the blank separator line on both Python 2 and 3.
print('')
print_info('zipfile_write_compression.zip')

输出:

$ python zipfile_write_compression.py
creating archive
adding README.txt with compression mode deflated
closing

README.txt
        Comment:
        Modified:       2007-12-16 10:08:50
        System:         3 (0 = Windows, 3 = Unix)
        ZIP version:    20
        Compressed:     63 bytes
        Uncompressed:   75 bytes

打包时文件重命名

# Store README.txt in the archive under a different member name.
from zipfile_infolist import print_info
import zipfile

archive_path = 'zipfile_write_arcname.zip'

# Leaving the with-block closes the archive, as the explicit close() did.
with zipfile.ZipFile(archive_path, mode='w') as archive:
    archive.write('README.txt', arcname='NOT_README.txt')

print_info(archive_path)

输出:

$ python zipfile_write_arcname.py
NOT_README.txt
        Comment:
        Modified:       2007-12-16 10:08:50
        System:         3 (0 = Windows, 3 = Unix)
        ZIP version:    20
        Compressed:     75 bytes
        Uncompressed:   75 bytes

打包字符串数据到归档

# Add in-memory string data to an archive as a member, then read it back.
from zipfile_infolist import print_info
import zipfile

msg = 'This data did not exist in a file before being added to the ZIP file'
zf = zipfile.ZipFile('zipfile_writestr.zip',
                     mode='w',
                     compression=zipfile.ZIP_DEFLATED,
                     )
try:
    zf.writestr('from_string.txt', msg)
finally:
    zf.close()

print_info('zipfile_writestr.zip')

# Reopen read-only; the context manager closes this second handle, which
# was previously left open.
with zipfile.ZipFile('zipfile_writestr.zip', 'r') as zf:
    # read() returns bytes on Python 3; decode so the text itself is
    # printed on both Python 2 and 3.
    print(zf.read('from_string.txt').decode('utf-8'))

输出:

$ python zipfile_writestr.py
from_string.txt
        Comment:
        Modified:       2007-12-16 11:38:14
        System:         3 (0 = Windows, 3 = Unix)
        ZIP version:    20
        Compressed:     62 bytes
        Uncompressed:   68 bytes

向归档中追加文件

# Create an archive, then reopen it in append mode to add a second member.
from zipfile_infolist import print_info
import zipfile

print('creating archive')
zf = zipfile.ZipFile('zipfile_append.zip', mode='w')
try:
    zf.write('README.txt')
finally:
    zf.close()

# print('') emits the blank separator line on both Python 2 and 3; a bare
# `print` does nothing on Python 3.
print('')
print_info('zipfile_append.zip')

print('appending to the archive')
# mode='a' keeps the existing members and adds new ones.
zf = zipfile.ZipFile('zipfile_append.zip', mode='a')
try:
    zf.write('README.txt', arcname='README2.txt')
finally:
    zf.close()

print('')
print_info('zipfile_append.zip')

使用czipfile提升性能

dnf install python-devel
pip install czipfile

使用方法:只需把代码中的模块名 zipfile 替换为 czipfile 就可以了

如下输出可知,300多兆大小的照片归档,解压效率差别还是蛮大的,值得使用

[root@the5 py]# du -sh ./vivo-201509.zip
354M    ./vivo-201509.zip
[root@the5 py]# time ./test-zipfile.py

real    0m2.203s
user    0m1.464s
sys 0m0.542s
[root@the5 py]# time ./test-czipfile.py

real    0m1.630s
user    0m0.989s
sys 0m0.482s

参考: