如何通过 Python 2.7 将字节附加到 zip 文件内的文件中?

How do you append bytes onto a file inside of a zip file via Python 2.7?

我目前正在解决一个更大难题中的一部分。就我负责的这部分而言,我有一个文件对象和 zip 文件的目标路径。我始终无法事先知道该文件对象的大小,只知道我持有这样一个对象。此外,zip 必须支持 zip64。

我的目标是获取该文件对象(指向文件的指针)并将其写入 zip 文件,而不会将整个文件加载到内存中。我想逐块执行此操作(特别是如果文件对象真的很大)。

关于我如何着手做这件事有什么想法吗?

import zipfile


# Destination archive and the source file that should be added to it.
zip_path = "/tmp/file.zip"
file_to_zip_path = "/home/ryanb58/Desktop/movie.mp4"

# The archive is opened for writing with ZIP64 support enabled.
# NOTE(review): the name `zip` shadows the builtin of the same name.
with zipfile.ZipFile(zip_path, mode="w", allowZip64=True) as zip:
    # NOTE(review): `f` is never closed in this snippet.
    f = open(file_to_zip_path, 'rb')
    while True:
        # Read the source 1024 bytes at a time so it is never held in
        # memory as a whole.
        data = f.read(1024)
        # writestr() is called once per chunk, always with the name
        # "file.mp4"; this is what produces the many small same-named
        # entries described below.  Because the emptiness check comes
        # after this call, the final empty read is written as well.
        zip.writestr("file.mp4", data)
        if not data:
            break

我的问题是:当我把新的字节写入 zip 文件后,打开 zip 看到的只是一长串同名的小文件,每个文件大约 1024 字节。上面就是我的代码。我有点卡住了,所以任何想法或解决方案都会很有帮助。

按照@J.F.Sebastian 在他的评论中给出的建议,我能够将我的文件写入一个 zip 而无需将整个文件存入内存。

这是我通过重写(override)`write` 方法得到的解决方案。

import binascii
import os
import stat
import time
import zipfile

BUFFER_SIZE = 1024 * 10000  # Bytes read per chunk: 10,240,000 (about 10 MB).


class Zip(zipfile.ZipFile):
    """ZipFile whose write() streams an already-open file object.

    The stock ``ZipFile.write`` takes a filename; this override takes a
    file object and copies it into the archive ``BUFFER_SIZE`` bytes at a
    time, so the source is never loaded into memory as a whole.
    """

    def write(self, fileobj, arcname=None, compress_type=None):
        """Put the bytes read from ``fileobj`` into the archive as ``arcname``.

        Args:
            fileobj: Open binary file object.  Its ``name`` attribute must
                be a path that ``os.stat`` can resolve; the mode bits and
                modification time stored in the archive come from there.
                Data is read from the object's current position.
            arcname: Member name inside the archive.  Any drive letter and
                leading path separators are removed.  When ``None`` the
                name ``"/temp.zip"`` is used (stored as ``temp.zip``).
            compress_type: Compression method for this member; defaults to
                the archive's ``compression`` setting.

        Raises:
            RuntimeError: If the archive is already closed, or if a member
                that was not expected to need ZIP64 ends up larger than
                ``zipfile.ZIP64_LIMIT``.
        """
        try:
            import zlib  # Needed for deflate; also the preferred CRC source.
            crc32 = zlib.crc32
        except ImportError:
            zlib = None
            crc32 = binascii.crc32

        if not self.fp:
            raise RuntimeError(
                "Attempt to write to ZIP archive that was already closed")

        st = os.stat(fileobj.name)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]

        # Build the archive member name.
        if arcname is None:
            arcname = "/temp.zip"
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])

        # Strip any leading forward or back slashes.
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'

        # Create the ZipInfo record describing this member.
        zinfo = zipfile.ZipInfo(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes

        if isdir:
            zinfo.compress_type = zipfile.ZIP_STORED
        elif compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type

        zinfo.file_size = st.st_size
        zinfo.flag_bits = 0x00
        zinfo.header_offset = self.fp.tell()    # Start of header bytes

        self._writecheck(zinfo)
        self._didModify = True

        if isdir:
            zinfo.file_size = 0
            zinfo.compress_size = 0
            zinfo.CRC = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            self.fp.write(zinfo.FileHeader(False))
            # See the note on start_dir at the end of this method.
            self.start_dir = self.fp.tell()
            return

        # CRC and sizes are unknown until the data has been streamed; write
        # a placeholder header now and overwrite it afterwards.
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        # Compressed size can be larger than uncompressed size, hence the
        # 5% safety margin.
        zip64 = self._allowZip64 and \
            zinfo.file_size * 1.05 > zipfile.ZIP64_LIMIT
        self.fp.write(zinfo.FileHeader())
        if zinfo.compress_type == zipfile.ZIP_DEFLATED:
            # Negative wbits selects a raw deflate stream (no zlib header).
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        else:
            cmpr = None

        file_size = 0
        while 1:
            buf = fileobj.read(BUFFER_SIZE)
            if not buf:
                break
            file_size = file_size + len(buf)
            CRC = crc32(buf, CRC) & 0xffffffff
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size = compress_size + len(buf)
            self.fp.write(buf)

        if cmpr:
            buf = cmpr.flush()
            compress_size = compress_size + len(buf)
            self.fp.write(buf)
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size
        if not zip64 and self._allowZip64:
            if file_size > zipfile.ZIP64_LIMIT:
                raise RuntimeError(
                    'File size has increased during compressing')
            if compress_size > zipfile.ZIP64_LIMIT:
                raise RuntimeError(
                    'Compressed size larger than uncompressed size')

        # Seek backwards and rewrite the file header, which now carries the
        # correct CRC and sizes, then return to the end of the member data.
        position = self.fp.tell()       # Preserve current position in file
        self.fp.seek(zinfo.header_offset, 0)
        self.fp.write(zinfo.FileHeader())
        self.fp.seek(position, 0)
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

        # Newer zipfile versions seek to start_dir before writing the
        # central directory in close(); keep it pointing just past the data
        # written here.  Where close() does not use the attribute this
        # assignment has no effect.
        self.start_dir = self.fp.tell()

如您所见,我无法将 zip64 参数传给 FileHeader 方法,因为这段代码运行的系统只支持 Python 2.7.2;而要为 zip64 文件写出正确的文件头,至少需要 Python 2.7.4。

https://github.com/python/cpython/blob/2e46376c8c10908afed56ace4c7f0f7c64e80c5e/Misc/NEWS#L189