如何通过 Python 2.7 将字节附加到 zip 文件内的文件中?
How do you append bytes onto a file inside of a zip file via Python 2.7?
我目前正在解决一个更大难题中的一部分。在我负责的这一部分里,我有一个文件对象和 zip 文件的目标路径。我事先永远无法知道该文件对象的大小,只知道手里有这么一个对象。生成的 zip 必须支持 zip64。
我的目标是获取该文件对象(指向文件的指针)并将其写入 zip 文件,而不会将整个文件加载到内存中。我想逐块执行此操作(特别是如果文件对象真的很大)。
关于我如何着手做这件事有什么想法吗?
import zipfile
# Destination archive and the source file that should be stored inside it.
zip_path = "/tmp/file.zip"
file_to_zip_path = "/home/ryanb58/Desktop/movie.mp4"

with zipfile.ZipFile(zip_path, mode="w", allowZip64=True) as archive:
    source = open(file_to_zip_path, 'rb')
    # NOTE: each writestr() call stores its data as a separate archive
    # member, so this loop yields one "file.mp4" entry per 1024-byte chunk
    # (plus a final empty one) instead of a single streamed member.
    more_data = True
    while more_data:
        chunk = source.read(1024)
        archive.writestr("file.mp4", chunk)
        more_data = bool(chunk)
我的问题是:当我把新的字节写入 zip 文件后,打开生成的 zip,里面只是一长串同名的小文件,每个大约 1024 字节。上面就是我的代码 ^^。我有点卡住了,所以任何想法或解决方案都非常欢迎。
按照@J.F.Sebastian 在他的评论中给出的建议,我能够将我的文件写入一个 zip 而无需将整个文件存入内存。
这是我通过重写(override)`write` 方法实现的解决方案。
import binascii
import os
import stat
import time
import zipfile

try:
    import zlib  # Only required when a member is stored with ZIP_DEFLATED.
except ImportError:
    zlib = None

BUFFER_SIZE = 1024 * 10000  # Bytes read per chunk (10,240,000, roughly 10 MB).


class Zip(zipfile.ZipFile):
    """ZipFile whose write() streams from an already-open file object.

    The stock ZipFile.write() takes a filename; this override takes a file
    object and copies it into the archive BUFFER_SIZE bytes at a time, so the
    whole file is never held in memory.
    """

    def write(self, fileobj, arcname=None, compress_type=None):
        """Stream the bytes of ``fileobj`` into the archive as ``arcname``.

        Args:
            fileobj: Object with a ``read(size)`` method and a ``name``
                attribute that ``os.stat`` can resolve; the stat result
                supplies the timestamp, permission bits and expected size.
                If ``name`` is a directory, an empty directory entry is
                written and nothing is read.
            arcname: Name of the member inside the archive. Drive letters
                and leading path separators are stripped. When ``None`` the
                member is stored as ``temp.zip``.
            compress_type: Compression method for this member; defaults to
                the archive's own ``compression`` setting.

        Raises:
            RuntimeError: If the archive is already closed, or if the sizes
                measured while streaming no longer fit the header that was
                reserved before streaming started.
            Exception: Whatever ``os.stat`` or ``ZipFile._writecheck`` raise
                for this member is propagated unchanged.
        """
        if not self.fp:
            raise RuntimeError(
                "Attempt to write to ZIP archive that was already closed")

        # zlib's crc32 is preferred; binascii offers the same checksum when
        # zlib is not available.
        crc32 = zlib.crc32 if zlib is not None else binascii.crc32

        st = os.stat(fileobj.name)
        isdir = stat.S_ISDIR(st.st_mode)
        date_time = time.localtime(st.st_mtime)[0:6]

        # Normalise the member name: no drive, no leading separators.
        if arcname is None:
            arcname = "/temp.zip"
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'

        zinfo = zipfile.ZipInfo(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.compress_type = zipfile.ZIP_STORED
        elif compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type

        zinfo.file_size = st.st_size
        zinfo.flag_bits = 0x00
        zinfo.header_offset = self.fp.tell()  # Start of header bytes

        self._writecheck(zinfo)
        self._didModify = True

        if isdir:
            zinfo.file_size = 0
            zinfo.compress_size = 0
            zinfo.CRC = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            # Called without arguments so it also works where FileHeader()
            # does not accept a zip64 flag; with zero sizes no zip64 extra
            # field is emitted either way.
            self.fp.write(zinfo.FileHeader())
            self._sync_start_dir()
            return

        # CRC and sizes are unknown until the data has been streamed, so a
        # provisional header is written now and overwritten afterwards.
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        # Compressed size can be larger than uncompressed size
        zip64 = self._allowZip64 and \
            zinfo.file_size * 1.05 > zipfile.ZIP64_LIMIT
        provisional_header = zinfo.FileHeader()
        self.fp.write(provisional_header)

        if zinfo.compress_type == zipfile.ZIP_DEFLATED:
            # Negative wbits: raw deflate stream without a zlib wrapper.
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        else:
            cmpr = None

        file_size = 0
        while True:
            buf = fileobj.read(BUFFER_SIZE)
            if not buf:
                break
            file_size += len(buf)
            CRC = crc32(buf, CRC) & 0xffffffff
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size += len(buf)
            self.fp.write(buf)

        if cmpr:
            buf = cmpr.flush()
            compress_size += len(buf)
            self.fp.write(buf)
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size

        if not zip64 and self._allowZip64:
            if file_size > zipfile.ZIP64_LIMIT:
                raise RuntimeError(
                    'File size has increased during compressing')
            if compress_size > zipfile.ZIP64_LIMIT:
                raise RuntimeError(
                    'Compressed size larger than uncompressed size')

        # The final header is written over the provisional one. If the real
        # sizes flip the zip64 decision the header length changes, and an
        # in-place rewrite would clobber the first bytes of the member data.
        final_header = zinfo.FileHeader()
        if len(final_header) != len(provisional_header):
            raise RuntimeError(
                'Entry size crossed the ZIP64 limit while streaming; the '
                'header cannot be rewritten in place')

        position = self.fp.tell()  # Preserve current position in file
        self.fp.seek(zinfo.header_offset, 0)
        self.fp.write(final_header)
        self.fp.seek(position, 0)

        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self._sync_start_dir()

    def _sync_start_dir(self):
        """Record the end of member data for zipfile versions that track it."""
        # Newer zipfile implementations seek to ``start_dir`` before writing
        # the central directory in close(); leaving it stale would overwrite
        # the member that was just written.
        if hasattr(self, 'start_dir'):
            self.start_dir = self.fp.tell()
如您所见,我没有把 zip64 参数传给 FileHeader 方法,因为这段代码运行的系统只支持 Python 2.7.2;而要为 zip64 文件生成正确的文件头,至少需要 Python 2.7.4。
https://github.com/python/cpython/blob/2e46376c8c10908afed56ace4c7f0f7c64e80c5e/Misc/NEWS#L189
我目前正在解决一个更大难题中的一部分。在我负责的这一部分里,我有一个文件对象和 zip 文件的目标路径。我事先永远无法知道该文件对象的大小,只知道手里有这么一个对象。生成的 zip 必须支持 zip64。
我的目标是获取该文件对象(指向文件的指针)并将其写入 zip 文件,而不会将整个文件加载到内存中。我想逐块执行此操作(特别是如果文件对象真的很大)。
关于我如何着手做这件事有什么想法吗?
import zipfile
# Destination archive and the source file that should be stored inside it.
zip_path = "/tmp/file.zip"
file_to_zip_path = "/home/ryanb58/Desktop/movie.mp4"

with zipfile.ZipFile(zip_path, mode="w", allowZip64=True) as archive:
    source = open(file_to_zip_path, 'rb')
    # NOTE: each writestr() call stores its data as a separate archive
    # member, so this loop yields one "file.mp4" entry per 1024-byte chunk
    # (plus a final empty one) instead of a single streamed member.
    more_data = True
    while more_data:
        chunk = source.read(1024)
        archive.writestr("file.mp4", chunk)
        more_data = bool(chunk)
我的问题是:当我把新的字节写入 zip 文件后,打开生成的 zip,里面只是一长串同名的小文件,每个大约 1024 字节。上面就是我的代码 ^^。我有点卡住了,所以任何想法或解决方案都非常欢迎。
按照@J.F.Sebastian 在他的评论中给出的建议,我能够将我的文件写入一个 zip 而无需将整个文件存入内存。
这是我通过重写(override)`write` 方法实现的解决方案。
import binascii
import os
import stat
import time
import zipfile

try:
    import zlib  # Only required when a member is stored with ZIP_DEFLATED.
except ImportError:
    zlib = None

BUFFER_SIZE = 1024 * 10000  # Bytes read per chunk (10,240,000, roughly 10 MB).


class Zip(zipfile.ZipFile):
    """ZipFile whose write() streams from an already-open file object.

    The stock ZipFile.write() takes a filename; this override takes a file
    object and copies it into the archive BUFFER_SIZE bytes at a time, so the
    whole file is never held in memory.
    """

    def write(self, fileobj, arcname=None, compress_type=None):
        """Stream the bytes of ``fileobj`` into the archive as ``arcname``.

        Args:
            fileobj: Object with a ``read(size)`` method and a ``name``
                attribute that ``os.stat`` can resolve; the stat result
                supplies the timestamp, permission bits and expected size.
                If ``name`` is a directory, an empty directory entry is
                written and nothing is read.
            arcname: Name of the member inside the archive. Drive letters
                and leading path separators are stripped. When ``None`` the
                member is stored as ``temp.zip``.
            compress_type: Compression method for this member; defaults to
                the archive's own ``compression`` setting.

        Raises:
            RuntimeError: If the archive is already closed, or if the sizes
                measured while streaming no longer fit the header that was
                reserved before streaming started.
            Exception: Whatever ``os.stat`` or ``ZipFile._writecheck`` raise
                for this member is propagated unchanged.
        """
        if not self.fp:
            raise RuntimeError(
                "Attempt to write to ZIP archive that was already closed")

        # zlib's crc32 is preferred; binascii offers the same checksum when
        # zlib is not available.
        crc32 = zlib.crc32 if zlib is not None else binascii.crc32

        st = os.stat(fileobj.name)
        isdir = stat.S_ISDIR(st.st_mode)
        date_time = time.localtime(st.st_mtime)[0:6]

        # Normalise the member name: no drive, no leading separators.
        if arcname is None:
            arcname = "/temp.zip"
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'

        zinfo = zipfile.ZipInfo(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.compress_type = zipfile.ZIP_STORED
        elif compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type

        zinfo.file_size = st.st_size
        zinfo.flag_bits = 0x00
        zinfo.header_offset = self.fp.tell()  # Start of header bytes

        self._writecheck(zinfo)
        self._didModify = True

        if isdir:
            zinfo.file_size = 0
            zinfo.compress_size = 0
            zinfo.CRC = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            # Called without arguments so it also works where FileHeader()
            # does not accept a zip64 flag; with zero sizes no zip64 extra
            # field is emitted either way.
            self.fp.write(zinfo.FileHeader())
            self._sync_start_dir()
            return

        # CRC and sizes are unknown until the data has been streamed, so a
        # provisional header is written now and overwritten afterwards.
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        # Compressed size can be larger than uncompressed size
        zip64 = self._allowZip64 and \
            zinfo.file_size * 1.05 > zipfile.ZIP64_LIMIT
        provisional_header = zinfo.FileHeader()
        self.fp.write(provisional_header)

        if zinfo.compress_type == zipfile.ZIP_DEFLATED:
            # Negative wbits: raw deflate stream without a zlib wrapper.
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        else:
            cmpr = None

        file_size = 0
        while True:
            buf = fileobj.read(BUFFER_SIZE)
            if not buf:
                break
            file_size += len(buf)
            CRC = crc32(buf, CRC) & 0xffffffff
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size += len(buf)
            self.fp.write(buf)

        if cmpr:
            buf = cmpr.flush()
            compress_size += len(buf)
            self.fp.write(buf)
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size

        if not zip64 and self._allowZip64:
            if file_size > zipfile.ZIP64_LIMIT:
                raise RuntimeError(
                    'File size has increased during compressing')
            if compress_size > zipfile.ZIP64_LIMIT:
                raise RuntimeError(
                    'Compressed size larger than uncompressed size')

        # The final header is written over the provisional one. If the real
        # sizes flip the zip64 decision the header length changes, and an
        # in-place rewrite would clobber the first bytes of the member data.
        final_header = zinfo.FileHeader()
        if len(final_header) != len(provisional_header):
            raise RuntimeError(
                'Entry size crossed the ZIP64 limit while streaming; the '
                'header cannot be rewritten in place')

        position = self.fp.tell()  # Preserve current position in file
        self.fp.seek(zinfo.header_offset, 0)
        self.fp.write(final_header)
        self.fp.seek(position, 0)

        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self._sync_start_dir()

    def _sync_start_dir(self):
        """Record the end of member data for zipfile versions that track it."""
        # Newer zipfile implementations seek to ``start_dir`` before writing
        # the central directory in close(); leaving it stale would overwrite
        # the member that was just written.
        if hasattr(self, 'start_dir'):
            self.start_dir = self.fp.tell()
如您所见,我没有把 zip64 参数传给 FileHeader 方法,因为这段代码运行的系统只支持 Python 2.7.2;而要为 zip64 文件生成正确的文件头,至少需要 Python 2.7.4。
https://github.com/python/cpython/blob/2e46376c8c10908afed56ace4c7f0f7c64e80c5e/Misc/NEWS#L189