使用 Python 将 zip 文件拆分为多个块
Split a zip-file into chunks with Python
我有一段代码可以成功创建一个 zip 文件,但是如果它的大小超过 1MB,我需要拆分这个文件。
我有这段代码,但它不起作用:
# NOTE(review): this snippet assumes `etree` (lxml) and `BytesIO` (io) are
# imported elsewhere — they are not imported here.
from split_file_reader.split_file_writer import SplitFileWriter
import zipfile
# make element tree
tree = etree.ElementTree(batch_element)
# make xml file and write it in stream
xml_object = BytesIO()
tree.write(xml_object, pretty_print=True, xml_declaration=False, encoding="utf-8")
xml_file = xml_object.getvalue()
final = BytesIO()
# NOTE(review): this is why the splitting fails — the first argument here is a
# single BytesIO, so SplitFileWriter has only one output target and can never
# roll over to a new "part". Per the split-file-reader docs (see the answer
# below), the first argument should be a filename prefix or a generator that
# yields successive file-like objects.
with SplitFileWriter(final, 1_000_000) as sfw:
    with zipfile.ZipFile(sfw, "a") as zip_file:
        zip_file.writestr('Batch.xml', xml_file)
我想以字节形式检索拆分文件。压缩部分有效,但拆分部分无效。
阅读您正在使用的模块的文档,即 https://pypi.org/project/split-file-reader
里面应该有使用说明。
编辑:这是一个例子:
# Example usage from the split-file-reader docs: zip up matching files and
# split the archive into 500,000-byte parts written to disk under the
# filename prefix "split.zip." (presumably with numbered suffixes — see docs).
with SplitFileWriter("split.zip.", 500_000) as sfw:
    with zipfile.ZipFile(file=sfw, mode='w') as zipf:
        # walk the current directory tree; `dirs` is unused here
        for root, dirs, files in os.walk("./"):
            for file in files:
                if file.startswith("random_payload"):
                    zipf.write(os.path.join(root, file))
根据 split_file_reader 的文档,SplitFileWriter
的第一个参数可以是一个生成类文件对象的生成器。这样就可以把 zip 文件拆分成
一个由 BytesIO 块组成的列表。
这是一个有效的示例脚本:
import zipfile
from io import BytesIO
from lxml import etree
from split_file_reader.split_file_writer import SplitFileWriter
# make element tree
# (in the original question the tree came from an in-memory element)
# tree = etree.ElementTree(batch_element)
tree = etree.parse('/tmp/test.xml')
# make xml file and write it in stream: serialize the tree into an
# in-memory buffer, then grab the raw bytes
xml_object = BytesIO()
tree.write(xml_object, pretty_print=True, xml_declaration=False, encoding="utf-8")
xml_file = xml_object.getvalue()
# collects one BytesIO per produced chunk; filled by gen() below
chunks = []
def gen(lst):
    """Infinite generator of fresh in-memory buffers.

    On every iteration a new ``BytesIO`` is created, appended to *lst*
    (so the caller keeps a handle on every chunk produced), and yielded
    as the next output target for ``SplitFileWriter``.
    """
    while True:
        buffer = BytesIO()
        lst.append(buffer)
        yield buffer
# Hand SplitFileWriter a generator of file-like objects: it writes up to
# 1,000,000 bytes into each yielded BytesIO before requesting the next one.
with SplitFileWriter(gen(chunks), 1_000_000) as sfw:
    with zipfile.ZipFile(sfw, "w") as zip_file:
        zip_file.writestr('Batch.xml', xml_file)
# report the size of each produced chunk; all but the last are exactly 1 MB
for i, chunk in enumerate(chunks):
    print(f'chunk {i}: {len(chunk.getvalue())}')
输出:
chunk 0: 1000000
chunk 1: 1000000
chunk 2: 1000000
chunk 3: 1000000
chunk 4: 1000000
chunk 5: 887260
我有一段代码可以成功创建一个 zip 文件,但是如果它的大小超过 1MB,我需要拆分这个文件。
我有这段代码,但它不起作用:
# NOTE(review): this snippet assumes `etree` (lxml) and `BytesIO` (io) are
# imported elsewhere — they are not imported here.
from split_file_reader.split_file_writer import SplitFileWriter
import zipfile
# make element tree
tree = etree.ElementTree(batch_element)
# make xml file and write it in stream
xml_object = BytesIO()
tree.write(xml_object, pretty_print=True, xml_declaration=False, encoding="utf-8")
xml_file = xml_object.getvalue()
final = BytesIO()
# NOTE(review): this is why the splitting fails — the first argument here is a
# single BytesIO, so SplitFileWriter has only one output target and can never
# roll over to a new "part". Per the split-file-reader docs (see the answer
# below), the first argument should be a filename prefix or a generator that
# yields successive file-like objects.
with SplitFileWriter(final, 1_000_000) as sfw:
    with zipfile.ZipFile(sfw, "a") as zip_file:
        zip_file.writestr('Batch.xml', xml_file)
我想以字节形式检索拆分文件。压缩部分有效,但拆分部分无效。
阅读您正在使用的模块的文档,即 https://pypi.org/project/split-file-reader
里面应该有使用说明。
编辑:这是一个例子:
# Example usage from the split-file-reader docs: zip up matching files and
# split the archive into 500,000-byte parts written to disk under the
# filename prefix "split.zip." (presumably with numbered suffixes — see docs).
with SplitFileWriter("split.zip.", 500_000) as sfw:
    with zipfile.ZipFile(file=sfw, mode='w') as zipf:
        # walk the current directory tree; `dirs` is unused here
        for root, dirs, files in os.walk("./"):
            for file in files:
                if file.startswith("random_payload"):
                    zipf.write(os.path.join(root, file))
根据 split_file_reader 的文档,SplitFileWriter
的第一个参数可以是一个生成类文件对象的生成器。这样就可以把 zip 文件拆分成
一个由 BytesIO 块组成的列表。
这是一个有效的示例脚本:
import zipfile
from io import BytesIO
from lxml import etree
from split_file_reader.split_file_writer import SplitFileWriter
# make element tree
# (in the original question the tree came from an in-memory element)
# tree = etree.ElementTree(batch_element)
tree = etree.parse('/tmp/test.xml')
# make xml file and write it in stream: serialize the tree into an
# in-memory buffer, then grab the raw bytes
xml_object = BytesIO()
tree.write(xml_object, pretty_print=True, xml_declaration=False, encoding="utf-8")
xml_file = xml_object.getvalue()
# collects one BytesIO per produced chunk; filled by gen() below
chunks = []
def gen(lst):
    """Infinite generator of fresh in-memory buffers.

    On every iteration a new ``BytesIO`` is created, appended to *lst*
    (so the caller keeps a handle on every chunk produced), and yielded
    as the next output target for ``SplitFileWriter``.
    """
    while True:
        buffer = BytesIO()
        lst.append(buffer)
        yield buffer
# Hand SplitFileWriter a generator of file-like objects: it writes up to
# 1,000,000 bytes into each yielded BytesIO before requesting the next one.
with SplitFileWriter(gen(chunks), 1_000_000) as sfw:
    with zipfile.ZipFile(sfw, "w") as zip_file:
        zip_file.writestr('Batch.xml', xml_file)
# report the size of each produced chunk; all but the last are exactly 1 MB
for i, chunk in enumerate(chunks):
    print(f'chunk {i}: {len(chunk.getvalue())}')
输出:
chunk 0: 1000000
chunk 1: 1000000
chunk 2: 1000000
chunk 3: 1000000
chunk 4: 1000000
chunk 5: 887260