XlsxWriter:为什么 in_memory 给出了损坏的字节序列?
XlsxWriter: Why does in_memory give a corrupt byte sequence?
我正在 Flask 应用中生成一个 Excel 文件,并将其返回供用户下载。如果我使用 'in_memory': True,
则生成的文件是损坏的,Excel 无法打开它。如果直接写入磁盘,同样的代码可以正常工作。内存中的写法我参照了 XlsxWriter 文档中的示例。
我怀疑两种方式生成的字节不同,所以写了一个小脚本来验证,结果它们确实略有不同(5730 个字节中有 44 个不同)。
这是我生成相同工作簿的脚本,一个在内存中,一个在磁盘上。然后它比较字节并发现它们不同。为什么?
from io import BytesIO
from xlsxwriter import Workbook
def fill_workbook(workbook):
    """Populate the workbook with some test data and close it.

    Adds two worksheets, "First" and "Next", writes a few string cells
    into them and then closes the workbook.

    Args:
        workbook: An open workbook object providing ``add_worksheet()``
            and ``close()`` (an ``xlsxwriter.Workbook`` in this script).
            It is closed by this call, so the caller must not write to it
            afterwards.
    """
    first_sheet = workbook.add_worksheet("First")
    first_sheet.write(0, 0, "test")

    next_sheet = workbook.add_worksheet("Next")
    next_sheet.write(0, 0, "sample")
    next_sheet.write(0, 1, "value")

    # Closing here means callers only have to read the result back.
    workbook.close()
def get_bytes():
    """Get the bytes for the in-memory and on-disk workbooks.

    Builds the same workbook twice: once into a ``BytesIO`` buffer with
    the ``in_memory`` option enabled, and once straight to a file.

    Returns:
        A ``(mem_bytes, disk_bytes)`` tuple holding the raw contents of
        the in-memory workbook and of the workbook written to disk.

    Note:
        As a side effect this creates (or overwrites) ``direct.xlsx`` in
        the current working directory and leaves it there.
    """
    output = BytesIO()
    in_mem = Workbook(output, {'in_memory': True})
    filename = "direct.xlsx"
    on_disk = Workbook(filename)

    # fill_workbook() also closes each workbook, which is what actually
    # produces the output in the buffer / on disk.
    fill_workbook(in_mem)
    fill_workbook(on_disk)

    # getvalue() returns the whole buffer regardless of the stream position.
    mem_bytes = output.getvalue()
    with open(filename, "rb") as f:
        disk_bytes = f.read()
    return mem_bytes, disk_bytes
def compare_bytes():
    """Compare the bytes of the two workbooks and print a summary.

    Prints three lines: whether the two byte strings are equal, how many
    positions hold the same byte, and how many bytes differ.

    NOTE(review): differing bytes do not by themselves prove that a file
    is corrupt; archive metadata such as member timestamps can legitimately
    differ between the two write modes -- confirm by opening the file.
    """
    mem_bytes, disk_bytes = get_bytes()
    print(mem_bytes == disk_bytes)

    same = sum(mb == db for mb, db in zip(mem_bytes, disk_bytes))

    # zip() stops at the shorter input, so any bytes beyond the common
    # length would go uncounted; measuring against the longer input
    # counts them as different.
    diff = max(len(mem_bytes), len(disk_bytes)) - same

    print(f"{same} bytes same")
    print(f"{diff} bytes different")
# Run the comparison only when executed as a script, not when imported.
if __name__ == '__main__':
    compare_bytes()
我在 Python 3.7.3 和 XlsxWriter==1.2.8 环境下运行了这个脚本。当 in_memory 设置为 False 时,它对我来说可以正常工作。
让我们看看 XlsxWriter 的源代码中 in_memory 参数实际上做了什么:
for file_id, file_data in enumerate(xml_files):
os_filename, xml_filename, is_binary = file_data
if self.in_memory:
# Set sub-file timestamp to Excel's timestamp of 1/1/1980.
zipinfo = ZipInfo(xml_filename, (1980, 1, 1, 0, 0, 0))
# Copy compression type from parent ZipFile.
zipinfo.compress_type = xlsx_file.compression
if is_binary:
xlsx_file.writestr(zipinfo, os_filename.getvalue())
else:
xlsx_file.writestr(zipinfo,
os_filename.getvalue().encode('utf-8'))
else:
# The sub-files are tempfiles on disk, i.e, not in memory.
# Set sub-file timestamp to 31/1/1980 due to portability
# issues setting it to Excel's timestamp of 1/1/1980.
timestamp = time.mktime((1980, 1, 31, 0, 0, 0, 0, 0, -1))
os.utime(os_filename, (timestamp, timestamp))
try:
xlsx_file.write(os_filename, xml_filename)
os.remove(os_filename)
我正在 Flask 应用中生成一个 Excel 文件,并将其返回供用户下载。如果我使用 'in_memory': True,
则生成的文件是损坏的,Excel 无法打开它。如果直接写入磁盘,同样的代码可以正常工作。内存中的写法我参照了 XlsxWriter 文档中的示例。
我怀疑两种方式生成的字节不同,所以写了一个小脚本来验证,结果它们确实略有不同(5730 个字节中有 44 个不同)。
这是我生成相同工作簿的脚本,一个在内存中,一个在磁盘上。然后它比较字节并发现它们不同。为什么?
from io import BytesIO
from xlsxwriter import Workbook
def fill_workbook(workbook):
    """Populate the workbook with some test data and close it.

    Adds two worksheets, "First" and "Next", writes a few string cells
    into them and then closes the workbook.

    Args:
        workbook: An open workbook object providing ``add_worksheet()``
            and ``close()`` (an ``xlsxwriter.Workbook`` in this script).
            It is closed by this call, so the caller must not write to it
            afterwards.
    """
    first_sheet = workbook.add_worksheet("First")
    first_sheet.write(0, 0, "test")

    next_sheet = workbook.add_worksheet("Next")
    next_sheet.write(0, 0, "sample")
    next_sheet.write(0, 1, "value")

    # Closing here means callers only have to read the result back.
    workbook.close()
def get_bytes():
    """Get the bytes for the in-memory and on-disk workbooks.

    Builds the same workbook twice: once into a ``BytesIO`` buffer with
    the ``in_memory`` option enabled, and once straight to a file.

    Returns:
        A ``(mem_bytes, disk_bytes)`` tuple holding the raw contents of
        the in-memory workbook and of the workbook written to disk.

    Note:
        As a side effect this creates (or overwrites) ``direct.xlsx`` in
        the current working directory and leaves it there.
    """
    output = BytesIO()
    in_mem = Workbook(output, {'in_memory': True})
    filename = "direct.xlsx"
    on_disk = Workbook(filename)

    # fill_workbook() also closes each workbook, which is what actually
    # produces the output in the buffer / on disk.
    fill_workbook(in_mem)
    fill_workbook(on_disk)

    # getvalue() returns the whole buffer regardless of the stream position.
    mem_bytes = output.getvalue()
    with open(filename, "rb") as f:
        disk_bytes = f.read()
    return mem_bytes, disk_bytes
def compare_bytes():
    """Compare the bytes of the two workbooks and print a summary.

    Prints three lines: whether the two byte strings are equal, how many
    positions hold the same byte, and how many bytes differ.

    NOTE(review): differing bytes do not by themselves prove that a file
    is corrupt; archive metadata such as member timestamps can legitimately
    differ between the two write modes -- confirm by opening the file.
    """
    mem_bytes, disk_bytes = get_bytes()
    print(mem_bytes == disk_bytes)

    same = sum(mb == db for mb, db in zip(mem_bytes, disk_bytes))

    # zip() stops at the shorter input, so any bytes beyond the common
    # length would go uncounted; measuring against the longer input
    # counts them as different.
    diff = max(len(mem_bytes), len(disk_bytes)) - same

    print(f"{same} bytes same")
    print(f"{diff} bytes different")
# Run the comparison only when executed as a script, not when imported.
if __name__ == '__main__':
    compare_bytes()
我在 Python 3.7.3 和 XlsxWriter==1.2.8 环境下运行了这个脚本。当 in_memory 设置为 False 时,它对我来说可以正常工作。
让我们看看 XlsxWriter 的源代码中 in_memory 参数实际上做了什么:
for file_id, file_data in enumerate(xml_files):
os_filename, xml_filename, is_binary = file_data
if self.in_memory:
# Set sub-file timestamp to Excel's timestamp of 1/1/1980.
zipinfo = ZipInfo(xml_filename, (1980, 1, 1, 0, 0, 0))
# Copy compression type from parent ZipFile.
zipinfo.compress_type = xlsx_file.compression
if is_binary:
xlsx_file.writestr(zipinfo, os_filename.getvalue())
else:
xlsx_file.writestr(zipinfo,
os_filename.getvalue().encode('utf-8'))
else:
# The sub-files are tempfiles on disk, i.e, not in memory.
# Set sub-file timestamp to 31/1/1980 due to portability
# issues setting it to Excel's timestamp of 1/1/1980.
timestamp = time.mktime((1980, 1, 31, 0, 0, 0, 0, 0, -1))
os.utime(os_filename, (timestamp, timestamp))
try:
xlsx_file.write(os_filename, xml_filename)
os.remove(os_filename)