Python shutil pack a zip file and unzip it back EOF 错误
Python shutil pack a zip file and unzip it back EOF error
更新:简短回答,不要将存档打包到与文件源目录相同的路径,有问题的错误代码如下
shutil.make_archive(zip_path, 'zip', tmpdir)
原问题:
我正在使用 shutil
打包和解压一个 TensorFlow 模型文件夹(我认为这个问题与 shutil
更相关)
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense
def load_model_as_bytes(model):
    """Serialize *model* with tf.saved_model and return a generator that
    yields the zipped SavedModel as 4 MiB ByteChunk messages.

    Bug fix: the original code placed the archive inside the directory
    being archived (``zip_path = os.path.join(tmpdir, "tf_model")``), so
    ``make_archive`` included the partially-written zip in itself; the
    truncated inner entry then raised EOFError on ``unpack_archive``.
    The archive is now created in its own temp directory.
    """
    def file_chunk_generate(file_path):
        # 4 MiB keeps each streamed message reasonably small.
        CHUNK_SIZE = 4 * 1024 * 1024
        with open(file_path, 'rb') as f:
            while True:
                piece = f.read(CHUNK_SIZE)
                if not piece:
                    return
                # NOTE(review): ByteChunk is defined elsewhere (presumably a
                # protobuf/gRPC message) -- confirm its field is `buffer`.
                yield ByteChunk(buffer=piece)

    tmpdir = tempfile.mkdtemp()
    tf.saved_model.save(model, tmpdir)
    # Fix: the archive must live OUTSIDE the directory it archives.
    zip_path = os.path.join(tempfile.mkdtemp(), "tf_model")
    shutil.make_archive(zip_path, 'zip', tmpdir)
    size = os.path.getsize(f'{zip_path}.zip')
    logging.info(f"send model file zip, length: {size}") #-------output 4621
    return file_chunk_generate(f'{zip_path}.zip')
class NeuralNetworkPart(Model):
    """Toy Keras model: one 128-unit ReLU hidden layer feeding 10 logits."""

    def __init__(self):
        super().__init__()
        # Hidden layer followed by the output (logits) layer.
        self.d1 = Dense(128, activation='relu')
        self.d2 = Dense(10)

    def call(self, x):
        # The input arrives wrapped in a sequence; unwrap its first element.
        features = x[0]
        hidden = self.d1(features)
        return self.d2(hidden)
# Receiver side: rebuild the zip from the streamed chunks, then unpack it.
model = NeuralNetworkPart()
chunk_iter = load_model_as_bytes(model)

recv_dir = tempfile.mkdtemp()
zip_path = os.path.join(recv_dir, "tf_model.zip")
with open(zip_path, 'wb') as sink:
    for byte_chunk in chunk_iter:
        sink.write(byte_chunk.buffer)
logging.info(f"receive model file zip, length: {os.path.getsize(zip_path)}") #-------output 4621
shutil.unpack_archive(zip_path, recv_dir)
基本上这个程序得到一个文件夹,使用make_archive
压缩它。然后以字节形式读取zip文件,将其存储在生成器变量中,并使用生成器编写另一个zip文件,并尝试使用unpack_archive
解压它。
在写入字节生成器之前,以及在解压缩之前写入 zip 文件之后,大小是相同的(在日志记录中检查过),但是在调用解压缩时,它会引发 EOF 错误
shutil.unpack_archive(zip_path, tmpdir)
File "/lib/python3.6/shutil.py", line 983, in unpack_archive
func(filename, extract_dir, **kwargs)
File "/lib/python3.6/shutil.py", line 901, in _unpack_zipfile
data = zip.read(info.filename)
File "/lib/python3.6/zipfile.py", line 1338, in read
return fp.read()
File "/lib/python3.6/zipfile.py", line 858, in read
buf += self._read1(self.MAX_N)
File "/lib/python3.6/zipfile.py", line 940, in _read1
data += self._read2(n - len(data))
File "/lib/python3.6/zipfile.py", line 975, in _read2
raise EOFError
这个稍微简化的版本似乎工作得很好。请注意,所有临时文件都没有被清理;你可能想在临时目录被 TensorFlow 模型填满之前解决这个问题。
import os
import shutil
import tempfile
def file_chunk_generate(file_path):
    """Yield the contents of *file_path* as successive byte chunks.

    Chunks are at most 4 MiB; the final chunk may be shorter, and an
    empty file yields nothing.
    """
    chunk_size = 4 * 1024 * 1024
    with open(file_path, "rb") as handle:
        # iter() with a b"" sentinel stops as soon as read() is exhausted.
        for piece in iter(lambda: handle.read(chunk_size), b""):
            yield piece
def get_zip_chunk_generator(source_dir):
    """Zip *source_dir* and return a generator over the archive's bytes.

    The archive is written into a fresh temp directory -- deliberately
    outside *source_dir* -- so make_archive never tries to include the
    archive in itself.
    """
    staging_dir = tempfile.mkdtemp("zip-")
    archive_path = shutil.make_archive(
        os.path.join(staging_dir, "tf_model"), "zip", source_dir
    )
    return file_chunk_generate(archive_path)
def make_source_dir():
    """Create a temp directory holding five small test files; return its path."""
    src = tempfile.mkdtemp("src-")
    payload = b"foo" * 1024
    for idx in range(5):
        target = os.path.join(src, f"test-{idx}.txt")
        with open(target, "wb") as out:
            out.write(payload)
    return src
# Demo: archive a source tree, stream it chunk-wise into a new zip, unpack it.
source_dir = make_source_dir()
chunks = get_zip_chunk_generator(source_dir)

dest_dir = tempfile.mkdtemp(prefix="dest-")
print("1", os.listdir(dest_dir))

zip_path = os.path.join(dest_dir, "tf_model_dest.zip")
with open(zip_path, "wb") as sink:
    # writelines() accepts any iterable of bytes -- one call per chunk.
    sink.writelines(chunks)
print("2", os.listdir(dest_dir))

shutil.unpack_archive(zip_path, dest_dir)
print("3", os.listdir(dest_dir))
输出为
1 []
2 ['tf_model_dest.zip']
3 ['test-0.txt', 'test-1.txt', 'test-3.txt', 'test-2.txt', 'tf_model_dest.zip', 'test-4.txt']
如您所料。
不过,如果您要通过网络进行流式传输,我建议改用 tarball:那样确实可以在磁盘上完全不落地任何文件;因为 ZIP 解压时需要随机访问(seek)支持,而 TAR 不需要。
更新:简短回答,不要将存档打包到与文件源目录相同的路径,有问题的错误代码如下
shutil.make_archive(zip_path, 'zip', tmpdir)
原问题:
我正在使用 shutil
打包和解压一个 TensorFlow 模型文件夹(我认为这个问题与 shutil
更相关)
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense
def load_model_as_bytes(model):
    """Serialize *model* with tf.saved_model and return a generator that
    yields the zipped SavedModel as 4 MiB ByteChunk messages.

    Bug fix: the original code placed the archive inside the directory
    being archived (``zip_path = os.path.join(tmpdir, "tf_model")``), so
    ``make_archive`` included the partially-written zip in itself; the
    truncated inner entry then raised EOFError on ``unpack_archive``.
    The archive is now created in its own temp directory.
    """
    def file_chunk_generate(file_path):
        # 4 MiB keeps each streamed message reasonably small.
        CHUNK_SIZE = 4 * 1024 * 1024
        with open(file_path, 'rb') as f:
            while True:
                piece = f.read(CHUNK_SIZE)
                if not piece:
                    return
                # NOTE(review): ByteChunk is defined elsewhere (presumably a
                # protobuf/gRPC message) -- confirm its field is `buffer`.
                yield ByteChunk(buffer=piece)

    tmpdir = tempfile.mkdtemp()
    tf.saved_model.save(model, tmpdir)
    # Fix: the archive must live OUTSIDE the directory it archives.
    zip_path = os.path.join(tempfile.mkdtemp(), "tf_model")
    shutil.make_archive(zip_path, 'zip', tmpdir)
    size = os.path.getsize(f'{zip_path}.zip')
    logging.info(f"send model file zip, length: {size}") #-------output 4621
    return file_chunk_generate(f'{zip_path}.zip')
class NeuralNetworkPart(Model):
    """Toy Keras model: one 128-unit ReLU hidden layer feeding 10 logits."""

    def __init__(self):
        super().__init__()
        # Hidden layer followed by the output (logits) layer.
        self.d1 = Dense(128, activation='relu')
        self.d2 = Dense(10)

    def call(self, x):
        # The input arrives wrapped in a sequence; unwrap its first element.
        features = x[0]
        hidden = self.d1(features)
        return self.d2(hidden)
# Receiver side: rebuild the zip from the streamed chunks, then unpack it.
model = NeuralNetworkPart()
chunk_iter = load_model_as_bytes(model)

recv_dir = tempfile.mkdtemp()
zip_path = os.path.join(recv_dir, "tf_model.zip")
with open(zip_path, 'wb') as sink:
    for byte_chunk in chunk_iter:
        sink.write(byte_chunk.buffer)
logging.info(f"receive model file zip, length: {os.path.getsize(zip_path)}") #-------output 4621
shutil.unpack_archive(zip_path, recv_dir)
基本上这个程序得到一个文件夹,使用make_archive
压缩它。然后以字节形式读取zip文件,将其存储在生成器变量中,并使用生成器编写另一个zip文件,并尝试使用unpack_archive
解压它。
在写入字节生成器之前,以及在解压缩之前写入 zip 文件之后,大小是相同的(在日志记录中检查过),但是在调用解压缩时,它会引发 EOF 错误
shutil.unpack_archive(zip_path, tmpdir)
File "/lib/python3.6/shutil.py", line 983, in unpack_archive
func(filename, extract_dir, **kwargs)
File "/lib/python3.6/shutil.py", line 901, in _unpack_zipfile
data = zip.read(info.filename)
File "/lib/python3.6/zipfile.py", line 1338, in read
return fp.read()
File "/lib/python3.6/zipfile.py", line 858, in read
buf += self._read1(self.MAX_N)
File "/lib/python3.6/zipfile.py", line 940, in _read1
data += self._read2(n - len(data))
File "/lib/python3.6/zipfile.py", line 975, in _read2
raise EOFError
这个稍微简化的版本似乎工作得很好。请注意,所有临时文件都没有被清理;你可能想在临时目录被 TensorFlow 模型填满之前解决这个问题。
import os
import shutil
import tempfile
def file_chunk_generate(file_path):
    """Yield the contents of *file_path* as successive byte chunks.

    Chunks are at most 4 MiB; the final chunk may be shorter, and an
    empty file yields nothing.
    """
    chunk_size = 4 * 1024 * 1024
    with open(file_path, "rb") as handle:
        # iter() with a b"" sentinel stops as soon as read() is exhausted.
        for piece in iter(lambda: handle.read(chunk_size), b""):
            yield piece
def get_zip_chunk_generator(source_dir):
    """Zip *source_dir* and return a generator over the archive's bytes.

    The archive is written into a fresh temp directory -- deliberately
    outside *source_dir* -- so make_archive never tries to include the
    archive in itself.
    """
    staging_dir = tempfile.mkdtemp("zip-")
    archive_path = shutil.make_archive(
        os.path.join(staging_dir, "tf_model"), "zip", source_dir
    )
    return file_chunk_generate(archive_path)
def make_source_dir():
    """Create a temp directory holding five small test files; return its path."""
    src = tempfile.mkdtemp("src-")
    payload = b"foo" * 1024
    for idx in range(5):
        target = os.path.join(src, f"test-{idx}.txt")
        with open(target, "wb") as out:
            out.write(payload)
    return src
# Demo: archive a source tree, stream it chunk-wise into a new zip, unpack it.
source_dir = make_source_dir()
chunks = get_zip_chunk_generator(source_dir)

dest_dir = tempfile.mkdtemp(prefix="dest-")
print("1", os.listdir(dest_dir))

zip_path = os.path.join(dest_dir, "tf_model_dest.zip")
with open(zip_path, "wb") as sink:
    # writelines() accepts any iterable of bytes -- one call per chunk.
    sink.writelines(chunks)
print("2", os.listdir(dest_dir))

shutil.unpack_archive(zip_path, dest_dir)
print("3", os.listdir(dest_dir))
输出为
1 []
2 ['tf_model_dest.zip']
3 ['test-0.txt', 'test-1.txt', 'test-3.txt', 'test-2.txt', 'tf_model_dest.zip', 'test-4.txt']
如您所料。
不过,如果您要通过网络进行流式传输,我建议改用 tarball:那样确实可以在磁盘上完全不落地任何文件;因为 ZIP 解压时需要随机访问(seek)支持,而 TAR 不需要。