Python shutil pack a zip file and unzip it back EOF 错误

Python shutil pack a zip file and unzip it back EOF error

更新(简短回答):不要把压缩包输出到被打包的源目录内部,否则压缩包会把尚未写完的自身也打包进去。有问题的代码如下:

# Problematic call: zip_path is built inside tmpdir, so the archive is written
# into the very directory that is being archived.
shutil.make_archive(zip_path, 'zip', tmpdir)

原问题:

我正在使用 shutil 打包和解压一个 TensorFlow 模型文件夹(我认为这个问题与 shutil 更相关)

import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense
def load_model_as_bytes(model):
    """Save *model* as a SavedModel, zip it, and return a lazy chunk stream.

    The model is exported with ``tf.saved_model.save`` into a temporary
    directory, which is then packed into ``tf_model.zip`` in a *separate*
    temporary directory.

    Args:
        model: The object handed to ``tf.saved_model.save``.

    Returns:
        A generator yielding ``ByteChunk`` objects whose ``buffer`` holds
        consecutive pieces (at most 4 MiB each) of the zip file.

    Note:
        The directory holding the zip file is not removed here because the
        returned generator reads from it lazily; cleaning it up is left to
        the caller or the operating system.
    """
    def file_chunk_generate(file_path):
        """Yield the content of *file_path* as a sequence of ByteChunk objects."""
        CHUNK_SIZE = 4 * 1024 * 1024
        with open(file_path, 'rb') as f:
            while True:
                piece = f.read(CHUNK_SIZE)
                if not piece:
                    return
                yield ByteChunk(buffer=piece)

    model_dir = tempfile.mkdtemp()
    tf.saved_model.save(model, model_dir)

    # The archive must NOT be created inside the directory being archived:
    # make_archive would then pick up its own half-written output file, and
    # unpacking the result later fails with EOFError.
    archive_dir = tempfile.mkdtemp()
    # make_archive appends the ".zip" extension and returns the final path.
    zip_file = shutil.make_archive(
        os.path.join(archive_dir, "tf_model"), 'zip', model_dir
    )
    # The exported model is fully contained in the zip now; drop the copy.
    shutil.rmtree(model_dir, ignore_errors=True)

    size = os.path.getsize(zip_file)
    logging.info(f"send model file zip, length: {size}")
    return file_chunk_generate(zip_file)

class NeuralNetworkPart(Model):
    """Small Keras model: Dense(128, relu) followed by Dense(10)."""

    def __init__(self):
        super(NeuralNetworkPart, self).__init__()
        self.d1 = Dense(128, activation='relu')
        self.d2 = Dense(10)

    def call(self, x):
        """Run the first element of *x* through both dense layers."""
        hidden = self.d1(x[0])
        return self.d2(hidden)

# Build the model and obtain the lazy stream of zipped SavedModel chunks.
model = NeuralNetworkPart()
it = load_model_as_bytes(model)

# Reassemble the streamed chunks into a zip file in a fresh directory.
tmpdir = tempfile.mkdtemp()
zip_path = os.path.join(tmpdir, "tf_model.zip")
with open(zip_path, 'wb') as f:
    for byte_chunk in it:
        f.write(byte_chunk.buffer)

# Measure the size only after the file has been closed: while the buffered
# writer is still open, bytes that have not been flushed yet are not counted
# by os.path.getsize.
logging.info(f"receive model file zip, length: {os.path.getsize(zip_path)}")

shutil.unpack_archive(zip_path, tmpdir)

基本上这个程序得到一个文件夹,使用make_archive压缩它。然后以字节形式读取zip文件,将其存储在生成器变量中,并使用生成器编写另一个zip文件,并尝试使用unpack_archive解压它。

在写入字节生成器之前,以及在解压缩之前写入 zip 文件之后,大小是相同的(在日志记录中检查过),但是在调用解压缩时,它会引发 EOF 错误

    shutil.unpack_archive(zip_path, tmpdir)
  File "/lib/python3.6/shutil.py", line 983, in unpack_archive
    func(filename, extract_dir, **kwargs)
  File "/lib/python3.6/shutil.py", line 901, in _unpack_zipfile
    data = zip.read(info.filename)
  File "/lib/python3.6/zipfile.py", line 1338, in read
    return fp.read()
  File "/lib/python3.6/zipfile.py", line 858, in read
    buf += self._read1(self.MAX_N)
  File "/lib/python3.6/zipfile.py", line 940, in _read1
    data += self._read2(n - len(data))
  File "/lib/python3.6/zipfile.py", line 975, in _read2
    raise EOFError

这个稍微简化的版本似乎工作得很好。请注意,这里的临时文件都没有被清理;在你的临时目录被 TensorFlow 模型塞满之前,你可能需要先解决这个问题。

import os
import shutil
import tempfile


def file_chunk_generate(file_path):
    """Lazily yield the bytes of *file_path* in pieces of at most 4 MiB.

    The file is opened in binary mode when iteration starts and closed once
    the generator is exhausted. An empty file yields nothing.
    """
    chunk_size = 4 * 1024 * 1024
    with open(file_path, "rb") as stream:
        # iter() with a sentinel keeps calling read() until it returns b"".
        yield from iter(lambda: stream.read(chunk_size), b"")


def get_zip_chunk_generator(source_dir):
    """Zip *source_dir* into a separate temp directory and stream the archive.

    The archive is deliberately created outside *source_dir* so that it is
    never included in itself.

    Args:
        source_dir: Directory whose contents are packed into the zip file.

    Returns:
        The generator produced by ``file_chunk_generate`` for the new archive.
    """
    # mkdtemp's first positional parameter is the *suffix*; pass the label as
    # a prefix explicitly so the directory is named "zip-XXXX".
    archive_base = os.path.join(tempfile.mkdtemp(prefix="zip-"), "tf_model")
    # make_archive appends ".zip" and returns the full path of the archive.
    arcname = shutil.make_archive(archive_base, "zip", source_dir)
    return file_chunk_generate(arcname)


def make_source_dir():
    """Create a temp directory holding five small test files.

    The files are named ``test-0.txt`` through ``test-4.txt`` and each
    contains ``b"foo"`` repeated 1024 times.

    Returns:
        The path of the newly created directory.
    """
    # mkdtemp's first positional parameter is the *suffix*; pass the label as
    # a prefix explicitly so the directory is named "src-XXXX".
    tmpdir = tempfile.mkdtemp(prefix="src-")
    for x in range(5):
        with open(os.path.join(tmpdir, f"test-{x}.txt"), "wb") as f:
            f.write(b"foo" * 1024)
    return tmpdir


# Build a sample directory and get a lazy stream of its zipped bytes.
source_dir = make_source_dir()
it = get_zip_chunk_generator(source_dir)

# Fresh destination directory; listed before anything is written to it.
dest_dir = tempfile.mkdtemp(prefix="dest-")
print("1", os.listdir(dest_dir))

# Reassemble the archive from the streamed chunks.
zip_path = os.path.join(dest_dir, "tf_model_dest.zip")
with open(zip_path, "wb") as f:
    f.writelines(it)
print("2", os.listdir(dest_dir))

# Extract the archive into the directory that also holds the zip file.
shutil.unpack_archive(zip_path, dest_dir)
print("3", os.listdir(dest_dir))

输出为

1 []
2 ['tf_model_dest.zip']
3 ['test-0.txt', 'test-1.txt', 'test-3.txt', 'test-2.txt', 'tf_model_dest.zip', 'test-4.txt']

如您所料。

不过,如果您要通过网络进行流式传输,我建议您使用 tarball(因为这样确实可以做到完全不在磁盘上落任何文件;解压 ZIP 需要文件对象支持随机定位(seek),而 TAR 不需要)。