"Error while extracting" 来自张量流数据集

"Error while extracting" from tensorflow datasets

我想在 COCO 上训练一个 TensorFlow 图像分割模型,并打算直接利用 tensorflow_datasets 已经自带的数据集构建器。下载似乎已经完成,但在解压 zip 文件时崩溃了。

我在 conda 环境下的 Jupyter Notebook 中运行 TF 2.0.0。计算机是 64 位 Windows 10。官方图像分割教程(official image segmentation tutorial)中使用的 Oxford-IIIT Pet 数据集可以正常工作。

下面是错误信息(我的本地用户名替换为%user%)。

---------------------------------------------------------------------------
OutOfRangeError                           Traceback (most recent call last)
~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\download\extractor.py in _sync_extract(self, from_path, method, to_path)
     88     try:
---> 89       for path, handle in iter_archive(from_path, method):
     90         path = tf.compat.as_text(path)

~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\download\extractor.py in iter_zip(arch_f)
    176   with _open_or_pass(arch_f) as fobj:
--> 177     z = zipfile.ZipFile(fobj)
    178     for member in z.infolist():

~\.conda\envs\tf-tutorial\lib\zipfile.py in __init__(self, file, mode, compression, allowZip64)
   1130             if mode == 'r':
-> 1131                 self._RealGetContents()
   1132             elif mode in ('w', 'x'):

~\.conda\envs\tf-tutorial\lib\zipfile.py in _RealGetContents(self)
   1193         try:
-> 1194             endrec = _EndRecData(fp)
   1195         except OSError:

~\.conda\envs\tf-tutorial\lib\zipfile.py in _EndRecData(fpin)
    263     # Determine file size
--> 264     fpin.seek(0, 2)
    265     filesize = fpin.tell()

~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_core\python\util\deprecation.py in new_func(*args, **kwargs)
    506                 instructions)
--> 507       return func(*args, **kwargs)
    508 

~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_core\python\lib\io\file_io.py in seek(self, offset, whence, position)
    166       elif whence == 2:
--> 167         offset += self.size()
    168       else:

~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_core\python\lib\io\file_io.py in size(self)
    101     """Returns the size of the file."""
--> 102     return stat(self.__name).length
    103 

~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_core\python\lib\io\file_io.py in stat(filename)
    726   """
--> 727   return stat_v2(filename)
    728 

~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_core\python\lib\io\file_io.py in stat_v2(path)
    743   file_statistics = pywrap_tensorflow.FileStatistics()
--> 744   pywrap_tensorflow.Stat(compat.as_bytes(path), file_statistics)
    745   return file_statistics

OutOfRangeError: C:\Users\%user%\tensorflow_datasets\downloads\images.cocodataset.org_zips_train20147eQIfmQL3bpVDgkOrnAQklNLVUtCsFrDPwMAuYSzF3U.zip; Unknown error

During handling of the above exception, another exception occurred:

ExtractError                              Traceback (most recent call last)
<ipython-input-27-887fa0198611> in <module>
      1 cocoBuilder = tfds.builder('coco')
      2 info = cocoBuilder.info
----> 3 cocoBuilder.download_and_prepare()

~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\api_utils.py in disallow_positional_args_dec(fn, instance, args, kwargs)
     50     _check_no_positional(fn, args, ismethod, allowed=allowed)
     51     _check_required(fn, kwargs)
---> 52     return fn(*args, **kwargs)
     53 
     54   return disallow_positional_args_dec(wrapped)  # pylint: disable=no-value-for-parameter

~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\dataset_builder.py in download_and_prepare(self, download_dir, download_config)
    285         self._download_and_prepare(
    286             dl_manager=dl_manager,
--> 287             download_config=download_config)
    288 
    289         # NOTE: If modifying the lines below to put additional information in

~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\dataset_builder.py in _download_and_prepare(self, dl_manager, download_config)
    946     super(GeneratorBasedBuilder, self)._download_and_prepare(
    947         dl_manager=dl_manager,
--> 948         max_examples_per_split=download_config.max_examples_per_split,
    949     )
    950 

~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\dataset_builder.py in _download_and_prepare(self, dl_manager, **prepare_split_kwargs)
    802     # Generating data for all splits
    803     split_dict = splits_lib.SplitDict()
--> 804     for split_generator in self._split_generators(dl_manager):
    805       if splits_lib.Split.ALL == split_generator.split_info.name:
    806         raise ValueError(

~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\image\coco.py in _split_generators(self, dl_manager)
    237     root_url = 'http://images.cocodataset.org/'
    238     extracted_paths = dl_manager.download_and_extract({
--> 239         key: root_url + url for key, url in urls.items()
    240     })
    241 

~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\download\download_manager.py in download_and_extract(self, url_or_urls)
    357     with self._downloader.tqdm():
    358       with self._extractor.tqdm():
--> 359         return _map_promise(self._download_extract, url_or_urls)
    360 
    361   @property

~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\download\download_manager.py in _map_promise(map_fn, all_inputs)
    393   """Map the function into each element and resolve the promise."""
    394   all_promises = utils.map_nested(map_fn, all_inputs)  # Apply the function
--> 395   res = utils.map_nested(_wait_on_promise, all_promises)
    396   return res

~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\utils\py_utils.py in map_nested(function, data_struct, dict_only, map_tuple)
    127     return {
    128         k: map_nested(function, v, dict_only, map_tuple)
--> 129         for k, v in data_struct.items()
    130     }
    131   elif not dict_only:

~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\utils\py_utils.py in <dictcomp>(.0)
    127     return {
    128         k: map_nested(function, v, dict_only, map_tuple)
--> 129         for k, v in data_struct.items()
    130     }
    131   elif not dict_only:

~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\utils\py_utils.py in map_nested(function, data_struct, dict_only, map_tuple)
    141         return tuple(mapped)
    142   # Singleton
--> 143   return function(data_struct)
    144 
    145 

~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\download\download_manager.py in _wait_on_promise(p)
    377 
    378   def _wait_on_promise(p):
--> 379     return p.get()
    380 
    381 else:

~\.conda\envs\tf-tutorial\lib\site-packages\promise\promise.py in get(self, timeout)
    508         target = self._target()
    509         self._wait(timeout or DEFAULT_TIMEOUT)
--> 510         return self._target_settled_value(_raise=True)
    511 
    512     def _target_settled_value(self, _raise=False):

~\.conda\envs\tf-tutorial\lib\site-packages\promise\promise.py in _target_settled_value(self, _raise)
    512     def _target_settled_value(self, _raise=False):
    513         # type: (bool) -> Any
--> 514         return self._target()._settled_value(_raise)
    515 
    516     _value = _reason = _target_settled_value

~\.conda\envs\tf-tutorial\lib\site-packages\promise\promise.py in _settled_value(self, _raise)
    222             if _raise:
    223                 raise_val = self._fulfillment_handler0
--> 224                 reraise(type(raise_val), raise_val, self._traceback)
    225             return self._fulfillment_handler0
    226 

~\.conda\envs\tf-tutorial\lib\site-packages\six.py in reraise(tp, value, tb)
    694             if value.__traceback__ is not tb:
    695                 raise value.with_traceback(tb)
--> 696             raise value
    697         finally:
    698             value = None

~\.conda\envs\tf-tutorial\lib\site-packages\promise\promise.py in handle_future_result(future)
    840         # type: (Any) -> None
    841         try:
--> 842             resolve(future.result())
    843         except Exception as e:
    844             tb = exc_info()[2]

~\.conda\envs\tf-tutorial\lib\concurrent\futures\_base.py in result(self, timeout)
    423                 raise CancelledError()
    424             elif self._state == FINISHED:
--> 425                 return self.__get_result()
    426 
    427             self._condition.wait(timeout)

~\.conda\envs\tf-tutorial\lib\concurrent\futures\_base.py in __get_result(self)
    382     def __get_result(self):
    383         if self._exception:
--> 384             raise self._exception
    385         else:
    386             return self._result

~\.conda\envs\tf-tutorial\lib\concurrent\futures\thread.py in run(self)
     54 
     55         try:
---> 56             result = self.fn(*self.args, **self.kwargs)
     57         except BaseException as exc:
     58             self.future.set_exception(exc)

~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\download\extractor.py in _sync_extract(self, from_path, method, to_path)
     92     except BaseException as err:
     93       msg = 'Error while extracting %s to %s : %s' % (from_path, to_path, err)
---> 94       raise ExtractError(msg)
     95     # `tf.io.gfile.Rename(overwrite=True)` doesn't work for non empty
     96     # directories, so delete destination first, if it already exists.

ExtractError: Error while extracting C:\Users\%user%\tensorflow_datasets\downloads\images.cocodataset.org_zips_train20147eQIfmQL3bpVDgkOrnAQklNLVUtCsFrDPwMAuYSzF3U.zip to C:\Users\%user%\tensorflow_datasets\downloads\extracted\ZIP.images.cocodataset.org_zips_train20147eQIfmQL3bpVDgkOrnAQklNLVUtCsFrDPwMAuYSzF3U.zip : C:\Users\%user%\tensorflow_datasets\downloads\images.cocodataset.org_zips_train20147eQIfmQL3bpVDgkOrnAQklNLVUtCsFrDPwMAuYSzF3U.zip; Unknown error

这条消息对我来说似乎很神秘。笔记本启动时,它试图解压到的目标文件夹并不存在——该文件夹是由 TensorFlow 创建的,并且只在执行到这一行命令时才会创建。我当然也尝试过把它完全删除后重新运行,但没有效果。

导致错误的代码是(直到最后一行一切正常):

import tensorflow as tf
from __future__ import absolute_import, division, print_function, unicode_literals

from tensorflow_examples.models.pix2pix import pix2pix

import tensorflow_datasets as tfds

from IPython.display import clear_output
import matplotlib.pyplot as plt

dataset, info = tfds.load('coco', with_info=True)

我还尝试把最后一条命令拆开:先创建 tfds.builder 对象,然后再运行 download_and_prepare(如上面的回溯所示),但还是出现相同的错误。

磁盘空间充足——下载完成后仍有 50+GB 可用,而该数据集最大的版本(2014)应该只有 37GB。

我在 Windows 10 上使用 COCO 2017 时也遇到了类似的问题,我的解决方法很简单:根据错误信息中给出的文件夹路径,手动解压 ZIP 文件。