'TypeError: expected bytes, str found' when creating an image dataset with the Dataset API
I want to create a TensorFlow dataset from my images using the Dataset API. The images are organized in a complex hierarchy, but the leaves are always two directories, "False" and "Genuine". A hypothetical layout (the post does not show the exact hierarchy) might be:
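/tmp/images/training/
├── subset_a/
│   ├── False/
│   │   └── 0001.png
│   └── Genuine/
│       └── 0002.png
└── subset_b/
    ├── False/
    └── Genuine/

This is the code I wrote: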
import tensorflow as tf
from tensorflow.data import Dataset
import os

def enumerate_all_files(rootdir):
    for subdir, dirs, files in os.walk(rootdir):
        for file in files:
            # yield the path to the file and its label;
            # the label is 1 or 0 depending on whether the image is in the "Genuine" folder
            yield os.path.join(subdir, file), int(subdir.split(os.path.sep)[-1] == "Genuine")

def input_parser(img_path, label):
    # convert the label to a one-hot encoding
    one_hot = tf.one_hot(label, 2)
    # read the image from file
    img_file = tf.read_file(img_path)
    img_decoded = tf.image.decode_png(img_file, channels=3)
    return img_decoded, one_hot

def get_dataset():
    generator = lambda: enumerate_all_files("/tmp/images/training/")
    dataset = Dataset.from_generator(generator, (tf.string, tf.int32)).shuffle(1000).batch(100)
    dataset = dataset.map(input_parser)
    return dataset
However, when I run it in my terminal:
tf.enable_eager_execution()
# all the code above
d = get_dataset()
for f in d.make_one_shot_iterator():
    print(f)
it crashes with the following error:
W tensorflow/core/framework/op_kernel.cc:1306] Unknown: SystemError: <weakref at 0x7ff8232f0620; to 'function' at 0x7ff8233c9048 (generator_py_func)> returned a result with an error set
TypeError: expected bytes, str found

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "lcnn.py", line 29, in <module>
    for f in d.make_one_shot_iterator():
  File "/opt/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/data/ops/iterator_ops.py", line 487, in __next__
    return self.next()
  File "/opt/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/data/ops/iterator_ops.py", line 518, in next
    return self._next_internal()
  File "/opt/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/data/ops/iterator_ops.py", line 508, in _next_internal
    output_shapes=self._flat_output_shapes)
  File "/opt/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/ops/gen_dataset_ops.py", line 1848, in iterator_get_next_sync
    "output_types", output_types, "output_shapes", output_shapes)
SystemError: <built-in function TFE_Py_FastPathExecute> returned a result with an error set
What am I doing wrong here?
EDIT
I tried running the code without calling map, shuffle, and batch, and also with the input_parser method commented out, but the error still shows up.
EDIT 2
I changed Dataset.from_generator to Dataset.from_tensor_slices to see whether the code that opens the images works at all. The changed code looks like this:
def input_parser(img_path):
    # convert the label to one-hot encoding
    # one_hot = tf.one_hot(label, 2)
    # read the image from file
    img_file = tf.read_file(img_path)
    img_decoded = tf.image.decode_png(img_file, channels=3)
    return img_decoded

def get_dataset():
    dataset = Dataset.from_tensor_slices(["/tmp/images/training/1000010.png"]).map(input_parser).shuffle(1000).batch(100)
    return dataset
This works fine, though.
I'm not sure about the error message, but I tried your code and it works for me if I do the input_parser mapping before the batch and shuffle operations. (One plausible reason the original ordering fails in any case: after batch, map hands input_parser a whole batch of paths at once, while tf.read_file expects a single scalar string.)
def get_dataset():
    generator = lambda: enumerate_all_files("/tmp/images/training/")
    dataset = Dataset.from_generator(generator, (tf.string, tf.int32)).map(input_parser)
    dataset = dataset.shuffle(1000).batch(100)
    return dataset
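A different angle on the error itself: "expected bytes, str found" suggests that this TensorFlow version wants the tf.string component of a from_generator dataset yielded as bytes rather than a Python 3 str. A minimal sketch of that workaround (an assumption based on the error message, not something verified in this thread):

import os

def enumerate_all_files(rootdir):
    for subdir, dirs, files in os.walk(rootdir):
        for file in files:
            label = int(subdir.split(os.path.sep)[-1] == "Genuine")
            # encode the path to bytes: tf.string tensors are byte strings in Python 3
            yield os.path.join(subdir, file).encode("utf-8"), label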
I ended up changing the code from Dataset.from_generator to Dataset.from_tensor_slices. The final code:
import tensorflow as tf
from tensorflow.data import Dataset
import os

tf.enable_eager_execution()

def enumerate_all_files(rootdir):
    for subdir, dirs, files in os.walk(rootdir):
        for file in files:
            # yield the path to the file and its label;
            # the label is 1 or 0 depending on whether the image is in the "Genuine" folder
            yield os.path.join(subdir, file), int(subdir.split(os.path.sep)[-1] == "Genuine")

def input_parser(img_path, label):
    # convert the label to a one-hot encoding
    one_hot = tf.one_hot(label, 2)
    # read the image from file
    img_file = tf.read_file(img_path)
    img_decoded = tf.image.decode_png(img_file, channels=3)
    return img_decoded, one_hot

def get_dataset():
    file_paths = []
    labels = []
    for path, label in enumerate_all_files("/media/kuba/Seagate Expansion Drive/MGR/Spektrogramy/FFT/training/"):
        file_paths.append(path)
        labels.append(label)
    dataset = Dataset.from_tensor_slices((file_paths, labels)).map(input_parser).shuffle(1000).batch(100)
    return dataset

d = get_dataset()
for f in d.make_one_shot_iterator():
    print(type(f))
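Since eager execution is enabled, each element the iterator yields is an (images, labels) tuple of EagerTensors, so the loop can also unpack it directly. A small sketch (assuming all the PNGs share the same dimensions, which batch requires):

d = get_dataset()
for images, labels in d.make_one_shot_iterator():
    # images: (batch, height, width, 3), labels: (batch, 2) one-hot
    print(images.shape, labels.shape)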