TensorFlow - Error when using interleave or parallel_interleave
I'm using the tf.data.Dataset API of TensorFlow 1.12, as in this Q&A, to read .h5 files from a directory, where each file holds one pre-saved batch.
First I made a generator:
import h5py

class generator_yield:
    def __init__(self, file):
        self.file = file

    def __call__(self):
        with h5py.File(self.file, 'r') as f:
            yield f['X'][:], f['y'][:]
Then I make a list of filenames and pass them to a Dataset:
import os
import tensorflow as tf

def _fnamesmaker(dir, mode='h5'):
    fnames = []
    for dirpath, _, filenames in os.walk(dir):
        for fname in filenames:
            if fname.endswith(mode):
                fnames.append(os.path.abspath(os.path.join(dirpath, fname)))
    return fnames
fnames = _fnamesmaker('./')
len_fnames = len(fnames)
fnames = tf.data.Dataset.from_tensor_slices(fnames)
Apply the Dataset's interleave method:
# handle multiple files
ds = fnames.interleave(lambda filename: tf.data.Dataset.from_generator(
    generator_yield(filename), output_types=(tf.float32, tf.float32),
    output_shapes=(tf.TensorShape([100, 100, 1]), tf.TensorShape([100, 100, 1]))),
    cycle_length=len_fnames)
ds = ds.batch(5).shuffle(5).prefetch(5)
# init iterator
it = ds.make_initializable_iterator()
init_op = it.initializer
X_it, y_it = it.get_next()
The model:
# model
with tf.name_scope("Conv1"):
W = tf.get_variable("W", shape=[3, 3, 1, 1],
initializer=tf.contrib.layers.xavier_initializer())
b = tf.get_variable("b", shape=[1], initializer=tf.contrib.layers.xavier_initializer())
layer1 = tf.nn.conv2d(X_it, W, strides=[1, 1, 1, 1], padding='SAME') + b
logits = tf.nn.relu(layer1)
loss = tf.reduce_mean(tf.losses.mean_squared_error(labels=y_it, predictions=logits))
train_op = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(loss)
Start the session:
with tf.Session() as sess:
    sess.run([tf.global_variables_initializer(), init_op])
    while True:
        try:
            data = sess.run(train_op)
            print(data.shape)
        except tf.errors.OutOfRangeError:
            print('done.')
            break
The error looks like:
TypeError: expected str, bytes or os.PathLike object, not Tensor
This happens at the __init__ method of the generator. Apparently, when one applies interleave, it is a Tensor that is passed through to the generator.
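A possible workaround is to pass the filename through from_generator's args parameter, so the generator receives a plain bytes object instead of a symbolic Tensor. This is only a sketch, assuming the args parameter is available in your TF 1.12 build:

def generator_from_file(filename):
    # Hypothetical replacement for generator_yield: when passed through
    # `args`, the string tensor arrives here as plain bytes, which h5py can open.
    with h5py.File(filename.decode('utf-8'), 'r') as f:
        yield f['X'][:], f['y'][:]

ds = fnames.interleave(
    lambda filename: tf.data.Dataset.from_generator(
        generator_from_file,
        output_types=(tf.float32, tf.float32),
        output_shapes=(tf.TensorShape([100, 100, 1]), tf.TensorShape([100, 100, 1])),
        args=(filename,)),
    cycle_length=len_fnames)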
You cannot run a Dataset object directly through sess.run. You have to define an iterator and get the next element. Try something like:
next_elem = files.make_one_shot_iterator().get_next()
data = sess.run(next_elem)
You should be able to get your tensors.
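For instance, a minimal sketch reusing the ds pipeline from the question (variable names are assumed):

next_elem = ds.make_one_shot_iterator().get_next()
with tf.Session() as sess:
    X_batch, y_batch = sess.run(next_elem)
    print(X_batch.shape, y_batch.shape)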
According to this, my case would not benefit from the performance of parallel_interleave:
...have a transformation that transforms each element of a source dataset into multiple elements into the destination dataset...
It is more relevant to the typical classification problem, where the data (dogs, cats, ...) is saved in separate directories. What we have here is a segmentation problem, which means the label has the same dimensions as the input image. All the data is stored in one directory, and each .h5 file contains an image together with its label (mask).
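For contrast, a sketch of the kind of pipeline parallel_interleave is meant for, assuming a hypothetical list of TFRecord shards where each filename expands into many records:

record_files = tf.data.Dataset.from_tensor_slices(record_fnames)  # hypothetical shard list
ds = record_files.apply(tf.contrib.data.parallel_interleave(
    lambda f: tf.data.TFRecordDataset(f),
    cycle_length=4, sloppy=True))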
Here, a simple map with num_parallel_calls is sufficient; see the sketch below.
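A minimal sketch of that approach (the helper names are illustrative): wrap the h5py read in tf.py_func so that map can load several files in parallel:

import h5py
import numpy as np
import tensorflow as tf

def _read_h5(path):
    # `path` arrives as bytes from the string tensor
    with h5py.File(path.decode('utf-8'), 'r') as f:
        return f['X'][:].astype(np.float32), f['y'][:].astype(np.float32)

def _load(path_tensor):
    X, y = tf.py_func(_read_h5, [path_tensor], (tf.float32, tf.float32))
    # py_func drops static shape information, so restore it
    X.set_shape([100, 100, 1])
    y.set_shape([100, 100, 1])
    return X, y

ds = fnames.map(_load, num_parallel_calls=4)
ds = ds.batch(5).prefetch(5)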