尝试使用 tf.Data 解析 training/testing 摘要
Trying to resolve training/testing summaries using tf.Data
我希望在我的 NN 训练期间生成摘要(summaries),类似于官方示例中的做法,但我看到的所有示例都使用 feed_dict 而不是 tf.data。我的训练和测试有单独的初始值设定项:
self.train_init = iterator.make_initializer(train_data) # initializer for train_data
self.test_init = iterator.make_initializer(test_data) # initializer for test_data
在训练期间,我使用 sess.run(self.train_init) 初始化训练初始化器,但为了测试准确性,我需要初始化 sess.run(self.test_init) 我相信。目前我的代码如下所示:
# Train for 100 epochs; every 10th epoch report the average loss and log
# accuracy/summary values for TensorBoard.
for i in range(100):
    sess.run(self.train_init)  # (re)initialize the iterator on the training set
    total_loss = 0
    n_batches = 0
    try:
        # Drain the training dataset batch by batch until it is exhausted.
        while True:
            _, l = sess.run([self.optimizer, self.loss])
            total_loss += l
            n_batches += 1
    except tf.errors.OutOfRangeError:
        # End of epoch: the iterator signals exhaustion via OutOfRangeError.
        pass
    if i % 10 == 0:  # was `i % (10/1)` — the division was a no-op
        print('Avg. loss epoch {0}: {1}'.format(i, total_loss/n_batches))
        # BUG FIX: both tensors must be fetched together in one list.
        # The original `sess.run(self.accuracy, self.summary_op)` passed the
        # summary op as sess.run()'s second positional argument, which is
        # `feed_dict`, and would fail (and only fetch accuracy).
        acc, summ = sess.run([self.accuracy, self.summary_op])
        writer.add_summary(summ, i)
按照目前的情况,每 10 次迭代测量一次准确性,但它使用的是训练批次,而不是测试批次。我希望能够看到随着时间的推移训练和测试的准确性,以便清楚地看到是否发生了过度拟合(良好的训练准确性但测试准确性差)。
我不知道如何在使用 tf.Data 时执行此操作。如何在进行 100 次迭代时在初始值设定项之间切换,同时创建我需要的摘要?
通常在训练过程之外评估测试集以优化性能。但是如果你真的想这样做 in-situ,对我来说最合适的解决方案之一是:
- 创建两个 tf.data 输入管道(训练和测试各一个)和一个占位符以在它们之间切换。
- 使用 tf.cond() 来控制数据流向,如下面的代码所示。
代码可能类似于:
with tf.name_scope('train_pipeline'):
    train_ds = tf.data.Dataset.from_tensor_slices(...)
    ...
    # BUG FIX: keep the dataset, the iterator, the initializer op and the
    # next-element tensor in separate variables. The original overwrote
    # `train_ds` with the initializer and used `iterator.initialize`,
    # which does not exist (the attribute is `initializer`).
    train_iterator = train_ds.make_initializable_iterator()
    train_init = train_iterator.initializer
    X_iterator_train = train_iterator.get_next()

with tf.name_scope('test_pipeline'):
    test_ds = tf.data.Dataset.from_tensor_slices(...)
    ...
    test_iterator = test_ds.make_initializable_iterator()
    test_init = test_iterator.initializer
    X_iterator_test = test_iterator.get_next()

# String placeholder used as a runtime switch between the two pipelines.
train_or_test = tf.placeholder(tf.string, name='switch_buton')

# BUG FIX: the branch callables must *return* their tensor; the originals
# (`def f1(): X_iterator_train`) evaluated the name and returned None,
# so tf.cond() would have no output.
def f1(): return X_iterator_train
def f2(): return X_iterator_test
inputs = tf.cond(tf.equal(train_or_test, 'train'), f1, f2, name='input_cond')

# model
...  # use `inputs` (IteratorGetNext) at your first layer, e.g. tf.nn.conv2d(inputs, ...)
# summary_op = tf.summary.merge_all()  # needed below to fetch summaries

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # One FileWriter per log directory so TensorBoard overlays both curves.
    train_writer = tf.summary.FileWriter(...)
    test_writer = tf.summary.FileWriter(...)
    for ep in range(nb_epoch):
        # NOTE(review): re-initializing both iterators once per epoch assumes
        # neither pipeline is exhausted mid-epoch — confirm, or .repeat() them.
        sess.run([train_init, test_init])
        for step in range(nb_batch):
            global_step = ep * nb_batch + step
            # ~90% train, ~10% test
            if step % 9 == 0:
                # BUG FIX: on a *test* batch do not run the optimizer (that
                # would train on test data), write to the *test* writer (the
                # original swapped the two writers), and pass the evaluated
                # summary plus the step to add_summary() (the original called
                # it with no arguments).
                summ = sess.run(summary_op, feed_dict={train_or_test: 'test'})
                test_writer.add_summary(summ, global_step)
            else:
                _, summ = sess.run([train_op, summary_op],
                                   feed_dict={train_or_test: 'train'})
                train_writer.add_summary(summ, global_step)
我希望在我的 NN 训练期间生成摘要(summaries),类似于官方示例中的做法,但我看到的所有示例都使用 feed_dict 而不是 tf.data。
self.train_init = iterator.make_initializer(train_data) # initializer for train_data
self.test_init = iterator.make_initializer(test_data) # initializer for test_data
在训练期间,我使用 sess.run(self.train_init) 初始化训练初始化器,但为了测试准确性,我需要初始化 sess.run(self.test_init) 我相信。目前我的代码如下所示:
# Train for 100 epochs; every 10th epoch report the average loss and log
# accuracy/summary values for TensorBoard.
for i in range(100):
    sess.run(self.train_init)  # (re)initialize the iterator on the training set
    total_loss = 0
    n_batches = 0
    try:
        # Drain the training dataset batch by batch until it is exhausted.
        while True:
            _, l = sess.run([self.optimizer, self.loss])
            total_loss += l
            n_batches += 1
    except tf.errors.OutOfRangeError:
        # End of epoch: the iterator signals exhaustion via OutOfRangeError.
        pass
    if i % 10 == 0:  # was `i % (10/1)` — the division was a no-op
        print('Avg. loss epoch {0}: {1}'.format(i, total_loss/n_batches))
        # BUG FIX: both tensors must be fetched together in one list.
        # The original `sess.run(self.accuracy, self.summary_op)` passed the
        # summary op as sess.run()'s second positional argument, which is
        # `feed_dict`, and would fail (and only fetch accuracy).
        acc, summ = sess.run([self.accuracy, self.summary_op])
        writer.add_summary(summ, i)
按照目前的情况,每 10 次迭代测量一次准确性,但它使用的是训练批次,而不是测试批次。我希望能够看到随着时间的推移训练和测试的准确性,以便清楚地看到是否发生了过度拟合(良好的训练准确性但测试准确性差)。
我不知道如何在使用 tf.Data 时执行此操作。如何在进行 100 次迭代时在初始值设定项之间切换,同时创建我需要的摘要?
通常在训练过程之外评估测试集以优化性能。但是如果你真的想这样做 in-situ,对我来说最合适的解决方案之一是:
- 创建两个 tf.data 输入管道(训练和测试各一个)和一个占位符以在它们之间切换。
- 使用 tf.cond() 来控制数据流向,如下面的代码所示。
代码可能类似于:
with tf.name_scope('train_pipeline'):
    train_ds = tf.data.Dataset.from_tensor_slices(...)
    ...
    # BUG FIX: keep the dataset, the iterator, the initializer op and the
    # next-element tensor in separate variables. The original overwrote
    # `train_ds` with the initializer and used `iterator.initialize`,
    # which does not exist (the attribute is `initializer`).
    train_iterator = train_ds.make_initializable_iterator()
    train_init = train_iterator.initializer
    X_iterator_train = train_iterator.get_next()

with tf.name_scope('test_pipeline'):
    test_ds = tf.data.Dataset.from_tensor_slices(...)
    ...
    test_iterator = test_ds.make_initializable_iterator()
    test_init = test_iterator.initializer
    X_iterator_test = test_iterator.get_next()

# String placeholder used as a runtime switch between the two pipelines.
train_or_test = tf.placeholder(tf.string, name='switch_buton')

# BUG FIX: the branch callables must *return* their tensor; the originals
# (`def f1(): X_iterator_train`) evaluated the name and returned None,
# so tf.cond() would have no output.
def f1(): return X_iterator_train
def f2(): return X_iterator_test
inputs = tf.cond(tf.equal(train_or_test, 'train'), f1, f2, name='input_cond')

# model
...  # use `inputs` (IteratorGetNext) at your first layer, e.g. tf.nn.conv2d(inputs, ...)
# summary_op = tf.summary.merge_all()  # needed below to fetch summaries

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # One FileWriter per log directory so TensorBoard overlays both curves.
    train_writer = tf.summary.FileWriter(...)
    test_writer = tf.summary.FileWriter(...)
    for ep in range(nb_epoch):
        # NOTE(review): re-initializing both iterators once per epoch assumes
        # neither pipeline is exhausted mid-epoch — confirm, or .repeat() them.
        sess.run([train_init, test_init])
        for step in range(nb_batch):
            global_step = ep * nb_batch + step
            # ~90% train, ~10% test
            if step % 9 == 0:
                # BUG FIX: on a *test* batch do not run the optimizer (that
                # would train on test data), write to the *test* writer (the
                # original swapped the two writers), and pass the evaluated
                # summary plus the step to add_summary() (the original called
                # it with no arguments).
                summ = sess.run(summary_op, feed_dict={train_or_test: 'test'})
                test_writer.add_summary(summ, global_step)
            else:
                _, summ = sess.run([train_op, summary_op],
                                   feed_dict={train_or_test: 'train'})
                train_writer.add_summary(summ, global_step)