How to plot different summary metrics on the same plot with Tensorboard?
I want to be able to plot, on the same plot in Tensorboard, the per-batch training loss and the average validation loss across the validation set. I ran into this issue when my validation set was too large to fit into memory, so batching was required, along with tf.metrics update ops.
This question could apply to any Tensorflow metric that you want to appear on the same chart in Tensorboard.
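For anyone unfamiliar with the tf.metrics pattern this relies on, here is a minimal sketch (the names batch_loss/mean_loss are illustrative, not taken from the code below): the metric returns a running-value tensor plus an update op, and its counters live in local variables, so re-running tf.local_variables_initializer() resets the average:
import tensorflow as tf

batch_loss = tf.placeholder(tf.float32, shape=[])          # per-batch loss value
mean_loss, mean_loss_update = tf.metrics.mean(batch_loss)  # running average + update op

with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())   # reset the accumulator
    for b in [0.3, 0.5, 0.7]:                    # dummy per-batch losses
        sess.run(mean_loss_update, feed_dict={batch_loss: b})
    print(sess.run(mean_loss))                   # 0.5, the mean over all batches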
I can
- plot the two separately (see here)
- plot the per-batch validation loss on the same graph as the per-batch training loss (this was OK when the validation set could be a single batch and I could reuse the training summary op train_summ below)
In the example code below, my issue stems from the fact that my validation summary tf.summary.scalar with name=loss gets renamed to loss_1 and is therefore moved to a separate graph in Tensorboard. From what I can work out, Tensorboard takes things with the "same name" and plots them on the same graph, regardless of which folder they are written to. This is frustrating as train_summ (name=loss) is only ever written to the train folder and valid_summ (name=loss) is only ever written to the valid folder - but it is still renamed to loss_1.
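The renaming is easy to reproduce in isolation - in graph-mode Tensorflow node names are uniquified, and the tag that Tensorboard groups charts by is derived from the node name:
import tensorflow as tf

x = tf.constant(1.0)
s1 = tf.summary.scalar('loss', x)
s2 = tf.summary.scalar('loss', x)
print(s1.op.name)  # loss
print(s2.op.name)  # loss_1  <- the tag Tensorboard groups by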
Example code:
# View graphs with (Linux): $ tensorboard --logdir=/tmp/my_tf_model
import tensorflow as tf
import numpy as np
import os
import tempfile

def train_data_gen():
    yield np.random.normal(size=[3]), np.array([0.5, 0.5, 0.5])

def valid_data_gen():
    yield np.random.normal(size=[3]), np.array([0.8, 0.8, 0.8])

batch_size = 25
n_training_batches = 4
n_valid_batches = 2
n_epochs = 5
summary_loc = os.path.join(tempfile.gettempdir(), 'my_tf_model')
print("Summaries written to " + summary_loc)

# Dummy data
train_data = tf.data.Dataset.from_generator(train_data_gen, (tf.float32, tf.float32)).repeat().batch(batch_size)
valid_data = tf.data.Dataset.from_generator(valid_data_gen, (tf.float32, tf.float32)).repeat().batch(batch_size)
handle = tf.placeholder(tf.string, shape=[])
iterator = tf.data.Iterator.from_string_handle(handle,
                                               train_data.output_types, train_data.output_shapes)
batch_x, batch_y = iterator.get_next()
train_iter = train_data.make_initializable_iterator()
valid_iter = valid_data.make_initializable_iterator()

# Some ops on the data
loss = tf.losses.mean_squared_error(batch_x, batch_y)
valid_loss, valid_loss_update = tf.metrics.mean(loss)

# Write to summaries
train_summ = tf.summary.scalar('loss', loss)
valid_summ = tf.summary.scalar('loss', valid_loss)  # <- will be renamed to "loss_1"

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    train_handle, valid_handle = sess.run([train_iter.string_handle(), valid_iter.string_handle()])
    sess.run([train_iter.initializer, valid_iter.initializer])

    # Summary writers
    writer_train = tf.summary.FileWriter(os.path.join(summary_loc, 'train'), sess.graph)
    writer_valid = tf.summary.FileWriter(os.path.join(summary_loc, 'valid'), sess.graph)

    global_step = 0  # implicit as no actual training
    for i in range(n_epochs):
        # "Training"
        for j in range(n_training_batches):
            global_step += 1
            summ = sess.run(train_summ, feed_dict={handle: train_handle})
            writer_train.add_summary(summary=summ, global_step=global_step)

        # "Validation": accumulate the mean loss over all validation batches
        sess.run(tf.local_variables_initializer())  # reset the tf.metrics accumulator
        for j in range(n_valid_batches):
            _, batch_summ = sess.run([valid_loss_update, train_summ], feed_dict={handle: valid_handle})
            # The following would plot the per-batch validation loss on the loss plot with the training data:
            # writer_valid.add_summary(summary=batch_summ, global_step=global_step + j + 1)
        summ = sess.run(valid_summ)
        writer_valid.add_summary(summary=summ, global_step=global_step)  # <- I want this on the training loss graph
What I've tried
- Separate tf.summary.FileWriter objects (one for training, one for validation), as recommended by this issue and this question (I think what I'm after is mentioned in the comments of that question)
- Using tf.summary.merge to merge all my training and validation/test metrics into overall summary ops; does useful bookkeeping, but doesn't plot what I want on the same graph
- Using the family attribute of tf.summary.scalar (loss still gets renamed to loss_1)
- (Complete hack solution) Using valid_loss, valid_loss_update = tf.metrics.mean(loss) on the training data, and then re-running tf.local_variables_initializer() every training batch (a condensed sketch follows this list). This does give you the same summary op, and therefore puts things on the same graph, but surely isn't how you're meant to do this? It also doesn't generalise to other metrics.
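As promised, a condensed, runnable sketch of that hack (with dummy feed values standing in for the real pipeline): a single tf.metrics.mean drives one summary op for both phases, and tf.local_variables_initializer() is re-run before each training batch so that the "mean" is just that batch's loss:
import tensorflow as tf

batch_loss = tf.placeholder(tf.float32, shape=[])
mean_loss, update_mean = tf.metrics.mean(batch_loss)
shared_summ = tf.summary.scalar('loss', mean_loss)  # the one and only 'loss' tag

with tf.Session() as sess:
    # "Training" batch: reset first, so the mean covers exactly one batch
    sess.run(tf.local_variables_initializer())
    sess.run(update_mean, feed_dict={batch_loss: 0.4})
    train_event = sess.run(shared_summ)  # -> writer_train.add_summary(...)
    # "Validation": reset once, then accumulate over all validation batches
    sess.run(tf.local_variables_initializer())
    for v in [0.7, 0.9]:
        sess.run(update_mean, feed_dict={batch_loss: v})
    valid_event = sess.run(shared_summ)  # -> writer_valid.add_summary(...)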
Context
- Tensorflow 1.9.0
- Tensorboard 1.9.0
- Python 3.5.2
The Tensorboard custom_scalar plugin is the way to solve this problem.
Here's the same example again, with a custom_scalar used to plot the two losses (per training batch + averaged across all validation batches) on the same plot:
# View graphs with (Linux): $ tensorboard --logdir=/tmp/my_tf_model
import os
import tempfile
import tensorflow as tf
import numpy as np
from tensorboard import summary as summary_lib
from tensorboard.plugins.custom_scalar import layout_pb2

def train_data_gen():
    yield np.random.normal(size=[3]), np.array([0.5, 0.5, 0.5])

def valid_data_gen():
    yield np.random.normal(size=[3]), np.array([0.8, 0.8, 0.8])

batch_size = 25
n_training_batches = 4
n_valid_batches = 2
n_epochs = 5
summary_loc = os.path.join(tempfile.gettempdir(), 'my_tf_model')
print("Summaries written to " + summary_loc)

# Dummy data
train_data = tf.data.Dataset.from_generator(
    train_data_gen, (tf.float32, tf.float32)).repeat().batch(batch_size)
valid_data = tf.data.Dataset.from_generator(
    valid_data_gen, (tf.float32, tf.float32)).repeat().batch(batch_size)
handle = tf.placeholder(tf.string, shape=[])
iterator = tf.data.Iterator.from_string_handle(handle, train_data.output_types,
                                               train_data.output_shapes)
batch_x, batch_y = iterator.get_next()
train_iter = train_data.make_initializable_iterator()
valid_iter = valid_data.make_initializable_iterator()

# Some ops on the data
loss = tf.losses.mean_squared_error(batch_x, batch_y)
valid_loss, valid_loss_update = tf.metrics.mean(loss)

# Distinct tags (loss/training, loss/valid) under a shared name scope,
# so neither summary gets renamed
with tf.name_scope('loss'):
    train_summ = summary_lib.scalar('training', loss)
    valid_summ = summary_lib.scalar('valid', valid_loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    train_handle, valid_handle = sess.run([train_iter.string_handle(), valid_iter.string_handle()])
    sess.run([train_iter.initializer, valid_iter.initializer])
    writer_train = tf.summary.FileWriter(os.path.join(summary_loc, 'train'), sess.graph)
    writer_valid = tf.summary.FileWriter(os.path.join(summary_loc, 'valid'), sess.graph)

    # Layout telling Tensorboard to overlay both tags on one chart;
    # only needs to be written once, to a single writer
    layout_summary = summary_lib.custom_scalar_pb(
        layout_pb2.Layout(category=[
            layout_pb2.Category(
                title='losses',
                chart=[
                    layout_pb2.Chart(
                        title='losses',
                        multiline=layout_pb2.MultilineChartContent(tag=[
                            'loss/training', 'loss/valid'
                        ]))
                ])
        ]))
    writer_train.add_summary(layout_summary)

    global_step = 0
    for i in range(n_epochs):
        for j in range(n_training_batches):  # "Training"
            global_step += 1
            summ = sess.run(train_summ, feed_dict={handle: train_handle})
            writer_train.add_summary(summary=summ, global_step=global_step)

        sess.run(tf.local_variables_initializer())  # reset the tf.metrics accumulator
        for j in range(n_valid_batches):  # "Validation"
            _, batch_summ = sess.run([valid_loss_update, train_summ], feed_dict={handle: valid_handle})
        summ = sess.run(valid_summ)
        writer_valid.add_summary(summary=summ, global_step=global_step)
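Two details worth noting here: the two scalars now keep distinct tags (loss/training and loss/valid, thanks to the shared name_scope), so neither gets uniquified, and the layout itself is just a summary protobuf written once to a single writer (writer_train above). As far as I can tell, the entries in MultilineChartContent's tag list are treated as regular expressions, so something like 'loss/.*' should also match both series.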
Here's the resulting output in Tensorboard.