Tensorboard 嵌入可视化
Tensorboard Embedding Visualization
我正在处理文本分类问题。我有 300 万行需要归类为 20 个类别。
以下是我整个代码中的两个代码片段:
这是定义 tf 变量的代码:
class TextCNNRNN(object):
    """Input placeholders and embedding layer of a text classifier (excerpt).

    Only the part of the model needed for the embedding question is shown:
    the input placeholders, the embedding variable and the embedding lookup.
    """

    def __init__(self, embedding_mat, non_static, hidden_unit, sequence_length, max_pool_size,
                 num_classes, embedding_size, filter_sizes, num_filters, l2_reg_lambda=0.0):
        """Create the placeholders and the embedding lookup.

        Args:
            embedding_mat: Initial value of the embedding variable ``emb_var``.
            sequence_length: Second dimension of the ``input_x`` placeholder.
            num_classes: Second dimension of the ``input_y`` placeholder.
            embedding_size: Third dimension of the ``pad`` placeholder.
            non_static, hidden_unit, max_pool_size, filter_sizes, num_filters,
            l2_reg_lambda: Accepted but not used in this excerpt.
        """
        # Placeholders fed at run time.
        self.input_x = tf.placeholder(tf.int32, [None, sequence_length], name='input_x')
        self.input_y = tf.placeholder(tf.float32, [None, num_classes], name='input_y')
        self.dropout_keep_prob = tf.placeholder(tf.float32, name='dropout_keep_prob')
        self.batch_size = tf.placeholder(tf.int32, [])
        self.pad = tf.placeholder(tf.float32, [None, 1, embedding_size, 1], name='pad')
        self.real_len = tf.placeholder(tf.int32, [None], name='real_len')

        # Not used further in this excerpt.
        l2_loss = tf.constant(0.0)

        with tf.device('/cpu:0'), tf.name_scope('embedding'):
            # The embedding matrix is always a tf.Variable here; the switch on
            # `non_static` below is disabled.
            self.emb_var = tf.Variable(embedding_mat, name='emb_var')
            # if not non_static:
            #     self.emb_var = tf.constant(embedding_mat, name='enb_var')
            # else:
            #     self.emb_var = tf.Variable(embedding_mat, name='emb_var')

            # `embedded_chars` is the result of a lookup op on `emb_var` for the
            # ids fed through `input_x`; it is not itself a tf.Variable.
            self.embedded_chars = tf.nn.embedding_lookup(self.emb_var, self.input_x)
            # Append a trailing axis of size 1.
            self.emb = tf.expand_dims(self.embedded_chars, -1)
我想可视化的张量是embedded_chars。
这是我传给投影器(projector)API 的代码:
# Summary writer whose log directory is `metadata_path`.
emb_writer = tf.summary.FileWriter(metadata_path, sess.graph)

# Point the projector at the checkpoint and at the single tensor to display.
config = projector.ProjectorConfig()
config.model_checkpoint_path = ''.join((checkpoint_prefix, str(best_at_step), '.ckpt'))
embedding = config.embeddings.add()
embedding.tensor_name = cnn_rnn.embedded_chars.name
# No metadata file is attached in this attempt:
# embedding.metadata_path = 'metadata.tsv'

projector.visualize_embeddings(emb_writer, config)
我的期望:
我想查看经过训练的输入数据及其分类方式。
实际结果:
当我把 embedded_chars 张量作为投影器(projector)的输入时,什么都没有加载出来。但是,当我使用 emb_var 时,嵌入可以正常加载。问题在于 emb_var 只是我的词汇表,而我需要查看的是实际的数据集。
我自己解决了。投影器是从检查点(checkpoint)中读取张量的,所以我把要可视化的数值放进新的 tf.Variable,并为它们单独保存了一个检查点:
# Read the current value of the model's embedding variable out of the session.
final_embed_matrix = sess.run(cnn_rnn.emb_var)
# Copy that value into a new variable. `i` is not defined in this snippet
# (presumably a counter from the surrounding code -- TODO confirm).
embedding_var = tf.Variable(final_embed_matrix, name='embedding_viz' + str(i))
# Saver restricted to the two variables to be visualized. `output_var` is not
# defined in this snippet; presumably a tf.Variable holding the model outputs
# for the dataset -- TODO confirm against the full script.
saver_embed = tf.train.Saver([embedding_var, output_var])
# Only embedding_var is initialized here; output_var must already hold a value
# by the time the checkpoint is saved below.
sess.run(embedding_var.initializer)
config = projector.ProjectorConfig()
# NOTE(review): the checkpoint is saved below under `checkpoint_viz_prefix`;
# presumably that equals emb_dir + '/' + foldername so both paths match -- confirm.
config.model_checkpoint_path = emb_dir + '/' + foldername + str(best_at_step)+'viz' +'.ckpt'
emb_writer = tf.summary.FileWriter(emb_dir, sess.graph)
# First embedding entry: output_var, with its own metadata file.
embedding = config.embeddings.add()
embedding.metadata_path = foldername + '_metadata.tsv'
embedding.tensor_name = output_var.name
# Second embedding entry: the copied embedding matrix.
embedding = config.embeddings.add()
embedding.metadata_path = 'metadata.tsv'
embedding.tensor_name = embedding_var.name
projector.visualize_embeddings(emb_writer, config)
# Save the two variables to the visualization checkpoint.
saver_embed.save(sess, checkpoint_viz_prefix + str(best_at_step)+'viz' +'.ckpt')
我正在处理文本分类问题。我有 300 万行需要归类为 20 个类别。
以下是我整个代码中的两个代码片段:
这是定义 tf 变量的代码:
class TextCNNRNN(object):
    """Input placeholders and embedding layer of a text classifier (excerpt).

    Only the part of the model needed for the embedding question is shown:
    the input placeholders, the embedding variable and the embedding lookup.
    """

    def __init__(self, embedding_mat, non_static, hidden_unit, sequence_length, max_pool_size,
                 num_classes, embedding_size, filter_sizes, num_filters, l2_reg_lambda=0.0):
        """Create the placeholders and the embedding lookup.

        Args:
            embedding_mat: Initial value of the embedding variable ``emb_var``.
            sequence_length: Second dimension of the ``input_x`` placeholder.
            num_classes: Second dimension of the ``input_y`` placeholder.
            embedding_size: Third dimension of the ``pad`` placeholder.
            non_static, hidden_unit, max_pool_size, filter_sizes, num_filters,
            l2_reg_lambda: Accepted but not used in this excerpt.
        """
        # Placeholders fed at run time.
        self.input_x = tf.placeholder(tf.int32, [None, sequence_length], name='input_x')
        self.input_y = tf.placeholder(tf.float32, [None, num_classes], name='input_y')
        self.dropout_keep_prob = tf.placeholder(tf.float32, name='dropout_keep_prob')
        self.batch_size = tf.placeholder(tf.int32, [])
        self.pad = tf.placeholder(tf.float32, [None, 1, embedding_size, 1], name='pad')
        self.real_len = tf.placeholder(tf.int32, [None], name='real_len')

        # Not used further in this excerpt.
        l2_loss = tf.constant(0.0)

        with tf.device('/cpu:0'), tf.name_scope('embedding'):
            # The embedding matrix is always a tf.Variable here; the switch on
            # `non_static` below is disabled.
            self.emb_var = tf.Variable(embedding_mat, name='emb_var')
            # if not non_static:
            #     self.emb_var = tf.constant(embedding_mat, name='enb_var')
            # else:
            #     self.emb_var = tf.Variable(embedding_mat, name='emb_var')

            # `embedded_chars` is the result of a lookup op on `emb_var` for the
            # ids fed through `input_x`; it is not itself a tf.Variable.
            self.embedded_chars = tf.nn.embedding_lookup(self.emb_var, self.input_x)
            # Append a trailing axis of size 1.
            self.emb = tf.expand_dims(self.embedded_chars, -1)
我想可视化的张量是embedded_chars。
这是我传给投影器(projector)API 的代码:
# Summary writer whose log directory is `metadata_path`.
emb_writer = tf.summary.FileWriter(metadata_path, sess.graph)

# Point the projector at the checkpoint and at the single tensor to display.
config = projector.ProjectorConfig()
config.model_checkpoint_path = ''.join((checkpoint_prefix, str(best_at_step), '.ckpt'))
embedding = config.embeddings.add()
embedding.tensor_name = cnn_rnn.embedded_chars.name
# No metadata file is attached in this attempt:
# embedding.metadata_path = 'metadata.tsv'

projector.visualize_embeddings(emb_writer, config)
我的期望: 我想查看经过训练的输入数据及其分类方式。
实际结果: 当我把 embedded_chars 张量作为投影器(projector)的输入时,什么都没有加载出来。但是,当我使用 emb_var 时,嵌入可以正常加载。问题在于 emb_var 只是我的词汇表,而我需要查看的是实际的数据集。
我自己解决了。投影器是从检查点(checkpoint)中读取张量的,所以我把要可视化的数值放进新的 tf.Variable,并为它们单独保存了一个检查点:
# Read the current value of the model's embedding variable out of the session.
final_embed_matrix = sess.run(cnn_rnn.emb_var)
# Copy that value into a new variable. `i` is not defined in this snippet
# (presumably a counter from the surrounding code -- TODO confirm).
embedding_var = tf.Variable(final_embed_matrix, name='embedding_viz' + str(i))
# Saver restricted to the two variables to be visualized. `output_var` is not
# defined in this snippet; presumably a tf.Variable holding the model outputs
# for the dataset -- TODO confirm against the full script.
saver_embed = tf.train.Saver([embedding_var, output_var])
# Only embedding_var is initialized here; output_var must already hold a value
# by the time the checkpoint is saved below.
sess.run(embedding_var.initializer)
config = projector.ProjectorConfig()
# NOTE(review): the checkpoint is saved below under `checkpoint_viz_prefix`;
# presumably that equals emb_dir + '/' + foldername so both paths match -- confirm.
config.model_checkpoint_path = emb_dir + '/' + foldername + str(best_at_step)+'viz' +'.ckpt'
emb_writer = tf.summary.FileWriter(emb_dir, sess.graph)
# First embedding entry: output_var, with its own metadata file.
embedding = config.embeddings.add()
embedding.metadata_path = foldername + '_metadata.tsv'
embedding.tensor_name = output_var.name
# Second embedding entry: the copied embedding matrix.
embedding = config.embeddings.add()
embedding.metadata_path = 'metadata.tsv'
embedding.tensor_name = embedding_var.name
projector.visualize_embeddings(emb_writer, config)
# Save the two variables to the visualization checkpoint.
saver_embed.save(sess, checkpoint_viz_prefix + str(best_at_step)+'viz' +'.ckpt')