是否可以在 tensorboard 中可视化 keras 嵌入?
Is it possible to visualize keras embeddings in tensorboard?
Keras 可以通过 keras.callbacks.TensorBoard 以 TensorBoard 兼容的格式导出部分训练数据,但它不支持 TensorBoard 中的嵌入可视化(embedding visualisation)。
有解决办法吗?
找到解决方案:
import os
import keras
import tensorflow
# Export a Keras embedding layer for the TensorBoard projector: save a
# checkpoint, write a metadata file, and register both in a projector config
# stored under ROOT_DIR.
ROOT_DIR = '/tmp/tfboard'
os.makedirs(ROOT_DIR, exist_ok=True)

OUTPUT_MODEL_FILE_NAME = os.path.join(ROOT_DIR, 'tf.ckpt')

# get the keras model
model = get_model()

# get the tensor name from the embedding layer
# (raises StopIteration when no layer is named 'embedding')
# NOTE(review): `.W` looks like the older Keras weight attribute; newer
# releases appear to expose it as `.embeddings` -- confirm for your version.
tensor_name = next(
    layer for layer in model.layers if layer.name == 'embedding'
).W.name

# the vocabulary
# columns=[] with header=False writes only the DataFrame index, one entry per
# line -- presumably the vocabulary words; verify against get_embedding().
metadata_file_name = os.path.join(ROOT_DIR, tensor_name)
embedding_df = get_embedding()
embedding_df.to_csv(metadata_file_name, header=False, columns=[])

# Checkpoint the variables of the session Keras is using so the projector can
# load the embedding tensor by name.
saver = tensorflow.train.Saver()
saver.save(keras.backend.get_session(), OUTPUT_MODEL_FILE_NAME)

# tensorflow.train.SummaryWriter was removed in TensorFlow 1.0;
# tensorflow.summary.FileWriter is its replacement.
summary_writer = tensorflow.summary.FileWriter(ROOT_DIR)

projector = tensorflow.contrib.tensorboard.plugins.projector
config = projector.ProjectorConfig()
embedding = config.embeddings.add()
embedding.tensor_name = tensor_name
embedding.metadata_path = metadata_file_name

projector.visualize_embeddings(summary_writer, config)
已有一个实现此功能的拉取请求(https://github.com/fchollet/keras/pull/5247),它扩展了该回调,使其可以为指定的嵌入层创建可视化。
现在可以直接使用 keras.callbacks.TensorBoard 回调:
from keras import callbacks
# Configure the TensorBoard callback with embedding-visualisation options,
# then train with it attached.
tensorboard_callback = callbacks.TensorBoard(
    batch_size=batch_size,
    # Store embeddings every 3 epochs (this can be time consuming)
    embeddings_freq=3,
    # Embeddings are taken from layers with names fc1 and fc2
    embeddings_layer_names=['fc1', 'fc2'],
    # This file will describe the embeddings data (see below)
    embeddings_metadata='metadata.tsv',
    # Data used for the embeddings
    embeddings_data=x_test,
)

model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=10,
    callbacks=[tensorboard_callback],
)
# Use this metadata.tsv file before you have a trained model:
# a "label<TAB>idx" header row, then one row per entry of y_test holding the
# class name at that entry's argmax and its position in y_test.
with open("metadata.tsv", 'w') as f:
    f.write("label\tidx\n")
    f.write('\n'.join(
        "{}\t{}".format(class_names[int(y.argmax())], i)
        for i, y in enumerate(y_test)
    ))
# After the model is trained, you can update the metadata file to include more
# information, such as the predicted labels and the mistakes:
# each row holds the true label, the sample index, the predicted label, and
# whether the two labels match.
y_pred = model.predict(x_test)
with open("metadata.tsv", 'w') as f:
    f.write("label\tidx\tpredicted\tcorrect\n")
    rows = []
    for i, y in enumerate(y_test):
        true_label = class_names[int(y.argmax())]
        predicted_label = class_names[int(y_pred[i].argmax())]
        rows.append("{}\t{}\t{}\t{}".format(
            true_label, i, predicted_label, true_label == predicted_label))
    f.write('\n'.join(rows))
注意:TensorBoard 通常会在 logs 目录中查找您的 metadata.tsv。如果找不到该文件,它会提示正在查找的路径,您可以把文件复制到该路径下,然后刷新 TensorBoard。
Keras 可以通过 keras.callbacks.TensorBoard 以 TensorBoard 兼容的格式导出部分训练数据,但它不支持 TensorBoard 中的嵌入可视化(embedding visualisation)。
有解决办法吗?
找到解决方案:
import os
import keras
import tensorflow
# Export a Keras embedding layer for the TensorBoard projector: save a
# checkpoint, write a metadata file, and register both in a projector config
# stored under ROOT_DIR.
ROOT_DIR = '/tmp/tfboard'
os.makedirs(ROOT_DIR, exist_ok=True)

OUTPUT_MODEL_FILE_NAME = os.path.join(ROOT_DIR, 'tf.ckpt')

# get the keras model
model = get_model()

# get the tensor name from the embedding layer
# (raises StopIteration when no layer is named 'embedding')
# NOTE(review): `.W` looks like the older Keras weight attribute; newer
# releases appear to expose it as `.embeddings` -- confirm for your version.
tensor_name = next(
    layer for layer in model.layers if layer.name == 'embedding'
).W.name

# the vocabulary
# columns=[] with header=False writes only the DataFrame index, one entry per
# line -- presumably the vocabulary words; verify against get_embedding().
metadata_file_name = os.path.join(ROOT_DIR, tensor_name)
embedding_df = get_embedding()
embedding_df.to_csv(metadata_file_name, header=False, columns=[])

# Checkpoint the variables of the session Keras is using so the projector can
# load the embedding tensor by name.
saver = tensorflow.train.Saver()
saver.save(keras.backend.get_session(), OUTPUT_MODEL_FILE_NAME)

# tensorflow.train.SummaryWriter was removed in TensorFlow 1.0;
# tensorflow.summary.FileWriter is its replacement.
summary_writer = tensorflow.summary.FileWriter(ROOT_DIR)

projector = tensorflow.contrib.tensorboard.plugins.projector
config = projector.ProjectorConfig()
embedding = config.embeddings.add()
embedding.tensor_name = tensor_name
embedding.metadata_path = metadata_file_name

projector.visualize_embeddings(summary_writer, config)
已有一个实现此功能的拉取请求(https://github.com/fchollet/keras/pull/5247),它扩展了该回调,使其可以为指定的嵌入层创建可视化。
现在可以直接使用 keras.callbacks.TensorBoard 回调:
from keras import callbacks
# Configure the TensorBoard callback with embedding-visualisation options,
# then train with it attached.
tensorboard_callback = callbacks.TensorBoard(
    batch_size=batch_size,
    # Store embeddings every 3 epochs (this can be time consuming)
    embeddings_freq=3,
    # Embeddings are taken from layers with names fc1 and fc2
    embeddings_layer_names=['fc1', 'fc2'],
    # This file will describe the embeddings data (see below)
    embeddings_metadata='metadata.tsv',
    # Data used for the embeddings
    embeddings_data=x_test,
)

model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=10,
    callbacks=[tensorboard_callback],
)
# Use this metadata.tsv file before you have a trained model:
# a "label<TAB>idx" header row, then one row per entry of y_test holding the
# class name at that entry's argmax and its position in y_test.
with open("metadata.tsv", 'w') as f:
    f.write("label\tidx\n")
    f.write('\n'.join(
        "{}\t{}".format(class_names[int(y.argmax())], i)
        for i, y in enumerate(y_test)
    ))
# After the model is trained, you can update the metadata file to include more
# information, such as the predicted labels and the mistakes:
# each row holds the true label, the sample index, the predicted label, and
# whether the two labels match.
y_pred = model.predict(x_test)
with open("metadata.tsv", 'w') as f:
    f.write("label\tidx\tpredicted\tcorrect\n")
    rows = []
    for i, y in enumerate(y_test):
        true_label = class_names[int(y.argmax())]
        predicted_label = class_names[int(y_pred[i].argmax())]
        rows.append("{}\t{}\t{}\t{}".format(
            true_label, i, predicted_label, true_label == predicted_label))
    f.write('\n'.join(rows))
注意:TensorBoard 通常会在 logs 目录中查找您的 metadata.tsv。如果找不到该文件,它会提示正在查找的路径,您可以把文件复制到该路径下,然后刷新 TensorBoard。