使用“tensorflow.python.keras.estimator.model_to_estimator”将 Keras 模型转换为 Estimator API 时如何通知 class 权重?
How to inform class weights when using `tensorflow.python.keras.estimator.model_to_estimator` to convert Keras Models to Estimator API?
我在不平衡数据集上将纯 Keras 模型转换为 TensorFlow Estimator API 时遇到了一些问题。
当使用纯 Keras API 时,class_weight
参数在 model.fit
方法中可用,但是当使用 tensorflow.python.keras.estimator.model_to_estimator
将 Keras 模型转换为 TensorFlow Estimator 时,没有告知的地方class_weights
.
如何克服这个问题?
我在 Ubuntu 18、Cuda 9、Cudnn 7
上使用 TF 1.12
纯 Keras 模型:
def keras_model(n_classes=None, model_dir='./tmp-model/', config=None):
with tf.device('/gpu:0'):
# Inputs
inp_raw = Input(shape=(max_len,), name='word_raw')
# raw text LSTM network
word_raw_emb = Embedding(
input_dim=nunique_chars_raw,
output_dim=EMBED_SIZE,
input_length=MAX_WORD_LENGTH,
trainable=True,
name='word_raw_emb')(inp_raw)
word_raw_emb = Dropout(rate=dropout_rate)(word_raw_emb)
word_raw_emb_lstm = Bidirectional(
LSTM(48, return_sequences=True))(word_raw_emb)
word_raw_emb_gru = Bidirectional(
GRU(48, return_sequences=False))(word_raw_emb_lstm)
word_raw_net = Dense(16, activation='relu')(word_raw_emb_gru)
output_raw_net = Dense(n_classes, activation='softmax')(word_raw_net)
model = Model(inputs=inp_raw, outputs=output_raw_net)
optz = keras.optimizers.RMSprop(
lr=0.002, rho=0.9, epsilon=None, decay=0.0)
model.compile(loss='categorical_crossentropy',
optimizer=optz, metrics=['categorical_accuracy'])
return model
model = keras_model(5)
model.fit(train_X, train_Y_onehot,
batch_size=128,
epochs=10,
validation_data=(eval_X,eval_Y_onehot),
class_weight=class_weights,
verbose=1)
Keras 模型到 TensorFlow Estimator:
def keras_estimator_model(n_classes=None, model_dir='./tmp-model/', config=None):
with tf.device('/gpu:0'):
# Inputs
inp_raw = Input(shape=(max_len,), name='word_raw')
# raw text LSTM network
word_raw_emb = Embedding(
input_dim=nunique_chars_raw,
output_dim=EMBED_SIZE,
input_length=MAX_WORD_LENGTH,
trainable=True,
name='word_raw_emb')(inp_raw)
word_raw_emb = Dropout(rate=dropout_rate)(word_raw_emb)
word_raw_emb_lstm = Bidirectional(
LSTM(48, return_sequences=True))(word_raw_emb)
word_raw_emb_gru = Bidirectional(
GRU(48, return_sequences=False))(word_raw_emb_lstm)
word_raw_net = Dense(16, activation='relu')(word_raw_emb_gru)
output_raw_net = Dense(n_classes, activation='softmax')(word_raw_net)
model = Model(inputs=inp_raw, outputs=output_raw_net)
optz = keras.optimizers.RMSprop(
lr=0.002, rho=0.9, epsilon=None, decay=0.0)
model.compile(loss='categorical_crossentropy',
optimizer=optz, metrics=['categorical_accuracy'])
model_estimator = model_to_estimator(keras_model=model, model_dir=model_dir, config=config)
return model_estimator
estimator_model = keras_estimator_model(5)
train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn,max_steps=10)
eval_spec = tf.estimator.EvalSpec(
input_fn=eval_input_fn,
steps=None,
start_delay_secs=10,
throttle_secs=10)
tf.estimator.train_and_evaluate(estimator_model, train_spec, eval_spec)
class_weight
参数用于对损失函数进行加权(仅在训练期间)。因此,要获得相同的效果,您必须改变损失函数。我没有找到任何现有的 TensorFlow 损失函数,只有 feature request 用于具有 class 权重的交叉熵损失。提到了一个解决方法:
顺便说一句,这个问题已经解决了,因为在 Keras 中是可能的:)
我编写了一个解决方法,它似乎有效。我
import tensorflow as tf
from tensorflow.python.keras import backend as K
def weighted_loss_fn(class_weights):
def _loss_fn(y_true, y_pred):
class_weights_tensor = K.variable(class_weights)
y_true_labels = K.argmax(y_true,axis=1)
weights = K.gather(class_weights_tensor,y_true_labels)
return tf.losses.softmax_cross_entropy(onehot_labels=y_true, logits=y_pred, weights=weights)
return _loss_fn
def keras_estimator_model(n_classes=None, model_dir='./tmp-model/', config=None, class_weights=None):
with tf.device('/gpu:0'):
# Inputs
inp_raw = Input(shape=(max_len,), name='word_raw')
# raw text LSTM network
word_raw_emb = Embedding(
input_dim=nunique_chars_raw,
output_dim=EMBED_SIZE,
input_length=MAX_WORD_LENGTH,
trainable=True,
name='word_raw_emb')(inp_raw)
word_raw_emb = Dropout(rate=dropout_rate)(word_raw_emb)
word_raw_emb_lstm = Bidirectional(
LSTM(48, return_sequences=True))(word_raw_emb)
word_raw_emb_gru = Bidirectional(
GRU(48, return_sequences=False))(word_raw_emb_lstm)
word_raw_net = Dense(16, activation='relu')(word_raw_emb_gru)
output_raw_net = Dense(n_classes, activation='softmax')(word_raw_net)
model = Model(inputs=inp_raw, outputs=output_raw_net)
optz = keras.optimizers.RMSprop(
lr=0.002, rho=0.9, epsilon=None, decay=0.0)
loss_fn = weighted_loss_fn(class_weights)
model.compile(loss=loss_fn,
optimizer=optz, metrics=['categorical_accuracy'])
model_estimator = model_to_estimator(keras_model=model, model_dir=model_dir, config=config)
return model_estimator
estimator_model = keras_estimator_model(5)
train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn,max_steps=10)
eval_spec = tf.estimator.EvalSpec(
input_fn=eval_input_fn,
steps=None,
start_delay_secs=10,
throttle_secs=10)
tf.estimator.train_and_evaluate(estimator_model, train_spec, eval_spec)
就我而言 class_weights
= [ 0.17041813 42.00318471 35.26470588 29.70495495 42.00318471 44.55743243]
我在不平衡数据集上将纯 Keras 模型转换为 TensorFlow Estimator API 时遇到了一些问题。
当使用纯 Keras API 时,class_weight
参数在 model.fit
方法中可用,但是当使用 tensorflow.python.keras.estimator.model_to_estimator
将 Keras 模型转换为 TensorFlow Estimator 时,没有告知的地方class_weights
.
如何克服这个问题?
我在 Ubuntu 18、Cuda 9、Cudnn 7
上使用 TF 1.12纯 Keras 模型:
def keras_model(n_classes=None, model_dir='./tmp-model/', config=None):
with tf.device('/gpu:0'):
# Inputs
inp_raw = Input(shape=(max_len,), name='word_raw')
# raw text LSTM network
word_raw_emb = Embedding(
input_dim=nunique_chars_raw,
output_dim=EMBED_SIZE,
input_length=MAX_WORD_LENGTH,
trainable=True,
name='word_raw_emb')(inp_raw)
word_raw_emb = Dropout(rate=dropout_rate)(word_raw_emb)
word_raw_emb_lstm = Bidirectional(
LSTM(48, return_sequences=True))(word_raw_emb)
word_raw_emb_gru = Bidirectional(
GRU(48, return_sequences=False))(word_raw_emb_lstm)
word_raw_net = Dense(16, activation='relu')(word_raw_emb_gru)
output_raw_net = Dense(n_classes, activation='softmax')(word_raw_net)
model = Model(inputs=inp_raw, outputs=output_raw_net)
optz = keras.optimizers.RMSprop(
lr=0.002, rho=0.9, epsilon=None, decay=0.0)
model.compile(loss='categorical_crossentropy',
optimizer=optz, metrics=['categorical_accuracy'])
return model
model = keras_model(5)
model.fit(train_X, train_Y_onehot,
batch_size=128,
epochs=10,
validation_data=(eval_X,eval_Y_onehot),
class_weight=class_weights,
verbose=1)
Keras 模型到 TensorFlow Estimator:
def keras_estimator_model(n_classes=None, model_dir='./tmp-model/', config=None):
with tf.device('/gpu:0'):
# Inputs
inp_raw = Input(shape=(max_len,), name='word_raw')
# raw text LSTM network
word_raw_emb = Embedding(
input_dim=nunique_chars_raw,
output_dim=EMBED_SIZE,
input_length=MAX_WORD_LENGTH,
trainable=True,
name='word_raw_emb')(inp_raw)
word_raw_emb = Dropout(rate=dropout_rate)(word_raw_emb)
word_raw_emb_lstm = Bidirectional(
LSTM(48, return_sequences=True))(word_raw_emb)
word_raw_emb_gru = Bidirectional(
GRU(48, return_sequences=False))(word_raw_emb_lstm)
word_raw_net = Dense(16, activation='relu')(word_raw_emb_gru)
output_raw_net = Dense(n_classes, activation='softmax')(word_raw_net)
model = Model(inputs=inp_raw, outputs=output_raw_net)
optz = keras.optimizers.RMSprop(
lr=0.002, rho=0.9, epsilon=None, decay=0.0)
model.compile(loss='categorical_crossentropy',
optimizer=optz, metrics=['categorical_accuracy'])
model_estimator = model_to_estimator(keras_model=model, model_dir=model_dir, config=config)
return model_estimator
estimator_model = keras_estimator_model(5)
train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn,max_steps=10)
eval_spec = tf.estimator.EvalSpec(
input_fn=eval_input_fn,
steps=None,
start_delay_secs=10,
throttle_secs=10)
tf.estimator.train_and_evaluate(estimator_model, train_spec, eval_spec)
class_weight
参数用于对损失函数进行加权(仅在训练期间)。因此,要获得相同的效果,您必须改变损失函数。我没有找到任何现有的 TensorFlow 损失函数,只有 feature request 用于具有 class 权重的交叉熵损失。提到了一个解决方法:
顺便说一句,这个问题已经解决了,因为在 Keras 中是可能的:)
我编写了一个解决方法,它似乎有效。我
import tensorflow as tf
from tensorflow.python.keras import backend as K
def weighted_loss_fn(class_weights):
def _loss_fn(y_true, y_pred):
class_weights_tensor = K.variable(class_weights)
y_true_labels = K.argmax(y_true,axis=1)
weights = K.gather(class_weights_tensor,y_true_labels)
return tf.losses.softmax_cross_entropy(onehot_labels=y_true, logits=y_pred, weights=weights)
return _loss_fn
def keras_estimator_model(n_classes=None, model_dir='./tmp-model/', config=None, class_weights=None):
with tf.device('/gpu:0'):
# Inputs
inp_raw = Input(shape=(max_len,), name='word_raw')
# raw text LSTM network
word_raw_emb = Embedding(
input_dim=nunique_chars_raw,
output_dim=EMBED_SIZE,
input_length=MAX_WORD_LENGTH,
trainable=True,
name='word_raw_emb')(inp_raw)
word_raw_emb = Dropout(rate=dropout_rate)(word_raw_emb)
word_raw_emb_lstm = Bidirectional(
LSTM(48, return_sequences=True))(word_raw_emb)
word_raw_emb_gru = Bidirectional(
GRU(48, return_sequences=False))(word_raw_emb_lstm)
word_raw_net = Dense(16, activation='relu')(word_raw_emb_gru)
output_raw_net = Dense(n_classes, activation='softmax')(word_raw_net)
model = Model(inputs=inp_raw, outputs=output_raw_net)
optz = keras.optimizers.RMSprop(
lr=0.002, rho=0.9, epsilon=None, decay=0.0)
loss_fn = weighted_loss_fn(class_weights)
model.compile(loss=loss_fn,
optimizer=optz, metrics=['categorical_accuracy'])
model_estimator = model_to_estimator(keras_model=model, model_dir=model_dir, config=config)
return model_estimator
estimator_model = keras_estimator_model(5)
train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn,max_steps=10)
eval_spec = tf.estimator.EvalSpec(
input_fn=eval_input_fn,
steps=None,
start_delay_secs=10,
throttle_secs=10)
tf.estimator.train_and_evaluate(estimator_model, train_spec, eval_spec)
就我而言 class_weights
= [ 0.17041813 42.00318471 35.26470588 29.70495495 42.00318471 44.55743243]