Tensorflow:如何将预训练模型已经嵌入的数据输入到 LSTM 模型中?
Tensorflow: How to input data already embedded by pre-train model into a LSTM model?
我是 TensorFlow 的新手,正在构建一个简单的 LSTM 来做情感分析(二元分类)。我有一个名为 traindata 的 Python 列表,其中包含 15391 个句子。所有句子都已经用预训练模型完成了嵌入。现在每个句子的形状是 (591, 200):每个句子有 591 个词,每个词被嵌入为一个形状为 (200,) 的向量。因此,数据是一个由 (591, 200) 张量组成的 Python 列表。下面是我的模型:
# Mini-batch size handed to model.fit through its batch_size argument.
BATCH_SIZE = 32  # alternative value left by the author: 128
def lstm_model(input_shape, units):
    """Build a single-layer LSTM binary classifier with the Keras functional API.

    Args:
        input_shape: Shape of one sample without the batch axis; forwarded
            unchanged to ``Input(shape=...)``.
        units: Number of units in the LSTM layer.

    Returns:
        A ``Model`` that maps the input layer to a single sigmoid-activated
        output.
    """
    input_data = Input(shape=input_shape, dtype="float32", name="input_layer")
    # The LSTM layer takes its input shape from `input_data`. The previous
    # hard-coded input_shape=(591, 200) keyword duplicated (and could silently
    # contradict) the `input_shape` parameter, so it is no longer passed.
    x = LSTM(units, name="lstm_layer_1", activation="tanh", return_sequences=False)(input_data)
    # One output unit followed by a separate sigmoid activation layer.
    x = Dense(units=1, name="full_connection_layer_1")(x)
    x = Activation("sigmoid", name="activation_layer")(x)
    model = Model(inputs=input_data, outputs=x)
    return model
# Seed TensorFlow's global random generator BEFORE the model is built, so the
# initial layer weights are created under the seed as well (seeding after
# construction leaves the weight initialisation unseeded).
tf.compat.v1.random.set_random_seed(111)
# traindata[0].shape is the shape of one sentence, i.e. without the batch axis.
model = lstm_model(traindata[0].shape, 32)
model.summary()
model.compile(loss='binary_crossentropy', optimizer=tf.keras.optimizers.Adam(learning_rate=1e-2), metrics=['binary_accuracy', 'Precision', "Recall", "AUC"])
上述模型的摘要(model.summary() 的输出)如下:
Model: "model_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_layer (InputLayer) [(None, 591, 200)] 0
_________________________________________________________________
lstm_layer_1 (LSTM) (None, 32) 29824
_________________________________________________________________
full_connection_layer_1 (Den (None, 1) 33
_________________________________________________________________
activation_layer (Activation (None, 1) 0
=================================================================
Total params: 29,857
Trainable params: 29,857
Non-trainable params: 0
我指定了 BATCH_SIZE = 32,然后调用 model.fit 进行训练:
# traindata is described in the question as a Python list of (591, 200)
# tensors; this call raises the ValueError shown in the traceback that follows.
history_weighted_lstm_model = model.fit(traindata, train_labels, epochs = 2, batch_size = BATCH_SIZE, shuffle=True, class_weight=class_weight)
不幸的是,它引发了错误:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_5152/3209127219.py in <module>
----> 1 history_weighted_lstm_model = model.fit(traindata, train_labels, epochs = 2, batch_size = BATCH_SIZE, shuffle=True, class_weight=class_weight)
E:\anaconda\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
707 steps=steps_per_epoch,
708 validation_split=validation_split,
--> 709 shuffle=shuffle)
710
711 # Prepare validation data.
E:\anaconda\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\keras\engine\training.py in _standardize_user_data(self, x, y, sample_weight, class_weight, batch_size, check_steps, steps_name, steps, validation_split, shuffle, extract_tensors_from_dataset)
2649 feed_input_shapes,
2650 check_batch_axis=False, # Don't enforce the batch size.
-> 2651 exception_prefix='input')
2652
2653 if y is not None:
E:\anaconda\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\keras\engine\training_utils.py in standardize_input_data(data, names, shapes, check_batch_axis, exception_prefix)
374 ': expected ' + names[i] + ' to have ' +
375 str(len(shape)) + ' dimensions, but got array '
--> 376 'with shape ' + str(data_shape))
377 if not check_batch_axis:
378 data_shape = data_shape[1:]
ValueError: Error when checking input: expected input_layer to have 3 dimensions, but got array with shape (591, 200)
我原以为模型每次会取 32 个形状为 (591, 200) 的张量,看来我想错了。你能帮我弄清楚该如何解决吗?
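您可能混淆了输入形状。model.fit 期望的输入是带有样本(批次)维的单个三维张量,形状为 (样本数, 591, 200);而错误信息表明它实际收到的是形状为 (591, 200) 的二维数组,也就是缺少了样本这一维。如果数据是由 (591, 200) 张量组成的 Python 列表,请先用 tf.stack(traindata)(或 np.stack)把它堆叠成形状为 (15391, 591, 200) 的单个张量,再传给 model.fit。下面用随机数据演示正确的输入形状: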
import tensorflow as tf
# Mini-batch size handed to model.fit through its batch_size argument.
BATCH_SIZE = 32
def lstm_model(input_shape, units):
    """Assemble an LSTM -> Dense(1) -> sigmoid classifier.

    Args:
        input_shape: Per-sample shape (no batch axis) given to the input layer.
        units: Size of the LSTM layer.

    Returns:
        The ``tf.keras.Model`` connecting the input layer to the sigmoid output.
    """
    layers = tf.keras.layers

    # Declare the layers first, then wire them together in a single expression.
    inputs = layers.Input(shape=input_shape, dtype="float32", name="input_layer")
    recurrent = layers.LSTM(units, name="lstm_layer_1", activation="tanh", return_sequences=False)
    projection = layers.Dense(units=1, name="full_connection_layer_1")
    squash = layers.Activation("sigmoid", name="activation_layer")

    outputs = squash(projection(recurrent(inputs)))
    return tf.keras.Model(inputs=inputs, outputs=outputs)
# The model's per-sample input shape is (words, embedding size); the batch axis
# is added by Keras.
model = lstm_model(input_shape=(591, 200), units=32)
model.summary()

# Random stand-in data with the layout model.fit needs: ONE tensor of shape
# (samples, words, embedding). With real data held as a Python list of
# (591, 200) tensors, stack it first, e.g. tf.stack(traindata).
samples, words, embedding_representation = 64, 591, 200
traindata = tf.random.normal((samples, words, embedding_representation))
# One 0/1 label per sample; sized from `samples` (previously a hard-coded 64)
# so the labels cannot drift out of sync with traindata.
train_labels = tf.random.uniform((samples, 1), maxval=2, dtype=tf.int32)
# Per-class loss weights: class 1 is weighted 50x relative to class 0.
class_weight = {0: 1.,
                1: 50.}
model.compile(loss='binary_crossentropy', optimizer=tf.keras.optimizers.Adam(learning_rate=1e-2), metrics=['binary_accuracy', 'Precision', "Recall", "AUC"])
history_weighted_lstm_model = model.fit(traindata, train_labels, epochs = 2, batch_size = BATCH_SIZE, shuffle=True, class_weight=class_weight)
Model: "model_2"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_layer (InputLayer) [(None, 591, 200)] 0
lstm_layer_1 (LSTM) (None, 32) 29824
full_connection_layer_1 (De (None, 1) 33
nse)
activation_layer (Activatio (None, 1) 0
n)
=================================================================
Total params: 29,857
Trainable params: 29,857
Non-trainable params: 0
_________________________________________________________________
Epoch 1/2
2/2 [==============================] - 5s 104ms/step - loss: 20.8765 - binary_accuracy: 0.5000 - precision: 0.5758 - recall: 0.5135 - auc: 0.5010
Epoch 2/2
2/2 [==============================] - 0s 100ms/step - loss: 6.4090 - binary_accuracy: 0.7188 - precision: 0.6727 - recall: 1.0000 - auc: 0.9780
我是 TensorFlow 的新手,正在构建一个简单的 LSTM 来做情感分析(二元分类)。我有一个名为 traindata 的 Python 列表,其中包含 15391 个句子。所有句子都已经用预训练模型完成了嵌入。现在每个句子的形状是 (591, 200):每个句子有 591 个词,每个词被嵌入为一个形状为 (200,) 的向量。因此,数据是一个由 (591, 200) 张量组成的 Python 列表。下面是我的模型:
# Mini-batch size handed to model.fit through its batch_size argument.
BATCH_SIZE = 32  # alternative value left by the author: 128
def lstm_model(input_shape, units):
    """Build a single-layer LSTM binary classifier with the Keras functional API.

    Args:
        input_shape: Shape of one sample without the batch axis; forwarded
            unchanged to ``Input(shape=...)``.
        units: Number of units in the LSTM layer.

    Returns:
        A ``Model`` that maps the input layer to a single sigmoid-activated
        output.
    """
    input_data = Input(shape=input_shape, dtype="float32", name="input_layer")
    # The LSTM layer takes its input shape from `input_data`. The previous
    # hard-coded input_shape=(591, 200) keyword duplicated (and could silently
    # contradict) the `input_shape` parameter, so it is no longer passed.
    x = LSTM(units, name="lstm_layer_1", activation="tanh", return_sequences=False)(input_data)
    # One output unit followed by a separate sigmoid activation layer.
    x = Dense(units=1, name="full_connection_layer_1")(x)
    x = Activation("sigmoid", name="activation_layer")(x)
    model = Model(inputs=input_data, outputs=x)
    return model
# Seed TensorFlow's global random generator BEFORE the model is built, so the
# initial layer weights are created under the seed as well (seeding after
# construction leaves the weight initialisation unseeded).
tf.compat.v1.random.set_random_seed(111)
# traindata[0].shape is the shape of one sentence, i.e. without the batch axis.
model = lstm_model(traindata[0].shape, 32)
model.summary()
model.compile(loss='binary_crossentropy', optimizer=tf.keras.optimizers.Adam(learning_rate=1e-2), metrics=['binary_accuracy', 'Precision', "Recall", "AUC"])
上述模型的摘要(model.summary() 的输出)如下:
Model: "model_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_layer (InputLayer) [(None, 591, 200)] 0
_________________________________________________________________
lstm_layer_1 (LSTM) (None, 32) 29824
_________________________________________________________________
full_connection_layer_1 (Den (None, 1) 33
_________________________________________________________________
activation_layer (Activation (None, 1) 0
=================================================================
Total params: 29,857
Trainable params: 29,857
Non-trainable params: 0
我指定了 BATCH_SIZE = 32,然后调用 model.fit 进行训练:
# traindata is described in the question as a Python list of (591, 200)
# tensors; this call raises the ValueError shown in the traceback that follows.
history_weighted_lstm_model = model.fit(traindata, train_labels, epochs = 2, batch_size = BATCH_SIZE, shuffle=True, class_weight=class_weight)
不幸的是,它引发了错误:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_5152/3209127219.py in <module>
----> 1 history_weighted_lstm_model = model.fit(traindata, train_labels, epochs = 2, batch_size = BATCH_SIZE, shuffle=True, class_weight=class_weight)
E:\anaconda\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
707 steps=steps_per_epoch,
708 validation_split=validation_split,
--> 709 shuffle=shuffle)
710
711 # Prepare validation data.
E:\anaconda\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\keras\engine\training.py in _standardize_user_data(self, x, y, sample_weight, class_weight, batch_size, check_steps, steps_name, steps, validation_split, shuffle, extract_tensors_from_dataset)
2649 feed_input_shapes,
2650 check_batch_axis=False, # Don't enforce the batch size.
-> 2651 exception_prefix='input')
2652
2653 if y is not None:
E:\anaconda\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\keras\engine\training_utils.py in standardize_input_data(data, names, shapes, check_batch_axis, exception_prefix)
374 ': expected ' + names[i] + ' to have ' +
375 str(len(shape)) + ' dimensions, but got array '
--> 376 'with shape ' + str(data_shape))
377 if not check_batch_axis:
378 data_shape = data_shape[1:]
ValueError: Error when checking input: expected input_layer to have 3 dimensions, but got array with shape (591, 200)
我原以为模型每次会取 32 个形状为 (591, 200) 的张量,看来我想错了。你能帮我弄清楚该如何解决吗?
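您可能混淆了输入形状。model.fit 期望的输入是带有样本(批次)维的单个三维张量,形状为 (样本数, 591, 200);而错误信息表明它实际收到的是形状为 (591, 200) 的二维数组,也就是缺少了样本这一维。如果数据是由 (591, 200) 张量组成的 Python 列表,请先用 tf.stack(traindata)(或 np.stack)把它堆叠成形状为 (15391, 591, 200) 的单个张量,再传给 model.fit。下面用随机数据演示正确的输入形状: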
import tensorflow as tf
# Mini-batch size handed to model.fit through its batch_size argument.
BATCH_SIZE = 32
def lstm_model(input_shape, units):
    """Assemble an LSTM -> Dense(1) -> sigmoid classifier.

    Args:
        input_shape: Per-sample shape (no batch axis) given to the input layer.
        units: Size of the LSTM layer.

    Returns:
        The ``tf.keras.Model`` connecting the input layer to the sigmoid output.
    """
    layers = tf.keras.layers

    # Declare the layers first, then wire them together in a single expression.
    inputs = layers.Input(shape=input_shape, dtype="float32", name="input_layer")
    recurrent = layers.LSTM(units, name="lstm_layer_1", activation="tanh", return_sequences=False)
    projection = layers.Dense(units=1, name="full_connection_layer_1")
    squash = layers.Activation("sigmoid", name="activation_layer")

    outputs = squash(projection(recurrent(inputs)))
    return tf.keras.Model(inputs=inputs, outputs=outputs)
# The model's per-sample input shape is (words, embedding size); the batch axis
# is added by Keras.
model = lstm_model(input_shape=(591, 200), units=32)
model.summary()

# Random stand-in data with the layout model.fit needs: ONE tensor of shape
# (samples, words, embedding). With real data held as a Python list of
# (591, 200) tensors, stack it first, e.g. tf.stack(traindata).
samples, words, embedding_representation = 64, 591, 200
traindata = tf.random.normal((samples, words, embedding_representation))
# One 0/1 label per sample; sized from `samples` (previously a hard-coded 64)
# so the labels cannot drift out of sync with traindata.
train_labels = tf.random.uniform((samples, 1), maxval=2, dtype=tf.int32)
# Per-class loss weights: class 1 is weighted 50x relative to class 0.
class_weight = {0: 1.,
                1: 50.}
model.compile(loss='binary_crossentropy', optimizer=tf.keras.optimizers.Adam(learning_rate=1e-2), metrics=['binary_accuracy', 'Precision', "Recall", "AUC"])
history_weighted_lstm_model = model.fit(traindata, train_labels, epochs = 2, batch_size = BATCH_SIZE, shuffle=True, class_weight=class_weight)
Model: "model_2"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_layer (InputLayer) [(None, 591, 200)] 0
lstm_layer_1 (LSTM) (None, 32) 29824
full_connection_layer_1 (De (None, 1) 33
nse)
activation_layer (Activatio (None, 1) 0
n)
=================================================================
Total params: 29,857
Trainable params: 29,857
Non-trainable params: 0
_________________________________________________________________
Epoch 1/2
2/2 [==============================] - 5s 104ms/step - loss: 20.8765 - binary_accuracy: 0.5000 - precision: 0.5758 - recall: 0.5135 - auc: 0.5010
Epoch 2/2
2/2 [==============================] - 0s 100ms/step - loss: 6.4090 - binary_accuracy: 0.7188 - precision: 0.6727 - recall: 1.0000 - auc: 0.9780