Keras character level LSTM text classification not training

I am making a Keras model (my first model) for character-level text classification using an LSTM. The model is supposed to classify messages from Twitch chat as normal, spam, or offensive. However, the results I am getting are very disappointing and confusing.

The LSTM network learns very little, and no matter what I do the accuracy is terrible.

Here is my code:

import tensorflow as tf
# import tensorflowjs.converters
from tensorflow import keras
from tensorflow.keras import layers
import json
import numpy as np
import re
import random
import sys

np.set_printoptions(threshold=sys.maxsize)

vocab = " qwertyuiopasdfghjklñzxcvbnmç1234567890?¿¡!,:-+/@#áéíóú\/"

dropout = 0.2

x_train = []
y_train = []
one_hot_encode = []
sentence = []

# amount of examples in each class
maxofeachtype = 1600

countnormal = 0
countspam = 0
countofensivo = 0

# Load dataset from data.json
with open("./data.json", 'r', encoding="utf8") as file:
    data = json.load(file)

# shuffle it
random.shuffle(data)

# create the vocabulary map
mapping = {}
for x in range(len(vocab)):
    mapping[vocab[x]] = x

# balance the dataset by capping each class at maxofeachtype examples
for example in data:
    if(example["y"] == [1, 0, 0] and countnormal < maxofeachtype):
        countnormal += 1
    elif(example["y"] == [0, 1, 0] and countspam < maxofeachtype):
        countspam += 1
    elif(example["y"] == [0, 0, 1] and countofensivo < maxofeachtype):
        countofensivo += 1
    elif(countnormal == maxofeachtype or countspam == maxofeachtype or countofensivo == maxofeachtype):
        continue

    # keep only the characters allowed in vocab (note: this pattern also strips spaces, "-" and "+")
    cleanexample = re.sub(
        r'[^qwertyuiopasdfghjklñzxcvbnmç1234567890?¿¡!,:@#áéíóú\/]', '', str(example["x"]))

    # pad with trailing spaces up to 500 characters (the maximum length of a Twitch message)
    if len(cleanexample) != 500:
        for a in range(500 - len(cleanexample)):
            cleanexample = cleanexample + " "
    for character in cleanexample:
        sentence.append(mapping[character])

    # print(sentence)
    # one-hot encode the character indices (despite the name, this becomes the x input)
    y_train_ohe = tf.one_hot(sentence, depth=len(vocab)).numpy()
    # print(y_train_ohe)
    x_train.append(y_train_ohe)
    y_train.append(np.array(example["y"]))
    sentence = []

x_train = np.array(x_train)
y_train = np.array(y_train)
""" print(x_train[0][0:5], x_train[0][-5:], y_train[0]) """
print(x_train.shape[1], x_train.shape[2])
print(x_train.shape)
print(y_train.shape)

# Create the model
model = keras.Sequential()

model.add(layers.LSTM(256, activation="tanh",
                      return_sequences=True, dropout=dropout, input_shape=(500, 57)))

model.add(layers.LSTM(128, activation="tanh",
                      return_sequences=False, dropout=dropout))

model.add(layers.Dense(3, activation="softmax"))

optimizer = keras.optimizers.Adam(learning_rate=0.01)

model.compile(optimizer=optimizer, loss="categorical_crossentropy",
              metrics=["accuracy"])

model.summary()

model.fit(x=x_train, y=y_train, epochs=15, shuffle=True,
          batch_size=25, validation_split=0.2)

model.save('model_py.h5')

# tensorflowjs.converters.save_keras_model(model, "./modelo_js")

Before processing, the training examples look like this ([1, 0, 0] means normal, [0, 1, 0] means offensive language, and [0, 0, 1] means spam):

"x": "sentence",
        "y": [
            1,
            0,
            0
        ]

After processing they look like this. I one-hot encode them into vectors of length 57, the vocabulary size. The arrays that start with a 1 are all spaces:

[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0.]]
  ....
 [1 0 0]
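
For reference, here is a minimal sketch that reproduces this encoding on a short made-up string, using the same vocab and mapping as in the code above:

import tensorflow as tf

vocab = " qwertyuiopasdfghjklñzxcvbnmç1234567890?¿¡!,:-+/@#áéíóú\/"
mapping = {ch: i for i, ch in enumerate(vocab)}

text = "hola"  # hypothetical message
indices = [mapping[c] for c in text]  # map each character to its vocab index
one_hot = tf.one_hot(indices, depth=len(vocab)).numpy()
print(one_hot.shape)  # (4, 57): one 57-dimensional row per character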

在对每个 class 和 validation_split = 0.2 的 1600 个示例进行训练后,结果如下:

(4800, 500, 57)
(4800, 3)
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
lstm (LSTM)                  (None, 500, 256)          321536
_________________________________________________________________
lstm_1 (LSTM)                (None, 128)               197120
_________________________________________________________________
dense (Dense)                (None, 3)                 387
=================================================================
Total params: 519,043
Trainable params: 519,043
Non-trainable params: 0
_________________________________________________________________
Epoch 1/15
2020-09-09 12:35:47.606648: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library cublas64_10.dll
2020-09-09 12:35:47.872095: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library cudnn64_7.dll
154/154 [==============================] - 13s 87ms/step - loss: 1.0811 - accuracy: 0.4120 - val_loss: 2.0132 - val_accuracy: 0.0219
Epoch 2/15
154/154 [==============================] - 12s 78ms/step - loss: 1.0577 - accuracy: 0.4177 - val_loss: 2.0314 - val_accuracy: 0.0000e+00
Epoch 3/15
154/154 [==============================] - 12s 76ms/step - loss: 1.0457 - accuracy: 0.4154 - val_loss: 1.6968 - val_accuracy: 0.0000e+00
Epoch 4/15
154/154 [==============================] - 12s 76ms/step - loss: 1.0506 - accuracy: 0.4161 - val_loss: 1.7731 - val_accuracy: 0.0000e+00
Epoch 5/15
154/154 [==============================] - 11s 73ms/step - loss: 1.0511 - accuracy: 0.4313 - val_loss: 1.9052 - val_accuracy: 0.0000e+00
Epoch 6/15
154/154 [==============================] - 12s 75ms/step - loss: 1.0473 - accuracy: 0.4104 - val_loss: 1.6291 - val_accuracy: 0.0000e+00
Epoch 7/15
154/154 [==============================] - 13s 84ms/step - loss: 1.0464 - accuracy: 0.4135 - val_loss: 1.8916 - val_accuracy: 0.0000e+00
Epoch 8/15
154/154 [==============================] - 12s 76ms/step - loss: 1.0404 - accuracy: 0.4208 - val_loss: 1.8094 - val_accuracy: 0.0000e+00
Epoch 9/15
154/154 [==============================] - 12s 76ms/step - loss: 1.0449 - accuracy: 0.4096 - val_loss: 1.9690 - val_accuracy: 0.0219
Epoch 10/15
154/154 [==============================] - 12s 77ms/step - loss: 1.0489 - accuracy: 0.4104 - val_loss: 1.9596 - val_accuracy: 0.0000e+00
Epoch 11/15
154/154 [==============================] - 13s 83ms/step - loss: 1.0455 - accuracy: 0.4141 - val_loss: 1.8082 - val_accuracy: 0.0000e+00
Epoch 12/15
154/154 [==============================] - 12s 76ms/step - loss: 1.0465 - accuracy: 0.4219 - val_loss: 1.7066 - val_accuracy: 0.0000e+00
Epoch 13/15
154/154 [==============================] - 12s 75ms/step - loss: 1.0424 - accuracy: 0.4161 - val_loss: 1.5192 - val_accuracy: 0.0000e+00
Epoch 14/15
154/154 [==============================] - 12s 75ms/step - loss: 1.0481 - accuracy: 0.4154 - val_loss: 1.5999 - val_accuracy: 0.0000e+00
Epoch 15/15
154/154 [==============================] - 12s 77ms/step - loss: 1.0476 - accuracy: 0.4008 - val_loss: 2.0612 - val_accuracy: 0.0000e+00 

The strange thing is that if I increase the validation split, the results improve. That makes no sense to me, since the model is training on less data.
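
One caveat worth noting: Keras takes validation_split from the end of the arrays before its own shuffling, so an explicit shuffled split is a way to rule out an ordering effect. A sketch (the split fraction mirrors validation_split = 0.2; I have not verified this changes the outcome):

# Sketch: explicit shuffled split instead of validation_split (illustrative only)
perm = np.random.permutation(len(x_train))
x_shuf, y_shuf = x_train[perm], y_train[perm]
split_at = int(0.8 * len(x_shuf))
x_tr, x_val = x_shuf[:split_at], x_shuf[split_at:]
y_tr, y_val = y_shuf[:split_at], y_shuf[split_at:]
model.fit(x=x_tr, y=y_tr, epochs=15, batch_size=25,
          validation_data=(x_val, y_val))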

This is with validation_split = 0.6:

77/77 [==============================] - 8s 103ms/step - loss: 1.0352 - accuracy: 0.4432 - val_loss: 1.4233 - val_accuracy: 0.2313
Epoch 2/15
77/77 [==============================] - 7s 93ms/step - loss: 0.9906 - accuracy: 0.4443 - val_loss: 1.7316 - val_accuracy: 0.2937
Epoch 3/15
77/77 [==============================] - 7s 92ms/step - loss: 0.9863 - accuracy: 0.4812 - val_loss: 1.5367 - val_accuracy: 0.2313
Epoch 4/15
77/77 [==============================] - 7s 94ms/step - loss: 0.9874 - accuracy: 0.4635 - val_loss: 1.4075 - val_accuracy: 0.2937
Epoch 5/15
77/77 [==============================] - 7s 93ms/step - loss: 0.9905 - accuracy: 0.4594 - val_loss: 1.5759 - val_accuracy: 0.2937
Epoch 6/15
77/77 [==============================] - 7s 93ms/step - loss: 0.9808 - accuracy: 0.4703 - val_loss: 1.3886 - val_accuracy: 0.2937
Epoch 7/15
77/77 [==============================] - 7s 96ms/step - loss: 0.9815 - accuracy: 0.4781 - val_loss: 1.2495 - val_accuracy: 0.2313
Epoch 8/15
77/77 [==============================] - 7s 96ms/step - loss: 0.9824 - accuracy: 0.4698 - val_loss: 1.4516 - val_accuracy: 0.2313
Epoch 9/15
77/77 [==============================] - 7s 92ms/step - loss: 0.9916 - accuracy: 0.4573 - val_loss: 1.4488 - val_accuracy: 0.2313
Epoch 10/15
77/77 [==============================] - 7s 90ms/step - loss: 0.9858 - accuracy: 0.4760 - val_loss: 1.3868 - val_accuracy: 0.2313
Epoch 11/15
77/77 [==============================] - 7s 93ms/step - loss: 0.9861 - accuracy: 0.4734 - val_loss: 1.5702 - val_accuracy: 0.2313
Epoch 12/15
77/77 [==============================] - 7s 91ms/step - loss: 0.9880 - accuracy: 0.4630 - val_loss: 1.4439 - val_accuracy: 0.2313
Epoch 13/15
77/77 [==============================] - 7s 91ms/step - loss: 0.9796 - accuracy: 0.4865 - val_loss: 1.3597 - val_accuracy: 0.2313
Epoch 14/15
77/77 [==============================] - 7s 91ms/step - loss: 0.9832 - accuracy: 0.4745 - val_loss: 1.5791 - val_accuracy: 0.2313
Epoch 15/15
77/77 [==============================] - 7s 90ms/step - loss: 0.9919 - accuracy: 0.4760 - val_loss: 1.6243 - val_accuracy: 0.2313

And with validation_split = 0.8:

39/39 [==============================] - 7s 171ms/step - loss: 1.1238 - accuracy: 0.4484 - val_loss: 1.3041 - val_accuracy: 0.3158
Epoch 2/15
39/39 [==============================] - 6s 143ms/step - loss: 0.9795 - accuracy: 0.4692 - val_loss: 1.2562 - val_accuracy: 0.3174
Epoch 3/15
39/39 [==============================] - 6s 146ms/step - loss: 0.9757 - accuracy: 0.4724 - val_loss: 1.3583 - val_accuracy: 0.3437
Epoch 4/15
39/39 [==============================] - 6s 149ms/step - loss: 0.9741 - accuracy: 0.4703 - val_loss: 1.3565 - val_accuracy: 0.2976
Epoch 5/15
39/39 [==============================] - 6s 148ms/step - loss: 0.9748 - accuracy: 0.4578 - val_loss: 1.3904 - val_accuracy: 0.2976
Epoch 6/15
39/39 [==============================] - 5s 137ms/step - loss: 0.9697 - accuracy: 0.4755 - val_loss: 1.3418 - val_accuracy: 0.2976
Epoch 7/15
39/39 [==============================] - 5s 136ms/step - loss: 0.9716 - accuracy: 0.4765 - val_loss: 1.3053 - val_accuracy: 0.3262
Epoch 8/15
39/39 [==============================] - 5s 136ms/step - loss: 0.9748 - accuracy: 0.4557 - val_loss: 1.3529 - val_accuracy: 0.2976
Epoch 9/15
39/39 [==============================] - 5s 140ms/step - loss: 0.9768 - accuracy: 0.4505 - val_loss: 1.3260 - val_accuracy: 0.2976
Epoch 10/15
39/39 [==============================] - 5s 136ms/step - loss: 0.9724 - accuracy: 0.4859 - val_loss: 1.3351 - val_accuracy: 0.3627
Epoch 11/15
39/39 [==============================] - 6s 143ms/step - loss: 0.9748 - accuracy: 0.4588 - val_loss: 1.3203 - val_accuracy: 0.3770
Epoch 12/15
39/39 [==============================] - 6s 144ms/step - loss: 0.9690 - accuracy: 0.4640 - val_loss: 1.3207 - val_accuracy: 0.3517
Epoch 13/15
39/39 [==============================] - 5s 137ms/step - loss: 0.9661 - accuracy: 0.4369 - val_loss: 1.3153 - val_accuracy: 0.3681
Epoch 14/15
39/39 [==============================] - 6s 141ms/step - loss: 0.9628 - accuracy: 0.4661 - val_loss: 1.3405 - val_accuracy: 0.2976
Epoch 15/15
39/39 [==============================] - 5s 137ms/step - loss: 0.9625 - accuracy: 0.4703 - val_loss: 1.3586 - val_accuracy: 0.3457

I have tried using only Dense layers and the results are much better, which makes no sense to me since they cannot understand sequences. However, it does rule out the possibility that the dataset is wrong.

With this configuration (validation_split back to 0.2):

model = keras.Sequential()

model.add(layers.Input(shape=(500, 57)))

model.add(layers.Flatten())

model.add(layers.Dense(256, activation="relu"))

model.add(layers.Dense(128, activation="relu"))

model.add(layers.Dense(64, activation="relu"))

model.add(layers.Dense(3, activation="softmax"))

optimizer = keras.optimizers.Adam(learning_rate=0.01)

model.compile(optimizer=optimizer, loss="categorical_crossentropy",
              metrics=["accuracy"])

I get these results:

154/154 [==============================] - 1s 6ms/step - loss: 0.7377 - accuracy: 0.7844 - val_loss: 1.4061 - val_accuracy: 0.0250
Epoch 2/15
154/154 [==============================] - 1s 4ms/step - loss: 0.3479 - accuracy: 0.8448 - val_loss: 0.8703 - val_accuracy: 0.6927
Epoch 3/15
154/154 [==============================] - 1s 4ms/step - loss: 0.3033 - accuracy: 0.8794 - val_loss: 1.4597 - val_accuracy: 0.6938
Epoch 4/15
154/154 [==============================] - 1s 4ms/step - loss: 0.2899 - accuracy: 0.8966 - val_loss: 1.6684 - val_accuracy: 0.4896
Epoch 5/15
154/154 [==============================] - 1s 4ms/step - loss: 0.2447 - accuracy: 0.9042 - val_loss: 1.6465 - val_accuracy: 0.4812
Epoch 6/15
154/154 [==============================] - 1s 4ms/step - loss: 0.2269 - accuracy: 0.9211 - val_loss: 3.9954 - val_accuracy: 0.7312
Epoch 7/15
154/154 [==============================] - 1s 4ms/step - loss: 0.2071 - accuracy: 0.9201 - val_loss: 2.7729 - val_accuracy: 0.4698
Epoch 8/15
154/154 [==============================] - 1s 4ms/step - loss: 0.2081 - accuracy: 0.9302 - val_loss: 5.1325 - val_accuracy: 0.4229
Epoch 9/15
154/154 [==============================] - 1s 4ms/step - loss: 0.1581 - accuracy: 0.9378 - val_loss: 4.4410 - val_accuracy: 0.3688
Epoch 10/15
154/154 [==============================] - 1s 4ms/step - loss: 0.2184 - accuracy: 0.9333 - val_loss: 2.6669 - val_accuracy: 0.5396
Epoch 11/15
154/154 [==============================] - 1s 4ms/step - loss: 0.1673 - accuracy: 0.9341 - val_loss: 3.6476 - val_accuracy: 0.2750
Epoch 12/15
154/154 [==============================] - 1s 4ms/step - loss: 0.2111 - accuracy: 0.9443 - val_loss: 1.6768 - val_accuracy: 0.6885
Epoch 13/15
154/154 [==============================] - 1s 4ms/step - loss: 0.1210 - accuracy: 0.9547 - val_loss: 2.6785 - val_accuracy: 0.5406
Epoch 14/15
154/154 [==============================] - 1s 4ms/step - loss: 0.1179 - accuracy: 0.9542 - val_loss: 3.4468 - val_accuracy: 0.4385
Epoch 15/15
154/154 [==============================] - 1s 4ms/step - loss: 0.1265 - accuracy: 0.9469 - val_loss: 2.0159 - val_accuracy: 0.7083

The val_accuracy jumps around a lot, but at least I know the model is learning something.

Things I have tried (with the LSTM model):

- Changing the number of layers
- Changing the number of neurons in each layer
- Changing the learning rate
- Changing the optimizer to SGD
- Changing the loss function
- Changing the number of epochs
- Changing the number of training examples (duplicating each one)
- Using decay with Adam (see the sketch after this list)
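
For reference, a sketch of what the Adam decay experiment can look like in Keras (the schedule values here are illustrative, not the exact ones used):

# Sketch: learning-rate decay with Adam via a schedule (values are illustrative)
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.01,
    decay_steps=1000,
    decay_rate=0.9)
optimizer = keras.optimizers.Adam(learning_rate=lr_schedule)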

I see two problems here:

Changing only the data (and the loss function), I made an example based on your architecture (with an Embedding layer added):

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import sys

np.set_printoptions(threshold=sys.maxsize)

X_train = ['They like my dog', 'I hate my cat', 'We will love my hamster', 
           'I dislike your llama']
X_test = ['We love our hamster', 'They hate our platypus']
y_train = [1, 0, 1, 0]
y_test = [1, 0]

labels = {0: 'negative', 1: 'positive'}

encoder = keras.preprocessing.text.Tokenizer()

encoder.fit_on_texts(X_train)

X_train = encoder.texts_to_sequences(X_train)
X_test = encoder.texts_to_sequences(X_test)

max_length = max(map(len, X_train))

x_train = keras.preprocessing.sequence.pad_sequences(X_train, maxlen=max_length)
x_test = keras.preprocessing.sequence.pad_sequences(X_test, maxlen=max_length)

x_train = np.array(x_train)
x_test = np.array(x_test)
y_train = np.array(y_train)
y_test = np.array(y_test)

embedding_dim = 4
# print(x_train.shape[1], x_train.shape[2])
print(x_train.shape)
print(y_train.shape)

# Create the model
model = keras.Sequential()

model.add(layers.Embedding(len(encoder.index_word) + 1, embedding_dim))

model.add(layers.LSTM(8, activation="tanh",
                      return_sequences=True, dropout=.2))

model.add(layers.LSTM(8, activation="tanh",
                      return_sequences=False, dropout=.2))

model.add(layers.Dense(2, activation="softmax"))

optimizer = keras.optimizers.Adam(learning_rate=0.01)

model.compile(optimizer=optimizer, loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])

model.build(input_shape=x_train.shape)
model.summary()

history = model.fit(x=x_train, y=y_train, epochs=25, shuffle=True,
          batch_size=25, validation_data=(x_test, y_test))
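
To carry this over to the character-level setup in the question, the same Tokenizer can operate on characters instead of words. An untested sketch, where raw_messages stands in for your cleaned chat strings:

# Sketch: character-level tokenization feeding the Embedding layer
# (raw_messages is a hypothetical list of cleaned Twitch messages)
encoder = keras.preprocessing.text.Tokenizer(char_level=True, filters='')
encoder.fit_on_texts(raw_messages)
seqs = encoder.texts_to_sequences(raw_messages)
x = keras.preprocessing.sequence.pad_sequences(seqs, maxlen=500)
# Each character becomes a small integer; the Embedding layer then learns a
# dense vector per character instead of receiving 57-dimensional one-hot rows.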

Let me know if anything needs clarification.