我怎样才能实现这个模型?

How can I implement this model?

问题陈述

我有 3 个类别(A、B 和 C),以及 6 个特征:

train_x = [[ 6.442  6.338  7.027  8.789 10.009 12.566]
           [ 6.338  7.027  5.338 10.009  8.122 11.217]
           [ 7.027  5.338  5.335  8.122  5.537  6.408]
           [ 5.338  5.335  5.659  5.537  5.241  7.043]]

这些特征表示一个由 3 个类别的字符组成的 5 字符字符串模式(例如 AABBC 等)。假设一个 5 字符的字符串模式按如下方式进行 one-hot 编码(每个字符占 3 位,共 15 位):

train_z = [[0. 0. 1. 0. 0. 1. 0. 0. 1. 0. 0. 1. 1. 0. 0.]    
           [0. 0. 1. 0. 0. 1. 0. 0. 1. 1. 0. 0. 1. 0. 0.]
           [0. 0. 1. 0. 0. 1. 1. 0. 0. 1. 0. 0. 1. 0. 0.]    
           [0. 0. 1. 1. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 1.]]

我的实现

我使用顺序(Sequential)模型对上述问题做了如下实现:

import os

os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

import sys
import time
import random
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
import numpy as np

# <editor-fold desc="handle GPU">
# resolve GPU related issues.
# Ask TensorFlow to enable memory growth on the first visible GPU.
try:
    physical_devices = tf.config.list_physical_devices("GPU")
    tf.config.experimental.set_memory_growth(
        physical_devices[0],  # raises IndexError when the device list is empty
        True,
    )
except Exception:
    # Best effort: every failure, including an empty device list, is reported
    # with the same message and the script carries on.
    print("GPU not found!")
# END of try
# </editor-fold>

# Directories, files and column layout of the input data
CLASS_INDEX = 4  # column of the single class letter; the pattern string is read from the next column (see load_data_k)
FEATURE_START_INDEX = 6  # first feature column; every column from here on is parsed as a float
OUTPUT_PATH = r"./"  # not referenced in the visible code
INPUT_PATH = r"./"  # directory joined with INPUT_DATA_FILE to locate the data file
INPUT_DATA_FILE = "dist-5.dat"
TRAINING_PROGRESS_FILE = "training.txt"  # not referenced in the visible code
MODEL_FILE = "model.h5"  # not referenced in the visible code

# Classification size
CLASSES_COUNT = 3  # presumably the number of class letters (A, B, C); not referenced in the visible code
FEATURES_COUNT = 6  # passed to create_model() as the input width
OUTPUTS_COUNT = 15  # passed to create_model() as the output width

# Network parameters: widths of the two hidden Dense layers.
LAYER_1_NEURON_COUNT = 128
LAYER_2_NEURON_COUNT = 128

# Training parameters.
LEARNING_RATE = 0.01  # learning rate handed to the Adam optimizer
EPOCHS = 1000  # 500
BATCH_SIZE = 10  # batch size handed to model.fit()
NO_OF_INPUT_LINES = 10000  # passed to load_data_k() as top_n_lines
VALIDATION_PART = 0.5  # passed as validation_part: the first int(rows * value) rows become the training set
MODEL_SAVE_FREQUENCY = 10  # not referenced in the visible code

# <editor-fold desc="encoding()">
# <editor-fold desc="def encode(letter)">
def encode(letter: str):
    """One-hot encode a single class letter.

    Args:
        letter: One of ``'A'``, ``'B'`` or ``'C'``.

    Returns:
        A new 3-element list of floats with 1.0 at the position of the class
        (A -> index 0, B -> index 1, C -> index 2).

    Raises:
        ValueError: If ``letter`` is not one of the three known classes.
            Failing here, instead of implicitly returning None, points at the
            offending input rather than at a later concatenation or tensor
            conversion.
    """
    # Built per call so every caller receives its own, independent list.
    one_hot_by_letter = {
        'A': [1.0, 0.0, 0.0],
        'B': [0.0, 1.0, 0.0],
        'C': [0.0, 0.0, 1.0],
    }
    try:
        return one_hot_by_letter[letter]
    except KeyError:
        raise ValueError(
            f"unknown class letter {letter!r}; expected 'A', 'B' or 'C'"
        ) from None
# </editor-fold>

# <editor-fold desc="encode_string()">
def encode_string_1(pattern_str: str):
    """Encode a pattern string as one flat one-hot list.

    Each character is passed through ``encode`` and the resulting lists are
    concatenated, in order, into a single flat list.
    """
    return [bit for ch in pattern_str for bit in encode(ch)]
# END of function

def encode_string_2(pattern_str: str):
    """Encode a pattern string as a list of per-character one-hot lists.

    Unlike ``encode_string_1`` the result is nested: one entry (the value
    returned by ``encode``) per character, in order.
    """
    return [encode(ch) for ch in pattern_str]
# END of function
# </editor-fold>

# <editor-fold desc="def load_data()">
def load_data_k(fname: str, class_index: int, feature_start_index: int, **selection):
    """Load a whitespace-separated data file and split it into train/validation tensors.

    Every line is split on whitespace. The columns from ``feature_start_index``
    onward are parsed as floats (features), column ``class_index`` is one-hot
    encoded with ``encode`` and column ``class_index + 1`` is encoded with
    ``encode_string_1``.

    Args:
        fname: Path of the data file.
        class_index: Column index of the single class letter; the pattern
            string is read from the following column.
        feature_start_index: Index of the first feature column.
        **selection: Optional keyword options:
            top_n_lines: Read at most this many lines from the top of the
                file (a shorter file simply yields all of its lines).
            random_n_lines: Only used when ``top_n_lines`` is absent; sample
                this many lines at random from the whole file.
            validation_part: Split fraction, default 1.0. Above 0.9999 the
                complete data is returned as both the training and the
                validation set. Otherwise the first
                ``int(rows * validation_part)`` rows are the training set and
                the remaining rows the validation set.

    Returns:
        Tuple ``(tx, ty, tz, vx, vy, vz)`` of float32 tensors: training
        features, class one-hots and pattern one-hots, followed by the same
        three for the validation set.
    """
    # The context manager guarantees the file is closed, also on errors.
    with open(fname) as file:
        if "top_n_lines" in selection:
            limit = int(selection["top_n_lines"])
            # zip() stops as soon as either side is exhausted, so a file with
            # fewer than `limit` lines no longer raises StopIteration.
            lines = [line for _, line in zip(range(limit), file)]
        elif "random_n_lines" in selection:
            lines = random.sample(file.readlines(), int(selection["random_n_lines"]))
        else:
            lines = file.readlines()

    data_x, data_y, data_z = [], [], []
    for line in lines:
        row = line.strip().split()  # list of whitespace-separated words
        data_x.append([float(word) for word in row[feature_start_index:]])
        data_y.append(encode(row[class_index]))
        data_z.append(encode_string_1(row[class_index + 1]))
    # END for line in lines

    num_rows = len(data_x)
    given_fraction = selection.get("validation_part", 1.0)
    if given_fraction > 0.9999:
        valid_x, valid_y, valid_z = data_x, data_y, data_z
    else:
        # NOTE(review): despite its name, `validation_part` sizes the leading
        # (training) slice here; confirm this is the intended meaning.
        n = int(num_rows * given_fraction)
        valid_x, valid_y, valid_z = data_x[n:], data_y[n:], data_z[n:]
        data_x, data_y, data_z = data_x[:n], data_y[:n], data_z[:n]
    # END of if-else block

    tx = tf.convert_to_tensor(data_x, np.float32)
    ty = tf.convert_to_tensor(data_y, np.float32)
    tz = tf.convert_to_tensor(data_z, np.float32)
    vx = tf.convert_to_tensor(valid_x, np.float32)
    vy = tf.convert_to_tensor(valid_y, np.float32)
    vz = tf.convert_to_tensor(valid_z, np.float32)

    return tx, ty, tz, vx, vy, vz
# END of the function
# </editor-fold>
# </editor-fold>

# <editor-fold desc="def create_model()">
def create_model(n_hidden_1, n_hidden_2, num_outputs, num_features):
    """Build a small fully connected Sequential network.

    Args:
        n_hidden_1: Units of the first hidden (ReLU) layer.
        n_hidden_2: Units of the second hidden (ReLU) layer.
        num_outputs: Units of the output layer, which has no activation.
        num_features: Length of each input feature vector.

    Returns:
        The (uncompiled) ``tf.keras.Sequential`` model.
    """
    dense = tf.keras.layers.Dense
    return tf.keras.Sequential([
        tf.keras.Input(shape=(num_features,)),
        dense(n_hidden_1, activation="relu"),
        dense(n_hidden_2, activation="relu"),
        dense(num_outputs),
    ])
# </editor-fold>

# custom loss that takes into account the dependency between the 3 bits of each group
def loss(y_true, y_pred):
    """Sum of softmax cross-entropies over consecutive column groups.

    Columns 0-2, 3-5, 6-8, 9-11 and 12-onward of ``y_pred`` are each treated
    as the logits of a separate softmax; ``y_true`` carries the matching
    labels in the same columns. The five per-sample losses are added up.
    """
    column_groups = (
        slice(None, 3),
        slice(3, 6),
        slice(6, 9),
        slice(9, 12),
        slice(12, None),
    )
    total = None
    for cols in column_groups:
        group_loss = tf.nn.softmax_cross_entropy_with_logits(
            y_true[:, cols], y_pred[:, cols]
        )
        total = group_loss if total is None else total + group_loss
    return total


if __name__ == "__main__":
    # The data file may be given as the first command-line argument;
    # otherwise the default location is used.
    len_int = len(sys.argv)
    if len_int > 1:
        arg_str = sys.argv[1]
    else:
        arg_str = os.path.join(INPUT_PATH, INPUT_DATA_FILE)
    # END of if len_int > 1:

    def per_char_accuracy(y_true, y_pred):
        """Per-sample fraction of 3-value groups whose arg-max matches the label.

        The built-in 'accuracy' metric takes a single arg-max over all output
        columns, which says nothing about the individual characters.
        """
        group_shape = (-1, OUTPUTS_COUNT // CLASSES_COUNT, CLASSES_COUNT)
        true_cls = tf.argmax(tf.reshape(y_true, group_shape), axis=-1)
        pred_cls = tf.argmax(tf.reshape(y_pred, group_shape), axis=-1)
        hits = tf.cast(tf.equal(true_cls, pred_cls), tf.float32)
        return tf.reduce_mean(hits, axis=-1)

    # load training data from the disk (honours the command-line argument)
    train_x, train_y, train_z, validate_x, validate_y, validate_z = load_data_k(
        arg_str,
        class_index=CLASS_INDEX,
        feature_start_index=FEATURE_START_INDEX,
        top_n_lines=NO_OF_INPUT_LINES,
        validation_part=VALIDATION_PART
    )

    #print(train_y)
    print("z = " + str(train_z))

    # create the Adam optimizer for the NN model
    opt_function = keras.optimizers.Adam(
        learning_rate=LEARNING_RATE
    )
    # create a sequential NN model
    model = create_model(
        LAYER_1_NEURON_COUNT,
        LAYER_2_NEURON_COUNT,
        OUTPUTS_COUNT,
        FEATURES_COUNT
    )
    # train on the flat pattern encoding, reporting per-character accuracy
    model.compile(optimizer=opt_function, loss=loss, metrics=[per_char_accuracy])
    model.fit(train_x, train_z, epochs=EPOCHS, batch_size=BATCH_SIZE)

问题

这段源代码的问题是:模型没有收敛,即准确度没有随着训练轮数(epoch)的增加而提高。

问题

我该如何实施这个模型?

顺序(Sequential)模型适用于网络只有一个输入和一个输出的情况。在当前设置中,您实际上有多个输出,因为每连续 3 个输出值构成一组、彼此关联。这种关联也可以通过损失函数来强制体现。

import numpy as np
import tensorflow as tf

# random input data: 1000 samples with 6 features each
inp = tf.random.uniform(shape=(1000, 6))

# five independent random class ids per sample, each one-hot encoded on 3 bits
out1, out2, out3, out4, out5 = (
    tf.one_hot(tf.random.uniform(shape=(1000,), dtype=tf.int32, maxval=3), depth=3)
    for _ in range(5)
)

# the five 3-bit groups side by side
out = tf.concat([out1, out2, out3, out4, out5], axis=1)

# a simple sequential model: two ReLU hidden layers and a 15-unit output without activation
model = tf.keras.Sequential([
    tf.keras.Input(shape=(6,)),
    tf.keras.layers.Dense(20, activation="relu"),
    tf.keras.layers.Dense(20, activation="relu"),
    tf.keras.layers.Dense(15),
])


# custom loss that takes into account the dependency between the 3 bits of each group

def loss(y_true, y_pred):
    """Add up one softmax cross-entropy per group of output columns.

    The groups are columns 0-2, 3-5, 6-8, 9-11 and 12-onward; within each
    group ``y_pred`` supplies the logits and ``y_true`` the labels.
    """
    bounds = [(None, 3), (3, 6), (6, 9), (9, 12), (12, None)]
    terms = [
        tf.nn.softmax_cross_entropy_with_logits(y_true[:, lo:hi], y_pred[:, lo:hi])
        for lo, hi in bounds
    ]
    total = terms[0]
    for term in terms[1:]:
        total = total + term
    return total

# stochastic gradient descent with the optimizer's default settings
opt_function = tf.keras.optimizers.SGD()

model.compile(loss=loss, optimizer=opt_function)
# train on the random data in mini-batches of 10
model.fit(x=inp, y=out, batch_size=10)

评估网络时也需要采用同样的思路:您需要对每组 3 位分别取 argmax(共 5 次),从而得到由 5 个类别组成的序列作为输出。

我想这就是问题所在。

 model.add(tf.keras.layers.Dense(num_classes, activation='softmax'))
...
loss=['categorical_crossentropy'] * 5

>>> Shapes (10, 3) and (10, 15) are incompatible

您最好不要那样改动损失函数,而应当修正模型的输出。用 Sequential API 创建的模型是只有单一输入/输出的较简单模型。如果您想把函数式(Functional)API 模型改成这种较简单的结构,就应该把多个输入/输出合并为一个输入/输出;这也意味着标签在 one-hot 编码之后同样需要合并。

WARNING:tensorflow:AutoGraph could not transform <function loss at 0x000001F571B4F820> and will run it as-is. Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, export AUTOGRAPH_VERBOSITY=10) and attach the full output. Cause: module 'gast' has no attribute 'Index' To silence this warning, decorate the function with @tf.autograph.experimental.do_not_convert

这个警告不会让您的模型停止训练,所以可以忽略它。如果模型训练不起来,那么您可能应该开始调整超参数!

在介绍我的解决方案之前,需要先提醒您:它并不严谨,因为这种做法本身就有问题,不过如果您的数据集非常大,它也许可行。您想做的是把每组 3 个值当作多类别(multi-class)问题,同时把各个字符当作多标签(multi-label)问题,这是做不到的——对于顺序模型,您不能这样拆分问题。但如果数据集足够大,您可以把整体当作一个多标签问题来处理;这种情况下,任何一组的 3 个标签中都可能有 2 个同时被激活,因此必须做某种后处理,例如在每组内只把 sigmoid 值最高的那个标签设为激活。

问题在于 keras 如何计算准确度。例如,在下面的代码中

# one sample whose 9 values form three consecutive groups of 3
y_true = np.array([[1, 0, 0,
                    0, 1, 0,
                    0, 0, 1]])
y_pred = np.array([[.8, .1, .1,
                    1, 10, 2,
                    2, 3, 5.5]])

# feed the pair to Keras' Accuracy metric and read back the result
metric = tf.keras.metrics.Accuracy()
metric.update_state(y_true=y_true, y_pred=y_pred)
metric.result().numpy()

计算出的准确度为零。然而,通过逐组比较预测值与真实值:

  1. [.8,.1,.1] 与 [1,0,0]
  2. [1,10,2] 与 [0,1,0]
  3. [2,3,5.5] 与 [0,0,1]

我们知道 y_pred 实际上非常准确,这可能就是您的模型看起来不起作用的原因。为了在当前模型下处理这个问题,在输出层应用 sigmoid 激活可能会有所帮助,您可以通过运行下面的代码来检查:
import numpy as np
import tensorflow as tf 
import keras
from sklearn.preprocessing import MinMaxScaler


def dataset_gen(num_samples):
    """Generate a reproducible synthetic dataset of 5-character patterns.

    Every sample is a random sequence of five class ids in {0, 1, 2}. Its
    feature vector is that sequence multiplied by a fixed 5x6 matrix, and its
    target is the concatenation of the five 3-bit one-hot codes.

    NumPy's global random generator is reseeded with 0 on every call and the
    5x6 matrix is printed.

    Args:
        num_samples: Number of samples to generate.

    Returns:
        Tuple ``(feature_vec, output_vec)`` of arrays with shapes
        ``(num_samples, 6)`` and ``(num_samples, 15)``.
    """
    np.random.seed(0)
    class_ids = np.random.randint(0, 3, size=(num_samples, 5))
    mixing = np.arange(-15, 15).reshape(5, 6) + .1 * np.random.rand(5, 6)
    print(mixing)
    features = class_ids @ mixing

    # Position k of the pattern owns columns 3k .. 3k+2 of the target vector.
    hot_columns = class_ids + np.arange(0, 15, 3)
    one_hot = np.zeros((num_samples, 15))
    sample_rows = np.arange(num_samples)[:, None]
    one_hot[sample_rows, hot_columns] = 1.

    return features, one_hot


def create_model():
    """Build a Sequential network: 6 inputs, 16 ReLU units, 15 sigmoid outputs.

    Returns:
        The (uncompiled) ``tf.keras.Sequential`` model.
    """
    n_hidden, num_features, num_outputs = 16, 6, 15
    return tf.keras.Sequential([
        tf.keras.Input(shape=(num_features,)),
        tf.keras.layers.Dense(n_hidden, activation="relu"),
        # NOTE(review): a loss built on softmax_cross_entropy_with_logits
        # would receive these sigmoid outputs as logits - confirm intended.
        tf.keras.layers.Dense(num_outputs, activation="sigmoid"),
    ])

def loss(y_true, y_pred):
    """Total softmax cross-entropy over five consecutive column groups.

    Columns 0-2, 3-5, 6-8, 9-11 and 12-onward are each scored with their own
    softmax cross-entropy (``y_pred`` as logits, ``y_true`` as labels) and the
    five per-sample results are summed.
    """
    def group_ce(start, stop=None):
        # cross-entropy restricted to columns start .. stop-1 (to the end if stop is None)
        return tf.nn.softmax_cross_entropy_with_logits(
            labels=y_true[:, start:stop],
            logits=y_pred[:, start:stop],
        )

    return group_ce(0, 3) + group_ce(3, 6) + group_ce(6, 9) + group_ce(9, 12) + group_ce(12)

# Build the synthetic dataset that is fed to fit() below.
# NOTE(review): 1000 samples is an arbitrary choice - adjust as needed.
test_x, test_z = dataset_gen(1000)

# create a sequential NN model, trained with Keras' default Adam optimizer
model = create_model()
model.compile(optimizer='adam', loss=loss, metrics=['accuracy'])

# stop once the validation accuracy has not improved for 100 epochs
es = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', mode='max', verbose=1, patience=100)
history = model.fit(test_x, test_z, epochs=2000, batch_size=8,
                    callbacks=[es], validation_split=0.2,
                    verbose=0)