我怎样才能实现这个模型?
How can I implement this model?
问题陈述
我有 3 个类(A、B 和 C),以及 6 个特征(features):
train_x = [[ 6.442 6.338 7.027 8.789 10.009 12.566]
[ 6.338 7.027 5.338 10.009 8.122 11.217]
[ 7.027 5.338 5.335 8.122 5.537 6.408]
[ 5.338 5.335 5.659 5.537 5.241 7.043]]
这些特征表示由 3-类(例如 AABBC 等)组成的 5 个字符的字符串模式。让,一个5个字符的字符串模式被one-hot编码如下:
train_z = [[0. 0. 1. 0. 0. 1. 0. 0. 1. 0. 0. 1. 1. 0. 0.]
[0. 0. 1. 0. 0. 1. 0. 0. 1. 1. 0. 0. 1. 0. 0.]
[0. 0. 1. 0. 0. 1. 1. 0. 0. 1. 0. 0. 1. 0. 0.]
[0. 0. 1. 1. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 1.]]
我的实现
我已经使用顺序模型实现了上述问题,如下所示:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
import sys
import time
import random
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
import numpy as np
# <editor-fold desc="handle GPU">
# resolve GPU related issues.
# Best-effort GPU configuration: enable memory growth so TensorFlow does not
# grab all GPU memory up front.  Any failure (no GPU, empty device list)
# falls through to CPU with only a console message.
try:
    physical_devices = tf.config.list_physical_devices("GPU")
    # Only the first GPU is configured; an IndexError here is caught below.
    tf.config.experimental.set_memory_growth(physical_devices[0], True)
except Exception as e:
    print("GPU not found!")
# END of try
# Directories and files
CLASS_INDEX = 4           # column of the class letter in each data row
FEATURE_START_INDEX = 6   # first column of the float feature vector
OUTPUT_PATH = r"./"
INPUT_PATH = r"./"
INPUT_DATA_FILE = "dist-5.dat"
TRAINING_PROGRESS_FILE = "training.txt"
MODEL_FILE = "model.h5"
# classification size
CLASSES_COUNT = 3         # letters A, B, C
FEATURES_COUNT = 6        # floats per input row
OUTPUTS_COUNT = 15        # 5 characters x 3 classes, concatenated one-hot
# Network parameters.
LAYER_1_NEURON_COUNT = 128
LAYER_2_NEURON_COUNT = 128
# Training parameters.
LEARNING_RATE = 0.01
EPOCHS = 1000 # 500
BATCH_SIZE = 10
NO_OF_INPUT_LINES = 10000
VALIDATION_PART = 0.5     # fraction of loaded rows kept for training
MODEL_SAVE_FREQUENCY = 10
# <editor-fold desc="encoding()">
# <editor-fold desc="def encode(letter)">
def encode(letter: str) -> list[float]:
    """One-hot encode a single class letter ('A', 'B' or 'C').

    Returns a 3-element float list.  Raises ValueError for any other letter
    instead of silently falling through to ``None`` (the original behavior),
    which would only surface later as an opaque TypeError during list
    concatenation.
    """
    mapping = {
        'A': [1.0, 0.0, 0.0],
        'B': [0.0, 1.0, 0.0],
        'C': [0.0, 0.0, 1.0],
    }
    try:
        return mapping[letter]
    except KeyError:
        raise ValueError(f"unknown class letter: {letter!r}") from None
# </editor-fold>
# <editor-fold desc="encode_string()">
def encode_string_1(pattern_str: str) -> list[float]:
    """Encode a pattern string (e.g. "AABBC") as one flat one-hot list.

    Each character contributes 3 bits, so a 5-character pattern yields a
    15-element float list.  Uses a flattening comprehension instead of the
    original repeated list concatenation, which was O(n^2).
    """
    return [bit for ch in pattern_str for bit in encode(ch)]
# END of function
def encode_string_2(pattern_str: str) -> list[list[float]]:
    """Encode a pattern string as a list of per-character one-hot vectors.

    Unlike encode_string_1, the result is nested: one 3-element vector per
    character (e.g. "AB" -> [[1,0,0],[0,1,0]]).
    """
    return [encode(ch) for ch in pattern_str]
# END of function
# </editor-fold>
# <editor-fold desc="def load_data()">
def load_data_k(fname: str, class_index: int, feature_start_index: int, **selection):
    """Load training/validation splits from a whitespace-separated data file.

    Each line holds the class letter at ``class_index``, the 5-character
    pattern at ``class_index + 1``, and float features from
    ``feature_start_index`` onward.

    Keyword selection options:
        top_n_lines:     read only the first N lines.
        random_n_lines:  sample N lines at random.
        validation_part: fraction of rows kept for training; the remainder
                         becomes the validation split.  A value > 0.9999
                         means "use all rows for both splits".

    Returns:
        (train_x, train_y, train_z, valid_x, valid_y, valid_z) as float32
        tensors.
    """
    # Use a context manager so the file handle is always closed
    # (the original opened the file and never closed it).
    with open(fname) as file:
        if "top_n_lines" in selection:
            lines = [next(file) for _ in range(int(selection["top_n_lines"]))]
        elif "random_n_lines" in selection:
            lines = random.sample(file.readlines(), int(selection["random_n_lines"]))
        else:
            lines = file.readlines()

    data_x, data_y, data_z = [], [], []
    for line in lines:
        row = line.strip().split()
        # Feature vector: every column from the feature start index onward.
        data_x.append([float(v) for v in row[feature_start_index:]])
        # Single class letter -> one-hot triple.
        data_y.append(encode(row[class_index]))
        # 5-character pattern -> 15-bit concatenated one-hot vector.
        data_z.append(encode_string_1(row[class_index + 1]))
    # END for line in lines

    given_fraction = selection.get("validation_part", 1.0)
    if given_fraction > 0.9999:
        # Degenerate split: the same rows serve as training and validation.
        valid_x, valid_y, valid_z = data_x, data_y, data_z
    else:
        n = int(len(data_x) * given_fraction)
        valid_x, valid_y, valid_z = data_x[n:], data_y[n:], data_z[n:]
        data_x, data_y, data_z = data_x[:n], data_y[:n], data_z[:n]
    # END of if-else block

    tx = tf.convert_to_tensor(data_x, np.float32)
    ty = tf.convert_to_tensor(data_y, np.float32)
    tz = tf.convert_to_tensor(data_z, np.float32)
    vx = tf.convert_to_tensor(valid_x, np.float32)
    vy = tf.convert_to_tensor(valid_y, np.float32)
    vz = tf.convert_to_tensor(valid_z, np.float32)
    return tx, ty, tz, vx, vy, vz
# END of the function
# </editor-fold>
# </editor-fold>
# <editor-fold desc="def create_model()">
def create_model(n_hidden_1, n_hidden_2, num_outputs, num_features):
    """Build a two-hidden-layer MLP.

    The final Dense layer has no activation: it emits raw logits, matching
    a softmax-cross-entropy-with-logits style loss.
    """
    dense = tf.keras.layers.Dense
    model = tf.keras.Sequential([
        tf.keras.Input(shape=(num_features,)),
        dense(n_hidden_1, activation="relu"),
        dense(n_hidden_2, activation="relu"),
        dense(num_outputs),
    ])
    return model
# </editor-fold>
# custom loss to take into the dependency between the 3 bits
def loss(y_true, y_pred):
    """Sum of five softmax cross-entropies, one per 3-bit character group.

    Treats each consecutive group of 3 outputs as its own 3-way
    classification, which encodes the dependency between the 3 bits.
    """
    group_losses = [
        tf.nn.softmax_cross_entropy_with_logits(y_true[:, i:i + 3], y_pred[:, i:i + 3])
        for i in range(0, 15, 3)
    ]
    return tf.add_n(group_losses)
if __name__ == "__main__":
    # Data file: first CLI argument if given, else the default input file.
    if len(sys.argv) > 1:
        arg_str = sys.argv[1]
    else:
        arg_str = os.path.join(INPUT_PATH, INPUT_DATA_FILE)
    # END of if

    # Load training data from disk.  Bug fix: the original parsed arg_str
    # but then ignored it and always loaded the default file.
    train_x, train_y, train_z, validate_x, validate_y, validate_z = load_data_k(
        arg_str,
        class_index=CLASS_INDEX,
        feature_start_index=FEATURE_START_INDEX,
        top_n_lines=NO_OF_INPUT_LINES,
        validation_part=VALIDATION_PART,
    )
    print("z = " + str(train_z))

    # Adam optimizer (despite the original "SGD" comment, Adam is used).
    opt_function = keras.optimizers.Adam(learning_rate=LEARNING_RATE)

    # Two-hidden-layer MLP emitting 15 logits (5 groups of 3).
    model = create_model(
        LAYER_1_NEURON_COUNT,
        LAYER_2_NEURON_COUNT,
        OUTPUTS_COUNT,
        FEATURES_COUNT,
    )

    model.compile(optimizer=opt_function, loss=loss, metrics=['accuracy'])
    # Bug fix: the validation split was loaded but never used; pass it so
    # generalisation can be monitored while training.
    model.fit(
        train_x, train_z,
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        validation_data=(validate_x, validate_z),
    )
问题
这个源代码的问题是,模型没有收敛,即准确率没有随着训练轮次(epoch)的增加而提高。
问题
我该如何实施这个模型?
只有当网络只有一个输入和一个输出时,才适合使用 Sequential(顺序)模型。在当前设置中,您实际上有多组输出:需要把连续的每 3 个输出值视为相互关联的一组。这一点也可以通过损失函数来强制实现。
import numpy as np
import tensorflow as tf

# Random input data with 6 features.
inp = tf.random.uniform(shape=(1000, 6))

# Five independent 3-way labels, one-hot encoded and concatenated, so every
# consecutive group of 3 bits represents one character class.
outs = [
    tf.one_hot(tf.random.uniform(shape=(1000,), dtype=tf.int32, maxval=3), depth=3)
    for _ in range(5)
]
out = tf.concat(outs, axis=1)

# A simple sequential model emitting 15 raw logits.
model = tf.keras.Sequential()
model.add(tf.keras.Input(shape=(6,)))
model.add(tf.keras.layers.Dense(20, activation="relu"))
model.add(tf.keras.layers.Dense(20, activation="relu"))
model.add(tf.keras.layers.Dense(15))

# Custom loss: one softmax cross-entropy per 3-bit group, summed, to model
# the dependency between the 3 bits of each character.
def loss(y_true, y_pred):
    parts = [
        tf.nn.softmax_cross_entropy_with_logits(y_true[:, i:i + 3], y_pred[:, i:i + 3])
        for i in range(0, 15, 3)
    ]
    return tf.add_n(parts)

opt_function = tf.keras.optimizers.SGD()
model.compile(optimizer=opt_function, loss=loss)
model.fit(inp, out, batch_size=10)
评估网络时也需要使用相同的思路:对每组 3 个输出分别取 argmax(共 5 次),即可得到由 5 个类字符组成的序列作为输出。
我想这就是问题所在。
model.add(tf.keras.layers.Dense(num_classes, activation='softmax'))
...
loss=['categorical_crossentropy'] * 5
>>> Shapes (10, 3) and (10, 15) are incompatible
你真的不想那样搞乱你的损失函数。尝试修复您的输出。使用 Sequential API 创建的模型是具有 single/output 的更简单的模型。如果您想在更简单的布局中更改功能 API 模型,您应该将 inputs/outputs 合并为一个 input/output。这意味着您还应该在单热编码后合并标签。
WARNING:tensorflow:AutoGraph could not transform <function loss at 0x000001F571B4F820> and will run it as-is.
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, export AUTOGRAPH_VERBOSITY=10
) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
To silence this warning, decorate the function with @tf.autograph.experimental.do_not_convert
这个警告不会让你的模型停止训练,所以你可以忽略它。
如果它不训练,那么您可能应该开始调整超参数!
在我提到我的解决方案之前,我会警告您它是不正确的,因为方法是错误的,但如果您有一个非常大的数据集,它可能会起作用。您想要做的是将一组 3 个值视为 multi-class
问题,将字符视为 multi-label
问题,这是不可能的。对于顺序模型,你不能像这样划分你的问题但是如果你有一个大数据集,那么你可以将它视为一个 multi-label
问题作为一个整体,在这种情况下,你会得到 2 个活动标签中的任何一个这 3 组,你必须以某种方式应用 post-processing。说 - 单独设置具有最高 sigmoid 值的标签 active。
问题在于 keras
如何计算准确度。例如,在下面的代码中
# Demonstration: Keras' Accuracy metric compares y_true and y_pred
# element-wise for exact equality, so raw scores/logits in y_pred yield
# zero accuracy even when each 3-value group's argmax matches the label
# (as the surrounding text explains for this example).
y_true = np.array([[1,0,0,0,1,0,0,0,1]])
y_pred = np.array([[.8,.1,.1,1,10,2,2,3,5.5]])
metric = tf.keras.metrics.Accuracy()
metric.update_state(y_true,y_pred)
metric.result().numpy()
通过比较
计算出的准确度为零
[.8,.1,.1]
与 [1,0,0]
[1,10,2]
与 [0,1,0]
[2,3,5.5]
与 [0,0,1]
我们知道 y_pred
实际上非常准确,这可能就是您的模型不起作用的原因。为了在当前模型下处理这个问题,在输出层应用 sigmoid 激活可能会有所帮助,您可以通过 运行 下面的代码
来检查
import numpy as np
import tensorflow as tf
import keras
from sklearn.preprocessing import MinMaxScaler
def dataset_gen(num_samples):
    """Generate a synthetic (feature_vec, output_vec) dataset.

    Each row encodes a 5-character pattern over 3 classes: the features are
    a fixed linear transform of the class indices, and the labels are the
    15-bit (5 positions x 3 classes) sequentially ordered one-hot encoding.
    """
    np.random.seed(0)
    # Five class indices in [0, 3) per sample.
    output_classes = np.random.randint(0, 3, size=(num_samples, 5))
    # Seeded 5x6 linear map (ramp plus small jitter) from classes to features.
    transform_mat = np.arange(-15, 15).reshape(5, 6) + .1 * np.random.rand(5, 6)
    print(transform_mat)
    feature_vec = output_classes @ transform_mat
    # Shift each position's class index into its own 3-slot group.
    output_classes += np.array([0, 3, 6, 9, 12])
    # Scatter ones into the 15-wide one-hot label matrix (vectorized form of
    # the original per-row loop).
    output_vec = np.zeros((num_samples, 15))
    output_vec[np.arange(num_samples)[:, None], output_classes] = 1.
    return feature_vec, output_vec
def create_model():
    # A simple sequential model: 6 features -> 16 ReLU units -> 15 outputs.
    n_hidden,num_features,num_outputs = 16,6,15
    model = tf.keras.Sequential()
    model.add(tf.keras.Input(shape=(num_features,)))
    model.add(tf.keras.layers.Dense(n_hidden,activation="relu"))
    # NOTE(review): these sigmoid outputs are later fed to
    # softmax_cross_entropy_with_logits, which expects raw logits.  The
    # answer text proposes sigmoid deliberately — confirm this is intended.
    model.add(tf.keras.layers.Dense(num_outputs,activation="sigmoid"))
    return model
def loss(y_true, y_pred):
    """Sum of five softmax cross-entropies, one per 3-output group, so each
    consecutive group of 3 values acts as an independent 3-way classifier."""
    per_group = [
        tf.nn.softmax_cross_entropy_with_logits(y_true[:, i:i + 3], y_pred[:, i:i + 3])
        for i in range(0, 15, 3)
    ]
    return tf.add_n(per_group)
# Bug fix: the original referenced undefined test_x / test_z; they must be
# produced by dataset_gen.
test_x, test_z = dataset_gen(1000)

# Create the sequential NN model and compile with the Adam optimizer and the
# per-group cross-entropy loss above.
model = create_model()
model.compile(optimizer='adam', loss=loss, metrics=['accuracy'])

# Stop training once validation accuracy has not improved for 100 epochs.
es = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', mode='max',
                                      verbose=1, patience=100)
# Bug fix: Keras expects a *list* of callbacks, not a bare callback object.
history = model.fit(test_x, test_z, epochs=2000, batch_size=8,
                    callbacks=[es], validation_split=0.2,
                    verbose=0)
问题陈述
我有 3 个类(A、B 和 C),以及 6 个特征(features):
train_x = [[ 6.442 6.338 7.027 8.789 10.009 12.566]
[ 6.338 7.027 5.338 10.009 8.122 11.217]
[ 7.027 5.338 5.335 8.122 5.537 6.408]
[ 5.338 5.335 5.659 5.537 5.241 7.043]]
这些特征表示由 3-类(例如 AABBC 等)组成的 5 个字符的字符串模式。让,一个5个字符的字符串模式被one-hot编码如下:
train_z = [[0. 0. 1. 0. 0. 1. 0. 0. 1. 0. 0. 1. 1. 0. 0.]
[0. 0. 1. 0. 0. 1. 0. 0. 1. 1. 0. 0. 1. 0. 0.]
[0. 0. 1. 0. 0. 1. 1. 0. 0. 1. 0. 0. 1. 0. 0.]
[0. 0. 1. 1. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 1.]]
我的实现
我已经使用顺序模型实现了上述问题,如下所示:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
import sys
import time
import random
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
import numpy as np
# <editor-fold desc="handle GPU">
# resolve GPU related issues.
# Best-effort GPU configuration: enable memory growth so TensorFlow does not
# grab all GPU memory up front.  Any failure (no GPU, empty device list)
# falls through to CPU with only a console message.
try:
    physical_devices = tf.config.list_physical_devices("GPU")
    # Only the first GPU is configured; an IndexError here is caught below.
    tf.config.experimental.set_memory_growth(physical_devices[0], True)
except Exception as e:
    print("GPU not found!")
# END of try
# Directories and files
CLASS_INDEX = 4           # column of the class letter in each data row
FEATURE_START_INDEX = 6   # first column of the float feature vector
OUTPUT_PATH = r"./"
INPUT_PATH = r"./"
INPUT_DATA_FILE = "dist-5.dat"
TRAINING_PROGRESS_FILE = "training.txt"
MODEL_FILE = "model.h5"
# classification size
CLASSES_COUNT = 3         # letters A, B, C
FEATURES_COUNT = 6        # floats per input row
OUTPUTS_COUNT = 15        # 5 characters x 3 classes, concatenated one-hot
# Network parameters.
LAYER_1_NEURON_COUNT = 128
LAYER_2_NEURON_COUNT = 128
# Training parameters.
LEARNING_RATE = 0.01
EPOCHS = 1000 # 500
BATCH_SIZE = 10
NO_OF_INPUT_LINES = 10000
VALIDATION_PART = 0.5     # fraction of loaded rows kept for training
MODEL_SAVE_FREQUENCY = 10
# <editor-fold desc="encoding()">
# <editor-fold desc="def encode(letter)">
def encode(letter: str) -> list[float]:
    """One-hot encode a single class letter ('A', 'B' or 'C').

    Returns a 3-element float list.  Raises ValueError for any other letter
    instead of silently falling through to ``None`` (the original behavior),
    which would only surface later as an opaque TypeError during list
    concatenation.
    """
    mapping = {
        'A': [1.0, 0.0, 0.0],
        'B': [0.0, 1.0, 0.0],
        'C': [0.0, 0.0, 1.0],
    }
    try:
        return mapping[letter]
    except KeyError:
        raise ValueError(f"unknown class letter: {letter!r}") from None
# </editor-fold>
# <editor-fold desc="encode_string()">
def encode_string_1(pattern_str: str) -> list[float]:
    """Encode a pattern string (e.g. "AABBC") as one flat one-hot list.

    Each character contributes 3 bits, so a 5-character pattern yields a
    15-element float list.  Uses a flattening comprehension instead of the
    original repeated list concatenation, which was O(n^2).
    """
    return [bit for ch in pattern_str for bit in encode(ch)]
# END of function
def encode_string_2(pattern_str: str) -> list[list[float]]:
    """Encode a pattern string as a list of per-character one-hot vectors.

    Unlike encode_string_1, the result is nested: one 3-element vector per
    character (e.g. "AB" -> [[1,0,0],[0,1,0]]).
    """
    return [encode(ch) for ch in pattern_str]
# END of function
# </editor-fold>
# <editor-fold desc="def load_data()">
def load_data_k(fname: str, class_index: int, feature_start_index: int, **selection):
    """Load training/validation splits from a whitespace-separated data file.

    Each line holds the class letter at ``class_index``, the 5-character
    pattern at ``class_index + 1``, and float features from
    ``feature_start_index`` onward.

    Keyword selection options:
        top_n_lines:     read only the first N lines.
        random_n_lines:  sample N lines at random.
        validation_part: fraction of rows kept for training; the remainder
                         becomes the validation split.  A value > 0.9999
                         means "use all rows for both splits".

    Returns:
        (train_x, train_y, train_z, valid_x, valid_y, valid_z) as float32
        tensors.
    """
    # Use a context manager so the file handle is always closed
    # (the original opened the file and never closed it).
    with open(fname) as file:
        if "top_n_lines" in selection:
            lines = [next(file) for _ in range(int(selection["top_n_lines"]))]
        elif "random_n_lines" in selection:
            lines = random.sample(file.readlines(), int(selection["random_n_lines"]))
        else:
            lines = file.readlines()

    data_x, data_y, data_z = [], [], []
    for line in lines:
        row = line.strip().split()
        # Feature vector: every column from the feature start index onward.
        data_x.append([float(v) for v in row[feature_start_index:]])
        # Single class letter -> one-hot triple.
        data_y.append(encode(row[class_index]))
        # 5-character pattern -> 15-bit concatenated one-hot vector.
        data_z.append(encode_string_1(row[class_index + 1]))
    # END for line in lines

    given_fraction = selection.get("validation_part", 1.0)
    if given_fraction > 0.9999:
        # Degenerate split: the same rows serve as training and validation.
        valid_x, valid_y, valid_z = data_x, data_y, data_z
    else:
        n = int(len(data_x) * given_fraction)
        valid_x, valid_y, valid_z = data_x[n:], data_y[n:], data_z[n:]
        data_x, data_y, data_z = data_x[:n], data_y[:n], data_z[:n]
    # END of if-else block

    tx = tf.convert_to_tensor(data_x, np.float32)
    ty = tf.convert_to_tensor(data_y, np.float32)
    tz = tf.convert_to_tensor(data_z, np.float32)
    vx = tf.convert_to_tensor(valid_x, np.float32)
    vy = tf.convert_to_tensor(valid_y, np.float32)
    vz = tf.convert_to_tensor(valid_z, np.float32)
    return tx, ty, tz, vx, vy, vz
# END of the function
# </editor-fold>
# </editor-fold>
# <editor-fold desc="def create_model()">
def create_model(n_hidden_1, n_hidden_2, num_outputs, num_features):
    """Build a two-hidden-layer MLP.

    The final Dense layer has no activation: it emits raw logits, matching
    a softmax-cross-entropy-with-logits style loss.
    """
    dense = tf.keras.layers.Dense
    model = tf.keras.Sequential([
        tf.keras.Input(shape=(num_features,)),
        dense(n_hidden_1, activation="relu"),
        dense(n_hidden_2, activation="relu"),
        dense(num_outputs),
    ])
    return model
# </editor-fold>
# custom loss to take into the dependency between the 3 bits
def loss(y_true, y_pred):
    """Sum of five softmax cross-entropies, one per 3-bit character group.

    Treats each consecutive group of 3 outputs as its own 3-way
    classification, which encodes the dependency between the 3 bits.
    """
    group_losses = [
        tf.nn.softmax_cross_entropy_with_logits(y_true[:, i:i + 3], y_pred[:, i:i + 3])
        for i in range(0, 15, 3)
    ]
    return tf.add_n(group_losses)
if __name__ == "__main__":
    # Data file: first CLI argument if given, else the default input file.
    if len(sys.argv) > 1:
        arg_str = sys.argv[1]
    else:
        arg_str = os.path.join(INPUT_PATH, INPUT_DATA_FILE)
    # END of if

    # Load training data from disk.  Bug fix: the original parsed arg_str
    # but then ignored it and always loaded the default file.
    train_x, train_y, train_z, validate_x, validate_y, validate_z = load_data_k(
        arg_str,
        class_index=CLASS_INDEX,
        feature_start_index=FEATURE_START_INDEX,
        top_n_lines=NO_OF_INPUT_LINES,
        validation_part=VALIDATION_PART,
    )
    print("z = " + str(train_z))

    # Adam optimizer (despite the original "SGD" comment, Adam is used).
    opt_function = keras.optimizers.Adam(learning_rate=LEARNING_RATE)

    # Two-hidden-layer MLP emitting 15 logits (5 groups of 3).
    model = create_model(
        LAYER_1_NEURON_COUNT,
        LAYER_2_NEURON_COUNT,
        OUTPUTS_COUNT,
        FEATURES_COUNT,
    )

    model.compile(optimizer=opt_function, loss=loss, metrics=['accuracy'])
    # Bug fix: the validation split was loaded but never used; pass it so
    # generalisation can be monitored while training.
    model.fit(
        train_x, train_z,
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        validation_data=(validate_x, validate_z),
    )
问题
这个源代码的问题是,模型没有收敛,即准确率没有随着训练轮次(epoch)的增加而提高。
问题
我该如何实施这个模型?
只有当网络只有一个输入和一个输出时,才适合使用 Sequential(顺序)模型。在当前设置中,您实际上有多组输出:需要把连续的每 3 个输出值视为相互关联的一组。这一点也可以通过损失函数来强制实现。
import numpy as np
import tensorflow as tf

# Random input data with 6 features.
inp = tf.random.uniform(shape=(1000, 6))

# Five independent 3-way labels, one-hot encoded and concatenated, so every
# consecutive group of 3 bits represents one character class.
outs = [
    tf.one_hot(tf.random.uniform(shape=(1000,), dtype=tf.int32, maxval=3), depth=3)
    for _ in range(5)
]
out = tf.concat(outs, axis=1)

# A simple sequential model emitting 15 raw logits.
model = tf.keras.Sequential()
model.add(tf.keras.Input(shape=(6,)))
model.add(tf.keras.layers.Dense(20, activation="relu"))
model.add(tf.keras.layers.Dense(20, activation="relu"))
model.add(tf.keras.layers.Dense(15))

# Custom loss: one softmax cross-entropy per 3-bit group, summed, to model
# the dependency between the 3 bits of each character.
def loss(y_true, y_pred):
    parts = [
        tf.nn.softmax_cross_entropy_with_logits(y_true[:, i:i + 3], y_pred[:, i:i + 3])
        for i in range(0, 15, 3)
    ]
    return tf.add_n(parts)

opt_function = tf.keras.optimizers.SGD()
model.compile(optimizer=opt_function, loss=loss)
model.fit(inp, out, batch_size=10)
评估网络时也需要使用相同的思路:对每组 3 个输出分别取 argmax(共 5 次),即可得到由 5 个类字符组成的序列作为输出。
我想这就是问题所在。
model.add(tf.keras.layers.Dense(num_classes, activation='softmax'))
...
loss=['categorical_crossentropy'] * 5
>>> Shapes (10, 3) and (10, 15) are incompatible
你真的不想那样搞乱你的损失函数。尝试修复您的输出。使用 Sequential API 创建的模型是具有 single/output 的更简单的模型。如果您想在更简单的布局中更改功能 API 模型,您应该将 inputs/outputs 合并为一个 input/output。这意味着您还应该在单热编码后合并标签。
WARNING:tensorflow:AutoGraph could not transform <function loss at 0x000001F571B4F820> and will run it as-is. Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux,
export AUTOGRAPH_VERBOSITY=10
) and attach the full output. Cause: module 'gast' has no attribute 'Index' To silence this warning, decorate the function with @tf.autograph.experimental.do_not_convert
这个警告不会让你的模型停止训练,所以你可以忽略它。 如果它不训练,那么您可能应该开始调整超参数!
在我提到我的解决方案之前,我会警告您它是不正确的,因为方法是错误的,但如果您有一个非常大的数据集,它可能会起作用。您想要做的是将一组 3 个值视为 multi-class
问题,将字符视为 multi-label
问题,这是不可能的。对于顺序模型,你不能像这样划分你的问题但是如果你有一个大数据集,那么你可以将它视为一个 multi-label
问题作为一个整体,在这种情况下,你会得到 2 个活动标签中的任何一个这 3 组,你必须以某种方式应用 post-processing。说 - 单独设置具有最高 sigmoid 值的标签 active。
问题在于 keras
如何计算准确度。例如,在下面的代码中
# Demonstration: Keras' Accuracy metric compares y_true and y_pred
# element-wise for exact equality, so raw scores/logits in y_pred yield
# zero accuracy even when each 3-value group's argmax matches the label
# (as the surrounding text explains for this example).
y_true = np.array([[1,0,0,0,1,0,0,0,1]])
y_pred = np.array([[.8,.1,.1,1,10,2,2,3,5.5]])
metric = tf.keras.metrics.Accuracy()
metric.update_state(y_true,y_pred)
metric.result().numpy()
通过比较
计算出的准确度为零[.8,.1,.1]
与[1,0,0]
[1,10,2]
与[0,1,0]
[2,3,5.5]
与[0,0,1]
我们知道 y_pred
实际上非常准确,这可能就是您的模型不起作用的原因。为了在当前模型下处理这个问题,在输出层应用 sigmoid 激活可能会有所帮助,您可以通过 运行 下面的代码
import numpy as np
import tensorflow as tf
import keras
from sklearn.preprocessing import MinMaxScaler
def dataset_gen(num_samples):
    """Generate a synthetic (feature_vec, output_vec) dataset.

    Each row encodes a 5-character pattern over 3 classes: the features are
    a fixed linear transform of the class indices, and the labels are the
    15-bit (5 positions x 3 classes) sequentially ordered one-hot encoding.
    """
    np.random.seed(0)
    # Five class indices in [0, 3) per sample.
    output_classes = np.random.randint(0, 3, size=(num_samples, 5))
    # Seeded 5x6 linear map (ramp plus small jitter) from classes to features.
    transform_mat = np.arange(-15, 15).reshape(5, 6) + .1 * np.random.rand(5, 6)
    print(transform_mat)
    feature_vec = output_classes @ transform_mat
    # Shift each position's class index into its own 3-slot group.
    output_classes += np.array([0, 3, 6, 9, 12])
    # Scatter ones into the 15-wide one-hot label matrix (vectorized form of
    # the original per-row loop).
    output_vec = np.zeros((num_samples, 15))
    output_vec[np.arange(num_samples)[:, None], output_classes] = 1.
    return feature_vec, output_vec
def create_model():
    # A simple sequential model: 6 features -> 16 ReLU units -> 15 outputs.
    n_hidden,num_features,num_outputs = 16,6,15
    model = tf.keras.Sequential()
    model.add(tf.keras.Input(shape=(num_features,)))
    model.add(tf.keras.layers.Dense(n_hidden,activation="relu"))
    # NOTE(review): these sigmoid outputs are later fed to
    # softmax_cross_entropy_with_logits, which expects raw logits.  The
    # answer text proposes sigmoid deliberately — confirm this is intended.
    model.add(tf.keras.layers.Dense(num_outputs,activation="sigmoid"))
    return model
def loss(y_true, y_pred):
    """Sum of five softmax cross-entropies, one per 3-output group, so each
    consecutive group of 3 values acts as an independent 3-way classifier."""
    per_group = [
        tf.nn.softmax_cross_entropy_with_logits(y_true[:, i:i + 3], y_pred[:, i:i + 3])
        for i in range(0, 15, 3)
    ]
    return tf.add_n(per_group)
# Bug fix: the original referenced undefined test_x / test_z; they must be
# produced by dataset_gen.
test_x, test_z = dataset_gen(1000)

# Create the sequential NN model and compile with the Adam optimizer and the
# per-group cross-entropy loss above.
model = create_model()
model.compile(optimizer='adam', loss=loss, metrics=['accuracy'])

# Stop training once validation accuracy has not improved for 100 epochs.
es = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', mode='max',
                                      verbose=1, patience=100)
# Bug fix: Keras expects a *list* of callbacks, not a bare callback object.
history = model.fit(test_x, test_z, epochs=2000, batch_size=8,
                    callbacks=[es], validation_split=0.2,
                    verbose=0)