Implementing custom loss function in Tensorflow leading to "ValueError: 'outputs' must be defined before the loop."
Implementing custom loss function in Tensorflow leading to "ValueError: 'outputs' must be defined before the loop."
我正在 Python 开展我的第一个机器学习项目 - 使用 TensorFlow 尝试使用 Moby Hyphenator II 数据集对单词进行音节化。
我将其视为多标签分类问题,其中单词及其音节按以下格式编码:
T e n - s o r - f l o w
0 0 1 0 0 1 0 0 0 0
当以 this guide 作为起点阅读时,我看到作者使用了一个自定义函数——他们在 PyTorch 中对加权二元交叉熵和均方根误差求平均:
def bce_rmse(pred, target, pos_weight = 1.3, epsilon = 1e-12):
    """Average of class-weighted binary cross-entropy and RMSE (PyTorch).

    pos_weight scales the positive-class log term; epsilon keeps log()
    and sqrt() away from 0.
    """
    # Weighted binary cross entropy
    pos_term = torch.log(pred + epsilon) * target
    neg_term = torch.log(1 - pred + epsilon) * (1 - target)
    bce = torch.neg(pos_term * pos_weight + neg_term).mean()
    # Root mean squared error over the squared difference of per-column sums
    col_diff_sq = (pred.sum(dim = 0) - target.sum(dim = 0)) ** 2
    rmse = torch.sqrt(col_diff_sq + epsilon).mean()
    return (bce + rmse) / 2
我尝试通过以下方式在 TensorFlow 中实现它:
def weighted_bce_mse(y_true, y_prediction):
    """Mean of class-weighted binary cross-entropy and MSE (TensorFlow).

    Bug fix: the original reduced with np.mean, which forces eager/NumPy
    evaluation of a symbolic tensor and is what triggers
    "ValueError: 'outputs' must be defined before the loop." in graph mode.
    All reductions must stay inside the TF graph (tf.math.reduce_mean).
    Inputs are cast to float32 so integer labels combine with float preds.
    """
    y_true = tf.cast(y_true, tf.float32)
    y_prediction = tf.cast(y_prediction, tf.float32)
    # Binary crossentropy with weighting; epsilon guards log(0)
    epsilon = 1e-12
    positive_weight = 4.108897148948174
    loss_positive = y_true * tf.math.log(y_prediction + epsilon)
    loss_negative = (1 - y_true) * tf.math.log(1 - y_prediction + epsilon)
    bce_loss = tf.math.reduce_mean(tf.math.negative(positive_weight * loss_positive + loss_negative))
    # Mean squared error
    mse = tf.keras.losses.MeanSquaredError()
    mse_loss = mse(y_true, y_prediction)
    averaged_bce_mse = (bce_loss + mse_loss) / 2
    return averaged_bce_mse
这样做时,我收到错误 ValueError: 'outputs' must be defined before the loop.
，我不确定为什么会出现此错误，因为该函数是在构建和编译模型之前就已定义好的。
我正在使用 Keras Functional API,我的编译和拟合阶段是:
# Compile with the custom loss; steps_per_execution batches 64 train steps
# per tf.function call (NOTE(review): must not exceed steps per epoch — confirm
# against the dataset size).
model.compile(optimizer="adam", loss=weighted_bce_mse, metrics=["accuracy"], steps_per_execution=64)
# Train for 10 epochs; validation set is evaluated once per epoch.
history = model.fit(padded_inputs, padded_outputs, validation_data=(validation_inputs, validation_outputs), epochs=10, verbose=2)
如前所述,显示的错误与自定义损失函数无关。您显示的代码还有许多其他错误,例如未正确导入 tf.keras.layers
。修复这些错误后,查看下面的代码并测试下面的版本(工作正常):
tensorflow 2.4.1
numpy 1.19.5
python 3.9.6
import tensorflow as tf
# Custom loss function - mean of binary crossentropy and mean squared error
def mean_weighted_bce_mse(y_true, y_prediction):
    """Mean of class-weighted binary cross-entropy and MSE (TensorFlow).

    Two fixes over the original:
    - np.mean is replaced by tf.math.reduce_mean: numpy is never imported in
      this snippet (only `import tensorflow as tf`), and reducing a symbolic
      tensor with NumPy breaks graph-mode tracing.
    - the final `tf.math.reduce_mean(..., axis=-1)` is dropped: both loss
      terms are already scalars, and reducing a 0-d tensor along axis -1
      raises an invalid-dimension error.
    """
    y_true = tf.cast(y_true, tf.float32)
    y_prediction = tf.cast(y_prediction, tf.float32)
    # Binary crossentropy with weighting; epsilon guards log(0)
    epsilon = 1e-12
    positive_weight = 4.108897148948174
    loss_positive = y_true * tf.math.log(y_prediction + epsilon)
    loss_negative = (1 - y_true) * tf.math.log(1 - y_prediction + epsilon)
    bce_loss = tf.math.reduce_mean(tf.math.negative(positive_weight * loss_positive + loss_negative))
    # Mean squared error
    mse = tf.keras.losses.MeanSquaredError()
    mse_loss = mse(y_true, y_prediction)
    averaged_bce_mse = (bce_loss + mse_loss) / 2
    return averaged_bce_mse
# Functional-API model: fixed-length (15) integer-encoded character sequence in,
# per-position sigmoid probabilities out (multi-label syllable-break prediction).
inputs = tf.keras.Input(shape=(15,))
# Vocab/dim 64; mask_zero=True treats index 0 as padding, masked downstream.
x = tf.keras.layers.Embedding(64, 64, mask_zero=True)(inputs)
# Three stacked BiLSTM layers (return_sequences keeps the time dimension),
# each followed by dropout for regularization.
x = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128, return_sequences=True))(x)
x = tf.keras.layers.Dropout(0.2)(x)
x = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128, return_sequences=True))(x)
x = tf.keras.layers.Dropout(0.2)(x)
x = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128, return_sequences=True))(x)
x = tf.keras.layers.Dropout(0.3)(x)
# 1x1 convolutions act as per-timestep dense projections down to 64 channels.
x = tf.keras.layers.Conv1D(64, kernel_size=1)(x)
x = tf.keras.layers.Dropout(0.2)(x)
x = tf.keras.layers.Conv1D(64, kernel_size=1)(x)
x = tf.keras.layers.Dropout(0.2)(x)
# Collapse the time dimension by max over timesteps.
x = tf.keras.layers.GlobalMaxPool1D()(x)
x = tf.keras.layers.Dropout(0.5)(x)
x = tf.keras.layers.Dense(32, activation="relu")(x)
# One sigmoid output per input position (15 independent binary labels).
x = tf.keras.layers.Dense(15, activation="sigmoid")(x)
model = tf.keras.models.Model(inputs=inputs, outputs=x)
# steps_per_execution=64 batches multiple train steps per tf.function call
# (NOTE(review): must not exceed steps per epoch — confirm for the dataset).
model.compile(optimizer="adam", loss=mean_weighted_bce_mse, metrics=["accuracy"], steps_per_execution=64)
# Training call kept commented: padded_inputs/outputs are defined elsewhere.
# history = model.fit(padded_inputs,
# padded_outputs,
# validation_data=(validation_inputs, validation_outputs),
# epochs=20,
# batch_size=8)
在下面的代码行中:
# Compile with the custom loss; steps_per_execution=64 runs 64 train steps
# per tf.function call (NOTE(review): should be <= steps per epoch — confirm).
model.compile(optimizer="adam", loss=mean_weighted_bce_mse, metrics=["accuracy"], steps_per_execution=64)
# Train for 20 epochs with batch size 8; validation evaluated each epoch.
history = model.fit(padded_inputs,
padded_outputs,
validation_data=(validation_inputs, validation_outputs),
epochs=20,
batch_size=8)
您的输入数据的长度是多少?
steps_per_execution 应该是 len(input_data)/Batch_size
。
删除 steps_per_execution
并再次检查。
我发现错误源于我在自定义损失函数中使用的操作:
bce_loss = np.mean(tf.math.negative(positive_weight * loss_positive + loss_negative))
此行使用 np.mean
,这导致了错误 - 将其替换为 tf.math.reduce_mean
并将 y_true 和 y_prediction 转换为 tf.float32，从而解决了问题：
# Custom loss function - mean of binary crossentropy and mean squared error
def mean_weighted_bce_mse(y_true, y_prediction):
    """Average of class-weighted binary cross-entropy and Keras MSE."""
    # Work in float32 so integer labels combine cleanly with float predictions.
    y_true = tf.cast(y_true, tf.float32)
    y_prediction = tf.cast(y_prediction, tf.float32)
    eps = 1e-12                      # numerical floor so log() never sees 0
    pos_weight = 4.108897148948174   # up-weights the positive class
    # Weighted BCE, reduced entirely with TF ops to stay graph-compatible.
    log_p = tf.math.log(y_prediction + eps)
    log_not_p = tf.math.log(1 - y_prediction + eps)
    weighted_terms = pos_weight * (y_true * log_p) + (1 - y_true) * log_not_p
    bce_loss = tf.math.reduce_mean(tf.math.negative(weighted_terms))
    # Standard Keras mean squared error between labels and predictions.
    mse_loss = tf.keras.losses.MeanSquaredError()(y_true, y_prediction)
    return (bce_loss + mse_loss) / 2
我正在 Python 开展我的第一个机器学习项目 - 使用 TensorFlow 尝试使用 Moby Hyphenator II 数据集对单词进行音节化。
我将其视为多标签分类问题,其中单词及其音节按以下格式编码:
T e n - s o r - f l o w
0 0 1 0 0 1 0 0 0 0
当以 this guide 作为起点阅读时,我看到作者使用了一个自定义函数——他们在 PyTorch 中对加权二元交叉熵和均方根误差求平均:
def bce_rmse(pred, target, pos_weight = 1.3, epsilon = 1e-12):
    """Average of class-weighted binary cross-entropy and RMSE (PyTorch).

    pos_weight scales the positive-class log term; epsilon keeps log()
    and sqrt() away from 0.
    """
    # Weighted binary cross entropy
    pos_term = torch.log(pred + epsilon) * target
    neg_term = torch.log(1 - pred + epsilon) * (1 - target)
    bce = torch.neg(pos_term * pos_weight + neg_term).mean()
    # Root mean squared error over the squared difference of per-column sums
    col_diff_sq = (pred.sum(dim = 0) - target.sum(dim = 0)) ** 2
    rmse = torch.sqrt(col_diff_sq + epsilon).mean()
    return (bce + rmse) / 2
我尝试通过以下方式在 TensorFlow 中实现它:
def weighted_bce_mse(y_true, y_prediction):
    """Mean of class-weighted binary cross-entropy and MSE (TensorFlow).

    Bug fix: the original reduced with np.mean, which forces eager/NumPy
    evaluation of a symbolic tensor and is what triggers
    "ValueError: 'outputs' must be defined before the loop." in graph mode.
    All reductions must stay inside the TF graph (tf.math.reduce_mean).
    Inputs are cast to float32 so integer labels combine with float preds.
    """
    y_true = tf.cast(y_true, tf.float32)
    y_prediction = tf.cast(y_prediction, tf.float32)
    # Binary crossentropy with weighting; epsilon guards log(0)
    epsilon = 1e-12
    positive_weight = 4.108897148948174
    loss_positive = y_true * tf.math.log(y_prediction + epsilon)
    loss_negative = (1 - y_true) * tf.math.log(1 - y_prediction + epsilon)
    bce_loss = tf.math.reduce_mean(tf.math.negative(positive_weight * loss_positive + loss_negative))
    # Mean squared error
    mse = tf.keras.losses.MeanSquaredError()
    mse_loss = mse(y_true, y_prediction)
    averaged_bce_mse = (bce_loss + mse_loss) / 2
    return averaged_bce_mse
这样做时,我收到错误 ValueError: 'outputs' must be defined before the loop.
，我不确定为什么会出现此错误，因为该函数是在构建和编译模型之前就已定义好的。
我正在使用 Keras Functional API,我的编译和拟合阶段是:
# Compile with the custom loss; steps_per_execution batches 64 train steps
# per tf.function call (NOTE(review): must not exceed steps per epoch — confirm
# against the dataset size).
model.compile(optimizer="adam", loss=weighted_bce_mse, metrics=["accuracy"], steps_per_execution=64)
# Train for 10 epochs; validation set is evaluated once per epoch.
history = model.fit(padded_inputs, padded_outputs, validation_data=(validation_inputs, validation_outputs), epochs=10, verbose=2)
如前所述,显示的错误与自定义损失函数无关。您显示的代码还有许多其他错误,例如未正确导入 tf.keras.layers
。修复这些错误后,查看下面的代码并测试下面的版本(工作正常):
tensorflow 2.4.1
numpy 1.19.5
python 3.9.6
import tensorflow as tf
# Custom loss function - mean of binary crossentropy and mean squared error
def mean_weighted_bce_mse(y_true, y_prediction):
    """Mean of class-weighted binary cross-entropy and MSE (TensorFlow).

    Two fixes over the original:
    - np.mean is replaced by tf.math.reduce_mean: numpy is never imported in
      this snippet (only `import tensorflow as tf`), and reducing a symbolic
      tensor with NumPy breaks graph-mode tracing.
    - the final `tf.math.reduce_mean(..., axis=-1)` is dropped: both loss
      terms are already scalars, and reducing a 0-d tensor along axis -1
      raises an invalid-dimension error.
    """
    y_true = tf.cast(y_true, tf.float32)
    y_prediction = tf.cast(y_prediction, tf.float32)
    # Binary crossentropy with weighting; epsilon guards log(0)
    epsilon = 1e-12
    positive_weight = 4.108897148948174
    loss_positive = y_true * tf.math.log(y_prediction + epsilon)
    loss_negative = (1 - y_true) * tf.math.log(1 - y_prediction + epsilon)
    bce_loss = tf.math.reduce_mean(tf.math.negative(positive_weight * loss_positive + loss_negative))
    # Mean squared error
    mse = tf.keras.losses.MeanSquaredError()
    mse_loss = mse(y_true, y_prediction)
    averaged_bce_mse = (bce_loss + mse_loss) / 2
    return averaged_bce_mse
# Functional-API model: fixed-length (15) integer-encoded character sequence in,
# per-position sigmoid probabilities out (multi-label syllable-break prediction).
inputs = tf.keras.Input(shape=(15,))
# Vocab/dim 64; mask_zero=True treats index 0 as padding, masked downstream.
x = tf.keras.layers.Embedding(64, 64, mask_zero=True)(inputs)
# Three stacked BiLSTM layers (return_sequences keeps the time dimension),
# each followed by dropout for regularization.
x = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128, return_sequences=True))(x)
x = tf.keras.layers.Dropout(0.2)(x)
x = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128, return_sequences=True))(x)
x = tf.keras.layers.Dropout(0.2)(x)
x = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128, return_sequences=True))(x)
x = tf.keras.layers.Dropout(0.3)(x)
# 1x1 convolutions act as per-timestep dense projections down to 64 channels.
x = tf.keras.layers.Conv1D(64, kernel_size=1)(x)
x = tf.keras.layers.Dropout(0.2)(x)
x = tf.keras.layers.Conv1D(64, kernel_size=1)(x)
x = tf.keras.layers.Dropout(0.2)(x)
# Collapse the time dimension by max over timesteps.
x = tf.keras.layers.GlobalMaxPool1D()(x)
x = tf.keras.layers.Dropout(0.5)(x)
x = tf.keras.layers.Dense(32, activation="relu")(x)
# One sigmoid output per input position (15 independent binary labels).
x = tf.keras.layers.Dense(15, activation="sigmoid")(x)
model = tf.keras.models.Model(inputs=inputs, outputs=x)
# steps_per_execution=64 batches multiple train steps per tf.function call
# (NOTE(review): must not exceed steps per epoch — confirm for the dataset).
model.compile(optimizer="adam", loss=mean_weighted_bce_mse, metrics=["accuracy"], steps_per_execution=64)
# Training call kept commented: padded_inputs/outputs are defined elsewhere.
# history = model.fit(padded_inputs,
# padded_outputs,
# validation_data=(validation_inputs, validation_outputs),
# epochs=20,
# batch_size=8)
在下面的代码行中:
# Compile with the custom loss; steps_per_execution=64 runs 64 train steps
# per tf.function call (NOTE(review): should be <= steps per epoch — confirm).
model.compile(optimizer="adam", loss=mean_weighted_bce_mse, metrics=["accuracy"], steps_per_execution=64)
# Train for 20 epochs with batch size 8; validation evaluated each epoch.
history = model.fit(padded_inputs,
padded_outputs,
validation_data=(validation_inputs, validation_outputs),
epochs=20,
batch_size=8)
您的输入数据的长度是多少?
steps_per_execution 应该是 len(input_data)/Batch_size
。
删除 steps_per_execution
并再次检查。
我发现错误源于我在自定义损失函数中使用的操作:
bce_loss = np.mean(tf.math.negative(positive_weight * loss_positive + loss_negative))
此行使用 np.mean
,这导致了错误 - 将其替换为 tf.math.reduce_mean
并将 y_true 和 y_prediction 转换为 tf.float32，从而解决了问题：
# Custom loss function - mean of binary crossentropy and mean squared error
def mean_weighted_bce_mse(y_true, y_prediction):
    """Average of class-weighted binary cross-entropy and Keras MSE."""
    # Work in float32 so integer labels combine cleanly with float predictions.
    y_true = tf.cast(y_true, tf.float32)
    y_prediction = tf.cast(y_prediction, tf.float32)
    eps = 1e-12                      # numerical floor so log() never sees 0
    pos_weight = 4.108897148948174   # up-weights the positive class
    # Weighted BCE, reduced entirely with TF ops to stay graph-compatible.
    log_p = tf.math.log(y_prediction + eps)
    log_not_p = tf.math.log(1 - y_prediction + eps)
    weighted_terms = pos_weight * (y_true * log_p) + (1 - y_true) * log_not_p
    bce_loss = tf.math.reduce_mean(tf.math.negative(weighted_terms))
    # Standard Keras mean squared error between labels and predictions.
    mse_loss = tf.keras.losses.MeanSquaredError()(y_true, y_prediction)
    return (bce_loss + mse_loss) / 2