Custom Loss Function returning - InvalidArgumentError: The second input must be a scalar, but it has shape [64]
Custom Loss Function returning - InvalidArgumentError: The second input must be a scalar, but it has shape [64]
我正在尝试使用一个自定义损失函数的修改版本,但出现以下错误:
InvalidArgumentError: The second input must be a scalar, but it has shape [64] [[{{node gradient_tape/custom_loss/cond_1/StatelessIf/gradient_tape/custom_loss/weighted_loss/Mul/_30}}]] [Op:__inference_train_function_147002]
Function call stack:
train_function
这是代码
import time
import numpy as np
import tensorflow as tf
from tensorflow.keras.losses import Loss
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization, Flatten
from tensorflow.compat.v1.keras.layers import CuDNNLSTM
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint
def custom_loss(y_true, y_pred):
    """Scale the MSE by sign-mismatch and overshoot factors using ``tf.cond``.

    Args:
        y_true: Target values.
        y_pred: Model predictions.

    Returns:
        The output of the second ``tf.cond``: the MSE multiplied by the factor
        of whichever branch each of the two conditionals takes.

    NOTE(review): ``tf.cond`` is documented to take a scalar predicate, while
    both predicates below are elementwise comparisons of ``y_true`` and
    ``y_pred``. That looks like the origin of the "must be a scalar, but it
    has shape [64]" error quoted with this snippet -- confirm.
    """
    penalty = 10
    mse = tf.keras.losses.MeanSquaredError()

    # penalize the loss heavily if the actual and the prediction are on
    # different sides of zero
    positive_target_negative_pred = tf.logical_and(
        tf.greater(y_true, 0.0), tf.less(y_pred, 0.0)
    )
    negative_target_positive_pred = tf.logical_and(
        tf.less(y_true, 0.0), tf.greater(y_pred, 0.0)
    )
    sign_scaled = tf.cond(
        tf.logical_or(positive_target_negative_pred, negative_target_positive_pred),
        lambda: mse(y_true, y_pred) * penalty,
        lambda: mse(y_true, y_pred) * penalty / 4,
    )
    print("starting second condition")

    # add slightly more penalty if prediction overshoots actual in any direction
    overshoots_positive = tf.logical_and(
        tf.greater(y_true, 0.0), tf.greater(y_pred, y_true)
    )
    overshoots_negative = tf.logical_and(
        tf.less(y_true, 0.0), tf.less(y_pred, y_true)
    )
    return tf.cond(
        tf.logical_or(overshoots_positive, overshoots_negative),
        lambda: sign_scaled * penalty / 5,
        lambda: sign_scaled * penalty / 10,
    )
# NOTE(review): EPOCHS is not referenced below; model.fit() hard-codes
# epochs=100. Confirm which value is intended.
EPOCHS = 25
BATCH_SIZE = 64
# int() already formats cleanly inside the f-string; the previous
# str(int(time.time()) expression was missing a closing parenthesis.
MODEL_NAME = f"MODEL 01-{int(time.time())}"

# Randomly generated training/validation arrays. They are created before the
# model because the LSTM layers read train_x.shape.
train_x = np.random.randn(1588, 60, 34)
train_y = np.random.rand(1588,)
val_x = np.random.randn(85, 60, 34)
val_y = np.random.randn(85,)

# Three stacked LSTM layers followed by a small dense head with one output.
model = Sequential()
model.add(LSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(LSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(LSTM(128, input_shape=(train_x.shape[1:])))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(Flatten())
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(Dense(1))

opt = tf.keras.optimizers.Adam(learning_rate=1e-3, decay=1e-6)
metric = tf.keras.metrics.MeanSquaredError()
model.compile(loss=custom_loss, optimizer=opt, metrics=[metric])

# Name under which Keras reports the metric on the validation data.
val_metric = 'val_' + metric.name
tensorboard = TensorBoard(log_dir=f'logs/{MODEL_NAME}')
# base_path is not defined in this snippet; it has to be provided by the
# surrounding code.
filepath = base_path+"cryptodata/models/RNN_Final-{epoch:02d}-{val_mean_squared_error:.3f}-"+str(int(time.time()))+".hd5"
# The monitored quantity is an error, so lower is better (mode='min'). The
# former `metric=` keyword is not a documented ModelCheckpoint argument and
# has been dropped.
checkpoint = ModelCheckpoint(filepath=filepath, monitor=val_metric, verbose=0, mode='min')

history = model.fit(
    train_x, train_y,
    batch_size=BATCH_SIZE,
    epochs=100,
    validation_data=(val_x, val_y),
    callbacks=[checkpoint, tensorboard],
)
我试过在自定义损失函数中像这样转换 y_true 和 y_pred:y_pred = tf.convert_to_tensor(y_pred); y_true = tf.cast(y_true, y_pred.dtype),但这没有用。另外,加上 print 语句后可以看到,该函数先成功调用了两次,之后才失败。
使用内置损失函数时,我没有收到错误消息。
问题是您的 custom_loss 返回的是函数而不是标量值。tf.cond 需要一个标量条件,而这里的条件是对 y_true 和 y_pred 逐元素比较得到的张量(每个样本一个值);tf.where 则按元素进行选择。如果您将 tf.cond 替换为 tf.where,您的代码将起作用:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization, Flatten
def custom_loss(y_true, y_pred):
    """Scale the MSE elementwise by sign-mismatch and overshoot factors.

    The MSE is multiplied by ``penalty`` where target and prediction lie on
    opposite sides of zero and by ``penalty / 4`` elsewhere; the result is
    then multiplied by ``penalty / 5`` where the prediction overshoots the
    target (away from zero) and by ``penalty / 10`` elsewhere.

    Args:
        y_true: Target values.
        y_pred: Model predictions.

    Returns:
        A tensor of scaled loss values, one per element of the broadcast
        comparison between ``y_true`` and ``y_pred``.

    NOTE(review): ``MeanSquaredError()`` with its default reduction
    presumably yields a single value for the whole batch, so the factors
    rescale that shared value rather than each sample's own squared error.
    If a per-sample penalty is intended, ``tf.square(y_true - y_pred)`` would
    be the per-element error -- confirm the intent.
    """
    penalty = 10
    # Evaluate the MSE once; both tf.where branches reuse the same value.
    base_mse = tf.keras.losses.MeanSquaredError()(y_true, y_pred)

    # penalize the loss heavily if the actual and the prediction are on
    # different sides of zero
    opposite_sign = tf.logical_or(
        tf.logical_and(tf.greater(y_true, 0.0), tf.less(y_pred, 0.0)),
        tf.logical_and(tf.less(y_true, 0.0), tf.greater(y_pred, 0.0)),
    )
    loss = tf.where(
        condition=opposite_sign,
        x=base_mse * penalty,
        y=base_mse * penalty / 4,
    )

    # add slightly more penalty if prediction overshoots actual in any direction
    overshoot = tf.logical_or(
        tf.logical_and(tf.greater(y_true, 0.0), tf.greater(y_pred, y_true)),
        tf.logical_and(tf.less(y_true, 0.0), tf.less(y_pred, y_true)),
    )
    return tf.where(
        condition=overshoot,
        x=loss * penalty / 5,
        y=loss * penalty / 10,
    )
# Randomly generated arrays used as training and validation data.
train_x = np.random.randn(1588, 60, 34)
train_y = np.random.rand(1588, )
val_x = np.random.randn(85, 60, 34)
val_y = np.random.randn(85, )

# Same layer stack as the add()-based form, passed to Sequential as a list.
model = Sequential([
    LSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True),
    Dropout(0.2),
    BatchNormalization(),
    LSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True),
    Dropout(0.2),
    BatchNormalization(),
    LSTM(128, input_shape=(train_x.shape[1:])),
    Dropout(0.2),
    BatchNormalization(),
    Flatten(),
    Dense(32, activation='relu'),
    Dropout(0.2),
    BatchNormalization(),
    Dense(1),
])

opt = tf.keras.optimizers.Adam(learning_rate=1e-3, decay=1e-6)
model.compile(loss=custom_loss, optimizer=opt, metrics=['mse'])

# Train briefly with the custom loss; sample console output follows.
model.fit(
    train_x,
    train_y,
    batch_size=128,
    epochs=3,
    validation_data=(val_x, val_y),
)
# Epoch 1/3
# 13/13 [==============================] - 8s 321ms/step - loss: 11.3129 - mse: 1.6341 - val_loss: 6.9313 - val_mse: 1.1116
# Epoch 2/3
# 13/13 [==============================] - 3s 234ms/step - loss: 7.3409 - mse: 1.0789 - val_loss: 7.2055 - val_mse: 1.1238
# Epoch 3/3
# 13/13 [==============================] - 3s 231ms/step - loss: 5.3962 - mse: 0.8513 - val_loss: 7.4492 - val_mse: 1.1512

# Run inference on the training inputs; sample output follows.
model.predict(train_x)
# array([[0.25150445],
# [0.2647993 ],
# [0.2405027 ],
# ...,
# [0.31251353],
# [0.29376918],
# [0.21620636]], dtype=float32)
我正在尝试使用一个自定义损失函数的修改版本,但出现以下错误:
InvalidArgumentError: The second input must be a scalar, but it has shape [64] [[{{node gradient_tape/custom_loss/cond_1/StatelessIf/gradient_tape/custom_loss/weighted_loss/Mul/_30}}]] [Op:__inference_train_function_147002]
Function call stack:
train_function
这是代码
import time
import numpy as np
import tensorflow as tf
from tensorflow.keras.losses import Loss
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization, Flatten
from tensorflow.compat.v1.keras.layers import CuDNNLSTM
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint
def custom_loss(y_true, y_pred):
    """Scale the MSE by sign-mismatch and overshoot factors using ``tf.cond``.

    Args:
        y_true: Target values.
        y_pred: Model predictions.

    Returns:
        The output of the second ``tf.cond``: the MSE multiplied by the factor
        of whichever branch each of the two conditionals takes.

    NOTE(review): ``tf.cond`` is documented to take a scalar predicate, while
    both predicates below are elementwise comparisons of ``y_true`` and
    ``y_pred``. That looks like the origin of the "must be a scalar, but it
    has shape [64]" error quoted with this snippet -- confirm.
    """
    penalty = 10
    mse = tf.keras.losses.MeanSquaredError()

    # penalize the loss heavily if the actual and the prediction are on
    # different sides of zero
    positive_target_negative_pred = tf.logical_and(
        tf.greater(y_true, 0.0), tf.less(y_pred, 0.0)
    )
    negative_target_positive_pred = tf.logical_and(
        tf.less(y_true, 0.0), tf.greater(y_pred, 0.0)
    )
    sign_scaled = tf.cond(
        tf.logical_or(positive_target_negative_pred, negative_target_positive_pred),
        lambda: mse(y_true, y_pred) * penalty,
        lambda: mse(y_true, y_pred) * penalty / 4,
    )
    print("starting second condition")

    # add slightly more penalty if prediction overshoots actual in any direction
    overshoots_positive = tf.logical_and(
        tf.greater(y_true, 0.0), tf.greater(y_pred, y_true)
    )
    overshoots_negative = tf.logical_and(
        tf.less(y_true, 0.0), tf.less(y_pred, y_true)
    )
    return tf.cond(
        tf.logical_or(overshoots_positive, overshoots_negative),
        lambda: sign_scaled * penalty / 5,
        lambda: sign_scaled * penalty / 10,
    )
# NOTE(review): EPOCHS is not referenced below; model.fit() hard-codes
# epochs=100. Confirm which value is intended.
EPOCHS = 25
BATCH_SIZE = 64
# int() already formats cleanly inside the f-string; the previous
# str(int(time.time()) expression was missing a closing parenthesis.
MODEL_NAME = f"MODEL 01-{int(time.time())}"

# Randomly generated training/validation arrays. They are created before the
# model because the LSTM layers read train_x.shape.
train_x = np.random.randn(1588, 60, 34)
train_y = np.random.rand(1588,)
val_x = np.random.randn(85, 60, 34)
val_y = np.random.randn(85,)

# Three stacked LSTM layers followed by a small dense head with one output.
model = Sequential()
model.add(LSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(LSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(LSTM(128, input_shape=(train_x.shape[1:])))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(Flatten())
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(Dense(1))

opt = tf.keras.optimizers.Adam(learning_rate=1e-3, decay=1e-6)
metric = tf.keras.metrics.MeanSquaredError()
model.compile(loss=custom_loss, optimizer=opt, metrics=[metric])

# Name under which Keras reports the metric on the validation data.
val_metric = 'val_' + metric.name
tensorboard = TensorBoard(log_dir=f'logs/{MODEL_NAME}')
# base_path is not defined in this snippet; it has to be provided by the
# surrounding code.
filepath = base_path+"cryptodata/models/RNN_Final-{epoch:02d}-{val_mean_squared_error:.3f}-"+str(int(time.time()))+".hd5"
# The monitored quantity is an error, so lower is better (mode='min'). The
# former `metric=` keyword is not a documented ModelCheckpoint argument and
# has been dropped.
checkpoint = ModelCheckpoint(filepath=filepath, monitor=val_metric, verbose=0, mode='min')

history = model.fit(
    train_x, train_y,
    batch_size=BATCH_SIZE,
    epochs=100,
    validation_data=(val_x, val_y),
    callbacks=[checkpoint, tensorboard],
)
我试过在自定义损失函数中像这样转换 y_true 和 y_pred:y_pred = tf.convert_to_tensor(y_pred); y_true = tf.cast(y_true, y_pred.dtype),但这没有用。另外,加上 print 语句后可以看到,该函数先成功调用了两次,之后才失败。
使用内置损失函数时,我没有收到错误消息。
问题是您的 custom_loss 返回的是函数而不是标量值。tf.cond 需要一个标量条件,而这里的条件是对 y_true 和 y_pred 逐元素比较得到的张量(每个样本一个值);tf.where 则按元素进行选择。如果您将 tf.cond 替换为 tf.where,您的代码将起作用:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization, Flatten
def custom_loss(y_true, y_pred):
    """Scale the MSE elementwise by sign-mismatch and overshoot factors.

    The MSE is multiplied by ``penalty`` where target and prediction lie on
    opposite sides of zero and by ``penalty / 4`` elsewhere; the result is
    then multiplied by ``penalty / 5`` where the prediction overshoots the
    target (away from zero) and by ``penalty / 10`` elsewhere.

    Args:
        y_true: Target values.
        y_pred: Model predictions.

    Returns:
        A tensor of scaled loss values, one per element of the broadcast
        comparison between ``y_true`` and ``y_pred``.

    NOTE(review): ``MeanSquaredError()`` with its default reduction
    presumably yields a single value for the whole batch, so the factors
    rescale that shared value rather than each sample's own squared error.
    If a per-sample penalty is intended, ``tf.square(y_true - y_pred)`` would
    be the per-element error -- confirm the intent.
    """
    penalty = 10
    # Evaluate the MSE once; both tf.where branches reuse the same value.
    base_mse = tf.keras.losses.MeanSquaredError()(y_true, y_pred)

    # penalize the loss heavily if the actual and the prediction are on
    # different sides of zero
    opposite_sign = tf.logical_or(
        tf.logical_and(tf.greater(y_true, 0.0), tf.less(y_pred, 0.0)),
        tf.logical_and(tf.less(y_true, 0.0), tf.greater(y_pred, 0.0)),
    )
    loss = tf.where(
        condition=opposite_sign,
        x=base_mse * penalty,
        y=base_mse * penalty / 4,
    )

    # add slightly more penalty if prediction overshoots actual in any direction
    overshoot = tf.logical_or(
        tf.logical_and(tf.greater(y_true, 0.0), tf.greater(y_pred, y_true)),
        tf.logical_and(tf.less(y_true, 0.0), tf.less(y_pred, y_true)),
    )
    return tf.where(
        condition=overshoot,
        x=loss * penalty / 5,
        y=loss * penalty / 10,
    )
# Randomly generated arrays used as training and validation data.
train_x = np.random.randn(1588, 60, 34)
train_y = np.random.rand(1588, )
val_x = np.random.randn(85, 60, 34)
val_y = np.random.randn(85, )

# Same layer stack as the add()-based form, passed to Sequential as a list.
model = Sequential([
    LSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True),
    Dropout(0.2),
    BatchNormalization(),
    LSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True),
    Dropout(0.2),
    BatchNormalization(),
    LSTM(128, input_shape=(train_x.shape[1:])),
    Dropout(0.2),
    BatchNormalization(),
    Flatten(),
    Dense(32, activation='relu'),
    Dropout(0.2),
    BatchNormalization(),
    Dense(1),
])

opt = tf.keras.optimizers.Adam(learning_rate=1e-3, decay=1e-6)
model.compile(loss=custom_loss, optimizer=opt, metrics=['mse'])

# Train briefly with the custom loss; sample console output follows.
model.fit(
    train_x,
    train_y,
    batch_size=128,
    epochs=3,
    validation_data=(val_x, val_y),
)
# Epoch 1/3
# 13/13 [==============================] - 8s 321ms/step - loss: 11.3129 - mse: 1.6341 - val_loss: 6.9313 - val_mse: 1.1116
# Epoch 2/3
# 13/13 [==============================] - 3s 234ms/step - loss: 7.3409 - mse: 1.0789 - val_loss: 7.2055 - val_mse: 1.1238
# Epoch 3/3
# 13/13 [==============================] - 3s 231ms/step - loss: 5.3962 - mse: 0.8513 - val_loss: 7.4492 - val_mse: 1.1512

# Run inference on the training inputs; sample output follows.
model.predict(train_x)
# array([[0.25150445],
# [0.2647993 ],
# [0.2405027 ],
# ...,
# [0.31251353],
# [0.29376918],
# [0.21620636]], dtype=float32)