ValueError: No gradients provided for any variable in my custom loss - Why?
Here is my code (you can copy-paste it and run it):
import tensorflow as tf
import numpy as np
from sklearn.preprocessing import MinMaxScaler
x = np.array([[1, 2], [3, 4], [5, 6], [7, 8]]).astype(np.float32)
y = np.array([[-1], [3], [7], [-2]]).astype(np.float32)
# scale x and y
x_scaler = MinMaxScaler()
x_scaler.fit(x)
x_sc = x_scaler.transform(x)
y_scaler = MinMaxScaler()
y_scaler.fit(y)
y_sc = y_scaler.transform(y)
batch_size = 2
ds = tf.data.Dataset.from_tensor_slices((x_sc, y_sc)).batch(batch_size=batch_size)
# create the model
model = tf.keras.Sequential(
    [
        tf.keras.layers.Input(shape=(2,)),
        tf.keras.layers.Dense(units=3, activation='relu'),
        tf.keras.layers.Dense(units=1)
    ]
)
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
def standard_loss(y_batch, y_pred, y_min_max):
    batches = y_pred.shape[0]
    loss = 0.0
    y_true_unsc = tf.convert_to_tensor(y_min_max.inverse_transform(y_batch), tf.float32)
    y_pred_unsc = tf.convert_to_tensor(y_min_max.inverse_transform(y_pred), tf.float32)
    for batch in range(batches):
        loss += tf.math.reduce_mean(tf.math.square(y_true_unsc[batch] - y_pred_unsc[batch]))
    return loss / batches
# training loop
epochs = 1
for epoch in range(epochs):
    print("\nStart of epoch %d" % (epoch,))
    for step, (x_batch, y_batch) in enumerate(ds):
        with tf.GradientTape() as tape:
            y_pred = model(x_batch, training=True)
            loss_value = standard_loss(y_batch, y_pred, y_scaler)
        grads = tape.gradient(loss_value, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
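For reference, the gradients really do all come back as None; a quick (hypothetical) check right after tape.gradient makes the failing variables visible before apply_gradients raises:

# Debugging sketch: list every variable whose gradient is None
for var, grad in zip(model.trainable_variables, grads):
    if grad is None:
        print("No gradient for", var.name)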
The problem comes from my cost function (standard_loss). Everything works fine when I do not unscale my data, like below:
def standard_loss(y_batch, y_pred, y_min_max):
    batches = y_pred.shape[0]
    loss = 0.0
    for batch in range(batches):
        loss += tf.math.reduce_mean(tf.math.square(y_batch[batch] - y_pred[batch]))
    return loss / batches
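(Side note: since every row has the same size, the per-row loop above is just the mean squared error, so an equivalent one-liner, keeping the same signature, would be:)

def standard_loss(y_batch, y_pred, y_min_max):
    # y_min_max kept only to match the signature above; same value as the loop
    return tf.reduce_mean(tf.square(y_batch - y_pred))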
But when I unscale as in the first version above, I get this error:
ValueError: No gradients provided for any variable: ['dense/kernel:0', 'dense/bias:0', 'dense_1/kernel:0', 'dense_1/bias:0'].
I need to unscale my data in order to use it for other calculations.
Can someone help me understand why this happens?
Edit 1:
The problem comes from the tape (as tape in tf.GradientTape()), which records all the operations; that chain of operations is then walked backwards when the gradients are computed. My goal now is to figure out how to unscale my y_pred variable without the tape losing the trail while computing the gradients. Ideas?
Edit 2:
In my custom loss, the unscale operation is a numpy operation, and since that steps outside the TensorFlow world, the operation is not recorded by the tape. That is why the error appears. So I will look for a way to scale my data with TensorFlow operations, so that I can also unscale them with TensorFlow operations.
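A minimal illustration of the breakage, on a toy variable rather than my model: any detour through numpy detaches the result from the tape, and the gradient comes back as None.

import tensorflow as tf

x = tf.Variable(3.0)
with tf.GradientTape() as tape:
    y = x * x                       # recorded by the tape
    y_np = y.numpy() * 2.0          # numpy op: not recorded, the chain breaks here
    z = tf.convert_to_tensor(y_np)  # back in TensorFlow, but the link to x is gone
print(tape.gradient(z, x))          # None: no gradient path from z to x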
Solution:
Edit 2 is the solution. Everything works now.
In my custom loss, the unscale operation was a numpy operation, and because it stepped outside TensorFlow, it was not recorded by the tape. That is why the error appeared. One solution is to scale and unscale the data with TensorFlow operations, so that the tape records the whole path. See the code below:
import tensorflow as tf
import numpy as np
x = tf.convert_to_tensor([[1, 2], [3, 4], [5, 6], [7, 8]], dtype=tf.float32)
y = tf.convert_to_tensor([[-1], [3], [7], [-2]], dtype=tf.float32)
# retrieve x and y min max
xmin, xmax = tf.reduce_min(x, axis=0), tf.reduce_max(x, axis=0)
ymin, ymax = tf.reduce_min(y, axis=0), tf.reduce_max(y, axis=0)
batch_size = 2
ds = tf.data.Dataset.from_tensor_slices((x, y)).batch(batch_size)
# create the model
model = tf.keras.Sequential(
    [
        tf.keras.layers.Input(shape=(2,)),
        tf.keras.layers.Dense(units=3, activation='relu'),
        tf.keras.layers.Dense(units=1)
    ]
)
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
def standard_loss(y_batch, y_pred):
    # unscale y_pred (note that y_batch has never been scaled)
    y_pred_unsc = y_pred * (ymax - ymin) + ymin
    return tf.reduce_mean(tf.square(y_batch - y_pred_unsc))
# training loop
epochs = 1
for epoch in range(epochs):
    print("\nStart of epoch %d" % (epoch,))
    for step, (x_batch, y_batch) in enumerate(ds):
        with tf.GradientTape() as tape:
            # scale the inputs with TensorFlow ops (we never leave TensorFlow)
            x_scale = (x_batch - xmin) / (xmax - xmin)
            y_pred = model(x_scale, training=True)
            loss_value = standard_loss(y_batch, y_pred)
        grads = tape.gradient(loss_value, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
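An alternative sketch, if you prefer to keep sklearn for the fitting: read the fitted scaler's data_min_ and data_max_ attributes once (using the y_scaler from my first snippet), then rebuild the inverse transform with TensorFlow ops so the tape can differentiate through it. This assumes MinMaxScaler's default feature_range of (0, 1):

# Hypothetical variant: fit with sklearn, unscale with TensorFlow ops.
# data_min_ and data_max_ are set by MinMaxScaler.fit().
y_min = tf.constant(y_scaler.data_min_, dtype=tf.float32)
y_max = tf.constant(y_scaler.data_max_, dtype=tf.float32)

def tf_inverse_transform(y_scaled):
    # Mirrors y_scaler.inverse_transform for feature_range=(0, 1)
    return y_scaled * (y_max - y_min) + y_min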