Custom loss function not differentiable
I am trying to use a custom loss function.
Here is the code structure:
Libraries:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow_probability as tfp
from tensorflow.keras import layers, losses
from tensorflow.keras.models import Model
import string
import random
import math
import pickle
The dataset is of the form:
https://drive.google.com/file/d/1sB8at-hZl-HXeFyFSp1Mm2Bhd3eV8ZxA/view?usp=sharing
Source code:
latent_dim = 64  # Number of nodes in the hidden layer
train_size = 10000  # Size of training vocabulary
# The training dataset has been downloaded from https://norvig.com/ngrams/
words = pd.read_csv("./enable2.csv") #Opensource data corpus
words = list(words['aa'])
random.shuffle(words)
# Creating the list of all possible characters in the English language
letters = list(string.ascii_letters)
punc = list(string.punctuation)
space = [' ',' ',' ']
letters.extend(punc)
letters.extend(space)
random.shuffle(letters)
v_size = 100  # length of each feature vector (assumed; must match the model's input_shape below)

def word_to_vector(words):
    # Create features for each word based on the character list built in the previous step
    features = list()
    for word in words:
        feature = [0] * v_size
        for i in range(len(word)):
            w = word[i]
            feature[i] = letters.index(w)
        features.append(feature)
    return features
vector_words = word_to_vector(words)
# Splitting the vocab into training and test datasets
x_train = np.array(vector_words[:train_size])
x_train.shape
def train_nn_with_custom_loss_function(custom_loss_function, nn, lr, batch_size, nb_epochs, save_to_folder=None, plot_loss=True):
    """
    Train a neural net with a custom loss function, plot the loss
    and the class histogram, and return the per-class counts.
    Args:
        custom_loss_function (function): One of the custom loss functions
    """
    # reset session
    tf.keras.backend.clear_session()
    # It's important to use run_eagerly=True in this example because the custom loss
    # function converts tensors to numpy arrays, which requires eager mode
    opt = tf.keras.optimizers.Adam(learning_rate=lr)
    nn.compile(optimizer=opt, loss=custom_loss_function, run_eagerly=True)
    # nn.build(input_shape=(100,))
    nn.summary()
    history = nn.fit(x_train, x_train,
                     epochs=nb_epochs, batch_size=batch_size, verbose=1,
                     shuffle=True)
    if save_to_folder is not None:
        nn.save(save_to_folder)
    plt.plot(history.history['loss'])
    plt.ylabel('loss')
    plt.xlabel('epoch')
    # plt.set_xticks(np.arange(len(history.history['loss'])))
    plt.show()
    pred = nn.predict(x_train)
    classes = np.argmax(pred, axis=1)
    y, x = np.histogram(classes, bins=np.arange(16))
    x = x[1:]
    plt.bar(x, y)
    plt.ylabel('input')
    plt.xlabel('class')
    plt.xticks(np.arange(1, 16, step=1))
    plt.show()
    return y
Loss function:
from scipy.spatial import distance

# we don't have a y_true in an unsupervised setting
def distance_loss_fn(y_true, y_pred):
    # calculate the centroid of every cluster
    # thresh = -1
    # y_pred = tf.gather(y_pred, tf.where(y_pred > thresh))
    pred = tf.argmax(y_pred, axis=1).numpy()
    # centroid coordinates
    centroids = np.zeros((15, 100), dtype=float)
    # centroid frequencies
    centroids_f = np.zeros(15)
    # points holds the coordinates of the points to cluster
    points = y_true.numpy()
    # iterate over points
    for idx in range(points.shape[0]):
        # sum points to calculate the centroids later
        centroids[pred[idx]] += points[idx]
        centroids_f[pred[idx]] += 1
    # average over all points to calculate each centroid
    for idx in range(len(centroids)):
        if centroids_f[idx] == 0:
            continue
        centroids[idx] /= centroids_f[idx]
    # sum the distance between each point and its centroid to get the loss to minimize
    loss = 0
    for idx in range(len(points)):
        # add the distance between each point and the centroid of its predicted class
        # tfa.metrics.hamming.hamming_loss_fn(y, output, threshold=0.5, mode='multilabel')
        # loss += tf.reduce_sum(centroids[pred[idx]] - points[idx])
        loss += distance.hamming(centroids[pred[idx]], points[idx])
        # loss += tfa.metrics.hamming.hamming_loss_fn(centroids[pred[idx]],
        #                                             points[idx],
        #                                             threshold=0.5,
        #                                             mode='multilabel')
        # print('d ', distance.hamming(centroids[pred[idx]], points[idx]))
    l = tf.reduce_sum(y_pred)
    # print(loss)
    ret = tf.constant(loss, dtype=tf.float32)
    return ret
Architecture:
base_nn = tf.keras.Sequential([
    layers.Flatten(input_shape=(100,)),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(15, activation='softmax')
])
diff = train_nn_with_custom_loss_function(distance_loss_fn, base_nn, 0.00001, 32, 5)
The error I get:
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
flatten (Flatten) (None, 100) 0
_________________________________________________________________
dense (Dense) (None, 128) 12928
_________________________________________________________________
dropout (Dropout) (None, 128) 0
_________________________________________________________________
dense_1 (Dense) (None, 15) 1935
=================================================================
Total params: 14,863
Trainable params: 14,863
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-134-8e360d25be14> in <module>()
7
8
----> 9 diff = train_nn_with_custom_loss_function(distance_loss_fn, base_nn, 0.00001, 32, 5)
12 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/optimizer_v2/utils.py in filter_empty_gradients(grads_and_vars)
74 if not filtered:
75 raise ValueError("No gradients provided for any variable: %s." %
---> 76 ([v.name for _, v in grads_and_vars],))
77 if vars_with_empty_grads:
78 logging.warning(
ValueError: No gradients provided for any variable: ['dense/kernel:0', 'dense/bias:0', 'dense_1/kernel:0', 'dense_1/bias:0'].
I have this loss function. The problem is that it is not differentiable.
Any help on how to make this loss function differentiable would be greatly appreciated.
Brief info about the approach: it is an unsupervised NN whose inputs are strings such as "get file name", and I need to predict a class between 1 and 15.
Loss function info:
Loss 3: labels with distance
- Extract the predicted class for the batch from the last layer.
- Compute the centroid of each class by averaging the points assigned to that class.
- Sum the distance between each point and its centroid (Hamming distance is used) to obtain the loss.
The problem is likely in your loss function, specifically pred = tf.argmax(y_pred, axis=1).numpy(). This operation is not differentiable, so no gradient flows through it, which causes the error you see.
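One way to work around this is to keep every step of the loss in TensorFlow ops and replace the hard argmax with the softmax probabilities themselves, used as soft cluster assignments, so that gradients can flow back into the network. Below is a minimal sketch under these assumptions: 15 classes and 100-dimensional inputs as in your model, a squared Euclidean distance in place of the non-differentiable Hamming distance, and a hypothetical function name soft_distance_loss_fn.

def soft_distance_loss_fn(y_true, y_pred):
    # y_true: (batch, 100) points to cluster; y_pred: (batch, 15) softmax outputs,
    # used here as soft cluster assignments instead of a hard argmax
    points = tf.cast(y_true, tf.float32)
    weights = y_pred
    # soft centroid of each cluster: assignment-weighted mean of the points -> (15, 100)
    weighted_sums = tf.matmul(weights, points, transpose_a=True)
    counts = tf.reduce_sum(weights, axis=0)[:, tf.newaxis] + 1e-8
    centroids = weighted_sums / counts
    # squared Euclidean distance of every point to every centroid -> (batch, 15)
    diffs = points[:, tf.newaxis, :] - centroids[tf.newaxis, :, :]
    sq_dists = tf.reduce_sum(tf.square(diffs), axis=-1)
    # weight each distance by how strongly the point is assigned to that cluster
    return tf.reduce_mean(tf.reduce_sum(weights * sq_dists, axis=-1))

Because this version never leaves the TensorFlow graph, run_eagerly=True is no longer needed in compile(); whether squared Euclidean is an acceptable substitute for Hamming depends on how your character features are encoded.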