Keras: compute the total loss over all epochs instead of individual per-epoch losses

# Importing the required Keras modules containing model and layers
import tensorflow as tf  # needed below for tf.nn.relu / tf.nn.softmax
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Dropout, Flatten, MaxPooling2D, Conv2DTranspose
from keras.utils.vis_utils import plot_model

# Creating a Sequential Model and adding the layers
model = Sequential()
# 63 filters, each a 3x3 convolution
model.add(Conv2D(63, kernel_size=(3, 3), input_shape=input_shape))
# Then 2x2 max pooling
model.add(MaxPooling2D(pool_size=(2, 2)))
# Flatten the 2D feature maps for the fully connected layers
model.add(Flatten())
model.add(Dense(1000, activation=tf.nn.relu))
model.add(Dropout(0.2))  # combat overfitting by randomly dropping units
# The softmax layer needs one neuron per label: 10 here, for digits 0-9
model.add(Dense(10, activation=tf.nn.softmax))


# Visualize the network architecture
plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

model.compile(optimizer='adam', 
              loss='sparse_categorical_crossentropy', 
              metrics=['accuracy'])
history = model.fit(x=x_train, y=y_train,
                    batch_size=batch_size,
                    epochs=30,
                    verbose=verbosity,
                    validation_split=validation_split)

As you can see above, we use loss='sparse_categorical_crossentropy', but the model runs for 30 epochs and produces a training and validation loss for each epoch. In this case there are 10 classes, as specified in the last Dense layer.

My question is: how can we compute the total loss over all 30 epochs based on sparse_categorical_crossentropy, rather than the individual loss of each epoch?
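(For context: fit() already returns these per-epoch averages in its History object, so one straightforward total is simply their sum. A minimal sketch, assuming the history variable from the snippet above:)

# history.history['loss'] holds one average training loss per epoch,
# so summing it gives a single total over all 30 epochs
total_train_loss = sum(history.history['loss'])
print('Total training loss over 30 epochs:', total_train_loss)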

The fit() method is not really designed for this kind of customization. So why not use a custom training loop?

import tensorflow as tf
from sklearn.datasets import load_iris
iris, target = load_iris(return_X_y=True)

# first three iris features as inputs, the fourth as a regression target
X = iris[:, :3]
y = iris[:, 3].reshape(-1, 1)

ds = tf.data.Dataset.from_tensor_slices((X, y)).shuffle(25).batch(8)

model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(16, activation='relu', input_shape=(3,)),
    tf.keras.layers.Dense(1)])

loss_object = tf.keras.losses.MeanSquaredError()

optimizer = tf.keras.optimizers.Adam(learning_rate=5e-4)

loss = tf.keras.metrics.Mean(name='loss')    # average loss, reset each epoch
error = tf.keras.metrics.MeanSquaredError()  # MSE, reset each epoch
cumul_loss = tf.keras.metrics.Sum()          # running total, never reset

@tf.function
def train_step(inputs, targets):
    with tf.GradientTape() as tape:
        predictions = model(inputs)
        run_loss = loss_object(targets, predictions)
    gradients = tape.gradient(run_loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    loss(run_loss)
    cumul_loss(run_loss)
    error(targets, predictions)  # update_state expects (y_true, y_pred)

for epoch in range(10):
    for data, labels in ds:
        train_step(data, labels)

    template = 'Epoch {:>2}, Loss: {:>7.4f}, MSE: {:>6.2f}, Cumul MSE: {:>7.2f}'
    print(template.format(epoch + 1,
                          loss.result(),
                          error.result() * 100,
                          cumul_loss.result()))

    # reset the per-epoch metrics; cumul_loss is deliberately left alone
    # so that it keeps accumulating across epochs
    loss.reset_states()
    error.reset_states()
Output (truncated to the last six epochs):

Epoch  5, Loss:  0.0947, MSE:   9.37, Cumul MSE:   16.39
Epoch  6, Loss:  0.0807, MSE:   8.04, Cumul MSE:   17.92
Epoch  7, Loss:  0.0675, MSE:   6.76, Cumul MSE:   19.21
Epoch  8, Loss:  0.0623, MSE:   6.19, Cumul MSE:   20.39
Epoch  9, Loss:  0.0559, MSE:   5.52, Cumul MSE:   21.45
Epoch 10, Loss:  0.0521, MSE:   5.15, Cumul MSE:   22.44
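Note that the grand total can also be read straight off the Sum metric after training, since it was never reset (a small usage sketch with the objects defined above):

# the Sum metric accumulated over all 10 epochs without being reset
print('Total loss over training: {:.4f}'.format(float(cumul_loss.result())))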

And here is an example with the Sequential API, subclassing tf.keras.callbacks.Callback:

import tensorflow as tf
from tensorflow.keras.layers import Dense
from sklearn.datasets import load_iris
import numpy as np
iris, target = load_iris(return_X_y=True)

# first three features as inputs; the integer class labels (0, 1, 2) as targets
X = iris[:, :3].astype(np.float32)
y = target

model = tf.keras.models.Sequential([
    Dense(8, input_shape=(3,), activation='relu'),
    Dense(3, activation='softmax')
])
cumul_loss = tf.metrics.Sum(name='cumul_loss')

model.compile(loss='sparse_categorical_crossentropy', optimizer='adam')

class CustomCallback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        # add this epoch's average loss to the running total
        cumul_loss.update_state(logs['loss'])
        print(' {}: {:.4f}'.format(cumul_loss.name, cumul_loss.result()), end='')

history = model.fit(X, y, callbacks=[CustomCallback()], epochs=10)
Output (last epoch shown):

Epoch 10/10
 32/150 [=====>........................] - ETA: 0s - loss: 1.7872 cumul_loss: 25.0464
150/150 [==============================] - 0s 113us/sample - loss: 1.8853
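Since fit() records the same per-epoch averages in its History object, the callback's final total can be cross-checked once training finishes (a minimal sketch using the history variable above):

# summing the recorded per-epoch losses should reproduce the callback's total
print(sum(history.history['loss']))
print(float(cumul_loss.result()))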