Vgg-16 在尝试训练 MNIST 数据集时产生错误
Vgg-16 generating errors when trying to train MNIST dataset
我正在尝试在 MNIST 数据集上使用 Tensorflow 和 Keras 创建一个 Vgg-16 模型。
我已成功构建模型,但在训练我的 MNIST 数据集时出现错误。
我已经检查了这个错误的不同解决方案,但它似乎不起作用,因为我是这个领域的新手
Errors generated
ValueError: in user code:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:805 train_function *
return step_function(self, iterator)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:795 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:1259 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2730 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:3417 _call_for_each_replica
return fn(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:788 run_step **
outputs = model.train_step(data)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:756 train_step
y, y_pred, sample_weight, regularization_losses=self.losses)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/compile_utils.py:203 __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/losses.py:152 __call__
losses = call_fn(y_true, y_pred)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/losses.py:256 call **
return ag_fn(y_true, y_pred, **self._fn_kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper
return target(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/losses.py:1537 categorical_crossentropy
return K.categorical_crossentropy(y_true, y_pred, from_logits=from_logits)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper
return target(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/backend.py:4833 categorical_crossentropy
target.shape.assert_is_compatible_with(output.shape)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/tensor_shape.py:1134 assert_is_compatible_with
raise ValueError("Shapes %s and %s are incompatible" % (self, other))
ValueError: Shapes (None, 10, 10) and (None, 2) are incompatible
Model buildup
model = Sequential()
model.add(Conv2D(input_shape=(input_shape),filters=64,kernel_size=(3,3),padding="same", activation="relu"))
model.add(Conv2D(filters=64,kernel_size=(3,3),padding="same", activation="relu"))
model.add(MaxPool2D(pool_size=(2,2),strides=(2,2)))
model.add(Conv2D(filters=128, kernel_size=(3,3), padding="same", activation="relu"))
model.add(Conv2D(filters=128, kernel_size=(3,3), padding="same", activation="relu"))
model.add(MaxPool2D(pool_size=(2,2),strides=(2,2)))
model.add(Conv2D(filters=256, kernel_size=(3,3), padding="same", activation="relu"))
model.add(Conv2D(filters=256, kernel_size=(3,3), padding="same", activation="relu"))
model.add(Conv2D(filters=256, kernel_size=(3,3), padding="same", activation="relu"))
model.add(MaxPool2D(pool_size=(2,2),strides=(2,2)))
model.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
model.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
model.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
model.add(MaxPool2D(pool_size=(2,2),strides=(2,2)))
model.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
model.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
model.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
model.add(MaxPool2D(pool_size=(2,2),strides=(2,2), padding='same'))
model.add(Flatten())
model.add(Dense(units=4096,activation="relu")); model.add(Dropout(0.5))
model.add(Dense(units=4096,activation="relu")); model.add(Dropout(0.5))
model.add(Dense(units=2, activation="softmax"))
# Compile the Model
model.compile(optimizer='adam', loss=keras.losses.categorical_crossentropy, metrics=['accuracy'])
MNISt Dataset
mnist = tf.keras.datasets.mnist
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()
#processing
rows, cols = 28, 28
X_train = X_train.reshape(X_train.shape[0], rows, cols, 1)
X_test = X_test.reshape(X_test.shape[0], rows, cols, 1)
input_shape = (rows, cols, 1)
Y_train = tf.keras.utils.to_categorical(Y_train, 10)
Y_test = tf.keras.utils.to_categorical(Y_test, 10)
# Normalize
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train = X_train / 255.0
X_test = X_test / 255.0
training
history = model.fit(X_train, Y_train,
batch_size= 128,
epochs= 5,
verbose= 1)
最后一层应该输出 10 个值,因为 MNIST 包含 10 个 类。
model.add(Dense(units=10, activation="softmax"))
我复制了你的代码并运行它。一旦将顶层更改为具有 10 个神经元,它就可以正常运行。但是你的模型训练不好。我在下面提供了一个训练良好的简单模型。我还将您的测试集作为验证集包括在内。代码如下
model = tf.keras.Sequential([
Conv2D(16, 3, padding='same', activation='relu', input_shape=input_shape ),
MaxPooling2D(strides=1),
Conv2D(32, 3, padding='same', activation='relu'),
MaxPooling2D(strides=1),
Conv2D(64, 3, padding='same', activation='relu'),
MaxPooling2D(strides=1),
Conv2D(128, 3, padding='same', activation='relu'),
MaxPooling2D(strides=1),
Conv2D(256, 3, padding='same', activation='relu'),
MaxPooling2D(strides=1),
Flatten(),
Dense(128, activation='relu'),
Dropout(.3),
Dense(64, activation='relu'),
Dropout(.3),
Dense(10, activation='softmax')
])
model.compile(optimizer='adam', loss=keras.losses.categorical_crossentropy, metrics=['accuracy'])
print (model.summary())
val_data=(X_test, Y_test)
history = model.fit(X_train, Y_train, validation_data=val_data,
batch_size= 128,
epochs= 5,
verbose= 1)
# after 5 epochs result is accuracy: 0.9883 - val_accuracy: 0.9915
我正在尝试在 MNIST 数据集上使用 Tensorflow 和 Keras 创建一个 Vgg-16 模型。 我已成功构建模型,但在训练我的 MNIST 数据集时出现错误。 我已经检查了这个错误的不同解决方案,但它似乎不起作用,因为我是这个领域的新手
Errors generated
ValueError: in user code:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:805 train_function *
return step_function(self, iterator)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:795 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:1259 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2730 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:3417 _call_for_each_replica
return fn(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:788 run_step **
outputs = model.train_step(data)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:756 train_step
y, y_pred, sample_weight, regularization_losses=self.losses)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/compile_utils.py:203 __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/losses.py:152 __call__
losses = call_fn(y_true, y_pred)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/losses.py:256 call **
return ag_fn(y_true, y_pred, **self._fn_kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper
return target(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/losses.py:1537 categorical_crossentropy
return K.categorical_crossentropy(y_true, y_pred, from_logits=from_logits)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper
return target(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/backend.py:4833 categorical_crossentropy
target.shape.assert_is_compatible_with(output.shape)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/tensor_shape.py:1134 assert_is_compatible_with
raise ValueError("Shapes %s and %s are incompatible" % (self, other))
ValueError: Shapes (None, 10, 10) and (None, 2) are incompatible
Model buildup
model = Sequential()
model.add(Conv2D(input_shape=(input_shape),filters=64,kernel_size=(3,3),padding="same", activation="relu"))
model.add(Conv2D(filters=64,kernel_size=(3,3),padding="same", activation="relu"))
model.add(MaxPool2D(pool_size=(2,2),strides=(2,2)))
model.add(Conv2D(filters=128, kernel_size=(3,3), padding="same", activation="relu"))
model.add(Conv2D(filters=128, kernel_size=(3,3), padding="same", activation="relu"))
model.add(MaxPool2D(pool_size=(2,2),strides=(2,2)))
model.add(Conv2D(filters=256, kernel_size=(3,3), padding="same", activation="relu"))
model.add(Conv2D(filters=256, kernel_size=(3,3), padding="same", activation="relu"))
model.add(Conv2D(filters=256, kernel_size=(3,3), padding="same", activation="relu"))
model.add(MaxPool2D(pool_size=(2,2),strides=(2,2)))
model.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
model.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
model.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
model.add(MaxPool2D(pool_size=(2,2),strides=(2,2)))
model.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
model.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
model.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
model.add(MaxPool2D(pool_size=(2,2),strides=(2,2), padding='same'))
model.add(Flatten())
model.add(Dense(units=4096,activation="relu")); model.add(Dropout(0.5))
model.add(Dense(units=4096,activation="relu")); model.add(Dropout(0.5))
model.add(Dense(units=2, activation="softmax"))
# Compile the Model
model.compile(optimizer='adam', loss=keras.losses.categorical_crossentropy, metrics=['accuracy'])
MNISt Dataset
mnist = tf.keras.datasets.mnist
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()
#processing
rows, cols = 28, 28
X_train = X_train.reshape(X_train.shape[0], rows, cols, 1)
X_test = X_test.reshape(X_test.shape[0], rows, cols, 1)
input_shape = (rows, cols, 1)
Y_train = tf.keras.utils.to_categorical(Y_train, 10)
Y_test = tf.keras.utils.to_categorical(Y_test, 10)
# Normalize
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train = X_train / 255.0
X_test = X_test / 255.0
training
history = model.fit(X_train, Y_train,
batch_size= 128,
epochs= 5,
verbose= 1)
最后一层应该输出 10 个值,因为 MNIST 包含 10 个 类。
model.add(Dense(units=10, activation="softmax"))
我复制了你的代码并运行它。一旦将顶层更改为具有 10 个神经元,它就可以正常运行。但是你的模型训练不好。我在下面提供了一个训练良好的简单模型。我还将您的测试集作为验证集包括在内。代码如下
model = tf.keras.Sequential([
Conv2D(16, 3, padding='same', activation='relu', input_shape=input_shape ),
MaxPooling2D(strides=1),
Conv2D(32, 3, padding='same', activation='relu'),
MaxPooling2D(strides=1),
Conv2D(64, 3, padding='same', activation='relu'),
MaxPooling2D(strides=1),
Conv2D(128, 3, padding='same', activation='relu'),
MaxPooling2D(strides=1),
Conv2D(256, 3, padding='same', activation='relu'),
MaxPooling2D(strides=1),
Flatten(),
Dense(128, activation='relu'),
Dropout(.3),
Dense(64, activation='relu'),
Dropout(.3),
Dense(10, activation='softmax')
])
model.compile(optimizer='adam', loss=keras.losses.categorical_crossentropy, metrics=['accuracy'])
print (model.summary())
val_data=(X_test, Y_test)
history = model.fit(X_train, Y_train, validation_data=val_data,
batch_size= 128,
epochs= 5,
verbose= 1)
# after 5 epochs result is accuracy: 0.9883 - val_accuracy: 0.9915