ValueError: Error when checking target: expected up_sampling2d_2 to have 4 dimensions, but got array with shape (128, 1)
ValueError: Error when checking target: expected up_sampling2d_2 to have 4 dimensions, but got array with shape (128, 1)
我正在尝试使用自定义数据生成器训练堆叠式卷积自动编码器,因为它是我生成的非常大的合成数据集。我已经按照 https://medium.com/@mrgarg.rajat/training-on-large-datasets-that-dont-fit-in-memory-in-keras-60a974785d71 教程进行操作,但仍然无法正常工作
我的数据集目录是这样的:
real_train
- img 1.png
- img 2.png
- ....
这是我的 My_Data_Generator class
class My_Data_Generator(keras.utils.Sequence):
    """Keras Sequence that yields (images, labels) batches from filenames.

    Images are read from disk lazily one batch at a time, resized to
    105x105x1 and scaled to [0, 1].
    """

    def __init__(self, image_filenames, labels, batch_size):
        # Parallel arrays: image_filenames[i] corresponds to labels[i].
        self.image_filenames = image_filenames
        self.labels = labels
        self.batch_size = batch_size
        self.n = 0  # batch cursor used only by the __next__ protocol below

    def __next__(self):
        """Return the next batch, wrapping around at the end of an epoch."""
        data = self.__getitem__(self.n)
        self.n += 1
        if self.n >= self.__len__():
            # BUG FIX: the original referenced the method without calling it
            # (`self.on_epoch_end`), which evaluates to a bound method and
            # does nothing.
            self.on_epoch_end()
            self.n = 0
        return data

    def __len__(self):
        # Number of batches per epoch (the last batch may be smaller).
        # BUG FIX: np.int was removed in NumPy >= 1.24; the builtin int
        # is the drop-in replacement.
        return int(np.ceil(len(self.image_filenames) / float(self.batch_size)))

    def __getitem__(self, idx):
        batch_x = self.image_filenames[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_y = self.labels[idx * self.batch_size:(idx + 1) * self.batch_size]
        # NOTE(review): skimage's resize() already returns floats scaled to
        # [0, 1]; dividing by 255 again may double-scale — confirm against
        # the actual image data.
        return np.array([
            resize(imread('E:/FontRecognition/Dataset_Final/preprocessed/real_train/' + str(file_name)), (105,105,1))
            for file_name in batch_x]) / 255.0, np.array(batch_y)
这是我的代码
# load
# X_train / X_val are arrays of image *filenames*; the pixel data is read
# lazily per batch by My_Data_Generator.
X_train = np.load('X_train_filenames.npy')
X_val = np.load('X_val_filenames.npy')
# print(X_train.shape)
# print(X_val.shape)
batch_size = 128
# NOTE(review): the filename array is passed as both the images and the
# labels, so each target batch is an array of strings of shape (128,)
# (standardized to (128, 1)) — this is what raises the
# "expected up_sampling2d_2 to have 4 dimensions" ValueError during fit.
my_training_batch_generator = My_Data_Generator(X_train, X_train, batch_size=batch_size)
my_validation_batch_generator = My_Data_Generator(X_val, X_val, batch_size=batch_size)
# Sanity-check one batch from each generator before training.
images, labels = next(my_training_batch_generator)
print("Train")
print(images.shape)
print(labels.shape)
images, labels = next(my_validation_batch_generator)
print("Val")
print(images.shape)
print(labels.shape)
# Convolutional autoencoder: encoder = two conv+pool stages,
# decoder = two conv+upsample stages.
# NOTE(review): 105 is odd, so pooling gives 105 -> 52 -> 26 and
# upsampling restores 26 -> 52 -> 104; the 104x104 output can never
# match the 105x105 input used as the reconstruction target.
input_img = Input(shape=(105,105,1))
x = Conv2D(64, kernel_size=(48,48), activation='relu', padding='same', strides=1)(input_img)
x = BatchNormalization()(x)
x = MaxPooling2D(pool_size=(2,2)) (x)
x = Conv2D(128, kernel_size=(24,24), activation='relu', padding='same', strides=1)(x)
x = BatchNormalization()(x)
encoded = MaxPooling2D(pool_size=(2,2))(x)
x = Conv2D(64, kernel_size=(24,24), activation='relu', padding='same', strides=1)(encoded)
x = UpSampling2D(size=(2,2))(x)
x = Conv2D(1, kernel_size=(48,48), activation='relu', padding='same', strides=1)(x)
decoded = UpSampling2D(size=(2,2))(x)
adam = keras.optimizers.Adam(lr=0.01)
autoencoder = Model(input_img, decoded)
autoencoder.compile(optimizer=adam, loss='mean_squared_error')
autoencoder.summary()
num_epochs = 20
# Steps are derived from hard-coded dataset sizes (1,836,695 train /
# 459,174 val images); a keras.utils.Sequence can infer them instead.
autoencoder.fit_generator(generator=my_training_batch_generator,
steps_per_epoch=(int(1836695 // batch_size)),
epochs=num_epochs,
verbose=1,
validation_data=my_validation_batch_generator,
validation_steps=(int(459174 // batch_size))
# use_multiprocessing=True,
# workers=6
)
print("Finished")
我尝试运行代码,这是输出:
Model: "model_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) (None, 105, 105, 1) 0
_________________________________________________________________
conv2d_1 (Conv2D) (None, 105, 105, 64) 147520
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 52, 52, 64) 0
_________________________________________________________________
batch_normalization_1 (Batch (None, 52, 52, 64) 256
_________________________________________________________________
conv2d_2 (Conv2D) (None, 52, 52, 128) 4718720
_________________________________________________________________
batch_normalization_2 (Batch (None, 52, 52, 128) 512
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 26, 26, 128) 0
_________________________________________________________________
conv2d_3 (Conv2D) (None, 26, 26, 64) 4718656
_________________________________________________________________
up_sampling2d_1 (UpSampling2 (None, 52, 52, 64) 0
_________________________________________________________________
conv2d_4 (Conv2D) (None, 52, 52, 1) 147457
_________________________________________________________________
up_sampling2d_2 (UpSampling2 (None, 104, 104, 1) 0
=================================================================
Total params: 9,733,121
Trainable params: 9,732,737
Non-trainable params: 384
_________________________________________________________________
Epoch 1/20
Traceback (most recent call last):
File "SCAE_train.py", line 142, in <module>
validation_steps=(int(459174 // batch_size))
File "C:\MyProgramFiles\Anaconda3\envs\tf_gpu\lib\site-packages\keras\legacy\interfaces.py", line 91, in wrapper
return func(*args, **kwargs)
File "C:\MyProgramFiles\Anaconda3\envs\tf_gpu\lib\site-packages\keras\engine\training.py", line 1732, in fit_generator
initial_epoch=initial_epoch)
File "C:\MyProgramFiles\Anaconda3\envs\tf_gpu\lib\site-packages\keras\engine\training_generator.py", line 221, in fit_generator
reset_metrics=False)
File "C:\MyProgramFiles\Anaconda3\envs\tf_gpu\lib\site-packages\keras\engine\training.py", line 1508, in train_on_batch
class_weight=class_weight)
File "C:\MyProgramFiles\Anaconda3\envs\tf_gpu\lib\site-packages\keras\engine\training.py", line 621, in _standardize_user_data
exception_prefix='target')
File "C:\MyProgramFiles\Anaconda3\envs\tf_gpu\lib\site-packages\keras\engine\training_utils.py", line 135, in standardize_input_data
'with shape ' + str(data_shape))
ValueError: Error when checking target: expected up_sampling2d_2 to have 4 dimensions, but got array with shape (128, 1)
我是 keras 和 python 的新手,我仍然不知道是什么原因造成的..
首先,你的模型输入输出形状不匹配。您的模型输入大小为 105x105,而输出大小为 104x104。使用类似的输入大小或调整卷积层中的 kernel/stride 大小。
但是要回答您的问题:请注意您参考的教程做的是分类,因此目标的形状是 (batch_size, number_of_categories)。而您训练的是自动编码器,这意味着您应该让数据生成器返回合适的目标,即与输入相同的 (batch_size, HEIGHT, WIDTH, NUM_CHANNELS) 形状。
你的输入和输出图像是一样的,所以你的数据生成器不需要额外的 labels 参数,只需要读取图像并返回它的两份副本即可。假设你的图像文件格式和目录都正确,我已将你的代码编辑如下:
您的数据生成器:
class My_Custom_Generator(keras.utils.Sequence):
    """Autoencoder Sequence: each batch is (images, images).

    The autoencoder's target is its own input, so the same batch array is
    returned twice and no separate labels are needed.
    """

    def __init__(self, image_filenames, batch_size):
        self.image_filenames = image_filenames
        self.batch_size = batch_size

    def __len__(self):
        # Number of batches per epoch (the last batch may be smaller).
        # BUG FIX: np.int was removed in NumPy >= 1.24; the builtin int
        # is the drop-in replacement.
        return int(np.ceil(len(self.image_filenames) / float(self.batch_size)))

    def __getitem__(self, idx):
        batch_x = self.image_filenames[idx * self.batch_size:(idx + 1) * self.batch_size]
        # BUG FIX: the original was missing the opening '[' of the list
        # comprehension, which is a syntax error.
        # BUG FIX: resize to (104, 104, 1) to match the model's 104x104
        # input/output; the original (105, 105, 1) would not fit the model
        # defined below it.
        current_x = np.array([
            resize(imread('E:/FontRecognition/Dataset_Final/preprocessed/real_train/' + str(file_name)), (104,104,1))
            for file_name in batch_x]) / 255.0
        return current_x, current_x
您的模型和脚本:
# load the arrays of image *filenames*; pixels are read lazily per batch
# by the My_Custom_Generator defined above.
X_train = np.load('X_train_filenames.npy')
X_val = np.load('X_val_filenames.npy')
# print(X_train.shape)
# print(X_val.shape)
batch_size = 128
# BUG FIX: the generator class defined above is My_Custom_Generator; the
# original instantiated a nonexistent My_Data_Generator.
my_training_batch_generator = My_Custom_Generator(X_train, batch_size=batch_size)
my_validation_batch_generator = My_Custom_Generator(X_val, batch_size=batch_size)

# 104x104 input so that two 2x2 poolings followed by two 2x upsamplings
# reproduce the input size exactly (104 -> 52 -> 26 -> 52 -> 104), making
# the reconstruction target shape match the model output.
input_img = keras.layers.Input(shape=(104,104,1))
x = keras.layers.Conv2D(64, kernel_size=(48,48), activation='relu', padding='same', strides=1)(input_img)
x = keras.layers.BatchNormalization()(x)
x = keras.layers.MaxPooling2D(pool_size=(2,2), padding='same')(x)
x = keras.layers.Conv2D(128, kernel_size=(24,24), activation='relu', padding='same', strides=1)(x)
x = keras.layers.BatchNormalization()(x)
encoded = keras.layers.MaxPooling2D(pool_size=(2,2))(x)
x = keras.layers.Conv2D(64, kernel_size=(24,24), activation='relu', padding='same', strides=1)(encoded)
x = keras.layers.UpSampling2D(size=(2,2))(x)
x = keras.layers.Conv2D(1, kernel_size=(48,48), activation='relu', padding='same', strides=1)(x)
decoded = keras.layers.UpSampling2D(size=(2,2))(x)

autoencoder = keras.Model(input_img, decoded)
autoencoder.summary()
adam = keras.optimizers.Adam(lr=0.01)
autoencoder.compile(optimizer=adam, loss='mean_squared_error')

num_epochs = 20
# steps_per_epoch / validation_steps are omitted: a keras.utils.Sequence
# infers them from len(generator).
autoencoder.fit_generator(generator=my_training_batch_generator,
                          epochs=num_epochs,
                          verbose=1,
                          validation_data=my_validation_batch_generator
                          # use_multiprocessing=True,
                          # workers=6
                          )
请注意,我已经删除了 steps_per_epoch 和 validation_steps 参数,因为继承自 keras.utils.Sequence 的自定义数据生成器不需要它们,它们可以直接从数据中推断出来。
我正在尝试使用自定义数据生成器训练堆叠式卷积自动编码器,因为它是我生成的非常大的合成数据集。我已经按照 https://medium.com/@mrgarg.rajat/training-on-large-datasets-that-dont-fit-in-memory-in-keras-60a974785d71 教程进行操作,但仍然无法正常工作
我的数据集目录是这样的:
real_train
- img 1.png
- img 2.png
- ....
这是我的 My_Data_Generator class
class My_Data_Generator(keras.utils.Sequence):
    """Keras Sequence that yields (images, labels) batches from filenames.

    Images are read from disk lazily one batch at a time, resized to
    105x105x1 and scaled to [0, 1].
    """

    def __init__(self, image_filenames, labels, batch_size):
        # Parallel arrays: image_filenames[i] corresponds to labels[i].
        self.image_filenames = image_filenames
        self.labels = labels
        self.batch_size = batch_size
        self.n = 0  # batch cursor used only by the __next__ protocol below

    def __next__(self):
        """Return the next batch, wrapping around at the end of an epoch."""
        data = self.__getitem__(self.n)
        self.n += 1
        if self.n >= self.__len__():
            # BUG FIX: the original referenced the method without calling it
            # (`self.on_epoch_end`), which evaluates to a bound method and
            # does nothing.
            self.on_epoch_end()
            self.n = 0
        return data

    def __len__(self):
        # Number of batches per epoch (the last batch may be smaller).
        # BUG FIX: np.int was removed in NumPy >= 1.24; the builtin int
        # is the drop-in replacement.
        return int(np.ceil(len(self.image_filenames) / float(self.batch_size)))

    def __getitem__(self, idx):
        batch_x = self.image_filenames[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_y = self.labels[idx * self.batch_size:(idx + 1) * self.batch_size]
        # NOTE(review): skimage's resize() already returns floats scaled to
        # [0, 1]; dividing by 255 again may double-scale — confirm against
        # the actual image data.
        return np.array([
            resize(imread('E:/FontRecognition/Dataset_Final/preprocessed/real_train/' + str(file_name)), (105,105,1))
            for file_name in batch_x]) / 255.0, np.array(batch_y)
这是我的代码
# load
# X_train / X_val are arrays of image *filenames*; the pixel data is read
# lazily per batch by My_Data_Generator.
X_train = np.load('X_train_filenames.npy')
X_val = np.load('X_val_filenames.npy')
# print(X_train.shape)
# print(X_val.shape)
batch_size = 128
# NOTE(review): the filename array is passed as both the images and the
# labels, so each target batch is an array of strings of shape (128,)
# (standardized to (128, 1)) — this is what raises the
# "expected up_sampling2d_2 to have 4 dimensions" ValueError during fit.
my_training_batch_generator = My_Data_Generator(X_train, X_train, batch_size=batch_size)
my_validation_batch_generator = My_Data_Generator(X_val, X_val, batch_size=batch_size)
# Sanity-check one batch from each generator before training.
images, labels = next(my_training_batch_generator)
print("Train")
print(images.shape)
print(labels.shape)
images, labels = next(my_validation_batch_generator)
print("Val")
print(images.shape)
print(labels.shape)
# Convolutional autoencoder: encoder = two conv+pool stages,
# decoder = two conv+upsample stages.
# NOTE(review): 105 is odd, so pooling gives 105 -> 52 -> 26 and
# upsampling restores 26 -> 52 -> 104; the 104x104 output can never
# match the 105x105 input used as the reconstruction target.
input_img = Input(shape=(105,105,1))
x = Conv2D(64, kernel_size=(48,48), activation='relu', padding='same', strides=1)(input_img)
x = BatchNormalization()(x)
x = MaxPooling2D(pool_size=(2,2)) (x)
x = Conv2D(128, kernel_size=(24,24), activation='relu', padding='same', strides=1)(x)
x = BatchNormalization()(x)
encoded = MaxPooling2D(pool_size=(2,2))(x)
x = Conv2D(64, kernel_size=(24,24), activation='relu', padding='same', strides=1)(encoded)
x = UpSampling2D(size=(2,2))(x)
x = Conv2D(1, kernel_size=(48,48), activation='relu', padding='same', strides=1)(x)
decoded = UpSampling2D(size=(2,2))(x)
adam = keras.optimizers.Adam(lr=0.01)
autoencoder = Model(input_img, decoded)
autoencoder.compile(optimizer=adam, loss='mean_squared_error')
autoencoder.summary()
num_epochs = 20
# Steps are derived from hard-coded dataset sizes (1,836,695 train /
# 459,174 val images); a keras.utils.Sequence can infer them instead.
autoencoder.fit_generator(generator=my_training_batch_generator,
steps_per_epoch=(int(1836695 // batch_size)),
epochs=num_epochs,
verbose=1,
validation_data=my_validation_batch_generator,
validation_steps=(int(459174 // batch_size))
# use_multiprocessing=True,
# workers=6
)
print("Finished")
我尝试运行代码,这是输出:
Model: "model_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) (None, 105, 105, 1) 0
_________________________________________________________________
conv2d_1 (Conv2D) (None, 105, 105, 64) 147520
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 52, 52, 64) 0
_________________________________________________________________
batch_normalization_1 (Batch (None, 52, 52, 64) 256
_________________________________________________________________
conv2d_2 (Conv2D) (None, 52, 52, 128) 4718720
_________________________________________________________________
batch_normalization_2 (Batch (None, 52, 52, 128) 512
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 26, 26, 128) 0
_________________________________________________________________
conv2d_3 (Conv2D) (None, 26, 26, 64) 4718656
_________________________________________________________________
up_sampling2d_1 (UpSampling2 (None, 52, 52, 64) 0
_________________________________________________________________
conv2d_4 (Conv2D) (None, 52, 52, 1) 147457
_________________________________________________________________
up_sampling2d_2 (UpSampling2 (None, 104, 104, 1) 0
=================================================================
Total params: 9,733,121
Trainable params: 9,732,737
Non-trainable params: 384
_________________________________________________________________
Epoch 1/20
Traceback (most recent call last):
File "SCAE_train.py", line 142, in <module>
validation_steps=(int(459174 // batch_size))
File "C:\MyProgramFiles\Anaconda3\envs\tf_gpu\lib\site-packages\keras\legacy\interfaces.py", line 91, in wrapper
return func(*args, **kwargs)
File "C:\MyProgramFiles\Anaconda3\envs\tf_gpu\lib\site-packages\keras\engine\training.py", line 1732, in fit_generator
initial_epoch=initial_epoch)
File "C:\MyProgramFiles\Anaconda3\envs\tf_gpu\lib\site-packages\keras\engine\training_generator.py", line 221, in fit_generator
reset_metrics=False)
File "C:\MyProgramFiles\Anaconda3\envs\tf_gpu\lib\site-packages\keras\engine\training.py", line 1508, in train_on_batch
class_weight=class_weight)
File "C:\MyProgramFiles\Anaconda3\envs\tf_gpu\lib\site-packages\keras\engine\training.py", line 621, in _standardize_user_data
exception_prefix='target')
File "C:\MyProgramFiles\Anaconda3\envs\tf_gpu\lib\site-packages\keras\engine\training_utils.py", line 135, in standardize_input_data
'with shape ' + str(data_shape))
ValueError: Error when checking target: expected up_sampling2d_2 to have 4 dimensions, but got array with shape (128, 1)
我是 keras 和 python 的新手,我仍然不知道是什么原因造成的..
首先,你的模型输入输出形状不匹配。您的模型输入大小为 105x105,而输出大小为 104x104。使用类似的输入大小或调整卷积层中的 kernel/stride 大小。
但是要回答您的问题:请注意您参考的教程做的是分类,因此目标的形状是 (batch_size, number_of_categories)。而您训练的是自动编码器,这意味着您应该让数据生成器返回合适的目标,即与输入相同的 (batch_size, HEIGHT, WIDTH, NUM_CHANNELS) 形状。
你的输入和输出图像是一样的,所以你的数据生成器不需要额外的 labels 参数,只需要读取图像并返回它的两份副本即可。假设你的图像文件格式和目录都正确,我已将你的代码编辑如下:
您的数据生成器:
class My_Custom_Generator(keras.utils.Sequence):
    """Autoencoder Sequence: each batch is (images, images).

    The autoencoder's target is its own input, so the same batch array is
    returned twice and no separate labels are needed.
    """

    def __init__(self, image_filenames, batch_size):
        self.image_filenames = image_filenames
        self.batch_size = batch_size

    def __len__(self):
        # Number of batches per epoch (the last batch may be smaller).
        # BUG FIX: np.int was removed in NumPy >= 1.24; the builtin int
        # is the drop-in replacement.
        return int(np.ceil(len(self.image_filenames) / float(self.batch_size)))

    def __getitem__(self, idx):
        batch_x = self.image_filenames[idx * self.batch_size:(idx + 1) * self.batch_size]
        # BUG FIX: the original was missing the opening '[' of the list
        # comprehension, which is a syntax error.
        # BUG FIX: resize to (104, 104, 1) to match the model's 104x104
        # input/output; the original (105, 105, 1) would not fit the model
        # defined below it.
        current_x = np.array([
            resize(imread('E:/FontRecognition/Dataset_Final/preprocessed/real_train/' + str(file_name)), (104,104,1))
            for file_name in batch_x]) / 255.0
        return current_x, current_x
您的模型和脚本:
# load the arrays of image *filenames*; pixels are read lazily per batch
# by the My_Custom_Generator defined above.
X_train = np.load('X_train_filenames.npy')
X_val = np.load('X_val_filenames.npy')
# print(X_train.shape)
# print(X_val.shape)
batch_size = 128
# BUG FIX: the generator class defined above is My_Custom_Generator; the
# original instantiated a nonexistent My_Data_Generator.
my_training_batch_generator = My_Custom_Generator(X_train, batch_size=batch_size)
my_validation_batch_generator = My_Custom_Generator(X_val, batch_size=batch_size)

# 104x104 input so that two 2x2 poolings followed by two 2x upsamplings
# reproduce the input size exactly (104 -> 52 -> 26 -> 52 -> 104), making
# the reconstruction target shape match the model output.
input_img = keras.layers.Input(shape=(104,104,1))
x = keras.layers.Conv2D(64, kernel_size=(48,48), activation='relu', padding='same', strides=1)(input_img)
x = keras.layers.BatchNormalization()(x)
x = keras.layers.MaxPooling2D(pool_size=(2,2), padding='same')(x)
x = keras.layers.Conv2D(128, kernel_size=(24,24), activation='relu', padding='same', strides=1)(x)
x = keras.layers.BatchNormalization()(x)
encoded = keras.layers.MaxPooling2D(pool_size=(2,2))(x)
x = keras.layers.Conv2D(64, kernel_size=(24,24), activation='relu', padding='same', strides=1)(encoded)
x = keras.layers.UpSampling2D(size=(2,2))(x)
x = keras.layers.Conv2D(1, kernel_size=(48,48), activation='relu', padding='same', strides=1)(x)
decoded = keras.layers.UpSampling2D(size=(2,2))(x)

autoencoder = keras.Model(input_img, decoded)
autoencoder.summary()
adam = keras.optimizers.Adam(lr=0.01)
autoencoder.compile(optimizer=adam, loss='mean_squared_error')

num_epochs = 20
# steps_per_epoch / validation_steps are omitted: a keras.utils.Sequence
# infers them from len(generator).
autoencoder.fit_generator(generator=my_training_batch_generator,
                          epochs=num_epochs,
                          verbose=1,
                          validation_data=my_validation_batch_generator
                          # use_multiprocessing=True,
                          # workers=6
                          )
请注意,我已经删除了 steps_per_epoch 和 validation_steps 参数,因为继承自 keras.utils.Sequence 的自定义数据生成器不需要它们,它们可以直接从数据中推断出来。