InvalidArgumentError: Incompatible shapes: [8,3] vs. [8,4]
InvalidArgumentError: Incompatible shapes: [8,3] vs. [8,4]
我正在使用包含四个 类 的 X 射线扫描的 DICOM 图像来解决对象检测和分类问题。当我将所有图像转换为 numpy 数组并将其拟合到模型时,我的模型起作用了。但后来我决定编写一个图像生成器来一次提供一批图像,它会抛出一个错误:
InvalidArgumentError: Incompatible shapes: [8,3] vs. [8,4] [[node mean_squared_error/SquaredDifference (defined at <ipython-input-25-90d4137ca5f8>:6) ]] [Op:__inference_train_function_3036]
8
这是我的 batch_size.
这是我的生成器的代码:
class My_Generator(tf.keras.utils.Sequence):
def __init__(self, filepaths, dataframe, batch_size, image_target_size=(224, 224)):
self.filepaths = filepaths
self.dataframe = dataframe
self.batch_size = batch_size
self.image_target_size = image_target_size
def __len__(self):
return (np.ceil(len(self.filepaths) / float(self.batch_size))).astype(np.int)
def __getitem__(self, ind) :
filepaths_batch = self.filepaths[ind * self.batch_size : (ind + 1) * self.batch_size]
images = []
labels = []
bboxes = []
for filepath in filepaths_batch:
image = self._preprocess_image(filepath)
label, bbox = self._get_label_and_bbox(filepath)
images.append(image)
labels.append(label)
bboxes.append(bbox)
images_batch = np.array(images, dtype="float32")
labels_batch = np.array(labels)
bboxes_batch = np.array(bboxes, dtype="float32")
lb = LabelBinarizer()
labels_batch = lb.fit_transform(labels_batch)
return images_batch, (labels_batch, bboxes_batch)
def _preprocess_image(self, filepath):
"""
Returns np array of image pixels of particular size
"""
image_bytes = tf.io.read_file(filepath)
image = tfio.image.decode_dicom_image(image_bytes, color_dim=True, on_error='lossy', scale='auto', dtype=tf.uint16)
image = tf.squeeze(image, [0])
self.h, self.w, _ = image.shape
image = tf.image.resize(image, self.image_target_size)
# Convert images from 1 channel to 3 channels (RGB), needed for feeding into the Keras model
image = tf.image.grayscale_to_rgb(image).numpy()
image /= 65536
return image
def _get_label_and_bbox(self, filepath):
filename = filepath.rsplit('/')[-1].replace('.dcm', '')
row_array = self.dataframe.loc[self.dataframe['id'] == filename].values
bbox = [0, 0, 0, 0]
xmin = row_array[0][1]
ymin = row_array[0][2]
xmax = row_array[0][3]
ymax = row_array[0][4]
label = row_array[0][5]
print(self.h, self.w)
if label == 'negative':
bbox[0] = 0
bbox[1] = 0
bbox[2] = 1 / self.image_target_size[1]
bbox[3] = 1 / self.image_target_size[0]
else:
bbox[0] = xmin / self.w
bbox[1] = ymin / self.h
bbox[2] = xmax / self.w
bbox[3] = ymax / self.h
return label, bbox
train_generator = My_Generator(train_data, train_result, batch_size)
val_generator = My_Generator(val_data, train_result, batch_size)
这是我的模型:
base_model = tf.keras.models.Sequential([
tf.keras.layers.Conv2D(16, (3, 3), activation='relu', input_shape=(224, 224, 3)),
tf.keras.layers.MaxPooling2D(2, 2),
tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
tf.keras.layers.MaxPooling2D(2, 2),
tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
tf.keras.layers.MaxPooling2D(2, 2),
tf.keras.layers.Flatten()
])
flatten = base_model.output
bbox_head = Dense(128, activation="relu")(flatten)
bbox_head = Dense(64, activation="relu")(bbox_head)
bbox_head = Dense(32, activation="relu")(bbox_head)
bbox_head = Dense(4, activation="sigmoid", name="bounding_box")(bbox_head)
label_head = Dense(512, activation="relu")(flatten)
label_head = Dropout(0.5)(label_head)
label_head = Dense(512, activation="relu")(label_head)
label_head = Dropout(0.5)(label_head)
label_head = Dense(4, activation="softmax", name="class_label")(label_head)
model = Model(inputs=base_model.input, outputs=(bbox_head, label_head))
losses = {"class_label": "categorical_crossentropy", "bounding_box": "mean_squared_error",}
loss_weights = {"class_label": 1.0, "bounding_box": 1.0}
opt = Adam(learning_rate=INITIAL_LR)
model.compile(loss=losses, optimizer=opt, metrics=["accuracy"], loss_weights=loss_weights)
model.fit(train_generator,
steps_per_epoch = int(len(train_data) // batch_size),
epochs = 10,
verbose = 1,
validation_data = val_generator
)
这些行
lb = LabelBinarizer()
labels_batch = lb.fit_transform(labels_batch)
意味着如果 8 个标签中的 none 是 3
,那么您将只得到 (8, 3)
输出,例如
>>> lb = LabelBinarizer()
>>> lb.fit_transform([0, 1, 2, 0, 2, 0, 0, 1])
array([[1, 0, 0],
[0, 1, 0],
[0, 0, 1],
[1, 0, 0],
[0, 0, 1],
[1, 0, 0],
[1, 0, 0],
[0, 1, 0]])
有 2 个简单的解决方法
- 反正你用的是tf.keras,就用官方的keras方法
>>> labels_batch = tf.keras.utils.to_categorical(labels_batch, 4)
您可以使用前面的示例进行测试
>>> tf.keras.utils.to_categorical([0, 1, 2, 0, 2, 0, 0, 1], 4)
array([[1., 0., 0., 0.],
[0., 1., 0., 0.],
[0., 0., 1., 0.],
[1., 0., 0., 0.],
[0., 0., 1., 0.],
[1., 0., 0., 0.],
[1., 0., 0., 0.],
[0., 1., 0., 0.]], dtype=float32)
- 将
LabelBinarizer
放入 __init__
并在那里调用 lb.fit
并使用 lb.transform
而不是 lb.fit_transform
def __init__
self.filepaths = filepaths
self.dataframe = dataframe
self.batch_size = batch_size
self.image_target_size = image_target_size
self.lb = LabelBinarizer()
self.lb.fit([0, 1, 2, 3])
def __getitem__
labels_batch = self.lb.transform(labels_batch)
这方面的例子是
>>> lb = LabelBinarizer()
>>> lb.fit([0, 1, 2, 3])
>>> lb.transform([0, 1, 2, 0, 2, 0, 0, 1])
array([[1, 0, 0, 0],
[0, 1, 0, 0],
[0, 0, 1, 0],
[1, 0, 0, 0],
[0, 0, 1, 0],
[1, 0, 0, 0],
[1, 0, 0, 0],
[0, 1, 0, 0]])
我正在使用包含四个 类 的 X 射线扫描的 DICOM 图像来解决对象检测和分类问题。当我将所有图像转换为 numpy 数组并将其拟合到模型时,我的模型起作用了。但后来我决定编写一个图像生成器来一次提供一批图像,它会抛出一个错误:
InvalidArgumentError: Incompatible shapes: [8,3] vs. [8,4] [[node mean_squared_error/SquaredDifference (defined at <ipython-input-25-90d4137ca5f8>:6) ]] [Op:__inference_train_function_3036]
8
这是我的 batch_size.
这是我的生成器的代码:
class My_Generator(tf.keras.utils.Sequence):
def __init__(self, filepaths, dataframe, batch_size, image_target_size=(224, 224)):
self.filepaths = filepaths
self.dataframe = dataframe
self.batch_size = batch_size
self.image_target_size = image_target_size
def __len__(self):
return (np.ceil(len(self.filepaths) / float(self.batch_size))).astype(np.int)
def __getitem__(self, ind) :
filepaths_batch = self.filepaths[ind * self.batch_size : (ind + 1) * self.batch_size]
images = []
labels = []
bboxes = []
for filepath in filepaths_batch:
image = self._preprocess_image(filepath)
label, bbox = self._get_label_and_bbox(filepath)
images.append(image)
labels.append(label)
bboxes.append(bbox)
images_batch = np.array(images, dtype="float32")
labels_batch = np.array(labels)
bboxes_batch = np.array(bboxes, dtype="float32")
lb = LabelBinarizer()
labels_batch = lb.fit_transform(labels_batch)
return images_batch, (labels_batch, bboxes_batch)
def _preprocess_image(self, filepath):
"""
Returns np array of image pixels of particular size
"""
image_bytes = tf.io.read_file(filepath)
image = tfio.image.decode_dicom_image(image_bytes, color_dim=True, on_error='lossy', scale='auto', dtype=tf.uint16)
image = tf.squeeze(image, [0])
self.h, self.w, _ = image.shape
image = tf.image.resize(image, self.image_target_size)
# Convert images from 1 channel to 3 channels (RGB), needed for feeding into the Keras model
image = tf.image.grayscale_to_rgb(image).numpy()
image /= 65536
return image
def _get_label_and_bbox(self, filepath):
filename = filepath.rsplit('/')[-1].replace('.dcm', '')
row_array = self.dataframe.loc[self.dataframe['id'] == filename].values
bbox = [0, 0, 0, 0]
xmin = row_array[0][1]
ymin = row_array[0][2]
xmax = row_array[0][3]
ymax = row_array[0][4]
label = row_array[0][5]
print(self.h, self.w)
if label == 'negative':
bbox[0] = 0
bbox[1] = 0
bbox[2] = 1 / self.image_target_size[1]
bbox[3] = 1 / self.image_target_size[0]
else:
bbox[0] = xmin / self.w
bbox[1] = ymin / self.h
bbox[2] = xmax / self.w
bbox[3] = ymax / self.h
return label, bbox
train_generator = My_Generator(train_data, train_result, batch_size)
val_generator = My_Generator(val_data, train_result, batch_size)
这是我的模型:
base_model = tf.keras.models.Sequential([
tf.keras.layers.Conv2D(16, (3, 3), activation='relu', input_shape=(224, 224, 3)),
tf.keras.layers.MaxPooling2D(2, 2),
tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
tf.keras.layers.MaxPooling2D(2, 2),
tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
tf.keras.layers.MaxPooling2D(2, 2),
tf.keras.layers.Flatten()
])
flatten = base_model.output
bbox_head = Dense(128, activation="relu")(flatten)
bbox_head = Dense(64, activation="relu")(bbox_head)
bbox_head = Dense(32, activation="relu")(bbox_head)
bbox_head = Dense(4, activation="sigmoid", name="bounding_box")(bbox_head)
label_head = Dense(512, activation="relu")(flatten)
label_head = Dropout(0.5)(label_head)
label_head = Dense(512, activation="relu")(label_head)
label_head = Dropout(0.5)(label_head)
label_head = Dense(4, activation="softmax", name="class_label")(label_head)
model = Model(inputs=base_model.input, outputs=(bbox_head, label_head))
losses = {"class_label": "categorical_crossentropy", "bounding_box": "mean_squared_error",}
loss_weights = {"class_label": 1.0, "bounding_box": 1.0}
opt = Adam(learning_rate=INITIAL_LR)
model.compile(loss=losses, optimizer=opt, metrics=["accuracy"], loss_weights=loss_weights)
model.fit(train_generator,
steps_per_epoch = int(len(train_data) // batch_size),
epochs = 10,
verbose = 1,
validation_data = val_generator
)
这些行
lb = LabelBinarizer()
labels_batch = lb.fit_transform(labels_batch)
意味着如果 8 个标签中的 none 是 3
,那么您将只得到 (8, 3)
输出,例如
>>> lb = LabelBinarizer()
>>> lb.fit_transform([0, 1, 2, 0, 2, 0, 0, 1])
array([[1, 0, 0],
[0, 1, 0],
[0, 0, 1],
[1, 0, 0],
[0, 0, 1],
[1, 0, 0],
[1, 0, 0],
[0, 1, 0]])
有 2 个简单的解决方法
- 反正你用的是tf.keras,就用官方的keras方法
>>> labels_batch = tf.keras.utils.to_categorical(labels_batch, 4)
您可以使用前面的示例进行测试
>>> tf.keras.utils.to_categorical([0, 1, 2, 0, 2, 0, 0, 1], 4)
array([[1., 0., 0., 0.],
[0., 1., 0., 0.],
[0., 0., 1., 0.],
[1., 0., 0., 0.],
[0., 0., 1., 0.],
[1., 0., 0., 0.],
[1., 0., 0., 0.],
[0., 1., 0., 0.]], dtype=float32)
- 将
LabelBinarizer
放入__init__
并在那里调用lb.fit
并使用lb.transform
而不是lb.fit_transform
def __init__
self.filepaths = filepaths
self.dataframe = dataframe
self.batch_size = batch_size
self.image_target_size = image_target_size
self.lb = LabelBinarizer()
self.lb.fit([0, 1, 2, 3])
def __getitem__
labels_batch = self.lb.transform(labels_batch)
这方面的例子是
>>> lb = LabelBinarizer()
>>> lb.fit([0, 1, 2, 3])
>>> lb.transform([0, 1, 2, 0, 2, 0, 0, 1])
array([[1, 0, 0, 0],
[0, 1, 0, 0],
[0, 0, 1, 0],
[1, 0, 0, 0],
[0, 0, 1, 0],
[1, 0, 0, 0],
[1, 0, 0, 0],
[0, 1, 0, 0]])