我正在使用 X 射线扫描的 DICOM 图像(共四个类别)来解决目标检测和分类问题。当我把所有图像一次性转换为 numpy 数组并用它们训练模型时,模型可以正常工作。但后来我决定编写一个图像生成器,每次只提供一个批次的图像,结果它抛出了如下错误:

InvalidArgumentError: Incompatible shapes: [8,3] vs. [8,4] [[node mean_squared_error/SquaredDifference (defined at <ipython-input-25-90d4137ca5f8>:6) ]] [Op:__inference_train_function_3036]

其中 8 是我的 batch_size。


class My_Generator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that serves DICOM images with class labels and boxes.

    Each item is one batch shaped ``(images, (labels, bboxes))``: images are
    resized to ``image_target_size`` and expanded to three channels, labels
    are binarized, and boxes are ``[xmin, ymin, xmax, ymax]`` divided by the
    original image width/height.

    Args:
        filepaths: Sequence of paths to ``.dcm`` files.
        dataframe: Table with an ``id`` column matching the file names
            (without the ``.dcm`` suffix).
        batch_size: Number of files per batch.
        image_target_size: Size passed to ``tf.image.resize``.
    """

    def __init__(self, filepaths, dataframe, batch_size, image_target_size=(224, 224)):
        super().__init__()
        self.filepaths = filepaths
        self.dataframe = dataframe
        self.batch_size = batch_size
        self.image_target_size = image_target_size

    def __len__(self):
        """Return the number of batches, counting a final partial batch."""
        return int(np.ceil(len(self.filepaths) / float(self.batch_size)))

    def __getitem__(self, ind):
        """Build batch number ``ind`` as ``(images, (labels, bboxes))``."""
        filepaths_batch = self.filepaths[ind * self.batch_size : (ind + 1) * self.batch_size]
        images = []
        labels = []
        bboxes = []
        for filepath in filepaths_batch:
            # Order matters: _preprocess_image stores self.h / self.w, which
            # _get_label_and_bbox then uses to normalise the box.
            image = self._preprocess_image(filepath)
            label, bbox = self._get_label_and_bbox(filepath)
            images.append(image)
            labels.append(label)
            bboxes.append(bbox)
        images_batch = np.array(images, dtype="float32")
        labels_batch = np.array(labels)
        bboxes_batch = np.array(bboxes, dtype="float32")
        # NOTE: the binarizer is fitted on this batch alone, so the encoded
        # width follows the labels present in the batch, not the full class list.
        lb = LabelBinarizer()
        labels_batch = lb.fit_transform(labels_batch)
        return images_batch, (labels_batch, bboxes_batch)

    def _preprocess_image(self, filepath):
        """Return the image at ``filepath`` as a resized 3-channel numpy array.

        Also records the original height and width on ``self.h`` / ``self.w``.
        """
        image_bytes = tf.io.read_file(filepath)

        image = tfio.image.decode_dicom_image(image_bytes, color_dim=True, on_error='lossy', scale='auto', dtype=tf.uint16)

        # Drop the leading axis so the remaining shape unpacks as (h, w, channels).
        image = tf.squeeze(image, [0])
        self.h, self.w, _ = image.shape

        image = tf.image.resize(image, self.image_target_size)

        # Convert images from 1 channel to 3 channels (RGB), needed for feeding into the Keras model
        image = tf.image.grayscale_to_rgb(image).numpy()
        # Scale by the uint16 value range.
        image /= 65536
        return image

    def _get_label_and_bbox(self, filepath):
        """Look up the label and normalised bounding box for ``filepath``.

        Relies on ``self.h`` / ``self.w`` set by the preceding
        ``_preprocess_image`` call for the same file.
        """
        filename = filepath.rsplit('/')[-1].replace('.dcm', '')

        row_array = self.dataframe.loc[self.dataframe['id'] == filename].values
        bbox = [0, 0, 0, 0]
        # Positional columns 1..5 are read as xmin, ymin, xmax, ymax, label
        # (presumably column 0 is 'id' — confirm against the dataframe).
        xmin = row_array[0][1]
        ymin = row_array[0][2]
        xmax = row_array[0][3]
        ymax = row_array[0][4]
        label = row_array[0][5]
        print(self.h, self.w)

        if label == 'negative':
            # No object: use a one-pixel box at the origin of the resized image.
            bbox[0] = 0
            bbox[1] = 0
            bbox[2] = 1 / self.image_target_size[1]
            bbox[3] = 1 / self.image_target_size[0]
        else:
            bbox[0] = xmin / self.w
            bbox[1] = ymin / self.h
            bbox[2] = xmax / self.w
            bbox[3] = ymax / self.h
        return label, bbox

# Batch generators for the training and validation file lists.
# NOTE(review): both generators look labels up in train_result — confirm it
# also contains the rows for the files in val_data.
train_generator = My_Generator(
    filepaths=train_data,
    dataframe=train_result,
    batch_size=batch_size,
)
val_generator = My_Generator(
    filepaths=val_data,
    dataframe=train_result,
    batch_size=batch_size,
)


# Small convolutional feature extractor shared by both heads.
base_model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(16, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    # Flatten so the Dense heads below emit one vector per image.
    tf.keras.layers.Flatten(),
])

flatten = base_model.output

# Regression head: four sigmoid outputs for the normalised bounding box.
bbox_head = Dense(128, activation="relu")(flatten)
bbox_head = Dense(64, activation="relu")(bbox_head)
bbox_head = Dense(32, activation="relu")(bbox_head)
bbox_head = Dense(4, activation="sigmoid", name="bounding_box")(bbox_head)

# Classification head: softmax over the four classes.
label_head = Dense(512, activation="relu")(flatten)
label_head = Dropout(0.5)(label_head)
label_head = Dense(512, activation="relu")(label_head)
label_head = Dropout(0.5)(label_head)
label_head = Dense(4, activation="softmax", name="class_label")(label_head)

# NOTE(review): outputs are ordered (bounding_box, class_label), while
# My_Generator.__getitem__ returns targets as (labels, bboxes) — confirm the
# targets are matched to the intended heads.
model = Model(inputs=base_model.input, outputs=(bbox_head, label_head))

# One loss per named output head, weighted equally.
losses = {"class_label": "categorical_crossentropy", "bounding_box": "mean_squared_error"}

loss_weights = {"class_label": 1.0, "bounding_box": 1.0}

opt = Adam(learning_rate=INITIAL_LR)
model.compile(loss=losses, optimizer=opt, metrics=["accuracy"], loss_weights=loss_weights)

# Train from the batch generators instead of in-memory arrays.
model.fit(
    train_generator,
    steps_per_epoch=int(len(train_data) // batch_size),
    epochs=10,
    verbose=1,
    validation_data=val_generator,
)


lb = LabelBinarizer()
labels_batch = lb.fit_transform(labels_batch)

这意味着:如果这 8 个标签中没有一个是 3,那么您只会得到形状为 (8, 3) 的输出,例如:

>>> lb = LabelBinarizer()
>>> lb.fit_transform([0, 1, 2, 0, 2, 0, 0, 1])
array([[1, 0, 0],
       [0, 1, 0],
       [0, 0, 1],
       [1, 0, 0],
       [0, 0, 1],
       [1, 0, 0],
       [1, 0, 0],
       [0, 1, 0]])

有两种简单的解决方法:

  1. 既然您已经在使用 tf.keras,就直接使用官方的 Keras 方法:
>>> labels_batch = tf.keras.utils.to_categorical(labels_batch, 4)


>>> tf.keras.utils.to_categorical([0, 1, 2, 0, 2, 0, 0, 1], 4)
array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [1., 0., 0., 0.],
       [0., 0., 1., 0.],
       [1., 0., 0., 0.],
       [1., 0., 0., 0.],
       [0., 1., 0., 0.]], dtype=float32)
  2. 将 LabelBinarizer 放入 __init__ 并在那里调用 fit,然后在 __getitem__ 中使用 lb.transform 而不是 lb.fit_transform:
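    def __init__(self, filepaths, dataframe, batch_size, image_target_size=(224, 224)):
        self.filepaths = filepaths
        self.dataframe = dataframe
        self.batch_size = batch_size
        self.image_target_size = image_target_size
        self.lb = LabelBinarizer()
        self.lb.fit([0, 1, 2, 3])

    def __getitem__(self, ind):
        ...
        labels_batch = self.lb.transform(labels_batch)
        ...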


>>> lb = LabelBinarizer()
>>> lb.fit([0, 1, 2, 3])
>>> lb.transform([0, 1, 2, 0, 2, 0, 0, 1])
array([[1, 0, 0, 0],
       [0, 1, 0, 0],
       [0, 0, 1, 0],
       [1, 0, 0, 0],
       [0, 0, 1, 0],
       [1, 0, 0, 0],
       [1, 0, 0, 0],
       [0, 1, 0, 0]])