Dice coefficient not increasing for U-net image segmentation
Question
I am using the Image segmentation guide by fchollet to perform semantic segmentation. I tried to adapt the guide to my dataset by labelling the 8-bit image mask values as 1 and 2, as in the Oxford Pets dataset; these labels are then subtracted down to 0 and 1 inside class Generator(keras.utils.Sequence). The input images are RGB.
What I have tried
I don't know why, but my Dice coefficient is not increasing at all. I have tried lowering the learning rate, switching the optimizer to SGD/RMSProp, normalizing the data, and accounting for the imbalanced labels, but the results are very strange: the model's accuracy/IoU decreases as the number of epochs increases.
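For reference, my attempt at accounting for the label imbalance looked roughly like the sketch below (a minimal illustration of a per-pixel weight map; the helper name compute_sample_weights and the example weights are my own, not from the guide):

import numpy as np

def compute_sample_weights(y, class_weight=(1.0, 10.0)):
    """Per-pixel weight map for a batch of binary masks y with values {0, 1}.

    class_weight[k] is the weight applied to pixels of class k; in practice
    it would be tuned to the actual class ratio of the dataset.
    """
    w = np.where(y == 1, class_weight[1], class_weight[0]).astype("float32")
    # Keras expects shape (batch, h, w) for temporal sample weights, so drop
    # the trailing channel axis of the mask.
    return np.squeeze(w, axis=-1)

A Sequence can then return (x, y, compute_sample_weights(y)) from __getitem__ so that fit() applies the weights per pixel.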
If it helps, I previously asked a question about which metric I should use for an imbalanced dataset. The visualized predictions look okay, but the metrics do not.
What can I do next to debug this? Is there anything wrong with my code? Any advice would be appreciated.
Here are the results:
Epoch 1/10
304/304 [==============================] - 693s 2s/step - loss: 0.7648 - accuracy: 0.5100 - dice_metric: 0.6664 - IOU: 0.5100 - jaccard_distance_loss: 50.0260 - val_loss: 0.6799 - val_accuracy: 0.5178 - val_dice_metric: 0.6664 - val_IOU: 0.5178 - val_jaccard_distance_loss: 50.0260
Epoch 2/10
304/304 [==============================] - 176s 579ms/step - loss: 0.6727 - accuracy: 0.3135 - dice_metric: 0.6664 - IOU: 0.3135 - jaccard_distance_loss: 50.0257 - val_loss: 0.6396 - val_accuracy: 0.1632 - val_dice_metric: 0.6664 - val_IOU: 0.1632 - val_jaccard_distance_loss: 50.0260
Epoch 3/10
304/304 [==============================] - 176s 579ms/step - loss: 0.6377 - accuracy: 0.1728 - dice_metric: 0.6664 - IOU: 0.1728 - jaccard_distance_loss: 50.0258 - val_loss: 0.6574 - val_accuracy: 0.2565 - val_dice_metric: 0.6664 - val_IOU: 0.2565 - val_jaccard_distance_loss: 50.0260
Epoch 4/10
304/304 [==============================] - 176s 579ms/step - loss: 0.5886 - accuracy: 0.0689 - dice_metric: 0.6664 - IOU: 0.0689 - jaccard_distance_loss: 50.0264 - val_loss: 0.5933 - val_accuracy: 0.0334 - val_dice_metric: 0.6664 - val_IOU: 0.0334 - val_jaccard_distance_loss: 50.0260
Epoch 5/10
304/304 [==============================] - 176s 579ms/step - loss: 0.5710 - accuracy: 0.0281 - dice_metric: 0.6664 - IOU: 0.0281 - jaccard_distance_loss: 50.0260 - val_loss: 0.5643 - val_accuracy: 0.0130 - val_dice_metric: 0.6664 - val_IOU: 0.0130 - val_jaccard_distance_loss: 50.0260
Epoch 6/10
304/304 [==============================] - 176s 579ms/step - loss: 0.5601 - accuracy: 0.0188 - dice_metric: 0.6664 - IOU: 0.0188 - jaccard_distance_loss: 50.0252 - val_loss: 0.5457 - val_accuracy: 0.0082 - val_dice_metric: 0.6664 - val_IOU: 0.0082 - val_jaccard_distance_loss: 50.0260
Epoch 7/10
304/304 [==============================] - 176s 580ms/step - loss: 0.5494 - accuracy: 0.0147 - dice_metric: 0.6664 - IOU: 0.0147 - jaccard_distance_loss: 50.0254 - val_loss: 0.5353 - val_accuracy: 0.0068 - val_dice_metric: 0.6664 - val_IOU: 0.0068 - val_jaccard_distance_loss: 50.0260
Epoch 8/10
304/304 [==============================] - 176s 580ms/step - loss: 0.5383 - accuracy: 0.0115 - dice_metric: 0.6664 - IOU: 0.0115 - jaccard_distance_loss: 50.0264 - val_loss: 0.5241 - val_accuracy: 0.0051 - val_dice_metric: 0.6664 - val_IOU: 0.0051 - val_jaccard_distance_loss: 50.0260
Epoch 9/10
304/304 [==============================] - 176s 579ms/step - loss: 0.5268 - accuracy: 0.0090 - dice_metric: 0.6664 - IOU: 0.0090 - jaccard_distance_loss: 50.0268 - val_loss: 0.5115 - val_accuracy: 0.0039 - val_dice_metric: 0.6664 - val_IOU: 0.0039 - val_jaccard_distance_loss: 50.0260
Epoch 10/10
304/304 [==============================] - 176s 579ms/step - loss: 0.5149 - accuracy: 0.0069 - dice_metric: 0.6664 - IOU: 0.0069 - jaccard_distance_loss: 50.0254 - val_loss: 0.4960 - val_accuracy: 0.0033 - val_dice_metric: 0.6664 - val_IOU: 0.0033 - val_jaccard_distance_loss: 50.0260
Here is my code:
import os
import random

import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras.layers import (Activation, BatchNormalization, Conv2D,
                                     Conv2DTranspose, Dropout, Input,
                                     MaxPooling2D, concatenate)
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import load_img

batch_size = 4
num_classes = 2
img_size = (512, 512)
# |--------------------- Load image and masks ---------------------|
input_dir = "/content/drive/MyDrive/input_dir"
target_dir = "/content/drive/MyDrive/target_dir"
# Sort images and masks
input_img_paths = sorted(
[
os.path.join(input_dir, fname)
for fname in os.listdir(input_dir)
if fname.endswith(".png")
]
)
target_img_paths = sorted(
[
os.path.join(target_dir, fname)
for fname in os.listdir(target_dir)
if fname.endswith(".png") and not fname.startswith(".")
]
)
# |--------------------- Define custom generator ---------------------|
class Generator(keras.utils.Sequence):
    """Helper to iterate over the data (as Numpy arrays)."""

    def __init__(self, batch_size, img_size, input_img_paths, target_img_paths):
        self.batch_size = batch_size
        self.img_size = img_size
        self.input_img_paths = input_img_paths
        self.target_img_paths = target_img_paths

    def __len__(self):
        return len(self.target_img_paths) // self.batch_size

    def __getitem__(self, idx):
        """Returns tuple (input, target) corresponding to batch #idx."""
        i = idx * self.batch_size
        batch_input_img_paths = self.input_img_paths[i : i + self.batch_size]
        batch_target_img_paths = self.target_img_paths[i : i + self.batch_size]
        x = np.zeros((self.batch_size,) + self.img_size + (3,), dtype="float32")
        for j, path in enumerate(batch_input_img_paths):
            img = load_img(path, target_size=self.img_size)
            # Normalize after loading: dividing the zero-filled array up front
            # has no effect on the images copied in afterwards.
            x[j] = np.asarray(img, dtype="float32") / 255.0
        y = np.zeros((self.batch_size,) + self.img_size + (1,), dtype="uint8")
        for j, path in enumerate(batch_target_img_paths):
            img = load_img(path, target_size=self.img_size, color_mode="grayscale")
            y[j] = np.expand_dims(img, 2)
            # Ground truth labels are 1, 2. Subtract one to make them 0, 1:
            y[j] -= 1
        return x, y
# |--------------------- Train Validation Split ---------------------|
val_samples = 304
random.Random(7).shuffle(input_img_paths)
random.Random(7).shuffle(target_img_paths)
train_input_img_paths = input_img_paths[:-val_samples]
train_target_img_paths = target_img_paths[:-val_samples]
val_input_img_paths = input_img_paths[-val_samples:]
val_target_img_paths = target_img_paths[-val_samples:]
# Instantiate data Sequences for each split
train_gen = Generator(batch_size, img_size, train_input_img_paths, train_target_img_paths)
val_gen = Generator(batch_size, img_size, val_input_img_paths, val_target_img_paths)
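# Sanity check (my addition, not in the original guide): inspect one batch to
# confirm the shapes, the [0, 1] input range, and that mask labels are {0, 1}.
x_batch, y_batch = train_gen[0]
print(x_batch.shape, x_batch.min(), x_batch.max())  # e.g. (4, 512, 512, 3) 0.0 1.0
print(y_batch.shape, np.unique(y_batch))            # e.g. (4, 512, 512, 1) [0 1]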
# |--------------------- Define U-net Model ---------------------|
def conv_block(tensor, nfilters, size=3, padding='same', initializer="he_normal"):
    x = Conv2D(filters=nfilters, kernel_size=(size, size), padding=padding, kernel_initializer=initializer)(tensor)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = Conv2D(filters=nfilters, kernel_size=(size, size), padding=padding, kernel_initializer=initializer)(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    return x

def deconv_block(tensor, residual, nfilters, size=3, padding='same', strides=(2, 2)):
    y = Conv2DTranspose(nfilters, kernel_size=(size, size), strides=strides, padding=padding)(tensor)
    y = concatenate([y, residual], axis=3)
    y = conv_block(y, nfilters)
    return y
def Unet(img_height, img_width, nclasses=2, filters=64):
    # down
    input_layer = Input(shape=(img_height, img_width, 3), name='image_input')
    conv1 = conv_block(input_layer, nfilters=filters)
    conv1_out = MaxPooling2D(pool_size=(2, 2))(conv1)
    conv2 = conv_block(conv1_out, nfilters=filters*2)
    conv2_out = MaxPooling2D(pool_size=(2, 2))(conv2)
    conv3 = conv_block(conv2_out, nfilters=filters*4)
    conv3_out = MaxPooling2D(pool_size=(2, 2))(conv3)
    conv4 = conv_block(conv3_out, nfilters=filters*8)
    conv4_out = MaxPooling2D(pool_size=(2, 2))(conv4)
    conv4_out = Dropout(0.5)(conv4_out)
    conv5 = conv_block(conv4_out, nfilters=filters*16)
    conv5 = Dropout(0.5)(conv5)
    # up
    deconv6 = deconv_block(conv5, residual=conv4, nfilters=filters*8)
    deconv6 = Dropout(0.5)(deconv6)
    deconv7 = deconv_block(deconv6, residual=conv3, nfilters=filters*4)
    deconv7 = Dropout(0.5)(deconv7)
    deconv8 = deconv_block(deconv7, residual=conv2, nfilters=filters*2)
    deconv9 = deconv_block(deconv8, residual=conv1, nfilters=filters)
    # output
    output_layer = Conv2D(filters=3, kernel_size=(1, 1))(deconv9)
    output_layer = BatchNormalization()(output_layer)
    output_layer = Conv2D(nclasses, 3, activation="softmax", padding="same")(output_layer)
    model = Model(inputs=input_layer, outputs=output_layer, name='Unet')
    return model
model = Unet(512,512)
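# Sanity check (my addition): with the softmax head the model should emit
# per-pixel probabilities over the two classes.
print(model.output_shape)  # expected: (None, 512, 512, 2)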
# |--------------------- Define custom metrics ---------------------|
def jaccard_distance_loss(y_true, y_pred, smooth=100):
    intersection = K.sum(K.sum(K.abs(y_true * y_pred), axis=-1))
    sum_ = K.sum(K.sum(K.abs(y_true) + K.abs(y_pred), axis=-1))
    jac = (intersection + smooth) / (sum_ - intersection + smooth)
    return (1 - jac) * smooth

def IOU(y_true, y_pred):
    true_pixels = K.argmax(y_true, axis=-1)
    pred_pixels = K.argmax(y_pred, axis=-1)
    true_pixels = K.flatten(true_pixels)
    pred_pixels = K.flatten(pred_pixels)
    true_labels = K.equal(true_pixels, 0)  # target label
    pred_labels = K.equal(pred_pixels, 0)  # target label
    inter = tf.cast(true_labels & pred_labels, tf.int32)
    union = tf.cast(true_labels | pred_labels, tf.int32)
    iou = K.sum(inter) / K.sum(union)
    return iou

def dice_metric(y_true, y_pred):
    # Keras passes metrics as (y_true, y_pred); the original argument order
    # (y_pred, y_true) was swapped, although Dice itself is symmetric.
    intersection = K.sum(K.sum(K.abs(y_true * y_pred), axis=-1))
    union = K.sum(K.sum(K.abs(y_true) + K.abs(y_pred), axis=-1))
    return 2 * intersection / union
# |--------------------- Compile Model ---------------------|
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
              loss='sparse_categorical_crossentropy',
              sample_weight_mode='temporal',
              metrics=['accuracy', dice_metric, IOU, jaccard_distance_loss])
# |--------------------- Train Model ---------------------|
model.fit(train_gen,
          epochs=10,
          validation_data=val_gen)
Edit (Solution)
The model output was wrong. It should be a sigmoid activation with a single output channel. Changing output_layer = Conv2D(nclasses, 3, activation="softmax", padding="same")(output_layer) to output_layer = Conv2D(1, 1, activation="sigmoid", padding="same")(output_layer) solved my problem.
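Note that with a one-channel sigmoid output, sparse_categorical_crossentropy no longer matches the output shape, so the loss has to change as well. A minimal recompile sketch (an assumption on my part, reusing the same Adam settings as above; binary cross-entropy is the natural counterpart of a single sigmoid channel):

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
              loss="binary_crossentropy",
              metrics=[dice_metric])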
In addition, after reading this post I decided to use the true positive rate (TPR), also commonly known as recall/sensitivity/probability of detection, as my main metric.
def POD(y_true, y_pred):
    y_true_pos = K.flatten(y_true)
    y_pred_pos = K.flatten(y_pred)
    true_pos = K.sum(y_true_pos * y_pred_pos)
    false_neg = K.sum(y_true_pos * (1 - y_pred_pos))
    return true_pos / (true_pos + false_neg)
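As written, POD is a "soft" recall, since y_pred holds sigmoid probabilities rather than hard labels. A thresholded variant is a small extension (my addition; the 0.5 cutoff and the K.epsilon() guard against empty masks are assumptions):

def POD_hard(y_true, y_pred, threshold=0.5):
    # Binarize the predicted probabilities before computing recall.
    y_pred_bin = K.cast(K.greater(y_pred, threshold), "float32")
    y_true = K.cast(y_true, "float32")
    true_pos = K.sum(K.flatten(y_true) * K.flatten(y_pred_bin))
    false_neg = K.sum(K.flatten(y_true) * (1 - K.flatten(y_pred_bin)))
    return true_pos / (true_pos + false_neg + K.epsilon())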