为实例分割构建混淆矩阵(来自 detectron2 的 mask r-cnn)

Build confusion matrix for instance segmentation (Mask R-CNN from Detectron2)

我已经在玉米图像上训练了一个 mask r-cnn(我无法展示示例,因为它们是机密),但它们基本上是散落在平面上的玉米粒的图片。

我希望能够分割并分类不同种类的玉米粒。我知道 AP 指标是衡量实例分割算法性能的最佳方式,而且我知道这种算法的混淆矩阵通常没有意义。

但对于我的具体情况,我有 4 个非常相似的对象类别,我希望能够设置一个固定的 AP 值,例如 AP50/AP75,并在此基础上构建一个混淆矩阵。

有可能吗?我该怎么做?

我使用 detectron2 库来训练和获得预测。下面是我用来从磁盘加载经过训练的模型、在验证集中生成预测并可视化结果的代码:

import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

import numpy as np
import matplotlib.pyplot as plt
import os, json, cv2, random, gc

from detectron2 import model_zoo
from detectron2.data.datasets import register_coco_instances
from detectron2.checkpoint import DetectionCheckpointer, Checkpointer
from detectron2.data import MetadataCatalog, DatasetCatalog, build_detection_test_loader
from detectron2.engine import DefaultTrainer, DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer, ColorMode
from detectron2.modeling import build_model
from detectron2.evaluation import COCOEvaluator, inference_on_dataset

# Paths to the CVAT-exported COCO 1.0 datasets: per-split annotation JSON plus
# the directory holding the corresponding images.
train_annotations_path = "./data/cvat-corn-train-coco-1.0/annotations/instances_default.json"
train_images_path = "./data/cvat-corn-train-coco-1.0/images"
validation_annotations_path = "./data/cvat-corn-validation-coco-1.0/annotations/instances_default.json"
validation_images_path = "./data/cvat-corn-validation-coco-1.0/images"

# Mask R-CNN (ResNet-50 FPN, 3x schedule) configuration for the 4 corn-kernel
# classes. merge_from_file must run before the overrides below.
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("train-corn",)
cfg.DATASETS.TEST = ("validation-corn",)
cfg.DATALOADER.NUM_WORKERS = 2
# Let training initialize from model zoo; replaced further down with the
# locally trained checkpoint before inference.
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025
cfg.SOLVER.MAX_ITER = 10000
cfg.SOLVER.STEPS = []  # empty: no learning-rate decay steps
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128 
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 4  # ardido, fermentado, and two other kernel classes
cfg.OUTPUT_DIR = "./output"
# Minimum score for a detection to be kept at test time (set again below,
# redundantly, right before the predictor is built).
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7

# Register both CVAT-exported COCO splits with detectron2's dataset catalogs,
# then grab the training metadata/dicts used later for visualization.
for split_name, ann_file, img_dir in (
    ("train-corn", train_annotations_path, train_images_path),
    ("validation-corn", validation_annotations_path, validation_images_path),
):
    register_coco_instances(split_name, {}, ann_file, img_dir)

metadata_train = MetadataCatalog.get("train-corn")
dataset_dicts = DatasetCatalog.get("train-corn")

# Load the trained checkpoint and run inference + visualization over the
# validation split, saving one rendered image per input image.
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7
predictor = DefaultPredictor(cfg)

predicted_images_path = os.path.abspath("./predicted/")

dataset_dicts_validation = DatasetCatalog.get("validation-corn")
for d in dataset_dicts_validation:
    im = cv2.imread(d["file_name"])
    outputs = predictor(im)
    # Visualizer expects RGB; cv2.imread returns BGR, hence the channel flip.
    v = Visualizer(
        im[:, :, ::-1],
        metadata=metadata_train,
        scale=0.5,
        instance_mode=ColorMode.IMAGE_BW,
    )
    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    fig = plt.figure(frameon=False, dpi=1)
    fig.set_size_inches(1024, 1024)
    ax = plt.Axes(fig, [0., 0., 1., 1.])
    ax.set_axis_off()
    fig.add_axes(ax)
    ax.imshow(cv2.cvtColor(out.get_image()[:, :, ::-1], cv2.COLOR_BGR2RGB), aspect='auto')
    # Bug fix: os.path.basename instead of split('/') — portable across OSes.
    fig.savefig(os.path.join(predicted_images_path, os.path.basename(d["file_name"])))
    # Bug fix: matplotlib keeps every figure alive until it is closed, so the
    # original loop leaked one 1024x1024 canvas per validation image.
    plt.close(fig)

这就是我对给定图像的输出:

它是一个以Instances对象为唯一值的字典,Instances对象有四个列表:pred_boxes、scores、pred_classes和pred_masks。并且可以使用detectron2 visualizer进行可视化,但是出于保密原因我不能显示可视化。

这些是我现在拥有的模型指标:

每个 class:

而且我在视觉检查中注意到,一些玉米粒被混淆为其他类别,特别是在 ardido 和 fermentado 这两个类别之间,这就是为什么我想以某种方式构建一个混淆矩阵。

我希望混淆矩阵看起来像这样:

编辑: 我找到了这个存储库:

https://github.com/kaanakan/object_detection_confusion_matrix

并尝试使用它:

from confusion_matrix import ConfusionMatrix

# Confusion matrix from github.com/kaanakan/object_detection_confusion_matrix.
# It expects ground truth rows as [class, x1, y1, x2, y2] and detections as
# [x1, y1, x2, y2, score, class].
cm = ConfusionMatrix(4, CONF_THRESHOLD=0.3, IOU_THRESHOLD=0.3)

for d in dataset_dicts_validation:
    img = cv2.imread(d["file_name"])
    outputs = predictor(img)
    labels = list()
    detections = list()
    for ann in d["annotations"]:
        # Bug fix: COCO annotations store boxes as [x, y, width, height],
        # while the predictions (and this library) use corner coordinates
        # [x1, y1, x2, y2] — feeding the raw bbox produced a wrong matrix.
        x, y, w, h = ann["bbox"]
        labels.append([ann["category_id"], x, y, x + w, y + h])
    for coord, conf, cls in zip(
        outputs["instances"].get("pred_boxes").tensor.cpu().numpy(),
        outputs["instances"].get("scores").cpu().numpy(),
        outputs["instances"].get("pred_classes").cpu().numpy()
    ):
        detections.append(list(coord) + [conf] + [cls])
    cm.process_batch(np.array(detections), np.array(labels))

但是我得到的矩阵显然是错误的,我很难修复它。

我做到了,我从头开始构建了混淆矩阵函数:

import pandas as pd
import torch
from detectron2.structures import Boxes, pairwise_iou

def coco_bbox_to_coordinates(bbox):
    """Convert COCO ``[x, y, w, h]`` boxes to corner format ``[x1, y1, x2, y2]``.

    Takes an (N, 4) array of boxes and returns a new float array; the input
    array is left untouched.
    """
    corners = bbox.astype(float).copy()
    corners[:, 2] += corners[:, 0]  # x2 = x + w
    corners[:, 3] += corners[:, 1]  # y2 = y + h
    return corners

def conf_matrix_calc(labels, detections, n_classes, conf_thresh, iou_thresh):
    """Build the detection confusion matrix for a single image.

    Args:
        labels: (N, 5) array-like, rows ``[class_id, x, y, w, h]`` (COCO box
            format, converted to corners internally).
        detections: (M, 6) array-like, rows ``[x1, y1, x2, y2, score, class_id]``.
        n_classes: number of real classes; one extra row/column indexes "null"
            (missed ground truth / unmatched detection).
        conf_thresh: minimum detection score to consider.
        iou_thresh: minimum IoU for a label/detection pair to count as a match.

    Returns:
        (n_classes + 1, n_classes + 1) matrix where cell [i, j] counts
        ground-truth class i matched to predicted class j; the last row/column
        hold unmatched detections / undetected labels.

    Note: as in the original, every pair above ``iou_thresh`` is counted, so a
    single label can contribute to several cells when several boxes overlap it.
    """
    confusion_matrix = np.zeros((n_classes + 1, n_classes + 1))
    labels = np.asarray(labels, dtype=float).reshape(-1, 5)
    detections = np.asarray(detections, dtype=float).reshape(-1, 6)

    # Bug fix: apply the confidence threshold BEFORE slicing out boxes and
    # classes. The original filtered `detections` after extracting
    # d_bboxs/d_classes, so low-confidence boxes were still matched, and
    # `detections_matched` (sized from the filtered array) could be shorter
    # than the loop index range, raising IndexError.
    detections = detections[detections[:, 4] > conf_thresh]

    l_classes = labels[:, 0].astype(int)
    l_bboxs = labels[:, 1:].copy()
    l_bboxs[:, 2:4] += l_bboxs[:, 0:2]  # COCO [x, y, w, h] -> [x1, y1, x2, y2]
    d_bboxs = detections[:, :4]
    d_classes = detections[:, 5].astype(int)

    labels_detected = np.zeros(len(l_classes), dtype=bool)
    detections_matched = np.zeros(len(d_classes), dtype=bool)

    if len(l_classes) and len(d_classes):
        # Vectorized pairwise IoU (replaces one detectron2 pairwise_iou call
        # per label/detection pair, same formula: clamped overlap / union).
        lt = np.maximum(l_bboxs[:, None, :2], d_bboxs[None, :, :2])
        rb = np.minimum(l_bboxs[:, None, 2:4], d_bboxs[None, :, 2:4])
        wh = np.clip(rb - lt, 0, None)
        inter = wh[..., 0] * wh[..., 1]
        l_area = (l_bboxs[:, 2] - l_bboxs[:, 0]) * (l_bboxs[:, 3] - l_bboxs[:, 1])
        d_area = (d_bboxs[:, 2] - d_bboxs[:, 0]) * (d_bboxs[:, 3] - d_bboxs[:, 1])
        union = l_area[:, None] + d_area[None, :] - inter
        iou = np.where(union > 0, inter / np.where(union > 0, union, 1.0), 0.0)

        matched = iou >= iou_thresh
        for l_idx, d_idx in zip(*np.nonzero(matched)):
            confusion_matrix[l_classes[l_idx], d_classes[d_idx]] += 1
        labels_detected = matched.any(axis=1)
        detections_matched = matched.any(axis=0)

    # Ground truth never matched: count as predicted "null".
    for i in np.nonzero(~labels_detected)[0]:
        confusion_matrix[l_classes[i], -1] += 1
    # Confident detections with no matching ground truth: "null" ground truth.
    for i in np.nonzero(~detections_matched)[0]:
        confusion_matrix[-1, d_classes[i]] += 1
    return confusion_matrix

# Accumulate the confusion matrix over the whole validation split.
n_classes = 4
confusion_matrix = np.zeros([n_classes + 1, n_classes + 1])
for d in dataset_dicts_validation:
    img = cv2.imread(d["file_name"])
    outputs = predictor(img)
    instances = outputs["instances"].to("cpu")
    # Bug fix: build labels and detections independently. The original zipped
    # the annotations together with the predictions in one loop, which
    # truncates both lists to the shorter one and silently drops ground truth
    # or detections whenever the model finds more or fewer kernels than were
    # annotated.
    labels = [[ann["category_id"]] + list(ann["bbox"]) for ann in d["annotations"]]
    detections = [
        list(coord) + [conf] + [cls]
        for coord, conf, cls in zip(
            instances.get("pred_boxes").tensor.numpy(),
            instances.get("scores").numpy(),
            instances.get("pred_classes").numpy(),
        )
    ]
    confusion_matrix += conf_matrix_calc(
        np.array(labels), np.array(detections), n_classes, conf_thresh=0.5, iou_thresh=0.5
    )
matrix_indexes = metadata_train.get("thing_classes") + ["null"]
pd.DataFrame(confusion_matrix, columns=matrix_indexes, index=matrix_indexes)

我构建了 conf_matrix_calc,它为每张图像计算混淆矩阵,然后对每张图像执行它并累加。我花了一段时间才让它工作,因为有一个隐藏的问题。由于某种原因,标签保存的格式与检测结果不同:不是 [x1, y1, x2, y2],而是 [x1, y1, x2-x1, y2-y1](即 COCO 的 [x, y, 宽, 高] 格式)。我在 detectron 或 coco 的文档中都没有找到对此的明确说明,倒是发现另一种格式(YOLO)保存为 [(x1+x2)/2, (y1+y2)/2, x2-x1, y2-y1],无论如何那不是我的情况。我之所以发现这一点,是因为我打开了图像,检查了标签和预测中框的像素坐标,发现坐标对不上。无论如何,现在它起作用了,这就是我的结果: