How can I calculate the F1-score and other classification metrics from a faster-RCNN? (object detection in PyTorch)
I'm working on this but struggling to understand how to calculate the F1 score for an object detection task.
Ideally, I would like to know the false positives, true positives, false negatives and true negatives for every target in the image (it's a binary problem, with the objects in the images as one class and the background as the other).
Eventually I would also like to extract the false-positive bounding boxes from the images. I'm not sure whether this is an efficient approach, but I would save the image names and bbox predictions, along with whether or not they are false positives, to a numpy file.
I currently have this set up with a batch size of 1, so that I can apply a non-maximum suppression algorithm per image:
def apply_nms(orig_prediction, iou_thresh=0.3):
    # torchvision returns the indices of the bboxes to keep
    keep = torchvision.ops.nms(orig_prediction['boxes'], orig_prediction['scores'], iou_thresh)
    final_prediction = orig_prediction
    final_prediction['boxes'] = final_prediction['boxes'][keep]
    final_prediction['scores'] = final_prediction['scores'][keep]
    final_prediction['labels'] = final_prediction['labels'][keep]
    return final_prediction
cpu_device = torch.device("cpu")
model.eval()
with torch.no_grad():
    for images, targets in valid_data_loader:
        images = list(img.to(device) for img in images)
        outputs = model(images)
        outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
        predictions = apply_nms(outputs[0], iou_thresh=0.3)
Any ideas on how to determine the classification metrics and F1 score described above?
I also came across this line in the evaluation code provided by torchvision, and am wondering whether it would help me move forward:
res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
Using the terms precision, recall and F1 score in object detection is a little confusing, because these metrics were originally designed for binary evaluation tasks (e.g. classification). In any case, in object detection they have a slightly different meaning:
Let:
TP - the set of predicted objects that are successfully matched to a ground-truth object (above the IOU threshold for whatever dataset you are using; generally 0.5 or 0.7)
FP - the set of predicted objects that were not successfully matched to a ground-truth object
FN - the set of ground-truth objects that were not successfully matched to a predicted object
Precision: TP / (TP + FP)
Recall: TP / (TP + FN)
F1: 2 * Precision * Recall / (Precision + Recall)
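As a minimal sketch of how these three formulas translate into code (the helper name prf1 is mine, not part of any library):

def prf1(tp, fp, fn):
    # precision, recall and F1 from TP/FP/FN counts, guarding against division by zero
    precision = tp / (tp + fp) if tp + fp else 0.0
    recall = tp / (tp + fn) if tp + fn else 0.0
    f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
    return precision, recall, f1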
You can find many implementations of the matching step (matching ground-truth and predicted objects); they are usually provided with the dataset for evaluation, or you can implement it yourself. I suggest the py-motmetrics repository.
A simple implementation of the IOU calculation might look like this:
def iou(a, b):
    """
    Description
    -----------
    Calculates intersection over union for a single pair of boxes

    Parameters
    ----------
    a : tensor of size [4]
        bounding box in (x1, y1, x2, y2) format
    b : tensor of size [4]
        bounding box in (x1, y1, x2, y2) format

    Returns
    -------
    iou : float in [0, 1]
        intersection over union of a and b
    """
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])

    # coordinates of the intersection rectangle
    minx = max(a[0], b[0])
    maxx = min(a[2], b[2])
    miny = max(a[1], b[1])
    maxy = min(a[3], b[3])

    # clamp to zero so non-overlapping boxes give zero intersection
    intersection = max(0, maxx - minx) * max(0, maxy - miny)
    union = area_a + area_b - intersection
    return intersection / union
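Given such an IoU function, the matching step itself can be sketched greedily: sort the detections by confidence, then match each one to the unmatched ground-truth box with the highest IoU above the threshold. This is my own minimal version, not taken from py-motmetrics:

def greedy_match(detections, gt_boxes, iou_thresh=0.5):
    # detections: list of (score, box) tuples; gt_boxes: list of boxes
    # returns (tp, fp, fn) counts under greedy, confidence-ordered matching
    matched = [False] * len(gt_boxes)
    tp, fp = 0, 0
    for score, det in sorted(detections, key=lambda d: d[0], reverse=True):
        best_iou, best_idx = 0.0, -1
        for i, gt in enumerate(gt_boxes):
            if not matched[i]:
                overlap = iou(det, gt)
                if overlap > best_iou:
                    best_iou, best_idx = overlap, i
        if best_iou > iou_thresh:
            matched[best_idx] = True  # each gt box can only be matched once
            tp += 1
        else:
            fp += 1
    fn = matched.count(False)  # unmatched gt boxes are false negatives
    return tp, fp, fn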
So I implemented the F1 score computed globally, i.e. over the whole dataset.
The implementation below gives an example of determining the F1 score for a validation set.
The outputs of the model are in dictionary format, so we need to arrange them into flat lists like this:
predicted_boxes (list): [[train_index, class_prediction, prob_score, x1, y1, x2, y2],[],...[]]
train_index: index of the image that the specific bbox comes from
class_prediction: integer value representing the class prediction
prob_score: the outputted objectness score for the bbox
x1, y1, x2, y2: the (x1, y1) and (x2, y2) bbox coordinates
gt_boxes (list): [[train_index, class_prediction, prob_score, x1, y1, x2, y2],[],...[]]
where prob_score is just 1 for the ground-truth inputs (it could really be anything, as long as that dimension is specified and filled in).
IoU is also implemented in torchvision, which makes everything a lot easier.
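For reference, a quick example of torchvision.ops.box_iou, which takes an [N, 4] and an [M, 4] tensor of boxes in (x1, y1, x2, y2) format and returns the full [N, M] IoU matrix:

import torch
import torchvision

boxes_a = torch.tensor([[0., 0., 10., 10.], [5., 5., 15., 15.]])
boxes_b = torch.tensor([[0., 0., 10., 10.]])
print(torchvision.ops.box_iou(boxes_a, boxes_b))
# tensor([[1.0000],
#         [0.1429]])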
I hope this helps others, as I couldn't find another implementation of the F1 score for object detection anywhere else.
import math
from collections import Counter

import torch
import torchvision

# IoU threshold for counting a detection as a match; 0.5 is a common choice
# (set this to whatever your dataset uses)
iou_threshold = 0.5

model_test.eval()
with torch.no_grad():
    global_tp = []
    global_fp = []
    global_gt = []

    valid_df_unique = get_unique(valid_df['image_id'])

    for images, targets in valid_data_loader:
        images = list(img.to(device) for img in images)
        outputs = model_test(images)
        outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
        predictions = apply_nms(outputs[0], iou_thresh=0.1)

        # loop through each class
        for c in range(num_classes):
            # detections (list): predicted_boxes that are class c
            detections = []
            # ground_truths (list): gt_boxes that are class c
            ground_truths = []

            for b, la, s in zip(predictions['boxes'], predictions['labels'], predictions['scores']):
                updated_detection_array = [targets[0]['image_id'].item(), la.item(), s.item(),
                                           b[0].item(), b[1].item(), b[2].item(), b[3].item()]
                if la.item() == c:
                    detections.append(updated_detection_array)

            for b, la in zip(targets[0]['boxes'], targets[0]['labels']):
                updated_gt_array = [targets[0]['image_id'].item(), la.item(), 1,
                                    b[0].item(), b[1].item(), b[2].item(), b[3].item()]
                if la.item() == c:
                    ground_truths.append(updated_gt_array)
                    global_gt.append(updated_gt_array)

            # use Counter to create a dictionary where the key is the image id and the
            # value is the number of gt bboxes in that image
            amount_bboxes = Counter([gt[0] for gt in ground_truths])

            # goal: keep track of the gt bboxes we have already "detected" with prior predicted bboxes
            # key: image id
            # value: tensor of zeros (size equal to the number of gt bboxes in that image)
            for key, value in amount_bboxes.items():
                amount_bboxes[key] = torch.zeros(value)

            # sort the detections by probability score, highest first
            detections.sort(key=lambda x: x[2], reverse=True)

            true_Positives = torch.zeros(len(detections))
            false_Positives = torch.zeros(len(detections))
            total_gt_bboxes = len(ground_truths)

            false_positives_frame = []
            true_positives_frame = []

            # iterate through all detections in the given class c
            for detection_index, detection in enumerate(detections):
                # detection[0] indicates the image id
                # ground_truth_image: the gt bboxes that are in the same image as the detection
                ground_truth_image = [bbox for bbox in ground_truths if bbox[0] == detection[0]]
                # num_gt_boxes: number of ground-truth boxes in that image
                num_gt_boxes = len(ground_truth_image)

                best_iou = 0
                best_gt_index = 0
                for index, gt in enumerate(ground_truth_image):
                    iou = torchvision.ops.box_iou(torch.tensor(detection[3:]).unsqueeze(0),
                                                  torch.tensor(gt[3:]).unsqueeze(0))
                    if iou > best_iou:
                        best_iou = iou
                        best_gt_index = index

                if best_iou > iou_threshold:
                    # check if the gt bbox with best_iou was already covered by a previous
                    # detection with a higher confidence score:
                    # amount_bboxes[detection[0]][best_gt_index] == 0 if not discovered yet, 1 otherwise
                    if amount_bboxes[detection[0]][best_gt_index] == 0:
                        true_Positives[detection_index] = 1
                        # mark this gt box as matched so later, lower-confidence
                        # detections of it count as false positives
                        amount_bboxes[detection[0]][best_gt_index] = 1
                        true_positives_frame.append(detection)
                        global_tp.append(detection)
                    else:
                        false_Positives[detection_index] = 1
                        false_positives_frame.append(detection)
                        global_fp.append(detection)
                else:
                    false_Positives[detection_index] = 1
                    false_positives_frame.append(detection)
                    global_fp.append(detection)

# remove NaN values from the ground-truth list, as the list contains every mitosis
# image row entry (including images with no targets)
global_gt_updated = []
for gt in global_gt:
    if not math.isnan(gt[3]):
        global_gt_updated.append(gt)

global_fn = len(global_gt_updated) - len(global_tp)

precision = len(global_tp) / (len(global_tp) + len(global_fp))
recall = len(global_tp) / (len(global_tp) + global_fn)
f1_score = 2 * (precision * recall) / (precision + recall)

print(len(global_tp))
print(recall)
print(precision)
print(f1_score)
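Since the question also asked about exporting the false positives, the global_fp list built above can be dumped straight to disk. A minimal sketch (the filename is arbitrary):

import numpy as np

# each row: [image_id, class, score, x1, y1, x2, y2]
np.save('false_positives.npy', np.array(global_fp))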