如何将 cv2 矩形边界框合并为多边形? (不是 overlap/threshold)

How can I merge cv2 rectangle bounding boxes into polygons? (Not by overlap/threshold)

我有多个矩形边界框,我知道它们属于同一 object(报纸文章的一部分),如第一张图片所示。对于整篇文章,我正在尝试找出一种将它们合并到一个多边形边界框的方法,如第二张图片所示。

我见过很多基于合并重叠边界框的解决方案,但我不关心它们是否重叠 - 我已经知道它们属于同一篇文章的一部分。在某些情况下标题很远(例如图片上方),因此基于填充的解决方案也不起作用。

我觉得应该有一个 cv2 函数可以执行此操作,但即使有,我也还没有找到。任何建议都会非常有帮助。

创建这两张图片的代码:

# Individual bounding boxes: outline every fragment of the article on its own.

# No scan at hand? Reproduce on a black canvas instead:
#   image_0 = np.zeros((5000, 6000, 3), dtype = "uint8")
image_0 = cv2.imread('63976500-anderson-herald-bulletin-Jun-18-1968-p-64.jpg')

# Each entry holds two opposite corners as [x_min, y_min, x_max, y_max].
bbox_list = [
    [195, 3455, 633, 4213],
    [658, 3427, 1094, 4222],
    [1120, 3435, 1553, 4473],
    [295, 3421, 531, 3451],
    [201, 3313, 1548, 3409],
]

for x_min, y_min, x_max, y_max in bbox_list:
    # Green outline, 10 px thick, between the two opposite corners.
    top_left = (x_min, y_min)
    bottom_right = (x_max, y_max)
    image_0 = cv2.rectangle(image_0, top_left, bottom_right, (0, 255, 0), 10)

cv2.imwrite("original_bboxes.jpg", image_0)


# Grouped bounding box: a single hand-written polygon around the whole article.

# No scan at hand? Reproduce on a black canvas instead:
#   image_1 = np.zeros((5000, 6000, 3), dtype = "uint8")
image_1 = cv2.imread('63976500-anderson-herald-bulletin-Jun-18-1968-p-64.jpg')

# Polygon vertices as (x, y) pairs, listed in drawing order.
coordinates = np.array(
    [
        [195, 3313],
        [195, 4222],
        [1120, 4222],
        [1120, 4473],
        [1553, 4473],
        [1553, 3313],
    ],
    np.int32,
)

# Passing True closes the outline by joining the last vertex to the first.
polygons = [coordinates]
image_1 = cv2.polylines(image_1, polygons, True, (0, 255, 0), 10)

cv2.imwrite("grouped_bboxes.jpg", image_1)

你可以画出轮廓点的凸包(这是手工画的):
(错误的凸包图像)
然后只保留外轮廓并尝试多边形近似。我必须承认,我想不出更聪明的方法来只获取垂直线和水平线。

正如 Christoph Rackwitz 所说,我错了:凸包行不通。也许 α 形状(alpha shape)可以解决问题,但我不确定。另一种方法是提取定义边界框的所有直线方程,然后为每个点计算将其连接到最近直线的线段。如果该直线属于同一个边界框,或者该点落在边界框之外,则删除该线段。

这比我想象的要难,因为我对 Python OpenCV 还不够熟练。不过这样几乎可以得到你想要的结果:

# Plain `import cv2` works on every opencv-python release, whereas
# `from cv2 import cv2` raises ImportError on current wheels.
import cv2
import numpy as np

# Load the scanned page; the merged outline is drawn onto it at the end.
image_path = '63976500-anderson-herald-bulletin-Jun-18-1968-p-64.jpg'
image_0 = cv2.imread(image_path)
if image_0 is None:
    # cv2.imread reports an unreadable or missing file by returning None
    # rather than raising, so fail here with a clear message instead of an
    # AttributeError on `.shape` below.
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Single-channel black mask with the same height and width as the page.
# All boxes and connecting segments are drawn on it in white (255).
bwimage = np.zeros(image_0.shape[:2], dtype=np.uint8)

# Each entry holds two opposite corners as [x_min, y_min, x_max, y_max].
bbox_list = [[195, 3455, 633, 4213], [658, 3427, 1094, 4222], [1120, 3435, 1553, 4473], [295, 3421, 531, 3451],
             [201, 3313, 1548, 3409]]

# Draw every box as a 1-px white outline on the mask.
for bbox in bbox_list:
    bwimage = cv2.rectangle(bwimage, (bbox[0], bbox[1]), (bbox[2], bbox[3]), 255, 1)

# Disabled debug output, kept from the original answer:
#cv2.imwrite("original_bboxes.png", image_0)

# Collect the four corners of every box as (x, y, box_index) tuples, so each
# corner remembers which box it came from. Per box the order is:
# (x_min, y_min), (x_min, y_max), (x_max, y_min), (x_max, y_max).
corners = []
for box_idx, (x_min, y_min, x_max, y_max) in enumerate(bbox_list):
    corners.extend(
        (x, y, box_idx) for x in (x_min, x_max) for y in (y_min, y_max)
    )

# For every corner, find the closest box side it projects onto and draw a
# straight axis-aligned segment from the corner to that side on the mask.
for corner in corners:
    cx, cy, owner = corner
    nearest = None  # (distance, box index, side index) of the best side so far

    for box_idx, box in enumerate(bbox_list):
        # Sides 0..3 index the box as [x_min, y_min, x_max, y_max]: even
        # sides are vertical edges (fixed x), odd sides horizontal (fixed y).
        for side in range(4):
            axis = side % 2       # coordinate that this side fixes
            span_axis = 1 - axis  # coordinate along which the side extends
            gap = abs(corner[axis] - box[side])

            # A side of the corner's own box that passes through the corner
            # is never a target.
            if gap == 0 and owner == box_idx:
                continue

            # The corner must project strictly inside the side's extent;
            # on equal distances the first side found is kept.
            within_span = box[span_axis] < corner[span_axis] < box[span_axis + 2]
            if within_span and (nearest is None or gap < nearest[0]):
                nearest = (gap, box_idx, side)

    if nearest is None:
        # No side qualifies for this corner, so nothing is drawn.
        continue

    _, box_idx, side = nearest
    box = bbox_list[box_idx]
    print(f"Corner ({cx}, {cy}) nearest BB: {box_idx} [({box[0]}, {box[1]})->({box[2]}, {box[3]})]")

    # Move only along the axis the chosen side fixes: horizontally to a
    # vertical side, vertically to a horizontal one.
    dest = (box[side], cy) if side % 2 == 0 else (cx, box[side])
    bwimage = cv2.line(bwimage, (cx, cy), dest, 255, 1)

# Trace the mask: RETR_EXTERNAL keeps only the outermost contours and
# CHAIN_APPROX_NONE stores every contour point without compression.
contours, _ = cv2.findContours(bwimage, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

# Draw all contours (index -1) onto the page as a 1-px green line.
image_0 = cv2.drawContours(
    image=image_0,
    contours=contours,
    contourIdx=-1,
    color=(0, 255, 0),
    thickness=1,
)

cv2.imwrite("result.png", image_0)

结果如下: