如何将 cv2 矩形边界框合并为多边形? (不是 overlap/threshold)
How can I merge cv2 rectangle bounding boxes into polygons? (Not by overlap/threshold)
我有多个矩形边界框,我知道它们属于同一个对象(报纸上同一篇文章的各个部分),如第一张图片所示。我正在尝试找出一种方法,把它们合并成一个覆盖整篇文章的多边形边界框,如第二张图片所示。
我见过很多基于合并重叠边界框的解决方案,但我不关心它们是否重叠 - 我已经知道它们属于同一篇文章的一部分。在某些情况下标题很远(例如图片上方),因此基于填充的解决方案也不起作用。
我觉得应该有一个 cv2 函数可以执行此操作,但即使有,我也没有找到。任何建议都会非常有帮助。
创建这两张图片的代码:
# Reproduction script: writes one image with every individual box drawn on
# the page scan, and a second image with the hand-written merged outline.
PAGE_SCAN = '63976500-anderson-herald-bulletin-Jun-18-1968-p-64.jpg'
BOX_COLOR = (0, 255, 0)  # green
LINE_WIDTH = 10

# Individual bounding boxes
image_0 = cv2.imread(PAGE_SCAN)
# Black box, to reproduce: image_0 = np.zeros((5000, 6000, 3), dtype = "uint8")
bbox_list = [
    [195, 3455, 633, 4213],
    [658, 3427, 1094, 4222],
    [1120, 3435, 1553, 4473],
    [295, 3421, 531, 3451],
    [201, 3313, 1548, 3409],
]
# Each entry is used as two opposite corners: (x1, y1) and (x2, y2).
for left, top, right, bottom in bbox_list:
    image_0 = cv2.rectangle(
        image_0, (left, top), (right, bottom), BOX_COLOR, LINE_WIDTH
    )
cv2.imwrite("original_bboxes.jpg", image_0)

# Grouped bounding box
image_1 = cv2.imread(PAGE_SCAN)
# Black box, to reproduce: image_1 = np.zeros((5000, 6000, 3), dtype = "uint8")
outline_vertices = [
    [195, 3313],
    [195, 4222],
    [1120, 4222],
    [1120, 4473],
    [1553, 4473],
    [1553, 3313],
]
coordinates = np.array(outline_vertices, np.int32)
# True -> the polyline is closed (last vertex joined back to the first).
image_1 = cv2.polylines(image_1, [coordinates], True, BOX_COLOR, LINE_WIDTH)
cv2.imwrite("grouped_bboxes.jpg", image_1)
你可以画出轮廓点的凸包(这是手工画的):
(错误的凸包图像)
然后只保留外轮廓并尝试多边形近似。我必须承认,我想不出更聪明的方法来只获取垂直线和水平线。
正如 Christoph Rackwitz 所指出的,我错了:凸包行不通。也许 α 形状(alpha shape)可以解决问题,但我不确定。另一种方法是提取定义边界框的所有直线方程,然后为每个角点计算将其连接到最近直线的线段。如果该直线属于同一个边界框,或者该点落在边界框的范围之外,则丢弃这条线段。
这比我想象的要难,因为我对 Python 版 OpenCV 还不够熟练。不过下面的代码几乎可以得到你想要的结果:
# Merge axis-aligned bounding boxes into one outline: draw every box on a
# blank canvas, bridge each box corner to the nearest side of another box,
# then trace the outer contour of the resulting line drawing.
import cv2  # plain import; `from cv2 import cv2` fails on recent opencv-python
import numpy as np

IMAGE_PATH = '63976500-anderson-herald-bulletin-Jun-18-1968-p-64.jpg'

image_0 = cv2.imread(IMAGE_PATH)
if image_0 is None:
    # cv2.imread reports an unreadable/missing file by returning None.
    raise FileNotFoundError(f"Could not read image: {IMAGE_PATH}")

# Single-channel canvas with the same height and width as the page.
bwimage = np.zeros(image_0.shape[:2], dtype=np.uint8)

# Each box is [x1, y1, x2, y2]; index 0/2 are x sides, index 1/3 are y sides.
bbox_list = [
    [195, 3455, 633, 4213],
    [658, 3427, 1094, 4222],
    [1120, 3435, 1553, 4473],
    [295, 3421, 531, 3451],
    [201, 3313, 1548, 3409],
]
for bbox in bbox_list:
    bwimage = cv2.rectangle(
        bwimage, (bbox[0], bbox[1]), (bbox[2], bbox[3]), 255, 1
    )

# create list of corners with bbox index: (x, y, index of owning box)
corners = [
    (x, y, i)
    for i, (x1, y1, x2, y2) in enumerate(bbox_list)
    for x, y in ((x1, y1), (x1, y2), (x2, y1), (x2, y2))
]

# for each corner find nearest border
for c in corners:
    min_dist = float('inf')
    min_dist_i = None
    min_dist_type = None
    for i, bb in enumerate(bbox_list):
        for side in range(4):
            # Even sides (0, 2) are x coordinates, odd sides (1, 3) are y.
            thisdim = side % 2
            otherdim = 1 - thisdim
            dist = abs(c[thisdim] - bb[side])
            if dist == 0 and c[2] == i:
                # The corner lies on this side of its own box; skip it.
                continue
            # Only accept a side if the corner projects strictly inside the
            # side's extent along the other axis.
            if dist < min_dist and bb[otherdim] < c[otherdim] < bb[otherdim + 2]:
                min_dist = dist
                min_dist_i = i
                min_dist_type = side
    if min_dist_i is not None:
        bb = bbox_list[min_dist_i]
        print(f"Corner ({c[0]}, {c[1]}) nearest BB: {min_dist_i} [({bb[0]}, {bb[1]})->({bb[2]}, {bb[3]})]")
        # Project the corner onto the chosen side: move along x for an
        # x side, along y for a y side.
        if min_dist_type % 2 == 0:
            dest = (bb[min_dist_type], c[1])
        else:
            dest = (c[0], bb[min_dist_type])
        bwimage = cv2.line(bwimage, (c[0], c[1]), dest, 255, 1)

# Keep only the outermost contour(s) of the connected line drawing and
# overlay them on the original page.
contours, _ = cv2.findContours(
    image=bwimage, mode=cv2.RETR_EXTERNAL, method=cv2.CHAIN_APPROX_NONE
)
image_0 = cv2.drawContours(image_0, contours, -1, (0, 255, 0), 1)
cv2.imwrite("result.png", image_0)
结果如下:
我有多个矩形边界框,我知道它们属于同一个对象(报纸上同一篇文章的各个部分),如第一张图片所示。我正在尝试找出一种方法,把它们合并成一个覆盖整篇文章的多边形边界框,如第二张图片所示。
我见过很多基于合并重叠边界框的解决方案,但我不关心它们是否重叠 - 我已经知道它们属于同一篇文章的一部分。在某些情况下标题很远(例如图片上方),因此基于填充的解决方案也不起作用。
我觉得应该有一个 cv2 函数可以执行此操作,但即使有,我也没有找到。任何建议都会非常有帮助。
创建这两张图片的代码:
# Reproduction script: writes one image with every individual box drawn on
# the page scan, and a second image with the hand-written merged outline.
PAGE_SCAN = '63976500-anderson-herald-bulletin-Jun-18-1968-p-64.jpg'
BOX_COLOR = (0, 255, 0)  # green
LINE_WIDTH = 10

# Individual bounding boxes
image_0 = cv2.imread(PAGE_SCAN)
# Black box, to reproduce: image_0 = np.zeros((5000, 6000, 3), dtype = "uint8")
bbox_list = [
    [195, 3455, 633, 4213],
    [658, 3427, 1094, 4222],
    [1120, 3435, 1553, 4473],
    [295, 3421, 531, 3451],
    [201, 3313, 1548, 3409],
]
# Each entry is used as two opposite corners: (x1, y1) and (x2, y2).
for left, top, right, bottom in bbox_list:
    image_0 = cv2.rectangle(
        image_0, (left, top), (right, bottom), BOX_COLOR, LINE_WIDTH
    )
cv2.imwrite("original_bboxes.jpg", image_0)

# Grouped bounding box
image_1 = cv2.imread(PAGE_SCAN)
# Black box, to reproduce: image_1 = np.zeros((5000, 6000, 3), dtype = "uint8")
outline_vertices = [
    [195, 3313],
    [195, 4222],
    [1120, 4222],
    [1120, 4473],
    [1553, 4473],
    [1553, 3313],
]
coordinates = np.array(outline_vertices, np.int32)
# True -> the polyline is closed (last vertex joined back to the first).
image_1 = cv2.polylines(image_1, [coordinates], True, BOX_COLOR, LINE_WIDTH)
cv2.imwrite("grouped_bboxes.jpg", image_1)
你可以画出轮廓点的凸包(这是手工画的):
(错误的凸包图像)
然后只保留外轮廓并尝试多边形近似。我必须承认,我想不出更聪明的方法来只获取垂直线和水平线。
正如 Christoph Rackwitz 所指出的,我错了:凸包行不通。也许 α 形状(alpha shape)可以解决问题,但我不确定。另一种方法是提取定义边界框的所有直线方程,然后为每个角点计算将其连接到最近直线的线段。如果该直线属于同一个边界框,或者该点落在边界框的范围之外,则丢弃这条线段。
这比我想象的要难,因为我对 Python 版 OpenCV 还不够熟练。不过下面的代码几乎可以得到你想要的结果:
# Merge axis-aligned bounding boxes into one outline: draw every box on a
# blank canvas, bridge each box corner to the nearest side of another box,
# then trace the outer contour of the resulting line drawing.
import cv2  # plain import; `from cv2 import cv2` fails on recent opencv-python
import numpy as np

IMAGE_PATH = '63976500-anderson-herald-bulletin-Jun-18-1968-p-64.jpg'

image_0 = cv2.imread(IMAGE_PATH)
if image_0 is None:
    # cv2.imread reports an unreadable/missing file by returning None.
    raise FileNotFoundError(f"Could not read image: {IMAGE_PATH}")

# Single-channel canvas with the same height and width as the page.
bwimage = np.zeros(image_0.shape[:2], dtype=np.uint8)

# Each box is [x1, y1, x2, y2]; index 0/2 are x sides, index 1/3 are y sides.
bbox_list = [
    [195, 3455, 633, 4213],
    [658, 3427, 1094, 4222],
    [1120, 3435, 1553, 4473],
    [295, 3421, 531, 3451],
    [201, 3313, 1548, 3409],
]
for bbox in bbox_list:
    bwimage = cv2.rectangle(
        bwimage, (bbox[0], bbox[1]), (bbox[2], bbox[3]), 255, 1
    )

# create list of corners with bbox index: (x, y, index of owning box)
corners = [
    (x, y, i)
    for i, (x1, y1, x2, y2) in enumerate(bbox_list)
    for x, y in ((x1, y1), (x1, y2), (x2, y1), (x2, y2))
]

# for each corner find nearest border
for c in corners:
    min_dist = float('inf')
    min_dist_i = None
    min_dist_type = None
    for i, bb in enumerate(bbox_list):
        for side in range(4):
            # Even sides (0, 2) are x coordinates, odd sides (1, 3) are y.
            thisdim = side % 2
            otherdim = 1 - thisdim
            dist = abs(c[thisdim] - bb[side])
            if dist == 0 and c[2] == i:
                # The corner lies on this side of its own box; skip it.
                continue
            # Only accept a side if the corner projects strictly inside the
            # side's extent along the other axis.
            if dist < min_dist and bb[otherdim] < c[otherdim] < bb[otherdim + 2]:
                min_dist = dist
                min_dist_i = i
                min_dist_type = side
    if min_dist_i is not None:
        bb = bbox_list[min_dist_i]
        print(f"Corner ({c[0]}, {c[1]}) nearest BB: {min_dist_i} [({bb[0]}, {bb[1]})->({bb[2]}, {bb[3]})]")
        # Project the corner onto the chosen side: move along x for an
        # x side, along y for a y side.
        if min_dist_type % 2 == 0:
            dest = (bb[min_dist_type], c[1])
        else:
            dest = (c[0], bb[min_dist_type])
        bwimage = cv2.line(bwimage, (c[0], c[1]), dest, 255, 1)

# Keep only the outermost contour(s) of the connected line drawing and
# overlay them on the original page.
contours, _ = cv2.findContours(
    image=bwimage, mode=cv2.RETR_EXTERNAL, method=cv2.CHAIN_APPROX_NONE
)
image_0 = cv2.drawContours(image_0, contours, -1, (0, 255, 0), 1)
cv2.imwrite("result.png", image_0)
结果如下: