Python & OpenCV:如何向无网格添加线条 table
Python & OpenCV: How to add lines to gridless table
我有以下 table:
我想编写一个脚本,根据 table 文本上的自然断行来创建行。结果将如下所示:
是否有 OpenCV 实现可以绘制这些线?我查看了问题 here and here 的答案,但均无效。解决此问题的最佳方法是什么?
这是一种获取 Python/OpenCV 中的水平线的方法,方法是计算图像每一行中白色像素的数量,找到它们的中心 y 值。可以通过类似的过程添加垂直线。
输入:
import cv2
import numpy as np
# read image
img = cv2.imread("table.png")
hh, ww = img.shape[:2]
# convert to grayscale
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
# threshold gray image
thresh = cv2.threshold(gray, 254, 255, cv2.THRESH_BINARY)[1]
# count number of non-zero pixels in each row
count = np.count_nonzero(thresh, axis=1)
# threshold count at ww (width of image)
count_thresh = count.copy()
count_thresh[count==ww] = 255
count_thresh[count<ww] = 0
count_thresh = count_thresh.astype(np.uint8)
# get contours
contours = cv2.findContours(count_thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
# loop over contours and get bounding boxes and ycenter and draw horizontal line at ycenter
result = img.copy()
for cntr in contours:
x,y,w,h = cv2.boundingRect(cntr)
ycenter = y+h//2
cv2.line(result, (0,ycenter), (ww-1,ycenter), (0, 0, 0), 2)
# write results
cv2.imwrite("table_thresh.png", thresh)
cv2.imwrite("table_lines.png", result)
# display results
cv2.imshow("THRESHOLD", thresh)
cv2.imshow("RESULT", result)
cv2.waitKey(0)
阈值图像:
行结果:
加法
这是一种稍微简单一些的替代方法。它将图像平均化为一列,而不是计算白色像素。
import cv2
import numpy as np
# read image
img = cv2.imread("table.png")
hh, ww = img.shape[:2]
# convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# average gray image to one column
column = cv2.resize(gray, (1,hh), interpolation = cv2.INTER_AREA)
# threshold on white
thresh = cv2.threshold(column, 254, 255, cv2.THRESH_BINARY)[1]
# get contours
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
# loop over contours and get bounding boxes and ycenter and draw horizontal line at ycenter
result = img.copy()
for cntr in contours:
x,y,w,h = cv2.boundingRect(cntr)
ycenter = y+h//2
cv2.line(result, (0,ycenter), (ww-1,ycenter), (0, 0, 0), 2)
# write results
cv2.imwrite("table_lines2.png", result)
# display results
cv2.imshow("RESULT", result)
cv2.waitKey(0)
结果:
我有以下 table:
我想编写一个脚本,根据 table 文本上的自然断行来创建行。结果将如下所示:
是否有 OpenCV 实现可以绘制这些线?我查看了问题 here and here 的答案,但均无效。解决此问题的最佳方法是什么?
这是一种获取 Python/OpenCV 中的水平线的方法,方法是计算图像每一行中白色像素的数量,找到它们的中心 y 值。可以通过类似的过程添加垂直线。
输入:
import cv2
import numpy as np
# read image
img = cv2.imread("table.png")
hh, ww = img.shape[:2]
# convert to grayscale
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
# threshold gray image
thresh = cv2.threshold(gray, 254, 255, cv2.THRESH_BINARY)[1]
# count number of non-zero pixels in each row
count = np.count_nonzero(thresh, axis=1)
# threshold count at ww (width of image)
count_thresh = count.copy()
count_thresh[count==ww] = 255
count_thresh[count<ww] = 0
count_thresh = count_thresh.astype(np.uint8)
# get contours
contours = cv2.findContours(count_thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
# loop over contours and get bounding boxes and ycenter and draw horizontal line at ycenter
result = img.copy()
for cntr in contours:
x,y,w,h = cv2.boundingRect(cntr)
ycenter = y+h//2
cv2.line(result, (0,ycenter), (ww-1,ycenter), (0, 0, 0), 2)
# write results
cv2.imwrite("table_thresh.png", thresh)
cv2.imwrite("table_lines.png", result)
# display results
cv2.imshow("THRESHOLD", thresh)
cv2.imshow("RESULT", result)
cv2.waitKey(0)
阈值图像:
行结果:
加法
这是一种稍微简单一些的替代方法。它将图像平均化为一列,而不是计算白色像素。
import cv2
import numpy as np
# read image
img = cv2.imread("table.png")
hh, ww = img.shape[:2]
# convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# average gray image to one column
column = cv2.resize(gray, (1,hh), interpolation = cv2.INTER_AREA)
# threshold on white
thresh = cv2.threshold(column, 254, 255, cv2.THRESH_BINARY)[1]
# get contours
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
# loop over contours and get bounding boxes and ycenter and draw horizontal line at ycenter
result = img.copy()
for cntr in contours:
x,y,w,h = cv2.boundingRect(cntr)
ycenter = y+h//2
cv2.line(result, (0,ycenter), (ww-1,ycenter), (0, 0, 0), 2)
# write results
cv2.imwrite("table_lines2.png", result)
# display results
cv2.imshow("RESULT", result)
cv2.waitKey(0)
结果: