使用 Python OpenCV 在两行之间查找文本
Finding text between two lines using Python OpenCV
我想使用 Python (cv2) 识别并突出显示/裁剪两行之间的文本。
一行是顶部的波浪线,第二行是页面中的某处。此行可以出现在页面的任何高度,范围从刚好在第 1 行之后到刚好在最后一行之前。
一个例子,
我认为我需要使用 HoughLinesP()
以某种方式使用适当的参数。
我尝试了一些涉及 erode
+ dilate
+ HoughLinesP
.
组合的示例
例如
img = cv2.imread(image)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
kernel_size = 5
blur_gray = cv2.GaussianBlur(gray, (kernel_size, kernel_size), 0)
# erode / dilate
erode_kernel_param = (5, 200) # (5, 50)
dilate_kernel_param = (5, 5) # (5, 75)
img_erode = cv2.erode(blur_gray, np.ones(erode_kernel_param))
img_dilate = cv2.dilate(img_erode, np.ones(dilate_kernel_param))
# %% Second, process edge detection use Canny.
low_threshold = 50
high_threshold = 150
edges = cv2.Canny(img_dilate, low_threshold, high_threshold)
# %% Then, use HoughLinesP to get the lines.
# Adjust the parameters for better performance.
rho = 1 # distance resolution in pixels of the Hough grid
theta = np.pi / 180 # angular resolution in radians of the Hough grid
threshold = 15 # min number of votes (intersections in Hough grid cell)
min_line_length = 600 # min number of pixels making up a line
max_line_gap = 20 # max gap in pixels between connectable line segments
line_image = np.copy(img) * 0 # creating a blank to draw lines on
# %% Run Hough on edge detected image
# Output "lines" is an array containing endpoints of detected line segments
lines = cv2.HoughLinesP(edges, rho, theta, threshold, np.array([]),
min_line_length, max_line_gap)
if lines is not None:
for line in lines:
for x1, y1, x2, y2 in line:
cv2.line(line_image, (x1, y1), (x2, y2), (255, 0, 0), 5)
# %% Draw the lines on the image
lines_edges = cv2.addWeighted(img, 0.8, line_image, 1, 0)
然而,在许多情况下,这些行不会被识别 属性。
错误的一些例子是,
- 识别出的行太多(文本中也有)
- 行未完全识别
- 行根本没有被识别
我走在正确的轨道上吗?为此,我是否只需要输入正确的参数组合?还是有更简单的方法/技巧可以让我可靠地裁剪这两行之间的文本?
如果相关,我需要为 ~450 页做这件事。
这是本书的 link,以防有人想查看更多页面示例。
https://archive.org/details/in.ernet.dli.2015.553713/page/n13/mode/2up
谢谢。
解决方案
我对 Ari 的回答做了一些小修改(谢谢),为了我自己的缘故,让代码更容易理解,这是我的代码。
核心思想是,
- 查找轮廓及其边界矩形。
- 两条“最宽”的等高线代表两条线。
- 然后,取顶部矩形的下边和底部矩形的上边来界定我们感兴趣的区域(文本)。
for image in images:
base_img = cv2.imread(image)
height, width, channels = base_img.shape
img = cv2.cvtColor(base_img, cv2.COLOR_BGR2GRAY)
ret, img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
img = cv2.bitwise_not(img)
contours, hierarchy = cv2.findContours(
img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
)
# Get rectangle bounding contour
rects = [cv2.boundingRect(contour) for contour in contours]
# Rectangle is (x, y, w, h)
# Top-Left point of the image is (0, 0), rightwards X, downwards Y
# Sort the contours bigger width first
rects.sort(key=lambda r: r[2], reverse=True)
# Get the 2 "widest" rectangles
line_rects = rects[:2]
line_rects.sort(key=lambda r: r[1])
# If at least two rectangles (contours) were found
if len(line_rects) >= 2:
top_x, top_y, top_w, top_h = line_rects[0]
bot_x, bot_y, bot_w, bot_h = line_rects[1]
# Cropping the img
# Crop between bottom y of the upper rectangle (i.e. top_y + top_h)
# and the top y of lower rectangle (i.e. bot_y)
crop_img = base_img[top_y+top_h:bot_y]
# Highlight the area by drawing the rectangle
# For full width, 0 and width can be used, while
# For exact width (erroneous) top_x and bot_x + bot_w can be used
rect_img = cv2.rectangle(
base_img,
pt1=(0, top_y + top_h),
pt2=(width, bot_y),
color=(0, 255, 0),
thickness=2
)
cv2.imwrite(image.replace('.jpg', '.rect.jpg'), rect_img)
cv2.imwrite(image.replace('.jpg', '.crop.jpg'), crop_img)
else:
print(f"Insufficient contours in {image}")
你可以找到Contours,然后取宽度最大的两个。
base_img = cv2.imread('a.png')
img = cv2.cvtColor(base_img, cv2.COLOR_BGR2GRAY)
ret, img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
img = cv2.bitwise_not(img)
cnts, hierarchy = cv2.findContours(img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# sort the cnts bigger width first
cnts.sort(key=lambda c: cv2.boundingRect(c)[2], reverse=True)
# get the 2 big lines
lines = [cv2.boundingRect(cnts[0]), cv2.boundingRect(cnts[1])]
# higher line first
lines.sort(key=lambda c: c[1])
# croping the img
crop_img = base_img[lines[0][1]:lines[1][1]]
我想使用 Python (cv2) 识别并突出显示/裁剪两行之间的文本。
一行是顶部的波浪线,第二行是页面中的某处。此行可以出现在页面的任何高度,范围从刚好在第 1 行之后到刚好在最后一行之前。
一个例子,
我认为我需要使用 HoughLinesP()
以某种方式使用适当的参数。
我尝试了一些涉及 erode
+ dilate
+ HoughLinesP
.
例如
img = cv2.imread(image)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
kernel_size = 5
blur_gray = cv2.GaussianBlur(gray, (kernel_size, kernel_size), 0)
# erode / dilate
erode_kernel_param = (5, 200) # (5, 50)
dilate_kernel_param = (5, 5) # (5, 75)
img_erode = cv2.erode(blur_gray, np.ones(erode_kernel_param))
img_dilate = cv2.dilate(img_erode, np.ones(dilate_kernel_param))
# %% Second, process edge detection use Canny.
low_threshold = 50
high_threshold = 150
edges = cv2.Canny(img_dilate, low_threshold, high_threshold)
# %% Then, use HoughLinesP to get the lines.
# Adjust the parameters for better performance.
rho = 1 # distance resolution in pixels of the Hough grid
theta = np.pi / 180 # angular resolution in radians of the Hough grid
threshold = 15 # min number of votes (intersections in Hough grid cell)
min_line_length = 600 # min number of pixels making up a line
max_line_gap = 20 # max gap in pixels between connectable line segments
line_image = np.copy(img) * 0 # creating a blank to draw lines on
# %% Run Hough on edge detected image
# Output "lines" is an array containing endpoints of detected line segments
lines = cv2.HoughLinesP(edges, rho, theta, threshold, np.array([]),
min_line_length, max_line_gap)
if lines is not None:
for line in lines:
for x1, y1, x2, y2 in line:
cv2.line(line_image, (x1, y1), (x2, y2), (255, 0, 0), 5)
# %% Draw the lines on the image
lines_edges = cv2.addWeighted(img, 0.8, line_image, 1, 0)
然而,在许多情况下,这些行不会被识别 属性。 错误的一些例子是,
- 识别出的行太多(文本中也有)
- 行未完全识别
- 行根本没有被识别
我走在正确的轨道上吗?为此,我是否只需要输入正确的参数组合?还是有更简单的方法/技巧可以让我可靠地裁剪这两行之间的文本?
如果相关,我需要为 ~450 页做这件事。 这是本书的 link,以防有人想查看更多页面示例。 https://archive.org/details/in.ernet.dli.2015.553713/page/n13/mode/2up
谢谢。
解决方案
我对 Ari 的回答做了一些小修改(谢谢),为了我自己的缘故,让代码更容易理解,这是我的代码。
核心思想是,
- 查找轮廓及其边界矩形。
- 两条“最宽”的等高线代表两条线。
- 然后,取顶部矩形的下边和底部矩形的上边来界定我们感兴趣的区域(文本)。
for image in images:
base_img = cv2.imread(image)
height, width, channels = base_img.shape
img = cv2.cvtColor(base_img, cv2.COLOR_BGR2GRAY)
ret, img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
img = cv2.bitwise_not(img)
contours, hierarchy = cv2.findContours(
img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
)
# Get rectangle bounding contour
rects = [cv2.boundingRect(contour) for contour in contours]
# Rectangle is (x, y, w, h)
# Top-Left point of the image is (0, 0), rightwards X, downwards Y
# Sort the contours bigger width first
rects.sort(key=lambda r: r[2], reverse=True)
# Get the 2 "widest" rectangles
line_rects = rects[:2]
line_rects.sort(key=lambda r: r[1])
# If at least two rectangles (contours) were found
if len(line_rects) >= 2:
top_x, top_y, top_w, top_h = line_rects[0]
bot_x, bot_y, bot_w, bot_h = line_rects[1]
# Cropping the img
# Crop between bottom y of the upper rectangle (i.e. top_y + top_h)
# and the top y of lower rectangle (i.e. bot_y)
crop_img = base_img[top_y+top_h:bot_y]
# Highlight the area by drawing the rectangle
# For full width, 0 and width can be used, while
# For exact width (erroneous) top_x and bot_x + bot_w can be used
rect_img = cv2.rectangle(
base_img,
pt1=(0, top_y + top_h),
pt2=(width, bot_y),
color=(0, 255, 0),
thickness=2
)
cv2.imwrite(image.replace('.jpg', '.rect.jpg'), rect_img)
cv2.imwrite(image.replace('.jpg', '.crop.jpg'), crop_img)
else:
print(f"Insufficient contours in {image}")
你可以找到Contours,然后取宽度最大的两个。
base_img = cv2.imread('a.png')
img = cv2.cvtColor(base_img, cv2.COLOR_BGR2GRAY)
ret, img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
img = cv2.bitwise_not(img)
cnts, hierarchy = cv2.findContours(img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# sort the cnts bigger width first
cnts.sort(key=lambda c: cv2.boundingRect(c)[2], reverse=True)
# get the 2 big lines
lines = [cv2.boundingRect(cnts[0]), cv2.boundingRect(cnts[1])]
# higher line first
lines.sort(key=lambda c: c[1])
# croping the img
crop_img = base_img[lines[0][1]:lines[1][1]]