使用 Python OpenCV 查找两条线之间的文本

Question

我想使用 Python (cv2) 识别并突出显示/裁剪两条线之间的文本。

第一条线是页面顶部的波浪线，第二条线位于页面中的某处。第二条线可以出现在页面的任何高度，从紧接第一行文本之后到最后一行文本之前都有可能。

一个例子，

我认为我需要以某种方式配合适当的参数使用 HoughLinesP()。我尝试了一些涉及 erode + dilate + HoughLinesP 组合的示例。

例如


    # Load the page and convert it to a single-channel grayscale image.
    img = cv2.imread(image)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Smooth with a square Gaussian kernel before the morphology step.
    kernel_size = 5
    blur_gray = cv2.GaussianBlur(gray, (kernel_size, kernel_size), 0)

    # Erode, then dilate, using all-ones structuring elements.
    # np.ones takes (rows, cols), so the erode kernel is 5 rows x 200 columns.
    erode_kernel_param = (5, 200)  # previously tried: (5, 50)
    dilate_kernel_param = (5, 5)  # previously tried: (5, 75)

    erode_kernel = np.ones(erode_kernel_param)
    dilate_kernel = np.ones(dilate_kernel_param)
    img_erode = cv2.erode(blur_gray, erode_kernel)
    img_dilate = cv2.dilate(img_erode, dilate_kernel)

    # Edge detection with Canny on the morphologically cleaned image.
    low_threshold = 50
    high_threshold = 150
    edges = cv2.Canny(img_dilate, low_threshold, high_threshold)

    # Parameters for the probabilistic Hough transform; tune for better
    # results.
    rho = 1  # distance resolution of the Hough grid, in pixels
    theta = np.pi / 180  # angular resolution of the Hough grid, in radians
    threshold = 15  # minimum number of votes (intersections in a grid cell)
    min_line_length = 600  # minimum number of pixels making up a line
    max_line_gap = 20  # maximum gap in pixels between connectable segments

    # Black canvas with the same shape and dtype as the input image.
    line_image = np.zeros_like(img)

    # Run Hough on the edge image; "lines" holds the endpoints of the
    # detected segments, or is None when nothing was found.
    lines = cv2.HoughLinesP(
        edges,
        rho,
        theta,
        threshold,
        lines=np.array([]),
        minLineLength=min_line_length,
        maxLineGap=max_line_gap,
    )

    if lines is not None:
        # Flatten to one (x1, y1, x2, y2) row per detected segment.
        for x1, y1, x2, y2 in lines.reshape(-1, 4):
            cv2.line(line_image, (x1, y1), (x2, y2), (255, 0, 0), 5)

    # Overlay the drawn segments on the original image.
    lines_edges = cv2.addWeighted(img, 0.8, line_image, 1, 0)

然而，在许多情况下，这些线无法被正确识别。一些错误的例子是：

识别出的线太多（文本中也被识别出了线）
线未被完整识别
线根本没有被识别

我走在正确的轨道上吗？为此，我是否只需要输入正确的参数组合？还是有更简单的方法/技巧可以让我可靠地裁剪这两行之间的文本？

如果相关的话，我需要对大约 450 页做这件事。这是这本书的链接，以防有人想查看更多页面示例。 https://archive.org/details/in.ernet.dli.2015.553713/page/n13/mode/2up

谢谢。

解决方案

我对 Ari 的回答做了一些小修改（谢谢），为了我自己的缘故，让代码更容易理解，这是我的代码。

核心思想是，

查找轮廓及其边界矩形。
两个“最宽”的轮廓（contour）代表这两条线。
然后，取顶部矩形的下边和底部矩形的上边来界定我们感兴趣的区域（文本）。


import os

# For every page image: find the two widest contours (taken to be the two
# separator lines), then save (a) the page with the region between them
# outlined and (b) a crop of that region.
for image in images:
    base_img = cv2.imread(image)
    # cv2.imread returns None instead of raising when a file cannot be read.
    if base_img is None:
        print(f"Could not read {image}")
        continue
    width = base_img.shape[1]

    # Binarize with Otsu's threshold and invert, so that pixels that were
    # dark in the source image become the foreground for contour detection.
    img = cv2.cvtColor(base_img, cv2.COLOR_BGR2GRAY)
    _, img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    img = cv2.bitwise_not(img)

    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x/4.x return
    # (contours, hierarchy); taking the last two values works on all of them.
    contours, _ = cv2.findContours(
        img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
    )[-2:]

    # Bounding rectangle of each contour, as (x, y, w, h).
    # The top-left point of the image is (0, 0); X grows rightwards and
    # Y grows downwards.
    rects = [cv2.boundingRect(contour) for contour in contours]

    # Widest rectangles first.
    rects.sort(key=lambda r: r[2], reverse=True)

    # Keep the 2 "widest" rectangles, ordered top to bottom.
    line_rects = sorted(rects[:2], key=lambda r: r[1])

    if len(line_rects) < 2:
        print(f"Insufficient contours in {image}")
        continue

    (_, top_y, _, top_h), (_, bot_y, _, _) = line_rects
    region_top = top_y + top_h  # bottom edge of the upper rectangle
    region_bottom = bot_y       # top edge of the lower rectangle

    # Overlapping rectangles would give an empty crop, which cv2.imwrite
    # cannot encode.
    if region_bottom <= region_top:
        print(f"No area between the two lines in {image}")
        continue

    # Copy the crop: a plain slice is a view into base_img, so the rectangle
    # drawn below would otherwise show up inside the saved crop.
    crop_img = base_img[region_top:region_bottom].copy()

    # Highlight the area by drawing the rectangle (in place on base_img).
    # The full page width (0 .. width) is used; the rectangles' own x
    # extents (top_x .. bot_x + bot_w) turned out to be unreliable.
    rect_img = cv2.rectangle(
        base_img,
        pt1=(0, region_top),
        pt2=(width, region_bottom),
        color=(0, 255, 0),
        thickness=2
    )

    # Build the output names from the real extension. str.replace('.jpg', ...)
    # leaves non-.jpg paths unchanged (overwriting the source image) and also
    # rewrites '.jpg' wherever else it occurs in the path.
    root, ext = os.path.splitext(image)
    cv2.imwrite(f"{root}.rect{ext}", rect_img)
    cv2.imwrite(f"{root}.crop{ext}", crop_img)

Answer 1

你可以查找轮廓（contours），然后取宽度最大的两个。

# Crop the region between the two widest contours of a page image.
base_img = cv2.imread('a.png')

# Binarize with Otsu's threshold and invert, so that pixels that were dark
# in the source image become the foreground for contour detection.
img = cv2.cvtColor(base_img, cv2.COLOR_BGR2GRAY)
ret, img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
img = cv2.bitwise_not(img)

# OpenCV 3.x returns three values here and 2.x/4.x return two; taking the
# last two works on all of them.
cnts, hierarchy = cv2.findContours(
    img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
)[-2:]

# Sort the contours widest first. Newer OpenCV Python bindings return the
# contours as a tuple, which has no in-place .sort(), so build a new list.
cnts = sorted(cnts, key=lambda c: cv2.boundingRect(c)[2], reverse=True)

if len(cnts) < 2:
    raise ValueError("fewer than two contours found; cannot locate both lines")

# Bounding rectangles (x, y, w, h) of the 2 widest contours, higher line first.
lines = sorted((cv2.boundingRect(c) for c in cnts[:2]), key=lambda r: r[1])

# Crop from the bottom edge of the upper line (y + h) to the top edge of the
# lower line, so that neither line is included in the result.
top_line, bottom_line = lines
crop_img = base_img[top_line[1] + top_line[3]:bottom_line[1]]

使用 Python OpenCV 查找两条线之间的文本

Finding text between two lines using Python OpenCV

python

opencv

python-3.x