在 Python 中使用 OpenCv 中已排序的轮廓对关联的层次结构进行排序

Sort associated Hierarchy with already sorted Contours in OpenCv in Python

我正在使用以下代码从图像(input.png)中提取最内层的轮廓
(我使用的是 Python 3.6.3 和 opencv-python==3.4.0.12):

input.png

import copy
import cv2

BLACK_THRESHOLD = 200
THIN_THRESHOLD = 10
ANNOTATION_COLOUR = (0, 0, 255)

img = cv2.imread('input.png')
if img is None:
    # cv2.imread reports failure by returning None instead of raising.
    raise IOError("could not read image 'input.png'")
orig = copy.copy(img)
# Use the named conversion code rather than its numeric value (6).
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Inverse binary threshold: pixels at or below BLACK_THRESHOLD become 255,
# brighter pixels become 0.
thresh = cv2.threshold(gray, thresh=BLACK_THRESHOLD, maxval=255, type=cv2.THRESH_BINARY_INV)[1]

# Find the contours.
# OpenCV 3.x returns (image, contours, hierarchy) while 2.x/4.x return
# (contours, hierarchy); taking the last two values works with all of them.
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)[-2:]

# Get the actual inner list of hierarchy descriptions; findContours yields
# None for the hierarchy when no contour was found.
hierarchy = hierarchy[0] if hierarchy is not None else []
idx = 0
# For each contour, find the bounding rectangle and extract it
for currentContour, currentHierarchy in zip(contours, hierarchy):
    x, y, w, h = cv2.boundingRect(currentContour)
    # Skip thin contours (vertical and horizontal lines)
    if h < THIN_THRESHOLD or w < THIN_THRESHOLD:
        continue
    # Skip contours that are large in both dimensions
    if h > 300 and w > 300:
        continue
    # Skip contours that are small in either dimension
    if h < 40 or w < 40:
        continue
    # Index 3 of a hierarchy row is the parent contour index (-1 = no parent).
    # NOTE(review): `> 0` also rejects children whose parent is contour 0;
    # confirm that is intended (`>= 0` would include them).
    if currentHierarchy[3] > 0:
        # these are the innermost child components
        idx += 1
        # Crop 2 px inside the bounding box; done only for kept contours.
        roi = img[y + 2:y + h - 2, x + 2:x + w - 2]
        cv2.imwrite(str(idx) + '.png', roi)

结果:

如您所见,提取的图像没有任何特定的顺序。因此,为了解决这个问题,我根据轮廓的 x 轴坐标对它们进行了排序。下面是代码:

import copy
import cv2

BLACK_THRESHOLD = 200
THIN_THRESHOLD = 10
ANNOTATION_COLOUR = (0, 0, 255)

img = cv2.imread('input.png')
if img is None:
    # cv2.imread reports failure by returning None instead of raising.
    raise IOError("could not read image 'input.png'")
orig = copy.copy(img)
# Use the named conversion code rather than its numeric value (6).
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Inverse binary threshold: pixels at or below BLACK_THRESHOLD become 255,
# brighter pixels become 0.
thresh = cv2.threshold(gray, thresh=BLACK_THRESHOLD, maxval=255, type=cv2.THRESH_BINARY_INV)[1]

# Find the contours.
# OpenCV 3.x returns (image, contours, hierarchy) while 2.x/4.x return
# (contours, hierarchy); taking the last two values works with all of them.
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)[-2:]

# Sort contours by a bounding-box coordinate (x by default, ascending)
def sort_contours(cnts, method="left-to-right"):
    """Return the contours ordered by the position of their bounding boxes.

    Args:
        cnts: Iterable of contours as accepted by ``cv2.boundingRect``.
        method: One of "left-to-right", "right-to-left", "top-to-bottom" or
            "bottom-to-top". Any other value behaves like "left-to-right".

    Returns:
        A tuple with the same contours in sorted order; an empty tuple when
        no contours are given.
    """
    cnts = list(cnts)
    # The previous zip(*sorted(...)) unpacking raised ValueError here.
    if not cnts:
        return ()
    # Reverse the order for the two "backwards" directions.
    reverse = method in ("right-to-left", "bottom-to-top")
    # boundingRect gives (x, y, w, h): index 0 sorts on x, index 1 on y.
    axis = 1 if method in ("top-to-bottom", "bottom-to-top") else 0
    # sorted() is stable, so contours with equal keys keep their input order.
    return tuple(sorted(cnts, key=lambda c: cv2.boundingRect(c)[axis],
                        reverse=reverse))


sorted_contours = sort_contours(contours)

idx = 0
# Walk the contours in sorted order and save a crop of each one that is kept
for contour in sorted_contours:
    x, y, w, h = cv2.boundingRect(contour)
    shorter_side = min(w, h)
    # Skip thin contours (vertical and horizontal lines) and anything with
    # a side shorter than 40 px
    if shorter_side < THIN_THRESHOLD or shorter_side < 40:
        continue
    # Skip contours whose width and height both exceed 300 px
    if shorter_side > 300:
        continue
    idx += 1
    print(x, idx)
    # Crop 2 px inside the bounding box
    roi = img[y + 2:y + h - 2, x + 2:x + w - 2]
    cv2.imwrite(str(idx) + '.png', roi)

结果:

这已经完美地对轮廓进行了排序。但是现在您可以看到,我得到了所有的轮廓(这就是每个数字都有两份副本的原因),因为我没有使用层次结构。我花了一些时间调试后意识到:只有轮廓被排序了,而与它们关联的层次结构并没有被排序。因此,有人可以告诉我如何将层次结构与轮廓一起排序,以便我只获得已排序轮廓中最内层的轮廓吗?谢谢!

让我们从您的第一个脚本开始,因为它为您提供了不错的结果,只是排序不正确。

可以观察到,基于层次结构所做的唯一决定(即判断是否将给定轮廓视为数字时)就是 currentHierarchy[3] > 0。那么我们为什么不先只选出符合此条件的轮廓,然后仅对这个子集做进一步处理(这样就不必再关心层次结构了)呢?

# Find the contours.
# OpenCV 3.x returns (image, contours, hierarchy) while 2.x/4.x return
# (contours, hierarchy); taking the last two values works with all of them.
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)[-2:]

# Get the actual inner list of hierarchy descriptions; findContours yields
# None for the hierarchy when no contour was found.
hierarchy = hierarchy[0] if hierarchy is not None else []

# Grab only the innermost child components.
# Index 3 of a hierarchy row is the parent contour index (-1 = no parent).
inner_contours = [contour for contour, info in zip(contours, hierarchy) if info[3] > 0]

现在我们只剩下我们感兴趣的轮廓,我们只需要对它们进行排序。我们可以重用您原始排序函数的简化版本:

# Sort Contours on the basis of their x-axis coordinates in ascending order
def sort_contours(contours):
    """Return the contours ordered left to right by bounding-box x coordinate.

    Args:
        contours: Iterable of contours as accepted by ``cv2.boundingRect``.

    Returns:
        A tuple with the same contours in ascending x order; an empty tuple
        when no contours are given.
    """
    contours = list(contours)
    # The previous zip(*sorted(...)) unpacking raised ValueError here.
    if not contours:
        return ()
    # boundingRect gives (x, y, w, h); index 0 is the left edge.
    # sorted() is stable, so contours with equal x keep their input order.
    return tuple(sorted(contours, key=lambda c: cv2.boundingRect(c)[0]))

并得到排序轮廓:

# Order the selected inner contours with the sort_contours helper
sorted_contours = sort_contours(inner_contours)

最后,我们要过滤掉垃圾,输出正确标注好的轮廓:

MIN_SIZE = 40
MAX_SIZE = 300
THIN_THRESHOLD = max(10, MIN_SIZE)
PADDING = 2

# ...

idx = 0
# Walk the sorted contours and save a padded crop of each one that is kept
for contour in sorted_contours:
    x, y, w, h = cv2.boundingRect(contour)
    shorter_side = min(w, h)
    # Skip thin contours (vertical and horizontal lines)
    if shorter_side < THIN_THRESHOLD:
        continue
    # Skip contours whose width and height both exceed MAX_SIZE
    if shorter_side > MAX_SIZE:
        continue
    idx += 1
    top, bottom = y + PADDING, y + h - PADDING
    left, right = x + PADDING, x + w - PADDING
    roi = img[top:bottom, left:right]
    cv2.imwrite(str(idx) + '.png', roi)

完整脚本(使用Python 2.7.x和OpenCV 3.4.1)

import cv2

# Threshold value passed to cv2.threshold (used with THRESH_BINARY_INV)
BLACK_THRESHOLD = 200
# Lower size bound; only used below to derive THIN_THRESHOLD
MIN_SIZE = 40
# Contours whose width AND height both exceed this are skipped
MAX_SIZE = 300
# Contours whose width OR height is below this are skipped
THIN_THRESHOLD = max(10, MIN_SIZE)
# Path of the input image read with cv2.imread
FILE_NAME = "numbers.png"
# Pixels trimmed from each side of the bounding box when cropping
PADDING = 2

# ============================================================================

# Sort Contours on the basis of their x-axis coordinates in ascending order
def sort_contours(contours):
    """Return the contours ordered left to right by bounding-box x coordinate.

    Args:
        contours: Iterable of contours as accepted by ``cv2.boundingRect``.

    Returns:
        A tuple with the same contours in ascending x order; an empty tuple
        when no contours are given.
    """
    contours = list(contours)
    # The previous zip(*sorted(...)) unpacking raised ValueError here.
    if not contours:
        return ()
    # boundingRect gives (x, y, w, h); index 0 is the left edge.
    # sorted() is stable, so contours with equal x keep their input order.
    return tuple(sorted(contours, key=lambda c: cv2.boundingRect(c)[0]))

# ============================================================================

img = cv2.imread(FILE_NAME)
if img is None:
    # cv2.imread reports failure by returning None instead of raising.
    raise IOError("could not read image %r" % (FILE_NAME,))
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # Don't use magic numbers
# Inverse binary threshold: pixels at or below BLACK_THRESHOLD become 255,
# brighter pixels become 0.
thresh = cv2.threshold(gray, thresh=BLACK_THRESHOLD, maxval=255, type=cv2.THRESH_BINARY_INV)[1]

# Find the contours.
# OpenCV 3.x returns (image, contours, hierarchy) while 2.x/4.x return
# (contours, hierarchy); taking the last two values works with all of them.
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)[-2:]

# Get the actual inner list of hierarchy descriptions; findContours yields
# None for the hierarchy when no contour was found.
hierarchy = hierarchy[0] if hierarchy is not None else []

# Grab only the innermost child components.
# Index 3 of a hierarchy row is the parent contour index (-1 = no parent).
inner_contours = [contour for contour, info in zip(contours, hierarchy) if info[3] > 0]

sorted_contours = sort_contours(inner_contours)

idx = 0
# For each contour, find the bounding rectangle and extract it
for contour in sorted_contours:
    x, y, w, h = cv2.boundingRect(contour)
    # Skip thin contours (vertical and horizontal lines)
    if (h < THIN_THRESHOLD) or (w < THIN_THRESHOLD):
        continue
    # Skip contours that are too large in both dimensions
    if (h > MAX_SIZE) and (w > MAX_SIZE):
        continue
    idx += 1
    # Crop PADDING pixels inside the bounding box; done only for kept contours.
    roi = img[(y + PADDING):(y + h - PADDING), (x + PADDING):(x + w - PADDING)]
    cv2.imwrite(str(idx) + '.png', roi)

及其生成的图像: