如何使用 OpenCV 检测垂直文本以进行提取
How do I detect vertical text with OpenCV for extraction
我是 OpenCV 的新手,想看看我是否能找到一种方法来检测附加图像的垂直文本。
在第 3 行的这种情况下,我想获得原始成本周围的边界框和以下金额($200,000.00)。
同样,我想获得 Amount Existing Liens 周围的边界框和下面的相关金额。然后我会使用这些数据发送到 OCR 引擎来读取文本。传统的 OCR 引擎逐行提取并丢失上下文。
这是我到目前为止尝试过的 -
import cv2
import numpy as np
# Asker's attempt: find line segments with Canny + probabilistic Hough and
# draw them on the input image.
img = cv2.imread('Test3.png')
if img is None:
    # cv2.imread signals an unreadable/missing file by returning None.
    raise FileNotFoundError("could not read image 'Test3.png'")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
edges = cv2.Canny(gray, 100, 100, apertureSize=3)
cv2.imshow('edges', edges)
cv2.waitKey(0)
minLineLength = 20
maxLineGap = 10
lines = cv2.HoughLinesP(edges, 1, np.pi / 180, 15,
                        minLineLength=minLineLength, maxLineGap=maxLineGap)
# HoughLinesP returns None (not an empty array) when no segment is found.
if lines is not None:
    # Flatten to one (x1, y1, x2, y2) row per segment, whatever the nesting.
    for x1, y1, x2, y2 in lines.reshape(-1, 4):
        cv2.line(img, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
cv2.imshow('hough', img)
cv2.waitKey(0)
我假设边界框的大小是固定的(即一个能够容纳“原始金额及其下方金额”的矩形)。您可以先用 OCR 做文本检测,找到“原始金额”和“现有留置权金额”的位置,再根据检测到的位置裁剪出图像,以进一步对金额进行 OCR。文本检测的做法可以参考此链接。
尝试使用图像中的线条将图像划分为不同的单元格。
例如,首先通过检测水平线将输入分成若干行。这可以通过使用 cv.HoughLinesP
并检查每条线段起点和终点的 y 坐标之差是否小于某个阈值(例如 abs(y2 - y1) < 10)
来完成。如果检测到一条水平线,它就是新一行的分隔线。您可以使用这条线的 y 坐标将输入沿水平方向拆分。
接下来,对于您感兴趣的行,使用相同的技术将区域划分为列,但现在确保起点和终点的 x-coordinates 之间的差异小于某个值阈值,因为您现在正在寻找垂直线。
您现在可以使用水平线的 y-coordinate 和垂直线的 x-coordinates 将图像裁剪到不同的单元格。将这些裁剪区域一个一个地传递给 OCR 引擎,您将为每个单元格提供相应的文本。
这是我基于 Kanan Vyas 和 Adrian Rosebrock 的工作
给出的解决方案。
它可能不像您希望的那样“规范”。
但它似乎(或多或少...)适用于您提供的图像。
请注意:代码会在其运行所在的目录中查找名为“Cropped”的文件夹,裁剪后的图像将存储在其中。所以,不要在一个已经包含名为“Cropped”的文件夹的目录中运行它,因为它每次运行时都会删除该文件夹中的所有内容。明白了吗?如果您不确定,请在一个单独的文件夹中运行它。
代码:
# Import required packages
import cv2
import numpy as np
import pathlib
###################################################################################################################################
# https://www.pyimagesearch.com/2015/04/20/sorting-contours-using-python-and-opencv/
###################################################################################################################################
def sort_contours(cnts, method="left-to-right"):
    """Sort contours by the position of their bounding boxes.

    Args:
        cnts: Sequence of contours, e.g. as returned by ``cv2.findContours``.
        method: "left-to-right", "right-to-left", "top-to-bottom" or
            "bottom-to-top". Any other value behaves like "left-to-right".

    Returns:
        A ``(contours, bounding_boxes)`` pair of tuples in sorted order. Each
        bounding box is the ``(x, y, w, h)`` tuple from ``cv2.boundingRect``.
        Two empty tuples are returned when ``cnts`` is empty.
    """
    # zip(*sorted(...)) yields nothing for an empty input, so the two-way
    # unpacking further down would raise ValueError; bail out early instead.
    if len(cnts) == 0:
        return ((), ())

    # Descending order for the two "reverse" directions.
    reverse = method in ("right-to-left", "bottom-to-top")
    # Index into (x, y, w, h): 1 sorts on y, 0 sorts on x.
    axis = 1 if method in ("top-to-bottom", "bottom-to-top") else 0

    bounding_boxes = [cv2.boundingRect(c) for c in cnts]
    # Sort contour/box pairs together so both outputs stay aligned.
    cnts, bounding_boxes = zip(*sorted(zip(cnts, bounding_boxes),
                                       key=lambda pair: pair[1][axis],
                                       reverse=reverse))
    return (cnts, bounding_boxes)
###################################################################################################################################
# https://medium.com/coinmonks/a-box-detection-algorithm-for-any-image-containing-boxes-756c15d7ed26 (with a few modifications)
###################################################################################################################################
def box_extraction(img_for_box_extraction_path, cropped_dir_path):
    """Crop every ruled box found in an image and save each crop as a PNG.

    Vertical and horizontal lines are isolated with morphological erosion and
    dilation, merged into one line mask, and the contours of that mask are
    taken as candidate boxes. Each candidate wider than 50 px and taller than
    20 px is cropped from the grayscale image and written to
    ``cropped_dir_path + "<x>_<y>.png"``, where ``(x, y)`` is the top-left
    corner of its bounding box.

    Args:
        img_for_box_extraction_path: Path of the image to analyse.
        cropped_dir_path: String prefix prepended to every output file name;
            pass a directory path that ends with a separator, such as
            "./Cropped/".

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    img = cv2.imread(img_for_box_extraction_path, 0)  # 0 -> read as grayscale
    if img is None:
        # cv2.imread returns None instead of raising on a missing/bad file.
        raise FileNotFoundError(
            "could not read image: " + str(img_for_box_extraction_path))

    # Binarise, then invert the result.
    _, img_bin = cv2.threshold(img, 128, 255,
                               cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    img_bin = 255 - img_bin
    # cv2.imwrite("Image_bin.jpg", img_bin)

    # Kernel length scales with the image width; keep it at least 1 so that
    # images narrower than 200 px do not produce a zero-sized kernel.
    kernel_length = max(1, img.shape[1] // 200)
    # (1 x kernel_length) kernel: keeps vertical lines.
    verticle_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, kernel_length))
    # (kernel_length x 1) kernel: keeps horizontal lines.
    hori_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_length, 1))
    # 3 x 3 kernel used for the final erosion of the combined mask.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))

    # Erode then dilate with the vertical kernel to extract vertical lines.
    img_temp1 = cv2.erode(img_bin, verticle_kernel, iterations=3)
    verticle_lines_img = cv2.dilate(img_temp1, verticle_kernel, iterations=3)
    # cv2.imwrite("verticle_lines.jpg", verticle_lines_img)

    # Same with the horizontal kernel to extract horizontal lines.
    img_temp2 = cv2.erode(img_bin, hori_kernel, iterations=3)
    horizontal_lines_img = cv2.dilate(img_temp2, hori_kernel, iterations=3)
    # cv2.imwrite("horizontal_lines.jpg", horizontal_lines_img)

    # Blend both line images with equal weight into a single mask.
    alpha = 0.5
    beta = 1.0 - alpha
    img_final_bin = cv2.addWeighted(verticle_lines_img, alpha,
                                    horizontal_lines_img, beta, 0.0)
    img_final_bin = cv2.erode(~img_final_bin, kernel, iterations=2)
    _, img_final_bin = cv2.threshold(img_final_bin, 128, 255,
                                     cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    # For debugging: enable to inspect the line mask used to find boxes.
    # cv2.imwrite("img_final_bin.jpg", img_final_bin)

    # findContours returns (contours, hierarchy) in OpenCV 4.x and
    # (image, contours, hierarchy) in 3.x; [-2] picks contours in both.
    contours = cv2.findContours(
        img_final_bin, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if len(contours) == 0:
        # Nothing to crop; also avoids sorting an empty sequence.
        return

    # Process the boxes from top to bottom.
    (contours, boundingBoxes) = sort_contours(contours, method="top-to-bottom")
    for x, y, w, h in boundingBoxes:
        # Only keep boxes wider than 50 px and taller than 20 px.
        if w > 50 and h > 20:
            new_img = img[y:y + h, x:x + w]
            cv2.imwrite(cropped_dir_path + str(x) + '_' + str(y) + '.png', new_img)
###########################################################################################################################################################
def prepare_cropped_folder(folder='./Cropped'):
    """Make sure the output folder exists and holds no stale crops.

    If the folder already exists, every regular file directly inside it whose
    name matches ``*.*`` (i.e. contains a dot) is deleted; sub-folders and
    dot-less files are left alone. Otherwise the folder is created.

    Args:
        folder: Folder to prepare. Defaults to "./Cropped", relative to the
            current working directory.
    """
    p = pathlib.Path(folder)
    if p.exists():
        # Folder may hold results of a previous run: clean them up.
        for f in p.glob('*.*'):
            if f.is_file():
                f.unlink()
    else:
        # parents/exist_ok: tolerate a nested target and a folder that was
        # created by someone else between the exists() check and here.
        p.mkdir(parents=True, exist_ok=True)
###########################################################################################################################################################
# MAIN
###########################################################################################################################################################
prepare_cropped_folder()
# Read image from which text needs to be extracted
img = cv2.imread("dkesg.png")
if img is None:
    # cv2.imread returns None instead of raising on a missing/bad file.
    raise FileNotFoundError("could not read image 'dkesg.png'")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Otsu threshold, inverted and then flipped back: pixels above the threshold
# end up as 255, the rest as 0.
ret, thresh1 = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)
thresh1 = 255 - thresh1
# Per-row sum of the binary image. Accumulate in float64: adding the uint8
# pixels one by one with Python's sum() can wrap around at 255 under
# NumPy 2 promotion rules.
bin_y = thresh1.sum(axis=1, dtype=np.float64)
peak = bin_y.max()
if peak > 0:
    # Normalise to [0, 1]; skipped for an all-zero image to avoid 0/0.
    bin_y = bin_y / peak
# Rows whose sum is within 0.5% of the largest row sum.
ry = np.where(bin_y > 0.995)[0]
# Draw a black horizontal line across each such row.
for row in ry:
    cv2.line(img, (0, int(row)), (thresh1.shape[1], int(row)), (0, 0, 0), 1)
# We need to draw a box around the picture with a white border in order for box_detection to work
height, width = img.shape[:2]
# White 2-px border: left, right, top, bottom.
cv2.line(img, (0, 0), (0, height - 1), (255, 255, 255), 2)
cv2.line(img, (width - 1, 0), (width - 1, height - 1), (255, 255, 255), 2)
cv2.line(img, (0, 0), (width - 1, 0), (255, 255, 255), 2)
cv2.line(img, (0, height - 1), (width - 1, height - 1), (255, 255, 255), 2)
# Black 1-px frame: left, right (at width-3), top, bottom (at height-2).
cv2.line(img, (0, 0), (0, height - 1), (0, 0, 0), 1)
cv2.line(img, (width - 3, 0), (width - 3, height - 1), (0, 0, 0), 1)
cv2.line(img, (0, 0), (width - 1, 0), (0, 0, 0), 1)
cv2.line(img, (0, height - 2), (width - 1, height - 2), (0, 0, 0), 1)
cv2.imwrite('out.png', img)
box_extraction("out.png", "./Cropped/")
现在...它将裁剪区域放入裁剪文件夹中。它们被命名为 x_y.png,其中 (x,y) 是原始图像上的位置。
这里有两个输出示例
和
现在,在终端中。我在这两张图片上使用了 pytesseract。
结果如下:
1)
原价
200,000.00 美元
2)
现有留置权数量
494,215.00 美元
正如你所看到的,pytesseract 在第二种情况下识别出的金额是错误的……所以,要小心。
此致,
斯蒂芬
我是 OpenCV 的新手,想看看我是否能找到一种方法来检测附加图像的垂直文本。
在第 3 行的这种情况下,我想获得原始成本周围的边界框和以下金额($200,000.00)。
同样,我想获得 Amount Existing Liens 周围的边界框和下面的相关金额。然后我会使用这些数据发送到 OCR 引擎来读取文本。传统的 OCR 引擎逐行提取并丢失上下文。
这是我到目前为止尝试过的 -
import cv2
import numpy as np
# Asker's attempt: find line segments with Canny + probabilistic Hough and
# draw them on the input image.
img = cv2.imread('Test3.png')
if img is None:
    # cv2.imread signals an unreadable/missing file by returning None.
    raise FileNotFoundError("could not read image 'Test3.png'")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
edges = cv2.Canny(gray, 100, 100, apertureSize=3)
cv2.imshow('edges', edges)
cv2.waitKey(0)
minLineLength = 20
maxLineGap = 10
lines = cv2.HoughLinesP(edges, 1, np.pi / 180, 15,
                        minLineLength=minLineLength, maxLineGap=maxLineGap)
# HoughLinesP returns None (not an empty array) when no segment is found.
if lines is not None:
    # Flatten to one (x1, y1, x2, y2) row per segment, whatever the nesting.
    for x1, y1, x2, y2 in lines.reshape(-1, 4):
        cv2.line(img, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
cv2.imshow('hough', img)
cv2.waitKey(0)
我假设边界框的大小是固定的(即一个能够容纳“原始金额及其下方金额”的矩形)。您可以先用 OCR 做文本检测,找到“原始金额”和“现有留置权金额”的位置,再根据检测到的位置裁剪出图像,以进一步对金额进行 OCR。文本检测的做法可以参考此链接。
尝试使用图像中的线条将图像划分为不同的单元格。
例如,首先通过检测水平线将输入分成若干行。这可以通过使用 cv.HoughLinesP
并检查每条线段起点和终点的 y 坐标之差是否小于某个阈值(例如 abs(y2 - y1) < 10)
来完成。如果检测到一条水平线,它就是新一行的分隔线。您可以使用这条线的 y 坐标将输入沿水平方向拆分。
接下来,对于您感兴趣的行,使用相同的技术将区域划分为列,但现在确保起点和终点的 x-coordinates 之间的差异小于某个值阈值,因为您现在正在寻找垂直线。
您现在可以使用水平线的 y-coordinate 和垂直线的 x-coordinates 将图像裁剪到不同的单元格。将这些裁剪区域一个一个地传递给 OCR 引擎,您将为每个单元格提供相应的文本。
这是我基于 Kanan Vyas 和 Adrian Rosebrock 的工作
给出的解决方案。它可能不像您希望的那样“规范”,但它似乎(或多或少……)适用于您提供的图像。
请注意:代码会在其运行所在的目录中查找名为“Cropped”的文件夹,裁剪后的图像将存储在其中。所以,不要在一个已经包含名为“Cropped”的文件夹的目录中运行它,因为它每次运行时都会删除该文件夹中的所有内容。明白了吗?如果您不确定,请在一个单独的文件夹中运行它。
代码:
# Import required packages
import cv2
import numpy as np
import pathlib
###################################################################################################################################
# https://www.pyimagesearch.com/2015/04/20/sorting-contours-using-python-and-opencv/
###################################################################################################################################
def sort_contours(cnts, method="left-to-right"):
    """Sort contours by the position of their bounding boxes.

    Args:
        cnts: Sequence of contours, e.g. as returned by ``cv2.findContours``.
        method: "left-to-right", "right-to-left", "top-to-bottom" or
            "bottom-to-top". Any other value behaves like "left-to-right".

    Returns:
        A ``(contours, bounding_boxes)`` pair of tuples in sorted order. Each
        bounding box is the ``(x, y, w, h)`` tuple from ``cv2.boundingRect``.
        Two empty tuples are returned when ``cnts`` is empty.
    """
    # zip(*sorted(...)) yields nothing for an empty input, so the two-way
    # unpacking further down would raise ValueError; bail out early instead.
    if len(cnts) == 0:
        return ((), ())

    # Descending order for the two "reverse" directions.
    reverse = method in ("right-to-left", "bottom-to-top")
    # Index into (x, y, w, h): 1 sorts on y, 0 sorts on x.
    axis = 1 if method in ("top-to-bottom", "bottom-to-top") else 0

    bounding_boxes = [cv2.boundingRect(c) for c in cnts]
    # Sort contour/box pairs together so both outputs stay aligned.
    cnts, bounding_boxes = zip(*sorted(zip(cnts, bounding_boxes),
                                       key=lambda pair: pair[1][axis],
                                       reverse=reverse))
    return (cnts, bounding_boxes)
###################################################################################################################################
# https://medium.com/coinmonks/a-box-detection-algorithm-for-any-image-containing-boxes-756c15d7ed26 (with a few modifications)
###################################################################################################################################
def box_extraction(img_for_box_extraction_path, cropped_dir_path):
    """Crop every ruled box found in an image and save each crop as a PNG.

    Vertical and horizontal lines are isolated with morphological erosion and
    dilation, merged into one line mask, and the contours of that mask are
    taken as candidate boxes. Each candidate wider than 50 px and taller than
    20 px is cropped from the grayscale image and written to
    ``cropped_dir_path + "<x>_<y>.png"``, where ``(x, y)`` is the top-left
    corner of its bounding box.

    Args:
        img_for_box_extraction_path: Path of the image to analyse.
        cropped_dir_path: String prefix prepended to every output file name;
            pass a directory path that ends with a separator, such as
            "./Cropped/".

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    img = cv2.imread(img_for_box_extraction_path, 0)  # 0 -> read as grayscale
    if img is None:
        # cv2.imread returns None instead of raising on a missing/bad file.
        raise FileNotFoundError(
            "could not read image: " + str(img_for_box_extraction_path))

    # Binarise, then invert the result.
    _, img_bin = cv2.threshold(img, 128, 255,
                               cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    img_bin = 255 - img_bin
    # cv2.imwrite("Image_bin.jpg", img_bin)

    # Kernel length scales with the image width; keep it at least 1 so that
    # images narrower than 200 px do not produce a zero-sized kernel.
    kernel_length = max(1, img.shape[1] // 200)
    # (1 x kernel_length) kernel: keeps vertical lines.
    verticle_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, kernel_length))
    # (kernel_length x 1) kernel: keeps horizontal lines.
    hori_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_length, 1))
    # 3 x 3 kernel used for the final erosion of the combined mask.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))

    # Erode then dilate with the vertical kernel to extract vertical lines.
    img_temp1 = cv2.erode(img_bin, verticle_kernel, iterations=3)
    verticle_lines_img = cv2.dilate(img_temp1, verticle_kernel, iterations=3)
    # cv2.imwrite("verticle_lines.jpg", verticle_lines_img)

    # Same with the horizontal kernel to extract horizontal lines.
    img_temp2 = cv2.erode(img_bin, hori_kernel, iterations=3)
    horizontal_lines_img = cv2.dilate(img_temp2, hori_kernel, iterations=3)
    # cv2.imwrite("horizontal_lines.jpg", horizontal_lines_img)

    # Blend both line images with equal weight into a single mask.
    alpha = 0.5
    beta = 1.0 - alpha
    img_final_bin = cv2.addWeighted(verticle_lines_img, alpha,
                                    horizontal_lines_img, beta, 0.0)
    img_final_bin = cv2.erode(~img_final_bin, kernel, iterations=2)
    _, img_final_bin = cv2.threshold(img_final_bin, 128, 255,
                                     cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    # For debugging: enable to inspect the line mask used to find boxes.
    # cv2.imwrite("img_final_bin.jpg", img_final_bin)

    # findContours returns (contours, hierarchy) in OpenCV 4.x and
    # (image, contours, hierarchy) in 3.x; [-2] picks contours in both.
    contours = cv2.findContours(
        img_final_bin, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if len(contours) == 0:
        # Nothing to crop; also avoids sorting an empty sequence.
        return

    # Process the boxes from top to bottom.
    (contours, boundingBoxes) = sort_contours(contours, method="top-to-bottom")
    for x, y, w, h in boundingBoxes:
        # Only keep boxes wider than 50 px and taller than 20 px.
        if w > 50 and h > 20:
            new_img = img[y:y + h, x:x + w]
            cv2.imwrite(cropped_dir_path + str(x) + '_' + str(y) + '.png', new_img)
###########################################################################################################################################################
def prepare_cropped_folder(folder='./Cropped'):
    """Make sure the output folder exists and holds no stale crops.

    If the folder already exists, every regular file directly inside it whose
    name matches ``*.*`` (i.e. contains a dot) is deleted; sub-folders and
    dot-less files are left alone. Otherwise the folder is created.

    Args:
        folder: Folder to prepare. Defaults to "./Cropped", relative to the
            current working directory.
    """
    p = pathlib.Path(folder)
    if p.exists():
        # Folder may hold results of a previous run: clean them up.
        for f in p.glob('*.*'):
            if f.is_file():
                f.unlink()
    else:
        # parents/exist_ok: tolerate a nested target and a folder that was
        # created by someone else between the exists() check and here.
        p.mkdir(parents=True, exist_ok=True)
###########################################################################################################################################################
# MAIN
###########################################################################################################################################################
prepare_cropped_folder()
# Read image from which text needs to be extracted
img = cv2.imread("dkesg.png")
if img is None:
    # cv2.imread returns None instead of raising on a missing/bad file.
    raise FileNotFoundError("could not read image 'dkesg.png'")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Otsu threshold, inverted and then flipped back: pixels above the threshold
# end up as 255, the rest as 0.
ret, thresh1 = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)
thresh1 = 255 - thresh1
# Per-row sum of the binary image. Accumulate in float64: adding the uint8
# pixels one by one with Python's sum() can wrap around at 255 under
# NumPy 2 promotion rules.
bin_y = thresh1.sum(axis=1, dtype=np.float64)
peak = bin_y.max()
if peak > 0:
    # Normalise to [0, 1]; skipped for an all-zero image to avoid 0/0.
    bin_y = bin_y / peak
# Rows whose sum is within 0.5% of the largest row sum.
ry = np.where(bin_y > 0.995)[0]
# Draw a black horizontal line across each such row.
for row in ry:
    cv2.line(img, (0, int(row)), (thresh1.shape[1], int(row)), (0, 0, 0), 1)
# We need to draw a box around the picture with a white border in order for box_detection to work
height, width = img.shape[:2]
# White 2-px border: left, right, top, bottom.
cv2.line(img, (0, 0), (0, height - 1), (255, 255, 255), 2)
cv2.line(img, (width - 1, 0), (width - 1, height - 1), (255, 255, 255), 2)
cv2.line(img, (0, 0), (width - 1, 0), (255, 255, 255), 2)
cv2.line(img, (0, height - 1), (width - 1, height - 1), (255, 255, 255), 2)
# Black 1-px frame: left, right (at width-3), top, bottom (at height-2).
cv2.line(img, (0, 0), (0, height - 1), (0, 0, 0), 1)
cv2.line(img, (width - 3, 0), (width - 3, height - 1), (0, 0, 0), 1)
cv2.line(img, (0, 0), (width - 1, 0), (0, 0, 0), 1)
cv2.line(img, (0, height - 2), (width - 1, height - 2), (0, 0, 0), 1)
cv2.imwrite('out.png', img)
box_extraction("out.png", "./Cropped/")
现在...它将裁剪区域放入裁剪文件夹中。它们被命名为 x_y.png,其中 (x,y) 是原始图像上的位置。
这里有两个输出示例
和
现在,在终端中。我在这两张图片上使用了 pytesseract。
结果如下:
1)
原价
200,000.00 美元
2)
现有留置权数量
494,215.00 美元
正如你所看到的,pytesseract 在第二种情况下识别出的金额是错误的……所以,要小心。
此致, 斯蒂芬