使用 OpenCV 为 pytesseract OCR 预处理图像

Preprocess images using OpenCV for pytesseract OCR

我想使用 OCR (pytesseract) 来识别位于如下图像中的文本:

我有成千上万张这样的箭头图像。到目前为止,处理流程如下:我首先调整图像的大小(用于另一个过程)。然后我裁剪图像以去除箭头的大部分。接下来,我绘制一个白色矩形边框以消除更多噪声,同时仍在文本和图像边界之间保留一定距离,以便更好地识别文本。我再次调整图像大小,以确保大写字母的高度约为 30 像素 (https://groups.google.com/forum/#!msg/tesseract-ocr/Wdh_JJwnw94/24JHDYQbBQAJ)。最后我用 150 的阈值对图像进行二值化。

完整代码:

import cv2

image_file = '001.jpg'

# Load the input image as a single-channel grayscale array.
image = cv2.imread(image_file, cv2.IMREAD_GRAYSCALE)
# cv2.imread reports a missing or undecodable file by returning None instead
# of raising, so fail here with a clear message rather than on `.shape` below.
if image is None:
    raise FileNotFoundError("Could not read image: {}".format(image_file))
(h_1, w_1) = image.shape[:2]

# Resize to a fixed height of 320 px (width scaled by the same factor) and
# grab the new image dimensions.
image = cv2.resize(image, (int(w_1*320/h_1), 320))
(h_1, w_1) = image.shape

# Crop 70 px from the top and bottom and 20 px from the left and right.
image_2 = image[70:h_1-70, 20:w_1-20]

# Dimensions of the cropped image.
(h_2, w_2) = image_2.shape

# Draw a 40 px thick white rectangle along the border of the crop as a frame
# around the number -> remove noise.
cv2.rectangle(image_2, (0, 0), (w_2, h_2), (255, 255, 255), 40)

# Resize to a height of 50 px so that capital letters are ~30 px in height.
image_2 = cv2.resize(image_2, (int(w_2*50/h_2), 50))

# Binarize with a fixed threshold of 150 (values above it become 255).
ret, image_2 = cv2.threshold(image_2, 150, 255, cv2.THRESH_BINARY)

# Save the preprocessed image next to the input, prefixed with "processed_".
cv2.imwrite('processed_' + image_file, image_2)

# tesseract part can be commented out
import pytesseract
# Whitelist digits plus "A"/"B"; --oem 1 and --psm 7 are passed straight to
# tesseract (engine mode and page segmentation mode).
config_7 = ("-c tessedit_char_whitelist=0123456789AB --oem 1 --psm 7")
text = pytesseract.image_to_string(image_2, config=config_7)
print("OCR TEXT: {}\n".format(text))

问题是箭头中的文本从不居中,因此上面描述的方法有时会把部分文本也删除(例如在图像 50A 中)。

图像处理中有没有更优雅的去除箭头的方法?例如使用轮廓检测并删除轮廓?相比用 tesseract 识别文本的部分,我对 OpenCV 部分更感兴趣。

感谢任何帮助。

如果你观察图片,会发现图片中的白色箭头同时也是最大的轮廓(尤其是在图片四周画上黑色边框之后)。如果你创建一个空白蒙版,在上面绘制箭头(图像中最大的轮廓),然后稍微腐蚀它,就可以对实际图像和腐蚀后的蒙版逐元素执行按位与运算。如果不清楚,请看下面的代码和注释,其实很简单。

# imports
import cv2
import numpy as np

img = cv2.imread("number.png")  # read image
# cv2.imread returns None (it does not raise) when the file is missing or
# cannot be decoded; stop here with a clear error instead of failing on .shape
if img is None:
    raise FileNotFoundError("Could not read image: number.png")
# you can resize the image here if you like - it should still work for both sizes
h, w = img.shape[:2]  # get the actual image's height and width
img = cv2.resize(img, (int(w*320/h), 320))  # fixed height of 320 px, width scaled by the same factor
h, w = img.shape[:2]

gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # transform to grayscale
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)[1]  # perform OTSU threshold
cv2.rectangle(thresh, (0, 0), (w, h), (0, 0, 0), 2)  # black frame along the image border
# findContours returns (contours, hierarchy) on OpenCV 2.x/4.x but
# (image, contours, hierarchy) on 3.x; index -2 is the contour list on all of them
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]  # search for contours
if len(contours) == 0:
    raise ValueError("No contours found in number.png")
max_cnt = max(contours, key=cv2.contourArea)  # select biggest one
mask = np.zeros((h, w), dtype=np.uint8)  # create a black mask
cv2.drawContours(mask, [max_cnt], -1, (255, 255, 255), -1)  # draw biggest contour (filled) on the mask
kernel = np.ones((15, 15), dtype=np.uint8)  # make a kernel with appropriate values - in both cases (resized and original) 15 is ok
erosion = cv2.erode(mask, kernel, iterations=1)  # erode the mask with given kernel

# invert, keep only the pixels under the eroded mask (everything else becomes 0),
# then invert back so that everything outside the mask ends up white (255)
reverse = cv2.bitwise_not(img)  # reversed image of the actual image: 0 becomes 255 and 255 becomes 0
img = cv2.bitwise_and(reverse, reverse, mask=erosion)  # per-element bit-wise conjunction of the actual image and eroded mask (erosion)
img = cv2.bitwise_not(img)  # reverse the image again

# save image to file and display
cv2.imwrite("res.png", img)
cv2.imshow("img", img)
cv2.waitKey(0)
cv2.destroyAllWindows()

结果:

您可以尝试简单的 Python 脚本:

import cv2
import numpy as np
# Read the input as a single-channel grayscale image.
img = cv2.imread('mmubS.png', cv2.IMREAD_GRAYSCALE)
# cv2.imread returns None (it does not raise) when the file is missing or
# cannot be decoded, so fail here with a clear message.
if img is None:
    raise FileNotFoundError("Could not read image: mmubS.png")
# Inverted binary threshold: pixels brighter than 200 become 0, the rest 255.
thresh = cv2.threshold(img, 200, 255, cv2.THRESH_BINARY_INV)[1]
im_flood_fill = thresh.copy()
h, w = thresh.shape[:2]
# Draw a white frame along the image border so that the flood fill seeded at
# the corner can reach every white region that touches the border.
im_flood_fill = cv2.rectangle(im_flood_fill, (0, 0), (w-1, h-1), 255, 2)
# floodFill requires a mask that is 2 px wider and 2 px taller than the image.
mask = np.zeros((h + 2, w + 2), np.uint8)
# Paint the white region connected to the top-left corner black; white pixels
# not connected to it (presumably the text inside the arrow) are left as is.
cv2.floodFill(im_flood_fill, mask, (0, 0), 0)
# Invert so the remaining white pixels become black on a white background.
im_flood_fill = cv2.bitwise_not(im_flood_fill)
# Save first, then display; imshow needs waitKey to actually render the
# window and keep it open until a key is pressed.
cv2.imwrite('text.png', im_flood_fill)
cv2.imshow('clear text', im_flood_fill)
cv2.waitKey(0)
cv2.destroyAllWindows()

结果: