Pytesseract 无法识别文本

Pytesseract Not Recognising Text

我正在尝试使用 Pytesseract 从下图中读取数字:

Low Resolution Image

不幸的是,即使在使用灰度化、阈值处理、去噪或 Canny 边缘检测之后,程序也没有返回任何识别结果。当使用配置仅将数字和 $/ 列入白名单时,程序甚至连高分辨率图像也无法识别。 (here)

代码如下:


class NumberAnalyser:
    """OpenCV pre-processing helpers plus a Tesseract-based number reader.

    None of the methods read or write instance state. The names ``cv2``,
    ``np`` and ``pt`` must be available at module level (``pt`` is presumably
    ``pytesseract`` imported under that alias -- confirm against the imports).
    """

    def get_grayscale(self, image):
        """Return ``image`` converted from BGR to a single-channel grayscale image."""
        return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    def remove_noise(self, image):
        """Return ``image`` smoothed with a median blur of aperture size 5."""
        return cv2.medianBlur(image, 5)

    def thresholding(self, image):
        """Return the grayscale of ``image`` with pixels at or below 127 set to zero.

        Only the ``THRESH_TOZERO`` result is produced because it is the only
        one that was ever returned. The grayscale image is deliberately not
        modified before thresholding: calling ``cv2.bitwise_not(gray, gray,
        mask=...)`` would write the inverted pixels back into ``gray`` and
        silently change the thresholded output.
        """
        gray = self.get_grayscale(image)
        _, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_TOZERO)
        return thresh

    def dilate(self, image):
        """Return ``image`` dilated once with a 5x5 kernel of ones."""
        kernel = np.ones((5, 5), np.uint8)
        return cv2.dilate(image, kernel, iterations=1)

    def erode(self, image):
        """Return ``image`` eroded once with a 5x5 kernel of ones."""
        kernel = np.ones((5, 5), np.uint8)
        return cv2.erode(image, kernel, iterations=1)

    def opening(self, image):
        """Return the morphological opening (erosion then dilation) of ``image``."""
        kernel = np.ones((5, 5), np.uint8)
        return cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel)

    def canny(self, image):
        """Return the Canny edge map of ``image`` using thresholds 100 and 200."""
        return cv2.Canny(image, 100, 200)

    def deskew(self, image):
        """Return ``image`` rotated by the angle of its minimum-area bounding box.

        The angle is taken from ``cv2.minAreaRect`` over the coordinates of all
        non-zero pixels. If the image has no non-zero pixels there is nothing
        to measure, so the image is returned unchanged.
        """
        coords = np.column_stack(np.where(image > 0))
        if coords.size == 0:
            return image
        # np.where yields platform-sized integers; cast to a 32-bit type,
        # which is what OpenCV point-set functions accept.
        angle = cv2.minAreaRect(coords.astype(np.float32))[-1]
        if angle < -45:
            angle = -(90 + angle)
        else:
            angle = -angle
        # The rotation must run for BOTH branches above; keeping it inside the
        # ``else`` made the method return None whenever angle < -45.
        (h, w) = image.shape[:2]
        center = (w // 2, h // 2)
        M = cv2.getRotationMatrix2D(center, angle, 1.0)
        rotated = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
        return rotated

    def match_template(self, image, template):
        """Return the normalised correlation-coefficient match map of ``template`` in ``image``."""
        return cv2.matchTemplate(image, template, cv2.TM_CCOEFF_NORMED)

    def numbers(self, img_path):
        """Read the image at ``img_path``, show its thresholded form and OCR it.

        The thresholded image is displayed in a window named ``threshold`` and
        the call blocks until a key is pressed. The recognised text is printed
        and also returned.

        Raises:
            FileNotFoundError: if ``cv2.imread`` cannot load ``img_path``
                (it signals failure by returning ``None``).
        """
        reader = cv2.imread(img_path)
        if reader is None:
            raise FileNotFoundError(f"could not read image: {img_path}")

        thresh = self.thresholding(reader)

        cv2.imshow('threshold', thresh)
        cv2.waitKey(0)

        print('yes')
        # Options are separated by whitespace only: a comma after "--psm 11"
        # becomes part of the page-segmentation-mode argument.
        text = pt.image_to_string(thresh, config='--psm 11 -c tessedit_char_whitelist=$,0123456789')
        print(text)
        return text

添加或删除 --psm 11 配置都没有带来任何变化。

如有任何帮助,我们将不胜感激!

您连续应用多个简单阈值,但您还应该使用其他类型的阈值进行测试,例如 adaptive 和 inRange。

例如,如果您对给定示例使用 inRange 阈值处理:

高分辨率图像的结果将是:

0.38 版本的输出:

20000
4.000
100

低分辨率图像的结果将是:

0.38 版本的输出:

44.900
16.000
34

不幸的是,只有中间的数字被正确识别。如果您设置范围值,生成的图像可能会给出更好的结果。

阅读更多内容:提高输出质量 Tesseract 文档

代码:

import cv2
import pytesseract
from numpy import array

# Load the image (high-res: l9Zbt.png, low-res: eO1XG.png).
img = cv2.imread("eO1XG.png")
if img is None:
    # cv2.imread reports a missing/unreadable file by returning None.
    raise FileNotFoundError("could not read image: eO1XG.png")

# Convert to HSV so the mask below can be expressed as an HSV range.
# Use the public cv2.cvtColor; the cv2.cv2 alias is not present in every build.
img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

# Keep only the pixels inside the chosen HSV range.
msk = cv2.inRange(img, array([94, 0, 196]), array([179, 84, 255]))  # for low resolution
# msk = cv2.inRange(img, array([0, 0, 0]), array([179, 26, 255]))  # for high resolution

# Dilate the mask with a 5x3 rectangle, intersect it with the mask again,
# then invert so the selected pixels end up black on a white background.
krn = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 3))
dlt = cv2.dilate(msk, krn, iterations=1)
thr = 255 - cv2.bitwise_and(dlt, msk)

# OCR the result with page-segmentation mode 6 and the "digits" config.
txt = pytesseract.image_to_string(thr, config='--psm 6 digits')
print(txt)

# Show the image that was fed to Tesseract; blocks until a key is pressed.
cv2.imshow("", thr)
cv2.waitKey(0)