无法使用 Pytesseract 读取数字

Cannot Read Number using Pytesseract

所以我正在尝试从图像中获取数字。

我试过这个:

import numpy as np
import cv2
from mss import mss
from PIL import Image
import pytesseract
from PIL import Image, ImageFilter

# Tell pytesseract where the Tesseract executable is installed.
pytesseract.pytesseract.tesseract_cmd =  r'C:\Program Files (x86)\Tesseract-OCR\tesseract.exe'


# Screen region handed to sct.grab() on every iteration: a 20x12 area.
bounding_box = {'top': 250, 'left': 630, 'width': 20, 'height': 12}

sct = mss()

# Capture -> mask -> enlarge -> OCR -> display, repeated until 'q' is pressed.
while True:
    sct_img = sct.grab(bounding_box)

    # NOTE(review): mss screenshots are commonly BGRA-ordered -- confirm; if
    # so, COLOR_BGR2HSV would be the matching conversion code here.
    sct_img = cv2.cvtColor(np.array(sct_img), cv2.COLOR_RGB2HSV)

    # Binary mask of the pixels whose HSV values lie between the two bounds
    # (presumably a green hue range -- verify against the captured text colour).
    sct_img = cv2.inRange(np.array(sct_img), (36, 25, 25), (70, 255, 255))

    # Target size: 600% of the mask's width and height.
    scale_percent = 600  # percent of original size
    width = int(sct_img.shape[1] * scale_percent / 100)
    height = int(sct_img.shape[0] * scale_percent / 100)
    dim = (width, height)

    # Resize the mask to the enlarged size.
    # NOTE(review): OpenCV's documentation suggests INTER_AREA for shrinking and
    # INTER_LINEAR / INTER_CUBIC for enlarging -- confirm this choice is intended.
    sct_img = cv2.resize(np.array(sct_img), dim, interpolation=cv2.INTER_AREA)

    # The return value is not assigned and the input is a copy, so sct_img is
    # unchanged by this call: the OCR below still sees the resized mask.
    cv2.adaptiveThreshold(np.array(sct_img), 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 11, 4)

    # OCR with page segmentation mode 13 and engine mode 3, restricting the
    # recognised characters to digits and '/'.
    data = pytesseract.image_to_string(np.array(sct_img), config=' --psm 13 --oem 3  -c tessedit_char_whitelist=0123456789/')
    print(data)

    # Show the image that was passed to the OCR step.
    cv2.imshow('screen', np.array(sct_img))

    # Close the window and leave the loop when 'q' is pressed.
    if (cv2.waitKey(1) & 0xFF) == ord('q'):
        cv2.destroyAllWindows()
        break

当我运行脚本时,我得到的是这个:

运行后我没有得到任何识别结果。我已经尝试了一段时间想让 pytesseract 正常工作,但到现在仍然没有成功。

解决当前问题需要了解以下内容:

图像太小,无法准确识别。因此我建议将图像放大,并获取二值掩码:

    1. 将图像转换到 HSV 颜色空间
    2. 获取二值掩码
    3. 放大二值掩码
    4. 反转二值掩码

结果:

OCR 结果将是 (python tesseract 0.3.7):

99
99
100
100

代码:


import cv2
import numpy as np
import pytesseract

# Load the image. cv2.imread returns None (it does not raise) when the file
# is missing or cannot be decoded, so fail early with a clear message instead
# of letting cvtColor fail later with an opaque assertion error.
image_path = "1h30P.png"
img = cv2.imread(image_path)
if img is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Convert to the HSV color-space (imread yields BGR channel order)
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

# Get the binary mask: keep pixels of any hue (0-179) whose saturation and
# value are both exactly 255
thr = cv2.inRange(hsv, np.array([0, 255, 255]), np.array([179, 255, 255]))

# Up-sample the mask by a factor of 2 in each direction
thr = cv2.resize(thr, (0, 0), fx=2, fy=2)

# Invert the binary mask so the matched pixels become black on white
thr = cv2.bitwise_not(thr)

# OCR: page segmentation mode 6 with the "digits" configuration
txt = pytesseract.image_to_string(thr, config="--psm 6 digits")
print(txt)

# Display the image that was passed to the OCR step until a key is pressed
cv2.imshow("", thr)
cv2.waitKey(0)