pytesseract 不拾取单个字符
pytesseract not picking up individual characters
我目前遇到了困难:Pytesseract 无法检测到单个数字。下面是我尝试读取的图像、我的代码以及目前得到的结果。任何帮助都将不胜感激。
当前结果= ['WLDOT', 'ROOTOO2', 'Boombastic', 'Loukan', 'ExpertAz', 'Stryzhh', 'Najm' , 'JAMIN', ' ', '7157', '5618', '4864', '4762', '4294', '3287', '26', '34', '23', '32', '241', '240', '171', '137', '183', '200', '136', '181', '762', '689707', '733165', '698822', '724485 ', '647404', '566613', '580621', '566721', '189025']
import cv2
import pytesseract
# Tell pytesseract where the Tesseract executable is installed.
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Read the input image as a single-channel grayscale image.
image2 = r'C:\Reader\unknown.png'
image = cv2.imread(image2, cv2.IMREAD_GRAYSCALE)

# Pre-processing for OCR accuracy:
#   1. binarize with a fixed threshold of 180,
#   2. apply a morphological close with a 3x3 rectangular kernel,
#   3. invert the 8-bit result.
thresh = cv2.threshold(image, 180, 255, cv2.THRESH_BINARY)[1]
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
close = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
result = cv2.bitwise_not(close)

# Display the pre-processed image and wait for a key press before continuing.
cv2.imshow('result', result)
cv2.waitKey()

# Run OCR with page segmentation mode 3, split the text on newlines and
# keep only the non-empty lines.
raw_text = str(pytesseract.image_to_string(result, config='--psm 3'))
textOffImage = [line for line in raw_text.split("\n") if line]
print(textOffImage)
您可以使用 `inRange` 进行阈值处理。
结果将是:
现在,如果您使用 `--psm 6` 模式读取,结果为:
WLDOT 17790 14 0 241 o 733165 :
ROOTOO2 17576 24 1 240 0 698822
Boombastic 17157 19 5 171 762 724485
Loukan 15618 26 4 137 0 647404 y
ExpertAz 14864 34 1 183 0 566613
Stryzhh 14762 23 3 200 0 580621 ,
Najm 14294 32 1 136 0 566721
JAMIN 13287 16 Q 181 689707 189025
k
如您所见,存在一些缺陷,但大多数输入都被正确识别。
如果你只想要数字,可以使用 `--psm 6 digits`:
17790 14 0 241 733165
00002 17576 24 1 240 0 698822
17157 19 5 171 762 724485
15618 26 4 137 0 647404
14864 34 1 183 0 566613
14762 23 3 200 0 580621
14294 32 1 136 0 566721
13287 16 0 181 689707189025
从上面可以看出,所有数字都被正确识别了。
更多内容请阅读:Improving the quality of the output
代码:
import cv2
import pytesseract
from numpy import array
# Load the image and convert it from BGR to HSV.
img = cv2.imread("TI5Jc.png")
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

# Build a binary mask of the pixels whose HSV values lie inside the bounds
# below (channel order H, S, V; the second channel is capped at 84).
lower_bound = array([0, 0, 0])
upper_bound = array([179, 84, 255])
msk = cv2.inRange(hsv, lower_bound, upper_bound)

# Dilate the mask once with a 5x3 rectangular kernel.
krn = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 3))
dlt = cv2.dilate(msk, krn, iterations=1)

# Keep the pixels set in both the dilated and the original mask, then
# invert the 8-bit result before handing it to Tesseract.
overlap = cv2.bitwise_and(dlt, msk)
thr = cv2.bitwise_not(overlap)

# OCR with page segmentation mode 6 and the "digits" configuration.
txt = pytesseract.image_to_string(thr, config='--psm 6 digits')
print(txt)
我目前遇到了困难:Pytesseract 无法检测到单个数字。下面是我尝试读取的图像、我的代码以及目前得到的结果。任何帮助都将不胜感激。
当前结果= ['WLDOT', 'ROOTOO2', 'Boombastic', 'Loukan', 'ExpertAz', 'Stryzhh', 'Najm' , 'JAMIN', ' ', '7157', '5618', '4864', '4762', '4294', '3287', '26', '34', '23', '32', '241', '240', '171', '137', '183', '200', '136', '181', '762', '689707', '733165', '698822', '724485 ', '647404', '566613', '580621', '566721', '189025']
import cv2
import pytesseract
# Tell pytesseract where the Tesseract executable is installed.
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Read the input image as a single-channel grayscale image.
image2 = r'C:\Reader\unknown.png'
image = cv2.imread(image2, cv2.IMREAD_GRAYSCALE)

# Pre-processing for OCR accuracy:
#   1. binarize with a fixed threshold of 180,
#   2. apply a morphological close with a 3x3 rectangular kernel,
#   3. invert the 8-bit result.
thresh = cv2.threshold(image, 180, 255, cv2.THRESH_BINARY)[1]
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
close = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
result = cv2.bitwise_not(close)

# Display the pre-processed image and wait for a key press before continuing.
cv2.imshow('result', result)
cv2.waitKey()

# Run OCR with page segmentation mode 3, split the text on newlines and
# keep only the non-empty lines.
raw_text = str(pytesseract.image_to_string(result, config='--psm 3'))
textOffImage = [line for line in raw_text.split("\n") if line]
print(textOffImage)
您可以使用 `inRange` 进行阈值处理。
结果将是:
现在,如果您使用 `--psm 6` 模式读取,结果为:
WLDOT 17790 14 0 241 o 733165 :
ROOTOO2 17576 24 1 240 0 698822
Boombastic 17157 19 5 171 762 724485
Loukan 15618 26 4 137 0 647404 y
ExpertAz 14864 34 1 183 0 566613
Stryzhh 14762 23 3 200 0 580621 ,
Najm 14294 32 1 136 0 566721
JAMIN 13287 16 Q 181 689707 189025
k
如您所见,存在一些缺陷,但大多数输入都被正确识别。
如果你只想要数字,可以使用 `--psm 6 digits`:
17790 14 0 241 733165
00002 17576 24 1 240 0 698822
17157 19 5 171 762 724485
15618 26 4 137 0 647404
14864 34 1 183 0 566613
14762 23 3 200 0 580621
14294 32 1 136 0 566721
13287 16 0 181 689707189025
从上面可以看出,所有数字都被正确识别了。
更多内容请阅读:Improving the quality of the output
代码:
import cv2
import pytesseract
from numpy import array
# Load the image and convert it from BGR to HSV.
img = cv2.imread("TI5Jc.png")
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

# Build a binary mask of the pixels whose HSV values lie inside the bounds
# below (channel order H, S, V; the second channel is capped at 84).
lower_bound = array([0, 0, 0])
upper_bound = array([179, 84, 255])
msk = cv2.inRange(hsv, lower_bound, upper_bound)

# Dilate the mask once with a 5x3 rectangular kernel.
krn = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 3))
dlt = cv2.dilate(msk, krn, iterations=1)

# Keep the pixels set in both the dilated and the original mask, then
# invert the 8-bit result before handing it to Tesseract.
overlap = cv2.bitwise_and(dlt, msk)
thr = cv2.bitwise_not(overlap)

# OCR with page segmentation mode 6 and the "digits" configuration.
txt = pytesseract.image_to_string(thr, config='--psm 6 digits')
print(txt)