Python 使用 OpenCV 和 PIL 的 Tesseract 未检测到字符
Python Tesseract with OpenCV and PIL not detecting characters
我正在尝试对英雄联盟大厅界面中的文本进行图像识别(OCR),以便进行数据挖掘。
我猜它没有识别字体,因为程序的输出是:Doel seen aay
源代码:
import numpy as nm
import pytesseract
import cv2
from PIL import ImageGrab, Image
def imToString():
    """Continuously OCR a fixed screen region and print the recognized text.

    Each iteration grabs the screen rectangle given by the bounding box
    (242, 884, 561, 990), saves the capture to ``test.png``, converts it to
    grayscale and passes it to Tesseract with ``--psm 7`` and the English
    language data. The loop has no exit condition, so the function only
    stops when the process is interrupted.
    """
    # Path of tesseract executable. A raw string keeps the backslashes
    # literal instead of relying on "\P" and "\T" being unknown escapes.
    pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\Tesseract.exe'
    while True:
        cap = ImageGrab.grab(bbox=(242, 884, 561, 990))
        cap.save('test.png')
        # PIL hands pixels over in RGB order, so the RGB->gray conversion is
        # the right one; BGR2GRAY would swap the red and blue weights.
        gray = cv2.cvtColor(nm.array(cap), cv2.COLOR_RGB2GRAY)
        tesstr = pytesseract.image_to_string(
            gray, lang='eng', config='--psm 7')
        print(tesstr)
# Script entry point: start the capture-and-OCR loop.
imToString()
The image I'm using to test
看来您需要对图像进行一些预处理。
试试这个。
import numpy as np
import cv2
# Load the test image as single-channel grayscale (flag 0).
img = cv2.imread('wXQMF.png', 0)
# cv2.imread reports an unreadable/missing file by returning None instead of
# raising, so fail here with a clear message rather than on img.max() below.
if img is None:
    raise FileNotFoundError("could not read image 'wXQMF.png'")
print(img.max(), img.min())
# Inverse binary threshold: pixels above 10 become 0, all others become 255.
ret, thr1 = cv2.threshold(img, 10, 255, cv2.THRESH_BINARY_INV)
kernel_size_row = 3
kernel_size_col = 3
# Build the structuring element from the declared sizes (still 3x3).
kernel = np.ones((kernel_size_row, kernel_size_col), np.uint8)
erosion_image = cv2.erode(thr1, kernel, iterations=1)  # make erosion image
# NOTE(review): the thresholded image is what gets written; erosion_image is
# computed but never saved -- confirm which one was meant to be the output.
cv2.imwrite('a.png', thr1)
我正在尝试对英雄联盟大厅界面中的文本进行图像识别(OCR),以便进行数据挖掘。
我猜它没有识别字体,因为程序的输出是:Doel seen aay
源代码:
import numpy as nm
import pytesseract
import cv2
from PIL import ImageGrab, Image
def imToString():
    """Continuously OCR a fixed screen region and print the recognized text.

    Each iteration grabs the screen rectangle given by the bounding box
    (242, 884, 561, 990), saves the capture to ``test.png``, converts it to
    grayscale and passes it to Tesseract with ``--psm 7`` and the English
    language data. The loop has no exit condition, so the function only
    stops when the process is interrupted.
    """
    # Path of tesseract executable. A raw string keeps the backslashes
    # literal instead of relying on "\P" and "\T" being unknown escapes.
    pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\Tesseract.exe'
    while True:
        cap = ImageGrab.grab(bbox=(242, 884, 561, 990))
        cap.save('test.png')
        # PIL hands pixels over in RGB order, so the RGB->gray conversion is
        # the right one; BGR2GRAY would swap the red and blue weights.
        gray = cv2.cvtColor(nm.array(cap), cv2.COLOR_RGB2GRAY)
        tesstr = pytesseract.image_to_string(
            gray, lang='eng', config='--psm 7')
        print(tesstr)
# Script entry point: start the capture-and-OCR loop.
imToString()
The image I'm using to test
看来您需要对图像进行一些预处理。
试试这个。
import numpy as np
import cv2
# Load the test image as single-channel grayscale (flag 0).
img = cv2.imread('wXQMF.png', 0)
# cv2.imread reports an unreadable/missing file by returning None instead of
# raising, so fail here with a clear message rather than on img.max() below.
if img is None:
    raise FileNotFoundError("could not read image 'wXQMF.png'")
print(img.max(), img.min())
# Inverse binary threshold: pixels above 10 become 0, all others become 255.
ret, thr1 = cv2.threshold(img, 10, 255, cv2.THRESH_BINARY_INV)
kernel_size_row = 3
kernel_size_col = 3
# Build the structuring element from the declared sizes (still 3x3).
kernel = np.ones((kernel_size_row, kernel_size_col), np.uint8)
erosion_image = cv2.erode(thr1, kernel, iterations=1)  # make erosion image
# NOTE(review): the thresholded image is what gets written; erosion_image is
# computed but never saved -- confirm which one was meant to be the output.
cv2.imwrite('a.png', thr1)