为什么 tesseract 在图像中找不到这个简单的文本?
Why tesseract wont find this easy text in image?
我已经尝试了几个小时来调整此图像的大小和颜色,但始终无法获得正确的字母。请看下图。这是我正在使用的测试图像。目标是将其用于自动化目的。谢谢!
更大的样本
import numpy as np
import pytesseract
from PIL import ImageGrab
import win32gui
import time
toplist, winlist = [], []
#time.sleep(3)
def enum_cb(hwnd, results):
if 'FPS:' in win32gui.GetWindowText(hwnd):
print(hex(hwnd), win32gui.GetWindowText(hwnd))
winlist.append(hwnd)
win32gui.EnumWindows(enum_cb, None)
win32gui.SetForegroundWindow(winlist[0])
bbox = win32gui.GetWindowRect(winlist[0])
print(bbox)
img = np.array(ImageGrab.grab(bbox=(130, 810, 800, 1080)))
#percent by which the image is resized
scale_percent = 400
#calculate the 50 percent of original dimensions
width = int(img.shape[1] * scale_percent / 100)
height = int(img.shape[0] * scale_percent / 100)
# dsize
dsize = (width, height)
# resize image
output = cv2.resize(img, dsize)
#img.show()
pytesseract.pytesseract.tesseract_cmd = r"L:\Program Files\Tesseract-OCR\tesseract.exe"
# img = cv2.imread(r"L:\MGO2PC\MGO2 UNOFFICIAL PC\RPCS3 EMU\screenshots\screenshot-2021_04_06_17_13_03.png", 0)
#crop_img = img[800:900, 260:800]
#cv2.imshow("cropped", crop_img)
#cv2.waitKey(0)
i = cv2.imwrite("test.png",output)
text = pytesseract.image_to_string(output, lang='eng')
print(text)
最终发现我可以隔离文字颜色,之后tesseract阅读没有问题。
def cv2_from_screen(self):
boundaries = [
([0, 179, 105], [38, 255, 167]) # BGR
]
pytesseract.pytesseract.tesseract_cmd = r"L:\Program Files\Tesseract-
OCR\tesseract.exe"
def enum_cb(hwnd, results):
if 'FPS:' in win32gui.GetWindowText(hwnd):
print(hex(hwnd), win32gui.GetWindowText(hwnd))
self.winlist.append(hwnd)
win32gui.EnumWindows(enum_cb, None)
win32gui.EnumWindows(enum_cb, None)
win32gui.SetForegroundWindow(self.winlist[0])
image = pyautogui.screenshot()
image = cv2.cvtColor(np.array(image.crop(box=[0, 800, 1000, 1080])), cv2.COLOR_RGB2BGR) #COLOR_RGB2BGR and COLOR_BGR2GRAY
我已经尝试了几个小时来调整此图像的大小和颜色,但始终无法获得正确的字母。请看下图。这是我正在使用的测试图像。目标是将其用于自动化目的。谢谢!
import numpy as np
import pytesseract
from PIL import ImageGrab
import win32gui
import time
toplist, winlist = [], []
#time.sleep(3)
def enum_cb(hwnd, results):
if 'FPS:' in win32gui.GetWindowText(hwnd):
print(hex(hwnd), win32gui.GetWindowText(hwnd))
winlist.append(hwnd)
win32gui.EnumWindows(enum_cb, None)
win32gui.SetForegroundWindow(winlist[0])
bbox = win32gui.GetWindowRect(winlist[0])
print(bbox)
img = np.array(ImageGrab.grab(bbox=(130, 810, 800, 1080)))
#percent by which the image is resized
scale_percent = 400
#calculate the 50 percent of original dimensions
width = int(img.shape[1] * scale_percent / 100)
height = int(img.shape[0] * scale_percent / 100)
# dsize
dsize = (width, height)
# resize image
output = cv2.resize(img, dsize)
#img.show()
pytesseract.pytesseract.tesseract_cmd = r"L:\Program Files\Tesseract-OCR\tesseract.exe"
# img = cv2.imread(r"L:\MGO2PC\MGO2 UNOFFICIAL PC\RPCS3 EMU\screenshots\screenshot-2021_04_06_17_13_03.png", 0)
#crop_img = img[800:900, 260:800]
#cv2.imshow("cropped", crop_img)
#cv2.waitKey(0)
i = cv2.imwrite("test.png",output)
text = pytesseract.image_to_string(output, lang='eng')
print(text)
最终发现我可以隔离文字颜色,之后tesseract阅读没有问题。
def cv2_from_screen(self):
boundaries = [
([0, 179, 105], [38, 255, 167]) # BGR
]
pytesseract.pytesseract.tesseract_cmd = r"L:\Program Files\Tesseract-
OCR\tesseract.exe"
def enum_cb(hwnd, results):
if 'FPS:' in win32gui.GetWindowText(hwnd):
print(hex(hwnd), win32gui.GetWindowText(hwnd))
self.winlist.append(hwnd)
win32gui.EnumWindows(enum_cb, None)
win32gui.EnumWindows(enum_cb, None)
win32gui.SetForegroundWindow(self.winlist[0])
image = pyautogui.screenshot()
image = cv2.cvtColor(np.array(image.crop(box=[0, 800, 1000, 1080])), cv2.COLOR_RGB2BGR) #COLOR_RGB2BGR and COLOR_BGR2GRAY