I'm having problems recognizing text from a picture, python
I was given a school project to recognize various CAPTCHAs, and I'm running into some difficulties implementing it.
Images of this type will be the input:
I process them with the following code:
import cv2
import pytesseract
# load image
fname = 'picture.png'
im = cv2.imread(fname, cv2.IMREAD_GRAYSCALE)  # read directly as grayscale
pytesseract.pytesseract.tesseract_cmd = r'C:\Tesseract-OCR\tesseract.exe'
# crop to the region that contains the characters
im = im[0:90, 35:150]
# light blur to reduce noise before thresholding
im = cv2.blur(im, (3, 3))
_, im = cv2.threshold(im, 223, 250, cv2.THRESH_BINARY)
cv2.imshow('',im)
cv2.waitKey(0)
After all the processing, the image looks like this:
At this point my question is: how can I modify the image so that it is easily readable by the computer, so that instead of the wrong TAREQ it would show 7TXB6Q?
I'm trying to display the text from the image using the pytesseract library, like this:
data = pytesseract.image_to_string(im, lang='eng', config='--psm 6 --oem 3 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789')
print(data)
I'm writing here in the hope of getting some valuable advice (maybe you know the most suitable way to get the text out of a picture, or how to process the image shown above). Peace to everyone :)
More images:
You can try finding the contours and removing the ones with small areas. This preprocessing step should increase the success rate of the OCR.
Before:
import cv2 as cv
import numpy as np
# your thresholded image im
bw = cv.imread('bw.png', cv.IMREAD_GRAYSCALE)
_, cnts, _ = cv.findContours(bw, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)
# remove the largest contour which is background
cnts = np.array(cnts[1:], dtype=object)
areas = np.array(list(map(cv.contourArea, cnts)))
thr = 35
thr_cnts = cnts[areas > thr]
disp_img = 255 * np.ones(bw.shape, dtype=np.uint8)
disp_img = cv.drawContours(disp_img, thr_cnts, -1, (0, 0, 0), cv.FILLED)
disp_img = cv.bitwise_or(disp_img, bw)
cv.imshow('result', disp_img)
cv.waitKey()
cv.destroyAllWindows()
cv.imwrite('result.png', disp_img)
Result:
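A note on OpenCV versions: the three-value unpacking of cv.findContours in the snippet above follows the OpenCV 3.x API, where the call returns (image, contours, hierarchy); in OpenCV 4.x it returns only (contours, hierarchy) and that line would raise a ValueError. A minimal compatibility sketch, with a helper name of my own choosing:
import cv2 as cv

def find_contours_compat(binary_img, mode=cv.RETR_TREE, method=cv.CHAIN_APPROX_SIMPLE):
    # cv.findContours returns 3 values on OpenCV 3.x and 2 values on 4.x;
    # the contour list is the second-to-last element in both cases
    result = cv.findContours(binary_img, mode, method)
    return result[-2]

# usage: cnts = find_contours_compat(bw)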
Edit: Merging the two pieces of code did not seem to give the same result, so here is the complete code from start to finish.
Input:
import cv2 as cv
import numpy as np
# load image
fname = 'im.png'
im = cv.imread(fname, cv.IMREAD_GRAYSCALE)
# crop
im = im[0:90, 35:150]
# blurring is essential for denoising
im = cv.blur(im, (3,3))
thr = 219
# the binary threshold value is very important:
# using 220 instead of 219 causes the loss of one letter,
# because that letter touches the bottom edge and gets merged into the background
_, im = cv.threshold(im, thr, 255, cv.THRESH_BINARY)
cv.imshow('', im)
cv.waitKey(0)
Thresholded:
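Since the result is this sensitive to the hand-tuned threshold (219 vs. 220), an optional experiment is to let Otsu's method pick the value from the image histogram. This is only a sketch of an alternative, not part of the pipeline above, and it is not guaranteed to preserve the letter that touches the bottom edge:
import cv2 as cv

im_gray = cv.imread('im.png', cv.IMREAD_GRAYSCALE)[0:90, 35:150]
im_gray = cv.blur(im_gray, (3, 3))
# with THRESH_OTSU the threshold argument (0 here) is ignored and computed automatically;
# the chosen value is returned so it can be compared against the manual 219
otsu_thr, im_otsu = cv.threshold(im_gray, 0, 255, cv.THRESH_BINARY + cv.THRESH_OTSU)
print('Otsu threshold:', otsu_thr)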
# binary image
bw = np.copy(im)
# find contours and corresponding areas
_, cnts, _ = cv.findContours(bw, cv.RETR_LIST, cv.CHAIN_APPROX_NONE)
cnts = np.array(cnts, dtype=object)
areas = np.array(list(map(cv.contourArea, cnts)))
thr = 35
# eliminate contours that are smaller than threshold
# also remove the largest contour which is background
thr_cnts = cnts[np.logical_and(areas > thr, areas != np.max(areas))]
# draw the remaining contours
disp_img = 255 * np.ones(bw.shape, dtype=np.uint8)
disp_img = cv.drawContours(disp_img, thr_cnts, -1, (0, 0, 0), cv.FILLED)
disp_img = cv.bitwise_or(disp_img, bw)
cv.imshow('', disp_img)
cv.waitKey()
cv.destroyAllWindows()
Result:
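To close the loop, the contour-filtered image can be fed back to pytesseract with the same configuration used in the question. This last step is my addition rather than part of the answer above, and it assumes Tesseract is installed at the path given earlier:
import pytesseract

pytesseract.pytesseract.tesseract_cmd = r'C:\Tesseract-OCR\tesseract.exe'
# disp_img is the cleaned binary image produced by the contour-filtering step above
data = pytesseract.image_to_string(
    disp_img, lang='eng',
    config='--psm 6 --oem 3 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789')
print(data.strip())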