如何用pytesseract对低质量代码图片进行OCR
How to OCR low quality code picture with pytesseract
我有一组相同格式代码的图片 (sample),我已经尝试了所有方法,但没有一个效果很好。
我尝试了模糊、hsv、脱粒等。
你能帮帮我吗?
import pytesseract
import cv2
imgr = cv2.imread("a.png")
img = cv2.resize(imgr, (int(imgr.shape[1] * 3), int(imgr.shape[0] * 3)), interpolation=cv2.INTER_AREA)
img = cv2.blur(img, (7, 7))
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
h, s, v = cv2.split(hsv)
cv2.imshow("", v)
cv2.waitKey(0)
p = pytesseract.image_to_string(v)
print(p)
thresh = cv2.threshold(v, 170, 255, cv2.THRESH_BINARY)[1]
cv2.imshow("", thresh)
cv2.waitKey(0)
print(pytesseract.image_to_string(thresh))
ation
import cv2
import numpy as np
import pytesseract
from PIL import Image, ImageStat
# Load image
image = cv2.imread('a.png')
img=image.copy()
# Remove border
kernel_vertical = cv2.getStructuringElement(cv2.MORPH_RECT, (1,50))
temp1 = 255 - cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel_vertical)
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (50,1))
temp2 = 255 - cv2.morphologyEx(image, cv2.MORPH_CLOSE, horizontal_kernel)
temp3 = cv2.add(temp1, temp2)
result = cv2.add(temp3, image)
# Convert to grayscale and Otsu's threshold
gray = cv2.cvtColor(result, cv2.COLOR_BGR2GRAY)
gray = cv2.GaussianBlur(gray,(5,5),0)
_,thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)
kernel = np.ones((3,3), np.uint8)
dilated = cv2.dilate(thresh, kernel, iterations = 5)
# Find the biggest Contour (Where the words are)
contours, hierarchy = cv2.findContours(dilated,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
Reg = []
for j in range(len(contours)-1):
for i in range(len(contours)-1):
if len(contours[i+1])>len(contours[i]):
Reg = contours[i]
contours [i] = contours[i+1]
contours [i+1] = Reg
x, y, w, h = cv2.boundingRect(contours[0])
img_cut = np.zeros(shape=(h,w))
img_cut = gray[y:y+h, x:x+w]
img_cut = 255-img_cut
# Tesseract
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
print(pytesseract.image_to_string(img_cut, lang = 'eng', config='--psm 7 --oem 3 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-'))
cv2.imwrite('generator.jpg',img_cut)
cv2.imshow('img', img_cut)
cv2.waitKey()
Tesseract识别:EGO1-012R210124(其实是不利的,我尽力了。)
下面是一个可能的解决方案。
我觉得失真是问题的一部分。所以我试图“修复”它。
结果看起来不错:检测成功。
不幸的是,由于您只提供了一个样本,我无法确定这是否适用于其他样本……(可能不行……)不过,您可以试一试。
最好的祝福,
斯蒂芬
注意:我将 tesseract-5.0.0-alpha 与 tessdata_best 数据集一起使用。
这是控制台的输出:
Regression parameters for the second-degree polynomial:
[ 2.33735101e-04 -1.92211992e-01 2.43573673e+02]
=============================
Rectified image
RESULT: EG01-012R210126024
=============================
================================================
Test on the non rectified image
with the same blur, erode, threshold and
tesseract parameters
RESULT: EGO1-012R2101269
================================================
Press any key on an opened opencv window to close
下面是程序:
# Standard imports
import cv2
import numpy as np
from matplotlib import pyplot as plt
import pytesseract
pytesseract.pytesseract.tesseract_cmd = r'/usr/local/bin/tesseract'
# Read image
imgr = cv2.imread("a.png")
# Resizing, converting...
factor=3
imgr = cv2.resize(imgr, (int(imgr.shape[1]*factor ), int(imgr.shape[0]*factor)), interpolation=cv2.INTER_AREA)
# First detection in order to crop the image
# We want a detection. Not important if result is bad.
strings=pytesseract.image_to_data(imgr, lang = 'eng', config='--psm 11 --oem 3 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-')
strings=strings.split('\n')
for line in strings[2:]:
s=line.split('\t')
if len(s[11])>0:
xmin=int(s[6])
break
## We crop the image to keep the interesting part...
imgr=imgr[:,np.max([0,xmin-imgr.shape[1]//10]):,:]
cv2.imshow("Cropped image",imgr)
hsv = cv2.cvtColor(imgr, cv2.COLOR_BGR2HSV)
h0, s0, Im0 = cv2.split(hsv)
w=Im0.shape[1] # From now, this is the image we will work on.
h=Im0.shape[0]
# Blob image to compute the image distortion
blob=cv2.blur(Im0,(w//3,1))
blob=cv2.normalize(blob,None,0,255,cv2.NORM_MINMAX)
blob=cv2.threshold(blob,170,255,cv2.THRESH_BINARY)[1]
cv2.imshow("Blob image",blob)
x=[]
y=[]
for i in range(w):
for j in range(h):
if blob[j,i]==0:
x.append(i)
y.append(j)
x=np.array(x)
y=np.array(y)
model = np.polyfit(x,y, 2)
print("Regression parameters for the second-degree polynomial: ")
print(model)
plt.plot(x,y,'x')
X=np.linspace(0,w)
plt.plot(X,X*X*model[0]+X*model[1]+model[2])
Ymean=np.mean(X*X*model[0]+X*model[1]+model[2])
# Remapping the cropped image with the found model parameters
map_x = np.zeros((Im0.shape[0], Im0.shape[1]), dtype=np.float32)
map_y = np.zeros((Im0.shape[0], Im0.shape[1]), dtype=np.float32)
for i in range(w):
for j in range(h):
map_x[j,i]=i
map_y[j,i]=j+i*i*model[0]+i*model[1]+model[2]-Ymean
Im1=cv2.remap(Im0, map_x, map_y, interpolation=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REPLICATE)
# Actual detection on the rectified image: Im1
Im1=cv2.normalize(Im1,None,0,255,cv2.NORM_MINMAX)
blur_radius=8
threshold=120
Im1= cv2.blur(Im1, (blur_radius,blur_radius))
kernel = np.ones((4,4), np.uint8)
Im1=255-cv2.erode(255-Im1, kernel)#, cv2.BORDER_REPLICATE)
Im1=cv2.normalize(Im1,None,0,255,cv2.NORM_MINMAX)
Im1 = cv2.threshold(Im1, threshold, 255, cv2.THRESH_BINARY)[1]
cv2.imshow("Rectified image for text detection",Im1)
strings=pytesseract.image_to_string(Im1, lang = 'eng', config='--psm 11 --oem 3 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-')
strings=strings.split()
strings=max(strings,key=len)
print('=============================')
print("Rectified image")
print('RESULT: ',strings)
print('=============================')
# For comparison: detection on the non rectified image
# using the same parameters:
Im2 = Im0 # whithout remapping
Im2 = cv2.normalize(Im2,None,0,255,cv2.NORM_MINMAX)
Im2 = cv2.blur(Im2, (blur_radius,blur_radius))
Im2 = 255-cv2.erode(255-Im2, kernel)#, cv2.BORDER_REPLICATE)
Im2 = cv2.normalize(Im2,None,0,255,cv2.NORM_MINMAX)
Im2 = cv2.threshold(Im2, threshold, 255, cv2.THRESH_BINARY)[1]
strings=pytesseract.image_to_string(Im2, lang = 'eng', config='--psm 11 --oem 3 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-')
strings=strings.split()
strings=max(strings,key=len)
print('================================================')
print("Test on the non rectified image")
print("with the same blur, erode, threshold and")
print("tesseract parameters")
print('RESULT: ',strings)
print('================================================')
cv2.imshow("Unrectified image for text detection",Im2)
# Close windows
print("Press any key on an opened opencv window to close")
cv2.waitKey()
plt.close()
cv2.destroyAllWindows()
我有一组相同格式代码的图片 (sample),我已经尝试了所有方法,但没有一个效果很好。 我尝试了模糊、hsv、脱粒等。 你能帮帮我吗?
import pytesseract
import cv2
imgr = cv2.imread("a.png")
img = cv2.resize(imgr, (int(imgr.shape[1] * 3), int(imgr.shape[0] * 3)), interpolation=cv2.INTER_AREA)
img = cv2.blur(img, (7, 7))
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
h, s, v = cv2.split(hsv)
cv2.imshow("", v)
cv2.waitKey(0)
p = pytesseract.image_to_string(v)
print(p)
thresh = cv2.threshold(v, 170, 255, cv2.THRESH_BINARY)[1]
cv2.imshow("", thresh)
cv2.waitKey(0)
print(pytesseract.image_to_string(thresh))
ation
import cv2
import numpy as np
import pytesseract
from PIL import Image, ImageStat
# Load image
image = cv2.imread('a.png')
img=image.copy()
# Remove border
kernel_vertical = cv2.getStructuringElement(cv2.MORPH_RECT, (1,50))
temp1 = 255 - cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel_vertical)
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (50,1))
temp2 = 255 - cv2.morphologyEx(image, cv2.MORPH_CLOSE, horizontal_kernel)
temp3 = cv2.add(temp1, temp2)
result = cv2.add(temp3, image)
# Convert to grayscale and Otsu's threshold
gray = cv2.cvtColor(result, cv2.COLOR_BGR2GRAY)
gray = cv2.GaussianBlur(gray,(5,5),0)
_,thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)
kernel = np.ones((3,3), np.uint8)
dilated = cv2.dilate(thresh, kernel, iterations = 5)
# Find the biggest Contour (Where the words are)
contours, hierarchy = cv2.findContours(dilated,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
Reg = []
for j in range(len(contours)-1):
for i in range(len(contours)-1):
if len(contours[i+1])>len(contours[i]):
Reg = contours[i]
contours [i] = contours[i+1]
contours [i+1] = Reg
x, y, w, h = cv2.boundingRect(contours[0])
img_cut = np.zeros(shape=(h,w))
img_cut = gray[y:y+h, x:x+w]
img_cut = 255-img_cut
# Tesseract
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
print(pytesseract.image_to_string(img_cut, lang = 'eng', config='--psm 7 --oem 3 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-'))
cv2.imwrite('generator.jpg',img_cut)
cv2.imshow('img', img_cut)
cv2.waitKey()
Tesseract识别:EGO1-012R210124(其实是不利的,我尽力了。)
下面是一个可能的解决方案。 我觉得失真是问题的一部分。所以我试图“修复”它。 结果看起来不错:检测成功。 不幸的是,由于您只提供了一个样本,我无法确定这是否适用于其他样本……(可能不行……)不过,您可以试一试。 最好的祝福, 斯蒂芬
注意:我将 tesseract-5.0.0-alpha 与 tessdata_best 数据集一起使用。 这是控制台的输出:
Regression parameters for the second-degree polynomial:
[ 2.33735101e-04 -1.92211992e-01 2.43573673e+02]
=============================
Rectified image
RESULT: EG01-012R210126024
=============================
================================================
Test on the non rectified image
with the same blur, erode, threshold and
tesseract parameters
RESULT: EGO1-012R2101269
================================================
Press any key on an opened opencv window to close
下面是程序:
# Standard imports
import cv2
import numpy as np
from matplotlib import pyplot as plt
import pytesseract
pytesseract.pytesseract.tesseract_cmd = r'/usr/local/bin/tesseract'
# Read image
imgr = cv2.imread("a.png")
# Resizing, converting...
factor=3
imgr = cv2.resize(imgr, (int(imgr.shape[1]*factor ), int(imgr.shape[0]*factor)), interpolation=cv2.INTER_AREA)
# First detection in order to crop the image
# We want a detection. Not important if result is bad.
strings=pytesseract.image_to_data(imgr, lang = 'eng', config='--psm 11 --oem 3 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-')
strings=strings.split('\n')
for line in strings[2:]:
s=line.split('\t')
if len(s[11])>0:
xmin=int(s[6])
break
## We crop the image to keep the interesting part...
imgr=imgr[:,np.max([0,xmin-imgr.shape[1]//10]):,:]
cv2.imshow("Cropped image",imgr)
hsv = cv2.cvtColor(imgr, cv2.COLOR_BGR2HSV)
h0, s0, Im0 = cv2.split(hsv)
w=Im0.shape[1] # From now, this is the image we will work on.
h=Im0.shape[0]
# Blob image to compute the image distortion
blob=cv2.blur(Im0,(w//3,1))
blob=cv2.normalize(blob,None,0,255,cv2.NORM_MINMAX)
blob=cv2.threshold(blob,170,255,cv2.THRESH_BINARY)[1]
cv2.imshow("Blob image",blob)
x=[]
y=[]
for i in range(w):
for j in range(h):
if blob[j,i]==0:
x.append(i)
y.append(j)
x=np.array(x)
y=np.array(y)
model = np.polyfit(x,y, 2)
print("Regression parameters for the second-degree polynomial: ")
print(model)
plt.plot(x,y,'x')
X=np.linspace(0,w)
plt.plot(X,X*X*model[0]+X*model[1]+model[2])
Ymean=np.mean(X*X*model[0]+X*model[1]+model[2])
# Remapping the cropped image with the found model parameters
map_x = np.zeros((Im0.shape[0], Im0.shape[1]), dtype=np.float32)
map_y = np.zeros((Im0.shape[0], Im0.shape[1]), dtype=np.float32)
for i in range(w):
for j in range(h):
map_x[j,i]=i
map_y[j,i]=j+i*i*model[0]+i*model[1]+model[2]-Ymean
Im1=cv2.remap(Im0, map_x, map_y, interpolation=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REPLICATE)
# Actual detection on the rectified image: Im1
Im1=cv2.normalize(Im1,None,0,255,cv2.NORM_MINMAX)
blur_radius=8
threshold=120
Im1= cv2.blur(Im1, (blur_radius,blur_radius))
kernel = np.ones((4,4), np.uint8)
Im1=255-cv2.erode(255-Im1, kernel)#, cv2.BORDER_REPLICATE)
Im1=cv2.normalize(Im1,None,0,255,cv2.NORM_MINMAX)
Im1 = cv2.threshold(Im1, threshold, 255, cv2.THRESH_BINARY)[1]
cv2.imshow("Rectified image for text detection",Im1)
strings=pytesseract.image_to_string(Im1, lang = 'eng', config='--psm 11 --oem 3 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-')
strings=strings.split()
strings=max(strings,key=len)
print('=============================')
print("Rectified image")
print('RESULT: ',strings)
print('=============================')
# For comparison: detection on the non rectified image
# using the same parameters:
Im2 = Im0 # whithout remapping
Im2 = cv2.normalize(Im2,None,0,255,cv2.NORM_MINMAX)
Im2 = cv2.blur(Im2, (blur_radius,blur_radius))
Im2 = 255-cv2.erode(255-Im2, kernel)#, cv2.BORDER_REPLICATE)
Im2 = cv2.normalize(Im2,None,0,255,cv2.NORM_MINMAX)
Im2 = cv2.threshold(Im2, threshold, 255, cv2.THRESH_BINARY)[1]
strings=pytesseract.image_to_string(Im2, lang = 'eng', config='--psm 11 --oem 3 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-')
strings=strings.split()
strings=max(strings,key=len)
print('================================================')
print("Test on the non rectified image")
print("with the same blur, erode, threshold and")
print("tesseract parameters")
print('RESULT: ',strings)
print('================================================')
cv2.imshow("Unrectified image for text detection",Im2)
# Close windows
print("Press any key on an opened opencv window to close")
cv2.waitKey()
plt.close()
cv2.destroyAllWindows()