如何为 Tesseract 提高此图像的质量?
How to increase the quality of this image for Tesseract?
我有这张图片:
而且,我以这种方式处理它,以便以尽可能最好的方式将它传递给 Tesseract:
sharpened = unsharp_mask(img, amount=1.5)
cv2.imwrite(TEMP_FOLDER + 'sharpened.png', sharpened)
thr = cv2.threshold(sharpened, 220, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
# im = cv2.resize(thr, None, fx=2, fy=2, interpolation=cv2.INTER_AREA)
os.makedirs(TEMP_FOLDER, exist_ok=True)
cv2.imwrite(TEMP_FOLDER + 'inverted.png', thr)
inverted = cv2.imread(TEMP_FOLDER + 'inverted.png')
filtered_inverted = remove_black_boundaries(inverted)
filtered_inverted = cv2.resize(filtered_inverted, None, fx=2, fy=2, interpolation=cv2.INTER_LINEAR)
# kernel = np.ones((2, 2), np.uint8)
# filtered_inverted = cv2.dilate(filtered_inverted, kernel)
cv2.imwrite(TEMP_FOLDER + 'filtered.png', filtered_inverted)
median = cv2.medianBlur(filtered_inverted, 5)
# median = cv2.cvtColor(median, cv2.COLOR_RGB2GRAY)
# median = cv2.threshold(median, 127, 255, cv2.THRESH_BINARY)[1]
cv2.imwrite(TEMP_FOLDER + 'median.png', median)
函数unsharp_mask
定义为:
def unsharp_mask(image: np.ndarray, kernel_size: Tuple[int] = (5, 5),
sigma: float = 1.0, amount: float = 1.0, threshold: float = 0) -> np.ndarray:
"""Return a sharpened version of the image, using an unsharp mask."""
blurred = cv2.GaussianBlur(image, kernel_size, sigma)
sharpened = float(amount + 1) * image - float(amount) * blurred
sharpened = np.maximum(sharpened, np.zeros(sharpened.shape))
sharpened = np.minimum(sharpened, 255 * np.ones(sharpened.shape))
sharpened = sharpened.round().astype(np.uint8)
if threshold > 0:
low_contrast_mask = np.absolute(image - blurred) < threshold
np.copyto(sharpened, image, where=low_contrast_mask)
return sharpened
并且,函数 remove_black_boundaries
(在这种情况下它是无用的,因为图像中没有黑色边界)定义为:
def remove_black_boundaries(img: np.ndarray) -> np.ndarray:
hh, ww = img.shape[:2]
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)
thresh = cv2.erode(thresh, np.ones((3, 3), np.uint8))
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
cnt = max(contours, key=cv2.contourArea)
# draw white contour on black background as mask
mask = np.zeros((hh, ww), dtype=np.uint8)
cv2.drawContours(mask, [cnt], 0, (255, 255, 255), cv2.FILLED)
# invert mask so shapes are white on black background
mask_inv = 255 - mask
# create new (white) background
bckgnd = np.full_like(img, (255, 255, 255))
# apply mask to image
image_masked = cv2.bitwise_and(img, img, mask=mask)
# apply inverse mask to background
bckgnd_masked = cv2.bitwise_and(bckgnd, bckgnd, mask=mask_inv)
# add together
result = cv2.add(image_masked, bckgnd_masked)
return result
所以我得到 sharpened
作为:
而且,我得到了倒置(和过滤)为:
因此,传递给 Tesseract 的图像是:
但是,我从 Tesseract 得到的是没有第一行的 Conteggio: 2900
。我也尝试调整图像大小,但得到相同的输出。关于如何改进发送到 Tesseract 的图像有什么想法吗?
您没有显示您的实际 pytesseract
代码,但没有任何(预处理)处理,我只切换到 page segmentation method 6
就得到了正确的结果,即:
Assume a single uniform block of text.
import cv2
import pytesseract
img = cv2.imread('KQ49Y.png', cv2.IMREAD_GRAYSCALE)
text = pytesseract.image_to_string(img, config='--psm 6')
print(text.replace('\f', ''))
# Facebook
# Conteggio: 2900
----------------------------------------
System information
----------------------------------------
Platform: Windows-10-10.0.19042-SP0
Python: 3.9.6
PyCharm: 2021.2
OpenCV: 4.5.3
pytesseract: 5.0.0-alpha.20201127
----------------------------------------
我有这张图片:
而且,我以这种方式处理它,以便以尽可能最好的方式将它传递给 Tesseract:
sharpened = unsharp_mask(img, amount=1.5)
cv2.imwrite(TEMP_FOLDER + 'sharpened.png', sharpened)
thr = cv2.threshold(sharpened, 220, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
# im = cv2.resize(thr, None, fx=2, fy=2, interpolation=cv2.INTER_AREA)
os.makedirs(TEMP_FOLDER, exist_ok=True)
cv2.imwrite(TEMP_FOLDER + 'inverted.png', thr)
inverted = cv2.imread(TEMP_FOLDER + 'inverted.png')
filtered_inverted = remove_black_boundaries(inverted)
filtered_inverted = cv2.resize(filtered_inverted, None, fx=2, fy=2, interpolation=cv2.INTER_LINEAR)
# kernel = np.ones((2, 2), np.uint8)
# filtered_inverted = cv2.dilate(filtered_inverted, kernel)
cv2.imwrite(TEMP_FOLDER + 'filtered.png', filtered_inverted)
median = cv2.medianBlur(filtered_inverted, 5)
# median = cv2.cvtColor(median, cv2.COLOR_RGB2GRAY)
# median = cv2.threshold(median, 127, 255, cv2.THRESH_BINARY)[1]
cv2.imwrite(TEMP_FOLDER + 'median.png', median)
函数unsharp_mask
定义为:
def unsharp_mask(image: np.ndarray, kernel_size: Tuple[int] = (5, 5),
sigma: float = 1.0, amount: float = 1.0, threshold: float = 0) -> np.ndarray:
"""Return a sharpened version of the image, using an unsharp mask."""
blurred = cv2.GaussianBlur(image, kernel_size, sigma)
sharpened = float(amount + 1) * image - float(amount) * blurred
sharpened = np.maximum(sharpened, np.zeros(sharpened.shape))
sharpened = np.minimum(sharpened, 255 * np.ones(sharpened.shape))
sharpened = sharpened.round().astype(np.uint8)
if threshold > 0:
low_contrast_mask = np.absolute(image - blurred) < threshold
np.copyto(sharpened, image, where=low_contrast_mask)
return sharpened
并且,函数 remove_black_boundaries
(在这种情况下它是无用的,因为图像中没有黑色边界)定义为:
def remove_black_boundaries(img: np.ndarray) -> np.ndarray:
hh, ww = img.shape[:2]
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)
thresh = cv2.erode(thresh, np.ones((3, 3), np.uint8))
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
cnt = max(contours, key=cv2.contourArea)
# draw white contour on black background as mask
mask = np.zeros((hh, ww), dtype=np.uint8)
cv2.drawContours(mask, [cnt], 0, (255, 255, 255), cv2.FILLED)
# invert mask so shapes are white on black background
mask_inv = 255 - mask
# create new (white) background
bckgnd = np.full_like(img, (255, 255, 255))
# apply mask to image
image_masked = cv2.bitwise_and(img, img, mask=mask)
# apply inverse mask to background
bckgnd_masked = cv2.bitwise_and(bckgnd, bckgnd, mask=mask_inv)
# add together
result = cv2.add(image_masked, bckgnd_masked)
return result
所以我得到 sharpened
作为:
而且,我得到了倒置(和过滤)为:
因此,传递给 Tesseract 的图像是:
但是,我从 Tesseract 得到的是没有第一行的 Conteggio: 2900
。我也尝试调整图像大小,但得到相同的输出。关于如何改进发送到 Tesseract 的图像有什么想法吗?
您没有显示您的实际 pytesseract
代码,但没有任何(预处理)处理,我只切换到 page segmentation method 6
就得到了正确的结果,即:
Assume a single uniform block of text.
import cv2
import pytesseract
img = cv2.imread('KQ49Y.png', cv2.IMREAD_GRAYSCALE)
text = pytesseract.image_to_string(img, config='--psm 6')
print(text.replace('\f', ''))
# Facebook
# Conteggio: 2900
----------------------------------------
System information
----------------------------------------
Platform: Windows-10-10.0.19042-SP0
Python: 3.9.6
PyCharm: 2021.2
OpenCV: 4.5.3
pytesseract: 5.0.0-alpha.20201127
----------------------------------------