opencv python - 去除二值化图像中的小点噪声
opencv python - remove small points noise in binarized image
我正在做一个文档阅读器(reader),把文档中的所有文本解析到 Google 电子表格中,这个脚本应该可以节省我的工作时间。问题是二值化后的图像有很多干扰 pytesseract 的噪点(文本周围的小点)。我怎样才能消除这些噪点?我用来对图像进行二值化的代码是:
import pytesseract
import cv2
import numpy as np
import os
import re
import argparse
#binarization of images
def binarize(img, block_size=11, c=2):
    """Convert a BGR image to an inverted binary image via adaptive thresholding.

    Args:
        img: BGR image array, e.g. the result of ``cv2.imread``.
        block_size: Side length of the pixel neighbourhood used to compute
            each local threshold. OpenCV requires an odd value greater
            than 1. Raising it (together with ``c``) is the usual way to
            suppress small speckle noise around text.
        c: Constant subtracted from the Gaussian-weighted neighbourhood
            mean before comparing each pixel against it.

    Returns:
        The thresholded single-channel image produced by
        ``cv2.adaptiveThreshold`` with ``THRESH_BINARY_INV`` and a maximum
        value of 255.

    Raises:
        ValueError: If ``img`` is None (``cv2.imread`` returns None when a
            file cannot be read) or ``block_size`` is not an odd integer
            greater than 1.
    """
    # Validate up front: OpenCV would otherwise fail with an opaque
    # assertion error deep inside cvtColor / adaptiveThreshold.
    if img is None:
        raise ValueError("img is None; the image could not be read")
    if block_size <= 1 or block_size % 2 == 0:
        raise ValueError("block_size must be an odd integer greater than 1")

    # convert image to grayscale
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # apply adaptive thresholding and return the thresholded image
    return cv2.adaptiveThreshold(
        gray,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        block_size,
        c,
    )
# construct argument parser
parser = argparse.ArgumentParser(description='Binarize image and parse text in image to string')
parser.add_argument('-i', '--image', help='path to image', required=True)
# --output is used as a directory: the result file is created inside it
parser.add_argument('-o', '--output', help='path to output directory', required=True)
args = parser.parse_args()

# load image; cv2.imread returns None instead of raising when the file is
# missing or cannot be decoded, so check before processing
img = cv2.imread(args.image)
if img is None:
    parser.error('could not read image: ' + args.image)

# binarization of image
thresh = binarize(img)

# show image and wait for a key press before closing the window
cv2.imshow('image', thresh)
cv2.waitKey(0)
cv2.destroyAllWindows()

# save image; cv2.imwrite reports failure (e.g. a missing directory)
# through its boolean return value rather than an exception
# NOTE: JPEG is lossy, so the saved file will not be strictly two-valued;
# a lossless format such as PNG would avoid that.
output_path = os.path.join(args.output, 'imagen3.jpg')
if not cv2.imwrite(output_path, thresh):
    raise SystemExit('could not write image: ' + output_path)
我要清理的结果图像是:
如果我应用腐蚀(erosion),结果如下:
这比之前的结果更差。
编辑: 原图为:
您只需要在 Python/OpenCV 中增大自适应阈值的参数(邻域块大小 blockSize 和常数 C)。
输入:
import cv2
# read image; cv2.imread returns None instead of raising when the file is
# missing or cannot be decoded, so fail early with a clear message
img = cv2.imread("petrol.png")
if img is None:
    raise SystemExit("could not read image: petrol.png")

# convert img to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# do adaptive threshold on gray image; the larger block size (21) and
# constant (25) are what remove the small specks around the text
thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 21, 25)

# write results to disk; cv2.imwrite signals failure via its return value
if not cv2.imwrite("petrol_threshold.png", thresh):
    raise SystemExit("could not write image: petrol_threshold.png")

# display it, then close the window once a key is pressed
cv2.imshow("THRESHOLD", thresh)
cv2.waitKey(0)
cv2.destroyAllWindows()
结果:
我正在做一个文档阅读器(reader),把文档中的所有文本解析到 Google 电子表格中,这个脚本应该可以节省我的工作时间。问题是二值化后的图像有很多干扰 pytesseract 的噪点(文本周围的小点)。我怎样才能消除这些噪点?我用来对图像进行二值化的代码是:
import pytesseract
import cv2
import numpy as np
import os
import re
import argparse
#binarization of images
def binarize(img, block_size=11, c=2):
    """Convert a BGR image to an inverted binary image via adaptive thresholding.

    Args:
        img: BGR image array, e.g. the result of ``cv2.imread``.
        block_size: Side length of the pixel neighbourhood used to compute
            each local threshold. OpenCV requires an odd value greater
            than 1. Raising it (together with ``c``) is the usual way to
            suppress small speckle noise around text.
        c: Constant subtracted from the Gaussian-weighted neighbourhood
            mean before comparing each pixel against it.

    Returns:
        The thresholded single-channel image produced by
        ``cv2.adaptiveThreshold`` with ``THRESH_BINARY_INV`` and a maximum
        value of 255.

    Raises:
        ValueError: If ``img`` is None (``cv2.imread`` returns None when a
            file cannot be read) or ``block_size`` is not an odd integer
            greater than 1.
    """
    # Validate up front: OpenCV would otherwise fail with an opaque
    # assertion error deep inside cvtColor / adaptiveThreshold.
    if img is None:
        raise ValueError("img is None; the image could not be read")
    if block_size <= 1 or block_size % 2 == 0:
        raise ValueError("block_size must be an odd integer greater than 1")

    # convert image to grayscale
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # apply adaptive thresholding and return the thresholded image
    return cv2.adaptiveThreshold(
        gray,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        block_size,
        c,
    )
# construct argument parser
parser = argparse.ArgumentParser(description='Binarize image and parse text in image to string')
parser.add_argument('-i', '--image', help='path to image', required=True)
# --output is used as a directory: the result file is created inside it
parser.add_argument('-o', '--output', help='path to output directory', required=True)
args = parser.parse_args()

# load image; cv2.imread returns None instead of raising when the file is
# missing or cannot be decoded, so check before processing
img = cv2.imread(args.image)
if img is None:
    parser.error('could not read image: ' + args.image)

# binarization of image
thresh = binarize(img)

# show image and wait for a key press before closing the window
cv2.imshow('image', thresh)
cv2.waitKey(0)
cv2.destroyAllWindows()

# save image; cv2.imwrite reports failure (e.g. a missing directory)
# through its boolean return value rather than an exception
# NOTE: JPEG is lossy, so the saved file will not be strictly two-valued;
# a lossless format such as PNG would avoid that.
output_path = os.path.join(args.output, 'imagen3.jpg')
if not cv2.imwrite(output_path, thresh):
    raise SystemExit('could not write image: ' + output_path)
我要清理的结果图像是:
如果我应用腐蚀(erosion),结果如下:
这比之前的结果更差。
编辑: 原图为:
您只需要在 Python/OpenCV 中增大自适应阈值的参数(邻域块大小 blockSize 和常数 C)。
输入:
import cv2
# read image; cv2.imread returns None instead of raising when the file is
# missing or cannot be decoded, so fail early with a clear message
img = cv2.imread("petrol.png")
if img is None:
    raise SystemExit("could not read image: petrol.png")

# convert img to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# do adaptive threshold on gray image; the larger block size (21) and
# constant (25) are what remove the small specks around the text
thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 21, 25)

# write results to disk; cv2.imwrite signals failure via its return value
if not cv2.imwrite("petrol_threshold.png", thresh):
    raise SystemExit("could not write image: petrol_threshold.png")

# display it, then close the window once a key is pressed
cv2.imshow("THRESHOLD", thresh)
cv2.waitKey(0)
cv2.destroyAllWindows()
结果: