A way to apply smart thresholding to images
我正在编写一个 OCR 应用程序(用于希伯来文字)。
def apply_threshold(img, is_cropped=False):
    """Binarize ``img`` by OR-ing a global Otsu mask with adaptive masks.

    A global Otsu threshold is applied to the whole image first.  Adaptive
    Gaussian thresholds, whose block sizes are derived from the smaller image
    dimension, are then OR-ed into that result.  The working assumption is
    that a letter is always black while the background is sometimes black
    and sometimes white, so OR-ing the masks turns every pixel that any of
    the thresholds considers background into white.

    Args:
        img: Input image; it is passed to ``rgb2gray``.
            NOTE(review): ``rgb2gray`` is not defined in this snippet --
            presumably ``skimage.color.rgb2gray`` or a local helper; confirm
            that its output range survives the ``uint8`` cast below.
        is_cropped: Only consulted by the per-component refinement step,
            which is currently disabled (see below).

    Returns:
        A ``uint8`` image containing only the values 0 and 255.
    """
    otsu_flags = cv2.THRESH_BINARY + cv2.THRESH_OTSU

    # Cast once up front: the original only cast on the non-binary path, so
    # the "already binary" path handed Otsu an uncast image.
    gray_img = rgb2gray(img).astype('uint8')
    _, binary_img = cv2.threshold(gray_img, 0, 255, otsu_flags)

    if len(np.unique(img)) == 2:  # img is already binary
        return binary_img

    # Experimental refinement: threshold the largest connected component and
    # the rest of the image separately.  The original hard-disabled it with
    # ``and False``; it stays disabled here, and the connected-component
    # analysis is skipped while it is off.
    refine_largest_component = False
    if refine_largest_component and is_cropped:
        _, labels, stats, _ = cv2.connectedComponentsWithStats(
            binary_img.max() - binary_img, connectivity=8, ltype=cv2.CV_32S)
        # Row 0 describes the background label, so drop it.  The remaining
        # index values are the component labels themselves.
        df = pd.DataFrame(
            stats, columns=['left', 'top', 'width', 'height', 'area'])[1:]
        if df['area'].max() / df['area'].sum() > 0.1:
            # idxmax() returns the index label (the component label).
            # argmax() is positional after the [1:] slice and would be off
            # by one against ``labels``.
            largest = labels == df['area'].idxmax()
            refined = gray_img.copy()
            for region in (largest, ~largest):
                region_th, _ = cv2.threshold(gray_img[region], 0, 255, otsu_flags)
                refined[region & (gray_img > region_th)] = 255
                refined[region & (gray_img <= region_th)] = 0
            binary_img = refined

    # Divisors of the smaller image dimension used to derive block sizes.
    divisors = (3, 5, 25)
    min_dim = min(binary_img.shape)
    for n in divisors:
        block_size = min_dim // n
        if block_size % 2 == 0:
            block_size += 1  # block_size needs to be odd
        # adaptiveThreshold also requires block_size > 1; small images would
        # otherwise yield 1 here and make OpenCV raise.
        block_size = max(block_size, 3)
        binary_img = binary_img | cv2.adaptiveThreshold(
            gray_img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY, block_size, 10)
    return binary_img
一种方法是 Python/OpenCV 中的除法归一化。
import cv2
import numpy as np
# Division normalization: dividing the grayscale image by a heavily blurred
# copy of itself evens out slowly varying background brightness, because
# pixels that match their local average map to roughly 255.

# load image
img = cv2.imread("hebrew_text.jpg")
if img is None:
    # cv2.imread reports a missing or unreadable file by returning None
    # instead of raising, so fail here with a clear message.
    raise FileNotFoundError("could not read image: hebrew_text.jpg")

# convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# blur with a large kernel to estimate the background
blur = cv2.GaussianBlur(gray, (99, 99), 0)

# divide the image by its background estimate, rescaled by 255
divide = cv2.divide(gray, blur, scale=255)

# write result to disk
cv2.imwrite("hebrew_text_division.png", divide)

# display it
cv2.imshow("gray", gray)
cv2.imshow("divide", divide)
# imshow windows are only drawn while the GUI event loop runs; block until
# a key is pressed, then close them.
cv2.waitKey(0)
cv2.destroyAllWindows()
我还建议，如果可能的话，将图像保存为 PNG 而不是 JPG。JPG 采用有损压缩，会引入颜色偏差，这可能正是您在背景中看到多余噪点的原因之一。
我正在编写一个 OCR 应用程序(用于希伯来文字)。
def apply_threshold(img, is_cropped=False):
    """Binarize ``img`` by OR-ing a global Otsu mask with adaptive masks.

    A global Otsu threshold is applied to the whole image first.  Adaptive
    Gaussian thresholds, whose block sizes are derived from the smaller image
    dimension, are then OR-ed into that result.  The working assumption is
    that a letter is always black while the background is sometimes black
    and sometimes white, so OR-ing the masks turns every pixel that any of
    the thresholds considers background into white.

    Args:
        img: Input image; it is passed to ``rgb2gray``.
            NOTE(review): ``rgb2gray`` is not defined in this snippet --
            presumably ``skimage.color.rgb2gray`` or a local helper; confirm
            that its output range survives the ``uint8`` cast below.
        is_cropped: Only consulted by the per-component refinement step,
            which is currently disabled (see below).

    Returns:
        A ``uint8`` image containing only the values 0 and 255.
    """
    otsu_flags = cv2.THRESH_BINARY + cv2.THRESH_OTSU

    # Cast once up front: the original only cast on the non-binary path, so
    # the "already binary" path handed Otsu an uncast image.
    gray_img = rgb2gray(img).astype('uint8')
    _, binary_img = cv2.threshold(gray_img, 0, 255, otsu_flags)

    if len(np.unique(img)) == 2:  # img is already binary
        return binary_img

    # Experimental refinement: threshold the largest connected component and
    # the rest of the image separately.  The original hard-disabled it with
    # ``and False``; it stays disabled here, and the connected-component
    # analysis is skipped while it is off.
    refine_largest_component = False
    if refine_largest_component and is_cropped:
        _, labels, stats, _ = cv2.connectedComponentsWithStats(
            binary_img.max() - binary_img, connectivity=8, ltype=cv2.CV_32S)
        # Row 0 describes the background label, so drop it.  The remaining
        # index values are the component labels themselves.
        df = pd.DataFrame(
            stats, columns=['left', 'top', 'width', 'height', 'area'])[1:]
        if df['area'].max() / df['area'].sum() > 0.1:
            # idxmax() returns the index label (the component label).
            # argmax() is positional after the [1:] slice and would be off
            # by one against ``labels``.
            largest = labels == df['area'].idxmax()
            refined = gray_img.copy()
            for region in (largest, ~largest):
                region_th, _ = cv2.threshold(gray_img[region], 0, 255, otsu_flags)
                refined[region & (gray_img > region_th)] = 255
                refined[region & (gray_img <= region_th)] = 0
            binary_img = refined

    # Divisors of the smaller image dimension used to derive block sizes.
    divisors = (3, 5, 25)
    min_dim = min(binary_img.shape)
    for n in divisors:
        block_size = min_dim // n
        if block_size % 2 == 0:
            block_size += 1  # block_size needs to be odd
        # adaptiveThreshold also requires block_size > 1; small images would
        # otherwise yield 1 here and make OpenCV raise.
        block_size = max(block_size, 3)
        binary_img = binary_img | cv2.adaptiveThreshold(
            gray_img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY, block_size, 10)
    return binary_img
一种方法是 Python/OpenCV 中的除法归一化。
import cv2
import numpy as np
# Division normalization: dividing the grayscale image by a heavily blurred
# copy of itself evens out slowly varying background brightness, because
# pixels that match their local average map to roughly 255.

# load image
img = cv2.imread("hebrew_text.jpg")
if img is None:
    # cv2.imread reports a missing or unreadable file by returning None
    # instead of raising, so fail here with a clear message.
    raise FileNotFoundError("could not read image: hebrew_text.jpg")

# convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# blur with a large kernel to estimate the background
blur = cv2.GaussianBlur(gray, (99, 99), 0)

# divide the image by its background estimate, rescaled by 255
divide = cv2.divide(gray, blur, scale=255)

# write result to disk
cv2.imwrite("hebrew_text_division.png", divide)

# display it
cv2.imshow("gray", gray)
cv2.imshow("divide", divide)
# imshow windows are only drawn while the GUI event loop runs; block until
# a key is pressed, then close them.
cv2.waitKey(0)
cv2.destroyAllWindows()
我还建议，如果可能的话，将图像保存为 PNG 而不是 JPG。JPG 采用有损压缩，会引入颜色偏差，这可能正是您在背景中看到多余噪点的原因之一。