cv2.rectangle 合并最近的边界框
cv2.rectangle join closest bounding box
我试图在扫描的页面中分离出中世纪手稿中的文字。
我正在使用 cv2 来检测区域,它给了我一个相当令人满意的结果。我用递增的数字标记了每个矩形,但我担心检测到的区域并不连续:
Here is a sample result of cv2 bounding box zones on a word
这是我使用的代码:
import numpy as np
import cv2
import matplotlib.pyplot as plt
# Font used for the numeric labels drawn next to each box.
font = cv2.FONT_HERSHEY_SIMPLEX

# Load a picture of a page, convert it to grayscale and blur it.
im = cv2.imread('test.png')
if im is None:
    # cv2.imread reports a missing/unreadable file by returning None.
    raise IOError("could not read image 'test.png'")
imgray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
image_blurred = cv2.GaussianBlur(imgray, (5, 5), 0)
image_blurred = cv2.dilate(image_blurred, None)

# cv2.threshold takes (src, thresh, maxval, type). The previous call passed
# an extra positional 0 as `type`, which pushed the Otsu flag into the
# optional `dst` slot, so Otsu thresholding was never actually requested.
ret, thresh = cv2.threshold(
    image_blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

# Retrieve the contours and their hierarchy. findContours returns three
# values on OpenCV 3.x and two on 2.x/4.x; the last two are always
# (contours, hierarchy), so slicing keeps the script version-agnostic.
contours, hierarchy = cv2.findContours(
    thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]
# hierarchy is None when no contour was found; otherwise unwrap the single
# outer dimension so that row k describes contour k.
hierarchy = [] if hierarchy is None else hierarchy[0]

# Fraction of the contour perimeter tolerated by the polygon approximation.
precision = 0.01

# Visit every contour and draw the bounding box of the selected ones.
for i, (cnt, currentHierarchy) in enumerate(zip(contours, hierarchy)):
    epsilon = precision * cv2.arcLength(cnt, True)
    approx = cv2.approxPolyDP(cnt, epsilon, True)
    # Author's note: this is the best combination found to isolate parent
    # containers; currentHierarchy[3] < 0 was expected to work better but
    # gave no result.
    # currentHierarchy[2] / [3] are the first-child / parent contour
    # indices (-1 when absent). NOTE(review): `> 0` also rejects index 0;
    # kept as-is because the author selected it empirically.
    if currentHierarchy[2] > 0 and currentHierarchy[3] > 0:
        x, y, w, h = cv2.boundingRect(approx)
        cv2.rectangle(im, (x, y), (x + w, y + h), (0, 255, 0), 2)
        cv2.putText(im, str(i), (x + 2, y + 2), font, 1, (0, 255, 0), 2,
                    cv2.LINE_AA)

# OpenCV stores images as BGR while matplotlib expects RGB; convert so the
# page is displayed with its real colors.
plt.imshow(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
plt.show()
我想将最近的区域连接在一起,以便对我的页面进行词标记化。在我的示例图片中,我想合并 2835、2847、2864、2878、2870 和 2868。
我该怎么办?我以为我可以将每个框的每个坐标存储在一个数组中,然后测试 (start_x、start_y) 和 (end_x、end_y) - 但对我来说这似乎很糟糕。
能给个提示吗?
谢谢,
我继续我的方法来找出单个单词。虽然不完全准确,但请看下面这张图片:
伪代码:
1. 对灰度图像应用高斯模糊。
2. 执行 Otsu 阈值化。
3. 执行了几个形态学操作:
3.1 腐蚀,尝试去除图像左上角的细线。
3.2 膨胀,重新连接因上一步操作而分开的单个字母。
4. 找出面积大于某个阈值的轮廓并标记它们。
编辑
代码:
import numpy as np
import cv2
import matplotlib.pyplot as plt
font = cv2.FONT_HERSHEY_SIMPLEX

# Load the page and prepare a blurred grayscale version of it.
im = cv2.imread('corpus.png')
if im is None:
    # cv2.imread reports a missing/unreadable file by returning None.
    raise IOError("could not read image 'corpus.png'")
imgray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
image_blurred = cv2.GaussianBlur(imgray, (9, 9), 0)
cv2.imshow('blur', image_blurred)
image_blurred_d = cv2.dilate(image_blurred, None)
cv2.imshow('dilated_blur', image_blurred_d)

# Inverted binary threshold with the Otsu flag set.
ret, thresh = cv2.threshold(
    image_blurred_d, 127, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
cv2.imshow('thresh', thresh)

# Morphological clean-up: one erosion pass with a 3x3 cross kernel, then
# one dilation pass with a 3x3 rectangular kernel.
kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
erosion = cv2.erode(thresh, kernel, iterations=1)
cv2.imshow('erosion', erosion)
kernel1 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
dilation = cv2.dilate(erosion, kernel1, iterations=1)
cv2.imshow('dilation', dilation)

# findContours returns three values on OpenCV 3.x and two on 2.x/4.x; the
# last two are always (contours, hierarchy), so slicing works on every
# version.
contours, hierarchy = cv2.findContours(
    dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2:]

# Box every contour whose area exceeds 100 and count those boxes.
count = 0
for cnt in contours:
    if cv2.contourArea(cnt) > 100:
        x, y, w, h = cv2.boundingRect(cnt)
        cv2.rectangle(im, (x, y), (x + w, y + h), (0, 255, 0), 2)
        count += 1
print('Number of probable words', count)

# Show the annotated page until a key is pressed, then close all windows.
cv2.imshow('final', im)
cv2.waitKey(0)
cv2.destroyAllWindows()
谢谢 Jeru Luke, we could implement this try on a complete page. The values given were very helpful, overall for kernel adaptation in blurring and eroding operation. The final result on the Bible Historiale Manuscript page 还是很有趣的。我们可以在标识中看到一些 "black holes",据我所知,这是由于膨胀;这是第一个正在进行的工作。我们将不得不管理大图片和首字母大字母。
这是我们用来过滤框、在框上添加标签并将每个片段保存在单独文件中的代码:
# Filter the detected boxes, label them on the page and save each selected
# region to its own file.
# NOTE(review): assumes `hierarchy` was already unwrapped (hierarchy[0]) so
# that it pairs one row with each contour - confirm against the caller.

# Crop from an untouched copy so the rectangles and labels drawn on `im`
# never end up inside the saved fragments.
clean_page = im.copy()

for i, (cnt, currentHierarchy) in enumerate(zip(contours, hierarchy)):
    # Same empirical hierarchy filter as the detection script: first-child
    # and parent indices must both be strictly positive.
    if not (currentHierarchy[2] > 0 and currentHierarchy[3] > 0):
        continue
    # Use the current contour: `approx` is never computed in this loop, so
    # it would be stale (or undefined) here.
    x, y, w, h = cv2.boundingRect(cnt)
    # Keep only boxes within the accepted size range.
    if not (110 < h < 300 and w > 110):
        continue
    cv2.rectangle(im, (x - 5, y - 5), (x + w + 5, y + h + 5),
                  (0, 255, 0), 8)
    cv2.putText(im, str(i), (x + 2, y + 2), font, 1, (0, 255, 0), 2,
                cv2.LINE_AA)
    cv2.putText(im, str(cv2.contourArea(cnt)), (x + w - 2, y + h - 2),
                font, 1, (0, 255, 0), 2, cv2.LINE_AA)
    cv2.putText(im, str(h) + '/' + str(w), (x + w - 2, y + h - 2),
                font, 1, (0, 0, 255), 2, cv2.LINE_AA)
    fragment = clean_page[y:y + h, x:x + w]
    cv2.imwrite("res" + str(i) + ".png", fragment)
我试图在扫描的页面中分离出中世纪手稿中的文字。 我正在使用 cv2 来检测区域,它给了我一个相当令人满意的结果。我用递增的数字标记了每个矩形,但我担心检测到的区域并不连续: Here is a sample result of cv2 bounding box zones on a word
这是我使用的代码:
import numpy as np
import cv2
import matplotlib.pyplot as plt
# Font used for the numeric labels drawn next to each box.
font = cv2.FONT_HERSHEY_SIMPLEX

# Load a picture of a page, convert it to grayscale and blur it.
im = cv2.imread('test.png')
if im is None:
    # cv2.imread reports a missing/unreadable file by returning None.
    raise IOError("could not read image 'test.png'")
imgray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
image_blurred = cv2.GaussianBlur(imgray, (5, 5), 0)
image_blurred = cv2.dilate(image_blurred, None)

# cv2.threshold takes (src, thresh, maxval, type). The previous call passed
# an extra positional 0 as `type`, which pushed the Otsu flag into the
# optional `dst` slot, so Otsu thresholding was never actually requested.
ret, thresh = cv2.threshold(
    image_blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

# Retrieve the contours and their hierarchy. findContours returns three
# values on OpenCV 3.x and two on 2.x/4.x; the last two are always
# (contours, hierarchy), so slicing keeps the script version-agnostic.
contours, hierarchy = cv2.findContours(
    thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]
# hierarchy is None when no contour was found; otherwise unwrap the single
# outer dimension so that row k describes contour k.
hierarchy = [] if hierarchy is None else hierarchy[0]

# Fraction of the contour perimeter tolerated by the polygon approximation.
precision = 0.01

# Visit every contour and draw the bounding box of the selected ones.
for i, (cnt, currentHierarchy) in enumerate(zip(contours, hierarchy)):
    epsilon = precision * cv2.arcLength(cnt, True)
    approx = cv2.approxPolyDP(cnt, epsilon, True)
    # Author's note: this is the best combination found to isolate parent
    # containers; currentHierarchy[3] < 0 was expected to work better but
    # gave no result.
    # currentHierarchy[2] / [3] are the first-child / parent contour
    # indices (-1 when absent). NOTE(review): `> 0` also rejects index 0;
    # kept as-is because the author selected it empirically.
    if currentHierarchy[2] > 0 and currentHierarchy[3] > 0:
        x, y, w, h = cv2.boundingRect(approx)
        cv2.rectangle(im, (x, y), (x + w, y + h), (0, 255, 0), 2)
        cv2.putText(im, str(i), (x + 2, y + 2), font, 1, (0, 255, 0), 2,
                    cv2.LINE_AA)

# OpenCV stores images as BGR while matplotlib expects RGB; convert so the
# page is displayed with its real colors.
plt.imshow(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
plt.show()
我想将最近的区域连接在一起,以便对我的页面进行词标记化。在我的示例图片中,我想合并 2835、2847、2864、2878、2870 和 2868。
我该怎么办?我以为我可以将每个框的每个坐标存储在一个数组中,然后测试 (start_x、start_y) 和 (end_x、end_y) - 但对我来说这似乎很糟糕。
能给个提示吗?
谢谢,
我继续我的方法来找出单个单词。虽然不完全准确,但请看下面这张图片:
伪代码:
1. 对灰度图像应用高斯模糊。
2. 执行 Otsu 阈值化。
3. 执行了几个形态学操作:
3.1 腐蚀,尝试去除图像左上角的细线。
3.2 膨胀,重新连接因上一步操作而分开的单个字母。
4. 找出面积大于某个阈值的轮廓并标记它们。
编辑
代码:
import numpy as np
import cv2
import matplotlib.pyplot as plt
font = cv2.FONT_HERSHEY_SIMPLEX

# Load the page and prepare a blurred grayscale version of it.
im = cv2.imread('corpus.png')
if im is None:
    # cv2.imread reports a missing/unreadable file by returning None.
    raise IOError("could not read image 'corpus.png'")
imgray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
image_blurred = cv2.GaussianBlur(imgray, (9, 9), 0)
cv2.imshow('blur', image_blurred)
image_blurred_d = cv2.dilate(image_blurred, None)
cv2.imshow('dilated_blur', image_blurred_d)

# Inverted binary threshold with the Otsu flag set.
ret, thresh = cv2.threshold(
    image_blurred_d, 127, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
cv2.imshow('thresh', thresh)

# Morphological clean-up: one erosion pass with a 3x3 cross kernel, then
# one dilation pass with a 3x3 rectangular kernel.
kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
erosion = cv2.erode(thresh, kernel, iterations=1)
cv2.imshow('erosion', erosion)
kernel1 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
dilation = cv2.dilate(erosion, kernel1, iterations=1)
cv2.imshow('dilation', dilation)

# findContours returns three values on OpenCV 3.x and two on 2.x/4.x; the
# last two are always (contours, hierarchy), so slicing works on every
# version.
contours, hierarchy = cv2.findContours(
    dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2:]

# Box every contour whose area exceeds 100 and count those boxes.
count = 0
for cnt in contours:
    if cv2.contourArea(cnt) > 100:
        x, y, w, h = cv2.boundingRect(cnt)
        cv2.rectangle(im, (x, y), (x + w, y + h), (0, 255, 0), 2)
        count += 1
print('Number of probable words', count)

# Show the annotated page until a key is pressed, then close all windows.
cv2.imshow('final', im)
cv2.waitKey(0)
cv2.destroyAllWindows()
谢谢 Jeru Luke, we could implement this try on a complete page. The values given were very helpful, overall for kernel adaptation in blurring and eroding operation. The final result on the Bible Historiale Manuscript page 还是很有趣的。我们可以在标识中看到一些 "black holes",据我所知,这是由于膨胀;这是第一个正在进行的工作。我们将不得不管理大图片和首字母大字母。 这是我们用来过滤框、在框上添加标签并将每个片段保存在单独文件中的代码:
# Filter the detected boxes, label them on the page and save each selected
# region to its own file.
# NOTE(review): assumes `hierarchy` was already unwrapped (hierarchy[0]) so
# that it pairs one row with each contour - confirm against the caller.

# Crop from an untouched copy so the rectangles and labels drawn on `im`
# never end up inside the saved fragments.
clean_page = im.copy()

for i, (cnt, currentHierarchy) in enumerate(zip(contours, hierarchy)):
    # Same empirical hierarchy filter as the detection script: first-child
    # and parent indices must both be strictly positive.
    if not (currentHierarchy[2] > 0 and currentHierarchy[3] > 0):
        continue
    # Use the current contour: `approx` is never computed in this loop, so
    # it would be stale (or undefined) here.
    x, y, w, h = cv2.boundingRect(cnt)
    # Keep only boxes within the accepted size range.
    if not (110 < h < 300 and w > 110):
        continue
    cv2.rectangle(im, (x - 5, y - 5), (x + w + 5, y + h + 5),
                  (0, 255, 0), 8)
    cv2.putText(im, str(i), (x + 2, y + 2), font, 1, (0, 255, 0), 2,
                cv2.LINE_AA)
    cv2.putText(im, str(cv2.contourArea(cnt)), (x + w - 2, y + h - 2),
                font, 1, (0, 255, 0), 2, cv2.LINE_AA)
    cv2.putText(im, str(h) + '/' + str(w), (x + w - 2, y + h - 2),
                font, 1, (0, 0, 255), 2, cv2.LINE_AA)
    fragment = clean_page[y:y + h, x:x + w]
    cv2.imwrite("res" + str(i) + ".png", fragment)