如何使用 Python 和 OpenCV 进行图像分割
How to do image segmentation with Python and OpenCV
我有一张发票的图片。我想将该图像拆分成多个部分并获得更小的图像。
我尝试使用 OpenCV 的 K-means 聚类,但输出只是一个很小的黑色窗口。
这是我的代码:
import numpy as np
import cv2
# Load the invoice image from disk.
img = cv2.imread("image1.jpg")
# Flatten the image into rows of 3 values (one row per pixel) for k-means.
# NOTE(review): this rebinds `img`, so the original height/width can no
# longer be read from `img.shape` further down.
img = img.reshape((-1,3))
# Convert the samples to float32 before handing them to cv2.kmeans.
img = np.float32(img)
# Termination criteria tuple: (type flags, max iterations = 10, epsilon = 1).
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER , 10, 1)
# Number of clusters and number of k-means attempts.
nubmer_of_clusters = 6
attempts = 50
# Run k-means with randomly chosen initial centers.
ret, label, center = cv2.kmeans(img, nubmer_of_clusters, None, criteria, attempts, cv2.KMEANS_RANDOM_CENTERS)
# Convert the cluster centers to 8-bit values and replace every pixel with
# the center of the cluster it was assigned to.
center = np.uint8(center)
res = center[label.flatten()]
# NOTE(review): `img` is the flattened (N, 3) array at this point, so `res`
# keeps the flattened shape instead of the original image shape. This is
# presumably why only a tiny window is displayed; the original shape would
# have to be saved before the reshape above.
res = res.reshape((img.shape))
cv2.imshow("starting_image", res)
# NOTE(review): waitKey(2) waits only about 2 ms for a key press; no further
# statement follows it in this snippet.
cv2.waitKey(2)
这是输入图像的例子:
用红色标出的是我要提取的图像部分。
我不确定自己选用的模型是否合适,甚至不确定这种方法本身是否正确。但我需要的是图片上包含文字的各个片段。
我试过轮廓,但我得到的是每个字母的轮廓,我想要每个文本段的轮廓:
# Outline every contour found in a binarized copy of the invoice.
img = cv2.imread("image1.jpg")
# Keep the grayscale copy in its own variable so the red rectangles are
# drawn on the 3-channel original rather than on a single-channel image.
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Inverse binary threshold: pixels at or below 127 become 255 (foreground),
# brighter pixels become 0.
ret, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
for c in contours:
    # Axis-aligned bounding box of the contour.
    x, y, w, h = cv2.boundingRect(c)
    cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 2)
cv2.imshow('Bounding rect', img)
# imshow needs a waitKey call to process GUI events; wait for a key press.
cv2.waitKey(0)
关键是对字母的轮廓进行膨胀(dilate),使相邻的字母连成一个个文本块。方法如下:
import cv2
import numpy as np
def process(img, kernel_size=(5, 5), iterations=20):
    """Return an edge mask in which nearby edges are merged into blobs.

    Args:
        img: Image array accepted by ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``.
        kernel_size: (rows, cols) of the all-ones dilation kernel.
        iterations: Number of times the dilation is applied.

    Returns:
        The dilated Canny edge image.
    """
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Both hysteresis thresholds are 0 (as in the original snippet).
    img_canny = cv2.Canny(img_gray, 0, 0)
    # Repeated dilation grows the edges until neighbouring letters touch.
    return cv2.dilate(img_canny, np.ones(kernel_size), iterations=iterations)
def draw_segments(img, min_area=70000):
    """Draw a red rectangle on *img* around every large segment.

    The image is modified in place; nothing is returned.

    Args:
        img: Image array to analyse and draw on.
        min_area: A bounding rectangle is drawn only when its area
            (width * height, in pixels) is greater than this value.
    """
    # RETR_EXTERNAL keeps only the outermost contours of the dilated mask.
    contours, _ = cv2.findContours(process(img), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        # Skip small boxes so that stray bits of text are ignored.
        if w * h > min_area:
            cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 5)
# Read the document, mark its text segments and show the result.
img = cv2.imread("document.jpg")
# cv2.imread returns None instead of raising when the file cannot be read.
if img is None:
    raise FileNotFoundError('Could not read image "document.jpg"')
draw_segments(img)
cv2.imshow("Image", img)
# Wait for a key press, then close the window.
cv2.waitKey(0)
cv2.destroyAllWindows()
输出:
解释:
- 导入必要的库:
import cv2
import numpy as np
- 定义一个处理图片的函数,具体见代码注释:
def process(img, kernel_size=(5, 5), iterations=20):
    """Turn *img* into a mask where close-together edges form chunks.

    Args:
        img: Image array accepted by ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``.
        kernel_size: (rows, cols) of the all-ones dilation kernel.
        iterations: Number of times the dilation is applied.

    Returns:
        The dilated Canny edge image.
    """
    # Convert to grayscale.
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Detect edges with the Canny edge detector.
    img_canny = cv2.Canny(img_gray, 0, 0)
    # Dilate the edges so scattered contours that are close to each other
    # merge into chunks.
    return cv2.dilate(img_canny, np.ones(kernel_size), iterations=iterations)
- 定义一个接收图像的函数,它利用之前定义的
process
函数处理图像并找出其中的轮廓。随后遍历每个轮廓:如果轮廓的边界矩形面积大于某个阈值(例如 70000,用于排除零散的文字),就在图像上绘制该边界矩形:
def draw_segments(img, min_area=70000):
    """Mark every sufficiently large segment of *img* with a red box.

    The image is modified in place; nothing is returned.

    Args:
        img: Image array to analyse and draw on.
        min_area: A bounding rectangle is drawn only when its area
            (width * height, in pixels) is greater than this value.
    """
    # Find the outer contours of the processed (edge-dilated) image.
    contours, _ = cv2.findContours(process(img), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        # Only boxes larger than min_area are drawn, which filters out
        # stray text.
        if w * h > min_area:
            cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 5)
- 最后读入图片,调用
draw_segments
函数,显示图片:
# Load the image, draw the detected segments on it and display it.
img = cv2.imread("document.jpg")
# cv2.imread returns None instead of raising when the file cannot be read.
if img is None:
    raise FileNotFoundError('Could not read image "document.jpg"')
draw_segments(img)
cv2.imshow("Image", img)
# Keep the window open until a key is pressed, then clean up.
cv2.waitKey(0)
cv2.destroyAllWindows()
我有一张发票的图片。我想将该图像拆分成多个部分并获得更小的图像。我尝试使用 OpenCV 的 K-means 聚类,但输出只是一个很小的黑色窗口。
这是我的代码:
import numpy as np
import cv2
# Load the invoice image from disk.
img = cv2.imread("image1.jpg")
# Flatten the image into rows of 3 values (one row per pixel) for k-means.
# NOTE(review): this rebinds `img`, so the original height/width can no
# longer be read from `img.shape` further down.
img = img.reshape((-1,3))
# Convert the samples to float32 before handing them to cv2.kmeans.
img = np.float32(img)
# Termination criteria tuple: (type flags, max iterations = 10, epsilon = 1).
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER , 10, 1)
# Number of clusters and number of k-means attempts.
nubmer_of_clusters = 6
attempts = 50
# Run k-means with randomly chosen initial centers.
ret, label, center = cv2.kmeans(img, nubmer_of_clusters, None, criteria, attempts, cv2.KMEANS_RANDOM_CENTERS)
# Convert the cluster centers to 8-bit values and replace every pixel with
# the center of the cluster it was assigned to.
center = np.uint8(center)
res = center[label.flatten()]
# NOTE(review): `img` is the flattened (N, 3) array at this point, so `res`
# keeps the flattened shape instead of the original image shape. This is
# presumably why only a tiny window is displayed; the original shape would
# have to be saved before the reshape above.
res = res.reshape((img.shape))
cv2.imshow("starting_image", res)
# NOTE(review): waitKey(2) waits only about 2 ms for a key press; no further
# statement follows it in this snippet.
cv2.waitKey(2)
这是输入图像的例子:
用红色标出的是我要提取的图像部分。
我不确定自己选用的模型是否合适,甚至不确定这种方法本身是否正确。但我需要的是图片上包含文字的各个片段。
我试过轮廓,但我得到的是每个字母的轮廓,我想要每个文本段的轮廓:
# Outline every contour found in a binarized copy of the invoice.
img = cv2.imread("image1.jpg")
# Keep the grayscale copy in its own variable so the red rectangles are
# drawn on the 3-channel original rather than on a single-channel image.
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Inverse binary threshold: pixels at or below 127 become 255 (foreground),
# brighter pixels become 0.
ret, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
for c in contours:
    # Axis-aligned bounding box of the contour.
    x, y, w, h = cv2.boundingRect(c)
    cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 2)
cv2.imshow('Bounding rect', img)
# imshow needs a waitKey call to process GUI events; wait for a key press.
cv2.waitKey(0)
关键是对字母的轮廓进行膨胀(dilate),使相邻的字母连成一个个文本块。方法如下:
import cv2
import numpy as np
def process(img, kernel_size=(5, 5), iterations=20):
    """Return an edge mask in which nearby edges are merged into blobs.

    Args:
        img: Image array accepted by ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``.
        kernel_size: (rows, cols) of the all-ones dilation kernel.
        iterations: Number of times the dilation is applied.

    Returns:
        The dilated Canny edge image.
    """
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Both hysteresis thresholds are 0 (as in the original snippet).
    img_canny = cv2.Canny(img_gray, 0, 0)
    # Repeated dilation grows the edges until neighbouring letters touch.
    return cv2.dilate(img_canny, np.ones(kernel_size), iterations=iterations)
def draw_segments(img, min_area=70000):
    """Draw a red rectangle on *img* around every large segment.

    The image is modified in place; nothing is returned.

    Args:
        img: Image array to analyse and draw on.
        min_area: A bounding rectangle is drawn only when its area
            (width * height, in pixels) is greater than this value.
    """
    # RETR_EXTERNAL keeps only the outermost contours of the dilated mask.
    contours, _ = cv2.findContours(process(img), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        # Skip small boxes so that stray bits of text are ignored.
        if w * h > min_area:
            cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 5)
# Read the document, mark its text segments and show the result.
img = cv2.imread("document.jpg")
# cv2.imread returns None instead of raising when the file cannot be read.
if img is None:
    raise FileNotFoundError('Could not read image "document.jpg"')
draw_segments(img)
cv2.imshow("Image", img)
# Wait for a key press, then close the window.
cv2.waitKey(0)
cv2.destroyAllWindows()
输出:
解释:
- 导入必要的库:
import cv2
import numpy as np
- 定义一个处理图片的函数,具体见代码注释:
def process(img, kernel_size=(5, 5), iterations=20):
    """Turn *img* into a mask where close-together edges form chunks.

    Args:
        img: Image array accepted by ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``.
        kernel_size: (rows, cols) of the all-ones dilation kernel.
        iterations: Number of times the dilation is applied.

    Returns:
        The dilated Canny edge image.
    """
    # Convert to grayscale.
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Detect edges with the Canny edge detector.
    img_canny = cv2.Canny(img_gray, 0, 0)
    # Dilate the edges so scattered contours that are close to each other
    # merge into chunks.
    return cv2.dilate(img_canny, np.ones(kernel_size), iterations=iterations)
- 定义一个接收图像的函数,它利用之前定义的
process
函数处理图像并找出其中的轮廓。随后遍历每个轮廓:如果轮廓的边界矩形面积大于某个阈值(例如 70000,用于排除零散的文字),就在图像上绘制该边界矩形:
def draw_segments(img, min_area=70000):
    """Mark every sufficiently large segment of *img* with a red box.

    The image is modified in place; nothing is returned.

    Args:
        img: Image array to analyse and draw on.
        min_area: A bounding rectangle is drawn only when its area
            (width * height, in pixels) is greater than this value.
    """
    # Find the outer contours of the processed (edge-dilated) image.
    contours, _ = cv2.findContours(process(img), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        # Only boxes larger than min_area are drawn, which filters out
        # stray text.
        if w * h > min_area:
            cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 5)
- 最后读入图片,调用
draw_segments
函数,显示图片:
# Load the image, draw the detected segments on it and display it.
img = cv2.imread("document.jpg")
# cv2.imread returns None instead of raising when the file cannot be read.
if img is None:
    raise FileNotFoundError('Could not read image "document.jpg"')
draw_segments(img)
cv2.imshow("Image", img)
# Keep the window open until a key is pressed, then clean up.
cv2.waitKey(0)
cv2.destroyAllWindows()