如何使用 Python 和 OpenCV 在图像中裁剪多个 ROI
How to crop multiple ROI in image using Python and OpenCV
我有一张从 PDF 转换为 PNG 的图像。转换后的图像包含几个我想使用 OCR Tesseract 提取的关键字。
现在,我需要手动确定 ROI 以裁剪选定的 ROI。由于我要应用 5 个以上的 ROI,应用 ROI 的最有效方法是什么而不是通过尝试和错误来找到确切位置?
代码如下:
def cropped(self, event):
#1st ROI
y = 20
x = 405
h = 230
w = 425
#2nd ROI
y1 = 30
x1 = 305
h1 = 330
w1 = 525
#open the converted image
image = cv2.imread("Output.png")
#perform image cropping
crop_image = image[x:w, y:h]
crop_image1 = image[x1:w1, y1:h1]
#save the cropped image
cv2.imwrite("Cropped.png", crop_image)
cv2.imwrite("Cropped1.png", crop_image1)
#open the cropped image and pass to the OCR engine
im = cv2.imread("Cropped.png")
im1 = cv2.imread("Cropped1.png")
## Do the text extraction here
您可以使用鼠标事件 select 多个 ROI 并根据位置裁剪
#!/usr/bin/env python3
import argparse
import cv2
import numpy as np
from PIL import Image
import os
drawing = False # true if mouse is pressed
ix,iy = -1,-1
refPt = []
img = ""
clone = ""
ROIRegion = []
# mouse callback function
def draw_rectangle(event,x,y,flags,param):
global ix,iy,drawing,img,clone,refPt, ROIRegion
if event == cv2.EVENT_LBUTTONDOWN:
drawing = True
ix,iy = x,y
refPt = [(x, y)]
ROIRegion.append(refPt)
#clone = img.copy()
elif event == cv2.EVENT_MOUSEMOVE:
if drawing == True:
img = clone.copy()
cv2.rectangle(img,(ix,iy),(x,y),(0,255,0),3)
a=x
b=y
if a != x | b != y:
cv2.rectangle(img,(ix,iy),(x,y),(0,0,0),-1)
elif event == cv2.EVENT_LBUTTONUP:
drawing = False
refPt.append((x,y))
img = clone.copy()
cv2.rectangle(img, (ix,iy),(x,y), (0, 255, 0), 2)
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True, help="Path to the image")
args = vars(ap.parse_args())
# load the image, clone it, and setup the mouse callback function
img = cv2.imread(args["image"])
img = np.array(img)
clone = img.copy()
cv2.namedWindow('image')
cv2.setMouseCallback('image',draw_rectangle)
while(1):
cv2.imshow('image',img)
k = cv2.waitKey(1) & 0xFF
if k == ord("r"):
del ROIRegion[-1]
del refPt[-1]
img = clone.copy()
elif k == 27:
break
#Do your cropping here
for region in range(len(ROIRegion)):
cv2.rectangle(img, ROIRegion[region][0],ROIRegion[region][1], (0, 255, 0), 2)
roi = clone[ROIRegion[region][0][1]:ROIRegion[region][1][1], ROIRegion[region][0][0]:ROIRegion[region][1][0]]
roi = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)
这是 Python/OpenCV 中的一种方法。
- 读取输入
- 框轮廓颜色的阈值
- 应用形态学以确保闭合
- 获取外部轮廓
- 遍历每个轮廓,获取其边界框,裁剪输入中的区域并写入输出
输入:
import cv2
import numpy as np
# read image
img = cv2.imread('text_boxes.jpg')
# threshold on box outline color
lowerBound = (80,120,100)
upperBound = (160,200,180)
thresh = cv2.inRange(img, lowerBound, upperBound)
# apply morphology to ensure regions are filled and remove extraneous noise
kernel = np.ones((3,3), np.uint8)
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
# get contours
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
# get bounding boxes
i = 1
for cntr in contours:
# get bounding boxes
x,y,w,h = cv2.boundingRect(cntr)
crop = img[y:y+h, x:x+w]
cv2.imwrite("text_boxes_crop_{0}.png".format(i), crop)
i = i + 1
# save threshold
cv2.imwrite("text_boxes_thresh.png",thresh)
# show thresh and result
cv2.imshow("thresh", thresh)
cv2.waitKey(0)
cv2.destroyAllWindows()
阈值图像:
裁剪图像:
我有一张从 PDF 转换为 PNG 的图像。转换后的图像包含几个我想使用 OCR Tesseract 提取的关键字。
现在,我需要手动确定 ROI 以裁剪选定的 ROI。由于我要应用 5 个以上的 ROI,应用 ROI 的最有效方法是什么而不是通过尝试和错误来找到确切位置?
代码如下:
def cropped(self, event):
#1st ROI
y = 20
x = 405
h = 230
w = 425
#2nd ROI
y1 = 30
x1 = 305
h1 = 330
w1 = 525
#open the converted image
image = cv2.imread("Output.png")
#perform image cropping
crop_image = image[x:w, y:h]
crop_image1 = image[x1:w1, y1:h1]
#save the cropped image
cv2.imwrite("Cropped.png", crop_image)
cv2.imwrite("Cropped1.png", crop_image1)
#open the cropped image and pass to the OCR engine
im = cv2.imread("Cropped.png")
im1 = cv2.imread("Cropped1.png")
## Do the text extraction here
您可以使用鼠标事件 select 多个 ROI 并根据位置裁剪
#!/usr/bin/env python3
import argparse
import cv2
import numpy as np
from PIL import Image
import os
drawing = False # true if mouse is pressed
ix,iy = -1,-1
refPt = []
img = ""
clone = ""
ROIRegion = []
# mouse callback function
def draw_rectangle(event,x,y,flags,param):
global ix,iy,drawing,img,clone,refPt, ROIRegion
if event == cv2.EVENT_LBUTTONDOWN:
drawing = True
ix,iy = x,y
refPt = [(x, y)]
ROIRegion.append(refPt)
#clone = img.copy()
elif event == cv2.EVENT_MOUSEMOVE:
if drawing == True:
img = clone.copy()
cv2.rectangle(img,(ix,iy),(x,y),(0,255,0),3)
a=x
b=y
if a != x | b != y:
cv2.rectangle(img,(ix,iy),(x,y),(0,0,0),-1)
elif event == cv2.EVENT_LBUTTONUP:
drawing = False
refPt.append((x,y))
img = clone.copy()
cv2.rectangle(img, (ix,iy),(x,y), (0, 255, 0), 2)
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True, help="Path to the image")
args = vars(ap.parse_args())
# load the image, clone it, and setup the mouse callback function
img = cv2.imread(args["image"])
img = np.array(img)
clone = img.copy()
cv2.namedWindow('image')
cv2.setMouseCallback('image',draw_rectangle)
while(1):
cv2.imshow('image',img)
k = cv2.waitKey(1) & 0xFF
if k == ord("r"):
del ROIRegion[-1]
del refPt[-1]
img = clone.copy()
elif k == 27:
break
#Do your cropping here
for region in range(len(ROIRegion)):
cv2.rectangle(img, ROIRegion[region][0],ROIRegion[region][1], (0, 255, 0), 2)
roi = clone[ROIRegion[region][0][1]:ROIRegion[region][1][1], ROIRegion[region][0][0]:ROIRegion[region][1][0]]
roi = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)
这是 Python/OpenCV 中的一种方法。
- 读取输入
- 框轮廓颜色的阈值
- 应用形态学以确保闭合
- 获取外部轮廓
- 遍历每个轮廓,获取其边界框,裁剪输入中的区域并写入输出
输入:
import cv2
import numpy as np
# read image
img = cv2.imread('text_boxes.jpg')
# threshold on box outline color
lowerBound = (80,120,100)
upperBound = (160,200,180)
thresh = cv2.inRange(img, lowerBound, upperBound)
# apply morphology to ensure regions are filled and remove extraneous noise
kernel = np.ones((3,3), np.uint8)
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
# get contours
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
# get bounding boxes
i = 1
for cntr in contours:
# get bounding boxes
x,y,w,h = cv2.boundingRect(cntr)
crop = img[y:y+h, x:x+w]
cv2.imwrite("text_boxes_crop_{0}.png".format(i), crop)
i = i + 1
# save threshold
cv2.imwrite("text_boxes_thresh.png",thresh)
# show thresh and result
cv2.imshow("thresh", thresh)
cv2.waitKey(0)
cv2.destroyAllWindows()
阈值图像:
裁剪图像: