在 python 中使用 openCV 检测具有某些颜色和文本的区域
Detecting zone with some color and text with openCV in python
我想要的:
当用户点击一个按钮时,程序会在剪贴板中截取屏幕截图“这部分没问题”,检测感兴趣区域 (ZOI),剪切区域并为我的程序提取多个区域的文本.
我尝试从处方中提取信息。我知道感兴趣的区域将始终位于相同的颜色矩形中,但我不确定每次我需要提取信息时处方都会是全尺寸的。
你可以看到原始图像和我想要的 ZOI 是红色的。
我尝试了什么:
- 第一次:我尝试使用字体中的颜色来生成区域的轮廓。该程序可以在特定的灰色区域周围画出一个框(绿色框),但这不是我想要的。您可以在上方看到原始图像、处理后的图像以及代码:
import numpy as np
import cv2
# Load the screenshot to analyse
img = cv2.imread('test_image.png')
# Build a 0/255 uint8 mask of the pixels whose three channels all equal 227
target = (227,227,227)
gray = np.all(img == target, axis=2).astype(np.uint8) * 255
# Outer contours only; index [-2] selects the contour list on OpenCV 3 and 4
cnts = cv2.findContours(gray, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]
# Bounding box of the contour that encloses the largest area
c = max(cnts, key=cv2.contourArea)
x, y, w, h = cv2.boundingRect(c)
# Outline that box in green for visual checking
top_left = (x, y)
bottom_right = (x + w, y + h)
cv2.rectangle(img, top_left, bottom_right, (0, 255, 0), thickness=2)
# Display and save the mask first, then the annotated image
for window, path, picture in (
    ('gray', 'niveau_gris.jpg', gray),
    ('img', 'test_image_resultat.jpg', img),
):
    cv2.imshow(window, picture)
    cv2.imwrite(path, picture)
cv2.waitKey(0)
cv2.destroyAllWindows()
- 第二次:我尝试使用两个参数来生成区域的轮廓,目的是选中带有文本的区域。但该程序根本没有生成任何区域。代码如下:
import numpy as np
import cv2
frame = cv2.imread('test_image_constrasate.jpg')
# Convert BGR to HSV
hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
# Bounds passed to inRange on the HSV image.
# NOTE(review): for 8-bit images OpenCV stores hue as H/2 (0..179), so a hue
# bound of 189..204 can never match and the mask stays empty, which would
# explain why no zone is drawn. These values look like BGR grey levels rather
# than an HSV range for red; confirm, then either threshold `frame` directly
# or choose real HSV bounds.
lower_red = np.array([189,189,189])
upper_red = np.array([204,203,204])
mask = cv2.inRange(hsv, lower_red, upper_red)
# Index [-2] selects the contour list on both OpenCV 3 and 4
contours = cv2.findContours(mask.copy(),
                            cv2.RETR_TREE,
                            cv2.CHAIN_APPROX_SIMPLE)[-2]
# Only draw when something matched; max() raises on an empty sequence
if len(contours) > 0:
    red_area = max(contours, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(red_area)
    cv2.rectangle(frame,(x, y),(x+w, y+h),(0, 0, 255), 2)
cv2.imshow('frame', frame)
cv2.imshow('mask', mask)
cv2.waitKey(0)
- 第三次:我尝试使用 Hough Line Transform(霍夫直线变换),但我在图像处理方面的经验不足。我不确定这个过程的可重复性,因为屏幕截图可能不是全尺寸的,而且我也不确定如何用检测到的直线来构成矩形框。
提供研究
我在 Stack Overflow 上搜索了以下关键词:“OpenCV 选择矩形”、“OpenCV 基于颜色选择区域”、“OpenCV 如何基于颜色选择区域”……
你能帮帮我吗?感谢您以后的帮助
标题的处理方法:
- 使用 matchTemplate 找到标题左右两侧的按钮
- 标题就是相对于这些按钮定位的矩形区域
table 的处理方法:
- 用 inRange 按 table header 的颜色生成掩码
- connectedComponentsWithStats
- 按高度过滤,只保留 table header 的单元格
- 找到最宽的单元格
- 利用条纹背景分隔各行
整个事情:https://gist.github.com/crackwitz/54a2a8ed3fdb2d07b969ef5aeae9dfcf
实用函数:
def crop(im, x, y, w, h):
    """Return the ``w`` x ``h`` sub-image of ``im`` with top-left corner ``(x, y)``.

    ``im`` is indexed as ``im[row, column]``, so ``x``/``w`` select columns
    and ``y``/``h`` select rows. The result is ``im[y:y+h, x:x+w]``.

    Raises:
        AssertionError: if the rectangle is empty, starts at a negative
            coordinate, or extends past the image bounds.
    """
    (height, width) = im.shape[:2]
    # Slicing would silently clamp or wrap bad coordinates, so reject them.
    assert w > 0 and h > 0, "crop size must be positive"
    assert x >= 0 and y >= 0, "crop origin must be non-negative"
    assert (x+w <= width) and (y+h <= height), "crop rectangle exceeds image bounds"
    return im[y:y+h, x:x+w]
def find_template(haystack, needle):
    """Locate ``needle`` inside ``haystack`` by template matching.

    Uses ``cv.matchTemplate`` with ``cv.TM_SQDIFF`` and takes the location
    of the minimum score as the match.
    NOTE(review): ``cv`` is not imported in this excerpt; presumably
    ``import cv2 as cv`` -- confirm.

    Returns:
        ``(x, y, w, h)``: top-left corner of the best match in ``haystack``
        followed by the width and height of ``needle``.

    Raises:
        AssertionError: if the best score exceeds the tolerance below.
    """
    # shape is (rows, columns, ...): height comes first, then width
    (nh, nw) = needle.shape[:2]
    scores = cv.matchTemplate(haystack, needle, method=cv.TM_SQDIFF)
    # With a squared-difference score the best match is the minimum
    (minval, maxval, minloc, maxloc) = cv.minMaxLoc(scores)
    # minval ought to be 0 for an exact match; allow a squared error of 1 per
    # sample (the factor 3 presumably stands for three colour channels)
    assert minval <= nw*nh*3 * 1**2, "can't find template"
    (x,y) = minloc
    return (x, y, nw, nh)
加载:
# Loaded with the default flags; later steps index individual colour channels
im = cv.imread("YebIa.png")
# cv.imread signals failure by returning None rather than raising
if im is None:
    raise FileNotFoundError("cannot read image: YebIa.png")
(imh, imw) = im.shape[:2]
print("size:", imw, 'x', imh)
imshow(im)
以下按钮模板是按手工挑选的坐标从这张特定图片中提取的。更好的做法是把模板保存成文件并用 imread 读取,
而不是像这样:
# Cut both 24x24 button templates out of the image at hand-picked positions
button1 = crop(im, x=214, y=88, w=24, h=24)
button2 = crop(im, x=672, y=88, w=24, h=24)
imshow(button1)
imshow(button2)
查找按钮,获取标题:
# Locate each button template in the full image
button1_rect = find_template(im, button1)
button2_rect = find_template(im, button2)
(b1x, b1y, b1w, b1h) = button1_rect
(b2x, b2y, b2w, b2h) = button2_rect
# The title fills the gap between the two buttons, on the first button's rows
left, right = b1x + b1w, b2x
top, bottom = b1y, b1y + b1h
title = crop(im, left, top, w=right - left, h=bottom - top)
imshow(title)
inRange
:
# table header, first cell is largest
# Lower and upper bound are the same colour, so the mask selects only pixels
# exactly equal to header_color.
# NOTE(review): channel order is presumably BGR as loaded by cv.imread -- confirm
header_color = (194, 142, 93)
mask = cv.inRange(im, header_color, header_color)
连接的组件:
# Label the connected regions of the mask and collect per-label statistics
(nlabels, labels, stats, centroids) = cv.connectedComponentsWithStats(mask)
# stats columns: x, y, w, h, area (ConnectedComponentsTypes); print(stats) to inspect
筛选和排序组件:
# One tuple per component: (label, x, y, w, h, area)
comps = [(label, *stat) for label, stat in enumerate(stats)]
# Keep only components that are exactly 25 pixels high (header cells)
comps = [comp for comp in comps if comp[4] == 25]
# Fail with a clear message instead of an IndexError on comps[0] below
if not comps:
    raise RuntimeError("no table header component with height 25 found")
# Largest area first; the first header cell is the largest one
comps.sort(key=lambda comp: comp[5], reverse=True)
header_comp = comps[0]
header_rect = header_comp[1:5]  # (x, y, w, h)
(hx,hy,hw,hh) = header_rect
header = crop(im, *header_rect)
imshow(header)
查找 table body 和行:
# table body
# pixel column just before the header cell (B) contains striped background but no text
# column to the left of that (A) contains only white, until the end
bx = hx
by = hy+hh + 1  # hy+hh is the first row past the header cell; one more row is skipped
bw = hw
# Column A: channel 1 of the pixel column two to the left of the header cell,
# from `by` down to the bottom of the image
columnA = crop(im, bx-2, by, 1, imh-by)[:,0,1]
(I,) = np.where(columnA != 255)
# NOTE(review): I.min() raises ValueError if column A is 255 all the way down -- confirm this cannot happen
bh = I.min() # table body height: offset of the first non-255 pixel in column A
# Column B: same channel, one pixel to the left of the header cell, limited to the body
columnB = crop(im, bx-1, by, 1, bh)[:,0,1]
rowmask = (columnB == 255)  # True where the stripe pixel is 255
# Positions where the stripe value changes between neighbouring pixels
(I,) = np.where(np.diff(rowmask))
I += 1 # diff shifts things back, edge is on the second pixel, not the first
# Row i spans [row_tops[i], row_bottoms[i]) relative to `by`
# NOTE(review): pixels between the last edge and bh belong to no row -- confirm this is intended
row_tops = np.concatenate(([0], I[:-1]))
row_bottoms = I
# debug: np.vstack([row_tops, row_bottoms]).T
提取每一行:
# Table body rectangle: (x, y, w, h)
print((bx, by, bw, bh))
# `top` and `bottom` are row offsets relative to `by`; they rebind any
# earlier variables of the same name.
for i,(top,bottom) in enumerate(zip(row_tops, row_bottoms)):
    print(f"row {i+1}:")
    imshow(crop(im, bx, by+top, bw, bottom-top))
    print()
我想要的:
当用户点击一个按钮时,程序会在剪贴板中截取屏幕截图“这部分没问题”,检测感兴趣区域 (ZOI),剪切区域并为我的程序提取多个区域的文本.
我尝试从处方中提取信息。我知道感兴趣的区域将始终位于相同的颜色矩形中,但我不确定每次我需要提取信息时处方都会是全尺寸的。
你可以看到原始图像和我想要的 ZOI 是红色的。
- 第一次:我尝试使用字体中的颜色来生成区域的轮廓。该程序可以在特定的灰色区域周围画出一个框(绿色框),但这不是我想要的。您可以在上方看到原始图像、处理后的图像以及代码:
import numpy as np
import cv2
# Load the screenshot to analyse
img = cv2.imread('test_image.png')
# Build a 0/255 uint8 mask of the pixels whose three channels all equal 227
target = (227,227,227)
gray = np.all(img == target, axis=2).astype(np.uint8) * 255
# Outer contours only; index [-2] selects the contour list on OpenCV 3 and 4
cnts = cv2.findContours(gray, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]
# Bounding box of the contour that encloses the largest area
c = max(cnts, key=cv2.contourArea)
x, y, w, h = cv2.boundingRect(c)
# Outline that box in green for visual checking
top_left = (x, y)
bottom_right = (x + w, y + h)
cv2.rectangle(img, top_left, bottom_right, (0, 255, 0), thickness=2)
# Display and save the mask first, then the annotated image
for window, path, picture in (
    ('gray', 'niveau_gris.jpg', gray),
    ('img', 'test_image_resultat.jpg', img),
):
    cv2.imshow(window, picture)
    cv2.imwrite(path, picture)
cv2.waitKey(0)
cv2.destroyAllWindows()
- 第二次:我尝试使用两个参数来生成区域的轮廓,目的是选中带有文本的区域。但该程序根本没有生成任何区域。代码如下:
import numpy as np
import cv2
frame = cv2.imread('test_image_constrasate.jpg')
# Convert BGR to HSV
hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
# Bounds passed to inRange on the HSV image.
# NOTE(review): for 8-bit images OpenCV stores hue as H/2 (0..179), so a hue
# bound of 189..204 can never match and the mask stays empty, which would
# explain why no zone is drawn. These values look like BGR grey levels rather
# than an HSV range for red; confirm, then either threshold `frame` directly
# or choose real HSV bounds.
lower_red = np.array([189,189,189])
upper_red = np.array([204,203,204])
mask = cv2.inRange(hsv, lower_red, upper_red)
# Index [-2] selects the contour list on both OpenCV 3 and 4
contours = cv2.findContours(mask.copy(),
                            cv2.RETR_TREE,
                            cv2.CHAIN_APPROX_SIMPLE)[-2]
# Only draw when something matched; max() raises on an empty sequence
if len(contours) > 0:
    red_area = max(contours, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(red_area)
    cv2.rectangle(frame,(x, y),(x+w, y+h),(0, 0, 255), 2)
cv2.imshow('frame', frame)
cv2.imshow('mask', mask)
cv2.waitKey(0)
- 第三次:我尝试使用 Hough Line Transform(霍夫直线变换),但我在图像处理方面的经验不足。我不确定这个过程的可重复性,因为屏幕截图可能不是全尺寸的,而且我也不确定如何用检测到的直线来构成矩形框。
提供研究
我在 Stack Overflow 上搜索了以下关键词:“OpenCV 选择矩形”、“OpenCV 基于颜色选择区域”、“OpenCV 如何基于颜色选择区域”……
你能帮帮我吗?感谢您以后的帮助
标题的处理方法:
- 使用 matchTemplate 找到标题左右两侧的按钮
- 标题就是相对于这些按钮定位的矩形区域
table 的处理方法:
- 用 inRange 按 table header 的颜色生成掩码
- connectedComponentsWithStats
- 按高度过滤,只保留 table header 的单元格
- 找到最宽的单元格
- 利用条纹背景分隔各行
整个事情:https://gist.github.com/crackwitz/54a2a8ed3fdb2d07b969ef5aeae9dfcf
实用函数:
def crop(im, x, y, w, h):
    """Return the ``w`` x ``h`` sub-image of ``im`` with top-left corner ``(x, y)``.

    ``im`` is indexed as ``im[row, column]``, so ``x``/``w`` select columns
    and ``y``/``h`` select rows. The result is ``im[y:y+h, x:x+w]``.

    Raises:
        AssertionError: if the rectangle is empty, starts at a negative
            coordinate, or extends past the image bounds.
    """
    (height, width) = im.shape[:2]
    # Slicing would silently clamp or wrap bad coordinates, so reject them.
    assert w > 0 and h > 0, "crop size must be positive"
    assert x >= 0 and y >= 0, "crop origin must be non-negative"
    assert (x+w <= width) and (y+h <= height), "crop rectangle exceeds image bounds"
    return im[y:y+h, x:x+w]
def find_template(haystack, needle):
    """Locate ``needle`` inside ``haystack`` by template matching.

    Uses ``cv.matchTemplate`` with ``cv.TM_SQDIFF`` and takes the location
    of the minimum score as the match.
    NOTE(review): ``cv`` is not imported in this excerpt; presumably
    ``import cv2 as cv`` -- confirm.

    Returns:
        ``(x, y, w, h)``: top-left corner of the best match in ``haystack``
        followed by the width and height of ``needle``.

    Raises:
        AssertionError: if the best score exceeds the tolerance below.
    """
    # shape is (rows, columns, ...): height comes first, then width
    (nh, nw) = needle.shape[:2]
    scores = cv.matchTemplate(haystack, needle, method=cv.TM_SQDIFF)
    # With a squared-difference score the best match is the minimum
    (minval, maxval, minloc, maxloc) = cv.minMaxLoc(scores)
    # minval ought to be 0 for an exact match; allow a squared error of 1 per
    # sample (the factor 3 presumably stands for three colour channels)
    assert minval <= nw*nh*3 * 1**2, "can't find template"
    (x,y) = minloc
    return (x, y, nw, nh)
加载:
# Loaded with the default flags; later steps index individual colour channels
im = cv.imread("YebIa.png")
# cv.imread signals failure by returning None rather than raising
if im is None:
    raise FileNotFoundError("cannot read image: YebIa.png")
(imh, imw) = im.shape[:2]
print("size:", imw, 'x', imh)
imshow(im)
以下按钮模板是按手工挑选的坐标从这张特定图片中提取的。更好的做法是把模板保存成文件并用 imread 读取,
而不是像这样:
# Cut both 24x24 button templates out of the image at hand-picked positions
button1 = crop(im, x=214, y=88, w=24, h=24)
button2 = crop(im, x=672, y=88, w=24, h=24)
imshow(button1)
imshow(button2)
查找按钮,获取标题:
# Locate each button template in the full image
button1_rect = find_template(im, button1)
button2_rect = find_template(im, button2)
(b1x, b1y, b1w, b1h) = button1_rect
(b2x, b2y, b2w, b2h) = button2_rect
# The title fills the gap between the two buttons, on the first button's rows
left, right = b1x + b1w, b2x
top, bottom = b1y, b1y + b1h
title = crop(im, left, top, w=right - left, h=bottom - top)
imshow(title)
inRange
:
# table header, first cell is largest
# Lower and upper bound are the same colour, so the mask selects only pixels
# exactly equal to header_color.
# NOTE(review): channel order is presumably BGR as loaded by cv.imread -- confirm
header_color = (194, 142, 93)
mask = cv.inRange(im, header_color, header_color)
连接的组件:
# Label the connected regions of the mask and collect per-label statistics
(nlabels, labels, stats, centroids) = cv.connectedComponentsWithStats(mask)
# stats columns: x, y, w, h, area (ConnectedComponentsTypes); print(stats) to inspect
筛选和排序组件:
# One tuple per component: (label, x, y, w, h, area)
comps = [(label, *stat) for label, stat in enumerate(stats)]
# Keep only components that are exactly 25 pixels high (header cells)
comps = [comp for comp in comps if comp[4] == 25]
# Fail with a clear message instead of an IndexError on comps[0] below
if not comps:
    raise RuntimeError("no table header component with height 25 found")
# Largest area first; the first header cell is the largest one
comps.sort(key=lambda comp: comp[5], reverse=True)
header_comp = comps[0]
header_rect = header_comp[1:5]  # (x, y, w, h)
(hx,hy,hw,hh) = header_rect
header = crop(im, *header_rect)
imshow(header)
查找 table body 和行:
# table body
# pixel column just before the header cell (B) contains striped background but no text
# column to the left of that (A) contains only white, until the end
bx = hx
by = hy+hh + 1  # hy+hh is the first row past the header cell; one more row is skipped
bw = hw
# Column A: channel 1 of the pixel column two to the left of the header cell,
# from `by` down to the bottom of the image
columnA = crop(im, bx-2, by, 1, imh-by)[:,0,1]
(I,) = np.where(columnA != 255)
# NOTE(review): I.min() raises ValueError if column A is 255 all the way down -- confirm this cannot happen
bh = I.min() # table body height: offset of the first non-255 pixel in column A
# Column B: same channel, one pixel to the left of the header cell, limited to the body
columnB = crop(im, bx-1, by, 1, bh)[:,0,1]
rowmask = (columnB == 255)  # True where the stripe pixel is 255
# Positions where the stripe value changes between neighbouring pixels
(I,) = np.where(np.diff(rowmask))
I += 1 # diff shifts things back, edge is on the second pixel, not the first
# Row i spans [row_tops[i], row_bottoms[i]) relative to `by`
# NOTE(review): pixels between the last edge and bh belong to no row -- confirm this is intended
row_tops = np.concatenate(([0], I[:-1]))
row_bottoms = I
# debug: np.vstack([row_tops, row_bottoms]).T
提取每一行:
# Table body rectangle: (x, y, w, h)
print((bx, by, bw, bh))
# `top` and `bottom` are row offsets relative to `by`; they rebind any
# earlier variables of the same name.
for i,(top,bottom) in enumerate(zip(row_tops, row_bottoms)):
    print(f"row {i+1}:")
    imshow(crop(im, bx, by+top, bw, bottom-top))
    print()