如何扭曲矩形 object 以适应其较大的边界框
How to warp a rectangular object to fit its larger bounding box
给定这张图片:
我想让它旋转和拉伸以完全适合最大矩形框外侧没有空白的边界框。它还应该考虑更糟糕的透视情况,就像我稍后列出的链接一样。
基本上,虽然不明显,但矩形旋转了一点,我想修复这个变形。
但是,我在尝试检索轮廓的四个点时遇到错误。我已经确定并利用轮廓近似来隔离并仅获得相关的外观轮廓,正如您在图像中看到的那样,它是成功的,尽管我不能在其上使用透视扭曲。
我已经尝试过这里的链接:
- How to straighten a rotated rectangle area of an image using opencv in python?
- https://www.pyimagesearch.com/2014/05/05/building-pokedex-python-opencv-perspective-warping-step-5-6/
- https://www.pyimagesearch.com/2014/09/01/build-kick-ass-mobile-document-scanner-just-5-minutes/
紧随其后,只是做了一些小的修改(比如不缩小图像然后再放大)和不同的输入图像。
评论里有一个reader遇到了类似的错误,不过作者只是说用contour approximation。我这样做了,但我仍然收到同样的错误。
我已经检索到轮廓(连同它的边界框,是前面说明的图像),并使用此代码尝试透视扭曲:
def warp_perspective(cnt):
    """Perspective-warp the 4-point contour ``cnt`` out of the global image
    ``orig`` into an axis-aligned, top-down rectangle.

    ``cnt`` must already be reduced to exactly four vertices (e.g. via
    ``cv2.approxPolyDP``). A raw contour with hundreds of points cannot be
    reshaped to (4, 2) -- that is exactly the reported
    ``ValueError: cannot reshape array of size 2090 into shape (4,2)``.

    Returns the warped image (also shown in a window named "warped").
    Raises ValueError when ``cnt`` does not contain exactly four points.
    """
    # Guard with a clear message instead of letting reshape fail cryptically.
    if cnt.size != 8:
        raise ValueError(
            "warp_perspective expects a 4-point contour (8 values), got %d "
            "values; run cv2.approxPolyDP on the contour first." % cnt.size)
    pts = cnt.reshape(4, 2)
    rect = np.zeros((4, 2), dtype="float32")
    # Top-left corner has the smallest x+y sum, bottom-right the largest.
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]
    rect[2] = pts[np.argmax(s)]    # BUG FIX: was argmin(s) duplicated here
    # Top-right has the smallest y-x difference, bottom-left the largest.
    diff = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(diff)]
    rect[3] = pts[np.argmax(diff)]  # BUG FIX: was written into rect[2]
    (tl, tr, br, bl) = rect
    # Output width: the longer of the two horizontal edges.
    widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
    widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
    # Output height: the longer of the two vertical edges.
    heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
    heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
    maxWidth = max(int(widthA), int(widthB))
    maxHeight = max(int(heightA), int(heightB))
    # Destination corners in the same tl, tr, br, bl order as ``rect``.
    dst = np.array([
        [0, 0],
        [maxWidth - 1, 0],
        [maxWidth - 1, maxHeight - 1],
        [0, maxHeight - 1]], dtype="float32")
    # Compute the homography and warp the (global) original image.
    M = cv2.getPerspectiveTransform(rect, dst)
    warp = cv2.warpPerspective(orig, M, (maxWidth, maxHeight))
    cv2.imshow("warped", warp)
    return warp
该函数接受 cnt
作为单个轮廓。
运行时,我遇到了之前提到的这个错误:
in warp_perspective
pts = cnt.reshape(4, 2)
ValueError: cannot reshape array of size 2090 into shape (4,2)
我完全不明白。我已经成功地分离并检索了正确的轮廓和边界框,我唯一不同的是跳过了降尺度..
试试这个方法:
- 将图像转换为灰度并使用双边滤波器进行模糊处理
- 大津(Otsu)阈值化
- 寻找轮廓
- 对最大的方形轮廓进行轮廓逼近(contour approximation)
- 透视变换和旋转
结果
import cv2
import numpy as np
import imutils
def perspective_transform(image, corners):
    """Warp the quadrilateral described by ``corners`` to a flat, top-down
    rectangle cropped from ``image``, then rotate the result by -90 degrees.

    ``corners`` is a (4, 1, 2) contour as produced by cv2.approxPolyDP,
    assumed to arrive in top-right, top-left, bottom-left, bottom-right
    order (the order observed for this input image).
    """
    def order_corner_points(raw):
        # Unwrap the nested contour into plain (x, y) tuples and rearrange
        # them into clockwise tl, tr, br, bl order.
        flat = [(point[0][0], point[0][1]) for point in raw]
        top_r, top_l, bottom_l, bottom_r = flat
        return (top_l, top_r, bottom_r, bottom_l)

    ordered_corners = order_corner_points(corners)
    top_l, top_r, bottom_r, bottom_l = ordered_corners

    def edge_len(a, b):
        # Euclidean distance between two corner points.
        return np.sqrt(((a[0] - b[0]) ** 2) + ((a[1] - b[1]) ** 2))

    # Output width/height: the longer of each opposing pair of edges.
    width = max(int(edge_len(bottom_r, bottom_l)),
                int(edge_len(top_r, top_l)))
    height = max(int(edge_len(top_r, bottom_r)),
                 int(edge_len(top_l, bottom_l)))

    # Destination rectangle, matching the tl, tr, br, bl source order.
    dimensions = np.array(
        [[0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]],
        dtype="float32")
    source = np.array(ordered_corners, dtype="float32")

    # Homography + warp, then a fixed -90 degree rotation to compensate
    # for the corner ordering used above.
    matrix = cv2.getPerspectiveTransform(source, dimensions)
    transformed = cv2.warpPerspective(image, matrix, (width, height))
    return imutils.rotate_bound(transformed, angle=-90)
# Load the input and keep an untouched copy for the final warp.
image = cv2.imread('1.png')
original = image.copy()

# Edge-preserving bilateral blur, grayscale, then Otsu binarization.
blur = cv2.bilateralFilter(image, 9, 75, 75)
gray = cv2.cvtColor(blur, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255,
                       cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# External contours only; handle both OpenCV 3 and 4 return shapes.
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]

for c in cnts:
    # Approximate each contour; a 4-vertex polygon is the target rectangle.
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.015 * peri, True)
    if len(approx) == 4:
        cv2.drawContours(image, [c], 0, (36, 255, 12), 3)
        transformed = perspective_transform(original, approx)

cv2.imshow('thresh', thresh)
cv2.imshow('image', image)
cv2.imshow('transformed', transformed)
cv2.waitKey()
给定这张图片:
我想让它旋转和拉伸以完全适合最大矩形框外侧没有空白的边界框。它还应该考虑更糟糕的透视情况,就像我稍后列出的链接一样。
基本上,虽然不明显,但矩形旋转了一点,我想修复这个变形。
但是,我在尝试检索轮廓的四个点时遇到错误。我已经确定并利用轮廓近似来隔离并仅获得相关的外观轮廓,正如您在图像中看到的那样,它是成功的,尽管我不能在其上使用透视扭曲。
我已经尝试过这里的链接:
- How to straighten a rotated rectangle area of an image using opencv in python?
- https://www.pyimagesearch.com/2014/05/05/building-pokedex-python-opencv-perspective-warping-step-5-6/
- https://www.pyimagesearch.com/2014/09/01/build-kick-ass-mobile-document-scanner-just-5-minutes/
紧随其后,只是做了一些小的修改(比如不缩小图像然后再放大)和不同的输入图像。
评论里有一个reader遇到了类似的错误,不过作者只是说用contour approximation。我这样做了,但我仍然收到同样的错误。
我已经检索到轮廓(连同它的边界框,是前面说明的图像),并使用此代码尝试透视扭曲:
def warp_perspective(cnt):
    """Perspective-warp the 4-point contour ``cnt`` out of the global image
    ``orig`` into an axis-aligned, top-down rectangle.

    ``cnt`` must already be reduced to exactly four vertices (e.g. via
    ``cv2.approxPolyDP``). A raw contour with hundreds of points cannot be
    reshaped to (4, 2) -- that is exactly the reported
    ``ValueError: cannot reshape array of size 2090 into shape (4,2)``.

    Returns the warped image (also shown in a window named "warped").
    Raises ValueError when ``cnt`` does not contain exactly four points.
    """
    # Guard with a clear message instead of letting reshape fail cryptically.
    if cnt.size != 8:
        raise ValueError(
            "warp_perspective expects a 4-point contour (8 values), got %d "
            "values; run cv2.approxPolyDP on the contour first." % cnt.size)
    pts = cnt.reshape(4, 2)
    rect = np.zeros((4, 2), dtype="float32")
    # Top-left corner has the smallest x+y sum, bottom-right the largest.
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]
    rect[2] = pts[np.argmax(s)]    # BUG FIX: was argmin(s) duplicated here
    # Top-right has the smallest y-x difference, bottom-left the largest.
    diff = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(diff)]
    rect[3] = pts[np.argmax(diff)]  # BUG FIX: was written into rect[2]
    (tl, tr, br, bl) = rect
    # Output width: the longer of the two horizontal edges.
    widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
    widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
    # Output height: the longer of the two vertical edges.
    heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
    heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
    maxWidth = max(int(widthA), int(widthB))
    maxHeight = max(int(heightA), int(heightB))
    # Destination corners in the same tl, tr, br, bl order as ``rect``.
    dst = np.array([
        [0, 0],
        [maxWidth - 1, 0],
        [maxWidth - 1, maxHeight - 1],
        [0, maxHeight - 1]], dtype="float32")
    # Compute the homography and warp the (global) original image.
    M = cv2.getPerspectiveTransform(rect, dst)
    warp = cv2.warpPerspective(orig, M, (maxWidth, maxHeight))
    cv2.imshow("warped", warp)
    return warp
该函数接受 cnt
作为单个轮廓。
运行时,我遇到了之前提到的这个错误:
in warp_perspective
pts = cnt.reshape(4, 2)
ValueError: cannot reshape array of size 2090 into shape (4,2)
我完全不明白。我已经成功地分离并检索了正确的轮廓和边界框,我唯一不同的是跳过了降尺度..
试试这个方法:
- 将图像转换为灰度并使用双边滤波器进行模糊处理
- 大津(Otsu)阈值化
- 寻找轮廓
- 对最大的方形轮廓进行轮廓逼近(contour approximation)
- 透视变换和旋转
结果
import cv2
import numpy as np
import imutils
def perspective_transform(image, corners):
    """Warp the quadrilateral described by ``corners`` to a flat, top-down
    rectangle cropped from ``image``, then rotate the result by -90 degrees.

    ``corners`` is a (4, 1, 2) contour as produced by cv2.approxPolyDP,
    assumed to arrive in top-right, top-left, bottom-left, bottom-right
    order (the order observed for this input image).
    """
    def order_corner_points(raw):
        # Unwrap the nested contour into plain (x, y) tuples and rearrange
        # them into clockwise tl, tr, br, bl order.
        flat = [(point[0][0], point[0][1]) for point in raw]
        top_r, top_l, bottom_l, bottom_r = flat
        return (top_l, top_r, bottom_r, bottom_l)

    ordered_corners = order_corner_points(corners)
    top_l, top_r, bottom_r, bottom_l = ordered_corners

    def edge_len(a, b):
        # Euclidean distance between two corner points.
        return np.sqrt(((a[0] - b[0]) ** 2) + ((a[1] - b[1]) ** 2))

    # Output width/height: the longer of each opposing pair of edges.
    width = max(int(edge_len(bottom_r, bottom_l)),
                int(edge_len(top_r, top_l)))
    height = max(int(edge_len(top_r, bottom_r)),
                 int(edge_len(top_l, bottom_l)))

    # Destination rectangle, matching the tl, tr, br, bl source order.
    dimensions = np.array(
        [[0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]],
        dtype="float32")
    source = np.array(ordered_corners, dtype="float32")

    # Homography + warp, then a fixed -90 degree rotation to compensate
    # for the corner ordering used above.
    matrix = cv2.getPerspectiveTransform(source, dimensions)
    transformed = cv2.warpPerspective(image, matrix, (width, height))
    return imutils.rotate_bound(transformed, angle=-90)
# Load the input and keep an untouched copy for the final warp.
image = cv2.imread('1.png')
original = image.copy()

# Edge-preserving bilateral blur, grayscale, then Otsu binarization.
blur = cv2.bilateralFilter(image, 9, 75, 75)
gray = cv2.cvtColor(blur, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255,
                       cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# External contours only; handle both OpenCV 3 and 4 return shapes.
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]

for c in cnts:
    # Approximate each contour; a 4-vertex polygon is the target rectangle.
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.015 * peri, True)
    if len(approx) == 4:
        cv2.drawContours(image, [c], 0, (36, 255, 12), 3)
        transformed = perspective_transform(original, approx)

cv2.imshow('thresh', thresh)
cv2.imshow('image', image)
cv2.imshow('transformed', transformed)
cv2.waitKey()