使用 numpy meshgrid python 切割边界框

Cut a bounding box using numpy meshgrid python

我想使用 meshgrid 从以下尺寸创建边界框,但无法获得正确的框。

我的父图像(parent)尺寸是 x = 0 到 19541,y = 0 到 14394。我想从中剪切出一个框:x' = 4692 到 12720,y' = 4273 到 10117。

但是,我没有得到正确的界限。有人可以帮我吗?

from matplotlib.path import Path

        # Inclusive pixel bounds of the box to cut out of the parent image.
        xmin, xmax = 4692, 12720
        ymin, ymax = 4273, 10117
        # Closed polygon in parent-image coordinates (last vertex repeats the first).
        sar_ver = [(4692, 10117), (12720, 10117), (12658, 4274), (4769, 4273), (4692, 10117)]

        # Sample only the pixels inside the bounding box; the +1 keeps xmax / ymax
        # themselves in the grid because np.arange excludes its stop value.
        x, y = np.meshgrid(np.arange(xmin, xmax + 1), np.arange(ymin, ymax + 1))
        shx = x  # keep the 2-D grid so its shape can be restored after flattening
        x, y = x.flatten(), y.flatten()
        # One (x, y) row per sampled pixel, as expected by Path.contains_points.
        points = np.vstack((x, y)).T

        path = Path(sar_ver)
        # Flat boolean array: True where the pixel lies inside the polygon.
        grid = path.contains_points(points)

        # Back to (rows, cols) = (ymax - ymin + 1, xmax - xmin + 1).
        grid.shape = shx.shape # 5845 X 8029

        # Function-call form works on both Python 2 and Python 3; the bare
        # `print grid` statement is a SyntaxError on Python 3.
        print(grid)

更新:这是我尝试过的,我接近我想要的,但不完全是。我想将原始原点从 0 更改为图像的周围框,如预期输出所示。

我使用的更新代码是这个

from matplotlib.path import Path
    # Grid size: nx columns (x) by ny rows (y), both counted from the origin 0.
    nx, ny = 16886, 10079
    # Closed polygon outline (last vertex repeats the first).
    sar_ver = [(16886, 1085), (15139, 2122), (14475, 5226), (8419, 5601), (14046, 6876), (14147, 10079), (16816, 3748), (16886, 1085)]
    path = Path(sar_ver)

    # One (x, y) sample for every pixel of the nx-by-ny grid.
    x, y = np.meshgrid(np.arange(nx), np.arange(ny))
    x = x.ravel()
    y = y.ravel()
    points = np.column_stack((x, y))

    # Inside/outside test for every sample, folded back into (rows, cols).
    grid = path.contains_points(points)
    grid = grid.reshape((10079, 16886))

    # Scale the boolean mask to 0 / 255 so it can be stored as an 8-bit image.
    grid = grid * 255
    int_grid = grid.astype(np.uint8)
    grid_img = Image.fromarray(int_grid)
    grid_img.save('grid_image.png')  # ACTUAL OUTPUT IMAGE WITH ORIGIN NOT SHIFTED

输入几何:

预期输出是这样的:图像是否朝相反的方向旋转并不重要,但如果能正确对齐就更好了。

但是我现在得到这个所以我发布的更新代码的实际输出是这样的:

所以我想围绕盒子移动原点。

获取掩码后的边界框问题详细信息:此代码出现在第二次更新中发布的行之后 grid_img.save('grid_image.png') # ACTUAL OUTPUT IMAGE WITH ORIGIN NOT SHIFTED

这里 im 是实际图像的矩阵。im 的 x、y 最小值和最大值应该取多少,才能使它与掩码具有相同的形状,从而将两者相乘得到像素值,并把其余部分置为 0?

        img_x = 19541 # 0 - 19541  (image width: number of columns)
        img_y = 14394 # 0 - 14394  (image height: number of rows)
        # Raw big-endian float32 samples, converted to native float32.
        im = np.fromfile(binary_file_path, dtype='>f4')
        # NumPy images are indexed [row, col] = [y, x], so the shape must be
        # (img_y, img_x). With (img_x, img_y) the array has only 14394 columns and
        # the 16886-column crop below can never match the mask.
        # NOTE(review): assumes the file stores the image row by row
        # (img_y rows of img_x samples) -- confirm against the data format.
        im = np.reshape(im.astype(np.float32), (img_y, img_x))
        # Crop the image to the mask extent so both arrays have the same shape.
        im = im[:grid.shape[0], :grid.shape[1]]
        # `grid` was scaled to 0 / 255 before being saved as a PNG; reduce it to a
        # boolean mask so pixels inside the polygon keep their value (x1) and
        # everything else becomes 0.
        bb_list = np.multiply(grid.astype(bool), im)
        # slice and dice: find the rows / columns that contain any non-zero pixel
        slice_rows = np.any(bb_list, axis=1)
        slice_cols = np.any(bb_list, axis=0)
        # First and last non-empty row / column give the tight bounding box.
        ymin, ymax = np.where(slice_rows)[0][[0, -1]]
        xmin, xmax = np.where(slice_cols)[0][[0, -1]]
        answer = bb_list[ymin:ymax + 1, xmin:xmax + 1]
        # convert to uint8
        # NOTE(review): float32 values outside 0-255 will not survive this cast
        # unchanged -- rescale first if the data range is larger; TODO confirm.
        int_ans = answer.astype(np.uint8)
        fin_img = Image.fromarray(int_ans)
        fin_img.save('test_this.jpeg')

我的目标是从给定图像中裁剪出给定几何体的多边形。因此,我从该多边形中取出蒙版,然后使用该蒙版从原始图像中剪切出相同的蒙版。因此,将掩码的 1 和 0 与图像中的像素值相乘,得到 1* 像素值。

我尝试了以下方法来将实际图像裁剪成具有相同的尺寸,以便我可以乘以 np.multiply(im, mask) 但它不起作用,因为图像的形状没有被裁剪成与蒙版相同的形状。我在下面尝试了您的最小值和最大值,但没有用!

im = im[xmin:xmax, ymin:ymax]
ipdb> im.shape
(5975, 8994)
ipdb> mask.shape
(8994, 8467) 

很明显,我现在无法将 mask 和 im 相乘。

我认为您在第一次尝试中几乎是正确的,在第二次尝试中,您正在为完整图像构建 meshgrid 而您只想要形状遮罩,不是吗?

import numpy as np
import matplotlib as mpl
from matplotlib.path import Path
from matplotlib import patches
import matplotlib.pyplot as plt

from PIL import Image

# Closed polygon outline (last vertex repeats the first).
sar_ver = [(16886, 1085), (15139, 2122), (14475, 5226), (8419, 5601),
           (14046, 6876), (14147, 10079), (16816, 3748), (16886, 1085)]

path = Path(sar_ver)
# Integer bounding box of the polygon, flattened to xmin, ymin, xmax, ymax.
extents = np.asarray(path.get_extents(), dtype=int)
xmin, ymin, xmax, ymax = extents.ravel()

# Sample only the pixels inside the bounding box (slice stops are exclusive).
x, y = np.mgrid[xmin:xmax, ymin:ymax]
points = np.column_stack((x.ravel(), y.ravel()))

# mgrid is indexed [x, y]; transpose so the mask is laid out [y, x] like an image.
inside = path.contains_points(points)
mask = inside.reshape(x.shape).T

# Store the mask as an 8-bit image (0 outside, 255 inside).
mask_u8 = (mask * 255).astype(np.uint8)
img = Image.fromarray(mask_u8)
img.save('mask.png')


# plot shape and mask side by side for debug purposes
fig = plt.figure(figsize=(8, 4))

gs = mpl.gridspec.GridSpec(1, 2)
gs.update(wspace=0.2, hspace=0.01)

# left panel: the polygon itself, limited to its bounding box
ax = plt.subplot(gs[0])
patch = patches.PathPatch(path, facecolor='orange', lw=2)
ax.add_patch(patch)
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)

# right panel: the rasterised mask, drawn with the origin at the bottom
ax = plt.subplot(gs[1])
ax.imshow(mask, origin='lower')

plt.savefig("shapes.png", bbox_inches="tight", pad_inches=0)

生成掩码:

并且还绘制掩码和路径以用于调试目的:

不同的方向来自 matplotlib 绘图和图像中的不同原点位置,但它应该足够简单,可以按照您想要的方式进行更改。

在最新问题编辑后进行编辑

这是一个更新的脚本,它接收一张图像,为您的路径生成遮罩,并用该遮罩把对应区域剪切出来。我使用了一张虚拟图像,并把形状缩小了一些,以便更容易处理。

import numpy as np
import matplotlib as mpl
from matplotlib.path import Path
from matplotlib import patches
import matplotlib.pyplot as plt

import skimage.transform
import skimage.data

from PIL import Image

# Closed polygon outline (last vertex repeats the first).
sar_ver = np.asarray([(16886, 1085), (15139, 2122), (14475, 5226), (8419, 5601),
           (14046, 6876), (14147, 10079), (16816, 3748), (16886, 1085)])

# reshape into smaller path for faster debugging
sar_ver = sar_ver // 20

# create dummy image
img = skimage.data.chelsea()
# The sample image is RGB; mark the last axis as channels so that only the two
# spatial axes are upscaled. Without it, scikit-image >= 0.19 also rescales the
# colour axis (3 -> 6 channels) and the imshow calls below fail.
img = skimage.transform.rescale(img, 2, channel_axis=-1)

# matplotlib path
path = Path(sar_ver)

# create a mesh grid of the shape of the final mask (one sample per image pixel);
# mgrid is indexed [x, y], hence width first
x, y = np.mgrid[:img.shape[1], :img.shape[0]]
# mesh grid to points: one (x, y) row per pixel
points = np.vstack((x.ravel(), y.ravel())).T

# mask for the point included in the path; transpose to image layout [y, x]
mask = path.contains_points(points)
mask = mask.reshape(x.shape).T

# plots
fig = plt.figure(figsize=(8,6))
gs = mpl.gridspec.GridSpec(2,2)
gs.update(wspace=0.2, hspace= 0.2)

# image + patch
ax = plt.subplot(gs[0])
ax.imshow(img)
patch = patches.PathPatch(path, facecolor="None", edgecolor="cyan", lw=3)
ax.add_patch(patch)

# mask
ax = plt.subplot(gs[1])
ax.imshow(mask)

# filter image with mask (the new axis broadcasts the mask over the colour channels)
ax = plt.subplot(gs[2])
ax.imshow(img * mask[..., np.newaxis])

# remove mask from image
ax = plt.subplot(gs[3])
ax.imshow(img * ~mask[..., np.newaxis])

# plt.show()
plt.savefig("shapes.png", bbox_inches="tight", pad_inches=0)

我尝试了 OpenCV(cv2)库,在大图像上它似乎比 meshgrid 或 mgrid 更快。下面贴出 OpenCV 的解决方案:

import numpy as np
import cv2
import matplotlib.pyplot as plt
from matplotlib.path import Path

# Closed polygon outline, written once and reused for both OpenCV and matplotlib.
vertices = [[1688, 108], [1513, 212], [1447, 522], [841, 560],
            [1404, 687], [1414, 1007], [1681, 374], [1688, 108]]

# cv2.fillPoly takes an array of polygons, hence the extra leading axis.
sar_ver = np.array([vertices], 'int32')
# Function-call form works on both Python 2 and Python 3; the bare
# `print sar_ver.shape` statement is a SyntaxError on Python 3.
print(sar_ver.shape)
# Blank canvas indexed [row, col]; fillPoly paints the polygon interior in place.
mask=np.zeros((1439, 1954))
cv2.fillPoly(mask, sar_ver, 255)

# Same polygon as a plain (N, 2) array, used only to get its bounding box.
sar_ver = np.asarray(vertices)
path = Path(sar_ver)
xmin, ymin, xmax, ymax = np.asarray(path.get_extents(), dtype=int).ravel()

# Show only the bounding box of the polygon (+1 keeps the max row / column).
plt.imshow(mask[ymin:ymax+1, xmin:xmax+1])
plt.show()

此外,在 Filippo 的帮助下和在线聊天中发布了 mgrid 解决方案:

import cv2
from matplotlib.path import Path
from PIL import Image
import numpy as np
# Closed triangle outline (last vertex repeats the first).
sar_ver = np.asarray([(1518, 2024), (2018, 2024), (1518, 2524), (1518, 2024)])

# Load the test image and reduce it to a single grey channel.
imag = cv2.imread('test_image.jpg')
img = cv2.cvtColor(imag, cv2.COLOR_BGR2GRAY)
h, w = img.shape

path = Path(sar_ver)
# Integer bounding box of the polygon, flattened to xmin, ymin, xmax, ymax.
extents = np.asarray(path.get_extents(), dtype=int)
xmin, ymin, xmax, ymax = extents.ravel()

# One (x, y) sample per image pixel; mgrid is indexed [x, y], hence width first.
x, y = np.mgrid[:w, :h]
points = np.column_stack((x.ravel(), y.ravel()))

# Inside/outside test per pixel, transposed back to image layout [y, x].
inside = path.contains_points(points)
mask = inside.reshape(x.shape).T

# Zero everything outside the polygon, then crop to its bounding box.
im = np.array(img)
masked = im * mask
bb = masked[ymin:ymax+1, xmin:xmax+1]

# Save the crop (plt.show would work as well).
int_ans = bb.astype(np.uint8)
fin = Image.fromarray(int_ans)
fin.save('crop_test.png')