Python 图像类型、形状和分割通道

Question

我正在按照这篇教程在 PyTorch 中进行实例分割。教程使用的测试数据包含图像及其对应的图像掩码，该数据集可在此处获取。我在这里提供了该数据集中一个图像掩码的示例（这个问题的示例数据）。该掩码在数据集中默认情况下如下所示：

教程使用了这段代码：

# Assign a display color to each palette index of the mask. This is used only
# to make the mask's contents visible; the segmentation code does not need it.
mask.putpalette([
    0, 0, 0, # black background
    255, 0, 0, # index 1 is red
    255, 255, 0, # index 2 is yellow
    255, 153, 0, # index 3 is orange
])

作为遮罩的解释步骤，使其看起来像这样：

但该代码在分割过程本身中并不是必需的。它只是用来显示掩码包含的内容。

我正在尝试使用我自己的图像数据。我在 GIMP 中为自己的图像创建了掩码。这是其中一个掩码，默认情况下看起来像这样。

当我尝试运行教程代码时，我遇到了掩码问题。此代码块创建一个 class，用于创建 PyTorch 数据集。

import os
import numpy as np
import torch
import torch.utils.data
from PIL import Image


class PennFudanDataset(torch.utils.data.Dataset):
    """Dataset pairing each image with its per-instance segmentation target.

    Images are read from ``<root>/PNGImages`` and masks from
    ``<root>/PedMasks``. Both directory listings are sorted so that the
    i-th image lines up with the i-th mask. Every mask must be a
    single-channel image whose pixel values are instance ids, with 0
    meaning background.
    """

    def __init__(self, root, transforms=None):
        """Remember the dataset location and index its files.

        Args:
            root: Directory containing the ``PNGImages`` and ``PedMasks``
                sub-directories.
            transforms: Optional callable invoked as
                ``transforms(img, target)``; it must return the
                transformed ``(img, target)`` pair.
        """
        self.root = root
        self.transforms = transforms
        # load all image files, sorting them to
        # ensure that they are aligned
        self.imgs = list(sorted(os.listdir(os.path.join(root, "PNGImages"))))
        self.masks = list(sorted(os.listdir(os.path.join(root, "PedMasks"))))

    def __getitem__(self, idx):
        """Load sample ``idx`` and return it as ``(img, target)``.

        Returns:
            A tuple of the RGB image and the target dict described in
            ``_target_from_mask`` (after ``transforms``, when given).

        Raises:
            ValueError: If the mask file does not decode to a
                two-dimensional array (for example an RGB/RGBA export).
        """
        img_path = os.path.join(self.root, "PNGImages", self.imgs[idx])
        mask_path = os.path.join(self.root, "PedMasks", self.masks[idx])

        # ``convert`` returns an independent image, so the file can be
        # closed as soon as the conversion is done.
        with Image.open(img_path) as img_file:
            img = img_file.convert("RGB")

        # The mask is deliberately NOT converted to RGB: each pixel value
        # identifies a different instance, with 0 being background.
        with Image.open(mask_path) as mask_file:
            mask = np.array(mask_file)

        target = self._target_from_mask(mask, idx)

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    @staticmethod
    def _target_from_mask(mask, idx):
        """Build the target dict from an instance-id mask array.

        Args:
            mask: Two-dimensional integer array; 0 is background and every
                other distinct value is one object instance.
            idx: Sample index, stored as ``image_id``.

        Returns:
            Dict with ``boxes`` (N x 4 float32, ``[xmin, ymin, xmax, ymax]``),
            ``labels`` (N int64 ones), ``masks`` (N x H x W uint8),
            ``image_id``, ``area`` and ``iscrowd``.

        Raises:
            ValueError: If ``mask`` is not two-dimensional.
        """
        if mask.ndim != 2:
            # A multi-channel mask would either fail with an obscure error
            # or broadcast silently into meaningless binary masks below.
            raise ValueError(
                "expected a single-channel (H, W) instance mask, got an "
                f"array of shape {mask.shape}; save the mask as an indexed "
                "or grayscale image with one integer id per instance"
            )

        # instances are encoded as different values; drop the background
        # id explicitly so that a mask without any background pixel does
        # not lose its first instance
        obj_ids = np.unique(mask)
        obj_ids = obj_ids[obj_ids != 0]

        # split the id-encoded mask into a stack of binary masks,
        # one (H, W) slice per instance
        masks = mask == obj_ids[:, None, None]

        # get bounding box coordinates for each mask
        num_objs = len(obj_ids)
        boxes = []
        for instance_mask in masks:
            rows, cols = np.where(instance_mask)
            boxes.append(
                [int(cols.min()), int(rows.min()), int(cols.max()), int(rows.max())]
            )

        # reshape keeps the (N, 4) layout even when there are no instances
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        # there is only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)
        masks = torch.as_tensor(masks, dtype=torch.uint8)

        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        return {
            "boxes": boxes,
            "labels": labels,
            "masks": masks,
            "image_id": image_id,
            "area": area,
            "iscrowd": iscrowd,
        }

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.imgs)

# Build the dataset from the PennFudanPed folder, without any transforms.
dataset = PennFudanDataset('PennFudanPed/')
# Indexing calls __getitem__, which returns an (image, target) pair.
dataset[0]

最后一行返回：

(<PIL.Image.Image image mode=RGB size=559x536 at 0x7FCB4267C390>,
 {'area': tensor([35358., 36225.]), 'boxes': tensor([[159., 181., 301., 430.],
          [419., 170., 534., 485.]]), 'image_id': tensor([0]), 'iscrowd': tensor([0, 0]), 'labels': tensor([1, 1]), 'masks': tensor([[[0, 0, 0,  ..., 0, 0, 0],
           [0, 0, 0,  ..., 0, 0, 0],
           [0, 0, 0,  ..., 0, 0, 0],
           ...,
           [0, 0, 0,  ..., 0, 0, 0],
           [0, 0, 0,  ..., 0, 0, 0],
           [0, 0, 0,  ..., 0, 0, 0]],
  
          [[0, 0, 0,  ..., 0, 0, 0],
           [0, 0, 0,  ..., 0, 0, 0],
           [0, 0, 0,  ..., 0, 0, 0],
           ...,
           [0, 0, 0,  ..., 0, 0, 0],
           [0, 0, 0,  ..., 0, 0, 0],
           [0, 0, 0,  ..., 0, 0, 0]]], dtype=torch.uint8)})

当我运行这段代码和我的数据时，

...
# Same access pattern, but with the custom data folder.
# NOTE(review): four_chs is not shown here; presumably it is the author's
# copy of the tutorial dataset class - confirm.
dataset = four_chs('drive/MyDrive/chambers/')
dataset[0]

我收到这个错误：

/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:38: DeprecationWarning: elementwise comparison failed; this will raise an error in the future.

---------------------------------------------------------------------------

TypeError                                 Traceback (most recent call last)

<ipython-input-17-12074ae9ab35> in <module>()
      1 len(dataset)
----> 2 dataset[0]

<ipython-input-1-99ab92a46ebe> in __getitem__(self, idx)
     42         boxes = []
     43         for i in range(num_objs):
---> 44             pos = np.where(masks[i])
     45             xmin = np.min(pos[1])
     46             xmax = np.max(pos[1])

TypeError: 'bool' object is not subscriptable

我不确定到底发生了什么，但我的掩码与测试数据中的掩码有所不同。它们都是 PNG 文件，但从数组对象的形状来看，我的掩码似乎除了相互分开的红、绿、蓝通道之外还有另一个通道，而我不知道它是什么。下面的输出来自我制作的其中一个掩码：

# Shape of the array built from one of the hand-made masks.
mask2 = np.array(mask1)
mask2.shape
# Output: a third axis of length 4, i.e. four values per pixel
# (presumably RGBA - confirm).
(5312, 2988, 4)

对于其中一个测试数据掩码：

# Shape of the array built from one of the tutorial's test masks.
mask2 = np.array(mask)
mask2.shape
# Output: only two axes, i.e. a single value per pixel.
(536, 559)

好像只有一个通道？因此，由于它们的形状不同，我想这就是我在下面这段代码中得到错误的原因（这是我之前粘贴的代码的摘录）：

...
        mask_path = os.path.join(self.root, "masks", self.masks[idx])
        # note that we haven't converted the mask to RGB,
        # because each color corresponds to a different instance
        # with 0 being background
        mask = Image.open(mask_path)

        mask = np.array(mask)
        # instances are encoded as different colors
        obj_ids = np.unique(mask)
        # first id is the background, so remove it
        obj_ids = obj_ids[1:]

        # split the color-encoded mask into a set
        # of binary masks
        masks = mask == obj_ids[:, None, None]

        # get bounding box coordinates for each mask
        num_objs = len(obj_ids)
for i in range(num_objs):
             pos = np.where(masks[i])
...

如何让我的蒙版形状与测试数据中的蒙版形状匹配，以便我可以使用其余的分割代码创建我的 PyTorch 兼容数据集，该数据集将与分割算法一起使用？我不是想得到相同的高度和宽度，而是改变channels/layers的数量，但我不认为我希望它是灰度的。

在 HansHirse 的评论后编辑：

我返回 G.I.M.P 并使用“图像模式”菜单将图像更改为灰度。我用那个设置导出。我尝试运行那个文件的代码，但没有用。

我还找到了一种方法，在导入时使用 Image.open().convert("L") 将 RGB 图像转换为灰度图像。这也不行。

在这两种情况下，问题都与我认为分开的颜色斑点混合在一起有关。例如，我使用 HansHirse 的建议用 1、2、3 和 4 的灰色“颜色”填充感兴趣的区域，而背景保持为 0。导入创建的文件后，这些文件的值为 3， 5,8 和 10。虽然其中一个形状的值可能大部分为 3，但在其他形状中有具有该值的游离像素，因此没有一个值完全包含在一个形状中。在这种情况下，代码绘制包围所有 4 个形状的边界框，而不是围绕一个形状。

我了解到色调、饱和度、明度（HSV）颜色空间，并尝试将图像转换到该颜色空间。这仍然不能解决我的问题。

我正在尝试弄清楚如何使用

np.where( mask[<buffered shape1 xmin>,<buffered shape1 xmax>, <buffered shape1 ymin>, <buffered shape1 ymax>,0] == <majority color value for shape>)

来把掩码大致划分为四个区域，在每个区域内按该形状的主要颜色值进行筛选，从而得到该形状实际的 x 和 y 坐标。有了这些坐标，我想我可以用其中的最小值和最大值来创建边界框。

Answer 1

以下是如何为分割任务或类似任务创建表示类的灰度图像的示例。

在黑色背景上，绘制一些填充值在 1, ..., #classes 范围内的形状。出于可视化目的，下面把这个掩码绘制了两次：一次按常规灰度图像（取值范围 0–255）显示，一次缩放到上述取值范围显示——以强调掩码通常看起来全黑，但其中确实有实际内容。这个掩码被保存为无损 PNG 图片，然后用 Pillow 打开并转换为 P 模式。最后一步是为所需数量的颜色设置合适的调色板，并使用 Image.putpalette 应用该调色板。

import cv2
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image

# Draw a label mask on a black canvas: pixel value 0 is background and
# values 1..4 each mark one class.
class_mask = np.zeros((300, 300), dtype=np.uint8)
cv2.rectangle(class_mask, (30, 40), (75, 60), 1, cv2.FILLED)
cv2.circle(class_mask, (230, 50), 85, 2, cv2.FILLED)
cv2.ellipse(class_mask, (230, 230), (60, 40), 0, 0, 360, 3, cv2.FILLED)
cv2.line(class_mask, (20, 240), (80, 260), 4, 5)

# PNG is lossless, so the label values are stored exactly as drawn.
cv2.imwrite('mask.png', class_mask)

# One figure with three panels side by side.
plt.figure(1, figsize=(18, 6))

# Panel 1: rendered against the full 0-255 gray range.
plt.subplot(1, 3, 1)
plt.imshow(class_mask, vmin=0, vmax=255, cmap='gray')
plt.colorbar()
plt.title('Mask when shown as regular image')

# Panel 2: gray range stretched to the values actually present.
plt.subplot(1, 3, 2)
plt.imshow(class_mask, cmap='gray')
plt.colorbar()
plt.title('Mask when shown scaled to values 0 - 4')

# Reload the saved mask with Pillow as a palette ('P' mode) image.
indexed_mask = Image.open('mask.png').convert('P')

# Flat R, G, B triples; the position of a triple is the index it colors.
class_palette = [
    0, 0, 0,        # Background - Black
    255, 0, 0,      # Class 1 - Red
    0, 255, 0,      # Class 2 - Green
    0, 0, 255,      # Class 3 - Blue
    255, 255, 0,    # Class 4 - Yellow
]
indexed_mask.putpalette(class_palette)

# Panel 3: the palette image, one color per class.
plt.subplot(1, 3, 3)
plt.imshow(indexed_mask)
plt.title('Mask when shown as indexed image')
plt.tight_layout()
plt.show()

当然，生成实际蒙版的第一步可以在 GIMP 中完成。请务必使用黑色背景，并填写 1, ..., #classes 范围内的值。如果因为这些颜色几乎都是黑色而难以做到这一点，请用一些明亮、可区分的颜色绘制形状，然后用值 1、2 等填充它们。

----------------------------------------
System information
----------------------------------------
Platform:      Windows-10-10.0.19041-SP0
Python:        3.9.1
PyCharm:       2021.1.1
Matplotlib:    3.4.2
NumPy:         1.20.3
OpenCV:        4.5.2
Pillow:        8.2.0
----------------------------------------

Python 图像类型、形状和分割通道

Python image types, shapes, and channels for segmentation

python

image-segmentation

python-imaging-library

pytorch