使用 Mask R-CNN 错误训练自定义 COCO 数据集

Error Training Custom COCO Dataset with Mask R-CNN

我正尝试在 Tensorflow/Keras 上使用 Matterport 的 Mask R-CNN 训练自定义 COCO 格式数据集。我的数据集是具有上述 COCO 格式的 json 文件,“注释”部分中的每个项目如下所示:

有 20 个 classes,带有整个对象的多边形蒙版,然后是对象内各部分的多边形蒙版。图像的形状为 256x448x3。

准备数据集的代码如下:

class CocoLikeDataset(utils.Dataset):
    """Generates a COCO-like dataset, i.e. an image dataset annotated in the
    style of the COCO dataset.
    See http://cocodataset.org/#home for more information.
    """

    def load_data(self, annotation_json, images_dir):
        """ Load the coco-like dataset from json
        Args:
            annotation_json: The path to the coco annotations json file
            images_dir: The directory holding the images referred to by the json file
        """
        # Load json from file; 'with' guarantees the handle is closed even on error.
        with open(annotation_json) as json_file:
            coco_json = json.load(json_file)

        # Add the class names using the base method from utils.Dataset.
        # NOTE: ids are shifted by +1 because this dataset's category ids
        # start at 0, and Mask R-CNN reserves class id 0 for the background.
        # load_mask() below must apply the SAME +1 offset.
        source_name = "coco_like"
        for category in coco_json['categories']:
            class_id = category['id'] + 1
            class_name = category['name']
            if class_id < 1:
                print('Error: Class id for "{}" cannot be less than one. (0 is reserved for the background)'.format(class_name))
                return

            self.add_class(source_name, class_id, class_name)

        # Group all annotations by the image they belong to.
        annotations = {}
        for annotation in coco_json['annotations']:
            annotations.setdefault(annotation['image_id'], []).append(annotation)

        # Get all images and add them to the dataset.
        seen_images = {}
        for image in coco_json['images']:
            image_id = image['id']
            if image_id in seen_images:
                print("Warning: Skipping duplicate image id: {}".format(image))
                continue
            seen_images[image_id] = image
            try:
                image_file_name = image['file_name']
                image_width = image['width']
                image_height = image['height']
            except KeyError as key:
                print("Warning: Skipping image (id: {}) with missing key: {}".format(image_id, key))
                # BUG FIX: the original fell through and used the (possibly
                # undefined) variables anyway; actually skip this image.
                continue

            image_path = os.path.abspath(os.path.join(images_dir, image_file_name))
            # BUG FIX: images with zero annotations used to raise KeyError here.
            image_annotations = annotations.get(image_id, [])

            # Add the image using the base method from utils.Dataset
            self.add_image(
                source=source_name,
                image_id=image_id,
                path=image_path,
                width=image_width,
                height=image_height,
                annotations=image_annotations
            )

    def load_mask(self, image_id):
        """ Load instance masks for the given image.
        MaskRCNN expects masks in the form of a bitmap [height, width, instances].
        Args:
            image_id: The id of the image to load masks for
        Returns:
            masks: A bool array of shape [height, width, instance count] with
                one mask per instance.
            class_ids: a 1D array of class IDs of the instance masks.
        """
        image_info = self.image_info[image_id]
        annotations = image_info['annotations']
        instance_masks = []
        class_ids = []

        for annotation in annotations:
            # BUG FIX: keep the +1 offset consistent with load_data(), which
            # registers classes as category['id'] + 1 (0 = background).
            class_id = annotation['category_id'] + 1
            mask = Image.new('1', (image_info['width'], image_info['height']))
            mask_draw = ImageDraw.ImageDraw(mask, '1')
            # All polygons of one annotation belong to a single instance, so
            # draw them all first and append exactly ONE mask per annotation.
            # (The original appended inside this loop, producing duplicate,
            # partially-drawn instance masks.)
            for segmentation in annotation['segmentation']:
                mask_draw.polygon(segmentation, fill=1)
            instance_masks.append(np.array(mask) > 0)
            class_ids.append(class_id)

        # BUG FIX: an image with no annotations would crash np.dstack([]);
        # return empty, correctly-shaped arrays instead.
        if not instance_masks:
            empty_mask = np.empty((image_info['height'], image_info['width'], 0), dtype=bool)
            return empty_mask, np.empty((0,), dtype=np.int32)

        mask = np.dstack(instance_masks)
        class_ids = np.array(class_ids, dtype=np.int32)

        return mask, class_ids

这段代码至少在加载图像和蒙版方面看起来工作正常:我对其进行了可视化测试,能够正确显示每张图像、对应的二值掩码以及每个掩码的类别 ID。

但是,当我实际尝试在创建的训练数据集上训练模型时,出现以下错误:

model = modellib.MaskRCNN(mode="training", config=config,
                          model_dir=MODEL_DIR)

---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-19-7928c4edfc77> in <module>()
      1 # Create model in training mode
      2 model = modellib.MaskRCNN(mode="training", config=config,
----> 3                           model_dir=MODEL_DIR)

3 frames
/content/Mask_RCNN/mrcnn/model.py in __init__(self, mode, config, model_dir)
   1835         self.model_dir = model_dir
   1836         self.set_log_dir()
-> 1837         self.keras_model = self.build(mode=mode, config=config)
   1838 
   1839     def build(self, mode, config):

/content/Mask_RCNN/mrcnn/model.py in build(self, mode, config)
   1927         # Anchors
   1928         if mode == "training":
-> 1929             anchors = self.get_anchors(config.IMAGE_SHAPE)
   1930             # Duplicate across the batch dimension because Keras requires it
   1931             # TODO: can this be optimized to avoid duplicating the anchors?

/content/Mask_RCNN/mrcnn/model.py in get_anchors(self, image_shape)
   2609                 backbone_shapes,
   2610                 self.config.BACKBONE_STRIDES,
-> 2611                 self.config.RPN_ANCHOR_STRIDE)
   2612             # Keep a copy of the latest anchors in pixel coordinates because
   2613             # it's used in inspect_model notebooks.

/content/Mask_RCNN/mrcnn/utils.py in generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides, anchor_stride)
    635     anchors = []
    636     for i in range(len(scales)):
--> 637         anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i],
    638                                         feature_strides[i], anchor_stride))
    639     return np.concatenate(anchors, axis=0)

IndexError: index 5 is out of bounds for axis 0 with size 5

我不知道这个错误实际上意味着什么,也不知道可能的解决方案是什么。我感觉这可能与 CocoLikeDataset class 格式化和处理数据的方式有关,但我不确定。

对于确定问题并解决问题的任何帮助,我们将不胜感激!

谢谢!

该 IndexError 的原因是 RPN_ANCHOR_SCALES 中的尺度数量(6 个)超过了 BACKBONE_STRIDES 对应的特征金字塔层数(5 个),于是 generate_pyramid_anchors 在索引第 6 个尺度时越界。将配置文件中的 RPN_ANCHOR_SCALES 变量设置为 5 个值即可:

RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)