使用 Mask R-CNN 训练自定义 COCO 数据集时出错
Error Training Custom COCO Dataset with Mask R-CNN
我正尝试在 Tensorflow/Keras 上使用 Matterport 的 Mask R-CNN 训练自定义 COCO 格式数据集。我的数据集是具有上述 COCO 格式的 json 文件,“注释”部分中的每个项目如下所示:
有 20 个 classes,带有整个对象的多边形蒙版,然后是对象内各部分的多边形蒙版。图像的形状为 256x448x3。
准备数据集的代码如下:
class CocoLikeDataset(utils.Dataset):
    """Dataset for images annotated in the style of the COCO dataset.

    See http://cocodataset.org/#home for more information.
    """

    def load_data(self, annotation_json, images_dir):
        """Load the COCO-like dataset from a JSON annotation file.

        Registers every category via ``add_class`` and every image via
        ``add_image``. Duplicate image ids and image records that lack
        ``file_name``, ``width`` or ``height`` are skipped with a warning.
        Images without any annotation are registered with an empty
        annotation list.

        Args:
            annotation_json: The path to the coco annotations json file
            images_dir: The directory holding the images referred to by the json file
        """
        # The context manager closes the file even if json.load raises.
        with open(annotation_json) as json_file:
            coco_json = json.load(json_file)

        # Add the class names using the base method from utils.Dataset
        source_name = "coco_like"
        for category in coco_json['categories']:
            # NOTE(review): ids are registered shifted by +1 here, but
            # load_mask returns the raw annotation['category_id'] -- confirm
            # which numbering the dataset uses and make the two agree.
            class_id = category['id'] + 1
            class_name = category['name']
            if class_id < 1:
                print('Error: Class id for "{}" cannot be less than one. (0 is reserved for the background)'.format(class_name))
                return
            self.add_class(source_name, class_id, class_name)

        # Group all annotations by the image they belong to
        annotations = {}
        for annotation in coco_json['annotations']:
            annotations.setdefault(annotation['image_id'], []).append(annotation)

        # Get all images and add them to the dataset
        seen_image_ids = set()
        for image in coco_json['images']:
            image_id = image['id']
            if image_id in seen_image_ids:
                print("Warning: Skipping duplicate image id: {}".format(image))
                continue
            seen_image_ids.add(image_id)

            try:
                image_file_name = image['file_name']
                image_width = image['width']
                image_height = image['height']
            except KeyError as key:
                print("Warning: Skipping image (id: {}) with missing key: {}".format(image_id, key))
                # Actually skip: without this the code below would run with
                # undefined values or values left over from the previous image.
                continue

            image_path = os.path.abspath(os.path.join(images_dir, image_file_name))
            # An image may have no annotations at all; avoid a KeyError.
            image_annotations = annotations.get(image_id, [])

            # Add the image using the base method from utils.Dataset
            self.add_image(
                source=source_name,
                image_id=image_id,
                path=image_path,
                width=image_width,
                height=image_height,
                annotations=image_annotations
            )

    def load_mask(self, image_id):
        """Load instance masks for the given image.

        MaskRCNN expects masks in the form of a bitmap [height, width, instances].

        Args:
            image_id: The id of the image to load masks for
        Returns:
            masks: A bool array of shape [height, width, instance count] with
                one mask per instance. The instance count is 0 when the image
                has no annotations.
            class_ids: a 1D int32 array of class IDs of the instance masks.
        """
        image_info = self.image_info[image_id]
        width = image_info['width']
        height = image_info['height']

        instance_masks = []
        class_ids = []
        for annotation in image_info['annotations']:
            # One binary canvas per annotation; every polygon of the
            # annotation is drawn onto that same canvas.
            # Assumes 'segmentation' is a list of polygons -- TODO confirm
            # no RLE-encoded annotations are present.
            mask = Image.new('1', (width, height))
            mask_draw = ImageDraw.ImageDraw(mask, '1')
            for segmentation in annotation['segmentation']:
                mask_draw.polygon(segmentation, fill=1)
            instance_masks.append(np.array(mask) > 0)
            class_ids.append(annotation['category_id'])

        if instance_masks:
            mask = np.dstack(instance_masks)
        else:
            # np.dstack raises ValueError on an empty list; return an empty
            # stack with the documented layout instead.
            mask = np.zeros((height, width, 0), dtype=bool)
        class_ids = np.array(class_ids, dtype=np.int32)
        return mask, class_ids
这段代码至少在加载图像和掩码方面看起来工作正常:我对部分图像做了可视化测试,可以正确看到每张图像、各个二值掩码以及每个掩码对应的 class ID。
但是,当我实际尝试在创建的训练数据集上训练模型时,出现以下错误:
# Create model in training mode.
# Per the traceback, the constructor builds the Keras model right away and,
# in training mode, generates anchors from config.IMAGE_SHAPE.
model = modellib.MaskRCNN(mode="training", config=config,
model_dir=MODEL_DIR)
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-19-7928c4edfc77> in <module>()
1 # Create model in training mode
2 model = modellib.MaskRCNN(mode="training", config=config,
----> 3 model_dir=MODEL_DIR)
3 frames
/content/Mask_RCNN/mrcnn/model.py in __init__(self, mode, config, model_dir)
1835 self.model_dir = model_dir
1836 self.set_log_dir()
-> 1837 self.keras_model = self.build(mode=mode, config=config)
1838
1839 def build(self, mode, config):
/content/Mask_RCNN/mrcnn/model.py in build(self, mode, config)
1927 # Anchors
1928 if mode == "training":
-> 1929 anchors = self.get_anchors(config.IMAGE_SHAPE)
1930 # Duplicate across the batch dimension because Keras requires it
1931 # TODO: can this be optimized to avoid duplicating the anchors?
/content/Mask_RCNN/mrcnn/model.py in get_anchors(self, image_shape)
2609 backbone_shapes,
2610 self.config.BACKBONE_STRIDES,
-> 2611 self.config.RPN_ANCHOR_STRIDE)
2612 # Keep a copy of the latest anchors in pixel coordinates because
2613 # it's used in inspect_model notebooks.
/content/Mask_RCNN/mrcnn/utils.py in generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides, anchor_stride)
635 anchors = []
636 for i in range(len(scales)):
--> 637 anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i],
638 feature_strides[i], anchor_stride))
639 return np.concatenate(anchors, axis=0)
IndexError: index 5 is out of bounds for axis 0 with size 5
我不知道这个错误实际上意味着什么,也不知道可能的解决方案是什么。我感觉这可能与 CocoLikeDataset class 格式化和处理数据的方式有关,但我不确定。
对于确定问题并解决问题的任何帮助,我们将不胜感激!
谢谢!
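出现该错误是因为 RPN_ANCHOR_SCALES 中的尺度个数超过了特征层的数量(5 个):generate_pyramid_anchors 按 scales 的长度循环,并用同一个下标访问 feature_shapes,因此下标 5 越界。请将配置文件中的 RPN_ANCHOR_SCALES 变量设置为恰好 5 个值: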
# Exactly five scales: generate_pyramid_anchors loops over len(scales) and
# indexes feature_shapes with the same index, and the IndexError reports that
# the indexed array has size 5.
RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)
我正尝试在 Tensorflow/Keras 上使用 Matterport 的 Mask R-CNN 训练自定义 COCO 格式数据集。我的数据集是具有上述 COCO 格式的 json 文件,“注释”部分中的每个项目如下所示:
有 20 个 classes,带有整个对象的多边形蒙版,然后是对象内各部分的多边形蒙版。图像的形状为 256x448x3。
准备数据集的代码如下:
class CocoLikeDataset(utils.Dataset):
    """Dataset for images annotated in the style of the COCO dataset.

    See http://cocodataset.org/#home for more information.
    """

    def load_data(self, annotation_json, images_dir):
        """Load the COCO-like dataset from a JSON annotation file.

        Registers every category via ``add_class`` and every image via
        ``add_image``. Duplicate image ids and image records that lack
        ``file_name``, ``width`` or ``height`` are skipped with a warning.
        Images without any annotation are registered with an empty
        annotation list.

        Args:
            annotation_json: The path to the coco annotations json file
            images_dir: The directory holding the images referred to by the json file
        """
        # The context manager closes the file even if json.load raises.
        with open(annotation_json) as json_file:
            coco_json = json.load(json_file)

        # Add the class names using the base method from utils.Dataset
        source_name = "coco_like"
        for category in coco_json['categories']:
            # NOTE(review): ids are registered shifted by +1 here, but
            # load_mask returns the raw annotation['category_id'] -- confirm
            # which numbering the dataset uses and make the two agree.
            class_id = category['id'] + 1
            class_name = category['name']
            if class_id < 1:
                print('Error: Class id for "{}" cannot be less than one. (0 is reserved for the background)'.format(class_name))
                return
            self.add_class(source_name, class_id, class_name)

        # Group all annotations by the image they belong to
        annotations = {}
        for annotation in coco_json['annotations']:
            annotations.setdefault(annotation['image_id'], []).append(annotation)

        # Get all images and add them to the dataset
        seen_image_ids = set()
        for image in coco_json['images']:
            image_id = image['id']
            if image_id in seen_image_ids:
                print("Warning: Skipping duplicate image id: {}".format(image))
                continue
            seen_image_ids.add(image_id)

            try:
                image_file_name = image['file_name']
                image_width = image['width']
                image_height = image['height']
            except KeyError as key:
                print("Warning: Skipping image (id: {}) with missing key: {}".format(image_id, key))
                # Actually skip: without this the code below would run with
                # undefined values or values left over from the previous image.
                continue

            image_path = os.path.abspath(os.path.join(images_dir, image_file_name))
            # An image may have no annotations at all; avoid a KeyError.
            image_annotations = annotations.get(image_id, [])

            # Add the image using the base method from utils.Dataset
            self.add_image(
                source=source_name,
                image_id=image_id,
                path=image_path,
                width=image_width,
                height=image_height,
                annotations=image_annotations
            )

    def load_mask(self, image_id):
        """Load instance masks for the given image.

        MaskRCNN expects masks in the form of a bitmap [height, width, instances].

        Args:
            image_id: The id of the image to load masks for
        Returns:
            masks: A bool array of shape [height, width, instance count] with
                one mask per instance. The instance count is 0 when the image
                has no annotations.
            class_ids: a 1D int32 array of class IDs of the instance masks.
        """
        image_info = self.image_info[image_id]
        width = image_info['width']
        height = image_info['height']

        instance_masks = []
        class_ids = []
        for annotation in image_info['annotations']:
            # One binary canvas per annotation; every polygon of the
            # annotation is drawn onto that same canvas.
            # Assumes 'segmentation' is a list of polygons -- TODO confirm
            # no RLE-encoded annotations are present.
            mask = Image.new('1', (width, height))
            mask_draw = ImageDraw.ImageDraw(mask, '1')
            for segmentation in annotation['segmentation']:
                mask_draw.polygon(segmentation, fill=1)
            instance_masks.append(np.array(mask) > 0)
            class_ids.append(annotation['category_id'])

        if instance_masks:
            mask = np.dstack(instance_masks)
        else:
            # np.dstack raises ValueError on an empty list; return an empty
            # stack with the documented layout instead.
            mask = np.zeros((height, width, 0), dtype=bool)
        class_ids = np.array(class_ids, dtype=np.int32)
        return mask, class_ids
这段代码至少在加载图像和掩码方面看起来工作正常:我对部分图像做了可视化测试,可以正确看到每张图像、各个二值掩码以及每个掩码对应的 class ID。
但是,当我实际尝试在创建的训练数据集上训练模型时,出现以下错误:
# Create model in training mode.
# Per the traceback, the constructor builds the Keras model right away and,
# in training mode, generates anchors from config.IMAGE_SHAPE.
model = modellib.MaskRCNN(mode="training", config=config,
model_dir=MODEL_DIR)
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-19-7928c4edfc77> in <module>()
1 # Create model in training mode
2 model = modellib.MaskRCNN(mode="training", config=config,
----> 3 model_dir=MODEL_DIR)
3 frames
/content/Mask_RCNN/mrcnn/model.py in __init__(self, mode, config, model_dir)
1835 self.model_dir = model_dir
1836 self.set_log_dir()
-> 1837 self.keras_model = self.build(mode=mode, config=config)
1838
1839 def build(self, mode, config):
/content/Mask_RCNN/mrcnn/model.py in build(self, mode, config)
1927 # Anchors
1928 if mode == "training":
-> 1929 anchors = self.get_anchors(config.IMAGE_SHAPE)
1930 # Duplicate across the batch dimension because Keras requires it
1931 # TODO: can this be optimized to avoid duplicating the anchors?
/content/Mask_RCNN/mrcnn/model.py in get_anchors(self, image_shape)
2609 backbone_shapes,
2610 self.config.BACKBONE_STRIDES,
-> 2611 self.config.RPN_ANCHOR_STRIDE)
2612 # Keep a copy of the latest anchors in pixel coordinates because
2613 # it's used in inspect_model notebooks.
/content/Mask_RCNN/mrcnn/utils.py in generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides, anchor_stride)
635 anchors = []
636 for i in range(len(scales)):
--> 637 anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i],
638 feature_strides[i], anchor_stride))
639 return np.concatenate(anchors, axis=0)
IndexError: index 5 is out of bounds for axis 0 with size 5
我不知道这个错误实际上意味着什么,也不知道可能的解决方案是什么。我感觉这可能与 CocoLikeDataset class 格式化和处理数据的方式有关,但我不确定。
对于确定问题并解决问题的任何帮助,我们将不胜感激!
谢谢!
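出现该错误是因为 RPN_ANCHOR_SCALES 中的尺度个数超过了特征层的数量(5 个):generate_pyramid_anchors 按 scales 的长度循环,并用同一个下标访问 feature_shapes,因此下标 5 越界。请将配置文件中的 RPN_ANCHOR_SCALES 变量设置为恰好 5 个值: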
# Exactly five scales: generate_pyramid_anchors loops over len(scales) and
# indexes feature_shapes with the same index, and the IndexError reports that
# the indexed array has size 5.
RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)