为什么我会得到多个边界框?
why am I getting multiple bounding boxes?
我正在尝试将图像对象检测器转变为视频对象检测器。
但是,我得到了多个边界框,但我不知道为什么。
视频的第一帧似乎具有正确数量的边界框,即 1。但是当它循环时,函数 draw_boxes
输出具有多个或重叠边界框的图像。
如果您能提供帮助,我将不胜感激。谢谢
这里是一些框架的例子:
这是代码:
for i in tqdm(range(nb_frames)):
_, frame = video_reader.read()
cv2.imwrite("framey.jpg", frame)
filename = "framey.jpg"
image, image_w, image_h = load_image_pixels(filename, (input_w, input_h))
yhat = model.predict(image)
for i in range(len(yhat)):
# decode the output of the network
boxes += decode_netout(yhat[i][0], anchors[i], class_threshold, input_h, input_w)
# correct the sizes of the bounding boxes for the shape of the image
correct_yolo_boxes(boxes, image_h, image_w, input_h, input_w)
# suppress non-maximal boxes
do_nms(boxes, 0.5)
# get the details of the detected objects
v_boxes, v_labels, v_scores = get_boxes(boxes, labels, class_threshold)
# draw what we found
imagex = draw_boxes(filename, v_boxes, v_labels, v_scores)
video_writer.write(imagex)
video_reader.release()
video_writer.release()
这里是生成上图的函数:
def draw_boxes(filename, v_boxes, v_labels, v_scores):
# load the image
data = pyplot.imread(filename)
# plot the image
pyplot.imshow(data)
# get the context for drawing boxes
ax = pyplot.gca()
# plot each box
for i in range(len(v_boxes)):
box = v_boxes[i]
# get coordinates
y1, x1, y2, x2 = box.ymin, box.xmin, box.ymax, box.xmax
# calculate width and height of the box
width, height = x2 - x1, y2 - y1
# create the shape
rect = Rectangle((x1, y1), width, height, fill=False, color='white')
# draw the box
ax.add_patch(rect)
# draw text and score in top left corner
label = "%s (%.3f)" % (v_labels[i], v_scores[i])
pyplot.text(x1, y1, label, color='white')
# show the plot
pyplot.savefig('detected.jpg')
filename = "detected.jpg"
image = load_img(filename)
image_array = img_to_array(image)
image_array = (image_array*255).astype(np.uint8)
return image_array
因此,错误出在 'draw_boxes' 函数中。
我更改了 'draw_boxes',它成功了。
def draw_bounding_boxes(image, v_boxes, v_labels, v_scores):
for i in range(len(v_boxes)):
box = v_boxes[i]
y1, x1, y2, x2 = box.ymin, box.xmin, box.ymax, box.xmax
width, height = x2 - x1, y2 - y1
label = "%s (%.3f)" % (v_labels[i], v_scores[i])
region = np.array([[x1 - 3, y1],
[x1-3, y1 - height-26],
[x1+width+13, y1-height-26],
[x1+width+13, y1]], dtype='int32')
cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), 5)
cv2.fillPoly(image,[region], (255, 0, 0))
cv2.putText(image,
label,
(x1+13, y1-13),
cv2.FONT_HERSHEY_SIMPLEX,
1e-3 * image.shape[0],
(0,0,0),
2)
return image
我正在尝试将图像对象检测器转变为视频对象检测器。
但是,我得到了多个边界框,但我不知道为什么。
视频的第一帧似乎具有正确数量的边界框,即 1。但是当它循环时,函数 draw_boxes
输出具有多个或重叠边界框的图像。
如果您能提供帮助,我将不胜感激。谢谢
这里是一些框架的例子:
这是代码:
for i in tqdm(range(nb_frames)):
_, frame = video_reader.read()
cv2.imwrite("framey.jpg", frame)
filename = "framey.jpg"
image, image_w, image_h = load_image_pixels(filename, (input_w, input_h))
yhat = model.predict(image)
for i in range(len(yhat)):
# decode the output of the network
boxes += decode_netout(yhat[i][0], anchors[i], class_threshold, input_h, input_w)
# correct the sizes of the bounding boxes for the shape of the image
correct_yolo_boxes(boxes, image_h, image_w, input_h, input_w)
# suppress non-maximal boxes
do_nms(boxes, 0.5)
# get the details of the detected objects
v_boxes, v_labels, v_scores = get_boxes(boxes, labels, class_threshold)
# draw what we found
imagex = draw_boxes(filename, v_boxes, v_labels, v_scores)
video_writer.write(imagex)
video_reader.release()
video_writer.release()
这里是生成上图的函数:
def draw_boxes(filename, v_boxes, v_labels, v_scores):
# load the image
data = pyplot.imread(filename)
# plot the image
pyplot.imshow(data)
# get the context for drawing boxes
ax = pyplot.gca()
# plot each box
for i in range(len(v_boxes)):
box = v_boxes[i]
# get coordinates
y1, x1, y2, x2 = box.ymin, box.xmin, box.ymax, box.xmax
# calculate width and height of the box
width, height = x2 - x1, y2 - y1
# create the shape
rect = Rectangle((x1, y1), width, height, fill=False, color='white')
# draw the box
ax.add_patch(rect)
# draw text and score in top left corner
label = "%s (%.3f)" % (v_labels[i], v_scores[i])
pyplot.text(x1, y1, label, color='white')
# show the plot
pyplot.savefig('detected.jpg')
filename = "detected.jpg"
image = load_img(filename)
image_array = img_to_array(image)
image_array = (image_array*255).astype(np.uint8)
return image_array
因此,错误出在 'draw_boxes' 函数中。
我更改了 'draw_boxes',它成功了。
def draw_bounding_boxes(image, v_boxes, v_labels, v_scores):
for i in range(len(v_boxes)):
box = v_boxes[i]
y1, x1, y2, x2 = box.ymin, box.xmin, box.ymax, box.xmax
width, height = x2 - x1, y2 - y1
label = "%s (%.3f)" % (v_labels[i], v_scores[i])
region = np.array([[x1 - 3, y1],
[x1-3, y1 - height-26],
[x1+width+13, y1-height-26],
[x1+width+13, y1]], dtype='int32')
cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), 5)
cv2.fillPoly(image,[region], (255, 0, 0))
cv2.putText(image,
label,
(x1+13, y1-13),
cv2.FONT_HERSHEY_SIMPLEX,
1e-3 * image.shape[0],
(0,0,0),
2)
return image