有没有办法将我的 YOLO v3 模型连接到我的网络摄像头?
Is there a way to connect my YOLO v3 model to my webcam?
我正在按照这个 GitHub repo 使用 TensorFlow 实现 yolo_v3。代码在单张图像(从我的计算机加载的图像)上运行顺利,但我正在尝试将模型连接到我的网络摄像头。
我研究了 OpenCV tutorial on capturing a video from camera(从摄像头捕获视频的教程),但我无法弄清楚如何将它与启动 TensorFlow 会话并运行我的模型的语句结合起来:
# Load the input images from disk as one batch.
batch_size = len(img_names)
batch = load_images(img_names, model_size=_MODEL_SIZE)
class_names = load_class_names('files/coco.names')
n_classes = len(class_names)
max_output_size = 10
iou_threshold = 0.5
confidence_threshold = 0.5

tf.reset_default_graph()
model = Yolo_v3(n_classes=n_classes, model_size=_MODEL_SIZE,
                max_output_size=max_output_size,
                iou_threshold=iou_threshold,
                confidence_threshold=confidence_threshold)
# I think that the batch size will be 1 since we're dealing with 1 frame
# at a time
inputs = tf.placeholder(tf.float32, [batch_size, 416, 416, 3])
detections = model(inputs, training=False)
model_vars = tf.global_variables(scope='yolo_v3_model')
assign_ops = load_weights(model_vars, 'files/yolov3.weights')

with tf.Session() as sess:
    # Run the weight-assignment ops before running any detection.
    sess.run(assign_ops)
    detection_result = sess.run(detections, feed_dict={inputs: batch})

draw_boxes(img_names, detection_result, class_names, _MODEL_SIZE)
编辑:
我试过运行:
def generator():
    """Yield webcam frames one at a time until 'q' is pressed.

    Opens capture device 0, requests a 416x416 frame size, waits ten
    seconds, then shows and yields every frame that is read.
    """
    cap = cv2.VideoCapture(0)
    # Property ids 3 and 4 are the frame width and height.
    # NOTE(review): the request is evidently not honoured here -- the
    # reported ValueError shows frames arriving as 240x320.
    cap.set(3, 416)
    cap.set(4, 416)
    time.sleep(10)
    while True:
        # Capture frame-by-frame
        # NOTE(review): `ret` is never checked, so a failed read is passed
        # straight on to imshow (presumably the "empty frame" error).
        ret, frame = cap.read()
        # Display the resulting frame
        cv2.imshow('frame', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        yield frame
with tf.Session() as sess:
    sess.run(assign_ops)
    for frame in generator():
        # expand_dims adds a leading axis so one frame is fed as a batch of 1.
        detection_result = sess.run(
            detections, feed_dict={inputs: np.expand_dims(frame, 0)})
        # NOTE(review): this still passes the file-name list `img_names`,
        # not the frame that was just captured.
        draw_boxes(img_names, detection_result, class_names, _MODEL_SIZE)
但是我收到一条错误消息,指出图像未正确加载(空帧),或者:
ValueError: Cannot feed value of shape (1, 240, 320, 3) for Tensor 'Placeholder:0', which has shape '(1, 416, 416, 3)'
编辑 2
我觉得它几乎可以工作了。我运行
def generator():
    """Yield 416x416 webcam frames until 'q' is pressed, then clean up.

    Each frame read from capture device 0 is resized, shown in a window
    named 'frame' and yielded to the caller.
    """
    cap = cv2.VideoCapture(0)
    while True:
        # Capture frame-by-frame
        # NOTE(review): `ret` is not checked; a failed read would reach
        # cv2.resize without a valid frame.
        ret, frame = cap.read()
        # Resize in software so the frame matches the 416x416 placeholder.
        frame = cv2.resize(frame, (416, 416))
        # Display the resulting frame
        cv2.imshow('frame', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        yield frame
    # When everything done, release the capture
    cap.release()
    cv2.destroyAllWindows()
with tf.Session() as sess:
    sess.run(assign_ops)
    for frame in generator():
        # expand_dims adds a leading axis so one frame is fed as a batch of 1.
        detection_result = sess.run(
            detections, feed_dict={inputs: np.expand_dims(frame, 0)})
        # NOTE(review): draw_boxes iterates its first argument and calls
        # Image.open on every entry, so handing it the ndarray frame raises
        # the AttributeError quoted below.
        draw_boxes(frame, detection_result, class_names, _MODEL_SIZE)
但是我收到这个错误:
AttributeError: 'numpy.ndarray' object has no attribute 'read'
我尝试运行去掉最后一部分的代码:
draw_boxes(frame, detection_result, class_names, _MODEL_SIZE)
我的相机确实打开了,尽管没有任何物体检测(YOLO 模型)
顺便说一句,这是draw_boxes
函数:
def draw_boxes(img_names, boxes_dicts, class_names, model_size):
    """Draws detected boxes.

    Every image is opened from disk, annotated with one outlined rectangle
    and one caption per detection, and then shown with ``display``.

    Args:
        img_names: A list of input images names.
        boxes_dicts: A list of class-to-boxes dictionaries, one per image.
        class_names: A class names list.
        model_size: The input size of the model.

    Returns:
        None.
    """
    for img_name, boxes_dict in zip(img_names, boxes_dicts):
        img = Image.open(img_name)
        draw = ImageDraw.Draw(img)
        font = ImageFont.truetype(font='files/futur.ttf',
                                  size=(img.size[0] + img.size[1]) // 100)
        # Per-axis factor from the model input size to the real image size.
        resize_factor = \
            (img.size[0] / model_size[0], img.size[1] / model_size[1])
        for cls in range(len(class_names)):
            boxes = boxes_dict[cls]
            if np.size(boxes) != 0:
                color = np.random.permutation([np.random.randint(256), 255, 0])
                for box in boxes:
                    # box holds (x0, y0, x1, y1, confidence).
                    xy, confidence = box[:4], box[4]
                    xy = [xy[i] * resize_factor[i % 2] for i in range(4)]
                    # Keep the original top-left corner for the caption; xy
                    # itself is shrunk by the outline loop below.
                    x0, y0 = xy[0], xy[1]
                    thickness = (img.size[0] + img.size[1]) // 200
                    # Draw nested rectangles to get a thick outline.
                    for t in np.linspace(0, 1, thickness):
                        xy[0], xy[1] = xy[0] + t, xy[1] + t
                        xy[2], xy[3] = xy[2] - t, xy[3] - t
                        draw.rectangle(xy, outline=tuple(color))
                    # The distance estimate uses the box height xy[3] - xy[1]
                    # as left by the outline loop above.
                    if class_names[cls] == 'car':
                        text = '{} {:.1f}% about {:.1f} cm away'.format(
                            class_names[cls],
                            confidence * 100,
                            Distance_To_Obect(4.3, 121, 780, xy[3] - xy[1], 3.5).distance())
                    elif class_names[cls] == 'person':
                        width, height = img.size
                        print(width, height)
                        text = '{} {:.1f}% about {:.1f} cm away'.format(
                            class_names[cls],
                            confidence * 100,
                            Distance_To_Obect(4.3, 170, height, xy[3] - xy[1], 3.5).distance())
                    else:
                        text = '{} {:.1f}%'.format(class_names[cls],
                                                   confidence * 100)
                    text_size = draw.textsize(text, font=font)
                    print('[x0, y0, x1, y1]', xy[0], xy[1], xy[2], xy[3])
                    # Filled background for the caption, just above the box.
                    draw.rectangle(
                        [x0, y0 - text_size[1], x0 + text_size[0], y0],
                        fill=tuple(color))
                    draw.text((x0, y0 - text_size[1]), text, fill='black',
                              font=font)
        display(img)
我尝试更换
img = Image.open(img_name)
和
Image.fromarray(img_name)
但是我得到了一个错误(在重新打开文件并再次运行之后):
TypeError: function takes exactly 1 argument (3 given)
此外,我还运行了
print (detection_result)
它确实包含点数
编辑 3
我尝试将 draw_boxes
方法更改为 this link
中的方法
但是我得到了这个错误:
OSError Traceback (most recent call last)
<ipython-input-5-fa46870a1059> in <module>
105 detection_result = sess.run(detections, feed_dict={inputs: np.expand_dims(frame, 0)})
106 print(detection_result)
--> 107 draw_boxes(frame, detection_result, class_names, _MODEL_SIZE)
<ipython-input-5-fa46870a1059> in draw_boxes(image, boxes, box_classes, class_names, scores)
36 font = ImageFont.truetype(
37 font='font/FiraMono-Medium.otf',
---> 38 size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
39 thickness = (image.size[0] + image.size[1]) // 300
40
~\AppData\Local\Programs\Python\Python36\lib\site-packages\PIL\ImageFont.py in truetype(font, size, index, encoding, layout_engine)
278
279 try:
--> 280 return FreeTypeFont(font, size, index, encoding, layout_engine)
281 except IOError:
282 ttf_filename = os.path.basename(font)
~\AppData\Local\Programs\Python\Python36\lib\site-packages\PIL\ImageFont.py in __init__(self, font, size, index, encoding, layout_engine)
143 if isPath(font):
144 self.font = core.getfont(font, size, index, encoding,
--> 145 layout_engine=layout_engine)
146 else:
147 self.font_bytes = font.read()
OSError: cannot open resource
编辑 4
顺便说一句,这是 result_box
:
[{0: array([[131.96371 , 131.70601 , 341.41946 , 358.6781 ,
0.68467134]], dtype=float32), 1: array([], shape=(0, 5), dtype=float32), 2: array([], shape=(0, 5), dtype=float32), 3: array([], shape=(0, 5), dtype=float32), 4: array([], shape=(0, 5), dtype=float32), 5: array([], shape=(0, 5), dtype=float32), 6: array([], shape=(0, 5), dtype=float32), 7: array([], shape=(0, 5), dtype=float32), 8: array([], shape=(0, 5), dtype=float32), 9: array([], shape=(0, 5), dtype=float32), 10: array([], shape=(0, 5), dtype=float32), 11: array([], shape=(0, 5), dtype=float32), 12: array([], shape=(0, 5), dtype=float32), 13: array([], shape=(0, 5), dtype=float32), 14: array([], shape=(0, 5), dtype=float32), 15: array([], shape=(0, 5), dtype=float32), 16: array([], shape=(0, 5), dtype=float32), 17: array([], shape=(0, 5), dtype=float32), 18: array([], shape=(0, 5), dtype=float32), 19: array([], shape=(0, 5), dtype=float32), 20: array([], shape=(0, 5), dtype=float32), 21: array([], shape=(0, 5), dtype=float32), 22: array([], shape=(0, 5), dtype=float32), 23: array([], shape=(0, 5), dtype=float32), 24: array([], shape=(0, 5), dtype=float32), 25: array([], shape=(0, 5), dtype=float32), 26: array([], shape=(0, 5), dtype=float32), 27: array([], shape=(0, 5), dtype=float32), 28: array([], shape=(0, 5), dtype=float32), 29: array([], shape=(0, 5), dtype=float32), 30: array([], shape=(0, 5), dtype=float32), 31: array([], shape=(0, 5), dtype=float32), 32: array([], shape=(0, 5), dtype=float32), 33: array([], shape=(0, 5), dtype=float32), 34: array([], shape=(0, 5), dtype=float32), 35: array([], shape=(0, 5), dtype=float32), 36: array([], shape=(0, 5), dtype=float32), 37: array([], shape=(0, 5), dtype=float32), 38: array([], shape=(0, 5), dtype=float32), 39: array([], shape=(0, 5), dtype=float32), 40: array([], shape=(0, 5), dtype=float32), 41: array([], shape=(0, 5), dtype=float32), 42: array([], shape=(0, 5), dtype=float32), 43: array([], shape=(0, 5), dtype=float32), 44: array([], shape=(0, 5), dtype=float32), 45: array([], shape=(0, 5), 
dtype=float32), 46: array([], shape=(0, 5), dtype=float32), 47: array([], shape=(0, 5), dtype=float32), 48: array([], shape=(0, 5), dtype=float32), 49: array([], shape=(0, 5), dtype=float32), 50: array([], shape=(0, 5), dtype=float32), 51: array([], shape=(0, 5), dtype=float32), 52: array([], shape=(0, 5), dtype=float32), 53: array([], shape=(0, 5), dtype=float32), 54: array([], shape=(0, 5), dtype=float32), 55: array([], shape=(0, 5), dtype=float32), 56: array([], shape=(0, 5), dtype=float32), 57: array([], shape=(0, 5), dtype=float32), 58: array([], shape=(0, 5), dtype=float32), 59: array([], shape=(0, 5), dtype=float32), 60: array([], shape=(0, 5), dtype=float32), 61: array([], shape=(0, 5), dtype=float32), 62: array([], shape=(0, 5), dtype=float32), 63: array([], shape=(0, 5), dtype=float32), 64: array([], shape=(0, 5), dtype=float32), 65: array([], shape=(0, 5), dtype=float32), 66: array([], shape=(0, 5), dtype=float32), 67: array([], shape=(0, 5), dtype=float32), 68: array([], shape=(0, 5), dtype=float32), 69: array([], shape=(0, 5), dtype=float32), 70: array([], shape=(0, 5), dtype=float32), 71: array([], shape=(0, 5), dtype=float32), 72: array([], shape=(0, 5), dtype=float32), 73: array([], shape=(0, 5), dtype=float32), 74: array([], shape=(0, 5), dtype=float32), 75: array([], shape=(0, 5), dtype=float32), 76: array([], shape=(0, 5), dtype=float32), 77: array([], shape=(0, 5), dtype=float32), 78: array([], shape=(0, 5), dtype=float32), 79: array([], shape=(0, 5), dtype=float32)}]
在您提供的 link 中,frame(帧)就是需要输入给 yolo 的图像。您可以将 link 中的整个 while True 循环放入会话中,然后逐帧运行:
detection_result = sess.run(detections, feed_dict={inputs: np.expand_dims(frame, 0)})
或者写一个提供帧的生成器,这样更干净一些。
def generator():
    """Yield webcam frames resized to the 416x416 network input size.

    Frames are read from capture device 0 until a read fails. The capture
    is released when the generator finishes or is closed by the caller.
    """
    cap = cv2.VideoCapture(0)
    try:
        while True:
            # Capture frame-by-frame
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered; stop instead of resizing nothing.
                break
            yield cv2.resize(frame, (416, 416))
    finally:
        cap.release()
然后你可以做:
with tf.Session() as sess:
    sess.run(assign_ops)
    for frame in generator():
        # expand_dims adds a leading axis so one frame is fed as a batch of 1.
        detection_result = sess.run(
            detections, feed_dict={inputs: np.expand_dims(frame, 0)})
expand_dims 的作用是创建大小为 1 的批次,例如:将 399x399x3 变成 1x399x399x3。
大概就是这样,希望对你有所帮助
编辑
import numpy as np
import cv2
def convert_bbox_to_absolute(bbox_list, w_img, h_img):
    """
    Convert relative box coordinates (fractions of the image size) to pixels.

    :param bbox_list: (list) list of boundary boxes (x, y, w, h, probability)
        with x, y, w and h given relative to the image size
    :param w_img: (int) width of the image
    :param h_img: (int) height of the image
    :return: (list) list of (x, y, w, h, probability) tuples; x and w are
        scaled by the width, y and h by the height, all truncated to int.
        The probability is passed through unchanged. An empty input gives
        an empty list.
    """
    # Horizontal values scale with the width, vertical ones with the height.
    return [
        (int(x * w_img), int(y * h_img), int(w * w_img), int(h * h_img), c)
        for x, y, w, h, c in bbox_list
    ]


def draw_boxes(image, bbox_list):
    """
    Draw the rectangles on the image, write the probability next to each one
    and show the result in a window until a key is pressed.

    :param image: (np.ndarray) a 3-dimensional colour image; drawn on in place
    :param bbox_list: (list) list of boundary boxes (x, y, w, h, probability)
        with coordinates relative to the image size
    """
    assert isinstance(image, np.ndarray)
    assert isinstance(bbox_list, list)
    assert image.ndim == 3
    # Checks every box, and also accepts an empty list.
    assert all(len(bbox) == 5 for bbox in bbox_list)
    # Image arrays are laid out (rows, columns, channels): height comes first.
    h_img, w_img, _ = image.shape
    # convert the box coordinates to absolute values
    for x, y, w, h, c in convert_bbox_to_absolute(bbox_list, w_img, h_img):
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 3)
        cv2.putText(image, str(c), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 1,
                    (255, 255, 255), 2)
    cv2.imshow("", image)
    cv2.waitKey(0)
# creating a black test image (512 rows x 416 columns); uint8 so that the
# 0-255 colour values used when drawing keep their meaning
test_image = np.zeros((512, 416, 3), dtype=np.uint8)
# faking some boundary boxes, x, y, w, h, confidence
b_boxes = [[0.5, 0.1, 0.5, 0.9, 0.8], [0.4, 0.1, 0.1, 0.1, 0.4]]
draw_boxes(test_image, b_boxes)
这是一个非常简单的示例,说明如何绘制边界框,我假设 yolo 提供的边界框已归一化,因为 this link。如果您知道我如何确定哪个标签属于哪个框,那么我也会将其添加到代码中。
编辑 2:
import numpy as np
import cv2
from random import choices
from string import ascii_lowercase
def draw_boxes(image, bbox_list, label_list, wait_ms=0):
    """
    Draw every detected box with a "label: probability" caption on the image
    and show the result in a window.

    :param image: (np.ndarray) a 3-dimensional colour image; drawn on in place
    :param bbox_list: (list[dict]) one-element list holding a dict that maps a
        class index to an array of rows (x1, y1, x2, y2, probability); a class
        without detections maps to an empty array
    :param label_list: (list) label names, indexed by the dict keys
    :param wait_ms: (int) delay handed to cv2.waitKey. The default 0 keeps the
        original behaviour of blocking until a key is pressed; a small
        positive value such as 1 suits a live video loop.
    """
    bbox_dict = bbox_list[0]
    assert isinstance(image, np.ndarray)
    assert isinstance(bbox_dict, dict)
    assert isinstance(label_list, list)
    assert image.ndim == 3
    # One label per class, without hard-coding the number of classes.
    assert len(bbox_dict) == len(label_list)
    for class_id, boxes in bbox_dict.items():
        boxes = np.asarray(boxes)
        if boxes.size == 0:
            continue
        # A class can have several detections, so handle each row in turn.
        for x1, y1, x2, y2, c in boxes.reshape(-1, 5):
            x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
            caption = label_list[class_id] + ": {}".format(c)
            cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 3)
            cv2.putText(image, caption, (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1,
                        (255, 255, 255), 2)
    cv2.imshow("", image)
    cv2.waitKey(wait_ms)
# creating a black test image (512 rows x 416 columns); uint8 so that the
# 0-255 colour values used when drawing keep their meaning
test_image = np.zeros((512, 416, 3), dtype=np.uint8)
# faking the detections to match your data: class 0 has one box
# (x1, y1, x2, y2, confidence), the other 79 classes have an empty (0, 5) array
b_boxes = [{
    0: np.array([[131.96371, 131.70601, 341.41946, 358.6781, 0.68467134]]),
}]
for i in range(1, 80):
    b_boxes[0][i] = np.empty((0, 5))
# get 80 random 10 letter strings to mock labels
labels = ["".join(choices(ascii_lowercase, k=10)) for _ in range(80)]
draw_boxes(test_image, b_boxes, labels)
根据您的数据,我做了一些修改,希望对您有所帮助
我正在按照这个 GitHub repo 使用 TensorFlow 实现 yolo_v3。代码在单张图像(从我的计算机加载的图像)上运行顺利,但我正在尝试将模型连接到我的网络摄像头。
我研究了 OpenCV tutorial on capturing a video from camera(从摄像头捕获视频的教程),但我无法弄清楚如何将它与启动 TensorFlow 会话并运行我的模型的语句结合起来:
# Load the input images from disk as one batch.
batch_size = len(img_names)
batch = load_images(img_names, model_size=_MODEL_SIZE)
class_names = load_class_names('files/coco.names')
n_classes = len(class_names)
max_output_size = 10
iou_threshold = 0.5
confidence_threshold = 0.5

tf.reset_default_graph()
model = Yolo_v3(n_classes=n_classes, model_size=_MODEL_SIZE,
                max_output_size=max_output_size,
                iou_threshold=iou_threshold,
                confidence_threshold=confidence_threshold)
# I think that the batch size will be 1 since we're dealing with 1 frame
# at a time
inputs = tf.placeholder(tf.float32, [batch_size, 416, 416, 3])
detections = model(inputs, training=False)
model_vars = tf.global_variables(scope='yolo_v3_model')
assign_ops = load_weights(model_vars, 'files/yolov3.weights')

with tf.Session() as sess:
    # Run the weight-assignment ops before running any detection.
    sess.run(assign_ops)
    detection_result = sess.run(detections, feed_dict={inputs: batch})

draw_boxes(img_names, detection_result, class_names, _MODEL_SIZE)
编辑:
我试过运行:
def generator():
    """Yield webcam frames one at a time until 'q' is pressed.

    Opens capture device 0, requests a 416x416 frame size, waits ten
    seconds, then shows and yields every frame that is read.
    """
    cap = cv2.VideoCapture(0)
    # Property ids 3 and 4 are the frame width and height.
    # NOTE(review): the request is evidently not honoured here -- the
    # reported ValueError shows frames arriving as 240x320.
    cap.set(3, 416)
    cap.set(4, 416)
    time.sleep(10)
    while True:
        # Capture frame-by-frame
        # NOTE(review): `ret` is never checked, so a failed read is passed
        # straight on to imshow (presumably the "empty frame" error).
        ret, frame = cap.read()
        # Display the resulting frame
        cv2.imshow('frame', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        yield frame
with tf.Session() as sess:
    sess.run(assign_ops)
    for frame in generator():
        # expand_dims adds a leading axis so one frame is fed as a batch of 1.
        detection_result = sess.run(
            detections, feed_dict={inputs: np.expand_dims(frame, 0)})
        # NOTE(review): this still passes the file-name list `img_names`,
        # not the frame that was just captured.
        draw_boxes(img_names, detection_result, class_names, _MODEL_SIZE)
但是我收到一条错误消息,指出图像未正确加载(空帧),或者:
ValueError: Cannot feed value of shape (1, 240, 320, 3) for Tensor 'Placeholder:0', which has shape '(1, 416, 416, 3)'
编辑 2
我觉得它几乎可以工作了。我运行
def generator():
    """Yield 416x416 webcam frames until 'q' is pressed, then clean up.

    Each frame read from capture device 0 is resized, shown in a window
    named 'frame' and yielded to the caller.
    """
    cap = cv2.VideoCapture(0)
    while True:
        # Capture frame-by-frame
        # NOTE(review): `ret` is not checked; a failed read would reach
        # cv2.resize without a valid frame.
        ret, frame = cap.read()
        # Resize in software so the frame matches the 416x416 placeholder.
        frame = cv2.resize(frame, (416, 416))
        # Display the resulting frame
        cv2.imshow('frame', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        yield frame
    # When everything done, release the capture
    cap.release()
    cv2.destroyAllWindows()
with tf.Session() as sess:
    sess.run(assign_ops)
    for frame in generator():
        # expand_dims adds a leading axis so one frame is fed as a batch of 1.
        detection_result = sess.run(
            detections, feed_dict={inputs: np.expand_dims(frame, 0)})
        # NOTE(review): draw_boxes iterates its first argument and calls
        # Image.open on every entry, so handing it the ndarray frame raises
        # the AttributeError quoted below.
        draw_boxes(frame, detection_result, class_names, _MODEL_SIZE)
但是我收到这个错误:
AttributeError: 'numpy.ndarray' object has no attribute 'read'
我尝试运行去掉最后一部分的代码:
draw_boxes(frame, detection_result, class_names, _MODEL_SIZE)
我的相机确实打开了,尽管没有任何物体检测(YOLO 模型)
顺便说一句,这是draw_boxes
函数:
def draw_boxes(img_names, boxes_dicts, class_names, model_size):
    """Draws detected boxes.

    Every image is opened from disk, annotated with one outlined rectangle
    and one caption per detection, and then shown with ``display``.

    Args:
        img_names: A list of input images names.
        boxes_dicts: A list of class-to-boxes dictionaries, one per image.
        class_names: A class names list.
        model_size: The input size of the model.

    Returns:
        None.
    """
    for img_name, boxes_dict in zip(img_names, boxes_dicts):
        img = Image.open(img_name)
        draw = ImageDraw.Draw(img)
        font = ImageFont.truetype(font='files/futur.ttf',
                                  size=(img.size[0] + img.size[1]) // 100)
        # Per-axis factor from the model input size to the real image size.
        resize_factor = \
            (img.size[0] / model_size[0], img.size[1] / model_size[1])
        for cls in range(len(class_names)):
            boxes = boxes_dict[cls]
            if np.size(boxes) != 0:
                color = np.random.permutation([np.random.randint(256), 255, 0])
                for box in boxes:
                    # box holds (x0, y0, x1, y1, confidence).
                    xy, confidence = box[:4], box[4]
                    xy = [xy[i] * resize_factor[i % 2] for i in range(4)]
                    # Keep the original top-left corner for the caption; xy
                    # itself is shrunk by the outline loop below.
                    x0, y0 = xy[0], xy[1]
                    thickness = (img.size[0] + img.size[1]) // 200
                    # Draw nested rectangles to get a thick outline.
                    for t in np.linspace(0, 1, thickness):
                        xy[0], xy[1] = xy[0] + t, xy[1] + t
                        xy[2], xy[3] = xy[2] - t, xy[3] - t
                        draw.rectangle(xy, outline=tuple(color))
                    # The distance estimate uses the box height xy[3] - xy[1]
                    # as left by the outline loop above.
                    if class_names[cls] == 'car':
                        text = '{} {:.1f}% about {:.1f} cm away'.format(
                            class_names[cls],
                            confidence * 100,
                            Distance_To_Obect(4.3, 121, 780, xy[3] - xy[1], 3.5).distance())
                    elif class_names[cls] == 'person':
                        width, height = img.size
                        print(width, height)
                        text = '{} {:.1f}% about {:.1f} cm away'.format(
                            class_names[cls],
                            confidence * 100,
                            Distance_To_Obect(4.3, 170, height, xy[3] - xy[1], 3.5).distance())
                    else:
                        text = '{} {:.1f}%'.format(class_names[cls],
                                                   confidence * 100)
                    text_size = draw.textsize(text, font=font)
                    print('[x0, y0, x1, y1]', xy[0], xy[1], xy[2], xy[3])
                    # Filled background for the caption, just above the box.
                    draw.rectangle(
                        [x0, y0 - text_size[1], x0 + text_size[0], y0],
                        fill=tuple(color))
                    draw.text((x0, y0 - text_size[1]), text, fill='black',
                              font=font)
        display(img)
我尝试更换
img = Image.open(img_name)
和
Image.fromarray(img_name)
但是我得到了一个错误(在重新打开文件并再次运行之后):
TypeError: function takes exactly 1 argument (3 given)
此外,我还运行了
print (detection_result)
它确实包含点数
编辑 3
我尝试将 draw_boxes
方法更改为 this link
中的方法
但是我得到了这个错误:
OSError Traceback (most recent call last)
<ipython-input-5-fa46870a1059> in <module>
105 detection_result = sess.run(detections, feed_dict={inputs: np.expand_dims(frame, 0)})
106 print(detection_result)
--> 107 draw_boxes(frame, detection_result, class_names, _MODEL_SIZE)
<ipython-input-5-fa46870a1059> in draw_boxes(image, boxes, box_classes, class_names, scores)
36 font = ImageFont.truetype(
37 font='font/FiraMono-Medium.otf',
---> 38 size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
39 thickness = (image.size[0] + image.size[1]) // 300
40
~\AppData\Local\Programs\Python\Python36\lib\site-packages\PIL\ImageFont.py in truetype(font, size, index, encoding, layout_engine)
278
279 try:
--> 280 return FreeTypeFont(font, size, index, encoding, layout_engine)
281 except IOError:
282 ttf_filename = os.path.basename(font)
~\AppData\Local\Programs\Python\Python36\lib\site-packages\PIL\ImageFont.py in __init__(self, font, size, index, encoding, layout_engine)
143 if isPath(font):
144 self.font = core.getfont(font, size, index, encoding,
--> 145 layout_engine=layout_engine)
146 else:
147 self.font_bytes = font.read()
OSError: cannot open resource
编辑 4
顺便说一句,这是 result_box
:
[{0: array([[131.96371 , 131.70601 , 341.41946 , 358.6781 ,
0.68467134]], dtype=float32), 1: array([], shape=(0, 5), dtype=float32), 2: array([], shape=(0, 5), dtype=float32), 3: array([], shape=(0, 5), dtype=float32), 4: array([], shape=(0, 5), dtype=float32), 5: array([], shape=(0, 5), dtype=float32), 6: array([], shape=(0, 5), dtype=float32), 7: array([], shape=(0, 5), dtype=float32), 8: array([], shape=(0, 5), dtype=float32), 9: array([], shape=(0, 5), dtype=float32), 10: array([], shape=(0, 5), dtype=float32), 11: array([], shape=(0, 5), dtype=float32), 12: array([], shape=(0, 5), dtype=float32), 13: array([], shape=(0, 5), dtype=float32), 14: array([], shape=(0, 5), dtype=float32), 15: array([], shape=(0, 5), dtype=float32), 16: array([], shape=(0, 5), dtype=float32), 17: array([], shape=(0, 5), dtype=float32), 18: array([], shape=(0, 5), dtype=float32), 19: array([], shape=(0, 5), dtype=float32), 20: array([], shape=(0, 5), dtype=float32), 21: array([], shape=(0, 5), dtype=float32), 22: array([], shape=(0, 5), dtype=float32), 23: array([], shape=(0, 5), dtype=float32), 24: array([], shape=(0, 5), dtype=float32), 25: array([], shape=(0, 5), dtype=float32), 26: array([], shape=(0, 5), dtype=float32), 27: array([], shape=(0, 5), dtype=float32), 28: array([], shape=(0, 5), dtype=float32), 29: array([], shape=(0, 5), dtype=float32), 30: array([], shape=(0, 5), dtype=float32), 31: array([], shape=(0, 5), dtype=float32), 32: array([], shape=(0, 5), dtype=float32), 33: array([], shape=(0, 5), dtype=float32), 34: array([], shape=(0, 5), dtype=float32), 35: array([], shape=(0, 5), dtype=float32), 36: array([], shape=(0, 5), dtype=float32), 37: array([], shape=(0, 5), dtype=float32), 38: array([], shape=(0, 5), dtype=float32), 39: array([], shape=(0, 5), dtype=float32), 40: array([], shape=(0, 5), dtype=float32), 41: array([], shape=(0, 5), dtype=float32), 42: array([], shape=(0, 5), dtype=float32), 43: array([], shape=(0, 5), dtype=float32), 44: array([], shape=(0, 5), dtype=float32), 45: array([], shape=(0, 5), 
dtype=float32), 46: array([], shape=(0, 5), dtype=float32), 47: array([], shape=(0, 5), dtype=float32), 48: array([], shape=(0, 5), dtype=float32), 49: array([], shape=(0, 5), dtype=float32), 50: array([], shape=(0, 5), dtype=float32), 51: array([], shape=(0, 5), dtype=float32), 52: array([], shape=(0, 5), dtype=float32), 53: array([], shape=(0, 5), dtype=float32), 54: array([], shape=(0, 5), dtype=float32), 55: array([], shape=(0, 5), dtype=float32), 56: array([], shape=(0, 5), dtype=float32), 57: array([], shape=(0, 5), dtype=float32), 58: array([], shape=(0, 5), dtype=float32), 59: array([], shape=(0, 5), dtype=float32), 60: array([], shape=(0, 5), dtype=float32), 61: array([], shape=(0, 5), dtype=float32), 62: array([], shape=(0, 5), dtype=float32), 63: array([], shape=(0, 5), dtype=float32), 64: array([], shape=(0, 5), dtype=float32), 65: array([], shape=(0, 5), dtype=float32), 66: array([], shape=(0, 5), dtype=float32), 67: array([], shape=(0, 5), dtype=float32), 68: array([], shape=(0, 5), dtype=float32), 69: array([], shape=(0, 5), dtype=float32), 70: array([], shape=(0, 5), dtype=float32), 71: array([], shape=(0, 5), dtype=float32), 72: array([], shape=(0, 5), dtype=float32), 73: array([], shape=(0, 5), dtype=float32), 74: array([], shape=(0, 5), dtype=float32), 75: array([], shape=(0, 5), dtype=float32), 76: array([], shape=(0, 5), dtype=float32), 77: array([], shape=(0, 5), dtype=float32), 78: array([], shape=(0, 5), dtype=float32), 79: array([], shape=(0, 5), dtype=float32)}]
在您提供的 link 中,frame(帧)就是需要输入给 yolo 的图像。您可以将 link 中的整个 while True 循环放入会话中,然后逐帧运行:
detection_result = sess.run(detections, feed_dict={inputs: np.expand_dims(frame, 0)})
或者写一个提供帧的生成器,这样更干净一些。
def generator():
    """Yield webcam frames resized to the 416x416 network input size.

    Frames are read from capture device 0 until a read fails. The capture
    is released when the generator finishes or is closed by the caller.
    """
    cap = cv2.VideoCapture(0)
    try:
        while True:
            # Capture frame-by-frame
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered; stop instead of resizing nothing.
                break
            yield cv2.resize(frame, (416, 416))
    finally:
        cap.release()
然后你可以做:
with tf.Session() as sess:
    sess.run(assign_ops)
    for frame in generator():
        # expand_dims adds a leading axis so one frame is fed as a batch of 1.
        detection_result = sess.run(
            detections, feed_dict={inputs: np.expand_dims(frame, 0)})
expand_dims 的作用是创建大小为 1 的批次,例如:将 399x399x3 变成 1x399x399x3。
大概就是这样,希望对你有所帮助
编辑
import numpy as np
import cv2
def convert_bbox_to_absolute(bbox_list, w_img, h_img):
    """
    Convert relative box coordinates (fractions of the image size) to pixels.

    :param bbox_list: (list) list of boundary boxes (x, y, w, h, probability)
        with x, y, w and h given relative to the image size
    :param w_img: (int) width of the image
    :param h_img: (int) height of the image
    :return: (list) list of (x, y, w, h, probability) tuples; x and w are
        scaled by the width, y and h by the height, all truncated to int.
        The probability is passed through unchanged. An empty input gives
        an empty list.
    """
    # Horizontal values scale with the width, vertical ones with the height.
    return [
        (int(x * w_img), int(y * h_img), int(w * w_img), int(h * h_img), c)
        for x, y, w, h, c in bbox_list
    ]


def draw_boxes(image, bbox_list):
    """
    Draw the rectangles on the image, write the probability next to each one
    and show the result in a window until a key is pressed.

    :param image: (np.ndarray) a 3-dimensional colour image; drawn on in place
    :param bbox_list: (list) list of boundary boxes (x, y, w, h, probability)
        with coordinates relative to the image size
    """
    assert isinstance(image, np.ndarray)
    assert isinstance(bbox_list, list)
    assert image.ndim == 3
    # Checks every box, and also accepts an empty list.
    assert all(len(bbox) == 5 for bbox in bbox_list)
    # Image arrays are laid out (rows, columns, channels): height comes first.
    h_img, w_img, _ = image.shape
    # convert the box coordinates to absolute values
    for x, y, w, h, c in convert_bbox_to_absolute(bbox_list, w_img, h_img):
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 3)
        cv2.putText(image, str(c), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 1,
                    (255, 255, 255), 2)
    cv2.imshow("", image)
    cv2.waitKey(0)
# creating a black test image (512 rows x 416 columns); uint8 so that the
# 0-255 colour values used when drawing keep their meaning
test_image = np.zeros((512, 416, 3), dtype=np.uint8)
# faking some boundary boxes, x, y, w, h, confidence
b_boxes = [[0.5, 0.1, 0.5, 0.9, 0.8], [0.4, 0.1, 0.1, 0.1, 0.4]]
draw_boxes(test_image, b_boxes)
这是一个非常简单的示例,说明如何绘制边界框,我假设 yolo 提供的边界框已归一化,因为 this link。如果您知道我如何确定哪个标签属于哪个框,那么我也会将其添加到代码中。
编辑 2:
import numpy as np
import cv2
from random import choices
from string import ascii_lowercase
def draw_boxes(image, bbox_list, label_list, wait_ms=0):
    """
    Draw every detected box with a "label: probability" caption on the image
    and show the result in a window.

    :param image: (np.ndarray) a 3-dimensional colour image; drawn on in place
    :param bbox_list: (list[dict]) one-element list holding a dict that maps a
        class index to an array of rows (x1, y1, x2, y2, probability); a class
        without detections maps to an empty array
    :param label_list: (list) label names, indexed by the dict keys
    :param wait_ms: (int) delay handed to cv2.waitKey. The default 0 keeps the
        original behaviour of blocking until a key is pressed; a small
        positive value such as 1 suits a live video loop.
    """
    bbox_dict = bbox_list[0]
    assert isinstance(image, np.ndarray)
    assert isinstance(bbox_dict, dict)
    assert isinstance(label_list, list)
    assert image.ndim == 3
    # One label per class, without hard-coding the number of classes.
    assert len(bbox_dict) == len(label_list)
    for class_id, boxes in bbox_dict.items():
        boxes = np.asarray(boxes)
        if boxes.size == 0:
            continue
        # A class can have several detections, so handle each row in turn.
        for x1, y1, x2, y2, c in boxes.reshape(-1, 5):
            x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
            caption = label_list[class_id] + ": {}".format(c)
            cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 3)
            cv2.putText(image, caption, (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1,
                        (255, 255, 255), 2)
    cv2.imshow("", image)
    cv2.waitKey(wait_ms)
# creating a black test image (512 rows x 416 columns); uint8 so that the
# 0-255 colour values used when drawing keep their meaning
test_image = np.zeros((512, 416, 3), dtype=np.uint8)
# faking the detections to match your data: class 0 has one box
# (x1, y1, x2, y2, confidence), the other 79 classes have an empty (0, 5) array
b_boxes = [{
    0: np.array([[131.96371, 131.70601, 341.41946, 358.6781, 0.68467134]]),
}]
for i in range(1, 80):
    b_boxes[0][i] = np.empty((0, 5))
# get 80 random 10 letter strings to mock labels
labels = ["".join(choices(ascii_lowercase, k=10)) for _ in range(80)]
draw_boxes(test_image, b_boxes, labels)
根据您的数据,我做了一些修改,希望对您有所帮助