这个网络摄像头面部检测有什么问题?
What's wrong with this webcam face detection?
Dlib 有一个非常方便、快速和高效的对象检测例程,我想制作一个类似于此处官方示例的、很酷的面部跟踪程序。
得到广泛支持的 OpenCV 具有速度相当快的 VideoCapture 模块(与调用唤醒网络摄像头和获取图片的程序需要 1 秒或更多时间相比,抓拍时间为 1/5 秒)。我将其添加到 Dlib 中的人脸检测器 Python 示例中。
如果您直接显示和处理 OpenCV VideoCapture 输出,它看起来很奇怪,因为显然 OpenCV 存储 BGR 而不是 RGB 顺序。调整后,它起作用了,但是很慢:
from __future__ import division
import sys
import dlib
from skimage import io

# Dlib's HOG-based frontal face detector and its own display window.
detector = dlib.get_frontal_face_detector()
win = dlib.image_window()

if len(sys.argv[1:]) == 0:
    # No file arguments: read frames from the default webcam instead.
    from cv2 import VideoCapture
    from time import time

    cam = VideoCapture(0)  # set the port of the camera as before
    while True:
        start = time()
        retval, image = cam.read()  # returns True and the frame if the grab succeeded
        # OpenCV delivers BGR but dlib expects RGB; reversing the channel
        # axis with one vectorized slice (plus a copy so the array stays
        # contiguous) replaces the original per-pixel Python loop that
        # dominated the "readimage" time.
        image = image[:, :, ::-1].copy()
        # import matplotlib.pyplot as plt
        # plt.imshow(image)
        # plt.show()
        print("readimage: " + str(time() - start))

        start = time()
        dets = detector(image, 1)
        print("your faces: %d" % len(dets))  # %d: len() is an int, not a float
        for i, d in enumerate(dets):
            print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
                i, d.left(), d.top(), d.right(), d.bottom()))
            # Normalized (0..1) horizontal/vertical position of the face center.
            print("from left: {}".format(((d.left() + d.right()) / 2) / len(image[0])))
            print("from top: {}".format(((d.top() + d.bottom()) / 2) / len(image)))
        print("process: " + str(time() - start))

        start = time()
        win.clear_overlay()
        win.set_image(image)
        win.add_overlay(dets)
        print("show: " + str(time() - start))
        # dlib.hit_enter_to_continue()

for f in sys.argv[1:]:
    print("Processing file: {}".format(f))
    img = io.imread(f)
    # The 1 in the second argument indicates that we should upsample the image
    # 1 time. This will make everything bigger and allow us to detect more
    # faces.
    dets = detector(img, 1)
    print("Number of faces detected: {}".format(len(dets)))
    for i, d in enumerate(dets):
        print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
            i, d.left(), d.top(), d.right(), d.bottom()))
    win.clear_overlay()
    win.set_image(img)
    win.add_overlay(dets)
    dlib.hit_enter_to_continue()

# Finally, if you really want to you can ask the detector to tell you the score
# for each detection. The score is bigger for more confident detections.
# Also, the idx tells you which of the face sub-detectors matched. This can be
# used to broadly identify faces in different orientations.
if len(sys.argv[1:]) > 0:
    img = io.imread(sys.argv[1])
    dets, scores, idx = detector.run(img, 1)
    for i, d in enumerate(dets):
        print("Detection {}, score: {}, face_type:{}".format(
            d, scores[i], idx[i]))
从这个程序的计时输出来看,处理和抓取图片似乎各需要五分之一秒,所以你会认为它应该每秒显示一到两次更新——但是,如果你举起手,大约 5 秒后它才会出现在网络摄像头视图中!
是否有某种内部缓存阻止它获取最新的网络摄像头图像?我可以调整或多线程网络摄像头输入过程来修复延迟吗?这是在具有 16gb RAM 的 Intel i5 上。
更新
根据此处的说法,read 会逐帧抓取视频。这可以解释为什么它会一帧接一帧地抓取,直到最终赶上处理期间积压的所有帧。我想知道是否有选项可以设置帧速率,或将其设置为丢帧,从而在读取时只抓取网络摄像头此刻的画面?
http://docs.opencv.org/3.0-beta/doc/py_tutorials/py_gui/py_video_display/py_video_display.html#capture-video-from-camera
如果你想显示在 OpenCV 中读取的帧,你可以借助 cv2.imshow()
函数来完成,而不需要改变颜色顺序。另一方面,如果你仍然想在matplotlib中显示图片,那么你不能避免使用这样的方法:
# cv2.split returns the channels in B, G, R order; merge them back as
# (r, g, b) to actually swap the order — merging as (b, g, r), as the
# original did, just reconstructs the unchanged BGR image.
b, g, r = cv2.split(img)
img = cv2.merge((r, g, b))
这是我目前唯一能帮到你的事=)
我尝试了多线程,它同样慢,然后我只在线程中使用 .read()
进行多线程,没有处理,没有线程锁定,而且它工作得非常快 - 可能是 1 秒左右延迟,不是 3 或 5。参见 http://www.pyimagesearch.com/2015/12/21/increasing-webcam-fps-with-python-and-opencv/
from __future__ import division
import sys
from time import time, sleep
import threading
import dlib
from skimage import io

detector = dlib.get_frontal_face_detector()
win = dlib.image_window()


class webCamGrabber(threading.Thread):
    """Background thread that keeps grabbing webcam frames.

    Continuously overwriting self.image means the main loop always sees the
    most recent frame, instead of draining OpenCV's internal frame buffer
    several seconds behind real time.
    """

    def __init__(self):
        threading.Thread.__init__(self)
        self.daemon = True  # let the process exit even while this loop runs
        # Lock for when you can read/write self.image — optional, because
        # rebinding a single attribute is atomic in CPython:
        # self.imageLock = threading.Lock()
        self.image = None  # latest captured frame, or None before the first grab
        # Import locally so cv2 is only required for the webcam code path.
        # NOTE: the old "from cv2 import cv" only exists in OpenCV 2.x and
        # was needed solely for the commented-out CV_CAP_PROP_FPS line.
        from cv2 import VideoCapture
        self.cam = VideoCapture(0)  # set the port of the camera as before
        # self.cam.set(cv.CV_CAP_PROP_FPS, 1)

    def run(self):
        while True:
            retval, frame = self.cam.read()
            if retval:
                # Publish only successfully captured frames, so readers
                # never see a failed grab.
                self.image = frame
            # sleep(0.1)


if len(sys.argv[1:]) == 0:
    # Start webcam reader thread:
    camThread = webCamGrabber()
    camThread.start()

    # Reuse the detector and window created at module level; re-creating
    # them here (as the original did) was redundant.
    while True:
        # camThread.imageLock.acquire()
        if camThread.image is not None:
            start = time()
            # Copy the shared frame and convert BGR -> RGB in one vectorized
            # step. The original mutated camThread.image in place, pixel by
            # pixel, racing with the grabber thread overwriting it.
            myimage = camThread.image[:, :, ::-1].copy()
            dets = detector(myimage, 0)
            # camThread.imageLock.release()
            print("your faces: %d" % len(dets))  # %d: len() is an int
            for i, d in enumerate(dets):
                print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
                    i, d.left(), d.top(), d.right(), d.bottom()))
                # Normalized (0..1) position of the face center in the frame.
                print("from left: {}".format(((d.left() + d.right()) / 2) / len(myimage[0])))
                print("from top: {}".format(((d.top() + d.bottom()) / 2) / len(myimage)))
            print("process: " + str(time() - start))

            start = time()
            win.clear_overlay()
            win.set_image(myimage)
            win.add_overlay(dets)
            print("show: " + str(time() - start))
            # dlib.hit_enter_to_continue()

for f in sys.argv[1:]:
    print("Processing file: {}".format(f))
    img = io.imread(f)
    # The 1 in the second argument indicates that we should upsample the image
    # 1 time. This will make everything bigger and allow us to detect more
    # faces.
    dets = detector(img, 1)
    print("Number of faces detected: {}".format(len(dets)))
    for i, d in enumerate(dets):
        print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
            i, d.left(), d.top(), d.right(), d.bottom()))
    win.clear_overlay()
    win.set_image(img)
    win.add_overlay(dets)
    dlib.hit_enter_to_continue()

# Finally, if you really want to you can ask the detector to tell you the score
# for each detection. The score is bigger for more confident detections.
# Also, the idx tells you which of the face sub-detectors matched. This can be
# used to broadly identify faces in different orientations.
if len(sys.argv[1:]) > 0:
    img = io.imread(sys.argv[1])
    dets, scores, idx = detector.run(img, 1)
    for i, d in enumerate(dets):
        print("Detection {}, score: {}, face_type:{}".format(
            d, scores[i], idx[i]))
我感受到你的痛苦。事实上,我最近使用了那个网络摄像头脚本(多次迭代;大量编辑)。我觉得我让它工作得很好,我想。为了让您看到我做了什么,我创建了一个包含详细信息的 GitHub Gist(代码;HTML 自述文件;示例输出):
https://gist.github.com/victoriastuart/8092a3dd7e97ab57ede7614251bf5cbd
可能问题是设置了阈值。
如上所述here
dots = detector(frame, 1)
应改为
dots = detector(frame)
以避免使用阈值。
这对我有用,但同时存在帧处理速度过快的问题。
Dlib 有一个非常方便、快速和高效的对象检测例程,我想制作一个类似于此处官方示例的、很酷的面部跟踪程序。得到广泛支持的 OpenCV 具有速度相当快的 VideoCapture 模块(与调用唤醒网络摄像头和获取图片的程序需要 1 秒或更多时间相比,抓拍时间为 1/5 秒)。我将其添加到 Dlib 中的人脸检测器 Python 示例中。
如果您直接显示和处理 OpenCV VideoCapture 输出,它看起来很奇怪,因为显然 OpenCV 存储 BGR 而不是 RGB 顺序。调整后,它起作用了,但是很慢:
from __future__ import division
import sys
import dlib
from skimage import io

# Dlib's HOG-based frontal face detector and its own display window.
detector = dlib.get_frontal_face_detector()
win = dlib.image_window()

if len(sys.argv[1:]) == 0:
    # No file arguments: read frames from the default webcam instead.
    from cv2 import VideoCapture
    from time import time

    cam = VideoCapture(0)  # set the port of the camera as before
    while True:
        start = time()
        retval, image = cam.read()  # returns True and the frame if the grab succeeded
        # OpenCV delivers BGR but dlib expects RGB; reversing the channel
        # axis with one vectorized slice (plus a copy so the array stays
        # contiguous) replaces the original per-pixel Python loop that
        # dominated the "readimage" time.
        image = image[:, :, ::-1].copy()
        # import matplotlib.pyplot as plt
        # plt.imshow(image)
        # plt.show()
        print("readimage: " + str(time() - start))

        start = time()
        dets = detector(image, 1)
        print("your faces: %d" % len(dets))  # %d: len() is an int, not a float
        for i, d in enumerate(dets):
            print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
                i, d.left(), d.top(), d.right(), d.bottom()))
            # Normalized (0..1) horizontal/vertical position of the face center.
            print("from left: {}".format(((d.left() + d.right()) / 2) / len(image[0])))
            print("from top: {}".format(((d.top() + d.bottom()) / 2) / len(image)))
        print("process: " + str(time() - start))

        start = time()
        win.clear_overlay()
        win.set_image(image)
        win.add_overlay(dets)
        print("show: " + str(time() - start))
        # dlib.hit_enter_to_continue()

for f in sys.argv[1:]:
    print("Processing file: {}".format(f))
    img = io.imread(f)
    # The 1 in the second argument indicates that we should upsample the image
    # 1 time. This will make everything bigger and allow us to detect more
    # faces.
    dets = detector(img, 1)
    print("Number of faces detected: {}".format(len(dets)))
    for i, d in enumerate(dets):
        print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
            i, d.left(), d.top(), d.right(), d.bottom()))
    win.clear_overlay()
    win.set_image(img)
    win.add_overlay(dets)
    dlib.hit_enter_to_continue()

# Finally, if you really want to you can ask the detector to tell you the score
# for each detection. The score is bigger for more confident detections.
# Also, the idx tells you which of the face sub-detectors matched. This can be
# used to broadly identify faces in different orientations.
if len(sys.argv[1:]) > 0:
    img = io.imread(sys.argv[1])
    dets, scores, idx = detector.run(img, 1)
    for i, d in enumerate(dets):
        print("Detection {}, score: {}, face_type:{}".format(
            d, scores[i], idx[i]))
从这个程序的计时输出来看,处理和抓取图片似乎各需要五分之一秒,所以你会认为它应该每秒显示一到两次更新——但是,如果你举起手,大约 5 秒后它才会出现在网络摄像头视图中!
是否有某种内部缓存阻止它获取最新的网络摄像头图像?我可以调整或多线程网络摄像头输入过程来修复延迟吗?这是在具有 16gb RAM 的 Intel i5 上。
更新
根据此处的说法,read 会逐帧抓取视频。这可以解释为什么它会一帧接一帧地抓取,直到最终赶上处理期间积压的所有帧。我想知道是否有选项可以设置帧速率,或将其设置为丢帧,从而在读取时只抓取网络摄像头此刻的画面? http://docs.opencv.org/3.0-beta/doc/py_tutorials/py_gui/py_video_display/py_video_display.html#capture-video-from-camera
如果你想显示在 OpenCV 中读取的帧,你可以借助 cv2.imshow()
函数来完成,而不需要改变颜色顺序。另一方面,如果你仍然想在matplotlib中显示图片,那么你不能避免使用这样的方法:
# cv2.split returns the channels in B, G, R order; merge them back as
# (r, g, b) to actually swap the order — merging as (b, g, r), as the
# original did, just reconstructs the unchanged BGR image.
b, g, r = cv2.split(img)
img = cv2.merge((r, g, b))
这是我目前唯一能帮到你的事=)
我尝试了多线程,它同样慢,然后我只在线程中使用 .read()
进行多线程,没有处理,没有线程锁定,而且它工作得非常快 - 可能是 1 秒左右延迟,不是 3 或 5。参见 http://www.pyimagesearch.com/2015/12/21/increasing-webcam-fps-with-python-and-opencv/
from __future__ import division
import sys
from time import time, sleep
import threading
import dlib
from skimage import io

detector = dlib.get_frontal_face_detector()
win = dlib.image_window()


class webCamGrabber(threading.Thread):
    """Background thread that keeps grabbing webcam frames.

    Continuously overwriting self.image means the main loop always sees the
    most recent frame, instead of draining OpenCV's internal frame buffer
    several seconds behind real time.
    """

    def __init__(self):
        threading.Thread.__init__(self)
        self.daemon = True  # let the process exit even while this loop runs
        # Lock for when you can read/write self.image — optional, because
        # rebinding a single attribute is atomic in CPython:
        # self.imageLock = threading.Lock()
        self.image = None  # latest captured frame, or None before the first grab
        # Import locally so cv2 is only required for the webcam code path.
        # NOTE: the old "from cv2 import cv" only exists in OpenCV 2.x and
        # was needed solely for the commented-out CV_CAP_PROP_FPS line.
        from cv2 import VideoCapture
        self.cam = VideoCapture(0)  # set the port of the camera as before
        # self.cam.set(cv.CV_CAP_PROP_FPS, 1)

    def run(self):
        while True:
            retval, frame = self.cam.read()
            if retval:
                # Publish only successfully captured frames, so readers
                # never see a failed grab.
                self.image = frame
            # sleep(0.1)


if len(sys.argv[1:]) == 0:
    # Start webcam reader thread:
    camThread = webCamGrabber()
    camThread.start()

    # Reuse the detector and window created at module level; re-creating
    # them here (as the original did) was redundant.
    while True:
        # camThread.imageLock.acquire()
        if camThread.image is not None:
            start = time()
            # Copy the shared frame and convert BGR -> RGB in one vectorized
            # step. The original mutated camThread.image in place, pixel by
            # pixel, racing with the grabber thread overwriting it.
            myimage = camThread.image[:, :, ::-1].copy()
            dets = detector(myimage, 0)
            # camThread.imageLock.release()
            print("your faces: %d" % len(dets))  # %d: len() is an int
            for i, d in enumerate(dets):
                print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
                    i, d.left(), d.top(), d.right(), d.bottom()))
                # Normalized (0..1) position of the face center in the frame.
                print("from left: {}".format(((d.left() + d.right()) / 2) / len(myimage[0])))
                print("from top: {}".format(((d.top() + d.bottom()) / 2) / len(myimage)))
            print("process: " + str(time() - start))

            start = time()
            win.clear_overlay()
            win.set_image(myimage)
            win.add_overlay(dets)
            print("show: " + str(time() - start))
            # dlib.hit_enter_to_continue()

for f in sys.argv[1:]:
    print("Processing file: {}".format(f))
    img = io.imread(f)
    # The 1 in the second argument indicates that we should upsample the image
    # 1 time. This will make everything bigger and allow us to detect more
    # faces.
    dets = detector(img, 1)
    print("Number of faces detected: {}".format(len(dets)))
    for i, d in enumerate(dets):
        print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
            i, d.left(), d.top(), d.right(), d.bottom()))
    win.clear_overlay()
    win.set_image(img)
    win.add_overlay(dets)
    dlib.hit_enter_to_continue()

# Finally, if you really want to you can ask the detector to tell you the score
# for each detection. The score is bigger for more confident detections.
# Also, the idx tells you which of the face sub-detectors matched. This can be
# used to broadly identify faces in different orientations.
if len(sys.argv[1:]) > 0:
    img = io.imread(sys.argv[1])
    dets, scores, idx = detector.run(img, 1)
    for i, d in enumerate(dets):
        print("Detection {}, score: {}, face_type:{}".format(
            d, scores[i], idx[i]))
我感受到你的痛苦。事实上,我最近使用了那个网络摄像头脚本(多次迭代;大量编辑)。我觉得我让它工作得很好,我想。为了让您看到我做了什么,我创建了一个包含详细信息的 GitHub Gist(代码;HTML 自述文件;示例输出):
https://gist.github.com/victoriastuart/8092a3dd7e97ab57ede7614251bf5cbd
可能问题是设置了阈值。 如上所述here
dots = detector(frame, 1)
应改为
dots = detector(frame)
以避免使用阈值。这对我有用,但同时存在帧处理速度过快的问题。