如何在 Python 中加速 Caffe 分类器
How to speed up a Caffe classifier in Python
我正在用 Python 调用 Caffe 分类器。我从摄像头获取图像,并用基于训练集训练好的模型对图像进行预测。它运行良好,但问题是速度非常慢,我估计只有 4 帧/秒。你能给我一些建议来缩短代码的计算时间吗?
问题可以解释如下。我必须通过以下代码
重新加载大小约为 80MB 的网络模型 age_net.caffemodel
# Weights file and network definition file passed to caffe.Classifier below.
age_net_pretrained='./age_net.caffemodel'
age_net_model_file='./deploy_age.prototxt'
# Build the classifier. `mean` is not defined in this fragment and must be
# loaded beforehand.
age_net = caffe.Classifier(age_net_model_file, age_net_pretrained,
mean=mean,
channel_swap=(2,1,0),
raw_scale=255,
image_dims=(256, 256))
并且对于每个输入图像(caffe_input),我调用预测函数:
prediction = age_net.predict([caffe_input])
我认为这是由于网络规模很大,所以预测函数需要很长时间来预测一张图像。我觉得速度慢的原因就在这里。
这是我的完整参考代码,我对它做过一些修改。
from conv_net import *
import matplotlib.pyplot as plt
import numpy as np
import cv2
import glob
import os
# Root of the Caffe checkout; its "python" subdirectory is put on sys.path so
# that `import caffe` can be resolved from there.
caffe_root = './caffe'
import sys
# Use os.path.join: plain string concatenation yielded './caffepython'
# because caffe_root has no trailing slash.
sys.path.insert(0, os.path.join(caffe_root, 'python'))
import caffe
# Directory whose files are globbed in the __main__ section.
DATA_PATH = './face/'
# Parameter file loaded into the ConvNet via load_params().
cnn_params = './params/gender_5x5_5_5x5_10.param'
# Cascade file handed to cv2.CascadeClassifier for face detection.
face_params = './params/haarcascade_frontalface_alt.xml'
def format_frame(frame):
    """Scale a frame by 1/255 as float32 and reverse its last axis.

    Args:
        frame: numpy array of pixel values; presumably a uint8 BGR image
            as produced by OpenCV (TODO confirm against callers).

    Returns:
        A float32 array of the same shape, with every value divided by 255
        and the last axis reversed (e.g. BGR -> RGB for a 3-channel image).
    """
    img = frame.astype(np.float32) / 255.
    # Reverse the order of the last axis (the channel axis for H x W x C input).
    return img[..., ::-1]
if __name__ == '__main__':
    # Script entry point: detect faces in webcam frames with a Haar cascade and
    # print an age-bracket prediction from the Caffe age network for each face.
    # Press Esc in the preview window to quit.

    # NOTE: `files` and the ConvNet `c` are set up here but are not used by the
    # capture loop below.
    files = glob.glob(os.path.join(DATA_PATH, '*.*'))

    # This is the configuration of the full convolutional part of the CNN
    # `d` is a list of dicts, where each dict represents a convolution-maxpooling
    # layer.
    # Eg c1 - first layer, convolution window size
    # p1 - first layer pooling window size
    # f_in1 - first layer no. of input feature arrays
    # f_out1 - first layer no. of output feature arrays
    d = [{'c1': (5, 5),
          'p1': (2, 2),
          'f_in1': 1, 'f_out1': 5},
         {'c2': (5, 5),
          'p2': (2, 2),
          'f_in2': 5, 'f_out2': 10}]
    # This is the configuration of the mlp part of the CNN
    # first tuple has the fan_in and fan_out of the input layer
    # of the mlp and so on.
    nnet = [(800, 256), (256, 2)]
    c = ConvNet(d, nnet, (45, 45))
    c.load_params(cnn_params)

    face_cascade = cv2.CascadeClassifier(face_params)

    plt.rcParams['figure.figsize'] = (10, 10)
    plt.rcParams['image.interpolation'] = 'nearest'
    plt.rcParams['image.cmap'] = 'gray'

    # Load the mean blob; `with` guarantees the file handle is closed.
    mean_filename = './mean.binaryproto'
    with open(mean_filename, "rb") as mean_file:
        proto_data = mean_file.read()
    a = caffe.io.caffe_pb2.BlobProto.FromString(proto_data)
    mean = caffe.io.blobproto_to_array(a)[0]

    # The network is built once, before the capture loop.
    age_net_pretrained = './age_net.caffemodel'
    age_net_model_file = './deploy_age.prototxt'
    age_net = caffe.Classifier(age_net_model_file, age_net_pretrained,
                               mean=mean,
                               channel_swap=(2, 1, 0),
                               raw_scale=255,
                               image_dims=(256, 256))
    # Label for each output index of the age network.
    age_list = ['(0, 2)', '(4, 6)', '(8, 12)', '(15, 20)', '(25, 32)', '(38, 43)', '(48, 53)', '(60, 100)']

    cap = cv2.VideoCapture(0)
    cv2.namedWindow("Image", cv2.WINDOW_NORMAL)
    try:
        while True:
            val, image = cap.read()
            # Stop when the camera reports a failed grab or yields no frame.
            if not val or image is None:
                break
            image = cv2.resize(image, (320, 240))
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            faces = face_cascade.detectMultiScale(gray, 1.3, 5, minSize=(30, 30))
            for (x, y, w, h) in faces:
                # Crop and resize BEFORE drawing the rectangle so the overlay
                # pixels do not end up in the network input.
                face_image = image[y:y + h, x:x + w]
                # NOTE(review): the crop is BGR in the 0-255 range, while the
                # classifier is configured with raw_scale=255 and channel_swap;
                # this may expect RGB input in [0, 1] (cf. format_frame) --
                # confirm against the Caffe preprocessing docs.
                caffe_input = cv2.resize(face_image, (256, 256)).astype(np.float32)
                cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 255))
                prediction = age_net.predict([caffe_input])
                # Single-argument form prints the same text on Python 2 and 3.
                print('predicted age: %s' % age_list[prediction[0].argmax()])
            cv2.imshow('Image', image)
            ch = 0xFF & cv2.waitKey(1)
            if ch == 27:  # Esc
                break
    finally:
        # Always free the camera and close the preview window.
        cap.release()
        cv2.destroyAllWindows()
尝试用 oversample=False 调用 age_net.predict([caffe_input]):
prediction = age_net.predict([caffe_input], oversample=False)
predict 的默认行为是为输入图像创建 10 个略有不同的裁剪,并将它们全部送入网络进行分类;禁用此选项后,您应该能获得约 10 倍的加速。
你也可以试试对网络进行通道剪枝(channel pruning)。这是一种有效裁剪每一层通道的算法,可以将网络加速 2-5 倍。GitHub 地址是:https://github.com/yihui-he/channel-pruning
我正在用 Python 调用 Caffe 分类器。我从摄像头获取图像,并用基于训练集训练好的模型对图像进行预测。它运行良好,但问题是速度非常慢,我估计只有 4 帧/秒。你能给我一些建议来缩短代码的计算时间吗?问题可以解释如下。我必须通过以下代码
重新加载大小约为 80MB 的网络模型age_net.caffemodel
# Weights file and network definition file passed to caffe.Classifier below.
age_net_pretrained='./age_net.caffemodel'
age_net_model_file='./deploy_age.prototxt'
# Build the classifier. `mean` is not defined in this fragment and must be
# loaded beforehand.
age_net = caffe.Classifier(age_net_model_file, age_net_pretrained,
mean=mean,
channel_swap=(2,1,0),
raw_scale=255,
image_dims=(256, 256))
并且对于每个输入图像(caffe_input),我调用预测函数:
prediction = age_net.predict([caffe_input])
我认为这是由于网络规模很大,所以预测函数需要很长时间来预测一张图像。我觉得速度慢的原因就在这里。
这是我的完整参考代码,我对它做过一些修改。
from conv_net import *
import matplotlib.pyplot as plt
import numpy as np
import cv2
import glob
import os
# Root of the Caffe checkout; its "python" subdirectory is put on sys.path so
# that `import caffe` can be resolved from there.
caffe_root = './caffe'
import sys
# Use os.path.join: plain string concatenation yielded './caffepython'
# because caffe_root has no trailing slash.
sys.path.insert(0, os.path.join(caffe_root, 'python'))
import caffe
# Directory whose files are globbed in the __main__ section.
DATA_PATH = './face/'
# Parameter file loaded into the ConvNet via load_params().
cnn_params = './params/gender_5x5_5_5x5_10.param'
# Cascade file handed to cv2.CascadeClassifier for face detection.
face_params = './params/haarcascade_frontalface_alt.xml'
def format_frame(frame):
    """Scale a frame by 1/255 as float32 and reverse its last axis.

    Args:
        frame: numpy array of pixel values; presumably a uint8 BGR image
            as produced by OpenCV (TODO confirm against callers).

    Returns:
        A float32 array of the same shape, with every value divided by 255
        and the last axis reversed (e.g. BGR -> RGB for a 3-channel image).
    """
    img = frame.astype(np.float32) / 255.
    # Reverse the order of the last axis (the channel axis for H x W x C input).
    return img[..., ::-1]
if __name__ == '__main__':
    # Script entry point: detect faces in webcam frames with a Haar cascade and
    # print an age-bracket prediction from the Caffe age network for each face.
    # Press Esc in the preview window to quit.

    # NOTE: `files` and the ConvNet `c` are set up here but are not used by the
    # capture loop below.
    files = glob.glob(os.path.join(DATA_PATH, '*.*'))

    # This is the configuration of the full convolutional part of the CNN
    # `d` is a list of dicts, where each dict represents a convolution-maxpooling
    # layer.
    # Eg c1 - first layer, convolution window size
    # p1 - first layer pooling window size
    # f_in1 - first layer no. of input feature arrays
    # f_out1 - first layer no. of output feature arrays
    d = [{'c1': (5, 5),
          'p1': (2, 2),
          'f_in1': 1, 'f_out1': 5},
         {'c2': (5, 5),
          'p2': (2, 2),
          'f_in2': 5, 'f_out2': 10}]
    # This is the configuration of the mlp part of the CNN
    # first tuple has the fan_in and fan_out of the input layer
    # of the mlp and so on.
    nnet = [(800, 256), (256, 2)]
    c = ConvNet(d, nnet, (45, 45))
    c.load_params(cnn_params)

    face_cascade = cv2.CascadeClassifier(face_params)

    plt.rcParams['figure.figsize'] = (10, 10)
    plt.rcParams['image.interpolation'] = 'nearest'
    plt.rcParams['image.cmap'] = 'gray'

    # Load the mean blob; `with` guarantees the file handle is closed.
    mean_filename = './mean.binaryproto'
    with open(mean_filename, "rb") as mean_file:
        proto_data = mean_file.read()
    a = caffe.io.caffe_pb2.BlobProto.FromString(proto_data)
    mean = caffe.io.blobproto_to_array(a)[0]

    # The network is built once, before the capture loop.
    age_net_pretrained = './age_net.caffemodel'
    age_net_model_file = './deploy_age.prototxt'
    age_net = caffe.Classifier(age_net_model_file, age_net_pretrained,
                               mean=mean,
                               channel_swap=(2, 1, 0),
                               raw_scale=255,
                               image_dims=(256, 256))
    # Label for each output index of the age network.
    age_list = ['(0, 2)', '(4, 6)', '(8, 12)', '(15, 20)', '(25, 32)', '(38, 43)', '(48, 53)', '(60, 100)']

    cap = cv2.VideoCapture(0)
    cv2.namedWindow("Image", cv2.WINDOW_NORMAL)
    try:
        while True:
            val, image = cap.read()
            # Stop when the camera reports a failed grab or yields no frame.
            if not val or image is None:
                break
            image = cv2.resize(image, (320, 240))
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            faces = face_cascade.detectMultiScale(gray, 1.3, 5, minSize=(30, 30))
            for (x, y, w, h) in faces:
                # Crop and resize BEFORE drawing the rectangle so the overlay
                # pixels do not end up in the network input.
                face_image = image[y:y + h, x:x + w]
                # NOTE(review): the crop is BGR in the 0-255 range, while the
                # classifier is configured with raw_scale=255 and channel_swap;
                # this may expect RGB input in [0, 1] (cf. format_frame) --
                # confirm against the Caffe preprocessing docs.
                caffe_input = cv2.resize(face_image, (256, 256)).astype(np.float32)
                cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 255))
                prediction = age_net.predict([caffe_input])
                # Single-argument form prints the same text on Python 2 and 3.
                print('predicted age: %s' % age_list[prediction[0].argmax()])
            cv2.imshow('Image', image)
            ch = 0xFF & cv2.waitKey(1)
            if ch == 27:  # Esc
                break
    finally:
        # Always free the camera and close the preview window.
        cap.release()
        cv2.destroyAllWindows()
尝试用 oversample=False 调用 age_net.predict([caffe_input]):
prediction = age_net.predict([caffe_input], oversample=False)
predict 的默认行为是为输入图像创建 10 个略有不同的裁剪,并将它们全部送入网络进行分类;禁用此选项后,您应该能获得约 10 倍的加速。
你也可以试试对网络进行通道剪枝(channel pruning)。这是一种有效裁剪每一层通道的算法,可以将网络加速 2-5 倍。GitHub 地址是:https://github.com/yihui-he/channel-pruning