Caffe: predict multiple labels at once (in Python)
在caffe中,我希望能够一次预测多个标签,比如键盘方向键:可以同时按下两个键。我正在尝试使用卷积神经网络在 TM Nation Forever
游戏中驾驶虚拟 F1 汽车,我计划很快收集和塑造训练数据,我想知道我做的是否正确。
我认为这个 post 会给出一个很好的例子来说明如何在 Python 中进行这种分类,但我还没有找到任何令人满意的例子来说明如何做到这一点。
# Write the training set to a gzip-compressed HDF5 file, then record that
# file's path in the list file that is later handed to the HDF5Data layer.
# X, S, f, b, l, r, train_filename and train_filename_list_txt must already be
# defined before this point (they are not created here).
comp_kwargs = {'compression': 'gzip', 'compression_opts': 1}
# The file handle is deliberately NOT called `f`: the forward-label array is
# named `f`, and `with ... as f` would rebind it, making `f.astype(...)` run
# against the HDF5 file object instead of the label array.
with h5py.File(train_filename, 'w') as h5_file:
    h5_file.create_dataset('data_img', data=X, **comp_kwargs)
    # np.float64 is the type the old `np.float_` alias pointed to; the alias
    # itself was removed in NumPy 2.0.
    h5_file.create_dataset('data_speed', data=S.astype(np.float64), **comp_kwargs)
    h5_file.create_dataset('label_forward', data=f.astype(np.int_), **comp_kwargs)
    h5_file.create_dataset('label_backward', data=b.astype(np.int_), **comp_kwargs)
    h5_file.create_dataset('label_left', data=l.astype(np.int_), **comp_kwargs)
    h5_file.create_dataset('label_right', data=r.astype(np.int_), **comp_kwargs)
# One HDF5 path per line.
with open(train_filename_list_txt, 'w') as list_file:
    list_file.write(train_filename + '\n')
关于 HDF5 数据形状的信息
data_img -> number N x channel K x height H x width W
data_speed -> number N x 1 float number (from 0.0 to 1.0)
注意:我使用 numpy 的 "int_" 作为标签的数据类型,以便把标签当作分类的类别(class)使用。
label_forward -> number N x 1 integer number (0 or 1)
label_backward -> number N x 1 integer number (0 or 1)
label_left -> number N x 1 integer number (0 or 1)
label_right -> number N x 1 integer number (0 or 1)
import numpy as np
import caffe
from caffe import layers as L
from caffe import params as P
def cnn(hdf5, batch_size):
    """Build the multi-label CNN spec and return ``n.to_proto()``.

    The net reads six tops from one HDF5Data layer (image, speed and four
    binary key labels), runs the image through four conv/pool/dropout/ReLU
    stages, appends the speed value to the flattened features, and ends in
    four independent 2-way softmax heads (forward, backward, left, right),
    each with its own accuracy and loss layer.

    Args:
        hdf5: Value passed as ``source`` to the HDF5Data layer (the text file
            listing the HDF5 files).
        batch_size: Value passed as ``batch_size`` to the HDF5Data layer.

    Returns:
        The result of ``caffe.NetSpec.to_proto()`` for the assembled net.
    """
    n = caffe.NetSpec()
    # Every top must be assigned on `n` so that its blob name matches the
    # HDF5 dataset of the same name (a bare `label_right` would not be named).
    (n.data_img, n.data_speed, n.label_forward,
     n.label_backward, n.label_left, n.label_right) = L.HDF5Data(
        batch_size=batch_size, source=hdf5, ntop=6)

    n.conv1 = L.Convolution(n.data_img, kernel_size=7, num_output=32, weight_filler=dict(type='xavier'))
    n.pool1 = L.Pooling(n.conv1, kernel_size=3, stride=2, pool=P.Pooling.MAX)
    n.drop1 = L.Dropout(n.pool1, in_place=True)
    n.relu1 = L.ReLU(n.drop1, in_place=True)

    n.conv2 = L.Convolution(n.relu1, kernel_size=5, num_output=42, weight_filler=dict(type='xavier'))
    n.pool2 = L.Pooling(n.conv2, kernel_size=3, stride=2, pool=P.Pooling.MAX)
    n.drop2 = L.Dropout(n.pool2, in_place=True)
    n.relu2 = L.ReLU(n.drop2, in_place=True)

    n.conv3 = L.Convolution(n.relu2, kernel_size=5, num_output=50, weight_filler=dict(type='xavier'))
    n.pool3 = L.Pooling(n.conv3, kernel_size=3, stride=2, pool=P.Pooling.MAX)
    n.drop3 = L.Dropout(n.pool3, in_place=True)
    n.relu3 = L.ReLU(n.drop3, in_place=True)

    n.conv4 = L.Convolution(n.relu3, kernel_size=3, num_output=64, weight_filler=dict(type='xavier'))
    n.pool4 = L.Pooling(n.conv4, kernel_size=3, stride=2, pool=P.Pooling.AVE)
    # Author's note: this layer is expected to output `batch_size*64*3*3`
    # (576 features per sample) for the intended input size -- this depends on
    # the image size and cannot be confirmed from this function alone.
    # Open question from the author: shrink the `3*3` map to `2*2` or `1*1`
    # and drop the dropout at this level?
    n.drop4 = L.Dropout(n.pool4, in_place=True)
    n.relu4 = L.ReLU(n.drop4, in_place=True)

    # Concat needs inputs with the same number of axes, so collapse the
    # N x C x H x W feature map to N x (C*H*W) before joining it with the
    # speed blob. Assumes data_speed is stored as N x 1 -- TODO confirm.
    n.flat4 = L.Flatten(n.relu4)
    # Not in-place: Concat has two bottoms but must produce one new top.
    n.join_speed = L.Concat(n.flat4, n.data_speed)

    n.ip1 = L.InnerProduct(n.join_speed, num_output=512, weight_filler=dict(type='xavier'))
    n.sig1 = L.Sigmoid(n.ip1, in_place=True)

    # One independent 2-class head per key, so several keys can be predicted
    # as pressed at the same time.
    n.ip_f = L.InnerProduct(n.sig1, num_output=2, weight_filler=dict(type='xavier'))
    n.accuracy_f = L.Accuracy(n.ip_f, n.label_forward)
    n.loss_f = L.SoftmaxWithLoss(n.ip_f, n.label_forward)

    n.ip_b = L.InnerProduct(n.sig1, num_output=2, weight_filler=dict(type='xavier'))
    n.accuracy_b = L.Accuracy(n.ip_b, n.label_backward)
    n.loss_b = L.SoftmaxWithLoss(n.ip_b, n.label_backward)

    n.ip_l = L.InnerProduct(n.sig1, num_output=2, weight_filler=dict(type='xavier'))
    n.accuracy_l = L.Accuracy(n.ip_l, n.label_left)
    n.loss_l = L.SoftmaxWithLoss(n.ip_l, n.label_left)

    n.ip_r = L.InnerProduct(n.sig1, num_output=2, weight_filler=dict(type='xavier'))
    n.accuracy_r = L.Accuracy(n.ip_r, n.label_right)
    n.loss_r = L.SoftmaxWithLoss(n.ip_r, n.label_right)

    return n.to_proto()
# Serialize the training net definition to cnn_train.prototxt.
with open('cnn_train.prototxt', 'w') as proto_file:
    # cnn() only returns the net definition; it has to be converted to text
    # and written explicitly, otherwise the file stays empty.
    proto_file.write(str(cnn(train_filename_list_txt, 100)))
此外,我想一次只按下左箭头键或右箭头键之一。考虑到我将使用一些 SoftmaxWithLossLayer,
而不是之后以编程方式进行处理,像下面这样把 label_right 和 label_left 融合成一个标签是个好主意吗?
-> number N x 1 integer number (0 for left or 1 for right)
我使用 cifar-100 数据集对此进行了测试,其中既有粗标签也有细标签,效果很好。
在caffe中,我希望能够一次预测多个标签,比如键盘方向键:可以同时按下两个键。我正在尝试使用卷积神经网络在 TM Nation Forever
游戏中驾驶虚拟 F1 汽车,我计划很快收集和塑造训练数据,我想知道我做的是否正确。
我认为这个 post 会给出一个很好的例子来说明如何在 Python 中进行这种分类,但我还没有找到任何令人满意的例子来说明如何做到这一点。
# Write the training set to a gzip-compressed HDF5 file, then record that
# file's path in the list file that is later handed to the HDF5Data layer.
# X, S, f, b, l, r, train_filename and train_filename_list_txt must already be
# defined before this point (they are not created here).
comp_kwargs = {'compression': 'gzip', 'compression_opts': 1}
# The file handle is deliberately NOT called `f`: the forward-label array is
# named `f`, and `with ... as f` would rebind it, making `f.astype(...)` run
# against the HDF5 file object instead of the label array.
with h5py.File(train_filename, 'w') as h5_file:
    h5_file.create_dataset('data_img', data=X, **comp_kwargs)
    # np.float64 is the type the old `np.float_` alias pointed to; the alias
    # itself was removed in NumPy 2.0.
    h5_file.create_dataset('data_speed', data=S.astype(np.float64), **comp_kwargs)
    h5_file.create_dataset('label_forward', data=f.astype(np.int_), **comp_kwargs)
    h5_file.create_dataset('label_backward', data=b.astype(np.int_), **comp_kwargs)
    h5_file.create_dataset('label_left', data=l.astype(np.int_), **comp_kwargs)
    h5_file.create_dataset('label_right', data=r.astype(np.int_), **comp_kwargs)
# One HDF5 path per line.
with open(train_filename_list_txt, 'w') as list_file:
    list_file.write(train_filename + '\n')
关于 HDF5 数据形状的信息
data_img -> number N x channel K x height H x width W
data_speed -> number N x 1 float number (from 0.0 to 1.0)
注意:我使用 numpy 的 "int_" 作为标签的数据类型,以便把标签当作分类的类别(class)使用。
label_forward -> number N x 1 integer number (0 or 1)
label_backward -> number N x 1 integer number (0 or 1)
label_left -> number N x 1 integer number (0 or 1)
label_right -> number N x 1 integer number (0 or 1)
import numpy as np
import caffe
from caffe import layers as L
from caffe import params as P
def cnn(hdf5, batch_size):
    """Build the multi-label CNN spec and return ``n.to_proto()``.

    The net reads six tops from one HDF5Data layer (image, speed and four
    binary key labels), runs the image through four conv/pool/dropout/ReLU
    stages, appends the speed value to the flattened features, and ends in
    four independent 2-way softmax heads (forward, backward, left, right),
    each with its own accuracy and loss layer.

    Args:
        hdf5: Value passed as ``source`` to the HDF5Data layer (the text file
            listing the HDF5 files).
        batch_size: Value passed as ``batch_size`` to the HDF5Data layer.

    Returns:
        The result of ``caffe.NetSpec.to_proto()`` for the assembled net.
    """
    n = caffe.NetSpec()
    # Every top must be assigned on `n` so that its blob name matches the
    # HDF5 dataset of the same name (a bare `label_right` would not be named).
    (n.data_img, n.data_speed, n.label_forward,
     n.label_backward, n.label_left, n.label_right) = L.HDF5Data(
        batch_size=batch_size, source=hdf5, ntop=6)

    n.conv1 = L.Convolution(n.data_img, kernel_size=7, num_output=32, weight_filler=dict(type='xavier'))
    n.pool1 = L.Pooling(n.conv1, kernel_size=3, stride=2, pool=P.Pooling.MAX)
    n.drop1 = L.Dropout(n.pool1, in_place=True)
    n.relu1 = L.ReLU(n.drop1, in_place=True)

    n.conv2 = L.Convolution(n.relu1, kernel_size=5, num_output=42, weight_filler=dict(type='xavier'))
    n.pool2 = L.Pooling(n.conv2, kernel_size=3, stride=2, pool=P.Pooling.MAX)
    n.drop2 = L.Dropout(n.pool2, in_place=True)
    n.relu2 = L.ReLU(n.drop2, in_place=True)

    n.conv3 = L.Convolution(n.relu2, kernel_size=5, num_output=50, weight_filler=dict(type='xavier'))
    n.pool3 = L.Pooling(n.conv3, kernel_size=3, stride=2, pool=P.Pooling.MAX)
    n.drop3 = L.Dropout(n.pool3, in_place=True)
    n.relu3 = L.ReLU(n.drop3, in_place=True)

    n.conv4 = L.Convolution(n.relu3, kernel_size=3, num_output=64, weight_filler=dict(type='xavier'))
    n.pool4 = L.Pooling(n.conv4, kernel_size=3, stride=2, pool=P.Pooling.AVE)
    # Author's note: this layer is expected to output `batch_size*64*3*3`
    # (576 features per sample) for the intended input size -- this depends on
    # the image size and cannot be confirmed from this function alone.
    # Open question from the author: shrink the `3*3` map to `2*2` or `1*1`
    # and drop the dropout at this level?
    n.drop4 = L.Dropout(n.pool4, in_place=True)
    n.relu4 = L.ReLU(n.drop4, in_place=True)

    # Concat needs inputs with the same number of axes, so collapse the
    # N x C x H x W feature map to N x (C*H*W) before joining it with the
    # speed blob. Assumes data_speed is stored as N x 1 -- TODO confirm.
    n.flat4 = L.Flatten(n.relu4)
    # Not in-place: Concat has two bottoms but must produce one new top.
    n.join_speed = L.Concat(n.flat4, n.data_speed)

    n.ip1 = L.InnerProduct(n.join_speed, num_output=512, weight_filler=dict(type='xavier'))
    n.sig1 = L.Sigmoid(n.ip1, in_place=True)

    # One independent 2-class head per key, so several keys can be predicted
    # as pressed at the same time.
    n.ip_f = L.InnerProduct(n.sig1, num_output=2, weight_filler=dict(type='xavier'))
    n.accuracy_f = L.Accuracy(n.ip_f, n.label_forward)
    n.loss_f = L.SoftmaxWithLoss(n.ip_f, n.label_forward)

    n.ip_b = L.InnerProduct(n.sig1, num_output=2, weight_filler=dict(type='xavier'))
    n.accuracy_b = L.Accuracy(n.ip_b, n.label_backward)
    n.loss_b = L.SoftmaxWithLoss(n.ip_b, n.label_backward)

    n.ip_l = L.InnerProduct(n.sig1, num_output=2, weight_filler=dict(type='xavier'))
    n.accuracy_l = L.Accuracy(n.ip_l, n.label_left)
    n.loss_l = L.SoftmaxWithLoss(n.ip_l, n.label_left)

    n.ip_r = L.InnerProduct(n.sig1, num_output=2, weight_filler=dict(type='xavier'))
    n.accuracy_r = L.Accuracy(n.ip_r, n.label_right)
    n.loss_r = L.SoftmaxWithLoss(n.ip_r, n.label_right)

    return n.to_proto()
# Serialize the training net definition to cnn_train.prototxt.
with open('cnn_train.prototxt', 'w') as proto_file:
    # cnn() only returns the net definition; it has to be converted to text
    # and written explicitly, otherwise the file stays empty.
    proto_file.write(str(cnn(train_filename_list_txt, 100)))
此外,我想一次只按下左箭头键或右箭头键之一。考虑到我将使用一些 SoftmaxWithLossLayer,而不是之后以编程方式进行处理,像下面这样把 label_right 和 label_left 融合成一个标签是个好主意吗?
-> number N x 1 integer number (0 for left or 1 for right)
最后,我的做法对这个任务来说基本是正确的,只是连接(Concat)层可能无法工作,因为要连接的两个输入形状不同。我使用 cifar-100 数据集对此进行了测试,其中既有粗标签也有细标签,效果很好。