使用比例参数训练 Caffe CNN
Train a Caffe CNN with Scale Parameter
我改编了 bvlc_reference_caffenet 的 train_val.prototxt 以在 Caffe 中实现 VGG-16 克隆,并能够使用 GTX 1050 和 batch_size: 6
和 [= 对其进行训练13=]。但是,我想将输入数据从 [0;255] 缩放到 [0;1],因为此 CNN 的目标平台精度有限。为了缩放数据,我引入了 scale: 0.00390625
参数(这取自在目标平台上运行良好的 Caffe LeNet 示例)。但是使用 scale
参数,精度不会增加(> 40000 次迭代)并且损失在训练期间也不会改变。
如何使用 scale
参数训练这个 CNN?
train_val.prototxt
name: "ES VGG"
layer {
name: "data"
type: "Data"
top: "data"
top: "label"
include {
phase: TRAIN
}
transform_param {
scale: 0.00390625
mirror: true
crop_size: 224
mean_file: "/local/datasets/imagenet/ilsvrc12/imagenet_mean.binaryproto"
}
data_param {
source: "/local/datasets/imagenet/ilsvrc12_train_lmdb"
batch_size: 6
backend: LMDB
}
}
layer {
name: "data"
type: "Data"
top: "data"
top: "label"
include {
phase: TEST
}
transform_param {
scale: 0.00390625
mirror: false
crop_size: 224
mean_file: "/local/datasets/imagenet/ilsvrc12/imagenet_mean.binaryproto"
}
data_param {
source: "/local/datasets/imagenet/ilsvrc12_val_lmdb"
batch_size: 6
backend: LMDB
}
}
layer {
name: "conv1_1"
type: "Convolution"
bottom: "data"
top: "conv1_1"
convolution_param {
num_output: 64
kernel_size: 3
pad: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu1_1"
type: "ReLU"
bottom: "conv1_1"
top: "conv1_1"
}
layer {
name: "conv1_2"
type: "Convolution"
bottom: "conv1_1"
top: "conv1_2"
convolution_param {
num_output: 64
kernel_size: 3
pad: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu1_2"
type: "ReLU"
bottom: "conv1_2"
top: "conv1_2"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1_2"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv2_1"
type: "Convolution"
bottom: "pool1"
top: "conv2_1"
convolution_param {
num_output: 128
kernel_size: 3
pad: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu2_1"
type: "ReLU"
bottom: "conv2_1"
top: "conv2_1"
}
layer {
name: "conv2_2"
type: "Convolution"
bottom: "conv2_1"
top: "conv2_2"
convolution_param {
num_output: 128
kernel_size: 3
pad: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu2_2"
type: "ReLU"
bottom: "conv2_2"
top: "conv2_2"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2_2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv3_1"
type: "Convolution"
bottom: "pool2"
top: "conv3_1"
convolution_param {
num_output: 256
kernel_size: 3
pad: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu3_1"
type: "ReLU"
bottom: "conv3_1"
top: "conv3_1"
}
layer {
name: "conv3_2"
type: "Convolution"
bottom: "conv3_1"
top: "conv3_2"
convolution_param {
num_output: 256
kernel_size: 3
pad: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu3_2"
type: "ReLU"
bottom: "conv3_2"
top: "conv3_2"
}
layer {
name: "conv3_3"
type: "Convolution"
bottom: "conv3_2"
top: "conv3_3"
convolution_param {
num_output: 256
kernel_size: 3
pad: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu3_3"
type: "ReLU"
bottom: "conv3_3"
top: "conv3_3"
}
layer {
name: "pool3"
type: "Pooling"
bottom: "conv3_3"
top: "pool3"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv4_1"
type: "Convolution"
bottom: "pool3"
top: "conv4_1"
convolution_param {
num_output: 512
kernel_size: 3
pad: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu4_1"
type: "ReLU"
bottom: "conv4_1"
top: "conv4_1"
}
layer {
name: "conv4_2"
type: "Convolution"
bottom: "conv4_1"
top: "conv4_2"
convolution_param {
num_output: 512
kernel_size: 3
pad: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu4_2"
type: "ReLU"
bottom: "conv4_2"
top: "conv4_2"
}
layer {
name: "conv4_3"
type: "Convolution"
bottom: "conv4_2"
top: "conv4_3"
convolution_param {
num_output: 512
kernel_size: 3
pad: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu4_3"
type: "ReLU"
bottom: "conv4_3"
top: "conv4_3"
}
layer {
name: "pool4"
type: "Pooling"
bottom: "conv4_3"
top: "pool4"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv5_1"
type: "Convolution"
bottom: "pool4"
top: "conv5_1"
convolution_param {
num_output: 512
kernel_size: 3
pad: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu5_1"
type: "ReLU"
bottom: "conv5_1"
top: "conv5_1"
}
layer {
name: "conv5_2"
type: "Convolution"
bottom: "conv5_1"
top: "conv5_2"
convolution_param {
num_output: 512
kernel_size: 3
pad: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu5_2"
type: "ReLU"
bottom: "conv5_2"
top: "conv5_2"
}
layer {
name: "conv5_3"
type: "Convolution"
bottom: "conv5_2"
top: "conv5_3"
convolution_param {
num_output: 512
kernel_size: 3
pad: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "pool5"
type: "Pooling"
bottom: "conv5_3"
top: "pool5"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "fc6"
type: "InnerProduct"
bottom: "pool5"
top: "fc6"
inner_product_param {
num_output: 4096
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0.01
}
}
}
layer {
name: "relu6"
type: "ReLU"
bottom: "fc6"
top: "fc6"
}
layer {
name: "drop6"
type: "Dropout"
bottom: "fc6"
top: "fc6"
dropout_param {
dropout_ratio: 0.5
}
}
layer {
name: "fc7"
type: "InnerProduct"
bottom: "fc6"
top: "fc7"
inner_product_param {
num_output: 4096
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0.01
}
}
}
layer {
name: "relu7"
type: "ReLU"
bottom: "fc7"
top: "fc7"
}
layer {
name: "drop7"
type: "Dropout"
bottom: "fc7"
top: "fc7"
dropout_param {
dropout_ratio: 0.5
}
}
layer {
name: "fc8"
type: "InnerProduct"
bottom: "fc7"
top: "fc8"
inner_product_param {
num_output: 1000
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0.01
}
}
}
layer {
name: "loss"
type: "SoftmaxWithLoss"
bottom: "fc8"
bottom: "label"
top: "loss"
}
layer {
name: "accuracytop1"
type: "Accuracy"
bottom: "fc8"
bottom: "label"
top: "accuracytop1"
accuracy_param {
top_k: 1
}
include {
phase: TEST
}
}
layer {
name: "accuracytop5"
type: "Accuracy"
bottom: "fc8"
bottom: "label"
top: "accuracytop5"
accuracy_param {
top_k: 5
}
include {
phase: TEST
}
}
solver.prototxt
net: "models/es_vgg/train_val.prototxt"
test_iter: 1000
test_interval: 1000
base_lr: 0.0648
lr_policy: "step"
gamma: 0.1
stepsize: 100000
display: 20
max_iter: 18900000
momentum: 0.9
weight_decay: 0.0005
snapshot: 10000
snapshot_prefix: "models/es_vgg/es_vgg_train"
solver_mode: GPU
如果将输入除以 255
,则需要将第一个卷积层的权重 "conv1_1"
乘以 255
以补偿此变化。
查看 net surgery 了解如何做到这一点。
例如(在python):
import caffe
net = caffe.Net('models/es_vgg/train_val.prototxt', caffe.TEST) # no .caffemodel weights supplied - weights are randomly init
# scale kernels of first conv layer by 255
net.params['conv1_1'][0].data[...] = 255. * net.params['conv1_1'][0].data
# save the scaled weights
net.save('models/es_vgg/init_scaled.caffemodel')
现在您需要开始训练'models/es_vgg/init_scaled.caffemodel'
。
我改编了 bvlc_reference_caffenet 的 train_val.prototxt 以在 Caffe 中实现 VGG-16 克隆,并能够使用 GTX 1050 和 batch_size: 6
和 [= 对其进行训练13=]。但是,我想将输入数据从 [0;255] 缩放到 [0;1],因为此 CNN 的目标平台精度有限。为了缩放数据,我引入了 scale: 0.00390625
参数(这取自在目标平台上运行良好的 Caffe LeNet 示例)。但是使用 scale
参数,精度不会增加(> 40000 次迭代)并且损失在训练期间也不会改变。
如何使用 scale
参数训练这个 CNN?
train_val.prototxt
name: "ES VGG"
layer {
name: "data"
type: "Data"
top: "data"
top: "label"
include {
phase: TRAIN
}
transform_param {
scale: 0.00390625
mirror: true
crop_size: 224
mean_file: "/local/datasets/imagenet/ilsvrc12/imagenet_mean.binaryproto"
}
data_param {
source: "/local/datasets/imagenet/ilsvrc12_train_lmdb"
batch_size: 6
backend: LMDB
}
}
layer {
name: "data"
type: "Data"
top: "data"
top: "label"
include {
phase: TEST
}
transform_param {
scale: 0.00390625
mirror: false
crop_size: 224
mean_file: "/local/datasets/imagenet/ilsvrc12/imagenet_mean.binaryproto"
}
data_param {
source: "/local/datasets/imagenet/ilsvrc12_val_lmdb"
batch_size: 6
backend: LMDB
}
}
layer {
name: "conv1_1"
type: "Convolution"
bottom: "data"
top: "conv1_1"
convolution_param {
num_output: 64
kernel_size: 3
pad: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu1_1"
type: "ReLU"
bottom: "conv1_1"
top: "conv1_1"
}
layer {
name: "conv1_2"
type: "Convolution"
bottom: "conv1_1"
top: "conv1_2"
convolution_param {
num_output: 64
kernel_size: 3
pad: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu1_2"
type: "ReLU"
bottom: "conv1_2"
top: "conv1_2"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1_2"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv2_1"
type: "Convolution"
bottom: "pool1"
top: "conv2_1"
convolution_param {
num_output: 128
kernel_size: 3
pad: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu2_1"
type: "ReLU"
bottom: "conv2_1"
top: "conv2_1"
}
layer {
name: "conv2_2"
type: "Convolution"
bottom: "conv2_1"
top: "conv2_2"
convolution_param {
num_output: 128
kernel_size: 3
pad: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu2_2"
type: "ReLU"
bottom: "conv2_2"
top: "conv2_2"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2_2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv3_1"
type: "Convolution"
bottom: "pool2"
top: "conv3_1"
convolution_param {
num_output: 256
kernel_size: 3
pad: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu3_1"
type: "ReLU"
bottom: "conv3_1"
top: "conv3_1"
}
layer {
name: "conv3_2"
type: "Convolution"
bottom: "conv3_1"
top: "conv3_2"
convolution_param {
num_output: 256
kernel_size: 3
pad: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu3_2"
type: "ReLU"
bottom: "conv3_2"
top: "conv3_2"
}
layer {
name: "conv3_3"
type: "Convolution"
bottom: "conv3_2"
top: "conv3_3"
convolution_param {
num_output: 256
kernel_size: 3
pad: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu3_3"
type: "ReLU"
bottom: "conv3_3"
top: "conv3_3"
}
layer {
name: "pool3"
type: "Pooling"
bottom: "conv3_3"
top: "pool3"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv4_1"
type: "Convolution"
bottom: "pool3"
top: "conv4_1"
convolution_param {
num_output: 512
kernel_size: 3
pad: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu4_1"
type: "ReLU"
bottom: "conv4_1"
top: "conv4_1"
}
layer {
name: "conv4_2"
type: "Convolution"
bottom: "conv4_1"
top: "conv4_2"
convolution_param {
num_output: 512
kernel_size: 3
pad: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu4_2"
type: "ReLU"
bottom: "conv4_2"
top: "conv4_2"
}
layer {
name: "conv4_3"
type: "Convolution"
bottom: "conv4_2"
top: "conv4_3"
convolution_param {
num_output: 512
kernel_size: 3
pad: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu4_3"
type: "ReLU"
bottom: "conv4_3"
top: "conv4_3"
}
layer {
name: "pool4"
type: "Pooling"
bottom: "conv4_3"
top: "pool4"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv5_1"
type: "Convolution"
bottom: "pool4"
top: "conv5_1"
convolution_param {
num_output: 512
kernel_size: 3
pad: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu5_1"
type: "ReLU"
bottom: "conv5_1"
top: "conv5_1"
}
layer {
name: "conv5_2"
type: "Convolution"
bottom: "conv5_1"
top: "conv5_2"
convolution_param {
num_output: 512
kernel_size: 3
pad: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu5_2"
type: "ReLU"
bottom: "conv5_2"
top: "conv5_2"
}
layer {
name: "conv5_3"
type: "Convolution"
bottom: "conv5_2"
top: "conv5_3"
convolution_param {
num_output: 512
kernel_size: 3
pad: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "pool5"
type: "Pooling"
bottom: "conv5_3"
top: "pool5"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "fc6"
type: "InnerProduct"
bottom: "pool5"
top: "fc6"
inner_product_param {
num_output: 4096
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0.01
}
}
}
layer {
name: "relu6"
type: "ReLU"
bottom: "fc6"
top: "fc6"
}
layer {
name: "drop6"
type: "Dropout"
bottom: "fc6"
top: "fc6"
dropout_param {
dropout_ratio: 0.5
}
}
layer {
name: "fc7"
type: "InnerProduct"
bottom: "fc6"
top: "fc7"
inner_product_param {
num_output: 4096
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0.01
}
}
}
layer {
name: "relu7"
type: "ReLU"
bottom: "fc7"
top: "fc7"
}
layer {
name: "drop7"
type: "Dropout"
bottom: "fc7"
top: "fc7"
dropout_param {
dropout_ratio: 0.5
}
}
layer {
name: "fc8"
type: "InnerProduct"
bottom: "fc7"
top: "fc8"
inner_product_param {
num_output: 1000
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0.01
}
}
}
layer {
name: "loss"
type: "SoftmaxWithLoss"
bottom: "fc8"
bottom: "label"
top: "loss"
}
layer {
name: "accuracytop1"
type: "Accuracy"
bottom: "fc8"
bottom: "label"
top: "accuracytop1"
accuracy_param {
top_k: 1
}
include {
phase: TEST
}
}
layer {
name: "accuracytop5"
type: "Accuracy"
bottom: "fc8"
bottom: "label"
top: "accuracytop5"
accuracy_param {
top_k: 5
}
include {
phase: TEST
}
}
solver.prototxt
net: "models/es_vgg/train_val.prototxt"
test_iter: 1000
test_interval: 1000
base_lr: 0.0648
lr_policy: "step"
gamma: 0.1
stepsize: 100000
display: 20
max_iter: 18900000
momentum: 0.9
weight_decay: 0.0005
snapshot: 10000
snapshot_prefix: "models/es_vgg/es_vgg_train"
solver_mode: GPU
如果将输入除以 255
,则需要将第一个卷积层的权重 "conv1_1"
乘以 255
以补偿此变化。
查看 net surgery 了解如何做到这一点。
例如(在python):
import caffe
net = caffe.Net('models/es_vgg/train_val.prototxt', caffe.TEST) # no .caffemodel weights supplied - weights are randomly init
# scale kernels of first conv layer by 255
net.params['conv1_1'][0].data[...] = 255. * net.params['conv1_1'][0].data
# save the scaled weights
net.save('models/es_vgg/init_scaled.caffemodel')
现在您需要开始训练'models/es_vgg/init_scaled.caffemodel'
。