我无法让 Caffe 工作
I can't get Caffe working
经过一番挣扎,我决定尝试一个最简单的任务,训练一个网络来分类数字是否为非负数。而我失败了...
我用下面的代码生成了数据。而且我不确定这是否正确。我从文件中读回了数据,但它看起来是正确的...
#pragma comment(lib, "hdf5")
#pragma comment(lib, "hdf5_cpp")
#include <cstdint>
#include <array>
#include <random>
#include <vector>
using namespace std;
#include <H5Cpp.h>
using namespace H5;
mt19937 rng;
float randf(float i_min, float i_max)
{
return rng() * ((i_max - i_min) / 0x100000000) + i_min;
}
#define NAME "pos_neg"
#define TRAIN_SET_SIZE 0x100000
#define TEST_SET_SIZE 0x10000
void make(const string &i_cat, uint32_t i_count)
{
H5File file(NAME "." + i_cat + ".h5", H5F_ACC_TRUNC);
hsize_t dataDim[2] = { i_count, 1 };
hsize_t labelDim = i_count;
FloatType dataType(PredType::NATIVE_FLOAT);
DataSpace dataSpace(2, dataDim);
DataSet dataSet = file.createDataSet("data", dataType, dataSpace);
IntType labelType(PredType::NATIVE_INT);
DataSpace labelSpace(1, &labelDim);
DataSet labelSet = file.createDataSet("label", labelType, labelSpace);
vector<float> data(i_count);
vector<int> labels(i_count);
for (uint32_t i = 0; i < i_count / 2; ++i)
{
labels[i * 2] = 0;
data[i * 2] = randf(0.f, 1.f);
labels[i * 2 + 1] = 1;
data[i * 2 + 1] = randf(-1.f, 0.f);
}
dataSet.write(&data[0], PredType::NATIVE_FLOAT);
labelSet.write(&labels[0], PredType::NATIVE_INT);
}
int main()
{
make("train", TRAIN_SET_SIZE);
make("test", TEST_SET_SIZE);
}
网络看起来像这样
name: "PosNegNet"
layer {
name: "data"
type: "HDF5Data"
top: "data"
top: "label"
include {
phase: TRAIN
}
hdf5_data_param {
source: "pos_neg_train.txt"
batch_size: 64
}
}
layer {
name: "data"
type: "HDF5Data"
top: "data"
top: "label"
include {
phase: TEST
}
hdf5_data_param {
source: "pos_neg_test.txt"
batch_size: 65536
}
}
layer {
name: "fc1"
type: "InnerProduct"
bottom: "data"
top: "fc1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
inner_product_param {
num_output: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "loss"
type: "SoftmaxWithLoss"
bottom: "fc1"
bottom: "label"
top: "loss"
}
layer {
name: "accuracy"
type: "Accuracy"
bottom: "fc1"
bottom: "label"
top: "accuracy"
include {
phase: TEST
}
}
还有我试过的一组参数
net: "pos_neg.prototxt"
test_iter: 1
test_interval: 500
base_lr: 0.001
momentum: 0.9
momentum2: 0.999
lr_policy: "fixed"
display: 100
max_iter: 10000
snapshot: 5000
snapshot_prefix: "pos_neg"
type: "Adam"
solver_mode: GPU
我 运行 caffe.exe Windows。而且我总是得到损失 = 0,准确度 = 0.5。
我知道我一定做错了什么,但我不知道从哪里看,好吧,除了挖掘源代码...
而且我发现caffe 相当慢。对于 float[64] 数据,在 1080Ti 上每批次有 1024 个项目,我每秒只能进行大约 16 次迭代。是正常还是我又做错了?
在你的 "fc1"
中设置 num_output: 2
:当使用 "SoftmaxWithLoss"
and/or "Accuracy"
层时,caffe 期望你的预测是一个 向量 的 class 概率。在你的例子中,你有两个 classes,因此这个向量的长度应该是 2(而不是目前的 1)。
或者,您可以保留 num_output: 1
并将损失切换到 "SigmoidCrossEntropyLoss"
层。但是,您将无法再使用 "Accuracy"
图层...
经过一番挣扎,我决定尝试一个最简单的任务,训练一个网络来分类数字是否为非负数。而我失败了...
我用下面的代码生成了数据。而且我不确定这是否正确。我从文件中读回了数据,但它看起来是正确的...
#pragma comment(lib, "hdf5")
#pragma comment(lib, "hdf5_cpp")
#include <cstdint>
#include <array>
#include <random>
#include <vector>
using namespace std;
#include <H5Cpp.h>
using namespace H5;
mt19937 rng;
float randf(float i_min, float i_max)
{
return rng() * ((i_max - i_min) / 0x100000000) + i_min;
}
#define NAME "pos_neg"
#define TRAIN_SET_SIZE 0x100000
#define TEST_SET_SIZE 0x10000
void make(const string &i_cat, uint32_t i_count)
{
H5File file(NAME "." + i_cat + ".h5", H5F_ACC_TRUNC);
hsize_t dataDim[2] = { i_count, 1 };
hsize_t labelDim = i_count;
FloatType dataType(PredType::NATIVE_FLOAT);
DataSpace dataSpace(2, dataDim);
DataSet dataSet = file.createDataSet("data", dataType, dataSpace);
IntType labelType(PredType::NATIVE_INT);
DataSpace labelSpace(1, &labelDim);
DataSet labelSet = file.createDataSet("label", labelType, labelSpace);
vector<float> data(i_count);
vector<int> labels(i_count);
for (uint32_t i = 0; i < i_count / 2; ++i)
{
labels[i * 2] = 0;
data[i * 2] = randf(0.f, 1.f);
labels[i * 2 + 1] = 1;
data[i * 2 + 1] = randf(-1.f, 0.f);
}
dataSet.write(&data[0], PredType::NATIVE_FLOAT);
labelSet.write(&labels[0], PredType::NATIVE_INT);
}
int main()
{
make("train", TRAIN_SET_SIZE);
make("test", TEST_SET_SIZE);
}
网络看起来像这样
name: "PosNegNet"
layer {
name: "data"
type: "HDF5Data"
top: "data"
top: "label"
include {
phase: TRAIN
}
hdf5_data_param {
source: "pos_neg_train.txt"
batch_size: 64
}
}
layer {
name: "data"
type: "HDF5Data"
top: "data"
top: "label"
include {
phase: TEST
}
hdf5_data_param {
source: "pos_neg_test.txt"
batch_size: 65536
}
}
layer {
name: "fc1"
type: "InnerProduct"
bottom: "data"
top: "fc1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
inner_product_param {
num_output: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "loss"
type: "SoftmaxWithLoss"
bottom: "fc1"
bottom: "label"
top: "loss"
}
layer {
name: "accuracy"
type: "Accuracy"
bottom: "fc1"
bottom: "label"
top: "accuracy"
include {
phase: TEST
}
}
还有我试过的一组参数
net: "pos_neg.prototxt"
test_iter: 1
test_interval: 500
base_lr: 0.001
momentum: 0.9
momentum2: 0.999
lr_policy: "fixed"
display: 100
max_iter: 10000
snapshot: 5000
snapshot_prefix: "pos_neg"
type: "Adam"
solver_mode: GPU
我 运行 caffe.exe Windows。而且我总是得到损失 = 0,准确度 = 0.5。
我知道我一定做错了什么,但我不知道从哪里看,好吧,除了挖掘源代码...
而且我发现caffe 相当慢。对于 float[64] 数据,在 1080Ti 上每批次有 1024 个项目,我每秒只能进行大约 16 次迭代。是正常还是我又做错了?
在你的 "fc1"
中设置 num_output: 2
:当使用 "SoftmaxWithLoss"
and/or "Accuracy"
层时,caffe 期望你的预测是一个 向量 的 class 概率。在你的例子中,你有两个 classes,因此这个向量的长度应该是 2(而不是目前的 1)。
或者,您可以保留 num_output: 1
并将损失切换到 "SigmoidCrossEntropyLoss"
层。但是,您将无法再使用 "Accuracy"
图层...