Can shuffling my training data lead to bad model performance?
I am writing a neural network in C++ to approximate the function xSin(x) using a single hidden layer with 5 hidden neurons. The hidden neurons use the tanh activation and the output layer uses a linear activation. I trained on 30 training examples for 10,000 epochs.
Before I shuffled my data, this is what I got:
(plot; red: predicted data, green: actual data) The MSE was also close to 0.
However, when I shuffle the indices of the training examples (and verify that the shuffle really does shuffle them), I get terrible results:
(plot of the bad fit)
The Error vs. Epoch plot is:
(plot)
What could be going wrong? Could the shuffling be responsible for this?
Here is the relevant code for reference:
//Shuffle Function
void shuffle(int *array, size_t n)
{
    if (n > 1) //If no. of training examples > 1
    {
        size_t i;
        for (i = 0; i < n - 1; i++)
        {
            size_t j = i + rand() / (RAND_MAX / (n - i) + 1);
            int t = array[j];
            array[j] = array[i];
            array[i] = t;
        }
    }
}
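// Aside: with C++11, std::shuffle avoids the manual modulo-bias workaround
// used above; a minimal sketch, assuming <algorithm> and <random> are included:
//
//     std::mt19937 rng{std::random_device{}()};
//     std::shuffle(trainingSetOrder, trainingSetOrder + numTrainingSets, rng);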
int main(int argc, const char * argv[])
{
    //Some other actions

    ///FOR INDEX SHUFFLING
    int trainingSetOrder[numTrainingSets];
    for (int j=0; j<numTrainingSets; ++j)
        trainingSetOrder[j] = j;

    ///TRAINING
    //std::cout<<"start train\n";
    vector<double> performance, epo; ///STORE MSE, EPOCH
    for (int n=0; n < epoch; n++)
    {
        shuffle(trainingSetOrder,numTrainingSets);
        for (int i=0; i<numTrainingSets; i++)
        {
            int x = trainingSetOrder[i];
            //cout<<" "<<"("<<training_inputs[x][0]<<","<<training_outputs[x][0] <<")";

            /// Forward pass
            for (int j=0; j<numHiddenNodes; j++)
            {
                double activation=hiddenLayerBias[j];
                //std::cout<<"Training Set :"<<x<<"\n";
                for (int k=0; k<numInputs; k++) {
                    activation+=training_inputs[x][k]*hiddenWeights[k][j];
                }
                hiddenLayer[j] = tanh(activation);
            }

            for (int j=0; j<numOutputs; j++) {
                double activation=outputLayerBias[j];
                for (int k=0; k<numHiddenNodes; k++)
                {
                    activation+=hiddenLayer[k]*outputWeights[k][j];
                }
                outputLayer[j] = lin(activation);
            }

            /// Backprop
            /// For V
            double deltaOutput[numOutputs];
            for (int j=0; j<numOutputs; j++) {
                double errorOutput = (training_outputs[i][j]-outputLayer[j]);
                deltaOutput[j] = errorOutput*dlin(outputLayer[j]);
            }

            /// For W
            //Some Code

            ///Updation
            /// For V and b
            ///Some Code

            /// For W and c
            for (int j=0; j<numHiddenNodes; j++) {
                //c
                hiddenLayerBias[j] += deltaHidden[j]*lr;
                //W
                for (int k=0; k<numInputs; k++) {
                    hiddenWeights[k][j]+=training_inputs[i][k]*deltaHidden[j]*lr;
                }
            }
        }
    }
    return 0;
}
Your model does not appear to be randomly initialized, because the init_weight function

double init_weight() { return (2*rand()/RAND_MAX -1); }

almost always returns -1, since it performs integer division. Such an initialization can make the model hard or impossible to train.
Fix:
double init_weight() { return 2. * rand() / RAND_MAX - 1; }
The 2. above has type double, which triggers promotion of the other integer operands in the binary expressions to double.
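To see the difference concretely, here is a small self-contained check; the sample value for r is hypothetical, chosen only for illustration:

#include <cstdio>
#include <cstdlib>

int main()
{
    int r = RAND_MAX / 3;                   // a representative rand() value
    printf("%d\n", 2 * r / RAND_MAX - 1);   // integer division: prints -1
    printf("%f\n", 2. * r / RAND_MAX - 1);  // promoted to double: prints roughly -0.333333
    return 0;
}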
Xavier initialization is a good way to speed up training.
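For reference, a minimal sketch of Xavier (Glorot) uniform initialization; the names xavier_init, fanIn, and fanOut are illustrative and not from the original code:

#include <cmath>
#include <random>

// Draw one weight from U(-limit, limit), with limit = sqrt(6 / (fanIn + fanOut)).
double xavier_init(int fanIn, int fanOut)
{
    static std::mt19937 gen{std::random_device{}()};
    const double limit = std::sqrt(6.0 / (fanIn + fanOut));
    std::uniform_real_distribution<double> dist(-limit, limit);
    return dist(gen);
}

For the network above this would be xavier_init(numInputs, numHiddenNodes) for hiddenWeights and xavier_init(numHiddenNodes, numOutputs) for outputWeights.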
I found 2 bugs in the training part (silly mistakes!):
1)
/// Backprop
/// For V
double deltaOutput[numOutputs];
for (int j=0; j<numOutputs; j++) {
    double errorOutput = (training_outputs[i][j]-outputLayer[j]);
    deltaOutput[j] = errorOutput*dlin(outputLayer[j]);
}
should be
/// Backprop
/// For V
double deltaOutput[numOutputs];
for (int j=0; j<numOutputs; j++) {
    double errorOutput = (training_outputs[x][j]-outputLayer[j]);
    deltaOutput[j] = errorOutput*dlin(outputLayer[j]);
}
2)
/// For W and c
for (int j=0; j<numHiddenNodes; j++) {
    //c
    hiddenLayerBias[j] += deltaHidden[j]*lr;
    //W
    for (int k=0; k<numInputs; k++) {
        hiddenWeights[k][j]+=training_inputs[i][k]*deltaHidden[j]*lr;
    }
}
should be
/// For W and c
for (int j=0; j<numHiddenNodes; j++) {
    //c
    hiddenLayerBias[j] += deltaHidden[j]*lr;
    //W
    for (int k=0; k<numInputs; k++) {
        hiddenWeights[k][j]+=training_inputs[x][k]*deltaHidden[j]*lr;
    }
}
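Note that before shuffling, trainingSetOrder[i] == i, so both bugs were invisible until the order changed. The invariant to keep in mind (a sketch, not the full loop):

for (int i = 0; i < numTrainingSets; i++)
{
    int x = trainingSetOrder[i]; // x is the example actually presented this step
    // Every read of the training data in this loop body must use x, i.e.
    // training_inputs[x][k] and training_outputs[x][j]; any leftover use of
    // index i pairs the forward pass with the wrong target once shuffling is on.
}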
After which I get: (plot of the corrected fit; red: predicted, green: actual) and (the Error vs. Epoch curve).