Neural Network bad convergence
For the past two weeks I've been reading a lot about NNs, and I think I've seen pretty much every "XOR" tutorial on the web. However, I can't get my own to work. I started with a simple "OR" neuron approach and got good results. I think my problem lies in the backpropagation implementation. I went with an object-oriented approach, so here are the main lines.
Three classes:
Neuron
public class Neuron {
/*
* Attributes
*/
double[] inputs;
double[] weights;
double output;
double error;
double delta;
double deltaWeight;
/*
* Constructors
*/
public Neuron(int nInputs)
{
inputs = new double[nInputs + 1];
inputs[inputs.length - 1] = 1; // bias
weights = new double[nInputs + 1];
}
/*
* Methods
*/
/**
* Reset all weights of the neuron to random values between -0.5 and 0.5
*/
public void reset()
{
Random random = new Random();
for (int i = 0; i < weights.length; i++)
weights[i] = random.nextDouble() - 0.5; // uniform in [-0.5, 0.5)
}
/**
* Compute output for given inputs
* @param inputs
*/
public void computeOutput(double inputs[])
{
setInputs(inputs);
output = Sigmoid.activation(getDotProduct());
}
/**
* Compute error for given ideal
* @param ideal
*/
public void computeError(double ideal)
{
error = ideal - output;
delta = error;
}
/**
* Compute error for a hidden neuron
* @param previousLayer the layer processed just before this one during backpropagation (the next layer in the forward direction)
* @param position this neuron's index within its layer
*/
public void computeError(FeedForwardLayer previousLayer, int position)
{
double sum = 0;
for (int i = 0; i < previousLayer.neurons.length; i++)
sum += (previousLayer.neurons[i].delta * previousLayer.neurons[i].weights[position]);
delta = Sigmoid.derivative(getDotProduct()) * sum;
error = delta;
}
/**
* Adjust every weight of the neuron
*/
public void adjustWeights(double lambda, double momentum)
{
for (int i = 0; i < weights.length; i++)
{
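// note: deltaWeight is a single field shared by all of this neuron's weights, so with a
// nonzero momentum each weight reuses the step of the weight updated just before it;
// a per-weight array (e.g. deltaWeights[i]) would be needed for true momentum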
double lastDeltaWeight = deltaWeight;
deltaWeight = lambda * (delta * inputs[i]) + momentum * lastDeltaWeight;
weights[i] += deltaWeight;
}
}
@Override
public String toString()
{
String str = "";
for (int i = 0; i < weights.length; i++)
str = str.concat(String.format("IN|W --> %.6f | %.6f \n", (float) inputs[i], (float) weights[i]));
str = str.concat("Output = " + output + "\n");
str = str.concat("Error = " + error + "\n");
return str;
}
/*
* Getters & Setters
*/
/**
* @return dot product of weights and inputs (the bias is the last input, fixed at 1)
*/
public double getDotProduct()
{
double sum = 0;
for (int i = 0; i < inputs.length; i++)
sum += (weights[i] * inputs[i]);
return sum;
}
/**
* Set inputs (keep bias input)
* @param inputs
*/
public void setInputs(double[] inputs)
{
for (int i = 0; i < inputs.length; i++)
this.inputs[i] = inputs[i];
}
/**
* Set every weight to a single value
* @param weight
*/
public void setWeights(double weight)
{
for (int i = 0; i < weights.length; i++)
this.weights[i] = weight;
}
}
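The code above calls into a Sigmoid helper that isn't shown in the question. A minimal sketch consistent with how it is called (activation applied to the weighted sum, derivative taking the same pre-activation sum) might look like this; the class and method names are taken from the calls above, the bodies are an assumption:
public final class Sigmoid {
    private Sigmoid() {}
    /**
     * Logistic activation s(x) = 1 / (1 + e^-x)
     */
    public static double activation(double x)
    {
        return 1.0 / (1.0 + Math.exp(-x));
    }
    /**
     * Derivative s'(x) = s(x) * (1 - s(x)), computed from the pre-activation sum
     */
    public static double derivative(double x)
    {
        double s = activation(x);
        return s * (1.0 - s);
    }
}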
FeedForwardLayer (contains Neurons)
public class FeedForwardLayer {
/*
* Attributes
*/
Neuron[] neurons;
LayerTypes type;
/*
* Constructors
*/
/**
* Layer constructor
* @param nInputs number of inputs per neuron
* @param nNeurons number of neurons in the layer
* @param type the layer's type
*/
public FeedForwardLayer(int nInputs, int nNeurons, LayerTypes type)
{
neurons = new Neuron[nNeurons];
for (int i = 0; i < neurons.length; i++)
neurons[i] = new Neuron(nInputs);
this.type = type;
}
/*
* Methods
*/
/**
* Reset all weights of the layer's neurons to random values between -0.5 and 0.5
*/
public void reset()
{
for (Neuron neuron : neurons)
neuron.reset();
}
/**
* Compute outputs; for non-input layers, pass the previous layer's outputs
* @param inputs
*/
public void computeOutputs(double[] inputs)
{
for (int i = 0; i < neurons.length; i++)
neurons[i].computeOutput(inputs);
}
/**
* Compute errors, if this is the output layer
* @param ideals
*/
public void computeErrors(double[] ideals)
{
for (int i = 0; i < neurons.length; i++)
neurons[i].computeError(ideals[i]);
}
/**
* Compute errors, if this isn't the output layer
* @param next the layer n+1 (whose deltas have already been computed)
*/
public void computeErrors(FeedForwardLayer next)
{
for (int i = 0; i < neurons.length; i++)
neurons[i].computeError(next, i);
}
/**
* Adjust weights for every neuron
*/
public void adjustWeights(double lambda, double momentum)
{
for (Neuron neuron : neurons)
neuron.adjustWeights(lambda, momentum);
}
@Override
public String toString()
{
String str = "";
for (int i = 0; i < neurons.length; i++)
str = str.concat("Neuron " + i + "\n" + neurons[i]);
return str;
}
/*
* Getters - Setters
*/
/**
* @return true if layer is input, false otherwise
*/
public boolean isInput()
{
return type == LayerTypes.INPUT;
}
/**
* @return true if layer is output, false otherwise
*/
public boolean isOutput()
{
return type == LayerTypes.OUTPUT;
}
/**
* @return an array of layer's outputs
*/
public double[] getOutputs()
{
double[] outputs = new double[neurons.length];
for (int i = 0; i < neurons.length; i++)
outputs[i] = neurons[i].output;
return outputs;
}
/**
* @return array of layer's errors
*/
public double[] getErrors()
{
double[] errors = new double[neurons.length];
for (int i = 0; i < neurons.length; i++)
errors[i] = neurons[i].error;
return errors;
}
/**
* Set all the weights of the layer to given weight
* @param weight
*/
public void setWeights(double weight)
{
for (int i = 0; i < neurons.length; i++)
neurons[i].setWeights(weight);
}
}
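Both classes also reference a LayerTypes enum that the question doesn't include. Given how it is used (the INPUT and OUTPUT checks above, plus LayerTypes.values()[i] indexed by layer position in the network's toString below), a plausible sketch is:
public enum LayerTypes {
    INPUT, HIDDEN, OUTPUT // values() order must match the order layers are added to the network
}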
FeedForwardNetwork (contains FeedForwardLayers)
public class FeedForwardNetwork {
static final double lambda = 0.1;
static final double momentum = 0;
/*
* Attributes
*/
private ArrayList<FeedForwardLayer> layers;
/*
* Constructors
*/
public FeedForwardNetwork()
{
layers = new ArrayList<FeedForwardLayer>();
}
/*
* Methods
*/
/**
* Init all the weights to random values
*/
public void reset()
{
for (int i = 0; i < layers.size(); i++)
layers.get(i).reset();
}
/**
* Compute output for all the neurons of all the layers for given inputs
* @param inputs
*/
public void feedForward(double[] inputs)
{
//System.err.println("FeedForwardNetwork.feedForward(" + inputs[0] + ", " + inputs[1] +")");
for (int i = 0; i < layers.size(); i++)
{
//System.err.println("\n*** COMPUTING OUTPUT FOR LAYER " + i + "***\n");
if (layers.get(i).isInput())
layers.get(i).computeOutputs(inputs);
else
layers.get(i).computeOutputs(layers.get(i - 1).getOutputs());
}
}
/**
* Compute errors for all the neurons of all the layers starting by output layer
* @param ideals
*/
public void feedBackward(double[] ideals)
{
//System.err.println("FeedForwardNetwork.feedBackward(" + ideals[0] + ")");
// For each layer, starting with the output one (the input layer is skipped: it has no error to compute)
for (int i = layers.size() - 1; i > 0; i--)
{
//System.err.println("*** COMPUTING ERROR FOR LAYER " + i + "***");
if (layers.get(i).isOutput())
layers.get(i).computeErrors(ideals);
else
layers.get(i).computeErrors(layers.get(i + 1));
}
}
/**
* Adjust weights of every layer
*/
public void adjustWeights()
{
for (FeedForwardLayer feedForwardLayer : layers)
feedForwardLayer.adjustWeights(lambda, momentum);
}
/**
* Train the nn with given inputs and outputs
* @param inputs
* @param outputs
*/
public void train(double[] inputs, double... outputs)
{
feedForward(inputs);
feedBackward(outputs);
adjustWeights();
}
/**
* Add a layer to the network
* @param layer
*/
public void addLayer(FeedForwardLayer layer)
{
layers.add(layer);
}
@Override
public String toString()
{
String str = "";
for (int i = 0; i < layers.size(); i++)
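// assumes layers were added in the same order as the LayerTypes constants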
str = str.concat("Layer " + LayerTypes.values()[i] + "\n" + layers.get(i));
str = str.concat("\n");
str = str.concat("OUTPUT = " + getOutputs()[0] + "\n");
str = str.concat("ERROR = " + getError(false) + "\n");
return str;
}
/*
* Getters & Setters
*/
public FeedForwardLayer getInputLayer()
{
return layers.get(0);
}
public FeedForwardLayer getOutputLayer()
{
return layers.get(layers.size() - 1);
}
public FeedForwardLayer getLayer(int index)
{
return layers.get(index);
}
public double getError(boolean abs)
{
if (abs)
return Math.abs(getOutputLayer().neurons[0].error);
return getOutputLayer().neurons[0].error;
}
public double[] getOutputs()
{
return getOutputLayer().getOutputs();
}
}
So I train the network by feeding it epochs of the XOR table:
XOR table
X | Y | S
0 0 0
0 1 1
1 0 1
1 1 0
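For reference, a minimal sketch of how the network might be wired and trained under the setup the question implies (the three classes shown here, a 2-2-1 topology, and the hypothetical LayerTypes sketched earlier); the class name XorTrainer and the epoch count are assumptions:
public class XorTrainer {
    public static void main(String[] args)
    {
        double[][] inputs = { {0, 0}, {0, 1}, {1, 0}, {1, 1} };
        double[]   ideals = {  0,      1,      1,      0    };
        FeedForwardNetwork network = new FeedForwardNetwork();
        network.addLayer(new FeedForwardLayer(2, 2, LayerTypes.INPUT));
        network.addLayer(new FeedForwardLayer(2, 2, LayerTypes.HIDDEN));
        network.addLayer(new FeedForwardLayer(2, 1, LayerTypes.OUTPUT));
        network.reset(); // randomize all weights
        for (int epoch = 0; epoch < 10000; epoch++)
            for (int i = 0; i < inputs.length; i++)
                network.train(inputs[i], ideals[i]);
        for (double[] in : inputs)
        {
            network.feedForward(in);
            System.out.printf("%s -> %.4f%n", java.util.Arrays.toString(in), network.getOutputs()[0]);
        }
    }
}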
The network still outputs around 0.5 after thousands of epochs...
Interestingly, if I replace the training set with the AND table, OR table, or NAND table, the NN converges to a single value for every row of the S column (it outputs 0.25 for the AND and NAND tables and 0.75 for the OR table).
I just want to know whether my implementation is good enough to make this work, ty!
So, after some research, I realized that my implementation was fine; I just didn't understand how the input layer works. That's it: the input layer works like In = Out.
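A sketch of that fix, expressed inside FeedForwardLayer.computeOutputs: when the layer is the input one, forward each input unchanged instead of running it through the weights and the sigmoid (this replaces the method body shown above; everything else stays the same):
public void computeOutputs(double[] inputs)
{
    for (int i = 0; i < neurons.length; i++)
    {
        if (isInput())
            neurons[i].output = inputs[i]; // input layer: In = Out, no weights, no activation
        else
            neurons[i].computeOutput(inputs);
    }
}
This requires one neuron per input in the input layer, which the 2-2-1 setup above satisfies.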