Can someone check what is wrong with my XOR neural network code?
I have been trying to build an XOR neural network, but the output for every input always converges to one value (for example 1, 0, or 0.5). This is my latest attempt:
import java.io.*;
import java.util.*;

public class Main {
    public static void main(String[] args) {
        double[][] trainingInputs = {
            {1, 1},
            {1, 0},
            {0, 1},
            {1, 1}
        };
        double[] targetOutputs = {0, 1, 1, 0};
        NeuralNetwork network = new NeuralNetwork();
        System.out.println("Training");
        for(int i=0; i<40; i++) {
            network.train(trainingInputs, targetOutputs);
        }
        for(double[] inputs : trainingInputs) {
            double output = network.feedForward(inputs);
            System.out.println(inputs[0] + " - " + inputs[1] + " : " + output);
        }
    }
}
class Neuron {
    private ArrayList<Synapse> inputs; // list of synapses connected to the neuron
    private double output;             // output of the neuron
    private double derivative;         // derivative of the output
    private double weightedSum;        // weighted sum of the synapse weights and the connected outputs
    private double error;              // error

    public Neuron() {
        inputs = new ArrayList<Synapse>();
        error = 0;
    }

    // Adds a synapse
    public void addInput(Synapse input) {
        inputs.add(input);
    }

    public List<Synapse> getInputs() {
        return this.inputs;
    }

    public double[] getWeights() {
        double[] weights = new double[inputs.size()];
        int i = 0;
        for(Synapse synapse : inputs) {
            weights[i] = synapse.getWeight();
            i++;
        }
        return weights;
    }

    private void calculateWeightedSum() {
        weightedSum = 0;
        for(Synapse synapse : inputs) {
            weightedSum += synapse.getWeight() * synapse.getSourceNeuron().getOutput();
        }
    }

    public void activate() {
        calculateWeightedSum();
        output = sigmoid(weightedSum);
        derivative = sigmoidDerivative(output);
    }

    public double getOutput() {
        return this.output;
    }

    public void setOutput(double output) {
        this.output = output;
    }

    public double getDerivative() {
        return this.derivative;
    }

    public double getError() {
        return error;
    }

    public void setError(double error) {
        this.error = error;
    }

    public double sigmoid(double weightedSum) {
        return 1 / (1 + Math.exp(-weightedSum));
    }

    public double sigmoidDerivative(double output) {
        return output / (1 - output);
    }
}
class Synapse implements Serializable {
    private Neuron sourceNeuron; // neuron the synapse originates from
    private double weight;       // weight of the synapse

    public Synapse(Neuron sourceNeuron) {
        this.sourceNeuron = sourceNeuron;
        this.weight = Math.random() - 0.5;
    }

    public Neuron getSourceNeuron() {
        return sourceNeuron;
    }

    public double getWeight() {
        return weight;
    }

    public void adjustWeight(double deltaWeight) {
        this.weight += deltaWeight;
    }
}
class NeuralNetwork implements Serializable {
    Neuron[] input;
    Neuron[] hidden;
    Neuron output;
    double learningRate = 0.1;

    public NeuralNetwork() {
        input = new Neuron[2];
        hidden = new Neuron[2];
        output = new Neuron();
        for(int i=0; i<2; i++) {
            input[i] = new Neuron();
        }
        for(int i=0; i<2; i++) {
            hidden[i] = new Neuron();
        }
        for(int i=0; i<2; i++) {
            Synapse s = new Synapse(hidden[i]);
            output.addInput(s);
        }
        for(int i=0; i<2; i++) {
            for(int j=0; j<2; j++) {
                Synapse s = new Synapse(input[j]);
                hidden[i].addInput(s);
            }
        }
    }

    public void setInput(double[] inputVal) {
        for(int i=0; i<2; i++) {
            input[i].setOutput(inputVal[i]);
        }
    }

    public double feedForward(double[] inputVal) {
        setInput(inputVal);
        for(int i=0; i<2; i++) {
            hidden[i].activate();
        }
        output.activate();
        return output.getOutput();
    }

    public void train(double[][] trainingInputs, double[] targetOutputs) {
        for(int i=0; i<4; i++) {
            double[] inputs = trainingInputs[i];
            double target = targetOutputs[i];
            double currentOutput = feedForward(inputs);
            double delta = 0;
            double neuronError = 0;
            for(int j=0; j<2; j++) {
                Synapse s = output.getInputs().get(j);
                neuronError = output.getDerivative() * (target - currentOutput);
                delta = learningRate * s.getSourceNeuron().getOutput() * neuronError;
                output.setError(neuronError);
                s.adjustWeight(delta);
            }
            for(int j=0; j<2; j++) {
                for(int k=0; k<2; k++) {
                    Synapse s = hidden[j].getInputs().get(k);
                    Synapse s1 = output.getInputs().get(j);
                    delta = learningRate * s.getSourceNeuron().getOutput() * hidden[j].getDerivative() * s1.getWeight() * output.getError();
                    s.adjustWeight(delta);
                }
            }
        }
    }
}
I took the backpropagation algorithm from someone else's implementation on GitHub and tried to use it, but the outputs I get are all around 0.50, or just NaN. I don't know whether I am using the wrong algorithm, implementing it the wrong way, or something else.
The algorithm I am using is this:
First I compute the error of the neuron itself:
If it is the output neuron, then neuronError = (derivative of the output neuron) * (expected output - actual output)
If it is a hidden neuron, then neuronError = (derivative of the hidden neuron) * (neuronError of the output neuron) * (weight of the synapse from the hidden neuron to the output neuron)
Then deltaWeight = learningRate * (neuronError of the neuron the synapse starts from) * (output of the neuron the synapse starts from)
Finally I add deltaWeight to the previous weight.
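In symbols, this is what my train method actually computes (y is a neuron's output, t is the expected output, eta is the learningRate, w(a -> b) is the weight of the synapse from neuron a to neuron b, and derivative(y) is whatever sigmoidDerivative returns for that output):

neuronError_out     = derivative(y_out) * (t - y_out)
neuronError_hidden  = derivative(y_hidden) * w(hidden -> out) * neuronError_out
deltaWeight(a -> b) = eta * neuronError_b * y_a

so each weight update uses the error of the neuron the synapse feeds into and the output of the neuron it starts from.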
Sorry for the long post. If you don't want to read through the code, could you at least tell me whether my algorithm is correct? Thanks.
Your sigmoid derivative is wrong; it should be this:

    public double sigmoidDerivative(double output) {
        return output * (1 - output);
    }

As I said in the comments, you have {1, 1} twice in your training inputs, so replace one of them with {0, 0}.
Finally, increase the number of iterations from 40 to 100,000.
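Putting the three fixes together, the changed pieces would look roughly like this (only a sketch of the parts that change, not your whole program):

    public double sigmoidDerivative(double output) {
        // derivative of the sigmoid written in terms of its output: o * (1 - o)
        return output * (1 - output);
    }

    // all four XOR patterns, each exactly once
    double[][] trainingInputs = {
        {0, 0},
        {1, 0},
        {0, 1},
        {1, 1}
    };
    double[] targetOutputs = {0, 1, 1, 0};

    // far more passes over the training set than 40
    for(int i=0; i<100000; i++) {
        network.train(trainingInputs, targetOutputs);
    }

The derivative fix matters most: output / (1 - output) blows up as the output approaches 1, which is likely where the NaN values came from.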