为什么我的神经网络有时会出现 NaN?
why am I getting NaN in my neural network sometimes?
我最近用youtube上的一系列视频写了一个神经网络,频道是coding train。它是用 js 编写的,我在 java 中编写了我的。它有时工作正常,但有时我得到 NaN 作为输出,我不知道为什么?
有人可以帮忙吗?代码里有一个用于矩阵运算的 Matrix class,以及神经网络 class 本身,后者自带一个测试问题:如果输入中 0 的个数多于 1 的个数,则第一个输出为 1,否则第二个输出为 1。
编辑:
我找到了问题所在,但我仍然无法弄清楚为什么会发生?!
这发生在我在 Matrix class 中的静态点积方法中。有时一个或两个矩阵数据都是 NaN!
编辑2:
我检查过,输入在构造函数中有效,但在 feedForward 方法中它们有时是 NaN!难道是因为我用的是 10 年前的笔记本电脑?!因为代码好像没有问题
已解决:我找到问题了!在前馈中,我没有为输出矩阵映射 sigmoid -_-
/**
 * A minimal fully connected neural network with a single hidden layer and
 * sigmoid activations on both the hidden and the output layer.
 *
 * <p>Layer values are column vectors held in {@code Matrix} objects. Training
 * is done one sample at a time in {@link #train(double[], double[])}.
 */
public class NeuralNetwork {
    /** Weights and biases; IH = input-to-hidden, HO = hidden-to-output. */
    private Matrix weightsIH, weightsHO, biasH, biasO;
    /** Factor applied to every gradient before it is added to weights/biases. */
    private double learningRate = 0.1;

    /**
     * Creates the network and randomizes all weights and biases.
     *
     * @param inputNodes  number of input values per sample
     * @param hiddenNodes number of hidden-layer nodes
     * @param outputNodes number of output values per sample
     */
    public NeuralNetwork(int inputNodes, int hiddenNodes, int outputNodes) {
        weightsIH = new Matrix(hiddenNodes, inputNodes);
        weightsHO = new Matrix(outputNodes, hiddenNodes);
        weightsIH.randomize();
        weightsHO.randomize();
        biasH = new Matrix(hiddenNodes, 1);
        biasO = new Matrix(outputNodes, 1);
        biasH.randomize();
        biasO.randomize();
    }

    /**
     * Sets the factor applied to gradients during training.
     *
     * @param learningRate the new learning rate
     */
    public void setLearningRate(double learningRate) {
        this.learningRate = learningRate;
    }

    /**
     * Logistic sigmoid.
     *
     * @param x any value
     * @return {@code 1 / (1 + e^-x)}
     */
    public double sigmoid(double x) {
        return 1 / (1 + Math.exp(-x));
    }

    /**
     * Derivative of the sigmoid expressed in terms of the sigmoid's output.
     *
     * @param y a value that has ALREADY been passed through {@link #sigmoid}
     * @return {@code y * (1 - y)}
     */
    public double dsigmoid(double y) {
        return y * (1 - y);
    }

    /**
     * Computes one layer: {@code sigmoid(weights . in + bias)}.
     *
     * @param weights weight matrix of the layer
     * @param in      column vector fed into the layer
     * @param bias    bias column vector of the layer
     * @return a new column vector with the activated layer values
     * @throws Exception if the matrix dimensions do not match
     */
    private Matrix activateLayer(Matrix weights, Matrix in, Matrix bias) throws Exception {
        Matrix layer = Matrix.dot(weights, in);
        layer.add(bias);
        layer.map(this::sigmoid);
        return layer;
    }

    /**
     * Runs the network on one input sample.
     *
     * @param inputArray input values, one per input node
     * @return the activated output-layer values, one per output node
     * @throws Exception if the matrix dimensions do not match
     */
    public double[] feedForward(double[] inputArray) throws Exception {
        Matrix inputs = Matrix.fromArray(inputArray);
        Matrix hidden = activateLayer(weightsIH, inputs, biasH);
        // The output layer must be activated as well: the original code
        // returned the raw weighted sums here.
        Matrix output = activateLayer(weightsHO, hidden, biasO);
        return output.toArray();
    }

    /**
     * Performs one training step on a single sample and adjusts all weights
     * and biases in place.
     *
     * @param inputArray   input values, one per input node
     * @param targetsArray expected output values, one per output node
     * @throws Exception if the matrix dimensions do not match
     */
    public void train(double[] inputArray, double[] targetsArray) throws Exception {
        Matrix targets = Matrix.fromArray(targetsArray);

        // Forward pass; the hidden layer is kept because the updates need it.
        Matrix inputs = Matrix.fromArray(inputArray);
        Matrix hidden = activateLayer(weightsIH, inputs, biasH);
        // dsigmoid() below is only valid for sigmoid outputs. Feeding it the
        // unbounded raw sums (as the original code did) yields wrong gradients
        // that can grow until the weights turn into NaN.
        Matrix outputs = activateLayer(weightsHO, hidden, biasO);

        // Output error: target - output.
        Matrix outputErrors = Matrix.subtract(targets, outputs);

        // Output gradient: learningRate * error * dsigmoid(output), element-wise.
        Matrix outputsGradients = Matrix.map(outputs, this::dsigmoid);
        outputsGradients.multiply(outputErrors);
        outputsGradients.multiply(learningRate);

        // Adjust hidden-to-output weights and the output bias.
        Matrix weightsHODeltas = Matrix.dot(outputsGradients, Matrix.transpose(hidden));
        weightsHO.add(weightsHODeltas);
        biasO.add(outputsGradients);

        // Hidden error: output error projected back through weightsHO.
        // As in the original code, this uses weightsHO AFTER the update above.
        Matrix hiddenErrors = Matrix.dot(Matrix.transpose(weightsHO), outputErrors);

        // Hidden gradient: learningRate * error * dsigmoid(hidden), element-wise.
        Matrix hiddenGradients = Matrix.map(hidden, this::dsigmoid);
        hiddenGradients.multiply(hiddenErrors);
        hiddenGradients.multiply(learningRate);

        // Adjust input-to-hidden weights and the hidden bias.
        Matrix weightsIHDeltas = Matrix.dot(hiddenGradients, Matrix.transpose(inputs));
        weightsIH.add(weightsIHDeltas);
        biasH.add(hiddenGradients);
    }

    /**
     * Prints the values on one line, each followed by a space.
     *
     * @param data values to print
     */
    public static void print(double[] data) {
        for (double d : data) {
            System.out.print(d + " ");
        }
        System.out.println();
    }

    /**
     * Trains a 3-4-2 network on all eight 3-bit inputs for 10000 passes and
     * prints the network output for each input.
     */
    public static void main(String[] args) {
        NeuralNetwork nn = new NeuralNetwork(3, 4, 2);
        double[][] trainingInputs = {{0, 0, 0}, {0, 0, 1}, {0, 1, 0}, {0, 1, 1}, {1, 0, 0}, {1, 0, 1}, {1, 1, 0}, {1, 1, 1}};
        // NOTE(review): in the first seven rows the first output is 1 exactly
        // when the input has more 0s than 1s; the last row ({1, 1, 1} -> {1, 0})
        // breaks that pattern - confirm whether this is intended.
        double[][] targets = {{1, 0}, {1, 0}, {1, 0}, {0, 1}, {1, 0}, {0, 1}, {0, 1}, {1, 0}};
        try {
            for (int epoch = 0; epoch < 10000; epoch++) {
                for (int j = 0; j < trainingInputs.length; j++) {
                    nn.train(trainingInputs[j], targets[j]);
                }
            }
            // Same inputs, in the same order, as the training set.
            for (double[] sample : trainingInputs) {
                print(nn.feedForward(sample));
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
/**
 * A small mutable matrix of doubles backed by a {@code double[][]}.
 *
 * <p>Instance methods named like an operation ({@code add}, {@code subtract},
 * {@code multiply}, {@code map}) modify this matrix in place; the static
 * variants and {@code transpose}/{@code dot} return a new matrix.
 * Matrices with zero rows are handled and treated as having zero columns.
 */
public class Matrix {
    /** Backing storage; {@code data[i][j]} is the element in row i, column j. */
    public double[][] data;

    /**
     * Creates a zero-filled matrix.
     *
     * @param row number of rows
     * @param col number of columns
     */
    public Matrix(int row, int col) {
        data = new double[row][col];
    }

    /**
     * Wraps the given array; the array is stored by reference, not copied.
     *
     * @param data backing array
     */
    public Matrix(double[][] data) {
        this.data = data;
    }

    /** Number of rows. */
    private int rows() {
        return data.length;
    }

    /** Number of columns, taken from the first row; 0 when there are no rows. */
    private int cols() {
        return data.length == 0 ? 0 : data[0].length;
    }

    /** Throws if the two matrices do not have the same number of rows and columns. */
    private static void requireSameShape(Matrix m1, Matrix m2) throws Exception {
        if (m1.rows() != m2.rows() || m1.cols() != m2.cols()) {
            throw new Exception("columns and rows don't match!");
        }
    }

    /** Overwrites every element with a random value in {@code [-1, 1)}. */
    public void randomize() {
        // One generator for the whole matrix instead of one per element.
        Random rng = new Random();
        for (double[] row : data) {
            for (int j = 0; j < row.length; j++) {
                row[j] = rng.nextDouble() * 2 - 1;
            }
        }
    }

    /**
     * Returns the transpose of this matrix; this matrix is not modified.
     *
     * @return a new matrix with rows and columns swapped
     */
    public Matrix transpose() {
        return transpose(this);
    }

    /**
     * Returns the transpose of {@code m}; {@code m} is not modified.
     *
     * @param m matrix to transpose
     * @return a new matrix with rows and columns swapped
     */
    public static Matrix transpose(Matrix m) {
        int rowCount = m.rows();
        int colCount = m.cols();
        Matrix result = new Matrix(colCount, rowCount);
        for (int i = 0; i < rowCount; i++) {
            for (int j = 0; j < colCount; j++) {
                result.data[j][i] = m.data[i][j];
            }
        }
        return result;
    }

    /**
     * Adds {@code n} to every element, in place.
     *
     * @param n value to add
     */
    public void add(double n) {
        for (double[] row : data) {
            for (int j = 0; j < row.length; j++) {
                row[j] += n;
            }
        }
    }

    /**
     * Subtracts {@code n} from every element, in place.
     *
     * @param n value to subtract
     */
    public void subtract(double n) {
        for (double[] row : data) {
            for (int j = 0; j < row.length; j++) {
                row[j] -= n;
            }
        }
    }

    /**
     * Adds {@code m} to this matrix element-wise, in place.
     *
     * @param m matrix of the same shape
     * @throws Exception if the shapes differ
     */
    public void add(Matrix m) throws Exception {
        requireSameShape(this, m);
        for (int i = 0; i < rows(); i++) {
            for (int j = 0; j < cols(); j++) {
                data[i][j] += m.data[i][j];
            }
        }
    }

    /**
     * Subtracts {@code m} from this matrix element-wise, in place.
     *
     * @param m matrix of the same shape
     * @throws Exception if the shapes differ
     */
    public void subtract(Matrix m) throws Exception {
        requireSameShape(this, m);
        for (int i = 0; i < rows(); i++) {
            for (int j = 0; j < cols(); j++) {
                data[i][j] -= m.data[i][j];
            }
        }
    }

    /**
     * Returns the element-wise sum {@code m1 + m2} as a new matrix.
     *
     * @throws Exception if the shapes differ
     */
    public static Matrix add(Matrix m1, Matrix m2) throws Exception {
        requireSameShape(m1, m2);
        Matrix result = new Matrix(m1.rows(), m1.cols());
        for (int i = 0; i < result.rows(); i++) {
            for (int j = 0; j < result.cols(); j++) {
                result.data[i][j] = m1.data[i][j] + m2.data[i][j];
            }
        }
        return result;
    }

    /**
     * Returns the element-wise difference {@code m1 - m2} as a new matrix.
     *
     * @throws Exception if the shapes differ
     */
    public static Matrix subtract(Matrix m1, Matrix m2) throws Exception {
        requireSameShape(m1, m2);
        Matrix result = new Matrix(m1.rows(), m1.cols());
        for (int i = 0; i < result.rows(); i++) {
            for (int j = 0; j < result.cols(); j++) {
                result.data[i][j] = m1.data[i][j] - m2.data[i][j];
            }
        }
        return result;
    }

    /**
     * Multiplies every element by {@code n}, in place.
     *
     * @param n scale factor
     */
    public void multiply(double n) {
        for (double[] row : data) {
            for (int j = 0; j < row.length; j++) {
                row[j] *= n;
            }
        }
    }

    /**
     * Multiplies this matrix by {@code m} element-wise, in place.
     *
     * @param m matrix of the same shape
     * @throws Exception if the shapes differ
     */
    public void multiply(Matrix m) throws Exception {
        requireSameShape(this, m);
        for (int i = 0; i < rows(); i++) {
            for (int j = 0; j < cols(); j++) {
                data[i][j] *= m.data[i][j];
            }
        }
    }

    /**
     * Returns the element-wise product of {@code m1} and {@code m2} as a new matrix.
     *
     * @throws Exception if the shapes differ
     */
    public static Matrix multiply(Matrix m1, Matrix m2) throws Exception {
        requireSameShape(m1, m2);
        Matrix result = new Matrix(m1.rows(), m1.cols());
        for (int i = 0; i < result.rows(); i++) {
            for (int j = 0; j < result.cols(); j++) {
                result.data[i][j] = m1.data[i][j] * m2.data[i][j];
            }
        }
        return result;
    }

    /**
     * Returns the matrix product {@code this . m} as a new matrix.
     *
     * @param m right-hand operand; its row count must equal this matrix's column count
     * @throws Exception if the inner dimensions differ
     */
    public Matrix dot(Matrix m) throws Exception {
        return dot(this, m);
    }

    /**
     * Returns the matrix product {@code m1 . m2} as a new matrix.
     *
     * @param m1 left-hand operand
     * @param m2 right-hand operand; its row count must equal {@code m1}'s column count
     * @throws Exception if the inner dimensions differ
     */
    public static Matrix dot(Matrix m1, Matrix m2) throws Exception {
        int inner = m1.cols();
        if (inner != m2.rows()) {
            throw new Exception("columns and rows don't match!");
        }
        Matrix result = new Matrix(m1.rows(), m2.cols());
        for (int i = 0; i < result.rows(); i++) {
            for (int j = 0; j < result.cols(); j++) {
                double sum = 0;
                for (int k = 0; k < inner; k++) {
                    sum += m1.data[i][k] * m2.data[k][j];
                }
                result.data[i][j] = sum;
            }
        }
        return result;
    }

    /** A function applied to a single matrix element. */
    @FunctionalInterface
    public static interface Func {
        public double method(double d);
    }

    /**
     * Replaces every element with {@code f.method(element)}, in place.
     *
     * @param f function to apply
     */
    public void map(Func f) {
        for (double[] row : data) {
            for (int j = 0; j < row.length; j++) {
                row[j] = f.method(row[j]);
            }
        }
    }

    /**
     * Returns a new matrix whose elements are {@code f.method(element)} of {@code m};
     * {@code m} is not modified.
     *
     * @param m source matrix
     * @param f function to apply
     */
    public static Matrix map(Matrix m, Func f) {
        Matrix result = new Matrix(m.rows(), m.cols());
        for (int i = 0; i < result.rows(); i++) {
            for (int j = 0; j < result.cols(); j++) {
                result.data[i][j] = f.method(m.data[i][j]);
            }
        }
        return result;
    }

    /**
     * Builds a single-column matrix from an array.
     *
     * @param arr values, one per row
     * @return an {@code arr.length x 1} matrix
     */
    public static Matrix fromArray(double[] arr) {
        Matrix res = new Matrix(arr.length, 1);
        for (int i = 0; i < arr.length; i++) {
            res.data[i][0] = arr[i];
        }
        return res;
    }

    /**
     * Returns the first column as an array; any other columns are ignored.
     *
     * @return one value per row
     */
    public double[] toArray() {
        double[] res = new double[data.length];
        for (int i = 0; i < data.length; i++) {
            res[i] = data[i][0];
        }
        return res;
    }

    /** Prints the matrix to standard output, one row per line. */
    public void print() {
        for (double[] row : data) {
            for (double value : row) {
                System.out.print(value + " ");
            }
            System.out.println();
        }
    }
}
你有几个调试选项,它们甚至可以一起使用。
添加调试输出
为您的所有计算添加调试输出,以便您可以查看到底是什么导致了意外值。例如,您有...
/**
 * Logistic sigmoid.
 *
 * @param x any value
 * @return {@code 1 / (1 + e^-x)}
 */
public double sigmoid(double x) {
    final double expOfNegX = Math.exp(-x);
    return 1 / (1 + expOfNegX);
}
但是你可以把它改成下面这样,以便看到它到底在做什么...
/**
 * Logistic sigmoid that also prints the evaluated expression and its result
 * to standard output on every call (debugging aid).
 *
 * @param x any value
 * @return {@code 1 / (1 + e^-x)}
 */
public double sigmoid(double x) {
    final double negated = -x;
    final double result = 1 / (1 + Math.exp(negated));
    System.out.println("1 / (1 + Math.exp(" + negated + ")) = " + result);
    return result;
}
在所有可能产生意外值的计算处都这样做。
我建议你像这样输出一些调试信息,然后在输出内容中搜索 NaN。如果您能把输出重定向到一个文件中,再用文本编辑器打开该文件进行文本搜索,会更加容易 - 如果您是在命令行上运行程序,可以执行 java MyApp > myapp_log.txt
然后在文本编辑器中打开 myapp_log.txt
进行文本搜索。
或者为了使输出更易于处理,您可以让调试逻辑仅在发现 NaN 时输出,例如...
/**
 * Logistic sigmoid that prints the evaluated expression to standard output
 * only when the result is NaN (debugging aid).
 *
 * @param x any value
 * @return {@code 1 / (1 + e^-x)}
 */
public double sigmoid(double x) {
    double sigmoid = 1 / (1 + Math.exp(-x));
    // NaN never compares equal to anything, not even itself, so
    // "sigmoid == Double.NaN" is always false; Double.isNaN is the correct test.
    if (Double.isNaN(sigmoid)) {
        System.out.println("1 / (1 + Math.exp(" + (-x) + ")) = " + sigmoid);
    }
    return sigmoid;
}
请记住为您计算的所有内容执行此操作,包括您的 dsigmoid
、您的 add
等,无论您在哪里进行任何类型的计算。如果你到处都放足够多的它,那么你就会发现问题并看到像“1 / (1 + Math.exp(NaN)) = NaN”这样的行输出。
使用调试器
调试器可以做很多事情。您可以运行您的程序,并一次一行地逐步执行,在每一步检查各个变量和结果。根据矩阵的大小以及调用这些函数的次数,这可能会花费很多精力。
或者你可以在一个变量上设置一个 "watch" 让程序在某个值等于 NaN 时停止,然后检查程序在那一刻的状态 - 我不确定是否Java 的任何调试器都具有此功能,因为我只在 C 或汇编中进行过此类调试,因此您必须弄清楚您是否有权访问此类调试器。
我最近用youtube上的一系列视频写了一个神经网络,频道是coding train。它是用 js 编写的,我在 java 中编写了我的。它有时工作正常,但有时我得到 NaN 作为输出,我不知道为什么?
有人可以帮忙吗?代码里有一个用于矩阵运算的 Matrix class,以及神经网络 class 本身,后者自带一个测试问题:如果输入中 0 的个数多于 1 的个数,则第一个输出为 1,否则第二个输出为 1。
编辑: 我找到了问题所在,但我仍然无法弄清楚为什么会发生?! 这发生在我在 Matrix class 中的静态点积方法中。有时一个或两个矩阵数据都是 NaN!
编辑2: 我检查过,输入在构造函数中有效,但在 feedForward 方法中它们有时是 NaN!难道是因为我用的是 10 年前的笔记本电脑?!因为代码好像没有问题
已解决:我找到问题了!在前馈中,我没有为输出矩阵映射 sigmoid -_-
/**
 * A minimal fully connected neural network with a single hidden layer and
 * sigmoid activations on both the hidden and the output layer.
 *
 * <p>Layer values are column vectors held in {@code Matrix} objects. Training
 * is done one sample at a time in {@link #train(double[], double[])}.
 */
public class NeuralNetwork {
    /** Weights and biases; IH = input-to-hidden, HO = hidden-to-output. */
    private Matrix weightsIH, weightsHO, biasH, biasO;
    /** Factor applied to every gradient before it is added to weights/biases. */
    private double learningRate = 0.1;

    /**
     * Creates the network and randomizes all weights and biases.
     *
     * @param inputNodes  number of input values per sample
     * @param hiddenNodes number of hidden-layer nodes
     * @param outputNodes number of output values per sample
     */
    public NeuralNetwork(int inputNodes, int hiddenNodes, int outputNodes) {
        weightsIH = new Matrix(hiddenNodes, inputNodes);
        weightsHO = new Matrix(outputNodes, hiddenNodes);
        weightsIH.randomize();
        weightsHO.randomize();
        biasH = new Matrix(hiddenNodes, 1);
        biasO = new Matrix(outputNodes, 1);
        biasH.randomize();
        biasO.randomize();
    }

    /**
     * Sets the factor applied to gradients during training.
     *
     * @param learningRate the new learning rate
     */
    public void setLearningRate(double learningRate) {
        this.learningRate = learningRate;
    }

    /**
     * Logistic sigmoid.
     *
     * @param x any value
     * @return {@code 1 / (1 + e^-x)}
     */
    public double sigmoid(double x) {
        return 1 / (1 + Math.exp(-x));
    }

    /**
     * Derivative of the sigmoid expressed in terms of the sigmoid's output.
     *
     * @param y a value that has ALREADY been passed through {@link #sigmoid}
     * @return {@code y * (1 - y)}
     */
    public double dsigmoid(double y) {
        return y * (1 - y);
    }

    /**
     * Computes one layer: {@code sigmoid(weights . in + bias)}.
     *
     * @param weights weight matrix of the layer
     * @param in      column vector fed into the layer
     * @param bias    bias column vector of the layer
     * @return a new column vector with the activated layer values
     * @throws Exception if the matrix dimensions do not match
     */
    private Matrix activateLayer(Matrix weights, Matrix in, Matrix bias) throws Exception {
        Matrix layer = Matrix.dot(weights, in);
        layer.add(bias);
        layer.map(this::sigmoid);
        return layer;
    }

    /**
     * Runs the network on one input sample.
     *
     * @param inputArray input values, one per input node
     * @return the activated output-layer values, one per output node
     * @throws Exception if the matrix dimensions do not match
     */
    public double[] feedForward(double[] inputArray) throws Exception {
        Matrix inputs = Matrix.fromArray(inputArray);
        Matrix hidden = activateLayer(weightsIH, inputs, biasH);
        // The output layer must be activated as well: the original code
        // returned the raw weighted sums here.
        Matrix output = activateLayer(weightsHO, hidden, biasO);
        return output.toArray();
    }

    /**
     * Performs one training step on a single sample and adjusts all weights
     * and biases in place.
     *
     * @param inputArray   input values, one per input node
     * @param targetsArray expected output values, one per output node
     * @throws Exception if the matrix dimensions do not match
     */
    public void train(double[] inputArray, double[] targetsArray) throws Exception {
        Matrix targets = Matrix.fromArray(targetsArray);

        // Forward pass; the hidden layer is kept because the updates need it.
        Matrix inputs = Matrix.fromArray(inputArray);
        Matrix hidden = activateLayer(weightsIH, inputs, biasH);
        // dsigmoid() below is only valid for sigmoid outputs. Feeding it the
        // unbounded raw sums (as the original code did) yields wrong gradients
        // that can grow until the weights turn into NaN.
        Matrix outputs = activateLayer(weightsHO, hidden, biasO);

        // Output error: target - output.
        Matrix outputErrors = Matrix.subtract(targets, outputs);

        // Output gradient: learningRate * error * dsigmoid(output), element-wise.
        Matrix outputsGradients = Matrix.map(outputs, this::dsigmoid);
        outputsGradients.multiply(outputErrors);
        outputsGradients.multiply(learningRate);

        // Adjust hidden-to-output weights and the output bias.
        Matrix weightsHODeltas = Matrix.dot(outputsGradients, Matrix.transpose(hidden));
        weightsHO.add(weightsHODeltas);
        biasO.add(outputsGradients);

        // Hidden error: output error projected back through weightsHO.
        // As in the original code, this uses weightsHO AFTER the update above.
        Matrix hiddenErrors = Matrix.dot(Matrix.transpose(weightsHO), outputErrors);

        // Hidden gradient: learningRate * error * dsigmoid(hidden), element-wise.
        Matrix hiddenGradients = Matrix.map(hidden, this::dsigmoid);
        hiddenGradients.multiply(hiddenErrors);
        hiddenGradients.multiply(learningRate);

        // Adjust input-to-hidden weights and the hidden bias.
        Matrix weightsIHDeltas = Matrix.dot(hiddenGradients, Matrix.transpose(inputs));
        weightsIH.add(weightsIHDeltas);
        biasH.add(hiddenGradients);
    }

    /**
     * Prints the values on one line, each followed by a space.
     *
     * @param data values to print
     */
    public static void print(double[] data) {
        for (double d : data) {
            System.out.print(d + " ");
        }
        System.out.println();
    }

    /**
     * Trains a 3-4-2 network on all eight 3-bit inputs for 10000 passes and
     * prints the network output for each input.
     */
    public static void main(String[] args) {
        NeuralNetwork nn = new NeuralNetwork(3, 4, 2);
        double[][] trainingInputs = {{0, 0, 0}, {0, 0, 1}, {0, 1, 0}, {0, 1, 1}, {1, 0, 0}, {1, 0, 1}, {1, 1, 0}, {1, 1, 1}};
        // NOTE(review): in the first seven rows the first output is 1 exactly
        // when the input has more 0s than 1s; the last row ({1, 1, 1} -> {1, 0})
        // breaks that pattern - confirm whether this is intended.
        double[][] targets = {{1, 0}, {1, 0}, {1, 0}, {0, 1}, {1, 0}, {0, 1}, {0, 1}, {1, 0}};
        try {
            for (int epoch = 0; epoch < 10000; epoch++) {
                for (int j = 0; j < trainingInputs.length; j++) {
                    nn.train(trainingInputs[j], targets[j]);
                }
            }
            // Same inputs, in the same order, as the training set.
            for (double[] sample : trainingInputs) {
                print(nn.feedForward(sample));
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
/**
 * A small mutable matrix of doubles backed by a {@code double[][]}.
 *
 * <p>Instance methods named like an operation ({@code add}, {@code subtract},
 * {@code multiply}, {@code map}) modify this matrix in place; the static
 * variants and {@code transpose}/{@code dot} return a new matrix.
 * Matrices with zero rows are handled and treated as having zero columns.
 */
public class Matrix {
    /** Backing storage; {@code data[i][j]} is the element in row i, column j. */
    public double[][] data;

    /**
     * Creates a zero-filled matrix.
     *
     * @param row number of rows
     * @param col number of columns
     */
    public Matrix(int row, int col) {
        data = new double[row][col];
    }

    /**
     * Wraps the given array; the array is stored by reference, not copied.
     *
     * @param data backing array
     */
    public Matrix(double[][] data) {
        this.data = data;
    }

    /** Number of rows. */
    private int rows() {
        return data.length;
    }

    /** Number of columns, taken from the first row; 0 when there are no rows. */
    private int cols() {
        return data.length == 0 ? 0 : data[0].length;
    }

    /** Throws if the two matrices do not have the same number of rows and columns. */
    private static void requireSameShape(Matrix m1, Matrix m2) throws Exception {
        if (m1.rows() != m2.rows() || m1.cols() != m2.cols()) {
            throw new Exception("columns and rows don't match!");
        }
    }

    /** Overwrites every element with a random value in {@code [-1, 1)}. */
    public void randomize() {
        // One generator for the whole matrix instead of one per element.
        Random rng = new Random();
        for (double[] row : data) {
            for (int j = 0; j < row.length; j++) {
                row[j] = rng.nextDouble() * 2 - 1;
            }
        }
    }

    /**
     * Returns the transpose of this matrix; this matrix is not modified.
     *
     * @return a new matrix with rows and columns swapped
     */
    public Matrix transpose() {
        return transpose(this);
    }

    /**
     * Returns the transpose of {@code m}; {@code m} is not modified.
     *
     * @param m matrix to transpose
     * @return a new matrix with rows and columns swapped
     */
    public static Matrix transpose(Matrix m) {
        int rowCount = m.rows();
        int colCount = m.cols();
        Matrix result = new Matrix(colCount, rowCount);
        for (int i = 0; i < rowCount; i++) {
            for (int j = 0; j < colCount; j++) {
                result.data[j][i] = m.data[i][j];
            }
        }
        return result;
    }

    /**
     * Adds {@code n} to every element, in place.
     *
     * @param n value to add
     */
    public void add(double n) {
        for (double[] row : data) {
            for (int j = 0; j < row.length; j++) {
                row[j] += n;
            }
        }
    }

    /**
     * Subtracts {@code n} from every element, in place.
     *
     * @param n value to subtract
     */
    public void subtract(double n) {
        for (double[] row : data) {
            for (int j = 0; j < row.length; j++) {
                row[j] -= n;
            }
        }
    }

    /**
     * Adds {@code m} to this matrix element-wise, in place.
     *
     * @param m matrix of the same shape
     * @throws Exception if the shapes differ
     */
    public void add(Matrix m) throws Exception {
        requireSameShape(this, m);
        for (int i = 0; i < rows(); i++) {
            for (int j = 0; j < cols(); j++) {
                data[i][j] += m.data[i][j];
            }
        }
    }

    /**
     * Subtracts {@code m} from this matrix element-wise, in place.
     *
     * @param m matrix of the same shape
     * @throws Exception if the shapes differ
     */
    public void subtract(Matrix m) throws Exception {
        requireSameShape(this, m);
        for (int i = 0; i < rows(); i++) {
            for (int j = 0; j < cols(); j++) {
                data[i][j] -= m.data[i][j];
            }
        }
    }

    /**
     * Returns the element-wise sum {@code m1 + m2} as a new matrix.
     *
     * @throws Exception if the shapes differ
     */
    public static Matrix add(Matrix m1, Matrix m2) throws Exception {
        requireSameShape(m1, m2);
        Matrix result = new Matrix(m1.rows(), m1.cols());
        for (int i = 0; i < result.rows(); i++) {
            for (int j = 0; j < result.cols(); j++) {
                result.data[i][j] = m1.data[i][j] + m2.data[i][j];
            }
        }
        return result;
    }

    /**
     * Returns the element-wise difference {@code m1 - m2} as a new matrix.
     *
     * @throws Exception if the shapes differ
     */
    public static Matrix subtract(Matrix m1, Matrix m2) throws Exception {
        requireSameShape(m1, m2);
        Matrix result = new Matrix(m1.rows(), m1.cols());
        for (int i = 0; i < result.rows(); i++) {
            for (int j = 0; j < result.cols(); j++) {
                result.data[i][j] = m1.data[i][j] - m2.data[i][j];
            }
        }
        return result;
    }

    /**
     * Multiplies every element by {@code n}, in place.
     *
     * @param n scale factor
     */
    public void multiply(double n) {
        for (double[] row : data) {
            for (int j = 0; j < row.length; j++) {
                row[j] *= n;
            }
        }
    }

    /**
     * Multiplies this matrix by {@code m} element-wise, in place.
     *
     * @param m matrix of the same shape
     * @throws Exception if the shapes differ
     */
    public void multiply(Matrix m) throws Exception {
        requireSameShape(this, m);
        for (int i = 0; i < rows(); i++) {
            for (int j = 0; j < cols(); j++) {
                data[i][j] *= m.data[i][j];
            }
        }
    }

    /**
     * Returns the element-wise product of {@code m1} and {@code m2} as a new matrix.
     *
     * @throws Exception if the shapes differ
     */
    public static Matrix multiply(Matrix m1, Matrix m2) throws Exception {
        requireSameShape(m1, m2);
        Matrix result = new Matrix(m1.rows(), m1.cols());
        for (int i = 0; i < result.rows(); i++) {
            for (int j = 0; j < result.cols(); j++) {
                result.data[i][j] = m1.data[i][j] * m2.data[i][j];
            }
        }
        return result;
    }

    /**
     * Returns the matrix product {@code this . m} as a new matrix.
     *
     * @param m right-hand operand; its row count must equal this matrix's column count
     * @throws Exception if the inner dimensions differ
     */
    public Matrix dot(Matrix m) throws Exception {
        return dot(this, m);
    }

    /**
     * Returns the matrix product {@code m1 . m2} as a new matrix.
     *
     * @param m1 left-hand operand
     * @param m2 right-hand operand; its row count must equal {@code m1}'s column count
     * @throws Exception if the inner dimensions differ
     */
    public static Matrix dot(Matrix m1, Matrix m2) throws Exception {
        int inner = m1.cols();
        if (inner != m2.rows()) {
            throw new Exception("columns and rows don't match!");
        }
        Matrix result = new Matrix(m1.rows(), m2.cols());
        for (int i = 0; i < result.rows(); i++) {
            for (int j = 0; j < result.cols(); j++) {
                double sum = 0;
                for (int k = 0; k < inner; k++) {
                    sum += m1.data[i][k] * m2.data[k][j];
                }
                result.data[i][j] = sum;
            }
        }
        return result;
    }

    /** A function applied to a single matrix element. */
    @FunctionalInterface
    public static interface Func {
        public double method(double d);
    }

    /**
     * Replaces every element with {@code f.method(element)}, in place.
     *
     * @param f function to apply
     */
    public void map(Func f) {
        for (double[] row : data) {
            for (int j = 0; j < row.length; j++) {
                row[j] = f.method(row[j]);
            }
        }
    }

    /**
     * Returns a new matrix whose elements are {@code f.method(element)} of {@code m};
     * {@code m} is not modified.
     *
     * @param m source matrix
     * @param f function to apply
     */
    public static Matrix map(Matrix m, Func f) {
        Matrix result = new Matrix(m.rows(), m.cols());
        for (int i = 0; i < result.rows(); i++) {
            for (int j = 0; j < result.cols(); j++) {
                result.data[i][j] = f.method(m.data[i][j]);
            }
        }
        return result;
    }

    /**
     * Builds a single-column matrix from an array.
     *
     * @param arr values, one per row
     * @return an {@code arr.length x 1} matrix
     */
    public static Matrix fromArray(double[] arr) {
        Matrix res = new Matrix(arr.length, 1);
        for (int i = 0; i < arr.length; i++) {
            res.data[i][0] = arr[i];
        }
        return res;
    }

    /**
     * Returns the first column as an array; any other columns are ignored.
     *
     * @return one value per row
     */
    public double[] toArray() {
        double[] res = new double[data.length];
        for (int i = 0; i < data.length; i++) {
            res[i] = data[i][0];
        }
        return res;
    }

    /** Prints the matrix to standard output, one row per line. */
    public void print() {
        for (double[] row : data) {
            for (double value : row) {
                System.out.print(value + " ");
            }
            System.out.println();
        }
    }
}
你有几个调试选项,它们甚至可以一起使用。
添加调试输出
为您的所有计算添加调试输出,以便您可以查看到底是什么导致了意外值。例如,您有...
/**
 * Logistic sigmoid.
 *
 * @param x any value
 * @return {@code 1 / (1 + e^-x)}
 */
public double sigmoid(double x) {
    final double expOfNegX = Math.exp(-x);
    return 1 / (1 + expOfNegX);
}
但是你可以把它改成下面这样,以便看到它到底在做什么...
/**
 * Logistic sigmoid that also prints the evaluated expression and its result
 * to standard output on every call (debugging aid).
 *
 * @param x any value
 * @return {@code 1 / (1 + e^-x)}
 */
public double sigmoid(double x) {
    final double negated = -x;
    final double result = 1 / (1 + Math.exp(negated));
    System.out.println("1 / (1 + Math.exp(" + negated + ")) = " + result);
    return result;
}
在所有可能产生意外值的计算处都这样做。
我建议你像这样输出一些调试信息,然后在输出内容中搜索 NaN。如果您能把输出重定向到一个文件中,再用文本编辑器打开该文件进行文本搜索,会更加容易 - 如果您是在命令行上运行程序,可以执行 java MyApp > myapp_log.txt
然后在文本编辑器中打开 myapp_log.txt
进行文本搜索。
或者为了使输出更易于处理,您可以让调试逻辑仅在发现 NaN 时输出,例如...
/**
 * Logistic sigmoid that prints the evaluated expression to standard output
 * only when the result is NaN (debugging aid).
 *
 * @param x any value
 * @return {@code 1 / (1 + e^-x)}
 */
public double sigmoid(double x) {
    double sigmoid = 1 / (1 + Math.exp(-x));
    // NaN never compares equal to anything, not even itself, so
    // "sigmoid == Double.NaN" is always false; Double.isNaN is the correct test.
    if (Double.isNaN(sigmoid)) {
        System.out.println("1 / (1 + Math.exp(" + (-x) + ")) = " + sigmoid);
    }
    return sigmoid;
}
请记住为您计算的所有内容执行此操作,包括您的 dsigmoid
、您的 add
等,无论您在哪里进行任何类型的计算。如果你到处都放足够多的它,那么你就会发现问题并看到像“1 / (1 + Math.exp(NaN)) = NaN”这样的行输出。
使用调试器
调试器可以做很多事情。您可以运行您的程序,并一次一行地逐步执行,在每一步检查各个变量和结果。根据矩阵的大小以及调用这些函数的次数,这可能会花费很多精力。
或者你可以在一个变量上设置一个 "watch" 让程序在某个值等于 NaN 时停止,然后检查程序在那一刻的状态 - 我不确定是否Java 的任何调试器都具有此功能,因为我只在 C 或汇编中进行过此类调试,因此您必须弄清楚您是否有权访问此类调试器。