Neural Network makes same predictions for different instances with different features
Out of interest, I built (or at least tried to build) a four-layer artificial neural network as a classifier for the well-known Iris dataset. The target values range from 0 to 2, serving as labels for the three different flower species. For simplicity, I left out the biases.
The problem: even though the mean squared error actually decreases and appears to converge, the network ends up classifying every instance (training and test) the same way. Each time I run it, it is "choosing" a label between 1 and 3, never lower or higher. So gradient descent does seem to be doing something.
Could it be the missing biases? Or did I misunderstand the algorithm? Or are the derivatives incorrect?
I learned the math behind backpropagation here: https://google-developers.appspot.com/machine-learning/crash-course/backprop-scroll/
neuralnetwork.py
import numpy as np
import math


def sigmoid(x):
    return (math.e**x) / (math.e**x + 1)


def sigmoid_deriv(x):
    return sigmoid(x) * (1 - sigmoid(x))


def ReLU(x):
    return x * (x > 0)


def ReLU_deriv(x):
    if x > 0:
        return 1
    else:
        return 0


def mean_square_error(output_vector, correct_vector):
    error = 0
    for i in range(0, len(output_vector)):
        error += (output_vector[i][0] - correct_vector[i][0])**2
    return 1/len(output_vector) * error


def div_x_output(y, y_correct, nr_instances):
    return 2 / nr_instances * (y - y_correct) * ReLU_deriv(y)


def div_x(y):
    return sigmoid_deriv(y)


def partial_deriv_synapses_output(learning_rate, prediction, solution, nr_instances, x, i, hidden_layer_1):
    return learning_rate * div_x_output(prediction, solution, nr_instances) * hidden_layer_1[x][i]


def partial_deriv_synapses_1(learning_rate, y, i, j, hidden_layer_0):
    return learning_rate * div_x(y) * hidden_layer_0[j][i]


def partial_deriv_synapses_0(learning_rate, y, i, j, input_matrix):
    return learning_rate * div_x(y) * input_matrix[j][i]


class NeuralNetwork:

    def __init__(self, synapses_0, synapses_1, synapses_2):
        self.synapses_0 = synapses_0
        self.synapses_1 = synapses_1
        self.synapses_2 = synapses_2
        self.sigmoid = np.vectorize(sigmoid)
        self.ReLU = np.vectorize(ReLU)

    def fit(self, input_matrix, solutions, learning_rate, nr_instances):
        hidden_layer_0 = self.sigmoid(np.dot(input_matrix, self.synapses_0))
        hidden_layer_1 = self.sigmoid(np.dot(hidden_layer_0, self.synapses_1))
        output_layer = self.ReLU(np.dot(hidden_layer_1, self.synapses_2))

        while mean_square_error(output_layer, solutions) > 0.7:
            print(mean_square_error(output_layer, solutions))
            x = 0
            for prediction in output_layer:
                # back propagate synapses 2
                for i in range(0, len(self.synapses_2)):
                    self.synapses_2[i][0] -= partial_deriv_synapses_output(learning_rate, prediction[0], solutions[x][0], nr_instances, x, i, hidden_layer_1)
                # back propagate synapses 1
                y_deriv_vector_synapses_1 = np.array([1. for i in range(0, len(self.synapses_1[0]))])
                for i in range(0, len(self.synapses_1[0])):
                    y_deriv_vector_synapses_1[i] = div_x_output(prediction[0], solutions[x][0], nr_instances) * self.synapses_2[i][0]
                for i in range(0, len(self.synapses_1)):
                    for j in range(0, len(self.synapses_1[0])):
                        self.synapses_1[i][j] -= partial_deriv_synapses_1(learning_rate, y_deriv_vector_synapses_1[j], i, j, hidden_layer_0)
                # back propagate synapses 0
                y_deriv_vector_synapses_0 = np.array([1. for i in range(0, len(self.synapses_0[0]))])
                for i in range(0, len(self.synapses_0[0])):
                    y_deriv_vector_synapses_0[i] = sum([div_x(y_deriv_vector_synapses_1[k]) * self.synapses_1[i][k] for k in range(0, len(self.synapses_1[0]))])
                for i in range(0, len(self.synapses_0)):
                    for j in range(0, len(self.synapses_0[0])):
                        self.synapses_0[i][j] -= partial_deriv_synapses_0(learning_rate, y_deriv_vector_synapses_0[j], i, j, input_matrix)
                hidden_layer_0 = self.sigmoid(np.dot(input_matrix, self.synapses_0))
                hidden_layer_1 = self.sigmoid(np.dot(hidden_layer_0, self.synapses_1))
                output_layer = self.sigmoid(np.dot(hidden_layer_1, self.synapses_2))
                x += 1

    def predict(self, input_vector):
        hidden_layer_0 = self.sigmoid(np.dot(input_vector, self.synapses_0))
        hidden_layer_1 = self.sigmoid(np.dot(hidden_layer_0, self.synapses_1))
        output_layer = self.ReLU(np.dot(hidden_layer_1, self.synapses_2))
        return output_layer[0]
main.py
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import numpy as np
import random

from neuralnetwork import NeuralNetwork

iris = load_iris()
X = iris.data
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = .2)

network = NeuralNetwork(np.array([[random.random() for i in range(0, 8)] for j in range(0, 4)]),
                        np.array([[random.random() for i in range(0, 3)] for j in range(0, 8)]),
                        np.array([[random.random() for i in range(0, 1)] for j in range(0, 3)]))

network.fit(np.array([x for x in X_train]), np.array([[y] for y in y_train]), 0.1, len(X_train))

error_count = 0
counter = 0
for x in X_train:
    prediction = round(network.predict(x))
    print("prediction: " + str(prediction) + ", actual: " + str(y_train[counter]))
    if prediction != y_train[counter]:
        error_count += 1
    counter += 1

print("The error count is: " + str(error_count))
Thanks for any help or hints!
The problem lies in your loss function; mean squared error (MSE) makes sense for regression problems, whereas here you are facing a classification one (3-class), so your loss should be cross entropy (also known as log loss).
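In NumPy terms, a minimal sketch of categorical cross-entropy could look like the following (the helper name and the eps clipping are my own choices, and it assumes one-hot targets and per-class probabilities as inputs, which your current code does not yet produce):

import numpy as np

def cross_entropy(probs, targets_one_hot, eps=1e-12):
    # probs: (n_samples, n_classes) predicted probabilities, each row summing to 1
    # targets_one_hot: (n_samples, n_classes) one-hot encoded labels
    probs = np.clip(probs, eps, 1.0 - eps)  # avoid log(0)
    return -np.mean(np.sum(targets_one_hot * np.log(probs), axis=1))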
The sigmoid is also not advisable for multi-class classification; so, at a high level, here are some further suggested modifications to your code (a short sketch follows the list):
- One-hot encode your 3 classes
- Use a softmax activation for your last layer, which should have 3 units (i.e. as many as your classes)
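A minimal sketch of those two changes (softmax is written out by hand here, and names like y_train_oh are illustrative, not taken from your code):

import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

def softmax(z):
    # subtract the row-wise maximum for numerical stability before exponentiating
    z = z - z.max(axis=1, keepdims=True)
    e = np.exp(z)
    return e / e.sum(axis=1, keepdims=True)

iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=.2)

# one-hot encode the 3 classes, e.g. label 2 becomes [0., 0., 1.]
y_train_oh = np.eye(3)[y_train]

# the last layer should have 3 units; with a final weight matrix of shape (n_hidden, 3)
# you would compute output_layer = softmax(np.dot(hidden_layer_1, synapses_2)),
# minimise cross entropy against y_train_oh instead of MSE, and read off the
# predicted class as np.argmax(output_layer, axis=1)

With a softmax output, each row of output_layer is a probability distribution over the three species, so the prediction becomes a class index rather than a single rounded scalar.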