Python-coded neural network does not learn properly
My network does not learn to tell the inputs apart: it either outputs something close to the average of all targets or becomes biased toward one particular output. What am I doing wrong?
import numpy as np

sigmoid = lambda x: 1 / (1 + np.exp(-x))
sigmoid_der = lambda x: sigmoid(x) * (1 - sigmoid(x))
ReLU = lambda x: np.maximum(0, x)
ReLU_der = lambda x: x > 0

class NeuralNetwork:
    def __init__(self, shape: tuple):
        self.layers = len(shape)  # The number of layers
        self.shape = shape        # The number of neurons in each layer
        self.weights = [
            np.array([np.random.rand(shape[l - 1]) for _ in range(shape[l])])
            for l in range(1, self.layers)
        ]  # A list of weight matrices connecting neighbouring layers
        self.weighted_sums = [np.zeros(l) for l in shape]
        self.activations = [np.zeros(l) for l in shape]

    def inspect(self):
        print("=============NeuralNetwork===============")
        print(f"Shape: {self.shape}")
        print(f"Weights: {self.weights}")
        print(f"Activations: {self.activations}")

    def forward_prop(self, X):
        self.activations[0] = X
        for l in range(1, self.layers):
            self.weighted_sums[l] = self.weights[l - 1] @ self.activations[l - 1]
            self.activations[l] = sigmoid(self.weighted_sums[l])

    def backprop(self, X, Y):
        delta = [np.empty(self.shape[l]) for l in range(1, self.layers)]  # Here the errors get stored
        delta[-1] = (Y - self.activations[-1]) * sigmoid_der(self.weighted_sums[-1])  # The output error
        for l in reversed(range(self.layers - 2)):  # The errors get backpropagated
            delta[l] = self.weights[l + 1].T @ delta[l + 1] * sigmoid_der(self.weighted_sums[l])
        for l in range(self.layers - 1):  # The weights get updated online
            for j in range(self.shape[l + 1]):
                self.weights[l][j] -= 0.1 * self.activations[l + 1][j] * delta[l][j]

nn = NeuralNetwork((2, 2, 1))

X = np.array([
    [1, 0],
    [0, 1],
    [1, 1],
    [0, 0]
])
Y = np.array([
    [1],
    [1],
    [0],
    [0]
])

# I train my network by randomly picking an example from the training set
for _ in range(1000):
    i = np.random.randint(0, 4)
    nn.forward_prop(X[i])
    nn.backprop(X[i], Y[i])

for x in X:
    nn.forward_prop(x)
    print(nn.activations[-1])
The matrix math behind backpropagation is genuinely hard. What is especially confusing is that the lists of weight matrices and of deltas (and, as it turns out, of bias arrays too) must be one element shorter than the number of layers, which makes the indexing messy. The problem was indeed caused by indexing errors: the hidden-layer deltas took the sigmoid derivative at the wrong layer's weighted sums, and the weight update scaled the error by the wrong layer's activations, and subtracted the step even though the delta is defined as target minus output, so it has to be added. Finally got it working!
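Before the corrected code, one way to keep that off-by-one bookkeeping straight is to print the container lengths and shapes once. This is just an illustrative sketch of my own (it uses the corrected NeuralNetwork class defined below), not part of the fix:

# Illustrative sketch only: assumes the corrected NeuralNetwork class below.
nn = NeuralNetwork((2, 3, 2, 1))  # 4 layers
# weights, biases and deltas all have one entry fewer than there are layers:
print(len(nn.weights), len(nn.biases), len(nn.deltas))  # -> 3 3 3
for l in range(nn.layers - 1):
    # weights[l] maps layer l (shape[l] neurons) to layer l + 1 (shape[l + 1] neurons)
    print(f"weights[{l}]: {nn.weights[l].shape}, biases[{l}]: {nn.biases[l].shape}")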
import numpy as np

sigmoid = lambda x: 1 / (1 + np.exp(-x))
sigmoid_der = lambda x: sigmoid(x) * (1 - sigmoid(x))
ReLU = lambda x: np.maximum(0, x)
ReLU_der = lambda x: x > 0

class NeuralNetwork:
    def __init__(self, shape: tuple):
        self.layers = len(shape)  # The number of layers
        self.shape = shape        # The number of neurons in each layer
        # Weights are now drawn from [-1, 1) instead of [0, 1)
        self.weights = [
            np.array([2 * np.random.random(shape[l - 1]) - 1 for _ in range(shape[l])])
            for l in range(1, self.layers)
        ]
        # One bias/delta array per non-input layer: one fewer than the number of layers
        self.biases = [np.zeros(l) for l in shape[1:]]
        self.weighted_sums = [None for l in shape]
        self.activations = [None for l in shape]
        self.deltas = [None for l in shape[1:]]

    def inspect(self):
        print("=============NeuralNetwork===============")
        print(f"Shape: {self.shape}")
        print(f"Weights: {self.weights}")
        print(f"Activations: {self.activations}")

    def forward_prop(self, X):
        self.activations[0] = X
        for l in range(1, self.layers):
            self.weighted_sums[l] = self.weights[l - 1] @ self.activations[l - 1] + self.biases[l - 1]
            self.activations[l] = sigmoid(self.weighted_sums[l])

    def backprop(self, X, Y, lr):
        # deltas[l] is the error of layer l + 1, so deltas[-1] is the output error
        self.deltas[-1] = (Y - self.activations[-1]) * sigmoid_der(self.weighted_sums[-1])
        for l in range(self.layers - 2, 0, -1):
            self.deltas[l - 1] = self.weights[l].T @ self.deltas[l] * sigmoid_der(self.weighted_sums[l])
        for l in range(self.layers - 1):
            for j in range(self.shape[l + 1]):
                # The gradient for row j is the previous layer's activations scaled by the error
                self.weights[l][j] += lr * self.activations[l] * self.deltas[l][j]
            self.biases[l] += lr * self.deltas[l]  # scale the bias step by the learning rate too

    def train(self, X, Y, lr, epochs):
        for e in range(epochs):
            if not e % 1000:
                self.test(X)
            # Online training: one randomly picked example per step
            i = np.random.randint(len(X))
            self.forward_prop(X[i])
            self.backprop(X[i], Y[i], lr)

    def test(self, X):
        print()
        for x in X:
            self.forward_prop(x)
            print(x, self.activations[-1])

if __name__ == "__main__":
    nn = NeuralNetwork((2, 3, 2, 1))

    X = np.array([
        [1, 0],
        [0, 1],
        [1, 1],
        [0, 0]
    ])
    Y = np.array([
        [1],
        [1],
        [0],
        [0]
    ])

    nn.train(X, Y, 0.4, 20000)
    nn.test(X)
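As a sanity check on the corrected indexing, one can compare the gradient that backprop implies for a single weight against a finite-difference estimate. This is a minimal sketch under my own assumptions, not part of the original fix: the loss and gradient_check helpers are hypothetical names, it relies on the NeuralNetwork class (and numpy import) above, and it calls backprop with lr = 0 so the deltas get filled in without any parameter moving:

# Hypothetical sanity check: compare the analytic gradient implied by backprop
# with a central finite-difference estimate for weights[l][j][k].
def loss(nn, x, y):
    nn.forward_prop(x)
    return 0.5 * np.sum((y - nn.activations[-1]) ** 2)

def gradient_check(nn, x, y, l=0, j=0, k=0, eps=1e-5):
    # Analytic: the update rule adds lr * activations[l][k] * deltas[l][j] to
    # weights[l][j][k], i.e. it descends on dL/dw = -deltas[l][j] * activations[l][k]
    nn.forward_prop(x)
    nn.backprop(x, y, lr=0.0)  # lr = 0: fills the deltas without moving any parameter
    analytic = -nn.deltas[l][j] * nn.activations[l][k]

    # Numerical: nudge the weight both ways and take the central difference
    w0 = nn.weights[l][j][k]
    nn.weights[l][j][k] = w0 + eps
    loss_plus = loss(nn, x, y)
    nn.weights[l][j][k] = w0 - eps
    loss_minus = loss(nn, x, y)
    nn.weights[l][j][k] = w0  # restore the original weight
    numerical = (loss_plus - loss_minus) / (2 * eps)
    print(f"analytic = {analytic:.8f}, numerical = {numerical:.8f}")

gradient_check(NeuralNetwork((2, 3, 2, 1)), np.array([1, 0]), np.array([1]))

If the two numbers agree to several decimal places for every layer index l, the delta indexing is consistent with the forward pass.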