Python-coded neural network does not learn properly

My network never learns to tell the individual inputs apart: it either outputs the average of the targets or becomes biased towards one particular output. What am I doing wrong?

import numpy as np

sigmoid = lambda x: 1 / (1 + np.exp(-x))
sigmoid_der = lambda x: sigmoid(x) * (1 - sigmoid(x))
ReLU = lambda x: np.maximum(0, x)
ReLU_der = lambda x: x > 0


class NeuralNetwork:
    def __init__(self, shape: tuple):
        self.layers = len(shape) # The number of layers
        self.shape = shape # The number of neurons in each layer
        self.weights = [
            np.array([np.random.rand(shape[l - 1]) for _ in range(shape[l])])
            for l in range(1, self.layers)
        ] # A list of matrices of weights connecting neighbouring layers
        self.weighted_sums = [np.zeros(l) for l in shape]
        self.activations = [np.zeros(l) for l in shape]

    def inspect(self):
        print("=============NeuralNetwork===============")
        print(f"Shape: {self.shape}")
        print(f"Weights: {self.weights}")
        print(f"Activations: {self.activations}")

    def forward_prop(self, X):
        self.activations[0] = X
        for l in range(1, self.layers):
            self.weighted_sums[l] = self.weights[l - 1] @ self.activations[l - 1]
            self.activations[l] = sigmoid(self.weighted_sums[l])

    def backprop(self, X, Y):
        delta = [np.empty(self.shape[l]) for l in range(1, self.layers)] # Here errors get stored
        delta[-1] = (Y - self.activations[-1]) * sigmoid_der(self.weighted_sums[-1]) # The output error
        for l in reversed(range(self.layers - 2)): # The errors get backpropagated
            delta[l] = self.weights[l + 1].T @ delta[l + 1] * sigmoid_der(self.weighted_sums[l])
        for l in range(self.layers - 1): # The weights get updated online
            for j in range(self.shape[l + 1]):
                self.weights[l][j] -= 0.1 * self.activations[l + 1][j] * delta[l][j]


nn = NeuralNetwork((2, 2, 1))

X = np.array([
    [1, 0],
    [0, 1],
    [1, 1],
    [0, 0]
])

Y = np.array([
    [1],
    [1],
    [0],
    [0]
])

# I train my network by randomly picking an example from my training set
for _ in range(1000):
    i = np.random.randint(0, 4)
    nn.forward_prop(X[i])
    nn.backprop(X[i], Y[i])

for x in X:
    nn.forward_prop(x)
    print(nn.activations[-1])

The matrix math behind backpropagation is really hard. What is especially confusing is that the lists of weight matrices and deltas (and, as it turned out, of bias arrays too) have to be one element shorter than the number of layers in the network, which makes the indexing messy. Sure enough, the problem came down to indexing errors: the backprop loop evaluated sigmoid_der at the wrong layer's weighted sums, the weight update used activations[l + 1][j] instead of the previous layer's activations[l], and it subtracted the update even though the error is defined as target minus output, so the weights moved away from the target. With those fixed, biases added, and the learning rate made a parameter, it finally works!
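To make the off-by-one concrete, here is a small standalone sketch (separate from the fix itself) that prints how the lists line up for the (2, 3, 2, 1) shape used below: four layers, but only three weight matrices, bias vectors, and delta vectors, because weights[l] connects layer l to layer l + 1.

shape = (2, 3, 2, 1) # 4 layers
for l in range(len(shape) - 1): # only len(shape) - 1 = 3 weight matrices
    print(f"weights[{l}] has shape ({shape[l + 1]}, {shape[l]}): layer {l} -> layer {l + 1}")
# weights[0] has shape (3, 2): layer 0 -> layer 1
# weights[1] has shape (2, 3): layer 1 -> layer 2
# weights[2] has shape (1, 2): layer 2 -> layer 3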

import numpy as np

sigmoid = lambda x: 1 / (1 + np.exp(-x))
sigmoid_der = lambda x: sigmoid(x) * (1 - sigmoid(x))
ReLU = lambda x: np.maximum(0, x)
ReLU_der = lambda x: x > 0


class NeuralNetwork:
    def __init__(self, shape: tuple):
        self.layers = len(shape)
        self.shape = shape
        self.weights = [
            np.array([2 * np.random.random(shape[l - 1]) - 1 for _ in range(shape[l])])
            for l in range(1, self.layers)
        ] # Weight matrices drawn uniformly from [-1, 1), one per pair of neighbouring layers
        self.biases = [np.zeros(l) for l in shape[1:]] # One bias vector per non-input layer
        self.weighted_sums = [None for l in shape]
        self.activations = [None for l in shape]
        self.deltas = [None for l in shape[1:]] # One error vector per non-input layer, filled in backprop

    def inspect(self):
        print("=============NeuralNetwork===============")
        print(f"Shape: {self.shape}")
        print(f"Weights: {self.weights}")
        print(f"Activations: {self.activations}")

    def forward_prop(self, X):
        self.activations[0] = X
        for l in range(1, self.layers):
            self.weighted_sums[l] = self.weights[l - 1] @ self.activations[l - 1] + self.biases[l - 1]
            self.activations[l] = sigmoid(self.weighted_sums[l])

    def backprop(self, X, Y, lr):
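        # Output-layer error: (target - output) times the sigmoid derivative at the output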
        self.deltas[-1] = (Y - self.activations[-1]) * sigmoid_der(self.weighted_sums[-1])
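        # Propagate the error backwards: deltas[l - 1] is the error of layer l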
        for l in range(self.layers - 2, 0, -1):
            self.deltas[l - 1] = self.weights[l].T @ self.deltas[l] * sigmoid_der(self.weighted_sums[l])
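        # Online update: each weight row j moves by its neuron's error times the previous layer's activations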
        for l in range(self.layers - 1):
            for j in range(self.shape[l + 1]):
                self.weights[l][j] += lr * self.activations[l] * self.deltas[l][j]
            self.biases[l] += lr * self.deltas[l] # Biases use the same learning rate as the weights

    def train(self, X, Y, lr, epochs):
        for e in range(epochs):
            if e % 1000 == 0: # Print the predictions every 1000 iterations
                self.test(X)
            i = np.random.randint(len(X)) # Online training: pick a random example each step
            self.forward_prop(X[i])
            self.backprop(X[i], Y[i], lr)

    def test(self, X):
        print()
        for x in X:
            self.forward_prop(x)
            print(x, self.activations[-1])


if __name__ == "__main__":
    nn = NeuralNetwork((2, 3, 2, 1))

    X = np.array([
        [1, 0],
        [0, 1],
        [1, 1],
        [0, 0]
    ])

    Y = np.array([ # Targets: XOR of the two inputs
        [1],
        [1],
        [0],
        [0]
    ])

    nn.train(X, Y, 0.4, 20000)
    nn.test(X)
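As a side note, the two nested loops in backprop that update one weight row at a time can be collapsed into a single outer product per layer. This is a behaviour-preserving sketch using the same attribute names as the class above:

        for l in range(self.layers - 1):
            # np.outer(deltas[l], activations[l]) has shape (shape[l + 1], shape[l]),
            # which matches weights[l], so the whole layer updates in one step
            self.weights[l] += lr * np.outer(self.deltas[l], self.activations[l])
            self.biases[l] += lr * self.deltas[l]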