What is the problem with my Gradient Descent Algorithm, or how it's applied?

I've been trying for hours to figure out what I'm doing wrong, but I just can't work it out. I've even looked at other basic neural network libraries to make sure my gradient descent algorithm is correct, but it still doesn't work properly.

I'm trying to teach it XOR, but it outputs:

input (0 0) | 0.011441891321516094
input (1 0) | 0.6558508610135193
input (0 1) | 0.6558003273099053
input (1 1) | 0.6563021185296245 

That's after 1000 training passes, so something is clearly wrong.

The code is written in Lua. I create the neural network from raw data, so you can easily see the format the data takes.

-Training Code-

math.randomseed(os.time())

local nn =  require("NeuralNetwork")

local network = nn.newFromRawData({
    ["activationFunction"] = "sigmoid",
    ["learningRate"] = 0.3,
    ["net"] = {
        [1] = {
            [1] = {
                ["value"] = 0
            },
            [2] = {
                ["value"] = 0
            }
        },
        [2] = {
            [1] = {
                ["bias"] = 1,
                ["netInput"] = 0,
                ["value"] = 0,
                ["weights"] = {
                    [1] = 1,
                    [2] = 1
                }
            },
            [2] = {
                ["bias"] = 1,
                ["netInput"] = 0,
                ["value"] = 0,
                ["weights"] = {
                    [1] = 1,
                    [2] = 1
                }
            },
            [3] = {
                ["bias"] = 1,
                ["netInput"] = 0,
                ["value"] = 0,
                ["weights"] = {
                    [1] = 1,
                    [2] = 1
                }
            },
            [4] = {
                ["bias"] = 1,
                ["netInput"] = 0,
                ["value"] = 0,
                ["weights"] = {
                    [1] = 1,
                    [2] = 1
                }
            }
        },
        [3] = {
            [1] = {
                ["bias"] = 1,
                ["netInput"] = 0,
                ["value"] = 0,
                ["weights"] = {
                    [1] = 1,
                    [2] = 1,
                    [3] = 1,
                    [4] = 1
                }
            }
        }
    }
})

attempts = 1000
for i = 1,attempts do
    network:backPropagate({0,0},{0}) 
    network:backPropagate({1,0},{1})
    network:backPropagate({0,1},{1})
    network:backPropagate({1,1},{0})
end

print("Results:")
print("input (0 0) | "..network:feedForward({0,0})[1])
print("input (1 0) | "..network:feedForward({1,0})[1])
print("input (0 1) | "..network:feedForward({0,1})[1])
print("input (1 1) | "..network:feedForward({1,1})[1])

-Library-

local nn = {}
nn.__index = nn

nn.ActivationFunctions = {
    sigmoid = function(x) return 1/(1+math.exp(-x)) end,
    ReLu = function(x) return math.max(0, x) end,
}
nn.Derivatives = {
    -- note: these take the neuron's *activated* value, matching how
    -- backPropagate calls them with layer[x].value
    sigmoid = function(x) return x * (1 - x) end,
    ReLu = function(x) return x > 0 and 1 or 0 end,
}
nn.CostFunctions = {
    MSE = function(outputs, expected)
        local sum = 0
        for i = 1, #outputs do
            sum = sum + 1/2*(expected[i] - outputs[i])^2
        end
        return sum/#outputs
    end,
}

function nn.new(inputs, outputs, hiddenLayers, neurons, learningRate, activationFunction)
    
    local self = setmetatable({}, nn)
    
    self.learningRate = learningRate or .3
    self.activationFunction = activationFunction or "ReLu"
    self.net = {}
    
    local net = self.net
    local layers = hiddenLayers+2
    
    for i = 1, layers do
        net[i] = {}
    end
    
    for i = 1, inputs do
        net[1][i] = {value = 0}
    end
    for i = 2, layers-1 do
        for x = 1, neurons do
            net[i][x] = {netInput = 0, value = 0, bias = math.random()*2-1, weights = {}}
            for z = 1, #net[i-1] do
                net[i][x].weights[z] = math.random()*2-1
            end
        end
    end
    for i = 1, outputs do
        net[layers][i] = {netInput = 0, value = 0, bias = math.random()*2-1, weights = {}}
        for z = 1, #net[layers-1] do
            net[layers][i].weights[z] = math.random()*2-1
        end
    end
    
    return self
    
end

function nn.newFromRawData(data)
    
    return setmetatable(data, nn)
    
end

function nn:feedForward(inputs)
    
    local net = self.net
    local activation = self.activationFunction
    local layers = #net
    
    local inputLayer = net[1]
    local outputLayer = net[layers]
    
    
    for i = 1, #inputLayer do
        inputLayer[i].value = inputs[i]
    end
    
    for i = 2, layers do
        local layer = net[i]
        for x = 1, #layer do
            local sum = layer[x].bias
            for z = 1, #net[i-1] do
                sum = sum + net[i-1][z].value * layer[x].weights[z]
            end
            layer[x].netInput = sum
            layer[x].value = nn.ActivationFunctions[activation](sum)
        end 
    end
    
    local outputs = {}
    
    for i = 1, #outputLayer do
        table.insert(outputs, outputLayer[i].value)
    end
    
    return outputs
    
end

function nn:backPropagate(inputs, expected)
    
    local outputs = self:feedForward(inputs)
    
    local net = self.net
    local activation = self.activationFunction
    local layers = #net
    local lr = self.learningRate
    
    local inputLayer = net[1]
    local outputLayer = net[layers]
    
    for i = 1, #outputLayer do
        local delta = -(expected[i] - outputs[i]) * nn.Derivatives[activation](net[layers][i].value)
        outputLayer[i].delta = delta    
    end
    
    for i = layers-1, 2, -1 do
        local layer = net[i]
        local nextLayer = net[i+1]
        for x = 1, #layer do
            local delta = 0
            for z = 1, #nextLayer do
                delta = delta + nextLayer[z].delta * nextLayer[z].weights[x]
            end
            layer[x].delta = delta * nn.Derivatives[activation](layer[x].value)
        end
    end
    
    for i = 2, layers do
        local lastLayer = net[i-1]
        for x = 1, #net[i] do
            net[i][x].bias = net[i][x].bias - lr * net[i][x].delta
            for z = 1, #lastLayer do
                net[i][x].weights[z] = net[i][x].weights[z] - lr * net[i][x].delta * lastLayer[z].value
            end
        end
    end
    
end

return nn

Any help is much appreciated, thanks!

  1. All of the initial weights must be different numbers, otherwise backpropagation will not work: with identical weights, every hidden neuron computes the same output and receives the same gradient update, so they can never differentiate from one another. You can, for example, replace each 1 with math.random() (see the sketch after this list).
  2. Increase the number of attempts to 10000.
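
For example, one way to apply both fixes in your training script (a sketch only; it reuses the math.random()*2-1 initialization scheme that nn.new already uses, and assumes it runs right after nn.newFromRawData returns network):

-- Re-initialize every weight and bias with a random value in [-1, 1)
-- so the hidden neurons start out asymmetric and can learn different
-- features. Layer 1 is the input layer, so it is skipped.
for i = 2, #network.net do
    for _, neuron in ipairs(network.net[i]) do
        neuron.bias = math.random()*2-1
        for z = 1, #neuron.weights do
            neuron.weights[z] = math.random()*2-1
        end
    end
end

-- Train for ten times as many passes over the XOR set.
attempts = 10000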

With these modifications, your code works correctly:

Results:
input (0 0) | 0.028138230938126
input (1 0) | 0.97809448578087
input (0 1) | 0.97785000216126
input (1 1) | 0.023128477689456
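
Also worth noting: the nn.new constructor in your own library already initializes weights and biases with math.random()*2-1, so building the network with nn.new instead of newFromRawData avoids the symmetric-weight problem from the start. A sketch, using the signature from the library above:

-- nn.new(inputs, outputs, hiddenLayers, neurons, learningRate, activationFunction)
local network = nn.new(2, 1, 1, 4, 0.3, "sigmoid")

This builds the same 2-4-1 topology as your raw data, but with randomized initial weights.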