What is the problem with my Gradient Descent Algorithm, or how it's applied?
I have been trying for hours to figure out what I am doing wrong, but I just can't. I even looked through other basic neural network libraries to make sure my gradient descent algorithm was correct, but it still doesn't work.
I am trying to teach it XOR, but it outputs:
input (0 0) | 0.011441891321516094
input (1 0) | 0.6558508610135193
input (0 1) | 0.6558003273099053
input (1 1) | 0.6563021185296245
after 1000 training passes, so something is clearly wrong.
The code is written in Lua. I build the neural network from raw data so that you can easily see the format of the data.
-Training Code-
math.randomseed(os.time())
local nn = require("NeuralNetwork")
local network = nn.newFromRawData({
["activationFunction"] = "sigmoid",
["learningRate"] = 0.3,
["net"] = {
[1] = {
[1] = {
["value"] = 0
},
[2] = {
["value"] = 0
}
},
[2] = {
[1] = {
["bias"] = 1,
["netInput"] = 0,
["value"] = 0,
["weights"] = {
[1] = 1,
[2] = 1
}
},
[2] = {
["bias"] = 1,
["netInput"] = 0,
["value"] = 0,
["weights"] = {
[1] = 1,
[2] = 1
}
},
[3] = {
["bias"] = 1,
["netInput"] = 0,
["value"] = 0,
["weights"] = {
[1] = 1,
[2] = 1
}
},
[4] = {
["bias"] = 1,
["netInput"] = 0,
["value"] = 0,
["weights"] = {
[1] = 1,
[2] = 1
}
}
},
[3] = {
[1] = {
["bias"] = 1,
["netInput"] = 0,
["value"] = 0,
["weights"] = {
[1] = 1,
[2] = 1,
[3] = 1,
[4] = 1
}
}
}
}
})
attempts = 1000
for i = 1, attempts do
    network:backPropagate({0,0},{0})
    network:backPropagate({1,0},{1})
    network:backPropagate({0,1},{1})
    network:backPropagate({1,1},{0})
end
print("Results:")
print("input (0 0) | "..network:feedForward({0,0})[1])
print("input (1 0) | "..network:feedForward({1,0})[1])
print("input (0 1) | "..network:feedForward({0,1})[1])
print("input (1 1) | "..network:feedForward({1,1})[1])
-Library-
local nn = {}
nn.__index = nn
nn.ActivationFunctions = {
    sigmoid = function(x) return 1/(1+math.exp(-x/1)) end,
    ReLu = function(x) return math.max(0, x) end,
}
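-- Note: these derivatives are written in terms of the neuron's activated output y,
-- not its net input x: for sigmoid, dy/dx = y*(1 - y) with y = sigmoid(x).
-- backPropagate below relies on this by passing layer[x].value (the activation).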
nn.Derivatives = {
    sigmoid = function(x) return x * (1 - x) end,
    ReLu = function(x) return x > 0 and 1 or 0 end,
}
nn.CostFunctions = {
    MSE = function(outputs, expected)
        local sum = 0
        for i = 1, #outputs do
            sum += 1/2*(expected[i] - outputs[i])^2
        end
        return sum/#outputs
    end,
}
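-- CostFunctions.MSE is not called anywhere in this file; backPropagate works with its
-- per-output derivative (output - expected) directly.
-- nn.new(inputs, outputs, hiddenLayers, neurons, learningRate, activationFunction)
-- builds a fully connected network and initialises every weight and bias to a random
-- value in (-1, 1).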
function nn.new(inputs, outputs, hiddenLayers, neurons, learningRate, activationFunction)
    local self = setmetatable({}, nn)
    self.learningRate = learningRate or .3
    self.activationFunction = activationFunction or "ReLu"
    self.net = {}
    local net = self.net
    local layers = hiddenLayers+2
    for i = 1, layers do
        net[i] = {}
    end
    for i = 1, inputs do
        net[1][i] = {value = 0}
    end
    for i = 2, layers-1 do
        for x = 1, neurons do
            net[i][x] = {netInput = 0, value = 0, bias = math.random()*2-1, weights = {}}
            for z = 1, #net[i-1] do
                net[i][x].weights[z] = math.random()*2-1
            end
        end
    end
    for i = 1, outputs do
        net[layers][i] = {netInput = 0, value = 0, bias = math.random()*2-1, weights = {}}
        for z = 1, #net[layers-1] do
            net[layers][i].weights[z] = math.random()*2-1
        end
    end
    return self
end
function nn.newFromRawData(data)
    return setmetatable(data, nn)
end
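-- Forward pass: copy the inputs into the first layer, then for each later layer give
-- every neuron a net input of its bias plus the weighted sum of the previous layer's
-- values, and apply the activation function. Returns the output layer's values.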
function nn:feedForward(inputs)
    local net = self.net
    local activation = self.activationFunction
    local layers = #net
    local inputLayer = net[1]
    local outputLayer = net[layers]
    for i = 1, #inputLayer do
        inputLayer[i].value = inputs[i]
    end
    for i = 2, layers do
        local layer = net[i]
        for x = 1, #layer do
            local sum = layer[x].bias
            for z = 1, #net[i-1] do
                sum += net[i-1][z].value * layer[x].weights[z]
            end
            layer[x].netInput = sum
            layer[x].value = nn.ActivationFunctions[activation](sum)
        end
    end
    local outputs = {}
    for i = 1, #outputLayer do
        table.insert(outputs, outputLayer[i].value)
    end
    return outputs
end
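-- Backward pass: output deltas are (output - expected) * f'(value); hidden deltas are
-- the weighted sum of the next layer's deltas times f'(value); biases and weights are
-- then nudged against their deltas, scaled by the learning rate.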
function nn:backPropagate(inputs, expected)
    local outputs = self:feedForward(inputs)
    local net = self.net
    local activation = self.activationFunction
    local layers = #net
    local lr = self.learningRate
    local inputLayer = net[1]
    local outputLayer = net[layers]
    for i = 1, #outputLayer do
        local delta = -(expected[i] - outputs[i]) * nn.Derivatives[activation](net[layers][i].value)
        outputLayer[i].delta = delta
    end
    for i = layers-1, 2, -1 do
        local layer = net[i]
        local nextLayer = net[i+1]
        for x = 1, #layer do
            local delta = 0
            for z = 1, #nextLayer do
                delta += nextLayer[z].delta * nextLayer[z].weights[x]
            end
            layer[x].delta = delta * nn.Derivatives[activation](layer[x].value)
        end
    end
    for i = 2, layers do
        local lastLayer = net[i-1]
        for x = 1, #net[i] do
            net[i][x].bias -= lr * net[i][x].delta
            for z = 1, #lastLayer do
                net[i][x].weights[z] -= lr * net[i][x].delta * lastLayer[z].value
            end
        end
    end
end
return nn
Any help is greatly appreciated, thanks!
- All of the initial weights must be different numbers, otherwise backpropagation will not work: when every weight starts at 1, all the hidden neurons compute the same output and receive the same gradient, so they never become different and the network cannot learn XOR. For example, you can replace each 1 with math.random().
- Increase the number of attempts to 10000 (one way to apply both changes is sketched just after this list).
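For example (a minimal sketch, untested, and assuming the posted library is the NeuralNetwork module being required), the training script could let the library pick the random initial weights itself via its own nn.new constructor instead of the hand-written raw data, and train for 10000 passes:

math.randomseed(os.time())
local nn = require("NeuralNetwork")

-- nn.new(inputs, outputs, hiddenLayers, neurons, learningRate, activationFunction)
-- already initialises every weight and bias to math.random()*2-1, so no two weights
-- start out identical.
local network = nn.new(2, 1, 1, 4, 0.3, "sigmoid")

local attempts = 10000
for i = 1, attempts do
    network:backPropagate({0,0},{0})
    network:backPropagate({1,0},{1})
    network:backPropagate({0,1},{1})
    network:backPropagate({1,1},{0})
end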
With these modifications, your code works:
Results:
input (0 0) | 0.028138230938126
input (1 0) | 0.97809448578087
input (0 1) | 0.97785000216126
input (1 1) | 0.023128477689456