Gradient descent in neural network training plateauing
I have been trying to implement a basic back-propagation neural network in Python, and I have finished programming the initialization and training of the weight sets. However, on every set I train, the (mean squared) error always converges to some odd number: the error keeps decreasing over further iterations, but never gets truly close to zero.
Any help would be much appreciated.
import csv
import numpy as np


class NeuralNetwork:

    def __init__(self, shape):
        # Per-instance state (class-level mutable defaults would be shared
        # between instances, a common Python pitfall)
        self.shape = shape
        self.layers = len(shape) - 1
        self.weights = []
        self.layerIn = []
        self.layerOut = []
        for i in range(self.layers):
            n = shape[i]      # nodes feeding this layer
            m = shape[i + 1]  # nodes in this layer
            # +1 column holds the bias weight of each node
            self.weights.append(np.random.normal(scale=0.2, size=(m, n + 1)))

    def sgm(self, x):
        return 1 / (1 + np.exp(-x))

    def dersgm(self, y):
        # Derivative of the sigmoid expressed in terms of its *output* y,
        # since backpropagation below calls it on already-activated values
        return y * (1 - y)

    def run(self, input):
        self.layerIn = []
        self.layerOut = []
        for i in range(self.layers):
            if i == 0:
                # Stack a row of ones under the inputs to feed the bias weights
                layer = self.weights[0].dot(
                    np.vstack((input.transpose(), np.ones([1, input.shape[0]]))))
            else:
                layer = self.weights[i].dot(
                    np.vstack((self.layerOut[-1], np.ones([1, input.shape[0]]))))
            self.layerIn.append(layer)
            self.layerOut.append(self.sgm(layer))
        return self.layerOut[-1].T

    def backpropogate(self, input, y, learning_rate):
        deltas = []
        y_hat = self.run(input)
        # Calculate deltas
        for i in reversed(range(self.layers)):
            if i == self.layers - 1:
                # For the last layer
                error = y_hat - y
                msq_error = np.sum(.5 * error ** 2)
                # Returns delta: k rows for k inputs, m columns for m nodes
                deltas.append(error * self.dersgm(y_hat))
            else:
                error = deltas[-1].dot(self.weights[i + 1][:, :-1])
                deltas.append(self.dersgm(self.layerOut[i]).T * error)
        # Calculate weight-deltas
        ordered_deltas = list(reversed(deltas))  # reverse order because created backwards
        wdelta = []
        # Returns weight deltas: k rows for k nodes, m columns for m next-layer nodes
        for i in range(self.layers):
            if i == 0:
                # Add bias
                input_with_bias = np.vstack((input.T, np.ones(input.shape[0])))
                # Sum over n rows of deltas for n training examples to get one
                # delta for all examples, for all nodes
                wdelta.append(ordered_deltas[i].T.dot(input_with_bias.T))
            else:
                with_bias = np.vstack((self.layerOut[i - 1], np.ones(input.shape[0])))
                wdelta.append(ordered_deltas[i].T.dot(with_bias.T))

        # Update weights
        def update_weights(self, weight_deltas, learning_rate):
            for i in range(self.layers):
                # Descend the gradient: subtract the weight deltas
                self.weights[i] = self.weights[i] - \
                    (learning_rate * weight_deltas[i])

        update_weights(self, wdelta, learning_rate)
        return msq_error
    # end backpropogate

    def train(self, input, target, lr, run_iter):
        for i in range(run_iter):
            # Update the weights every iteration; report the error every
            # 100,000 iterations
            err = self.backpropogate(input, target, lr)
            if i % 100000 == 0:
                print(err)
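For reference, a minimal way to exercise the class; the XOR data, layer shape, and hyperparameters below are illustrative assumptions, not taken from the post:

    # XOR inputs: 4 examples, 2 features each; targets: 4 examples, 1 output each
    x = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=float)
    y = np.array([[0], [1], [1], [0]], dtype=float)

    net = NeuralNetwork((2, 3, 1))   # 2 inputs, 3 hidden nodes, 1 output
    net.train(x, y, lr=0.5, run_iter=200000)
    print(net.run(x))                # predictions after training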
The error function in the scenarios below cannot be 0, because an error of 0 would require the points to match the curve perfectly.
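As a concrete illustration (a small added sketch, not from the original answer): fit a straight line to three points that are not collinear, and the mean squared error stays strictly positive no matter which line you choose.

    import numpy as np

    # Three points that no straight line can pass through exactly
    x = np.array([0.0, 1.0, 2.0])
    y = np.array([0.0, 1.0, 0.0])

    # Best-fit line via least squares: minimizes the squared error
    slope, intercept = np.polyfit(x, y, deg=1)
    residual = y - (slope * x + intercept)
    print(np.mean(residual ** 2))   # > 0: even the optimal line cannot reach zero error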
Having more neurons will certainly reduce the error, since the function can take on a more complex and precise shape. But when you fit your data too closely, a problem called overfitting appears. Picture three curves fitted to the same dataset: from left to right, the first underfits the data, the second fits it about right, and the third overfits it.
The overfit curve on the right would reach an error of 0, but that is undesirable and you want to avoid it. How?
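One way to reproduce those three regimes numerically, sketched here with plain polynomial regression rather than a neural network (my choice of stand-in model): as the polynomial degree grows, the training error falls all the way to 0, which is exactly the overfitting case.

    import numpy as np

    rng = np.random.default_rng(0)
    x = np.linspace(0, 1, 10)
    y = np.sin(2 * np.pi * x) + rng.normal(scale=0.2, size=x.shape)

    for degree in (1, 3, 9):        # underfit, reasonable fit, overfit
        coeffs = np.polyfit(x, y, degree)
        fit = np.polyval(coeffs, x)
        # Training error shrinks toward 0 as the degree grows; degree 9
        # interpolates all 10 points exactly
        print(degree, np.mean((fit - y) ** 2))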
The simplest way to determine whether the number of neurons in the network is ideal (a good fit) is trial and error. Split your data into training data (80%, used to train the network) and test data (20%, kept aside solely for testing the network after training). While training only on the training data, you can plot the network's performance on the test dataset.
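Here is a minimal sketch of that 80/20 split, assuming the examples sit in NumPy arrays x and y with one row per example (the helper name and the fixed seed are my own choices for illustration):

    import numpy as np

    def train_test_split(x, y, train_fraction=0.8, seed=0):
        # Shuffle the example indices, then cut once at the 80% mark
        rng = np.random.default_rng(seed)
        order = rng.permutation(len(x))
        cut = int(train_fraction * len(x))
        train_idx, test_idx = order[:cut], order[cut:]
        return x[train_idx], y[train_idx], x[test_idx], y[test_idx]

    # x_train/y_train drive the weight updates;
    # x_test/y_test are only ever evaluated, never trained on
    x_train, y_train, x_test, y_test = train_test_split(x, y)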
You can also use a third dataset for validation; see:
What is the difference between train, validation and test set, in neural networks?
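If you do add a validation set, the same splitting idea extends to a three-way cut; a minimal sketch (the 60/20/20 proportions are a common convention I am assuming, not something prescribed above):

    import numpy as np

    def train_val_test_split(x, y, fractions=(0.6, 0.2, 0.2), seed=0):
        rng = np.random.default_rng(seed)
        order = rng.permutation(len(x))
        first = int(fractions[0] * len(x))
        second = first + int(fractions[1] * len(x))
        return (x[order[:first]], y[order[:first]],            # train: fit the weights
                x[order[first:second]], y[order[first:second]],  # validation: tune hyperparameters
                x[order[second:]], y[order[second:]])          # test: final evaluation only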