为什么我的损失函数只在中间震荡
Why does my loss function oscillate only in the middle?
我正在制作一个卷积神经网络,并且总是得到一个在中间振荡更多的损失函数,我该如何解决它?
Loss error
我的数据只有100张图片,我想先让模型过拟合,以确认一切正常。但损失曲线的中间部分总是振荡得更厉害。我尝试过降低学习率,结果还是一样——曲线中段的振荡始终更大;我也试过把全部100张图片放进同一个batch,振荡仍然没有减小。下面是我的代码。为什么会出现这种情况?我该如何解决?
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import os
# Location of the training TFRecord data (the literal appears truncated in the post).
train_path = '/Users/David/Deskt...'
# Batch size handed to input_pipeline() by main().
batch_size_train = 100
# Epoch-count constant; not referenced by the code visible in this file.
num_epochs = 1

# Emit INFO-level TensorFlow log messages.
tf.logging.set_verbosity(tf.logging.INFO)
# Module-level session that main() uses to run the queue-based input pipeline.
sess = tf.Session()
#Convolutional Model
def cnn_model(features, labels, mode):
    """Estimator model_fn: five conv/pool stages followed by a dense classifier.

    Args:
        features: dict whose "x" entry is reshaped to [-1, 224, 224, 3].
        labels: integer class indices, one-hot encoded with depth 2. Not used
            in PREDICT mode.
        mode: a tf.estimator.ModeKeys value (TRAIN, EVAL or PREDICT).

    Returns:
        A tf.estimator.EstimatorSpec matching the requested mode: predictions
        for PREDICT, loss + train_op + hooks for TRAIN, loss + accuracy metric
        for EVAL.
    """
    # Input layer
    net = tf.reshape(features["x"], [-1, 224, 224, 3])

    # Five convolution + 2x2 max-pooling stages. Layer names are kept exactly
    # as before so that existing checkpoints remain loadable.
    for stage, num_filters in enumerate([64, 128, 192, 256, 320], start=1):
        net = tf.layers.conv2d(
            inputs=net,
            filters=num_filters,
            kernel_size=[10, 10],
            padding="same",
            activation=tf.nn.relu,
            name="Convolucion_%d" % stage)
        net = tf.layers.max_pooling2d(
            inputs=net, pool_size=[2, 2], strides=2, name="Pool_%d" % stage)

    # 224 halved by five stride-2 pools is 7; the last stage has 320 filters.
    net = tf.reshape(net, [-1, 7 * 7 * 320], name="Flat_Pool")

    # Fully connected layers
    for index, units in enumerate([10000, 7000, 4000, 1000, 500], start=1):
        net = tf.layers.dense(
            inputs=net, units=units, activation=tf.nn.relu,
            name="Capa_%d" % index)
    logits = tf.layers.dense(inputs=net, units=2, name="Capa_final")

    predictions = {
        "classes": tf.argmax(input=logits, axis=1),
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor"),
    }

    # PREDICT has no labels, so return before anything touches them.
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    onehot_labels = tf.one_hot(indices=labels, depth=2)
    loss = tf.losses.softmax_cross_entropy(
        onehot_labels=onehot_labels, logits=logits)
    tf.summary.scalar('loss', loss)

    if mode == tf.estimator.ModeKeys.TRAIN:
        # Hooks are only meaningful while training: write summaries and log
        # the loss on every step.
        ds = tf.train.SummarySaverHook(
            save_steps=1,
            output_dir="/Users/David/Desktop/David/Tesis/Practica/Programas/CNN/Model_Chekpoint",
            summary_op=tf.summary.merge_all())
        loss_hook = tf.train.LoggingTensorHook(
            tensors={"loss": loss}, every_n_iter=1)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(
            loss=loss,
            global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(
            mode=mode, loss=loss, train_op=train_op,
            training_hooks=[ds, loss_hook])

    # EVAL: train_op is neither defined nor needed here.
    eval_metric_ops = {
        "accuracy": tf.metrics.accuracy(
            labels=labels, predictions=predictions["classes"]),
    }
    return tf.estimator.EstimatorSpec(
        mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
def read_file(filename_queue):
    """Read the next record from a TFRecord filename queue and decode it.

    Args:
        filename_queue: queue of TFRecord file names to read from.

    Returns:
        (image, label): the 'train/image' bytes decoded as float32, scaled by
        1/255 and reshaped to [224, 224, 3], and the 'train/label' value cast
        to int32.
    """
    record_reader = tf.TFRecordReader()
    _, raw_record = record_reader.read(filename_queue)

    # Parse the serialized example into a dict of features.
    schema = {
        'train/image': tf.FixedLenFeature([], tf.string),
        'train/label': tf.FixedLenFeature([], tf.int64),
    }
    parsed = tf.parse_single_example(
        raw_record, features=schema, name="Decodificacion_Parse")

    # Turn the raw byte string into numbers and rescale.
    pixels = tf.decode_raw(
        parsed['train/image'], tf.float32, name="imagenes_decod")
    scaled = pixels * (1 / 255.0)
    label = tf.cast(parsed['train/label'], dtype=tf.int32, name="label_decod")

    image = tf.reshape(scaled, [224, 224, 3])
    return image, label
def input_pipeline(filenames, batch_size):
    """Build the queue-based input pipeline yielding shuffled batches.

    Args:
        filenames: a single TFRecord file path (it is wrapped in a list here).
        batch_size: number of examples per dequeued batch.

    Returns:
        (images, labels) batch tensors produced by tf.train.shuffle_batch.
    """
    # Queue of input files, read for a single epoch.
    file_queue = tf.train.string_input_producer(
        [filenames], num_epochs=1, shuffle=True, name="Creacion_lista_archiv")
    example, target = read_file(file_queue)

    # Shuffle the incoming examples before batching.
    shuffle_floor = 100
    images, labels = tf.train.shuffle_batch(
        [example, target],
        batch_size=batch_size,
        capacity=shuffle_floor + 3 * batch_size,
        num_threads=2,
        min_after_dequeue=shuffle_floor,
        name="Shuffle_data_in")
    return images, labels
def main(unused_argv):
    """Pull batches from the TFRecord pipeline and train the estimator on them.

    Each batch dequeued through the module-level session is wrapped in a
    numpy_input_fn and used for 5000 training steps. The loop ends when the
    input queue raises OutOfRangeError.
    """
    # Reading and decoding of the data
    image_batch, label_batch = input_pipeline(train_path, batch_size_train)

    # Estimator wrapping the model function
    gun_detector = tf.estimator.Estimator(
        model_fn=cnn_model,
        model_dir="/Users/David/Desktop/David/Tesis/Practica/Programas/CNN/Model_Chekpoint")

    # Initialize global and local variables in the shared session.
    sess.run(tf.group(tf.global_variables_initializer(),
                      tf.local_variables_initializer()))

    # Start the queue runners created in the computation graph.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        while not coord.should_stop():
            images, targets = sess.run([image_batch, label_batch])
            gun_detector.train(
                input_fn=tf.estimator.inputs.numpy_input_fn(
                    x={"x": images},
                    y=targets,
                    batch_size=70,
                    num_epochs=None,
                    shuffle=True),
                steps=5000)
    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    finally:
        # Ask the reader threads to stop.
        coord.request_stop()

    # Wait for the threads to finish, then release the session.
    coord.join(threads)
    sess.close()


# Script entry point: tf.app.run() dispatches to main().
if __name__ == '__main__':
    tf.app.run()
振荡是由学习率引起的。如果学习率太大(学得太快),就会跳过局部最小值,损失函数就会发散;如果学习率设得太小,则永远无法收敛,或者收敛得非常慢。您可以尝试调整学习率来消除这些振荡,但这样会有使模型过度训练的风险。只要能在合理的时间内收敛,您的曲线在我看来就没有问题,不必在意中间阶段发生了什么。
我正在制作一个卷积神经网络,并且总是得到一个在中间振荡更多的损失函数,我该如何解决它? Loss error
我的数据只有100张图片,我想先让模型过拟合,以确认一切正常。但损失曲线的中间部分总是振荡得更厉害。我尝试过降低学习率,结果还是一样——曲线中段的振荡始终更大;我也试过把全部100张图片放进同一个batch,振荡仍然没有减小。下面是我的代码。为什么会出现这种情况?我该如何解决?
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import os
# Location of the training TFRecord data (the literal appears truncated in the post).
train_path = '/Users/David/Deskt...'
# Batch size handed to input_pipeline() by main().
batch_size_train = 100
# Epoch-count constant; not referenced by the code visible in this file.
num_epochs = 1

# Emit INFO-level TensorFlow log messages.
tf.logging.set_verbosity(tf.logging.INFO)
# Module-level session that main() uses to run the queue-based input pipeline.
sess = tf.Session()
#Convolutional Model
def cnn_model(features, labels, mode):
    """Estimator model_fn: five conv/pool stages followed by a dense classifier.

    Args:
        features: dict whose "x" entry is reshaped to [-1, 224, 224, 3].
        labels: integer class indices, one-hot encoded with depth 2. Not used
            in PREDICT mode.
        mode: a tf.estimator.ModeKeys value (TRAIN, EVAL or PREDICT).

    Returns:
        A tf.estimator.EstimatorSpec matching the requested mode: predictions
        for PREDICT, loss + train_op + hooks for TRAIN, loss + accuracy metric
        for EVAL.
    """
    # Input layer
    net = tf.reshape(features["x"], [-1, 224, 224, 3])

    # Five convolution + 2x2 max-pooling stages. Layer names are kept exactly
    # as before so that existing checkpoints remain loadable.
    for stage, num_filters in enumerate([64, 128, 192, 256, 320], start=1):
        net = tf.layers.conv2d(
            inputs=net,
            filters=num_filters,
            kernel_size=[10, 10],
            padding="same",
            activation=tf.nn.relu,
            name="Convolucion_%d" % stage)
        net = tf.layers.max_pooling2d(
            inputs=net, pool_size=[2, 2], strides=2, name="Pool_%d" % stage)

    # 224 halved by five stride-2 pools is 7; the last stage has 320 filters.
    net = tf.reshape(net, [-1, 7 * 7 * 320], name="Flat_Pool")

    # Fully connected layers
    for index, units in enumerate([10000, 7000, 4000, 1000, 500], start=1):
        net = tf.layers.dense(
            inputs=net, units=units, activation=tf.nn.relu,
            name="Capa_%d" % index)
    logits = tf.layers.dense(inputs=net, units=2, name="Capa_final")

    predictions = {
        "classes": tf.argmax(input=logits, axis=1),
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor"),
    }

    # PREDICT has no labels, so return before anything touches them.
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    onehot_labels = tf.one_hot(indices=labels, depth=2)
    loss = tf.losses.softmax_cross_entropy(
        onehot_labels=onehot_labels, logits=logits)
    tf.summary.scalar('loss', loss)

    if mode == tf.estimator.ModeKeys.TRAIN:
        # Hooks are only meaningful while training: write summaries and log
        # the loss on every step.
        ds = tf.train.SummarySaverHook(
            save_steps=1,
            output_dir="/Users/David/Desktop/David/Tesis/Practica/Programas/CNN/Model_Chekpoint",
            summary_op=tf.summary.merge_all())
        loss_hook = tf.train.LoggingTensorHook(
            tensors={"loss": loss}, every_n_iter=1)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(
            loss=loss,
            global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(
            mode=mode, loss=loss, train_op=train_op,
            training_hooks=[ds, loss_hook])

    # EVAL: train_op is neither defined nor needed here.
    eval_metric_ops = {
        "accuracy": tf.metrics.accuracy(
            labels=labels, predictions=predictions["classes"]),
    }
    return tf.estimator.EstimatorSpec(
        mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
def read_file(filename_queue):
    """Read the next record from a TFRecord filename queue and decode it.

    Args:
        filename_queue: queue of TFRecord file names to read from.

    Returns:
        (image, label): the 'train/image' bytes decoded as float32, scaled by
        1/255 and reshaped to [224, 224, 3], and the 'train/label' value cast
        to int32.
    """
    record_reader = tf.TFRecordReader()
    _, raw_record = record_reader.read(filename_queue)

    # Parse the serialized example into a dict of features.
    schema = {
        'train/image': tf.FixedLenFeature([], tf.string),
        'train/label': tf.FixedLenFeature([], tf.int64),
    }
    parsed = tf.parse_single_example(
        raw_record, features=schema, name="Decodificacion_Parse")

    # Turn the raw byte string into numbers and rescale.
    pixels = tf.decode_raw(
        parsed['train/image'], tf.float32, name="imagenes_decod")
    scaled = pixels * (1 / 255.0)
    label = tf.cast(parsed['train/label'], dtype=tf.int32, name="label_decod")

    image = tf.reshape(scaled, [224, 224, 3])
    return image, label
def input_pipeline(filenames, batch_size):
    """Build the queue-based input pipeline yielding shuffled batches.

    Args:
        filenames: a single TFRecord file path (it is wrapped in a list here).
        batch_size: number of examples per dequeued batch.

    Returns:
        (images, labels) batch tensors produced by tf.train.shuffle_batch.
    """
    # Queue of input files, read for a single epoch.
    file_queue = tf.train.string_input_producer(
        [filenames], num_epochs=1, shuffle=True, name="Creacion_lista_archiv")
    example, target = read_file(file_queue)

    # Shuffle the incoming examples before batching.
    shuffle_floor = 100
    images, labels = tf.train.shuffle_batch(
        [example, target],
        batch_size=batch_size,
        capacity=shuffle_floor + 3 * batch_size,
        num_threads=2,
        min_after_dequeue=shuffle_floor,
        name="Shuffle_data_in")
    return images, labels
def main(unused_argv):
    """Pull batches from the TFRecord pipeline and train the estimator on them.

    Each batch dequeued through the module-level session is wrapped in a
    numpy_input_fn and used for 5000 training steps. The loop ends when the
    input queue raises OutOfRangeError.
    """
    # Reading and decoding of the data
    image_batch, label_batch = input_pipeline(train_path, batch_size_train)

    # Estimator wrapping the model function
    gun_detector = tf.estimator.Estimator(
        model_fn=cnn_model,
        model_dir="/Users/David/Desktop/David/Tesis/Practica/Programas/CNN/Model_Chekpoint")

    # Initialize global and local variables in the shared session.
    sess.run(tf.group(tf.global_variables_initializer(),
                      tf.local_variables_initializer()))

    # Start the queue runners created in the computation graph.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        while not coord.should_stop():
            images, targets = sess.run([image_batch, label_batch])
            gun_detector.train(
                input_fn=tf.estimator.inputs.numpy_input_fn(
                    x={"x": images},
                    y=targets,
                    batch_size=70,
                    num_epochs=None,
                    shuffle=True),
                steps=5000)
    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    finally:
        # Ask the reader threads to stop.
        coord.request_stop()

    # Wait for the threads to finish, then release the session.
    coord.join(threads)
    sess.close()


# Script entry point: tf.app.run() dispatches to main().
if __name__ == '__main__':
    tf.app.run()
振荡是由学习率引起的。如果学习率太大(学得太快),就会跳过局部最小值,损失函数就会发散;如果学习率设得太小,则永远无法收敛,或者收敛得非常慢。您可以尝试调整学习率来消除这些振荡,但这样会有使模型过度训练的风险。只要能在合理的时间内收敛,您的曲线在我看来就没有问题,不必在意中间阶段发生了什么。