Keras NN loss not decreasing

I am using Keras 2.0.7, Python 3.5, and TensorFlow 1.3.0 on Windows 10.

I am testing the architecture used in the paper Long-term Temporal Convolutions for Action Recognition, with the hope of using it on my own data. I fed it test data that I thought would converge very quickly. The learning rate drops down to 1e-6, but the loss never leaves its high value of 4.5549672...

Could someone look this code over or try it out? Is my error in the assumptions, in the coding, or am I just being impatient?

Thanks

## Attempt to implement based on ...
# Long-term Temporal Convolutions for Action Recognition
#
# Gül Varol, Ivan Laptev, and Cordelia Schmid, Fellow, IEEE
#
import os

import numpy as np

import keras
import tensorflow as tf

from keras.models import Model
from keras.layers import Dense, Dropout, Flatten, Input
from keras.layers import Conv3D, MaxPooling3D, LeakyReLU
from keras.optimizers import Nadam
from keras import regularizers
from keras import backend as K


K.clear_session()
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.99, allocator_type='BFC')
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True, gpu_options=gpu_options))
K.set_session(sess)  # without this, Keras creates its own session and the gpu_options are ignored


# path information
sample_root_path = "C:\\"  # escaped backslash; a bare "C:\" breaks the string literal
os.chdir(sample_root_path)




batch_size = 15
num_classes = 1
epochs = 5000
my_lambda = 0.

m_loss = 'binary_crossentropy'
m_opt = Nadam(lr=0.02, beta_1=0.9, beta_2=0.999, epsilon=1e-08, schedule_decay=0.004)
m_met = ['binary_accuracy']

# test data: first 150 videos are all +1 (label 1), the rest are all -1 (label 0)
nvideos = 300
nframes = 30
nrows = 60
ncols = 80
nchan = 3

x_flow = np.ones((nvideos, nframes, nrows, ncols, nchan), np.float32)
x_flow[150:] = -1.0
y_flow = np.ones((nvideos), np.float32)
y_flow[150:] = 0




t_inp = Input(shape=(nframes, nrows, ncols, nchan), name='t_inp')

t = Conv3D(64, (3, 3, 3), activation='relu', padding="same", name="conv1", strides=(1, 1, 1),
           kernel_initializer='glorot_normal',
           bias_initializer='glorot_normal',
           bias_regularizer=regularizers.l1(my_lambda),
           trainable=True)(t_inp)
t = LeakyReLU(alpha=0.3, trainable=True)(t)
t = MaxPooling3D(pool_size=(2, 2, 1), strides=(2, 2, 2), name='pool1')(t)


t = Conv3D(128, (3, 3, 3), activation='relu', padding="same", name="conv2", strides=(1, 1, 1),
           bias_regularizer=regularizers.l1(my_lambda), trainable=True)(t)
t = LeakyReLU(alpha=0.3, trainable=True)(t)
t = MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2), name='pool2')(t)

t = Conv3D(256, (3, 3, 3), activation='relu', padding="same", name="conv3", strides=(1, 1, 1),
           bias_regularizer=regularizers.l1(my_lambda), trainable=True)(t)
t = LeakyReLU(alpha=0.3, trainable=True)(t)
t = MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2), name='pool3')(t)

t = Conv3D(256, (3, 3, 3), activation='relu', padding="same", name="conv4", strides=(1, 1, 1),
           bias_regularizer=regularizers.l1(my_lambda), trainable=True)(t)
t = LeakyReLU(alpha=0.3, trainable=True)(t)
t = MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2), name='pool4')(t)

t = Conv3D(256, (3, 3, 3), activation='relu', padding="same", name="conv5", strides=(1, 1, 1),
           bias_regularizer=regularizers.l1(my_lambda), trainable=True)(t)
t = LeakyReLU(alpha=0.3, trainable=True)(t)
t = MaxPooling3D(pool_size=(1, 1, 1), strides=(1, 1, 1), name='pool5')(t)
##*****************************

t = Flatten()(t)
t = Dense(2048, name='s_den00')(t)
t = Dropout(.5)(t)
t = Dense(2048, name='s_den0')(t)
t = Dropout(.5)(t)

t = Dense(num_classes, activation='softmax', name='s_den1')(t)


model = Model(inputs=t_inp, outputs=t)
print(model.summary())

model.compile(loss=m_loss, optimizer=m_opt, metrics=m_met)
print('compiled model')

tb = keras.callbacks.TensorBoard(log_dir=sample_root_path + 'logs', histogram_freq=0,
                                 write_graph=True, write_grads=True, write_images=True)
reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='loss', factor=0.5, verbose=1,
                                              patience=3, min_lr=0.000001)

with tf.device('/gpu:0'):
    history = model.fit(x_flow, y_flow,
                        batch_size=batch_size,
                        callbacks=[reduce_lr, tb],
                        verbose=1,
                        validation_split=.3,
                        shuffle=True,
                        epochs=epochs)

print('done')

The problem is the output-layer activation function. You would only use softmax for a two-class structure, i.e. if your output array looks like this:
[[1,0],
 [1,0],
 [0,1]]

Softmax assumes the outputs sum to 1, so if you run predictions on your inputs you will notice they are all 1, and the loss can therefore never decrease (the sketch at the end of this answer demonstrates this). You can either set your output up as above, or keep it as it is, a single binary column like this:

[[1],
 [1],
 [0]]

and use a sigmoid activation, which optimizes the output over the 0-1 range. Your last layer would then be:

t = Dense(num_classes, activation='sigmoid', name='s_den1')(t)
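
To see why the single-unit softmax can never improve, here is a minimal numpy sketch comparing the two activations on a Dense(1) output:

import numpy as np

def softmax(z):
    # normalize across the class axis so each row sums to 1
    e = np.exp(z - z.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

logits = np.array([[-3.0], [0.0], [4.0]])  # one unit per sample, as with Dense(1)

print(softmax(logits).ravel())  # [1. 1. 1.] -- a 1-unit softmax is constant, so gradients vanish
print(sigmoid(logits).ravel())  # [0.0474 0.5 0.982] -- varies with the logit, so the loss can move

If you would rather keep softmax, the equivalent fix is the first option above: two output units, one-hot labels (keras.utils.to_categorical does the conversion), and categorical_crossentropy as the loss.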