非常基础的 Keras CNN(二分类)给出了令人费解的结果
Very basic Keras CNN with 2 classes giving inexplicable answers
我正在尝试使用 Keras/Theano 在一个二元分类问题上训练一个非常简单的 CNN。损失函数总是收敛到 8.0151 左右,修改参数或网络结构都没有帮助。于是我构造了一个非常简单的例子:全新的输入数组,一类全为 1,另一类全为 0。还是不行,表现完全一样。我又试了全 1 对全 -1,结果相同;然后是全 0 对随机值,仍然相同。降低维度和深度、移除 dropout、调整参数,结果都一样。求助!到底发生了什么?
import numpy
# Toy two-class dataset: class A holds 100 all-ones images and class B holds
# 100 all-zeros images.  Every sample has shape (1, 100, 100) and an integer
# dtype, the same as numpy.array() yields for nested lists of Python ints.
A = [numpy.ones((1, 100, 100), dtype=int) for _ in range(100)]
B = [numpy.zeros((1, 100, 100), dtype=int) for _ in range(100)]

# Hold out every index j with (j + 2) % 7 == 0 (j = 5, 12, ..., 96) as test
# data; all remaining indices go to the training split.  A and B are split
# with the same indices so both classes stay balanced.
trainXA = []
trainXB = []
testXA = []
testXB = []
for j in range(len(A)):
    if ((j + 2) % 7) != 0:
        trainXA.append(A[j])
        trainXB.append(B[j])
    else:
        testXA.append(A[j])
        testXB.append(B[j])

# Class A samples come first, class B samples second.
X_train = numpy.array(trainXA + trainXB)
X_test = numpy.array(testXA + testXB)

# One-hot labels in the same order as X_train: [1, 0] for the class A half,
# [0, 1] for the class B half.  Floor division keeps the count an int; with
# true division (Python 3) len(X_train) / 2 is a float and cannot be used as
# a repeat count.
Y_train = numpy.array(
    [[1, 0]] * (len(X_train) // 2) + [[0, 1]] * (len(X_train) // 2)
)
import random
def jumblelists(C,D):
    """Shuffle two parallel sequences with one shared random permutation.

    Elements are taken in order and each one is inserted at a uniformly
    chosen position among those already placed.  The same position is used
    for ``C[j]`` and ``D[j]``, so pairs stay aligned in the output.

    Args:
        C: Indexable sequence; its length determines how many pairs are
            shuffled.
        D: Indexable sequence with at least ``len(C)`` elements.  Elements
            beyond ``len(C)`` are ignored.

    Returns:
        A tuple ``(shuffled_C, shuffled_D)`` of numpy arrays.

    Raises:
        IndexError: If ``D`` has fewer elements than ``C``.
    """
    # Build the permutation on plain indices rather than re-slicing two
    # lists of (potentially large) samples on every step.  The random draws
    # are the same ones the insert-by-slicing formulation would make.
    order = []
    for idx in range(len(C)):
        newpos = int(random.random() * (len(order) + 1))
        order.insert(newpos, idx)
    return numpy.array([C[i] for i in order]), numpy.array([D[i] for i in order])
# Shuffle samples and labels together so every image keeps its own label.
X_train,Y_train = jumblelists(X_train,Y_train)
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.optimizers import SGD
# Small CNN for the two-class toy problem: two conv + ReLU stages, one 2x2
# max-pool, then a 128-unit dense layer and a 2-way softmax output.
model = Sequential()
for layer in (
    Convolution2D(32, 3, 3, border_mode='valid', input_shape=(1,100,100)),
    Activation('relu'),
    Convolution2D(32, 3, 3),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(128),
    Activation('relu'),
    Dense(2),
    Activation('softmax'),
):
    # Layers are added in exactly the order listed above.
    model.add(layer)

# SGD with Nesterov momentum.
# NOTE(review): the answer text in this file attributes the stuck loss to
# lr=0.1 being too high; the value is left as posted in the question.
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='binary_crossentropy', optimizer=sgd)

# Train for 10 epochs with mini-batches of 32 samples.
model.fit(X_train, Y_train, batch_size=32, nb_epoch=10)
您的学习率设置得太高,可能导致权重和梯度爆炸。只需更改
# Optimizer as configured in the question (learning rate 0.1).
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
改为
# Suggested replacement: same optimizer, learning rate lowered to 0.001.
sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
您也可以尝试其他优化器。使用默认设置的 Adam 通常是一个不错的选择。
我正在尝试使用 Keras/Theano 在一个二元分类问题上训练一个非常简单的 CNN。损失函数总是收敛到 8.0151 左右,修改参数或网络结构都没有帮助。于是我构造了一个非常简单的例子:全新的输入数组,一类全为 1,另一类全为 0。还是不行,表现完全一样。我又试了全 1 对全 -1,结果相同;然后是全 0 对随机值,仍然相同。降低维度和深度、移除 dropout、调整参数,结果都一样。求助!到底发生了什么?
import numpy
# Toy two-class dataset: class A holds 100 all-ones images and class B holds
# 100 all-zeros images.  Every sample has shape (1, 100, 100) and an integer
# dtype, the same as numpy.array() yields for nested lists of Python ints.
A = [numpy.ones((1, 100, 100), dtype=int) for _ in range(100)]
B = [numpy.zeros((1, 100, 100), dtype=int) for _ in range(100)]

# Hold out every index j with (j + 2) % 7 == 0 (j = 5, 12, ..., 96) as test
# data; all remaining indices go to the training split.  A and B are split
# with the same indices so both classes stay balanced.
trainXA = []
trainXB = []
testXA = []
testXB = []
for j in range(len(A)):
    if ((j + 2) % 7) != 0:
        trainXA.append(A[j])
        trainXB.append(B[j])
    else:
        testXA.append(A[j])
        testXB.append(B[j])

# Class A samples come first, class B samples second.
X_train = numpy.array(trainXA + trainXB)
X_test = numpy.array(testXA + testXB)

# One-hot labels in the same order as X_train: [1, 0] for the class A half,
# [0, 1] for the class B half.  Floor division keeps the count an int; with
# true division (Python 3) len(X_train) / 2 is a float and cannot be used as
# a repeat count.
Y_train = numpy.array(
    [[1, 0]] * (len(X_train) // 2) + [[0, 1]] * (len(X_train) // 2)
)
import random
def jumblelists(C,D):
    """Shuffle two parallel sequences with one shared random permutation.

    Elements are taken in order and each one is inserted at a uniformly
    chosen position among those already placed.  The same position is used
    for ``C[j]`` and ``D[j]``, so pairs stay aligned in the output.

    Args:
        C: Indexable sequence; its length determines how many pairs are
            shuffled.
        D: Indexable sequence with at least ``len(C)`` elements.  Elements
            beyond ``len(C)`` are ignored.

    Returns:
        A tuple ``(shuffled_C, shuffled_D)`` of numpy arrays.

    Raises:
        IndexError: If ``D`` has fewer elements than ``C``.
    """
    # Build the permutation on plain indices rather than re-slicing two
    # lists of (potentially large) samples on every step.  The random draws
    # are the same ones the insert-by-slicing formulation would make.
    order = []
    for idx in range(len(C)):
        newpos = int(random.random() * (len(order) + 1))
        order.insert(newpos, idx)
    return numpy.array([C[i] for i in order]), numpy.array([D[i] for i in order])
# Shuffle samples and labels together so every image keeps its own label.
X_train,Y_train = jumblelists(X_train,Y_train)
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.optimizers import SGD
# Small CNN for the two-class toy problem: two conv + ReLU stages, one 2x2
# max-pool, then a 128-unit dense layer and a 2-way softmax output.
model = Sequential()
for layer in (
    Convolution2D(32, 3, 3, border_mode='valid', input_shape=(1,100,100)),
    Activation('relu'),
    Convolution2D(32, 3, 3),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(128),
    Activation('relu'),
    Dense(2),
    Activation('softmax'),
):
    # Layers are added in exactly the order listed above.
    model.add(layer)

# SGD with Nesterov momentum.
# NOTE(review): the answer text in this file attributes the stuck loss to
# lr=0.1 being too high; the value is left as posted in the question.
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='binary_crossentropy', optimizer=sgd)

# Train for 10 epochs with mini-batches of 32 samples.
model.fit(X_train, Y_train, batch_size=32, nb_epoch=10)
您的学习率设置得太高,可能导致权重和梯度爆炸。只需更改
# Optimizer as configured in the question (learning rate 0.1).
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
改为
# Suggested replacement: same optimizer, learning rate lowered to 0.001.
sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
您也可以尝试其他优化器。使用默认设置的 Adam 通常是一个不错的选择。