使用 TensorFlow 后端在 Keras 中测试多类别加权分类交叉熵
Testing weighted categorical cross entropy for multiple classes in keras with tensorflow backend
我有一个问题在 Keras 中似乎没有直接的解决方案。我的服务器运行在 ubuntu 14.04,带有后端 tensorflow 的 Keras 上。它有 4 个 Nvidia Geforce gtx1080 GPU。
我正在尝试测试加权分类交叉熵的最佳可用实现(https://github.com/keras-team/keras/issues/2115)(curiale 于 2017 年 1 月 20 日发表评论)。下面粘贴的代码对于下面显示的错误是可重现的。
输入数组 Xtrain 的形状为 (800,40),其中 800 表示样本数,40 表示输入特征维度。同样,Xtest 的形状为 (400,40)。这是一个类别数为三的多分类场景。以下代码用于实现,但出现了错误,错误信息提示(在 GPU 上运行时)张量形状与批量大小不匹配,这很难解决,请提供一些解决此问题的指点。
import keras
from keras.models import Sequential, Model, load_model
from keras.layers.embeddings import Embedding
from keras.layers.core import Activation, Dense, Dropout, Reshape
from keras.optimizers import SGD, Adam, RMSprop
#from keras.layers import TimeDistributed,Merge, Conv1D, Conv2D, Flatten, MaxPooling2D, Conv2DTranspose, UpSampling2D, RepeatVector
#from
#keras.layers.recurrent import GRU, LSTM
#from keras.datasets.data_utils import get_file
#import tarfile
from functools import partial, update_wrapper
from keras.callbacks import TensorBoard
from time import time
from sklearn.model_selection import KFold
import numpy as np
from keras.callbacks import EarlyStopping
import tensorflow as tf
import scipy.io
from keras import backend as K
from keras.layers import Input, Lambda
import os
from keras import optimizers
from matplotlib import pyplot
from sklearn.preprocessing import MinMaxScaler
#os.export CUDA_VISIBLE_DEVICES="0,1"
import keras, sys
from matplotlib import pyplot
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
#from keras.utils import np_utils
from itertools import product
from keras.layers import Input
def w_categorical_crossentropy(y_true, y_pred, weights):
    """Categorical cross-entropy scaled by a class-pair cost matrix.

    Adapted from keras-team/keras issue #2115. Each sample's cross-entropy
    is multiplied by weights[true_class, predicted_class] (assuming a unique
    row maximum in y_pred), so specific confusions can be penalised more.

    y_true  -- target tensor; expected one-hot, shape (batch, nb_classes)
    y_pred  -- softmax output tensor, shape (batch, nb_classes)
    weights -- numpy cost matrix, shape (nb_classes, nb_classes)
    """
    nb_cl = weights.shape[1]#len(weights[0,:])
    # Debug prints: these run once, while Keras builds the loss graph.
    print weights.shape
    print nb_cl
    print y_pred
    print y_true
    # Accumulates the per-sample cost factor, shape (batch,).
    final_mask = K.zeros_like(y_pred[:, 0])
    y_pred_max = K.max(y_pred, axis=1)#returns maximum value along an axis in a tensor
    # Reshape to (batch, 1) so the equality test below broadcasts per row.
    y_pred_max = K.reshape(y_pred_max, (K.shape(y_pred)[0], 1))
    # 1.0 where an entry equals its row maximum (the predicted class), else 0.
    y_pred_max_mat = K.cast(K.equal(y_pred, y_pred_max), K.floatx())
    # Sum weights[c_t, c_p] over all (predicted, true) class pairs; only the
    # pair actually realised by a sample contributes to its mask entry.
    for c_p, c_t in product(range(nb_cl), range(nb_cl)):
        final_mask += (weights[c_t, c_p] *y_pred_max_mat[:, c_p]*y_true[:, c_t])
    #ypred_tensor=K.constant(y_pred,dtype=K.set_floatx('float32'))
    #ytrue_tensor=K.constant(y_true,dtype=K.set_floatx('float32'))
    return K.categorical_crossentropy(y_true,y_pred) * final_mask
#def get_mat_data(add,in1,in2):
# Assuming sample_matlab_file.mat has 2 matrices A and B
#matData = scipy.io.loadmat(add)
#matrixA = matData[in1]
#matrixA1 = matData[in2]
#matrixB = matData['Ytrain']
#matrixB1 = matData['Ytest']
#weights = matData['w']
#matrixC = matData['Ytrainclassify']
#matrixC1 = matData['Ytestclassify']
#nfold = matData['nfold']
#return matrixA, matrixA1, matrixB, matrixB1, weights, matrixC, matrixC1, nfold
def wrapped_partial(func, *args, **kwargs):
    """Return functools.partial(func, ...) carrying func's metadata.

    update_wrapper copies __name__, __doc__, etc. from func onto the partial
    object, so the returned callable still exposes the original's identity.
    """
    bound = partial(func, *args, **kwargs)
    update_wrapper(bound, func)
    return bound
def gen_model():
    """Build an uncompiled 3-class MLP classifier: 40 -> 200 -> 100 -> 3.

    Two ReLU hidden layers, each followed by Dropout(0.2), ending in a
    3-unit softmax layer named 'softmax_layer'.

    Returns:
        keras.models.Model taking (batch, 40) float input and producing
        (batch, 3) class probabilities.
    """
    inp = Input(shape=(40,))  # renamed from 'input' to avoid shadowing the builtin
    x = Dense(200, activation='relu', name='dense_1')(inp)
    x = Dropout(0.2)(x)
    x = Dense(100, activation='relu', name='dense_2')(x)
    x = Dropout(0.2)(x)
    out = Dense(3, activation='softmax', name='softmax_layer')(x)
    # Keras 2 API: 'inputs'/'outputs'. The legacy input=/output= kwargs emit
    # the deprecation UserWarning visible in the pasted run log.
    return Model(inputs=inp, outputs=[out])
#in1 = 'Xtrain'
#in2 = 'Xtest'
#add = '/home/tharun/all_mat_files/test_keras.mat'
#Xtrain, Xtest, Ytrain, Ytest, weights, Ytrainclassify, Ytestclassify, nfold = get_mat_data(add,in1,in2)
# Dummy stand-ins for the real .mat inputs: 800 train / 400 test samples,
# 40 features each, integer class labels in {0, 1, 2}.
Ytrain = np.random.randint(3, size=(800, 1))
Ytest = np.random.randint(3, size=(400, 1))
Ytrainclassify = Ytrain
Ytestclassify = Ytrain  # NOTE(review): assigns Ytrain, not Ytest -- looks like a typo.
Xtrain=np.random.rand(800,40)
Xtest=np.random.rand(400,40)
nb_classes = 3
print Xtrain.shape, Xtest.shape, Ytrain.shape, Ytest.shape, Ytrainclassify.shape, Ytestclassify.shape
wts = np.ones((3,3))  # uniform cost matrix -- behaves like plain cross-entropy
print 'wts:'
print wts.shape
# convert class vectors to binary class matrices
# NOTE(review): Ytrainclassify already has shape (800, 1); indexing with
# [:, None] makes it (800, 1, 1). The accepted answer instead reshapes the
# labels to (N,) before calling to_categorical -- this line is the suspected
# cause of the "Incompatible shapes" error below.
Y_train = keras.utils.to_categorical(Ytrainclassify[:,None], nb_classes)
Y_test = keras.utils.to_categorical(Ytestclassify[:,None], nb_classes)
Xtrain=Xtrain.astype('float32')
Xtest=Xtest.astype('float32')
print Xtrain.shape
print Y_train.shape
print Xtest.shape
print Y_test.shape
# Bind the cost matrix into the loss; Keras only supplies (y_true, y_pred).
ncce = wrapped_partial(w_categorical_crossentropy, weights=wts)
batch_size = 10
nb_classes = 3
nb_epoch = 1
model=gen_model()
#model.compile(loss=ncce, optimizer="adam")
model.summary()
rms = SGD()  # despite the variable name, this is plain SGD, not RMSprop
model.compile(loss=ncce, optimizer=rms)
model.fit(Xtrain, Y_train,batch_size=batch_size, nb_epoch=nb_epoch)
model.evaluate(Xtest, Y_test)
#print('Test score:', score[0])
#print('Test accuracy:', score[1])
#saving weights
model.save('model_classify_weights.h5')
错误:
python /home/tharun/keras_workshop/EEG_RxtimeDNN_regress_classify.py
/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
from ._conv import register_converters as _register_converters
Using TensorFlow backend.
(800, 40) (400, 40) (800, 1) (400, 1) (1, 3) (800, 1) (400, 1)
wts:
(3, 3)
(800, 40)
(800, 3)
(400, 40)
(400, 3)
/home/tharun/keras_workshop/EEG_RxtimeDNN_regress_classify.py:129: UserWarning: Update your `Model` call to the Keras 2 API: `Model(outputs=[<tf.Tenso..., inputs=Tensor("in...)`
model = Model(input=input, output=[x5])
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) (None, 40) 0
_________________________________________________________________
dense_1 (Dense) (None, 200) 8200
_________________________________________________________________
dropout_1 (Dropout) (None, 200) 0
_________________________________________________________________
dense_2 (Dense) (None, 100) 20100
_________________________________________________________________
dropout_2 (Dropout) (None, 100) 0
_________________________________________________________________
softmax_layer (Dense) (None, 3) 303
=================================================================
Total params: 28,603
Trainable params: 28,603
Non-trainable params: 0
_________________________________________________________________
(?, 3)
3
Tensor("softmax_layer_target:0", shape=(?, ?), dtype=float32)
[[array([1.41292294]) 1 1]
[1 array([7.328564]) 1]
[1 1 array([2.38611435])]]
/home/tharun/keras_workshop/EEG_RxtimeDNN_regress_classify.py:176: UserWarning: The `nb_epoch` argument in `fit` has been renamed `epochs`.
model.fit(Xtrain, Y_train,batch_size=batch_size, nb_epoch=nb_epoch)
Epoch 1/1
2018-02-13 15:41:44.382214: I tensorflow/core/platform/cpu_feature_guard.cc:137] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2 FMA
2018-02-13 15:41:44.758387: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1030] Found device 0 with properties:
name: GeForce GTX 1080 major: 6 minor: 1 memoryClockRate(GHz): 1.7715
pciBusID: 0000:05:00.0
totalMemory: 7.92GiB freeMemory: 7.42GiB
2018-02-13 15:41:44.992640: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1030] Found device 1 with properties:
name: GeForce GTX 1080 major: 6 minor: 1 memoryClockRate(GHz): 1.7715
pciBusID: 0000:06:00.0
totalMemory: 7.92GiB freeMemory: 7.80GiB
2018-02-13 15:41:45.225696: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1030] Found device 2 with properties:
name: GeForce GTX 1080 major: 6 minor: 1 memoryClockRate(GHz): 1.7715
pciBusID: 0000:09:00.0
totalMemory: 7.92GiB freeMemory: 7.80GiB
2018-02-13 15:41:45.458070: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1030] Found device 3 with properties:
name: GeForce GTX 1080 major: 6 minor: 1 memoryClockRate(GHz): 1.7715
pciBusID: 0000:0a:00.0
totalMemory: 7.92GiB freeMemory: 7.80GiB
2018-02-13 15:41:45.461078: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1045] Device peer to peer matrix
2018-02-13 15:41:45.461151: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1051] DMA: 0 1 2 3
2018-02-13 15:41:45.461160: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1061] 0: Y Y Y Y
2018-02-13 15:41:45.461165: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1061] 1: Y Y Y Y
2018-02-13 15:41:45.461170: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1061] 2: Y Y Y Y
2018-02-13 15:41:45.461175: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1061] 3: Y Y Y Y
2018-02-13 15:41:45.461191: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1120] Creating TensorFlow device (/device:GPU:0) -> (device: 0, name: GeForce GTX 1080, pci bus id: 0000:05:00.0, compute capability: 6.1)
2018-02-13 15:41:45.461198: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1120] Creating TensorFlow device (/device:GPU:1) -> (device: 1, name: GeForce GTX 1080, pci bus id: 0000:06:00.0, compute capability: 6.1)
2018-02-13 15:41:45.461204: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1120] Creating TensorFlow device (/device:GPU:2) -> (device: 2, name: GeForce GTX 1080, pci bus id: 0000:09:00.0, compute capability: 6.1)
2018-02-13 15:41:45.461209: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1120] Creating TensorFlow device (/device:GPU:3) -> (device: 3, name: GeForce GTX 1080, pci bus id: 0000:0a:00.0, compute capability: 6.1)
Traceback (most recent call last):
File "/home/tharun/keras_workshop/EEG_RxtimeDNN_regress_classify.py", line 239, in <module>
main()
File "/home/tharun/keras_workshop/EEG_RxtimeDNN_regress_classify.py", line 176, in main
model.fit(Xtrain, Y_train,batch_size=batch_size, nb_epoch=nb_epoch)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/engine/training.py", line 1598, in fit
validation_steps=validation_steps)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/engine/training.py", line 1183, in _fit_loop
outs = f(ins_batch)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/backend/tensorflow_backend.py", line 2273, in __call__
**self.session_kwargs)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 889, in run
run_metadata_ptr)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1120, in _run
feed_dict_tensor, options, run_metadata)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1317, in _do_run
options, run_metadata)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1336, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [3] vs. [10]
[[Node: training/SGD/gradients/loss/softmax_layer_loss/mul_20_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _class=["loc:@loss/softmax_layer_loss/mul_20"], _device="/job:localhost/replica:0/task:0/device:GPU:0"](training/SGD/gradients/loss/softmax_layer_loss/mul_20_grad/Shape, training/SGD/gradients/loss/softmax_layer_loss/mul_20_grad/Shape_1)]]
[[Node: loss/mul/_19 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_806_loss/mul", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Caused by op u'training/SGD/gradients/loss/softmax_layer_loss/mul_20_grad/BroadcastGradientArgs', defined at:
File "/home/tharun/keras_workshop/EEG_RxtimeDNN_regress_classify.py", line 239, in <module>
main()
File "/home/tharun/keras_workshop/EEG_RxtimeDNN_regress_classify.py", line 176, in main
model.fit(Xtrain, Y_train,batch_size=batch_size, nb_epoch=nb_epoch)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/engine/training.py", line 1575, in fit
self._make_train_function()
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/engine/training.py", line 960, in _make_train_function
loss=self.total_loss)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/legacy/interfaces.py", line 87, in wrapper
return func(*args, **kwargs)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/optimizers.py", line 156, in get_updates
grads = self.get_gradients(loss, params)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/optimizers.py", line 73, in get_gradients
grads = K.gradients(loss, params)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/backend/tensorflow_backend.py", line 2310, in gradients
return tf.gradients(loss, variables, colocate_gradients_with_ops=True)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/ops/gradients_impl.py", line 581, in gradients
grad_scope, op, func_call, lambda: grad_fn(op, *out_grads))
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/ops/gradients_impl.py", line 353, in _MaybeCompile
return grad_fn() # Exit early
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/ops/gradients_impl.py", line 581, in <lambda>
grad_scope, op, func_call, lambda: grad_fn(op, *out_grads))
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/ops/math_grad.py", line 742, in _MulGrad
rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/ops/gen_array_ops.py", line 532, in _broadcast_gradient_args
"BroadcastGradientArgs", s0=s0, s1=s1, name=name)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2956, in create_op
op_def=op_def)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1470, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
...which was originally created as op u'loss/softmax_layer_loss/mul_20', defined at:
File "/home/tharun/keras_workshop/EEG_RxtimeDNN_regress_classify.py", line 239, in <module>
main()
File "/home/tharun/keras_workshop/EEG_RxtimeDNN_regress_classify.py", line 174, in main
model.compile(loss=ncce, optimizer=rms)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/engine/training.py", line 850, in compile
sample_weight, mask)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/engine/training.py", line 466, in weighted
score_array *= weights
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/ops/math_ops.py", line 894, in binary_op_wrapper
return func(x, y, name=name)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/ops/math_ops.py", line 1117, in _mul_dispatch
return gen_math_ops._mul(x, y, name=name)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/ops/gen_math_ops.py", line 2726, in _mul
"Mul", x=x, y=y, name=name)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2956, in create_op
op_def=op_def)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1470, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
InvalidArgumentError (see above for traceback): Incompatible shapes: [3] vs. [10]
[[Node: training/SGD/gradients/loss/softmax_layer_loss/mul_20_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _class=["loc:@loss/softmax_layer_loss/mul_20"], _device="/job:localhost/replica:0/task:0/device:GPU:0"](training/SGD/gradients/loss/softmax_layer_loss/mul_20_grad/Shape, training/SGD/gradients/loss/softmax_layer_loss/mul_20_grad/Shape_1)]]
[[Node: loss/mul/_19 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_806_loss/mul", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
您是否检查过输入数据中的标签,确认确实只有 3 个类别而不是 10 个?其次值得怀疑的是你的 w_categorical_crossentropy 函数——你是否在虚拟数据上对它做过测试?
问题出在输入标签数组 (Ytrainclassify) 上。它必须重塑为 (Ytrainclassify.shape[0],)。工作代码在下面共享,没有错误。
import keras
from keras.models import Sequential, Model, load_model
from keras.layers.embeddings import Embedding
from keras.layers.core import Activation, Dense, Dropout, Reshape
from keras.optimizers import SGD, Adam, RMSprop
#from keras.layers import TimeDistributed,Merge, Conv1D, Conv2D, Flatten, MaxPooling2D, Conv2DTranspose, UpSampling2D, RepeatVector
#from keras.layers.recurrent import GRU, LSTM
#from keras.datasets.data_utils import get_file
#import tarfile
from ipdb import set_trace as bp
from functools import partial, update_wrapper
from keras.callbacks import TensorBoard
from time import time
from sklearn.model_selection import KFold
import numpy as np
from keras.callbacks import EarlyStopping
import tensorflow as tf
import scipy.io
from keras import backend as K
from keras.layers import Input, Lambda
import os
from keras import optimizers
from matplotlib import pyplot
from sklearn.preprocessing import MinMaxScaler
#os.export CUDA_VISIBLE_DEVICES="0,1"
import keras, sys
from matplotlib import pyplot
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
#from keras.utils import np_utils
from itertools import product
from keras.layers import Input
# Custom loss function with costs
def w_categorical_crossentropy(y_true, y_pred, weights):
    """Weighted categorical cross-entropy (from keras-team/keras issue #2115).

    The per-sample cross-entropy is multiplied by
    weights[true_class, argmax(y_pred)] (assuming a unique row maximum), so
    individual misclassification pairs can carry different costs.

    y_true  -- target tensor; expected one-hot, shape (batch, nb_classes)
    y_pred  -- softmax output tensor, shape (batch, nb_classes)
    weights -- numpy cost matrix, shape (nb_classes, nb_classes)
    """
    nb_cl = weights.shape[1]#len(weights[0,:])
    # All prints below execute once, at graph-construction (compile) time.
    print "dbg \n\n\n\n\n\n\n\n\n\n"
    print weights.shape
    print nb_cl
    print y_pred
    print y_true
    # Accumulates the cost factor for each sample, shape (batch,).
    final_mask = K.zeros_like(y_pred[:, 0])
    print final_mask
    y_pred_max = K.max(y_pred, axis=1)#returns maximum value along an axis in a tensor
    print y_pred_max
    # Reshape to (batch, 1) so the comparison below broadcasts row-wise.
    y_pred_max = K.reshape(y_pred_max, (K.shape(y_pred)[0], 1))
    print y_pred_max
    # 1.0 where an entry equals its row maximum (the predicted class), else 0.
    y_pred_max_mat = K.cast(K.equal(y_pred, y_pred_max), K.floatx())
    print y_pred_max_mat
    # Add weights[c_t, c_p] for every (predicted, true) class pair; only the
    # pair realised by a given sample contributes to its mask entry.
    for c_p, c_t in product(range(nb_cl), range(nb_cl)):
        final_mask += (weights[c_t, c_p] *y_pred_max_mat[:, c_p]*y_true[:, c_t])
    #ypred_tensor=K.constant(y_pred,dtype=K.set_floatx('float32'))
    #ytrue_tensor=K.constant(y_true,dtype=K.set_floatx('float32'))
    return K.categorical_crossentropy(y_true,y_pred) * final_mask
# def joint_classificatn_regressn_loss(x1,ytrn,x2,ytst,w):
# return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1))
#def get_mat_data(add,in1,in2):
# Assuming sample_matlab_file.mat has 2 matrices A and B
#matData = scipy.io.loadmat(add)
#matrixA = matData[in1]
#matrixA1 = matData[in2]
#matrixB = matData['Ytrain']
#matrixB1 = matData['Ytest']
#weights = matData['w']
#matrixC = matData['Ytrainclassify']
#matrixC1 = matData['Ytestclassify']
#nfold = matData['nfold']
#return matrixA, matrixA1, matrixB, matrixB1, weights, matrixC, matrixC1, nfold
#load riemannian features from matlab
#phase I
#train and test three DNN models
# def cutomized_loss(args):
# #A is from the training data
# #S is the internal state
# A, A, S, S = args
# #customize your own loss components
# loss1 = K.mean(K.square(A-A),axis=-1)
# loss1 = K.mean(K.square(A-A),axis=-1)
def wrapped_partial(func, *args, **kwargs):
    """Build a partial application of func that keeps func's metadata.

    update_wrapper returns its first argument after copying __name__,
    __doc__, etc. from func, so this collapses to a single expression.
    """
    return update_wrapper(partial(func, *args, **kwargs), func)
def gen_model():
    """Build an uncompiled 3-class MLP classifier: 40 -> 200 -> 100 -> 3.

    Two ReLU hidden layers, each followed by Dropout(0.2), ending in a
    3-unit softmax layer named 'softmax_layer'.

    Returns:
        keras.models.Model mapping (batch, 40) float input to (batch, 3)
        class probabilities.
    """
    inp = Input(shape=(40,))  # renamed from 'input' to avoid shadowing the builtin
    x = Dense(200, activation='relu', name='dense_1')(inp)
    x = Dropout(0.2)(x)
    x = Dense(100, activation='relu', name='dense_2')(x)
    x = Dropout(0.2)(x)
    out = Dense(3, activation='softmax', name='softmax_layer')(x)
    # Keras 2 API: 'inputs'/'outputs' instead of the deprecated legacy
    # input=/output= kwargs (which trigger a UserWarning in the run log).
    return Model(inputs=inp, outputs=[out])
#x6 =(Dropout(0.2))(x5)
def main():
    """Train and evaluate the 3-class MLP with the weighted loss.

    Uses randomly generated stand-in data (800 train / 400 test samples of
    40 features, integer labels in {0, 1, 2}) in place of the original .mat
    files. This is the working version: labels are flattened to shape (N,)
    before one-hot encoding.
    """
    # print command line arguments
    # for arg in sys.argv[1:]:
    #     print arg
    batch_size = 10
    nb_classes = 3
    nb_epoch = 1
    Ytrain = np.random.randint(3, size=(800, 1))
    Ytest = np.random.randint(3, size=(400, 1))
    Ytrainclassify = Ytrain
    Ytestclassify = Ytrain  # NOTE(review): assigns Ytrain, not Ytest -- looks like a typo.
    Xtrain=np.random.rand(800,40)
    Xtest=np.random.rand(400,40)
    #add = '/home/tharun/all_mat_files/'#+sys.argv[1]
    #in1 = 'Xfff'
    #in2 = 'Xtestf'
    #in1 = 'Xtrain'
    #in2 = 'Xtest'
    #add = '/home/tharun/all_mat_files/test_keras.mat'
    #Xtrain, Xtest, Ytrain, Ytest, weights, Ytrainclassify, Ytestclassify, nfold = get_mat_data(add,in1,in2)
    # Uniform cost matrix: every misclassification is weighted equally, so
    # the weighted loss behaves like plain categorical cross-entropy here.
    wts = np.ones((3,3))
    #np.array([[1/weights[:,0], 1, 1],[1, 1/weights[:,1], 1],[1, 1, 1/weights[:,2]]])
    #y = np.bincount(Ytrain)
    #ii = np.nonzero(y)[0]
    #weight_indx = y[ii]
    # wts[0,0]=1/weights[0,0]
    # wts[1,1]=1/weights[0,1]
    # wts[2,2]=1/weights[0,2]
    print 'wts.shape:'
    print wts.shape
    print wts
    # Bind the cost matrix into the loss; Keras only supplies (y_true, y_pred).
    ncce = wrapped_partial(w_categorical_crossentropy, weights=wts)
    Xtrain = Xtrain.astype('float32')
    Xtest = Xtest.astype('float32')
    nb_classes = 3
    print Xtrain.shape, Xtest.shape, Ytrain.shape, Ytest.shape, wts.shape,Ytrainclassify.shape, Ytestclassify.shape
    # The key fix: flatten the (N, 1) label arrays to shape (N,) before
    # one-hot encoding (this is what resolved the "Incompatible shapes"
    # error in the question's version).
    Ytestclassify = Ytestclassify.reshape(Ytestclassify.shape[0],)
    Ytrainclassify = Ytrainclassify.reshape(Ytrainclassify.shape[0],)
    # convert class vectors to binary class matrices
    Y_train = keras.utils.to_categorical(Ytrainclassify, nb_classes)
    Y_test = keras.utils.to_categorical(Ytestclassify, nb_classes)
    Xtrain=Xtrain.astype('float32')
    Xtest=Xtest.astype('float32')
    Y_train=Y_train.astype('float32')
    Y_test=Y_test.astype('float32')
    print Ytrainclassify.shape
    print Ytestclassify.shape
    print Xtrain.shape
    print Y_train.shape
    print Xtest.shape
    print Y_test.shape
    #weights = np.array(sys.argv[2:], dtype=np.float64)
    # m1.add(Dense(400,activation='relu'))
    # m1.add(Dropout(0.2))
    # m1.add(Dense(100,activation='relu'))
    # m1.add(Dropout(0.2))
    # m1.add(Dense(3, activation='softmax'))
    #parent model
    model=gen_model()
    #model.compile(loss=ncce, optimizer="adam")
    model.summary()
    rms = SGD()  # despite the variable name, this is plain SGD, not RMSprop
    model.compile(loss=ncce, optimizer=rms)
    print Xtrain.shape
    print Y_train.shape
    print Xtest.shape
    print Y_test.shape
    model.fit(Xtrain, Y_train, batch_size=batch_size, nb_epoch=nb_epoch)
    model.evaluate(Xtest, Y_test)
    #print('Test score:', score[0])
    #print('Test accuracy:', score[1])
    #saving weights
    model.save('model_classify_weights.h5')
if __name__ == "__main__":
    main()
我有一个问题在 Keras 中似乎没有直接的解决方案。我的服务器运行在 ubuntu 14.04,带有后端 tensorflow 的 Keras 上。它有 4 个 Nvidia Geforce gtx1080 GPU。
我正在尝试测试加权分类交叉熵的最佳可用实现(https://github.com/keras-team/keras/issues/2115)(curiale 于 2017 年 1 月 20 日发表评论)。下面粘贴的代码对于下面显示的错误是可重现的。
输入数组 Xtrain 的形状为 (800,40),其中 800 表示样本数,40 表示输入特征维度。同样,Xtest 的形状为 (400,40)。这是一个类别数为三的多分类场景。以下代码用于实现,但出现了错误,错误信息提示(在 GPU 上运行时)张量形状与批量大小不匹配,这很难解决,请提供一些解决此问题的指点。
import keras
from keras.models import Sequential, Model, load_model
from keras.layers.embeddings import Embedding
from keras.layers.core import Activation, Dense, Dropout, Reshape
from keras.optimizers import SGD, Adam, RMSprop
#from keras.layers import TimeDistributed,Merge, Conv1D, Conv2D, Flatten, MaxPooling2D, Conv2DTranspose, UpSampling2D, RepeatVector
#from
#keras.layers.recurrent import GRU, LSTM
#from keras.datasets.data_utils import get_file
#import tarfile
from functools import partial, update_wrapper
from keras.callbacks import TensorBoard
from time import time
from sklearn.model_selection import KFold
import numpy as np
from keras.callbacks import EarlyStopping
import tensorflow as tf
import scipy.io
from keras import backend as K
from keras.layers import Input, Lambda
import os
from keras import optimizers
from matplotlib import pyplot
from sklearn.preprocessing import MinMaxScaler
#os.export CUDA_VISIBLE_DEVICES="0,1"
import keras, sys
from matplotlib import pyplot
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
#from keras.utils import np_utils
from itertools import product
from keras.layers import Input
def w_categorical_crossentropy(y_true, y_pred, weights):
    """Categorical cross-entropy scaled by a class-pair cost matrix.

    Adapted from keras-team/keras issue #2115. Each sample's cross-entropy
    is multiplied by weights[true_class, predicted_class] (assuming a unique
    row maximum in y_pred).

    y_true  -- target tensor; expected one-hot, shape (batch, nb_classes)
    y_pred  -- softmax output tensor, shape (batch, nb_classes)
    weights -- numpy cost matrix, shape (nb_classes, nb_classes)
    """
    nb_cl = weights.shape[1]#len(weights[0,:])
    # Debug prints: these execute once, while Keras builds the loss graph.
    print weights.shape
    print nb_cl
    print y_pred
    print y_true
    # Accumulates the per-sample cost factor, shape (batch,).
    final_mask = K.zeros_like(y_pred[:, 0])
    y_pred_max = K.max(y_pred, axis=1)#returns maximum value along an axis in a tensor
    # Reshape to (batch, 1) so the equality test below broadcasts per row.
    y_pred_max = K.reshape(y_pred_max, (K.shape(y_pred)[0], 1))
    # 1.0 where an entry equals its row maximum (the predicted class), else 0.
    y_pred_max_mat = K.cast(K.equal(y_pred, y_pred_max), K.floatx())
    # Sum weights[c_t, c_p] over all (predicted, true) class pairs; only the
    # pair realised by a given sample contributes to its mask entry.
    for c_p, c_t in product(range(nb_cl), range(nb_cl)):
        final_mask += (weights[c_t, c_p] *y_pred_max_mat[:, c_p]*y_true[:, c_t])
    #ypred_tensor=K.constant(y_pred,dtype=K.set_floatx('float32'))
    #ytrue_tensor=K.constant(y_true,dtype=K.set_floatx('float32'))
    return K.categorical_crossentropy(y_true,y_pred) * final_mask
#def get_mat_data(add,in1,in2):
# Assuming sample_matlab_file.mat has 2 matrices A and B
#matData = scipy.io.loadmat(add)
#matrixA = matData[in1]
#matrixA1 = matData[in2]
#matrixB = matData['Ytrain']
#matrixB1 = matData['Ytest']
#weights = matData['w']
#matrixC = matData['Ytrainclassify']
#matrixC1 = matData['Ytestclassify']
#nfold = matData['nfold']
#return matrixA, matrixA1, matrixB, matrixB1, weights, matrixC, matrixC1, nfold
def wrapped_partial(func, *args, **kwargs):
    """partial(func, *args, **kwargs) with func's __name__/__doc__ copied over."""
    pfunc = partial(func, *args, **kwargs)
    return update_wrapper(pfunc, func)
def gen_model():
    """Build an uncompiled 3-class MLP classifier: 40 -> 200 -> 100 -> 3.

    Two ReLU hidden layers, each followed by Dropout(0.2), ending in a
    3-unit softmax layer named 'softmax_layer'.

    Returns:
        keras.models.Model mapping (batch, 40) float input to (batch, 3)
        class probabilities.
    """
    inp = Input(shape=(40,))  # renamed from 'input' to avoid shadowing the builtin
    x = Dense(200, activation='relu', name='dense_1')(inp)
    x = Dropout(0.2)(x)
    x = Dense(100, activation='relu', name='dense_2')(x)
    x = Dropout(0.2)(x)
    out = Dense(3, activation='softmax', name='softmax_layer')(x)
    # Keras 2 API: 'inputs'/'outputs' replaces the deprecated legacy
    # input=/output= kwargs (source of the UserWarning in the run log).
    return Model(inputs=inp, outputs=[out])
#in1 = 'Xtrain'
#in2 = 'Xtest'
#add = '/home/tharun/all_mat_files/test_keras.mat'
#Xtrain, Xtest, Ytrain, Ytest, weights, Ytrainclassify, Ytestclassify, nfold = get_mat_data(add,in1,in2)
# Synthetic replacements for the real .mat inputs: 800 train / 400 test
# samples, 40 features each, integer class labels in {0, 1, 2}.
Ytrain = np.random.randint(3, size=(800, 1))
Ytest = np.random.randint(3, size=(400, 1))
Ytrainclassify = Ytrain
Ytestclassify = Ytrain  # NOTE(review): assigns Ytrain, not Ytest -- looks like a typo.
Xtrain=np.random.rand(800,40)
Xtest=np.random.rand(400,40)
nb_classes = 3
print Xtrain.shape, Xtest.shape, Ytrain.shape, Ytest.shape, Ytrainclassify.shape, Ytestclassify.shape
wts = np.ones((3,3))  # uniform cost matrix -- equivalent to plain cross-entropy
print 'wts:'
print wts.shape
# convert class vectors to binary class matrices
# NOTE(review): Ytrainclassify is already (800, 1); the [:, None] index makes
# it (800, 1, 1). The accepted answer reshapes the labels to (N,) before
# to_categorical -- this line is the suspected cause of the shape error.
Y_train = keras.utils.to_categorical(Ytrainclassify[:,None], nb_classes)
Y_test = keras.utils.to_categorical(Ytestclassify[:,None], nb_classes)
Xtrain=Xtrain.astype('float32')
Xtest=Xtest.astype('float32')
print Xtrain.shape
print Y_train.shape
print Xtest.shape
print Y_test.shape
# Bind the cost matrix into the loss; Keras only supplies (y_true, y_pred).
ncce = wrapped_partial(w_categorical_crossentropy, weights=wts)
batch_size = 10
nb_classes = 3
nb_epoch = 1
model=gen_model()
#model.compile(loss=ncce, optimizer="adam")
model.summary()
rms = SGD()  # despite the variable name, this is plain SGD, not RMSprop
model.compile(loss=ncce, optimizer=rms)
model.fit(Xtrain, Y_train,batch_size=batch_size, nb_epoch=nb_epoch)
model.evaluate(Xtest, Y_test)
#print('Test score:', score[0])
#print('Test accuracy:', score[1])
#saving weights
model.save('model_classify_weights.h5')
错误:
python /home/tharun/keras_workshop/EEG_RxtimeDNN_regress_classify.py
/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
from ._conv import register_converters as _register_converters
Using TensorFlow backend.
(800, 40) (400, 40) (800, 1) (400, 1) (1, 3) (800, 1) (400, 1)
wts:
(3, 3)
(800, 40)
(800, 3)
(400, 40)
(400, 3)
/home/tharun/keras_workshop/EEG_RxtimeDNN_regress_classify.py:129: UserWarning: Update your `Model` call to the Keras 2 API: `Model(outputs=[<tf.Tenso..., inputs=Tensor("in...)`
model = Model(input=input, output=[x5])
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) (None, 40) 0
_________________________________________________________________
dense_1 (Dense) (None, 200) 8200
_________________________________________________________________
dropout_1 (Dropout) (None, 200) 0
_________________________________________________________________
dense_2 (Dense) (None, 100) 20100
_________________________________________________________________
dropout_2 (Dropout) (None, 100) 0
_________________________________________________________________
softmax_layer (Dense) (None, 3) 303
=================================================================
Total params: 28,603
Trainable params: 28,603
Non-trainable params: 0
_________________________________________________________________
(?, 3)
3
Tensor("softmax_layer_target:0", shape=(?, ?), dtype=float32)
[[array([1.41292294]) 1 1]
[1 array([7.328564]) 1]
[1 1 array([2.38611435])]]
/home/tharun/keras_workshop/EEG_RxtimeDNN_regress_classify.py:176: UserWarning: The `nb_epoch` argument in `fit` has been renamed `epochs`.
model.fit(Xtrain, Y_train,batch_size=batch_size, nb_epoch=nb_epoch)
Epoch 1/1
2018-02-13 15:41:44.382214: I tensorflow/core/platform/cpu_feature_guard.cc:137] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2 FMA
2018-02-13 15:41:44.758387: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1030] Found device 0 with properties:
name: GeForce GTX 1080 major: 6 minor: 1 memoryClockRate(GHz): 1.7715
pciBusID: 0000:05:00.0
totalMemory: 7.92GiB freeMemory: 7.42GiB
2018-02-13 15:41:44.992640: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1030] Found device 1 with properties:
name: GeForce GTX 1080 major: 6 minor: 1 memoryClockRate(GHz): 1.7715
pciBusID: 0000:06:00.0
totalMemory: 7.92GiB freeMemory: 7.80GiB
2018-02-13 15:41:45.225696: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1030] Found device 2 with properties:
name: GeForce GTX 1080 major: 6 minor: 1 memoryClockRate(GHz): 1.7715
pciBusID: 0000:09:00.0
totalMemory: 7.92GiB freeMemory: 7.80GiB
2018-02-13 15:41:45.458070: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1030] Found device 3 with properties:
name: GeForce GTX 1080 major: 6 minor: 1 memoryClockRate(GHz): 1.7715
pciBusID: 0000:0a:00.0
totalMemory: 7.92GiB freeMemory: 7.80GiB
2018-02-13 15:41:45.461078: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1045] Device peer to peer matrix
2018-02-13 15:41:45.461151: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1051] DMA: 0 1 2 3
2018-02-13 15:41:45.461160: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1061] 0: Y Y Y Y
2018-02-13 15:41:45.461165: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1061] 1: Y Y Y Y
2018-02-13 15:41:45.461170: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1061] 2: Y Y Y Y
2018-02-13 15:41:45.461175: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1061] 3: Y Y Y Y
2018-02-13 15:41:45.461191: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1120] Creating TensorFlow device (/device:GPU:0) -> (device: 0, name: GeForce GTX 1080, pci bus id: 0000:05:00.0, compute capability: 6.1)
2018-02-13 15:41:45.461198: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1120] Creating TensorFlow device (/device:GPU:1) -> (device: 1, name: GeForce GTX 1080, pci bus id: 0000:06:00.0, compute capability: 6.1)
2018-02-13 15:41:45.461204: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1120] Creating TensorFlow device (/device:GPU:2) -> (device: 2, name: GeForce GTX 1080, pci bus id: 0000:09:00.0, compute capability: 6.1)
2018-02-13 15:41:45.461209: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1120] Creating TensorFlow device (/device:GPU:3) -> (device: 3, name: GeForce GTX 1080, pci bus id: 0000:0a:00.0, compute capability: 6.1)
Traceback (most recent call last):
File "/home/tharun/keras_workshop/EEG_RxtimeDNN_regress_classify.py", line 239, in <module>
main()
File "/home/tharun/keras_workshop/EEG_RxtimeDNN_regress_classify.py", line 176, in main
model.fit(Xtrain, Y_train,batch_size=batch_size, nb_epoch=nb_epoch)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/engine/training.py", line 1598, in fit
validation_steps=validation_steps)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/engine/training.py", line 1183, in _fit_loop
outs = f(ins_batch)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/backend/tensorflow_backend.py", line 2273, in __call__
**self.session_kwargs)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 889, in run
run_metadata_ptr)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1120, in _run
feed_dict_tensor, options, run_metadata)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1317, in _do_run
options, run_metadata)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1336, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [3] vs. [10]
[[Node: training/SGD/gradients/loss/softmax_layer_loss/mul_20_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _class=["loc:@loss/softmax_layer_loss/mul_20"], _device="/job:localhost/replica:0/task:0/device:GPU:0"](training/SGD/gradients/loss/softmax_layer_loss/mul_20_grad/Shape, training/SGD/gradients/loss/softmax_layer_loss/mul_20_grad/Shape_1)]]
[[Node: loss/mul/_19 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_806_loss/mul", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Caused by op u'training/SGD/gradients/loss/softmax_layer_loss/mul_20_grad/BroadcastGradientArgs', defined at:
File "/home/tharun/keras_workshop/EEG_RxtimeDNN_regress_classify.py", line 239, in <module>
main()
File "/home/tharun/keras_workshop/EEG_RxtimeDNN_regress_classify.py", line 176, in main
model.fit(Xtrain, Y_train,batch_size=batch_size, nb_epoch=nb_epoch)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/engine/training.py", line 1575, in fit
self._make_train_function()
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/engine/training.py", line 960, in _make_train_function
loss=self.total_loss)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/legacy/interfaces.py", line 87, in wrapper
return func(*args, **kwargs)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/optimizers.py", line 156, in get_updates
grads = self.get_gradients(loss, params)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/optimizers.py", line 73, in get_gradients
grads = K.gradients(loss, params)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/backend/tensorflow_backend.py", line 2310, in gradients
return tf.gradients(loss, variables, colocate_gradients_with_ops=True)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/ops/gradients_impl.py", line 581, in gradients
grad_scope, op, func_call, lambda: grad_fn(op, *out_grads))
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/ops/gradients_impl.py", line 353, in _MaybeCompile
return grad_fn() # Exit early
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/ops/gradients_impl.py", line 581, in <lambda>
grad_scope, op, func_call, lambda: grad_fn(op, *out_grads))
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/ops/math_grad.py", line 742, in _MulGrad
rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/ops/gen_array_ops.py", line 532, in _broadcast_gradient_args
"BroadcastGradientArgs", s0=s0, s1=s1, name=name)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2956, in create_op
op_def=op_def)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1470, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
...which was originally created as op u'loss/softmax_layer_loss/mul_20', defined at:
File "/home/tharun/keras_workshop/EEG_RxtimeDNN_regress_classify.py", line 239, in <module>
main()
File "/home/tharun/keras_workshop/EEG_RxtimeDNN_regress_classify.py", line 174, in main
model.compile(loss=ncce, optimizer=rms)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/engine/training.py", line 850, in compile
sample_weight, mask)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/engine/training.py", line 466, in weighted
score_array *= weights
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/ops/math_ops.py", line 894, in binary_op_wrapper
return func(x, y, name=name)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/ops/math_ops.py", line 1117, in _mul_dispatch
return gen_math_ops._mul(x, y, name=name)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/ops/gen_math_ops.py", line 2726, in _mul
"Mul", x=x, y=y, name=name)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2956, in create_op
op_def=op_def)
File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1470, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
InvalidArgumentError (see above for traceback): Incompatible shapes: [3] vs. [10]
[[Node: training/SGD/gradients/loss/softmax_layer_loss/mul_20_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _class=["loc:@loss/softmax_layer_loss/mul_20"], _device="/job:localhost/replica:0/task:0/device:GPU:0"](training/SGD/gradients/loss/softmax_layer_loss/mul_20_grad/Shape, training/SGD/gradients/loss/softmax_layer_loss/mul_20_grad/Shape_1)]]
[[Node: loss/mul/_19 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_806_loss/mul", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
您是否检查了输入数据中的标签,看看是否只有 3 个 类 而不是 10 个?事实上,第二个嫌疑人是你的 w_categorical_crossentropy 函数......你是否在虚拟数据上对其进行了基准测试?
问题出在输入标签数组 (Ytrainclassify) 上。它必须重塑为 (Ytrainclassify.shape[0],)。工作代码在下面共享,没有错误。
import keras
from keras.models import Sequential, Model, load_model
from keras.layers.embeddings import Embedding
from keras.layers.core import Activation, Dense, Dropout, Reshape
from keras.optimizers import SGD, Adam, RMSprop
#from keras.layers import TimeDistributed,Merge, Conv1D, Conv2D, Flatten, MaxPooling2D, Conv2DTranspose, UpSampling2D, RepeatVector
#from keras.layers.recurrent import GRU, LSTM
#from keras.datasets.data_utils import get_file
#import tarfile
from ipdb import set_trace as bp
from functools import partial, update_wrapper
from keras.callbacks import TensorBoard
from time import time
from sklearn.model_selection import KFold
import numpy as np
from keras.callbacks import EarlyStopping
import tensorflow as tf
import scipy.io
from keras import backend as K
from keras.layers import Input, Lambda
import os
from keras import optimizers
from matplotlib import pyplot
from sklearn.preprocessing import MinMaxScaler
#os.export CUDA_VISIBLE_DEVICES="0,1"
import keras, sys
from matplotlib import pyplot
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
#from keras.utils import np_utils
from itertools import product
from keras.layers import Input
# Custom loss function with per-confusion misclassification costs.
def w_categorical_crossentropy(y_true, y_pred, weights):
    """Categorical cross-entropy scaled by a per-confusion cost matrix.

    Each sample's standard categorical cross-entropy is multiplied by
    weights[true_class, argmax_predicted_class], so individual confusions
    can be penalised differently.

    Args:
        y_true: one-hot ground-truth tensor, shape (batch, nb_classes)
            (assumed — TODO confirm against the caller's targets).
        y_pred: softmax output tensor, shape (batch, nb_classes).
        weights: (nb_classes, nb_classes) numpy cost matrix; entry [t, p]
            is the penalty for predicting class p when the truth is t.

    Returns:
        Per-sample weighted loss tensor of shape (batch,).
    """
    nb_cl = weights.shape[1]
    # Per-sample scale factor, accumulated over all (predicted, true) pairs.
    final_mask = K.zeros_like(y_pred[:, 0])
    # Build a one-hot mask of the arg-max prediction for every sample.
    y_pred_max = K.max(y_pred, axis=1)
    y_pred_max = K.reshape(y_pred_max, (K.shape(y_pred)[0], 1))
    y_pred_max_mat = K.cast(K.equal(y_pred, y_pred_max), K.floatx())
    # weights[c_t, c_p] contributes only for samples whose true class is
    # c_t and whose arg-max prediction is c_p.
    for c_p, c_t in product(range(nb_cl), range(nb_cl)):
        final_mask += (weights[c_t, c_p]
                       * y_pred_max_mat[:, c_p]
                       * y_true[:, c_t])
    return K.categorical_crossentropy(y_true, y_pred) * final_mask
# def joint_classificatn_regressn_loss(x1,ytrn,x2,ytst,w):
# return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1))
#def get_mat_data(add,in1,in2):
# Assuming sample_matlab_file.mat has 2 matrices A and B
#matData = scipy.io.loadmat(add)
#matrixA = matData[in1]
#matrixA1 = matData[in2]
#matrixB = matData['Ytrain']
#matrixB1 = matData['Ytest']
#weights = matData['w']
#matrixC = matData['Ytrainclassify']
#matrixC1 = matData['Ytestclassify']
#nfold = matData['nfold']
#return matrixA, matrixA1, matrixB, matrixB1, weights, matrixC, matrixC1, nfold
#load riemannian features from matlab
#phase I
#train and test three DNN models
# def cutomized_loss(args):
# #A is from the training data
# #S is the internal state
# A, A, S, S = args
# #customize your own loss components
# loss1 = K.mean(K.square(A-A),axis=-1)
# loss1 = K.mean(K.square(A-A),axis=-1)
def wrapped_partial(func, *args, **kwargs):
    """Return a functools.partial of *func* that carries func's metadata.

    A bare partial object has no __name__/__doc__; update_wrapper copies
    them over from the wrapped function (Keras loss handling relies on
    the __name__ attribute being present).
    """
    wrapper = partial(func, *args, **kwargs)
    return update_wrapper(wrapper, func)
def gen_model():
    """Build a 40-feature, 3-class softmax MLP (Dense 200 -> 100 -> 3).

    Returns:
        An uncompiled keras Model mapping (batch, 40) float inputs to
        (batch, 3) softmax class probabilities.
    """
    # 'inp' (not 'input') avoids shadowing the Python builtin.
    inp = Input(shape=(40,))
    x = Dense(200, activation='relu', name='dense_1')(inp)
    x = Dropout(0.2)(x)
    x = Dense(100, activation='relu', name='dense_2')(x)
    x = Dropout(0.2)(x)
    out = Dense(3, activation='softmax', name='softmax_layer')(x)
    # Keras 2 API uses 'inputs'/'outputs'; the old input=/output= kwargs
    # only worked via a deprecation shim (see the UserWarning in the log).
    return Model(inputs=inp, outputs=[out])
#x6 =(Dropout(0.2))(x5)
def main():
    """Train and evaluate the 3-class MLP with the weighted categorical
    cross-entropy loss on random dummy data.

    Side effects: prints the model summary and test score, and writes the
    trained model to 'model_classify_weights.h5'.
    """
    batch_size = 10
    nb_classes = 3
    nb_epoch = 1

    # Dummy data: 800 train / 400 test samples, 40 features each, integer
    # class labels in {0, 1, 2}. Cast once here instead of repeatedly below.
    Xtrain = np.random.rand(800, 40).astype('float32')
    Xtest = np.random.rand(400, 40).astype('float32')
    Ytrainclassify = np.random.randint(3, size=(800, 1))
    # BUG FIX: test labels must come from a 400-sample test split; the
    # original reused the 800-sample training labels, which makes
    # model.evaluate() fail on a 400-vs-800 sample-count mismatch.
    Ytestclassify = np.random.randint(3, size=(400, 1))

    # Uniform cost matrix: all-ones weights reduce the weighted loss to
    # plain categorical cross-entropy (a sanity-check configuration).
    wts = np.ones((nb_classes, nb_classes))
    # wrapped_partial preserves func.__name__, which Keras expects on a
    # custom loss.
    ncce = wrapped_partial(w_categorical_crossentropy, weights=wts)

    # to_categorical needs flat (n,) label vectors; the (n, 1) column shape
    # was the source of the original "[3] vs. [10]" error.
    Ytrainclassify = Ytrainclassify.reshape(Ytrainclassify.shape[0],)
    Ytestclassify = Ytestclassify.reshape(Ytestclassify.shape[0],)
    Y_train = keras.utils.to_categorical(Ytrainclassify, nb_classes).astype('float32')
    Y_test = keras.utils.to_categorical(Ytestclassify, nb_classes).astype('float32')

    model = gen_model()
    model.summary()
    model.compile(loss=ncce, optimizer=SGD())
    # Keras 2 renamed fit's `nb_epoch` argument to `epochs`.
    model.fit(Xtrain, Y_train, batch_size=batch_size, epochs=nb_epoch)
    score = model.evaluate(Xtest, Y_test)
    print('Test score:')
    print(score)
    # Persist architecture + weights in one HDF5 file.
    model.save('model_classify_weights.h5')
# Script entry point: run training/evaluation when executed directly.
if __name__ == "__main__":
main()