返回 DNN 训练结束时的逆 Hessian 矩阵以及输出关于输入的偏导数
Return Inverse Hessian Matrix at the end of DNN Training and Partial Derivatives wrt the Inputs
使用 Keras 并以 Tensorflow 作为后端,我构建了一个 DNN,它以恒星光谱作为输入(7213 个数据点),并输出三个恒星参数(温度、重力和金属丰度)。网络训练得很好,在我的测试集上预测效果也很好,但为了使结果具有科学意义,我需要能够估计误差。第一步是获得逆 Hessian 矩阵,而这似乎无法仅靠 Keras 完成。因此,我尝试用 scipy 构造一个变通方案:调用 scipy.optimize.minimize,并以 BFGS、L-BFGS-B 或 Newton-CG 作为方法。这些方法中的任何一个都会返回逆 Hessian 矩阵。
我的想法是先用 Adam 优化器将模型训练 100 个 epoch(或直到模型收敛),然后运行 BFGS(或上述其他方法之一)的一次迭代或一次函数调用,以返回模型的 Hessian 矩阵。
这是我的代码:
from scipy.optimize import minimize
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.optimizers import Adam
# Hyper-parameters for the network and the Adam optimizer
activation = 'relu'
init = 'he_normal'
beta_1 = 0.9
beta_2 = 0.999
epsilon = 1e-08
input_shape = (None, n)
n_hidden = [2048, 1024, 512, 256, 128, 32]
output_dim = 3
epochs = 100
lr = 0.0008
batch_size = 64
decay = 0.00

# Assemble the stack of Dense layers: the first hidden layer carries the
# input shape, the remaining hidden layers follow n_hidden, and a linear
# layer produces the outputs.
hidden_layers = [
    Dense(n_hidden[0], batch_input_shape=input_shape, init=init,
          activation=activation),
]
hidden_layers += [
    Dense(width, init=init, activation=activation) for width in n_hidden[1:]
]
model = Sequential(
    hidden_layers + [Dense(output_dim, init=init, activation='linear')]
)

# Optimizer
optimizer = Adam(lr=lr, beta_1=beta_1, beta_2=beta_2, epsilon=epsilon,
                 decay=decay)

# Compile and fit
model.compile(optimizer=optimizer, loss='mean_squared_error')
#train_X.shape = (50000,7213)
#train_Y.shape = (50000,3)
#cv_X.shape = (10000,7213)
#cv_Y.shape = (10000,3)
history = model.fit(train_X, train_Y, validation_data=(cv_X, cv_Y),
                    nb_epoch=epochs, batch_size=batch_size, verbose=2)

# Gather the trained parameters, one get_weights() result per layer.
weights = []
for layer in model.layers:
    weights.append(layer.get_weights())
def loss(W):
    """Load W into the model and return its squared-error loss on train_X.

    The returned value is the sum of squared differences between the
    model's predictions and train_Y, divided by the number of rows in
    train_X. The value is also printed on every call.

    NOTE(review): scipy.optimize.minimize passes its objective a 1-D
    array, while model.set_weights is given whatever entries W iterates
    over. The per-layer shapes colliding in the BFGS traceback suggest W
    needs to be flattened and un-flattened explicitly -- TODO confirm.
    """
    candidate = np.array(W)
    per_entry = [np.array(entry) for entry in candidate]
    model.set_weights(np.array(per_entry))
    residual = np.subtract(model.predict(train_X), train_Y)
    mse = np.sum(np.square(residual)) / len(train_X[:, 0])
    print(mse)
    return mse
# Start the optimizer from the weights collected from the trained model.
x0 = weights
# Ask for a single BFGS iteration; the inverse-Hessian estimate is then read
# from the result object.
bfgs_options = {'maxiter': 1, 'eps': 1e-6, 'disp': True}
res = minimize(loss, x0, args=(), method='BFGS', options=bfgs_options)
#res = minimize(loss, x0, method='L-BFGS-B', options={'disp': True, 'maxls': 1, 'gtol': 1e-05, 'eps': 1e-08, 'maxiter': 1, 'ftol': 0.5, 'maxcor': 1, 'maxfun': 1})
#res = minimize(loss, x0, args=(), method='Newton-CG', jac=None, hess=None, hessp=None, tol=None, callback=None, options={'disp': False, 'xtol': 1e-05, 'eps': 1.4901161193847656e-08, 'return_all': False, 'maxiter': 1})
inv_hess = res['hess_inv']
1) 该模型训练得非常好,但当我尝试用先前训练好的权重运行单次迭代的 scipy minimize 时,遇到了问题。
尝试 method=BFGS 时的输出:
0.458706819754
0.457811632697
0.458706716791
...
0.350124572422
0.350186770445
0.350125320636
ValueErrorTraceback (most recent call last)
---> 19 res = minimize(loss, x0, args=(), method = 'BFGS', tol=1, options={'maxiter':1,'eps':1e-6,'disp':True})#,'gtol':0.1}, tol=5)
/opt/anaconda3/lib/python2.7/site-packages/scipy/optimize/_minimize.pyc in minimize(fun, x0, args, method, jac, hess, hessp, bounds, constraints, tol, callback, options)
442 return _minimize_cg(fun, x0, args, jac, callback, **options)
443 elif meth == 'bfgs':
--> 444 return _minimize_bfgs(fun, x0, args, jac, callback, **options)
/opt/anaconda3/lib/python2.7/site-packages/scipy/optimize/optimize.pyc in _minimize_bfgs(fun, x0, args, jac, callback, gtol, norm, eps, maxiter, disp, return_all, **unknown_options)
963 try: # this was handled in numeric, let it remaines for more safety
--> 964 rhok = 1.0 / (numpy.dot(yk, sk))
965 except ZeroDivisionError:
966 rhok = 1000.0
ValueError: operands could not be broadcast together with shapes (7213,2048) (2048,1024)
尝试 method=L-BFGS-B 时的输出:
ValueErrorTraceback (most recent call last)
---> 20 res = minimize(loss, x0, method='L-BFGS-B', options={'disp': True, 'maxls': 1, 'gtol': 1e-05, 'eps': 1e-08, 'maxiter': 1, 'ftol': 0.5, 'maxcor': 1, 'maxfun': 1})
/opt/anaconda3/lib/python2.7/site-packages/scipy/optimize/_minimize.pyc in minimize(fun, x0, args, method, jac, hess, hessp, bounds, constraints, tol, callback, options)
448 elif meth == 'l-bfgs-b':
449 return _minimize_lbfgsb(fun, x0, args, jac, bounds,
--> 450 callback=callback, **options)
/opt/anaconda3/lib/python2.7/site-packages/scipy/optimize/lbfgsb.pyc in _minimize_lbfgsb(fun, x0, args, jac, bounds, disp, maxcor, ftol, gtol, eps, maxfun, maxiter, iprint, callback, maxls, **unknown_options)
300 raise ValueError('maxls must be positive.')
301
--> 302 x = array(x0, float64)
303 f = array(0.0, float64)
304 g = zeros((n,), float64)
ValueError: setting an array element with a sequence.
尝试 method=Newton-CG 时的输出
ValueErrorTraceback (most recent call last)
---> 21 res = minimize(loss, x0, args=(), method='Newton-CG', jac=None, hess=None, hessp=None, tol=None, callback=None, options={'disp': False, 'xtol': 1e-05, 'eps': 1.4901161193847656e-08, 'return_all': False, 'maxiter': 1})
/opt/anaconda3/lib/python2.7/site-packages/scipy/optimize/_minimize.pyc in minimize(fun, x0, args, method, jac, hess, hessp, bounds, constraints, tol, callback, options)
445 elif meth == 'newton-cg':
446 return _minimize_newtoncg(fun, x0, args, jac, hess, hessp, callback,
--> 447 **options)
448 elif meth == 'l-bfgs-b':
449 return _minimize_lbfgsb(fun, x0, args, jac, bounds,
/opt/anaconda3/lib/python2.7/site-packages/scipy/optimize/optimize.pyc in _minimize_newtoncg(fun, x0, args, jac, hess, hessp, callback, xtol, eps, maxiter, disp, return_all, **unknown_options)
1438 _check_unknown_options(unknown_options)
1439 if jac is None:
-> 1440 raise ValueError('Jacobian is required for Newton-CG method')
ValueError: Jacobian is required for Newton-CG method
2) 下一个任务是获得模型输出相对于模型输入的导数。例如,对于一个恒星参数(输出之一),比如温度,我需要找到关于 7213 个输入中每个输入的偏导数。然后对 3 个输出中的每一个执行相同的操作。
所以基本上,我的第一个任务 (1) 是找到一种方法来返回模型的逆 Hessian 矩阵,接下来 (2) 需要找到一种方法来返回输出关于输入的一阶偏导数。
有人对这两项任务有一些了解吗?
谢谢。
编辑
我正在尝试使用 theano.gradient.jacobian() 来返回输出关于输入的雅可比矩阵。我已将模型改写为模型权重的函数,并将该函数用作 theano.gradient.jacobian() 的第一个参数。当我尝试对多维数组求梯度时,问题就出现了,而我的模型权重和输入数据正是多维数组的形式。
import theano.tensor as T
# Symbolic inputs for the Theano graph; both are declared with T.dvector.
weights_in_model = T.dvector('model_weights')
x = T.dvector('x')
def pred(x, weights_in_model):
    """Build the symbolic forward pass from fourteen weight entries.

    Entries 2k and 2k+1 of weights_in_model are stacked along axis 0 to
    form the weights of layer k. The running value is padded on the right
    with one broadcastable dimension and multiplied by those weights.
    Every layer except the last is then clipped to [0, 9999].
    """
    prediction = x
    for k in range(0, 14, 2):
        weights = T.stack((weights_in_model[k], weights_in_model[k + 1]),
                          axis=0)
        prediction = T.shape_padright(prediction, n_ones=1)
        prediction = T.dot(prediction, weights)
        if k < 12:
            prediction = T.clip(prediction, 0, 9999.)
    # NOTE(review): the flattened expression is not assigned, so the value
    # returned below is the un-flattened one.
    T.flatten(prediction)
    return prediction
# Build the symbolic Jacobian of the prediction with respect to x.
prediction_expr = pred(x, weights_in_model)
f = theano.gradient.jacobian(prediction_expr, wrt=x)
# Compile it into a callable that takes the input and the weights.
h = theano.function([x, weights_in_model], f, allow_input_downcast=True)
# Rebind the placeholder names to concrete values and evaluate.
x = train_X
weights_in_model = model.get_weights()
h(x, weights_in_model)
最后一行给出了错误:
TypeError: ('Bad input argument to theano function with name "<ipython-input-365-a1ab256aa220>:1" at index 0(0-based)', 'Wrong number of dimensions: expected 1, got 2 with shape (2000, 7213).')
但是当我将输入更改为:
weights_in_model = T.matrix('model_weights')
x = T.matrix('x')
我从以下行收到错误:
f=theano.gradient.jacobian(pred(x,weights_in_model),wrt=x)
错误信息如下:
AssertionError: tensor.jacobian expects a 1 dimensional variable as `expression`. If not use flatten to make it a vector
关于如何解决这个问题的任何想法?
已找到答案!:
此代码用于预测模型的一个输出值。目前我正在修改它以计算 3 个雅可比矩阵;每个输出一个。
import theano
import theano.tensor as T
import theano.typed_list
# Theano settings used for this session
theano.config.optimizer = 'fast_compile'
theano.config.exception_verbosity = 'high'

# Symbolic inputs: a typed list of double matrices for the weights and a
# matrix for the input examples.
weight_list_type = theano.typed_list.TypedListType(T.dmatrix)
weights_in_model = weight_list_type()
x = T.matrix('x')
# Define model function
def pred(x, weights_in_model, n_layers=7):
    """Build the symbolic forward pass of the dense network.

    Args:
        x: symbolic matrix holding one input example per row.
        weights_in_model: typed list whose entries 2k and 2k+1 are the
            kernel matrix and the bias (as a single-row matrix) of layer k.
        n_layers: number of dense layers to apply. The default of 7
            matches the original hand-unrolled version.

    Returns:
        The flattened symbolic output of the final layer.
    """
    prediction = x
    for layer_idx in range(n_layers):
        kernel = weights_in_model[2 * layer_idx]
        bias = weights_in_model[2 * layer_idx + 1]
        # Fold the bias into the matrix product: append the bias row to the
        # kernel and a column of ones to the activations.
        weights = T.concatenate((kernel, bias), axis=0)
        ones_column = T.ones((T.shape(prediction)[0], 1))
        prediction = T.concatenate((prediction, ones_column), axis=1)
        prediction = T.dot(prediction, weights)
        # Every layer except the last is clipped to [0, 9999].
        if layer_idx < n_layers - 1:
            prediction = T.clip(prediction, 0, 9999.)
    return T.flatten(prediction)
# Create gradient function
f = theano.gradient.jacobian(pred(x, weights_in_model), wrt=x)
# Compile function
h = theano.function([x, weights_in_model], f, allow_input_downcast=True)
# Get function inputs
weights_in_model_ = model.get_weights()
# NOTE(review): train_data is not defined in the visible snippet; presumably
# a subset of the training inputs -- confirm.
x_ = train_data
# Reshape every bias entry (odd positions) into a single-row matrix so it can
# be concatenated under its kernel. The row length is inferred from the bias
# itself rather than hard-coded per layer.
for bias_idx in range(1, len(weights_in_model_), 2):
    weights_in_model_[bias_idx] = np.reshape(weights_in_model_[bias_idx],
                                             (1, -1))
# Compute Jacobian (returns format with a bunch of zero rows)
# NOTE(review): each output row appears to depend only on the matching input
# row, so T.grad(T.sum(prediction), x) would likely give the final matrix
# without this large intermediate -- confirm before switching.
jacs = h(x_, weights_in_model_)
# Put Jacobian matrix in proper format (ie. shape = (number_of_input_examples,
# number_of_input_features)): keep, for example i, the slice jacs[i, i, :].
example_idx = np.arange(jacs.shape[0])
jacobian_matrix = jacs[example_idx, example_idx].astype(np.float64)
下一个任务是求输出关于模型权重的 Hessian 矩阵!
使用 Keras 并以 Tensorflow 作为后端,我构建了一个 DNN,它以恒星光谱作为输入(7213 个数据点),并输出三个恒星参数(温度、重力和金属丰度)。网络训练得很好,在我的测试集上预测效果也很好,但为了使结果具有科学意义,我需要能够估计误差。第一步是获得逆 Hessian 矩阵,而这似乎无法仅靠 Keras 完成。因此,我尝试用 scipy 构造一个变通方案:调用 scipy.optimize.minimize,并以 BFGS、L-BFGS-B 或 Newton-CG 作为方法。这些方法中的任何一个都会返回逆 Hessian 矩阵。
我的想法是先用 Adam 优化器将模型训练 100 个 epoch(或直到模型收敛),然后运行 BFGS(或上述其他方法之一)的一次迭代或一次函数调用,以返回模型的 Hessian 矩阵。
这是我的代码:
from scipy.optimize import minimize
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.optimizers import Adam
# Hyper-parameters for the network and the Adam optimizer
activation = 'relu'
init = 'he_normal'
beta_1 = 0.9
beta_2 = 0.999
epsilon = 1e-08
input_shape = (None, n)
n_hidden = [2048, 1024, 512, 256, 128, 32]
output_dim = 3
epochs = 100
lr = 0.0008
batch_size = 64
decay = 0.00

# Assemble the stack of Dense layers: the first hidden layer carries the
# input shape, the remaining hidden layers follow n_hidden, and a linear
# layer produces the outputs.
hidden_layers = [
    Dense(n_hidden[0], batch_input_shape=input_shape, init=init,
          activation=activation),
]
hidden_layers += [
    Dense(width, init=init, activation=activation) for width in n_hidden[1:]
]
model = Sequential(
    hidden_layers + [Dense(output_dim, init=init, activation='linear')]
)

# Optimizer
optimizer = Adam(lr=lr, beta_1=beta_1, beta_2=beta_2, epsilon=epsilon,
                 decay=decay)

# Compile and fit
model.compile(optimizer=optimizer, loss='mean_squared_error')
#train_X.shape = (50000,7213)
#train_Y.shape = (50000,3)
#cv_X.shape = (10000,7213)
#cv_Y.shape = (10000,3)
history = model.fit(train_X, train_Y, validation_data=(cv_X, cv_Y),
                    nb_epoch=epochs, batch_size=batch_size, verbose=2)

# Gather the trained parameters, one get_weights() result per layer.
weights = []
for layer in model.layers:
    weights.append(layer.get_weights())
def loss(W):
    """Load W into the model and return its squared-error loss on train_X.

    The returned value is the sum of squared differences between the
    model's predictions and train_Y, divided by the number of rows in
    train_X. The value is also printed on every call.

    NOTE(review): scipy.optimize.minimize passes its objective a 1-D
    array, while model.set_weights is given whatever entries W iterates
    over. The per-layer shapes colliding in the BFGS traceback suggest W
    needs to be flattened and un-flattened explicitly -- TODO confirm.
    """
    candidate = np.array(W)
    per_entry = [np.array(entry) for entry in candidate]
    model.set_weights(np.array(per_entry))
    residual = np.subtract(model.predict(train_X), train_Y)
    mse = np.sum(np.square(residual)) / len(train_X[:, 0])
    print(mse)
    return mse
# Start the optimizer from the weights collected from the trained model.
x0 = weights
# Ask for a single BFGS iteration; the inverse-Hessian estimate is then read
# from the result object.
bfgs_options = {'maxiter': 1, 'eps': 1e-6, 'disp': True}
res = minimize(loss, x0, args=(), method='BFGS', options=bfgs_options)
#res = minimize(loss, x0, method='L-BFGS-B', options={'disp': True, 'maxls': 1, 'gtol': 1e-05, 'eps': 1e-08, 'maxiter': 1, 'ftol': 0.5, 'maxcor': 1, 'maxfun': 1})
#res = minimize(loss, x0, args=(), method='Newton-CG', jac=None, hess=None, hessp=None, tol=None, callback=None, options={'disp': False, 'xtol': 1e-05, 'eps': 1.4901161193847656e-08, 'return_all': False, 'maxiter': 1})
inv_hess = res['hess_inv']
1) 该模型训练得非常好,但当我尝试用先前训练好的权重运行单次迭代的 scipy minimize 时,遇到了问题。
尝试 method=BFGS 时的输出:
0.458706819754
0.457811632697
0.458706716791
...
0.350124572422
0.350186770445
0.350125320636
ValueErrorTraceback (most recent call last)
---> 19 res = minimize(loss, x0, args=(), method = 'BFGS', tol=1, options={'maxiter':1,'eps':1e-6,'disp':True})#,'gtol':0.1}, tol=5)
/opt/anaconda3/lib/python2.7/site-packages/scipy/optimize/_minimize.pyc in minimize(fun, x0, args, method, jac, hess, hessp, bounds, constraints, tol, callback, options)
442 return _minimize_cg(fun, x0, args, jac, callback, **options)
443 elif meth == 'bfgs':
--> 444 return _minimize_bfgs(fun, x0, args, jac, callback, **options)
/opt/anaconda3/lib/python2.7/site-packages/scipy/optimize/optimize.pyc in _minimize_bfgs(fun, x0, args, jac, callback, gtol, norm, eps, maxiter, disp, return_all, **unknown_options)
963 try: # this was handled in numeric, let it remaines for more safety
--> 964 rhok = 1.0 / (numpy.dot(yk, sk))
965 except ZeroDivisionError:
966 rhok = 1000.0
ValueError: operands could not be broadcast together with shapes (7213,2048) (2048,1024)
尝试 method=L-BFGS-B 时的输出:
ValueErrorTraceback (most recent call last)
---> 20 res = minimize(loss, x0, method='L-BFGS-B', options={'disp': True, 'maxls': 1, 'gtol': 1e-05, 'eps': 1e-08, 'maxiter': 1, 'ftol': 0.5, 'maxcor': 1, 'maxfun': 1})
/opt/anaconda3/lib/python2.7/site-packages/scipy/optimize/_minimize.pyc in minimize(fun, x0, args, method, jac, hess, hessp, bounds, constraints, tol, callback, options)
448 elif meth == 'l-bfgs-b':
449 return _minimize_lbfgsb(fun, x0, args, jac, bounds,
--> 450 callback=callback, **options)
/opt/anaconda3/lib/python2.7/site-packages/scipy/optimize/lbfgsb.pyc in _minimize_lbfgsb(fun, x0, args, jac, bounds, disp, maxcor, ftol, gtol, eps, maxfun, maxiter, iprint, callback, maxls, **unknown_options)
300 raise ValueError('maxls must be positive.')
301
--> 302 x = array(x0, float64)
303 f = array(0.0, float64)
304 g = zeros((n,), float64)
ValueError: setting an array element with a sequence.
尝试 method=Newton-CG 时的输出
ValueErrorTraceback (most recent call last)
---> 21 res = minimize(loss, x0, args=(), method='Newton-CG', jac=None, hess=None, hessp=None, tol=None, callback=None, options={'disp': False, 'xtol': 1e-05, 'eps': 1.4901161193847656e-08, 'return_all': False, 'maxiter': 1})
/opt/anaconda3/lib/python2.7/site-packages/scipy/optimize/_minimize.pyc in minimize(fun, x0, args, method, jac, hess, hessp, bounds, constraints, tol, callback, options)
445 elif meth == 'newton-cg':
446 return _minimize_newtoncg(fun, x0, args, jac, hess, hessp, callback,
--> 447 **options)
448 elif meth == 'l-bfgs-b':
449 return _minimize_lbfgsb(fun, x0, args, jac, bounds,
/opt/anaconda3/lib/python2.7/site-packages/scipy/optimize/optimize.pyc in _minimize_newtoncg(fun, x0, args, jac, hess, hessp, callback, xtol, eps, maxiter, disp, return_all, **unknown_options)
1438 _check_unknown_options(unknown_options)
1439 if jac is None:
-> 1440 raise ValueError('Jacobian is required for Newton-CG method')
ValueError: Jacobian is required for Newton-CG method
2) 下一个任务是获得模型输出相对于模型输入的导数。例如,对于一个恒星参数(输出之一),比如温度,我需要找到关于 7213 个输入中每个输入的偏导数。然后对 3 个输出中的每一个执行相同的操作。
所以基本上,我的第一个任务 (1) 是找到一种方法来返回模型的逆 Hessian 矩阵,接下来 (2) 需要找到一种方法来返回输出关于输入的一阶偏导数。
有人对这两项任务有一些了解吗? 谢谢。
编辑
我正在尝试使用 theano.gradient.jacobian() 来返回输出关于输入的雅可比矩阵。我已将模型改写为模型权重的函数,并将该函数用作 theano.gradient.jacobian() 的第一个参数。当我尝试对多维数组求梯度时,问题就出现了,而我的模型权重和输入数据正是多维数组的形式。
import theano.tensor as T
# Symbolic inputs for the Theano graph; both are declared with T.dvector.
weights_in_model = T.dvector('model_weights')
x = T.dvector('x')
def pred(x, weights_in_model):
    """Build the symbolic forward pass from fourteen weight entries.

    Entries 2k and 2k+1 of weights_in_model are stacked along axis 0 to
    form the weights of layer k. The running value is padded on the right
    with one broadcastable dimension and multiplied by those weights.
    Every layer except the last is then clipped to [0, 9999].
    """
    prediction = x
    for k in range(0, 14, 2):
        weights = T.stack((weights_in_model[k], weights_in_model[k + 1]),
                          axis=0)
        prediction = T.shape_padright(prediction, n_ones=1)
        prediction = T.dot(prediction, weights)
        if k < 12:
            prediction = T.clip(prediction, 0, 9999.)
    # NOTE(review): the flattened expression is not assigned, so the value
    # returned below is the un-flattened one.
    T.flatten(prediction)
    return prediction
# Build the symbolic Jacobian of the prediction with respect to x.
prediction_expr = pred(x, weights_in_model)
f = theano.gradient.jacobian(prediction_expr, wrt=x)
# Compile it into a callable that takes the input and the weights.
h = theano.function([x, weights_in_model], f, allow_input_downcast=True)
# Rebind the placeholder names to concrete values and evaluate.
x = train_X
weights_in_model = model.get_weights()
h(x, weights_in_model)
最后一行给出了错误:
TypeError: ('Bad input argument to theano function with name "<ipython-input-365-a1ab256aa220>:1" at index 0(0-based)', 'Wrong number of dimensions: expected 1, got 2 with shape (2000, 7213).')
但是当我将输入更改为:
weights_in_model = T.matrix('model_weights')
x = T.matrix('x')
我从以下行收到错误:
f=theano.gradient.jacobian(pred(x,weights_in_model),wrt=x)
错误信息如下:
AssertionError: tensor.jacobian expects a 1 dimensional variable as `expression`. If not use flatten to make it a vector
关于如何解决这个问题的任何想法?
已找到答案!: 此代码用于预测模型的一个输出值。目前我正在修改它以计算 3 个雅可比矩阵;每个输出一个。
import theano
import theano.tensor as T
import theano.typed_list
# Theano settings used for this session
theano.config.optimizer = 'fast_compile'
theano.config.exception_verbosity = 'high'

# Symbolic inputs: a typed list of double matrices for the weights and a
# matrix for the input examples.
weight_list_type = theano.typed_list.TypedListType(T.dmatrix)
weights_in_model = weight_list_type()
x = T.matrix('x')
# Define model function
def pred(x, weights_in_model, n_layers=7):
    """Build the symbolic forward pass of the dense network.

    Args:
        x: symbolic matrix holding one input example per row.
        weights_in_model: typed list whose entries 2k and 2k+1 are the
            kernel matrix and the bias (as a single-row matrix) of layer k.
        n_layers: number of dense layers to apply. The default of 7
            matches the original hand-unrolled version.

    Returns:
        The flattened symbolic output of the final layer.
    """
    prediction = x
    for layer_idx in range(n_layers):
        kernel = weights_in_model[2 * layer_idx]
        bias = weights_in_model[2 * layer_idx + 1]
        # Fold the bias into the matrix product: append the bias row to the
        # kernel and a column of ones to the activations.
        weights = T.concatenate((kernel, bias), axis=0)
        ones_column = T.ones((T.shape(prediction)[0], 1))
        prediction = T.concatenate((prediction, ones_column), axis=1)
        prediction = T.dot(prediction, weights)
        # Every layer except the last is clipped to [0, 9999].
        if layer_idx < n_layers - 1:
            prediction = T.clip(prediction, 0, 9999.)
    return T.flatten(prediction)
# Create gradient function
f = theano.gradient.jacobian(pred(x, weights_in_model), wrt=x)
# Compile function
h = theano.function([x, weights_in_model], f, allow_input_downcast=True)
# Get function inputs
weights_in_model_ = model.get_weights()
# NOTE(review): train_data is not defined in the visible snippet; presumably
# a subset of the training inputs -- confirm.
x_ = train_data
# Reshape every bias entry (odd positions) into a single-row matrix so it can
# be concatenated under its kernel. The row length is inferred from the bias
# itself rather than hard-coded per layer.
for bias_idx in range(1, len(weights_in_model_), 2):
    weights_in_model_[bias_idx] = np.reshape(weights_in_model_[bias_idx],
                                             (1, -1))
# Compute Jacobian (returns format with a bunch of zero rows)
# NOTE(review): each output row appears to depend only on the matching input
# row, so T.grad(T.sum(prediction), x) would likely give the final matrix
# without this large intermediate -- confirm before switching.
jacs = h(x_, weights_in_model_)
# Put Jacobian matrix in proper format (ie. shape = (number_of_input_examples,
# number_of_input_features)): keep, for example i, the slice jacs[i, i, :].
example_idx = np.arange(jacs.shape[0])
jacobian_matrix = jacs[example_idx, example_idx].astype(np.float64)
下一个任务是求输出关于模型权重的 Hessian 矩阵!