Why does this method throw an error in terms of number of arguments?
I have the following code, and when I run the __theano_build__() method it throws an error saying:
File "rnn_theano.py", line 28, in __init__
self.__theano_build__()
File "rnn_theano.py", line 45, in __theano_build__
non_sequences=[U, V, W1, W12, W2],
File "/usr/local/lib/python2.7/dist-packages/theano/scan_module/scan.py", line 745, in scan
condition, outputs, updates = scan_utils.get_updates_and_outputs(fn(*args))
TypeError: forward_prop_step() takes exactly 8 arguments (7 given)
Below is the Theano code. It is essentially a recurrent neural network with two hidden layers:
import numpy as np
import theano as theano
import theano.tensor as T
from utils import *
import operator

class RNNTheano:

    def __init__(self, word_dim, hidden_dim=100, bptt_truncate=4):
        # Assign instance variables
        self.word_dim = word_dim
        self.hidden_dim = hidden_dim
        self.bptt_truncate = bptt_truncate
        # Randomly initialize the network parameters
        U = np.random.uniform(-np.sqrt(1./word_dim), np.sqrt(1./word_dim), (hidden_dim, word_dim))
        V = np.random.uniform(-np.sqrt(1./hidden_dim), np.sqrt(1./hidden_dim), (word_dim, hidden_dim))
        W1 = np.random.uniform(-np.sqrt(1./hidden_dim), np.sqrt(1./hidden_dim), (hidden_dim, hidden_dim))
        W12 = np.random.uniform(-np.sqrt(1./hidden_dim), np.sqrt(1./hidden_dim), (hidden_dim, hidden_dim))
        W2 = np.random.uniform(-np.sqrt(1./hidden_dim), np.sqrt(1./hidden_dim), (hidden_dim, hidden_dim))
        # Theano: create shared variables
        self.U = theano.shared(name='U', value=U.astype(theano.config.floatX))
        self.V = theano.shared(name='V', value=V.astype(theano.config.floatX))
        self.W1 = theano.shared(name='W1', value=W1.astype(theano.config.floatX))
        self.W12 = theano.shared(name='W12', value=W12.astype(theano.config.floatX))
        self.W2 = theano.shared(name='W2', value=W2.astype(theano.config.floatX))
        # We store the Theano graph here
        self.theano = {}
        self.__theano_build__()

    def forward_prop_step(self, x_t, s_t1_prev, s_t2_prev, U, V, W1, W12, W2):
        s_t1 = T.tanh(U[:,x_t] + W1.dot(s_t1_prev))
        s_t2 = T.tanh(W12.dot(s_t1) + W2.dot(s_t2_prev))
        o_t = T.nnet.softmax(V.dot(s_t2))
        return [o_t[0], s_t1, s_t2]

    def __theano_build__(self):
        U, V, W1, W12, W2 = self.U, self.V, self.W1, self.W12, self.W2
        x = T.ivector('x')
        y = T.ivector('y')
        [o, s1, s2], updates = theano.scan(
            self.forward_prop_step,
            sequences=x,
            outputs_info=[None, dict(initial=T.zeros(self.hidden_dim)), dict(initial=T.zeros(self.hidden_dim))],
            non_sequences=[U, V, W1, W12, W2],
            truncate_gradient=self.bptt_truncate,
            strict=False)
        prediction = T.argmax(o, axis=1)
        o_error = T.sum(T.nnet.categorical_crossentropy(o, y))
        # Gradients
        dU = T.grad(o_error, U)
        dV = T.grad(o_error, V)
        dW1 = T.grad(o_error, W1)
        dW12 = T.grad(o_error, W12)
        dW2 = T.grad(o_error, W2)
        # Assign functions
        self.forward_propagation = theano.function([x], o)
        self.predict = theano.function([x], prediction)
        self.ce_error = theano.function([x, y], o_error)
        self.bptt = theano.function([x, y], [dU, dV, dW1, dW12, dW2])
        # SGD
        learning_rate = T.scalar('learning_rate')
        self.sgd_step = theano.function([x, y, learning_rate], [],
            updates=[(self.U, self.U - learning_rate * dU),
                     (self.V, self.V - learning_rate * dV),
                     (self.W1, self.W1 - learning_rate * dW1),
                     (self.W12, self.W12 - learning_rate * dW12),
                     (self.W2, self.W2 - learning_rate * dW2)])

    def calculate_total_loss(self, X, Y):
        return np.sum([self.ce_error(x, y) for x, y in zip(X, Y)])

    def calculate_loss(self, X, Y):
        # Divide the total loss by the number of words
        num_words = np.sum([len(y) for y in Y])
        return self.calculate_total_loss(X, Y) / float(num_words)
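For context, the class would be driven roughly like this once it builds (a hypothetical usage sketch, not from the original post; the dimensions and toy sequences are invented):

import numpy as np
from rnn_theano import RNNTheano

model = RNNTheano(word_dim=100, hidden_dim=32)   # with the posted code, this line already raises the TypeError
x = np.array([0, 5, 17, 3], dtype='int32')       # toy sequence of word indices (int32 to match T.ivector)
y = np.array([5, 17, 3, 1], dtype='int32')       # next-word targets
o = model.forward_propagation(x)                 # softmax outputs, one row per position, word_dim columns
model.sgd_step(x, y, 0.005)                      # a single SGD update

Note that with the code exactly as posted, the constructor itself raises the TypeError, since __init__ calls __theano_build__() and theano.scan validates the step function while building the graph.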
Try changing
return [o_t[0], s_t1, s_t2]
to
return o_t[0], s_t1, s_t2
I think the former causes the method to return something that Theano coerces into a single tensor, whereas the latter explicitly returns three objects, matching the three entries of outputs_info.
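For reference, here is the step function with only the return statement changed (a sketch of the suggested fix; everything else is untouched):

    def forward_prop_step(self, x_t, s_t1_prev, s_t2_prev, U, V, W1, W12, W2):
        s_t1 = T.tanh(U[:,x_t] + W1.dot(s_t1_prev))
        s_t2 = T.tanh(W12.dot(s_t1) + W2.dot(s_t2_prev))
        o_t = T.nnet.softmax(V.dot(s_t2))
        # One return value per entry in outputs_info: o_t[0] matches the
        # None entry (not fed back), while s_t1 and s_t2 are fed back into
        # the next call as s_t1_prev and s_t2_prev.
        return o_t[0], s_t1, s_t2

As background, scan passes arguments to the step function in a fixed order: one per sequence, then one per recurrent output (the outputs_info entries that are not None), then the non_sequences in order. Here that is x_t, the two previous hidden states, and the five weight matrices, so the function's signature and the scan call have to agree on that count.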