'Wrong number of dimensions' Theano 中的错误 - LSTM
'Wrong number of dimensions' error in Theano - LSTM
我正在尝试为我自己的数据重新创建 this LSTM 示例。
Traceback (most recent call last):
File "lstm.py", line 124, in <module>
train_rnn(train_data)
File "lstm.py", line 120, in train_rnn
train_cost = learn_rnn_fn(i, o)
File "/usr/local/lib/python3.5/site-packages/theano/compile/function_module.py", line 788, in __call__
allow_downcast=s.allow_downcast)
File "/usr/local/lib/python3.5/site-packages/theano/tensor/type.py", line 178, in filter
data.shape))
TypeError: ('Bad input argument to theano function with name "lstm.py:108" at index 0 (0-based)', 'Wrong number of dimensions: expected 2, got 0 with shape ().')
我提供的代码如下:
import numpy as np
import theano
import theano.tensor as T
import pandas
dtype=theano.config.floatX
def create_dataset(dataset, look_back=1):
data = []
for dx in range(len(dataset) - look_back - 1):
data.append([dataset[dx], dataset[dx + 1]])
return np.array(data, dtype=dtype)
raw_data = pandas.read_csv('international-airline-passengers.csv', usecols=[1])
train_data = create_dataset(raw_data.as_matrix()[:,0])
train_data
在此转换后变为二维 numpy 矩阵。
sigma = lambda x: 1 / (1 + T.exp(-x))
act = T.tanh
def one_lstm_step(x_t, h_tm1, c_tm1, W_xi, W_hi, W_ci, b_i, W_xf, W_hf, W_cf, b_f, W_xc, W_hc, b_c, W_xy, W_ho, W_cy, b_o, W_hy, b_y):
i_t = sigma(theano.dot(x_t, W_xi) + theano.dot(h_tm1, W_hi) + theano.dot(c_tm1, W_ci) + b_i)
f_t = sigma(theano.dot(x_t, W_xf) + theano.dot(h_tm1, W_hf) + theano.dot(c_tm1, W_cf) + b_f)
c_t = f_t * c_tm1 + i_t * act(theano.dot(x_t, W_xc) + theano.dot(h_tm1, W_hc) + b_c)
o_t = sigma(theano.dot(x_t, W_xo)+ theano.dot(h_tm1, W_ho) + theano.dot(c_t, W_co) + b_o)
h_t = o_t * act(c_t)
y_t = sigma(theano.dot(h_t, W_hy) + b_y)
return [h_t, c_t, y_t]
def sample_weights(sizeX, sizeY):
values = np.ndarray([sizeX, sizeY], dtype=dtype)
for dx in range(sizeX):
vals = np.random.uniform(low=-1., high=1., size=(sizeY,))
values[dx,:] = vals
_,svs,_ = np.linalg.svd(values)
values = values / svs[0]
return values
n_in = 1
n_hidden = n_i = n_c = n_o = n_f = 10
n_y = 1
W_xi = theano.shared(sample_weights(n_in, n_i))
W_hi = theano.shared(sample_weights(n_hidden, n_i))
W_ci = theano.shared(sample_weights(n_c, n_i))
b_i = theano.shared(np.cast[dtype](np.random.uniform(-0.5,.5,size = n_i)))
W_xf = theano.shared(sample_weights(n_in, n_f))
W_hf = theano.shared(sample_weights(n_hidden, n_f))
W_cf = theano.shared(sample_weights(n_c, n_f))
b_f = theano.shared(np.cast[dtype](np.random.uniform(0, 1.,size = n_f)))
W_xc = theano.shared(sample_weights(n_in, n_c))
W_hc = theano.shared(sample_weights(n_hidden, n_c))
b_c = theano.shared(np.zeros(n_c, dtype=dtype))
W_xo = theano.shared(sample_weights(n_in, n_o))
W_ho = theano.shared(sample_weights(n_hidden, n_o))
W_co = theano.shared(sample_weights(n_c, n_o))
b_o = theano.shared(np.cast[dtype](np.random.uniform(-0.5,.5,size = n_o)))
W_hy = theano.shared(sample_weights(n_hidden, n_y))
b_y = theano.shared(np.zeros(n_y, dtype=dtype))
c0 = theano.shared(np.zeros(n_hidden, dtype=dtype))
h0 = T.tanh(c0)
params = [W_xi, W_hi, W_ci, b_i, W_xf, W_hf, W_cf, b_f, W_xc, W_hc, b_c, W_xo, W_ho, W_co, b_o, W_hy, b_y, c0]
v = T.matrix(dtype=dtype)
target = T.matrix(dtype=dtype)
[h_vals, _, y_vals], _ = theano.scan(fn=one_lstm_step,
sequences = dict(input=v, taps=[0]),
outputs_info = [h0, c0, None ], # corresponds to return type of fn
non_sequences = [W_xi, W_hi, W_ci, b_i, W_xf, W_hf, W_cf, b_f, W_xc, W_hc, b_c, W_xo, W_ho, W_co, b_o, W_hy, b_y] )
cost = -T.mean(target * T.log(y_vals)+ (1.- target) * T.log(1. - y_vals))
updates=[]
learn_rnn_fn = theano.function(inputs = [v, target],
outputs = cost,
updates = updates)
nb_epochs=1
train_errors = np.ndarray(nb_epochs)
def train_rnn(train_data):
for x in range(nb_epochs):
error = 0.
print(train_data)
for j in range(len(train_data)):
index = np.random.randint(0, len(train_data))
i, o = train_data[index]
train_cost = learn_rnn_fn(i, o)
error += train_cost
train_errors[x] = error
train_rnn(train_data)
调试显示变量i
和o
的形状不合适。我尝试重塑数据,但它会导致其他数据类型问题。
函数 create_dataset 返回一个 numpy 数组。但是,当您调用 i, o = train_data[index] 时,您试图获得两个值。例如,您可以将值分配给时间变量,然后根据需要拆分它。
编辑
变量 i
和 o
与函数 learn_rnn_fn
所期望的类型不同。它期待 numpy 矩阵。
我正在尝试为我自己的数据重新创建 this LSTM 示例。
Traceback (most recent call last):
File "lstm.py", line 124, in <module>
train_rnn(train_data)
File "lstm.py", line 120, in train_rnn
train_cost = learn_rnn_fn(i, o)
File "/usr/local/lib/python3.5/site-packages/theano/compile/function_module.py", line 788, in __call__
allow_downcast=s.allow_downcast)
File "/usr/local/lib/python3.5/site-packages/theano/tensor/type.py", line 178, in filter
data.shape))
TypeError: ('Bad input argument to theano function with name "lstm.py:108" at index 0 (0-based)', 'Wrong number of dimensions: expected 2, got 0 with shape ().')
我提供的代码如下:
import numpy as np
import theano
import theano.tensor as T
import pandas
dtype=theano.config.floatX
def create_dataset(dataset, look_back=1):
data = []
for dx in range(len(dataset) - look_back - 1):
data.append([dataset[dx], dataset[dx + 1]])
return np.array(data, dtype=dtype)
raw_data = pandas.read_csv('international-airline-passengers.csv', usecols=[1])
train_data = create_dataset(raw_data.as_matrix()[:,0])
train_data
在此转换后变为二维 numpy 矩阵。
sigma = lambda x: 1 / (1 + T.exp(-x))
act = T.tanh
def one_lstm_step(x_t, h_tm1, c_tm1, W_xi, W_hi, W_ci, b_i, W_xf, W_hf, W_cf, b_f, W_xc, W_hc, b_c, W_xy, W_ho, W_cy, b_o, W_hy, b_y):
i_t = sigma(theano.dot(x_t, W_xi) + theano.dot(h_tm1, W_hi) + theano.dot(c_tm1, W_ci) + b_i)
f_t = sigma(theano.dot(x_t, W_xf) + theano.dot(h_tm1, W_hf) + theano.dot(c_tm1, W_cf) + b_f)
c_t = f_t * c_tm1 + i_t * act(theano.dot(x_t, W_xc) + theano.dot(h_tm1, W_hc) + b_c)
o_t = sigma(theano.dot(x_t, W_xo)+ theano.dot(h_tm1, W_ho) + theano.dot(c_t, W_co) + b_o)
h_t = o_t * act(c_t)
y_t = sigma(theano.dot(h_t, W_hy) + b_y)
return [h_t, c_t, y_t]
def sample_weights(sizeX, sizeY):
values = np.ndarray([sizeX, sizeY], dtype=dtype)
for dx in range(sizeX):
vals = np.random.uniform(low=-1., high=1., size=(sizeY,))
values[dx,:] = vals
_,svs,_ = np.linalg.svd(values)
values = values / svs[0]
return values
n_in = 1
n_hidden = n_i = n_c = n_o = n_f = 10
n_y = 1
W_xi = theano.shared(sample_weights(n_in, n_i))
W_hi = theano.shared(sample_weights(n_hidden, n_i))
W_ci = theano.shared(sample_weights(n_c, n_i))
b_i = theano.shared(np.cast[dtype](np.random.uniform(-0.5,.5,size = n_i)))
W_xf = theano.shared(sample_weights(n_in, n_f))
W_hf = theano.shared(sample_weights(n_hidden, n_f))
W_cf = theano.shared(sample_weights(n_c, n_f))
b_f = theano.shared(np.cast[dtype](np.random.uniform(0, 1.,size = n_f)))
W_xc = theano.shared(sample_weights(n_in, n_c))
W_hc = theano.shared(sample_weights(n_hidden, n_c))
b_c = theano.shared(np.zeros(n_c, dtype=dtype))
W_xo = theano.shared(sample_weights(n_in, n_o))
W_ho = theano.shared(sample_weights(n_hidden, n_o))
W_co = theano.shared(sample_weights(n_c, n_o))
b_o = theano.shared(np.cast[dtype](np.random.uniform(-0.5,.5,size = n_o)))
W_hy = theano.shared(sample_weights(n_hidden, n_y))
b_y = theano.shared(np.zeros(n_y, dtype=dtype))
c0 = theano.shared(np.zeros(n_hidden, dtype=dtype))
h0 = T.tanh(c0)
params = [W_xi, W_hi, W_ci, b_i, W_xf, W_hf, W_cf, b_f, W_xc, W_hc, b_c, W_xo, W_ho, W_co, b_o, W_hy, b_y, c0]
v = T.matrix(dtype=dtype)
target = T.matrix(dtype=dtype)
[h_vals, _, y_vals], _ = theano.scan(fn=one_lstm_step,
sequences = dict(input=v, taps=[0]),
outputs_info = [h0, c0, None ], # corresponds to return type of fn
non_sequences = [W_xi, W_hi, W_ci, b_i, W_xf, W_hf, W_cf, b_f, W_xc, W_hc, b_c, W_xo, W_ho, W_co, b_o, W_hy, b_y] )
cost = -T.mean(target * T.log(y_vals)+ (1.- target) * T.log(1. - y_vals))
updates=[]
learn_rnn_fn = theano.function(inputs = [v, target],
outputs = cost,
updates = updates)
nb_epochs=1
train_errors = np.ndarray(nb_epochs)
def train_rnn(train_data):
for x in range(nb_epochs):
error = 0.
print(train_data)
for j in range(len(train_data)):
index = np.random.randint(0, len(train_data))
i, o = train_data[index]
train_cost = learn_rnn_fn(i, o)
error += train_cost
train_errors[x] = error
train_rnn(train_data)
调试显示变量i
和o
的形状不合适。我尝试重塑数据,但它会导致其他数据类型问题。
函数 create_dataset 返回一个 numpy 数组。但是,当您调用 i, o = train_data[index] 时,您试图获得两个值。例如,您可以将值分配给时间变量,然后根据需要拆分它。
编辑
变量 i
和 o
与函数 learn_rnn_fn
所期望的类型不同。它期待 numpy 矩阵。