'Wrong number of dimensions' Theano 中的错误 - LSTM

'Wrong number of dimensions' error in Theano - LSTM

我正在尝试为我自己的数据重新创建 this LSTM 示例。

Traceback (most recent call last):
  File "lstm.py", line 124, in <module>
    train_rnn(train_data)
  File "lstm.py", line 120, in train_rnn
    train_cost = learn_rnn_fn(i, o)
  File "/usr/local/lib/python3.5/site-packages/theano/compile/function_module.py", line 788, in __call__
    allow_downcast=s.allow_downcast)
  File "/usr/local/lib/python3.5/site-packages/theano/tensor/type.py", line 178, in filter
    data.shape))
TypeError: ('Bad input argument to theano function with name "lstm.py:108" at index 0 (0-based)', 'Wrong number of dimensions: expected 2, got 0 with shape ().')

我提供的代码如下:

import numpy as np
import theano
import theano.tensor as T
import pandas

dtype=theano.config.floatX

def create_dataset(dataset, look_back=1):
    data = []
    for dx in range(len(dataset) - look_back - 1):
        data.append([dataset[dx], dataset[dx + 1]])
    return np.array(data, dtype=dtype)

raw_data = pandas.read_csv('international-airline-passengers.csv', usecols=[1])

train_data = create_dataset(raw_data.as_matrix()[:,0])

train_data 在此转换后变为二维 numpy 矩阵。

sigma = lambda x: 1 / (1 + T.exp(-x))
act = T.tanh

def one_lstm_step(x_t, h_tm1, c_tm1, W_xi, W_hi, W_ci, b_i, W_xf, W_hf, W_cf, b_f, W_xc, W_hc, b_c, W_xy, W_ho, W_cy, b_o, W_hy, b_y):
    i_t = sigma(theano.dot(x_t, W_xi) + theano.dot(h_tm1, W_hi) + theano.dot(c_tm1, W_ci) + b_i)
    f_t = sigma(theano.dot(x_t, W_xf) + theano.dot(h_tm1, W_hf) + theano.dot(c_tm1, W_cf) + b_f)
    c_t = f_t * c_tm1 + i_t * act(theano.dot(x_t, W_xc) + theano.dot(h_tm1, W_hc) + b_c) 
    o_t = sigma(theano.dot(x_t, W_xo)+ theano.dot(h_tm1, W_ho) + theano.dot(c_t, W_co)  + b_o)
    h_t = o_t * act(c_t)
    y_t = sigma(theano.dot(h_t, W_hy) + b_y) 
    return [h_t, c_t, y_t]

def sample_weights(sizeX, sizeY):
    values = np.ndarray([sizeX, sizeY], dtype=dtype)
    for dx in range(sizeX):
        vals = np.random.uniform(low=-1., high=1.,  size=(sizeY,))
        values[dx,:] = vals
    _,svs,_ = np.linalg.svd(values)                      
    values = values / svs[0]
    return values

n_in = 1
n_hidden = n_i = n_c = n_o = n_f = 10
n_y = 1

W_xi = theano.shared(sample_weights(n_in, n_i))
W_hi = theano.shared(sample_weights(n_hidden, n_i))  
W_ci = theano.shared(sample_weights(n_c, n_i))  
b_i = theano.shared(np.cast[dtype](np.random.uniform(-0.5,.5,size = n_i)))
W_xf = theano.shared(sample_weights(n_in, n_f)) 
W_hf = theano.shared(sample_weights(n_hidden, n_f))
W_cf = theano.shared(sample_weights(n_c, n_f))
b_f = theano.shared(np.cast[dtype](np.random.uniform(0, 1.,size = n_f)))
W_xc = theano.shared(sample_weights(n_in, n_c))  
W_hc = theano.shared(sample_weights(n_hidden, n_c))
b_c = theano.shared(np.zeros(n_c, dtype=dtype))
W_xo = theano.shared(sample_weights(n_in, n_o))
W_ho = theano.shared(sample_weights(n_hidden, n_o))
W_co = theano.shared(sample_weights(n_c, n_o))
b_o = theano.shared(np.cast[dtype](np.random.uniform(-0.5,.5,size = n_o)))
W_hy = theano.shared(sample_weights(n_hidden, n_y))
b_y = theano.shared(np.zeros(n_y, dtype=dtype))
c0 = theano.shared(np.zeros(n_hidden, dtype=dtype))
h0 = T.tanh(c0)

params = [W_xi, W_hi, W_ci, b_i, W_xf, W_hf, W_cf, b_f, W_xc, W_hc, b_c, W_xo, W_ho, W_co, b_o, W_hy, b_y, c0]
v = T.matrix(dtype=dtype)
target = T.matrix(dtype=dtype)

[h_vals, _, y_vals], _ = theano.scan(fn=one_lstm_step, 
                                    sequences = dict(input=v, taps=[0]), 
                                    outputs_info = [h0, c0, None ], # corresponds to return type of fn
                                    non_sequences = [W_xi, W_hi, W_ci, b_i, W_xf, W_hf, W_cf, b_f, W_xc, W_hc, b_c, W_xo, W_ho, W_co, b_o, W_hy, b_y] )


cost = -T.mean(target * T.log(y_vals)+ (1.- target) * T.log(1. - y_vals))

updates=[]

learn_rnn_fn = theano.function(inputs = [v, target],
                                outputs = cost,
                                updates = updates)

nb_epochs=1
train_errors = np.ndarray(nb_epochs)

def train_rnn(train_data):
    for x in range(nb_epochs):
        error = 0.
        print(train_data)
        for j in range(len(train_data)):
            index = np.random.randint(0, len(train_data))
            i, o = train_data[index]
            train_cost = learn_rnn_fn(i, o)
            error += train_cost
        train_errors[x] = error

train_rnn(train_data)

调试显示变量io的形状不合适。我尝试重塑数据,但它会导致其他数据类型问题。

函数 create_dataset 返回一个 numpy 数组。但是,当您调用 i, o = train_data[index] 时,您试图获得两个值。例如,您可以将值分配给时间变量,然后根据需要拆分它。

编辑 变量 io 与函数 learn_rnn_fn 所期望的类型不同。它期待 numpy 矩阵。