在 Keras 中进行可变长度输入/输出的多对多序列预测

Many-to-many sequence prediction with variable-length input/output in Keras

我正在尝试使用 Keras 对可变长度输入/输出的多对多序列进行预测。下面的数据框展示了数据的形式:5 个特征列和 1 个目标列。

    # Sample data: six samples per column, each cell a variable-length list.
    # 'tar' is the target column; the other five columns are the inputs.
    df3 = {
        'email': [[0,0,0,1],[0,1,2],[0,3,1,5],[0,0,0,1],[0,1,2],[0,3,1,5]],
        'fax': [[0,1,0,1],[3,2],[0,2,1,5,4,6],[0,1,0,1],[3,2],[0,2,1,5,4,6]],
        'physical_mail': [[0,0,0,2],[0,2],[0,9,1,3,4,0],[0,0,3,0],[1,2],[0,2,0,2,4,6]],
        'cold_call': [[0,0,0,0,0,0],[0,2,0,0],[0,1,1,3,2,0,2,2],[0,0,3,0,0,0,0],[1,2,5,0,0,1,2],[0,2,0,2,4,3,9,0,6]],
        'in_person': [[0,0,0,0,0,0],[0,0,0,0],[0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0,0,1],[1,0,0,0,0,0,0],[0,2,0,2,0,0,9,0,0,0,0,1]],
        'tar': [[0,1],[0,0,0,0],[0,0,0,0,1],[0,1],[0,0,0,0],[0,0,0,0,1]],
    }
    # The pandas class is spelled `DataFrame`; `pd.Dataframe` raises
    # AttributeError.
    df4 = pd.DataFrame(df3)

重塑数据:x 有 6 个样本、5 列,每次输入一列;y 有 6 个样本,每次 1 列。

    # Inputs: the five feature columns, reshaped to (6, 5, 1).
    # NOTE: each cell still holds a variable-length Python list, so this is
    # not a plain numeric array.
    x_train = df4[
        ['email', 'fax', 'physical_mail', 'cold_call', 'in_person']
    ].to_numpy().reshape((6, 5, 1))
    # Targets: the 'tar' column, reshaped to (6, 1, 1).
    y_train = df4['tar'].to_numpy().reshape((6, 1, 1))


 
 # The 5 columns are passed one at a time, hence input_shape=(5, 1).
 model = Sequential([
     LSTM(64, input_shape=(5, 1)),
     # Not sure about the RepeatVector argument.
     RepeatVector(10),
     LSTM(64, return_sequences=True),
     TimeDistributed(Dense(1)),
     Activation('linear'),
 ])
 model.compile(loss='mean_squared_error', optimizer='rmsprop')

我遇到一个错误:“setting an array element with a sequence.”(用序列设置数组元素)。是因为输入是由列表混合而成的吗?如果是,该如何将其展平?

试试这个 -

# Pad every sample of a feature column to length 12, then join the padded
# samples end to end; each selected column yields one output row.
np.array([
    np.concatenate(pad_sequences(list(df4[col]), maxlen=12))
    for col in ['email', 'fax', 'physical_mail', 'cold_call', 'in_person']
])
array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 5, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 3, 1, 5],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        3, 2, 0, 0, 0, 0, 0, 0, 0, 2, 1, 5, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 2, 0, 0, 0, 0, 0, 0,
        0, 2, 1, 5, 4, 6],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 2, 0, 0, 0, 0, 0, 0, 0, 9, 1, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0,
        0, 2, 0, 2, 4, 6],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
        0, 0, 0, 0, 0, 0, 0, 1, 1, 3, 2, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 3,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 5, 0, 0, 1, 2, 0, 0, 0, 0, 2, 0,
        2, 4, 3, 9, 0, 6],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0,
        9, 0, 0, 0, 0, 1],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 1]], dtype=int32)

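这应该为每一列生成一个一维数组(即输出中的一行):该列的每个样本都被填充到长度 12,然后连接在一起(6 个样本 × 12 = 72 个值)。注意,上面显示的输出有 6 行,最后一行对应 tar 列,看起来是对 df4 的全部列运行得到的;若只选取 5 个特征列,结果形状为 (5, 72)。假设这就是您所需要的。如果输出的每一行需要是二维数组,请去掉连接(np.concatenate)部分。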

# Same padding without the concatenation: one 2-D block per column of df4
# (the target column 'tar' included), each row padded to length 12.
np.array([pad_sequences(list(df4[col]), maxlen=12) for col in df4.columns])
array([[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 5],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 5]],

       [[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 2],
        [0, 0, 0, 0, 0, 0, 0, 2, 1, 5, 4, 6],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 2],
        [0, 0, 0, 0, 0, 0, 0, 2, 1, 5, 4, 6]],

       [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2],
        [0, 0, 0, 0, 0, 0, 0, 9, 1, 3, 4, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2],
        [0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 4, 6]],

       [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0],
        [0, 0, 0, 0, 0, 1, 1, 3, 2, 0, 2, 2],
        [0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 1, 2, 5, 0, 0, 1, 2],
        [0, 0, 0, 0, 2, 0, 2, 4, 3, 9, 0, 6]],

       [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
        [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
        [0, 2, 0, 2, 0, 0, 9, 0, 0, 0, 0, 1]],

       [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]]], dtype=int32)