Keras 中可变长度输入/输出的多对多序列预测
Many-to-many sequence prediction with variable-length input/output in Keras
我正在尝试使用 Keras 预测可变长度 input/output 多对多序列,下面的数据框是数据的表示。 5 列和 1 个目标列。
# Toy dataset: five feature columns plus the target column 'tar'.
# Every cell holds a list, and the list lengths differ between cells.
df3 = {
    'email': [[0, 0, 0, 1], [0, 1, 2], [0, 3, 1, 5], [0, 0, 0, 1], [0, 1, 2], [0, 3, 1, 5]],
    'fax': [[0, 1, 0, 1], [3, 2], [0, 2, 1, 5, 4, 6], [0, 1, 0, 1], [3, 2], [0, 2, 1, 5, 4, 6]],
    'physical_mail': [[0, 0, 0, 2], [0, 2], [0, 9, 1, 3, 4, 0], [0, 0, 3, 0], [1, 2], [0, 2, 0, 2, 4, 6]],
    'cold_call': [[0, 0, 0, 0, 0, 0], [0, 2, 0, 0], [0, 1, 1, 3, 2, 0, 2, 2], [0, 0, 3, 0, 0, 0, 0], [1, 2, 5, 0, 0, 1, 2], [0, 2, 0, 2, 4, 3, 9, 0, 6]],
    'in_person': [[0, 0, 0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 1], [1, 0, 0, 0, 0, 0, 0], [0, 2, 0, 2, 0, 0, 9, 0, 0, 0, 0, 1]],
    'tar': [[0, 1], [0, 0, 0, 0], [0, 0, 0, 0, 1], [0, 1], [0, 0, 0, 0], [0, 0, 0, 0, 1]],
}
# The pandas class is spelled DataFrame (capital F); pd.Dataframe raises AttributeError.
df4 = pd.DataFrame(df3)
要重塑数据,有 6 个样本,5 列,一次输入一列
y 是 6 个样本,一次 1 列
# Inputs: the five feature columns for all 6 samples, with a trailing axis of size 1.
x_train = (
    df4[['email', 'fax', 'physical_mail', 'cold_call', 'in_person']]
    .values
    .reshape(6, 5, 1)
)
# Targets: the 'tar' column, one entry per sample.
y_train = df4['tar'].values.reshape(6, 1, 1)

# Layer stack: LSTM -> RepeatVector -> LSTM (full sequence) -> per-step Dense(1).
model = Sequential([
    # 5 columns are passed one at a time, hence input_shape=(5, 1)
    LSTM(64, input_shape=(5, 1)),
    # NOTE: the author is unsure whether 10 is the right RepeatVector argument
    RepeatVector(10),
    LSTM(64, return_sequences=True),
    TimeDistributed(Dense(1)),
    Activation('linear'),
])
model.compile(loss='mean_squared_error', optimizer='rmsprop')
我看到一个错误:“setting an array element with a sequence”(用序列设置数组元素)。是因为输入是由长度不同的列表混合而成的吗?如果是,该如何将其展平?
试试这个 -
# For each feature column, pad every sample to length 12 (zeros are added at
# the front, as the printed result shows) and join that column's padded
# samples end to end, giving one flat output row per column.
np.array([
    np.concatenate(pad_sequences(list(series), maxlen=12))
    for _, series in df4[['email', 'fax', 'physical_mail', 'cold_call', 'in_person']].items()
])
array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 5, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0,
0, 0, 0, 3, 1, 5],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
3, 2, 0, 0, 0, 0, 0, 0, 0, 2, 1, 5, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 2, 0, 0, 0, 0, 0, 0,
0, 2, 1, 5, 4, 6],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 2, 0, 0, 0, 0, 0, 0, 0, 9, 1, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0,
0, 2, 0, 2, 4, 6],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
0, 0, 0, 0, 0, 0, 0, 1, 1, 3, 2, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 3,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 5, 0, 0, 1, 2, 0, 0, 0, 0, 2, 0,
2, 4, 3, 9, 0, 6],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0,
9, 0, 0, 0, 0, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1]], dtype=int32)
这会为 DataFrame 的每一列生成一个一维数组:该列的 6 个样本各自被填充到长度 12,然后首尾连接起来(每列共 72 个元素)。假设这就是您所需要的。如果希望每一列得到一个二维数组,请去掉 np.concatenate 这一步。
# Same padding without the concatenation: each column of df4 (the target
# column 'tar' included, since all columns are iterated) becomes one 2-D
# block with one length-12 row per sample.
np.array([
    pad_sequences(list(series), maxlen=12)
    for _, series in df4.items()
])
array([[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 5],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 5]],
[[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 2],
[0, 0, 0, 0, 0, 0, 0, 2, 1, 5, 4, 6],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 2],
[0, 0, 0, 0, 0, 0, 0, 2, 1, 5, 4, 6]],
[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2],
[0, 0, 0, 0, 0, 0, 0, 9, 1, 3, 4, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2],
[0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 4, 6]],
[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0],
[0, 0, 0, 0, 0, 1, 1, 3, 2, 0, 2, 2],
[0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 2, 5, 0, 0, 1, 2],
[0, 0, 0, 0, 2, 0, 2, 4, 3, 9, 0, 6]],
[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
[0, 2, 0, 2, 0, 0, 9, 0, 0, 0, 0, 1]],
[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]]], dtype=int32)
我正在尝试使用 Keras 预测可变长度 input/output 多对多序列,下面的数据框是数据的表示。 5 列和 1 个目标列。
# Toy dataset: five feature columns plus the target column 'tar'.
# Every cell holds a list, and the list lengths differ between cells.
df3 = {
    'email': [[0, 0, 0, 1], [0, 1, 2], [0, 3, 1, 5], [0, 0, 0, 1], [0, 1, 2], [0, 3, 1, 5]],
    'fax': [[0, 1, 0, 1], [3, 2], [0, 2, 1, 5, 4, 6], [0, 1, 0, 1], [3, 2], [0, 2, 1, 5, 4, 6]],
    'physical_mail': [[0, 0, 0, 2], [0, 2], [0, 9, 1, 3, 4, 0], [0, 0, 3, 0], [1, 2], [0, 2, 0, 2, 4, 6]],
    'cold_call': [[0, 0, 0, 0, 0, 0], [0, 2, 0, 0], [0, 1, 1, 3, 2, 0, 2, 2], [0, 0, 3, 0, 0, 0, 0], [1, 2, 5, 0, 0, 1, 2], [0, 2, 0, 2, 4, 3, 9, 0, 6]],
    'in_person': [[0, 0, 0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 1], [1, 0, 0, 0, 0, 0, 0], [0, 2, 0, 2, 0, 0, 9, 0, 0, 0, 0, 1]],
    'tar': [[0, 1], [0, 0, 0, 0], [0, 0, 0, 0, 1], [0, 1], [0, 0, 0, 0], [0, 0, 0, 0, 1]],
}
# The pandas class is spelled DataFrame (capital F); pd.Dataframe raises AttributeError.
df4 = pd.DataFrame(df3)
要重塑数据,有 6 个样本,5 列,一次输入一列 y 是 6 个样本,一次 1 列
# Inputs: the five feature columns for all 6 samples, with a trailing axis of size 1.
x_train = (
    df4[['email', 'fax', 'physical_mail', 'cold_call', 'in_person']]
    .values
    .reshape(6, 5, 1)
)
# Targets: the 'tar' column, one entry per sample.
y_train = df4['tar'].values.reshape(6, 1, 1)

# Layer stack: LSTM -> RepeatVector -> LSTM (full sequence) -> per-step Dense(1).
model = Sequential([
    # 5 columns are passed one at a time, hence input_shape=(5, 1)
    LSTM(64, input_shape=(5, 1)),
    # NOTE: the author is unsure whether 10 is the right RepeatVector argument
    RepeatVector(10),
    LSTM(64, return_sequences=True),
    TimeDistributed(Dense(1)),
    Activation('linear'),
])
model.compile(loss='mean_squared_error', optimizer='rmsprop')
我看到一个错误:“setting an array element with a sequence”(用序列设置数组元素)。是因为输入是由长度不同的列表混合而成的吗?如果是,该如何将其展平?
试试这个 -
# For each feature column, pad every sample to length 12 (zeros are added at
# the front, as the printed result shows) and join that column's padded
# samples end to end, giving one flat output row per column.
np.array([
    np.concatenate(pad_sequences(list(series), maxlen=12))
    for _, series in df4[['email', 'fax', 'physical_mail', 'cold_call', 'in_person']].items()
])
array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 5, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0,
0, 0, 0, 3, 1, 5],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
3, 2, 0, 0, 0, 0, 0, 0, 0, 2, 1, 5, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 2, 0, 0, 0, 0, 0, 0,
0, 2, 1, 5, 4, 6],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 2, 0, 0, 0, 0, 0, 0, 0, 9, 1, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0,
0, 2, 0, 2, 4, 6],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
0, 0, 0, 0, 0, 0, 0, 1, 1, 3, 2, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 3,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 5, 0, 0, 1, 2, 0, 0, 0, 0, 2, 0,
2, 4, 3, 9, 0, 6],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0,
9, 0, 0, 0, 0, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1]], dtype=int32)
这会为 DataFrame 的每一列生成一个一维数组:该列的 6 个样本各自被填充到长度 12,然后首尾连接起来(每列共 72 个元素)。假设这就是您所需要的。如果希望每一列得到一个二维数组,请去掉 np.concatenate 这一步。
# Same padding without the concatenation: each column of df4 (the target
# column 'tar' included, since all columns are iterated) becomes one 2-D
# block with one length-12 row per sample.
np.array([
    pad_sequences(list(series), maxlen=12)
    for _, series in df4.items()
])
array([[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 5],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 5]],
[[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 2],
[0, 0, 0, 0, 0, 0, 0, 2, 1, 5, 4, 6],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 2],
[0, 0, 0, 0, 0, 0, 0, 2, 1, 5, 4, 6]],
[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2],
[0, 0, 0, 0, 0, 0, 0, 9, 1, 3, 4, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2],
[0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 4, 6]],
[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0],
[0, 0, 0, 0, 0, 1, 1, 3, 2, 0, 2, 2],
[0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 2, 5, 0, 0, 1, 2],
[0, 0, 0, 0, 2, 0, 2, 4, 3, 9, 0, 6]],
[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
[0, 2, 0, 2, 0, 0, 9, 0, 0, 0, 0, 1]],
[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]]], dtype=int32)