当我使用 keras 库训练 RNN 时,如何纠正不断出现的维度错误?
How can I fix the dimension error I keep getting when training an RNN with the Keras library?
我想构建一个 40 类的 LSTM 分类器(40-class LSTM classifier)来分析时间序列数据。我有从 13 个传感器采集的 13 维实时数据。当我运行下面的代码时,总是收到如下错误信息。
ValueError: Error when checking model input: the list of Numpy arrays
that you are passing to your model is not the size the model expected.
Expected to see 1 arrays but instead got the following list of 241458
arrays: [array([[ 0.64817517, 0.12892013, 0.01879949, 0.00946322,
0.00458952,
0.01668651, 0.04776124, 0.03301365, 0.0360659 , 0.15013408,
0.10112171, 0.05494366, 0.02620634],
RNN代码
from __future__ import print_function
import keras
from keras import metrics
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, Activation
from keras.utils import np_utils
from keras.layers.normalization import BatchNormalization
from sklearn.cross_validation import train_test_split
import pandas as pd
from keras.callbacks import CSVLogger
from keras.models import load_model
from keras.layers import LSTM
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
import keras
def top_k_acc(y_true, y_pred):
    """Return the top-5 categorical accuracy of ``y_pred`` against ``y_true``.

    Thin wrapper around ``metrics.top_k_categorical_accuracy`` with ``k``
    pinned to 5, leaving a two-argument ``(y_true, y_pred)`` signature.
    """
    top_five = metrics.top_k_categorical_accuracy(y_true, y_pred, k=5)
    return top_five
# Training parameters
sequence_length = 60  # number of consecutive rows that make up one input window
data_dim = 13  # features per time step (one per sensor)
num_classes = 40  # number of target classes
batch_size = 15000  # batch size handed to model.fit()
epochs = 10  # number of epochs handed to model.fit()
# tf.set_random_seed(777) # reproducibility
def MinMaxScaler(data):
    """Min-max normalise every column of ``data`` into the range ``[0, 1)``.

    Parameters
    ----------
    data : numpy.ndarray
        Input data to be normalised, shape ``[batch size, dimension]``.

    Returns
    -------
    numpy.ndarray
        Normalised data with the same shape as ``data``.

    References
    ----------
    .. [1] http://sebastianraschka.com/Articles/2014_about_feature_scaling.html
    """
    # The column-wise minimum is needed twice, so compute it only once.
    col_min = np.min(data, 0)
    value_range = np.max(data, 0) - col_min
    # The small epsilon keeps constant columns (range 0) from dividing by zero.
    return (data - col_min) / (value_range + 1e-7)
# Load data: column 0 holds the class label, columns 1-13 the sensor readings.
xy = np.loadtxt('sc_total_for 60s v4.0 test.csv', delimiter=',', skiprows=1)
y = xy[:, 0]
x = MinMaxScaler(xy[:, 1:14])

# Build a dataset: every run of `sequence_length` consecutive rows of x is one
# sample, labelled with the y value of the row right after that run.
# NOTE(review): x_data stays a plain Python list of 2-D arrays here rather
# than one 3-D array - confirm this is what model.fit() is meant to receive.
x_data = []
y_data = []
for start in range(len(y) - sequence_length):
    stop = start + sequence_length
    x_data.append(x[start:stop])
    y_data.append(y[stop])

# One-hot encoding: map the raw labels to 0..n-1, then to indicator vectors.
encoder = LabelEncoder()
encoded_Y = encoder.fit_transform(y_data)
dummy_y = np_utils.to_categorical(encoded_Y)

# Train/test split: 30 % of the windows are held out, with a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(
    x_data, dummy_y, random_state=4, test_size=0.3)
# Network: two stacked LSTM layers followed by a softmax classifier.
model = Sequential()
# input_shape (instead of batch_input_shape) leaves the batch dimension
# unspecified, so the smaller final batch of an epoch and the default-batch
# evaluate()/predict() calls below are accepted.
model.add(LSTM(40, input_shape=(sequence_length, data_dim),
               return_sequences=True))
model.add(LSTM(40, return_sequences=False))
# One output unit per class; softmax turns the outputs into class
# probabilities that line up with the one-hot targets.
model.add(Dense(num_classes))
model.add(Activation("softmax"))
model.summary()

# categorical_crossentropy is the matching loss for one-hot multi-class targets.
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# Per-epoch metrics are written to a CSV log file.
csv_logger = CSVLogger('LSTM 1111.log')
history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(x_test, y_test),
                    callbacks=[csv_logger])
score = model.evaluate(x_test, y_test, verbose=0)
predictions = model.predict(x_test)
# model.save('New Model6 save.h5')
# plot_model(model, to_file='model1.png')
# print('Test loss:', score[0])
# print('Test accuracy:', score[1])
问题是:
# Build a dataset
x_data = []
y_data = []
for start in range(len(y) - sequence_length):
    stop = start + sequence_length
    # Each window is appended as its own 2-D array.
    x_data.append(x[start:stop])
    y_data.append(y[stop])
Keras 的 LSTM 需要的输入是单个三维数组,而您为 x_data 构建的却是一个由二维 numpy 数组组成的列表。
请改为这样做:
# Pre-allocate one 3-D array of shape (samples, time steps, features) and copy
# each sliding window of x into its own row.
num_samples = len(y) - sequence_length
x_data = np.zeros((num_samples, sequence_length, data_dim))
for start in range(num_samples):
    x_data[start] = x[start:start + sequence_length]
# The label of the window starting at `start` is y[start + sequence_length],
# i.e. y shifted by one window length, so all labels are filled in one slice.
y_data = np.zeros(num_samples)
y_data[:] = y[sequence_length:]
我想构建一个 40 类的 LSTM 分类器(40-class LSTM classifier)来分析时间序列数据。我有从 13 个传感器采集的 13 维实时数据。当我运行下面的代码时,总是收到如下错误信息。
ValueError: Error when checking model input: the list of Numpy arrays that you are passing to your model is not the size the model expected. Expected to see 1 arrays but instead got the following list of 241458 arrays: [array([[ 0.64817517, 0.12892013, 0.01879949, 0.00946322, 0.00458952, 0.01668651, 0.04776124, 0.03301365, 0.0360659 , 0.15013408, 0.10112171, 0.05494366, 0.02620634],
RNN代码
from __future__ import print_function
import keras
from keras import metrics
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, Activation
from keras.utils import np_utils
from keras.layers.normalization import BatchNormalization
from sklearn.cross_validation import train_test_split
import pandas as pd
from keras.callbacks import CSVLogger
from keras.models import load_model
from keras.layers import LSTM
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
import keras
def top_k_acc(y_true, y_pred):
    """Return the top-5 categorical accuracy of ``y_pred`` against ``y_true``.

    Thin wrapper around ``metrics.top_k_categorical_accuracy`` with ``k``
    pinned to 5, leaving a two-argument ``(y_true, y_pred)`` signature.
    """
    top_five = metrics.top_k_categorical_accuracy(y_true, y_pred, k=5)
    return top_five
# Training parameters
sequence_length = 60  # number of consecutive rows that make up one input window
data_dim = 13  # features per time step (one per sensor)
num_classes = 40  # number of target classes
batch_size = 15000  # batch size handed to model.fit()
epochs = 10  # number of epochs handed to model.fit()
# tf.set_random_seed(777) # reproducibility
def MinMaxScaler(data):
    """Min-max normalise every column of ``data`` into the range ``[0, 1)``.

    Parameters
    ----------
    data : numpy.ndarray
        Input data to be normalised, shape ``[batch size, dimension]``.

    Returns
    -------
    numpy.ndarray
        Normalised data with the same shape as ``data``.

    References
    ----------
    .. [1] http://sebastianraschka.com/Articles/2014_about_feature_scaling.html
    """
    # The column-wise minimum is needed twice, so compute it only once.
    col_min = np.min(data, 0)
    value_range = np.max(data, 0) - col_min
    # The small epsilon keeps constant columns (range 0) from dividing by zero.
    return (data - col_min) / (value_range + 1e-7)
# Load data: column 0 holds the class label, columns 1-13 the sensor readings.
xy = np.loadtxt('sc_total_for 60s v4.0 test.csv', delimiter=',', skiprows=1)
y = xy[:, 0]
x = MinMaxScaler(xy[:, 1:14])

# Build a dataset: every run of `sequence_length` consecutive rows of x is one
# sample, labelled with the y value of the row right after that run.
# NOTE(review): x_data stays a plain Python list of 2-D arrays here rather
# than one 3-D array - confirm this is what model.fit() is meant to receive.
x_data = []
y_data = []
for start in range(len(y) - sequence_length):
    stop = start + sequence_length
    x_data.append(x[start:stop])
    y_data.append(y[stop])

# One-hot encoding: map the raw labels to 0..n-1, then to indicator vectors.
encoder = LabelEncoder()
encoded_Y = encoder.fit_transform(y_data)
dummy_y = np_utils.to_categorical(encoded_Y)

# Train/test split: 30 % of the windows are held out, with a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(
    x_data, dummy_y, random_state=4, test_size=0.3)
# Network: two stacked LSTM layers followed by a softmax classifier.
model = Sequential()
# input_shape (instead of batch_input_shape) leaves the batch dimension
# unspecified, so the smaller final batch of an epoch and the default-batch
# evaluate()/predict() calls below are accepted.
model.add(LSTM(40, input_shape=(sequence_length, data_dim),
               return_sequences=True))
model.add(LSTM(40, return_sequences=False))
# One output unit per class; softmax turns the outputs into class
# probabilities that line up with the one-hot targets.
model.add(Dense(num_classes))
model.add(Activation("softmax"))
model.summary()

# categorical_crossentropy is the matching loss for one-hot multi-class targets.
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# Per-epoch metrics are written to a CSV log file.
csv_logger = CSVLogger('LSTM 1111.log')
history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(x_test, y_test),
                    callbacks=[csv_logger])
score = model.evaluate(x_test, y_test, verbose=0)
predictions = model.predict(x_test)
# model.save('New Model6 save.h5')
# plot_model(model, to_file='model1.png')
# print('Test loss:', score[0])
# print('Test accuracy:', score[1])
问题是:
# Build a dataset
x_data = []
y_data = []
for start in range(len(y) - sequence_length):
    stop = start + sequence_length
    # Each window is appended as its own 2-D array.
    x_data.append(x[start:stop])
    y_data.append(y[stop])
Keras 的 LSTM 需要的输入是单个三维数组,而您为 x_data 构建的却是一个由二维 numpy 数组组成的列表。
请改为这样做:
# Pre-allocate one 3-D array of shape (samples, time steps, features) and copy
# each sliding window of x into its own row.
num_samples = len(y) - sequence_length
x_data = np.zeros((num_samples, sequence_length, data_dim))
for start in range(num_samples):
    x_data[start] = x[start:start + sequence_length]
# The label of the window starting at `start` is y[start + sequence_length],
# i.e. y shifted by one window length, so all labels are filled in one slice.
y_data = np.zeros(num_samples)
y_data[:] = y[sequence_length:]