日期格式的 LSTM 错误
LSTM error with date format
这是我第一次尝试深度学习,这段代码的目的是预测外汇市场走向。
代码如下:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential
column_names = ['Date', 'Time', 'Open', 'High', 'Low','Close', 'Volume']
data = pd.read_csv(r"E:\Tutorial\EURUSD60.csv", header=None, names=column_names)
data['DateTime'] = pd.to_datetime(data.Date + ' ' + data.Time)
del data['Date']
del data['Time']
sequence_length = 21
n_features = len(data.columns)
val_ratio = 0.1
n_epochs = 300
batch_size = 512
data = data.as_matrix()
data_processed = []
for index in range(len(data) - sequence_length):
data_processed.append(data[index: index + sequence_length])
data_processed = np.array(data_processed)
val_split = round((1 - val_ratio) * data_processed.shape[0])
train = data_processed[: int(val_split), :]
val = data_processed[int(val_split):, :]
print('Training data: {}'.format(train.shape))
print('Validation data: {}'.format(val.shape))
train_samples, train_nx, train_ny = train.shape
val_samples, val_nx, val_ny = val.shape
train = train.reshape((train_samples, train_nx * train_ny))
val = val.reshape((val_samples, val_nx * val_ny))
preprocessor = MinMaxScaler().fit(train)
train = preprocessor.transform(train)
val = preprocessor.transform(val)
train = train.reshape((train_samples, train_nx, train_ny))
val = val.reshape((val_samples, val_nx, val_ny))
X_train = train[:, : -1]
y_train = train[:, -1][:, -1]
X_val = val[:, : -1]
y_val = val[:, -1][:, -1]
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], n_features))
X_val = np.reshape(X_val, (X_val.shape[0], X_val.shape[1], n_features))
model = Sequential()
model.add(LSTM(input_shape=(X_train.shape[1:]), units=128, return_sequences=True))
model.add(Dropout(0.5))
model.add(LSTM(128, return_sequences=False))
model.add(Dropout(0.25))
model.add(Dense(units=1))
model.add(Activation("linear"))
model.compile(loss="mse", optimizer="adam")
history = model.fit(
X_train,
y_train,
batch_size=batch_size,
epochs=n_epochs,
verbose=2)
preds_val = model.predict(X_val)
diff = []
for i in range(len(y_val)):
pred = preds_val[i][0]
diff.append(y_val[i] - pred)
real_min = preprocessor.data_min_[104]
real_max = preprocessor.data_max_[104]
print(preprocessor.data_min_[104])
print(preprocessor.data_max_[104])
preds_real = preds_val * (real_max - real_min) + real_min
y_val_real = y_val * (real_max - real_min) + real_min
plt.plot(preds_real, label='Predictions')
plt.plot(y_val_real, label='Actual values')
plt.xlabel('test')
plt.legend(loc=0)
plt.show()
这里是错误:
Using TensorFlow backend. 2017-12-03 13:26:44.494199: W
C:\tf_jenkins\home\workspace\rel-win\M\windows\PY\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX instructions, but
these are available on your machine and could speed up CPU
computations. 2017-12-03 13:26:44.494660: W
C:\tf_jenkins\home\workspace\rel-win\M\windows\PY\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX2 instructions, but
these are available on your machine and could speed up CPU
computations. Training data: (1824, 21, 6) Validation data: (203, 21,
6) Traceback (most recent call last): File "E:/Tutorial/Deep
Learning.py", line 42, in
preprocessor = MinMaxScaler().fit(train) File "C:\Users\sydgo\Anaconda3\lib\site-packages\sklearn\preprocessing\data.py",
line 308, in fit
return self.partial_fit(X, y) File "C:\Users\sydgo\Anaconda3\lib\site-packages\sklearn\preprocessing\data.py",
line 334, in partial_fit
estimator=self, dtype=FLOAT_DTYPES) File "C:\Users\sydgo\Anaconda3\lib\site-packages\sklearn\utils\validation.py",
line 433, in check_array
array = np.array(array, dtype=dtype, order=order, copy=copy) TypeError: float() argument must be a string or a number, not
'Timestamp'
预期dtype
之间存在冲突
和
实际传递的数据类型:
TypeError: float() argument must be a string or a number, not 'Timestamp'
最有可能修改的是:
中的转换
data['DateTime'] = pd.to_datetime(data.Date + ' ' + data.Time)
你必须回到这个概念,什么(如果有的话)应该作为一组预期的外汇量化建模特征输入到 LSTM 模型中。
这是修复错误后的代码
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential
column_names = ['Date', 'Time', 'Open', 'High', 'Low','Close', 'Volume']
df = pd.read_csv(r"E:\Tutorial\EURUSD60.csv", header=None, names=column_names)
df['DateTime'] = pd.to_datetime(df.Date + ' ' + df.Time)
del df['Date']
del df['Time']
df.rename(columns={'DateTime': 'timestamp', 'Open': 'open',
'High': 'high', 'Low': 'low', 'Close': 'close', 'Volume': 'volume'}, inplace=True)
df['timestamp'] = pd.to_datetime(df['timestamp'], infer_datetime_format=True)
df.set_index('timestamp', inplace=True)
df = df.astype(float)
df['hour'] = df.index.hour
df['day'] = df.index.weekday
df['week'] = df.index.week
sequence_length = 21
n_features = len(df.columns)
val_ratio = 0.1
n_epochs = 300
batch_size = 512
data = df.as_matrix()
data_processed = []
for index in range(len(data) - sequence_length):
data_processed.append(data[index: index + sequence_length])
data_processed = np.array(data_processed)
val_split = round((1 - val_ratio) * data_processed.shape[0])
train = data_processed[: int(val_split), :]
val = data_processed[int(val_split):, :]
print('Training data: {}'.format(train.shape))
print('Validation data: {}'.format(val.shape))
train_samples, train_nx, train_ny = train.shape
val_samples, val_nx, val_ny = val.shape
train = train.reshape((train_samples, train_nx * train_ny))
val = val.reshape((val_samples, val_nx * val_ny))
preprocessor = MinMaxScaler().fit(train)
train = preprocessor.transform(train)
val = preprocessor.transform(val)
train = train.reshape((train_samples, train_nx, train_ny))
val = val.reshape((val_samples, val_nx, val_ny))
X_train = train[:, : -1]
y_train = train[:, -1][:, -1]
X_val = val[:, : -1]
y_val = val[:, -1][:, -1]
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], n_features))
X_val = np.reshape(X_val, (X_val.shape[0], X_val.shape[1], n_features))
model = Sequential()
model.add(LSTM(input_shape=(X_train.shape[1:]), units=128, return_sequences=True))
model.add(Dropout(0.5))
model.add(LSTM(128, return_sequences=False))
model.add(Dropout(0.25))
model.add(Dense(units=1))
model.add(Activation("linear"))
model.compile(loss="mse", optimizer="adam")
history = model.fit(
X_train,
y_train,
batch_size=batch_size,
epochs=n_epochs,
verbose=2)
preds_val = model.predict(X_val)
diff = []
for i in range(len(y_val)):
pred = preds_val[i][0]
diff.append(y_val[i] - pred)
real_min = preprocessor.data_min_[104]
real_max = preprocessor.data_max_[104]
print(preprocessor.data_min_[:120])
print(preprocessor.data_max_[:120])
preds_real = preds_val * (real_max - real_min) + real_min
y_val_real = y_val * (real_max - real_min) + real_min
plt.plot(preds_real, label='Predictions')
plt.plot(y_val_real, label='Actual values')
plt.xlabel('test')
plt.legend(loc=0)
plt.show()
这是我第一次尝试深度学习,这段代码的目的是预测外汇市场走向。
代码如下:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential
column_names = ['Date', 'Time', 'Open', 'High', 'Low','Close', 'Volume']
data = pd.read_csv(r"E:\Tutorial\EURUSD60.csv", header=None, names=column_names)
data['DateTime'] = pd.to_datetime(data.Date + ' ' + data.Time)
del data['Date']
del data['Time']
sequence_length = 21
n_features = len(data.columns)
val_ratio = 0.1
n_epochs = 300
batch_size = 512
data = data.as_matrix()
data_processed = []
for index in range(len(data) - sequence_length):
data_processed.append(data[index: index + sequence_length])
data_processed = np.array(data_processed)
val_split = round((1 - val_ratio) * data_processed.shape[0])
train = data_processed[: int(val_split), :]
val = data_processed[int(val_split):, :]
print('Training data: {}'.format(train.shape))
print('Validation data: {}'.format(val.shape))
train_samples, train_nx, train_ny = train.shape
val_samples, val_nx, val_ny = val.shape
train = train.reshape((train_samples, train_nx * train_ny))
val = val.reshape((val_samples, val_nx * val_ny))
preprocessor = MinMaxScaler().fit(train)
train = preprocessor.transform(train)
val = preprocessor.transform(val)
train = train.reshape((train_samples, train_nx, train_ny))
val = val.reshape((val_samples, val_nx, val_ny))
X_train = train[:, : -1]
y_train = train[:, -1][:, -1]
X_val = val[:, : -1]
y_val = val[:, -1][:, -1]
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], n_features))
X_val = np.reshape(X_val, (X_val.shape[0], X_val.shape[1], n_features))
model = Sequential()
model.add(LSTM(input_shape=(X_train.shape[1:]), units=128, return_sequences=True))
model.add(Dropout(0.5))
model.add(LSTM(128, return_sequences=False))
model.add(Dropout(0.25))
model.add(Dense(units=1))
model.add(Activation("linear"))
model.compile(loss="mse", optimizer="adam")
history = model.fit(
X_train,
y_train,
batch_size=batch_size,
epochs=n_epochs,
verbose=2)
preds_val = model.predict(X_val)
diff = []
for i in range(len(y_val)):
pred = preds_val[i][0]
diff.append(y_val[i] - pred)
real_min = preprocessor.data_min_[104]
real_max = preprocessor.data_max_[104]
print(preprocessor.data_min_[104])
print(preprocessor.data_max_[104])
preds_real = preds_val * (real_max - real_min) + real_min
y_val_real = y_val * (real_max - real_min) + real_min
plt.plot(preds_real, label='Predictions')
plt.plot(y_val_real, label='Actual values')
plt.xlabel('test')
plt.legend(loc=0)
plt.show()
这里是错误:
Using TensorFlow backend. 2017-12-03 13:26:44.494199: W
C:\tf_jenkins\home\workspace\rel-win\M\windows\PY\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX instructions, but
these are available on your machine and could speed up CPU
computations. 2017-12-03 13:26:44.494660: W
C:\tf_jenkins\home\workspace\rel-win\M\windows\PY\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX2 instructions, but
these are available on your machine and could speed up CPU
computations. Training data: (1824, 21, 6) Validation data: (203, 21,
6) Traceback (most recent call last): File "E:/Tutorial/Deep
Learning.py", line 42, in preprocessor = MinMaxScaler().fit(train) File "C:\Users\sydgo\Anaconda3\lib\site-packages\sklearn\preprocessing\data.py",
line 308, in fit
return self.partial_fit(X, y) File "C:\Users\sydgo\Anaconda3\lib\site-packages\sklearn\preprocessing\data.py",
line 334, in partial_fit
estimator=self, dtype=FLOAT_DTYPES) File "C:\Users\sydgo\Anaconda3\lib\site-packages\sklearn\utils\validation.py",
line 433, in check_array array = np.array(array, dtype=dtype, order=order, copy=copy) TypeError: float() argument must be a string or a number, not
'Timestamp'
预期dtype
之间存在冲突
和
实际传递的数据类型:
TypeError: float() argument must be a string or a number, not 'Timestamp'
最有可能修改的是:
中的转换data['DateTime'] = pd.to_datetime(data.Date + ' ' + data.Time)
你必须回到这个概念,什么(如果有的话)应该作为一组预期的外汇量化建模特征输入到 LSTM 模型中。
这是修复错误后的代码
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential
column_names = ['Date', 'Time', 'Open', 'High', 'Low','Close', 'Volume']
df = pd.read_csv(r"E:\Tutorial\EURUSD60.csv", header=None, names=column_names)
df['DateTime'] = pd.to_datetime(df.Date + ' ' + df.Time)
del df['Date']
del df['Time']
df.rename(columns={'DateTime': 'timestamp', 'Open': 'open',
'High': 'high', 'Low': 'low', 'Close': 'close', 'Volume': 'volume'}, inplace=True)
df['timestamp'] = pd.to_datetime(df['timestamp'], infer_datetime_format=True)
df.set_index('timestamp', inplace=True)
df = df.astype(float)
df['hour'] = df.index.hour
df['day'] = df.index.weekday
df['week'] = df.index.week
sequence_length = 21
n_features = len(df.columns)
val_ratio = 0.1
n_epochs = 300
batch_size = 512
data = df.as_matrix()
data_processed = []
for index in range(len(data) - sequence_length):
data_processed.append(data[index: index + sequence_length])
data_processed = np.array(data_processed)
val_split = round((1 - val_ratio) * data_processed.shape[0])
train = data_processed[: int(val_split), :]
val = data_processed[int(val_split):, :]
print('Training data: {}'.format(train.shape))
print('Validation data: {}'.format(val.shape))
train_samples, train_nx, train_ny = train.shape
val_samples, val_nx, val_ny = val.shape
train = train.reshape((train_samples, train_nx * train_ny))
val = val.reshape((val_samples, val_nx * val_ny))
preprocessor = MinMaxScaler().fit(train)
train = preprocessor.transform(train)
val = preprocessor.transform(val)
train = train.reshape((train_samples, train_nx, train_ny))
val = val.reshape((val_samples, val_nx, val_ny))
X_train = train[:, : -1]
y_train = train[:, -1][:, -1]
X_val = val[:, : -1]
y_val = val[:, -1][:, -1]
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], n_features))
X_val = np.reshape(X_val, (X_val.shape[0], X_val.shape[1], n_features))
model = Sequential()
model.add(LSTM(input_shape=(X_train.shape[1:]), units=128, return_sequences=True))
model.add(Dropout(0.5))
model.add(LSTM(128, return_sequences=False))
model.add(Dropout(0.25))
model.add(Dense(units=1))
model.add(Activation("linear"))
model.compile(loss="mse", optimizer="adam")
history = model.fit(
X_train,
y_train,
batch_size=batch_size,
epochs=n_epochs,
verbose=2)
preds_val = model.predict(X_val)
diff = []
for i in range(len(y_val)):
pred = preds_val[i][0]
diff.append(y_val[i] - pred)
real_min = preprocessor.data_min_[104]
real_max = preprocessor.data_max_[104]
print(preprocessor.data_min_[:120])
print(preprocessor.data_max_[:120])
preds_real = preds_val * (real_max - real_min) + real_min
y_val_real = y_val * (real_max - real_min) + real_min
plt.plot(preds_real, label='Predictions')
plt.plot(y_val_real, label='Actual values')
plt.xlabel('test')
plt.legend(loc=0)
plt.show()