PyTorch LSTM for Daily Stock Return Prediction - Train loss is consistently higher than test loss
I was wondering if anyone could share some ideas on why my training loss starts off at a higher level than my test loss?
I'm running an LSTM with daily stock return data as the only input, using the previous 10 days to predict the next day's price. The training/test/validation sets don't overlap, so there is no leakage, and I'm not using any regularisation that would only affect the training data.
I'm really confused at the moment because I can't seem to find the mistake.
I'll include the code below, but it's quite long.
# Defining the LSTM class
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler, MinMaxScaler
class LSTM(nn.Module):
    def __init__(self, n_inputs, n_hidden, num_layers, n_outputs):
        super(LSTM, self).__init__()
        self.D = n_inputs
        self.M = n_hidden
        self.K = n_outputs
        self.L = num_layers
        self.rnn = nn.LSTM(
            input_size=self.D,
            hidden_size=self.M,
            num_layers=self.L,
            batch_first=True)
        self.fc = nn.Linear(self.M, self.K)

    def forward(self, X):
        # initial hidden states
        h0 = torch.zeros(self.L, X.size(0), self.M).to(device)
        c0 = torch.zeros(self.L, X.size(0), self.M).to(device)
        # get RNN unit output
        out, _ = self.rnn(X, (h0, c0))
        # we only want h(T) at the final time step
        out = self.fc(out[:, -1, :])
        return out
# Defining a function to train the LSTM
def full_gd(model,
            loss_function,
            optimizer,
            X_train,
            y_train,
            X_test,
            y_test,
            no_epochs):
    # Stuff to store
    train_losses = np.zeros(no_epochs)
    test_losses = np.zeros(no_epochs)
    for it in range(no_epochs):
        # zero the parameter gradients
        optimizer.zero_grad()
        # Forward pass
        outputs = model(X_train)
        loss = loss_function(outputs, y_train)
        # Backward and optimize
        loss.backward()
        optimizer.step()
        # Save losses
        train_losses[it] = loss.item()
        # Test loss
        test_outputs = model(X_test)
        test_loss = loss_function(test_outputs, y_test)
        test_losses[it] = test_loss.item()
        if (it + 1) % 10 == 0:
            print(f'Epoch {it+1}/{no_epochs}, Train Loss: {loss.item():.4f}, Test Loss: {test_loss.item():.4f}')
    return train_losses, test_losses
# Use sklearn's StandardScaler to scale the returns data (fitted on the training portion only)
scaler = StandardScaler()
scaler.fit(data[:3*len(data)//5])
historical_returns = scaler.transform(data)

# Creating the dataset to train the LSTM. D is the number of input features. T is the number of data points used in forecasting
T = 10
D = 1
X = []
Y = []
for t in range(len(historical_returns) - T):
    x = historical_returns[t:t+T]
    X.append(x)
    y = historical_returns[t+T]
    Y.append(y)
X_historical = np.array(X).reshape(-1, T, 1)
Y_historical = np.array(Y).reshape(-1, 1)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Splitting the data into a 60/20/20 train/validation/test split. No random split is used here as this is a time series dataset
x_train1 = torch.from_numpy(X_historical[:3*len(historical_returns)//5].astype(np.float32))
y_train1 = torch.from_numpy(Y_historical[:3*len(historical_returns)//5].astype(np.float32))
x_val1 = torch.from_numpy(X_historical[-2*len(historical_returns)//5: -1*len(historical_returns)//5].astype(np.float32))
y_val1 = torch.from_numpy(Y_historical[-2*len(historical_returns)//5: -1*len(historical_returns)//5].astype(np.float32))
x_test1 = torch.from_numpy(X_historical[-1*len(historical_returns)//5:].astype(np.float32))
y_test1 = torch.from_numpy(Y_historical[-1*len(historical_returns)//5:].astype(np.float32))
# move data to GPU
x_train1, y_train1 = x_train1.to(device), y_train1.to(device)
x_val1, y_val1 = x_val1.to(device), y_val1.to(device)
x_test1, y_test1 = x_test1.to(device), y_test1.to(device)
x_train1 = x_train1.reshape(-1, T, 1)
x_test1 = x_test1.reshape(-1, T, 1)
x_val1 = x_val1.reshape(-1, T, 1)
# Define the model parameters
Hidden = 10
model = LSTM(1, Hidden, 1, 1)
model.to(device)
loss_function = nn.MSELoss()
learning_rate = 0.01
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Train the model
no_epochs = 200
train_losses, validation_losses = full_gd(model,
                                          loss_function,
                                          optimizer,
                                          x_train1,
                                          y_train1,
                                          x_val1,
                                          y_val1,
                                          no_epochs)
# Plot training and validation loss
plt.figure(figsize=(12,8))
plt.plot(train_losses, label='train loss')
plt.plot(validation_losses, label='validation loss')
plt.legend()
plt.show()
Well, there could be several reasons:
- Your task is hard, or the data you have is hard.
- Your validation split happens to contain comparatively easy examples.
Another natural cause of this is the dataset size, since the validation split is much smaller than the training split. In theory, with random guessing (which is roughly the initial state of the model), you are more likely to rack up failures over a large number of guesses.
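A quick way to test whether the validation period really is "easier" is to score a trivial baseline on both splits. The snippet below is only a sketch against the y_train1 and y_val1 tensors defined in the question's code; because the targets are standardised on the training period, a constant zero prediction gives each split's variance as its MSE:

# MSE of a constant zero prediction (the training mean after standardisation).
# A much lower value on the validation split would mean that period simply has
# lower-variance returns, so a gap between the two loss curves is expected even
# before the model has learned anything.
with torch.no_grad():
    baseline_train_mse = torch.mean(y_train1 ** 2).item()
    baseline_val_mse = torch.mean(y_val1 ** 2).item()
print(f'Zero-prediction MSE - train: {baseline_train_mse:.4f}, validation: {baseline_val_mse:.4f}')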
Your model does not seem to be learning: it performs poorly even on the training data, which is not what you want. Keep in mind that RNNs are hard to train. There are a few things you can try that may help, such as increasing the number of epochs or making the model more complex, as sketched below. If you can compare your results with other work on the same data, you should do so; that will tell you how good or bad your experiments are.
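As a concrete starting point for those suggestions, you could reuse the LSTM class and full_gd function from the question with a larger configuration and a longer run; the numbers below are only illustrative, not tuned values:

# Hypothetical larger configuration - the exact numbers are arbitrary and worth tuning.
Hidden = 64      # more hidden units than the original 10
Layers = 2       # stacked LSTM layers
model = LSTM(1, Hidden, Layers, 1)
model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)  # a smaller learning rate is often more stable
no_epochs = 1000
train_losses, validation_losses = full_gd(model, loss_function, optimizer,
                                          x_train1, y_train1, x_val1, y_val1,
                                          no_epochs)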