RuntimeError: Expected hidden[0] size (2, 1, 100), got (1, 1, 100)

RuntimeError: Expected hidden[0] size (2, 1, 100), got (1, 1, 100)

我建立了一个 LSTM 模型并且它有效。但只要我设置 num_layers = 1。 例如,如果我将它设置为 2,它会给我一条很长的错误消息,告诉我:

运行时错误:预期隐藏 [0] 大小 (2, 1, 100),得到 (1, 1, 100)

我在 Python 和深度学习方面还很陌生,所以我可能需要一些建议来解决我的问题。

代码:

import torch
import torch.nn as nn
import numpy as np
import dataset
import time

amount_hidden_layers = 2
amount_neurons_hidden_layers = 100

input_layer_size = 12

output_layer_size = 48
small = True
hours = 20
learning_rate = 1/1000 #0,001
early_stoping = False

train_obs_dataset = dataset.return_observation_dataset(hours=hours, split="train", small=small)
val_obs_dataset = dataset.return_observation_dataset(hours=hours, split="val", small=small)
test_obs_dataset = dataset.return_observation_dataset(hours=hours, split="test", small=small)

class LSTM(nn.Module):
    def __init__(self, input_layer_size, hidden_layer_size, output_layer_size,
                 num_layers):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size
        self.input_layer_size = input_layer_size
        self.num_layers = num_layers
        self.output_layer_size = output_layer_size

        # lstm_layers:
        self.lstm = nn.LSTM(input_size=self.input_layer_size, hidden_size=self.hidden_layer_size, num_layers=num_layers)

        self.hidden_cell = (torch.zeros(self.num_layers, 1, self.hidden_layer_size),
                            torch.zeros(self.num_layers, 1, self.hidden_layer_size))
        # output_layer:
        self.linear = nn.Linear(self.hidden_layer_size, self.output_layer_size)

    def forward(self, input_seq):
        lstm_out, self.hidden_cell = self.lstm(input_seq.view(len(input_seq), 1, -1), self.hidden_cell)
        predictions = self.linear(lstm_out.view(len(input_seq), -1))
        return predictions[-1]

model = LSTM(input_layer_size=input_layer_size, hidden_layer_size=amount_neurons_hidden_layers,
             output_layer_size=output_layer_size, num_layers=amount_hidden_layers)
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

def training(dataset=train_obs_dataset, epochs=5):
    start_time = time.time()
    val_test = float(100000000)                                                     # for early stoping, saves the last evaluation value
    val_worse_counter = 0                                                           # for early stoping, couts how often tests with validation data get worse
    for i in range(epochs):                                                         # every epoch
        for datapoint in range(dataset[0].__len__()):                               # every datapoint
            optimizer.zero_grad()
            model.hidden_cell = (torch.zeros(1, 1, model.hidden_layer_size),
                                    torch.zeros(1, 1, model.hidden_layer_size))

            y_pred = model(torch.FloatTensor(dataset[0][datapoint]))                # model needs a tensor of shape [sequence][features]
            # squeeze: does not change data, prevents error,
            single_loss = loss_function(y_pred, torch.FloatTensor(dataset[1][datapoint]).squeeze(-1))
           
            single_loss.backward()
            optimizer.step()
            if datapoint%100 == 1:
                print(f' Time: {time.time() - start_time: 10.1f} sec,   Epoch: {i:4} Datapoint: {datapoint:3} Loss:  {single_loss.item():10.8f} ')
                #print(y_pred[0])
            "early stoping: if tests with the validation data does not get better for 100 steps, the model stops"
            if (early_stoping == True):
                if datapoint%20 == 1:
                    val = test(val_obs_dataset)
                    if (val[0] <= val_test):
                        val_test = val[0]
                        val_worse_counter = 0
                    if(val[0] > val_test):
                        val_worse_counter = val_worse_counter + 1
                    if(val[0] >= 100):
                        break
                    print("Val: ", val[0], val[1][0], "val_test: ", val_test, " val_worse_counter: ", val_worse_counter)

"""
returns a tupel with 1. the average loss and 2. an array with the losses for average losses seperated for all 48 hours
"""
def test(dataset=val_obs_dataset):
    with torch.no_grad():
        losses = []                 #sperated per hour
        loss = 0
        for i in range(48):
            losses.append(0)
        for datapoint in range(dataset.__len__()):
            val_pred = model(torch.FloatTensor(dataset[0][datapoint]))
            "seperates the loss values per hour"
            for i in range(48):
                losses[i] = losses[i] + float(loss_function(val_pred[i], torch.FloatTensor(dataset[1][datapoint]).squeeze(-1)[i]))
            loss = loss + float(loss_function(val_pred, torch.FloatTensor(dataset[1][datapoint]).squeeze(-1)))
        for i in range(48):                                         # calculates the average
            losses[i] = losses[i]/dataset.__len__()
            loss = loss/dataset.__len__()
    return (loss, losses)

print(model)

training(epochs=200)

提前感谢每条有用的评论

那是因为你的训练循环中有这一行:

model.hidden_cell = (torch.zeros(1, 1, model.hidden_layer_size),
                     torch.zeros(1, 1, model.hidden_layer_size))

即使您在模型中正确定义了 hidden_cell,在这里您将 num_layers 硬编码为 1 并替换了您的模型做对了。

要修复,可以改成这样;

model.hidden_cell = (torch.zeros(model.num_layers, 1, model.hidden_layer_size),
                     torch.zeros(model.num_layers, 1, model.hidden_layer_size))

甚至完全删除它,因为您基本上是在重复已经完成的操作。而且我不认为 hidden_layer_size 会在训练中改变。

也许当你批处理你的数据,然后把它留在这里会更有意义,但很明显你的 batch_size = 1.