RuntimeError: Expected hidden[0] size (2, 1, 100), got (1, 1, 100)
I built an LSTM model and it works, but only as long as I set num_layers = 1.
If I set it to 2, for example, it fails with a long error message that ends with:
RuntimeError: Expected hidden[0] size (2, 1, 100), got (1, 1, 100)
I am still fairly new to Python and deep learning, so I could use some advice on how to solve this.
Code:
import torch
import torch.nn as nn
import numpy as np
import dataset
import time
amount_hidden_layers = 2
amount_neurons_hidden_layers = 100
input_layer_size = 12
output_layer_size = 48
small = True
hours = 20
learning_rate = 1/1000  # 0.001
early_stopping = False
train_obs_dataset = dataset.return_observation_dataset(hours=hours, split="train", small=small)
val_obs_dataset = dataset.return_observation_dataset(hours=hours, split="val", small=small)
test_obs_dataset = dataset.return_observation_dataset(hours=hours, split="test", small=small)
class LSTM(nn.Module):
def __init__(self, input_layer_size, hidden_layer_size, output_layer_size,
num_layers):
super().__init__()
self.hidden_layer_size = hidden_layer_size
self.input_layer_size = input_layer_size
self.num_layers = num_layers
self.output_layer_size = output_layer_size
# lstm_layers:
self.lstm = nn.LSTM(input_size=self.input_layer_size, hidden_size=self.hidden_layer_size, num_layers=num_layers)
self.hidden_cell = (torch.zeros(self.num_layers, 1, self.hidden_layer_size),
torch.zeros(self.num_layers, 1, self.hidden_layer_size))
# output_layer:
self.linear = nn.Linear(self.hidden_layer_size, self.output_layer_size)
def forward(self, input_seq):
lstm_out, self.hidden_cell = self.lstm(input_seq.view(len(input_seq), 1, -1), self.hidden_cell)
predictions = self.linear(lstm_out.view(len(input_seq), -1))
return predictions[-1]
model = LSTM(input_layer_size=input_layer_size, hidden_layer_size=amount_neurons_hidden_layers,
output_layer_size=output_layer_size, num_layers=amount_hidden_layers)
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
def training(dataset=train_obs_dataset, epochs=5):
start_time = time.time()
    val_test = float("inf")  # for early stopping: best validation loss seen so far
    val_worse_counter = 0  # for early stopping: counts how often the validation loss gets worse
for i in range(epochs): # every epoch
        for datapoint in range(len(dataset[0])):  # every datapoint
optimizer.zero_grad()
model.hidden_cell = (torch.zeros(1, 1, model.hidden_layer_size),
torch.zeros(1, 1, model.hidden_layer_size))
y_pred = model(torch.FloatTensor(dataset[0][datapoint])) # model needs a tensor of shape [sequence][features]
            # squeeze(-1) drops the trailing singleton dimension so the target's shape matches y_pred
single_loss = loss_function(y_pred, torch.FloatTensor(dataset[1][datapoint]).squeeze(-1))
single_loss.backward()
optimizer.step()
if datapoint%100 == 1:
print(f' Time: {time.time() - start_time: 10.1f} sec, Epoch: {i:4} Datapoint: {datapoint:3} Loss: {single_loss.item():10.8f} ')
                #print(y_pred[0])
            # early stopping: periodically evaluate on the validation data and stop if the loss diverges
            if early_stopping:
if datapoint%20 == 1:
val = test(val_obs_dataset)
if (val[0] <= val_test):
val_test = val[0]
val_worse_counter = 0
if(val[0] > val_test):
val_worse_counter = val_worse_counter + 1
if(val[0] >= 100):
break
print("Val: ", val[0], val[1][0], "val_test: ", val_test, " val_worse_counter: ", val_worse_counter)
"""
returns a tupel with 1. the average loss and 2. an array with the losses for average losses seperated for all 48 hours
"""
def test(dataset=val_obs_dataset):
with torch.no_grad():
        losses = []  # separated per hour
loss = 0
for i in range(48):
losses.append(0)
        for datapoint in range(len(dataset)):
            val_pred = model(torch.FloatTensor(dataset[0][datapoint]))
            # separate the loss values per hour
for i in range(48):
losses[i] = losses[i] + float(loss_function(val_pred[i], torch.FloatTensor(dataset[1][datapoint]).squeeze(-1)[i]))
loss = loss + float(loss_function(val_pred, torch.FloatTensor(dataset[1][datapoint]).squeeze(-1)))
        for i in range(48):  # calculate the averages
            losses[i] = losses[i] / len(dataset)
        loss = loss / len(dataset)
return (loss, losses)
print(model)
training(epochs=200)
Thanks in advance for every helpful comment.
That's because of this line in your training loop:
model.hidden_cell = (torch.zeros(1, 1, model.hidden_layer_size),
torch.zeros(1, 1, model.hidden_layer_size))
Even though you defined hidden_cell correctly in the model, here you hard-code num_layers to 1 and overwrite what your model already got right.
To fix it, change it to this:
model.hidden_cell = (torch.zeros(model.num_layers, 1, model.hidden_layer_size),
torch.zeros(model.num_layers, 1, model.hidden_layer_size))
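For reference, PyTorch's nn.LSTM expects h_0 and c_0 with shape (num_layers * num_directions, batch, hidden_size), so with two layers, batch size 1 and hidden size 100 it wants exactly the (2, 1, 100) from your error message. A minimal standalone sketch with your sizes (the variable names here are mine):

import torch
import torch.nn as nn

lstm = nn.LSTM(input_size=12, hidden_size=100, num_layers=2)
seq = torch.randn(20, 1, 12)   # (seq_len, batch, input_size)
h0 = torch.zeros(2, 1, 100)    # (num_layers, batch, hidden_size)
c0 = torch.zeros(2, 1, 100)
out, (hn, cn) = lstm(seq, (h0, c0))
print(out.shape)               # torch.Size([20, 1, 100])
print(hn.shape, cn.shape)      # torch.Size([2, 1, 100]) each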
Or remove it entirely, since it basically repeats what was already done in __init__, and I don't think hidden_layer_size will change during training.
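If you do want an explicit reset before every sequence, you could keep that logic inside the model so the shapes can never drift out of sync with num_layers again. A sketch (the method name reset_hidden_cell and the batch_size parameter are mine, not part of your code):

    def reset_hidden_cell(self, batch_size=1):
        # zeros of shape (num_layers, batch, hidden_size), derived from
        # the model's own attributes instead of hard-coded values
        self.hidden_cell = (torch.zeros(self.num_layers, batch_size, self.hidden_layer_size),
                            torch.zeros(self.num_layers, batch_size, self.hidden_layer_size))

The training loop would then just call model.reset_hidden_cell() instead of assigning the tuple by hand.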
Maybe once you batch your data it will make more sense to keep it there, but for now your batch_size is clearly 1.
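One more note: since you reset the state to zeros for every datapoint anyway, you could drop the manual hidden_cell handling altogether. When nn.LSTM is called without an initial state, h_0 and c_0 default to zeros of the correct shape, so the forward pass could simply be (a sketch of the simplified line, untested against your dataset):

        lstm_out, self.hidden_cell = self.lstm(input_seq.view(len(input_seq), 1, -1))
        # h_0 and c_0 default to zeros of shape (num_layers, batch, hidden_size)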