预期隐藏 [0] 大小 (2, 8, 256),得到 [8, 256]
Expected hidden[0] size (2, 8, 256), got [8, 256]
我有如下正确的打印隐藏层形状。
print(h0.shape)
print(x.shape)
torch.Size([2, 8, 256])
torch.Size([8, 300, 300])
但我仍然有错误 Expected hidden[0] size (2, 8, 256), got [8, 256]
有什么问题吗?
整个代码如下
import torch
import torch.nn as nn
import torchvision
import matplotlib.pyplot as plt
import torchvision.transforms as tt
from torchvision.datasets import ImageFolder
from PIL import Image
import numpy as np
from torch.autograd import Variable
seq_len = input_size
hidden_size = 256 #size of hidden layers
num_classes = 5
num_epochs = 20
batch_size = 8
learning_rate = 0.001
# Fully connected neural network with one hidden layer
num_layers = 2 # 2 RNN layers are stacked
class LSTM(nn.Module):
def __init__(self, input_size, hidden_size, num_layers, num_classes):
super(LSTM, self).__init__()
self.num_layers = num_layers
self.hidden_size = hidden_size
self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)#batch must have first dimension
#our inpyt needs to have shape
#x -> (batch_size, seq, input_size)
self.fc = nn.Linear(hidden_size, num_classes)#this fc is after RNN. So needs the last hidden size of RNN
def forward(self, x):
#according to ducumentation of RNN in pytorch
#rnn needs input, h_0 for inputs at RNN (h_0 is initial hidden state)
#the following one is initial hidden layer
h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)#first one is number of layers and second one is batch size
#output has two outputs. The first tensor contains the output features of the hidden last layer for all time steps
#the second one is hidden state f
print(h0.shape)
print(x.shape)
out, _ = self.lstm(x, h0)
print(out.shape)
#output has batch_size, seq_len, hidden size
#we need to decode hidden state only the last time step
#out (N, 30, 128)
#Since we need only the last time step
#Out (N, 128)
out = out[:, -1, :] #-1 for last time step, take all for N and 128
out = self.fc(out)
return out
stacked_lstm_model = LSTM(input_size, hidden_size, num_layers, num_classes).to(device)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()#cross entropy has softmax at output
optimizer = torch.optim.Adam(stacked_lstm_model.parameters(), lr=learning_rate) #optimizer used gradient optimization using Adam
# Train the model
n_total_steps = len(train_dl)
for epoch in range(num_epochs):
t_losses=[]
for i, (images, labels) in enumerate(train_dl):
# origin shape: [8, 1, 300, 300]
# resized: [8, 300, 300]
images = images.reshape(-1, seq_len, input_size).to(device)
labels = labels.to(device)
# Forward pass
outputs = stacked_lstm_model(images)
loss = criterion(outputs, labels)
t_losses.append(loss)
# Backward and optimize
optimizer.zero_grad()
loss.backward()
optimizer.step()
if (i+1) % 100 == 0:
print (f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')
avgd_trainloss = sum(t_losses)/len(t_losses)
acc=0
v_losses=[]
with torch.no_grad():
n_correct = 0
n_samples = 0
for v_images, v_labels in valid_dl:
v_images = v_images.reshape(-1, seq_len, input_size).to(device)
v_labels = v_labels.to(device)
v_outputs = stacked_lstm_model(v_images)
v_loss = criterion(v_outputs, v_labels)
v_losses.append(v_loss)
# max returns (value ,index)
_, v_predicted = torch.max(v_outputs.data, 1)
n_samples += v_labels.size(0)
n_correct += (v_predicted == v_labels).sum().item()
acc = 100.0 * n_correct / n_samples
avgd_validloss = sum(v_losses)/len(v_losses)
print (f'Epoch [{epoch+1}/{num_epochs}], Train loss: {avgd_trainloss.item():.4f}, Valid loss: {avgd_validloss.item():.4f}, Valid accu: {acc.item():.2f}')
# Test the model
# In test phase, we don't need to compute gradients (for memory efficiency)
with torch.no_grad():
n_correct = 0
n_samples = 0
for images, labels in test_dl:
images = images.reshape(-1, seq_len, input_size).to(device)
labels = labels.to(device)
outputs = stacked_lstm_model(images)
# max returns (value ,index)
_, predicted = torch.max(outputs.data, 1)
n_samples += labels.size(0)
n_correct += (predicted == labels).sum().item()
acc = 100.0 * n_correct / n_samples
print(f'Accuracy of the network on test images: {acc} %')
LSTM 需要两个隐藏状态,而不是一个。所以而不是
h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
使用
h0 = (torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device), torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device))
所以你需要一个元组中的两个隐藏状态。
我有如下正确的打印隐藏层形状。
print(h0.shape)
print(x.shape)
torch.Size([2, 8, 256])
torch.Size([8, 300, 300])
但我仍然有错误 Expected hidden[0] size (2, 8, 256), got [8, 256]
有什么问题吗?
整个代码如下
import torch
import torch.nn as nn
import torchvision
import matplotlib.pyplot as plt
import torchvision.transforms as tt
from torchvision.datasets import ImageFolder
from PIL import Image
import numpy as np
from torch.autograd import Variable
seq_len = input_size
hidden_size = 256 #size of hidden layers
num_classes = 5
num_epochs = 20
batch_size = 8
learning_rate = 0.001
# Fully connected neural network with one hidden layer
num_layers = 2 # 2 RNN layers are stacked
class LSTM(nn.Module):
def __init__(self, input_size, hidden_size, num_layers, num_classes):
super(LSTM, self).__init__()
self.num_layers = num_layers
self.hidden_size = hidden_size
self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)#batch must have first dimension
#our inpyt needs to have shape
#x -> (batch_size, seq, input_size)
self.fc = nn.Linear(hidden_size, num_classes)#this fc is after RNN. So needs the last hidden size of RNN
def forward(self, x):
#according to ducumentation of RNN in pytorch
#rnn needs input, h_0 for inputs at RNN (h_0 is initial hidden state)
#the following one is initial hidden layer
h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)#first one is number of layers and second one is batch size
#output has two outputs. The first tensor contains the output features of the hidden last layer for all time steps
#the second one is hidden state f
print(h0.shape)
print(x.shape)
out, _ = self.lstm(x, h0)
print(out.shape)
#output has batch_size, seq_len, hidden size
#we need to decode hidden state only the last time step
#out (N, 30, 128)
#Since we need only the last time step
#Out (N, 128)
out = out[:, -1, :] #-1 for last time step, take all for N and 128
out = self.fc(out)
return out
stacked_lstm_model = LSTM(input_size, hidden_size, num_layers, num_classes).to(device)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()#cross entropy has softmax at output
optimizer = torch.optim.Adam(stacked_lstm_model.parameters(), lr=learning_rate) #optimizer used gradient optimization using Adam
# Train the model
n_total_steps = len(train_dl)
for epoch in range(num_epochs):
t_losses=[]
for i, (images, labels) in enumerate(train_dl):
# origin shape: [8, 1, 300, 300]
# resized: [8, 300, 300]
images = images.reshape(-1, seq_len, input_size).to(device)
labels = labels.to(device)
# Forward pass
outputs = stacked_lstm_model(images)
loss = criterion(outputs, labels)
t_losses.append(loss)
# Backward and optimize
optimizer.zero_grad()
loss.backward()
optimizer.step()
if (i+1) % 100 == 0:
print (f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')
avgd_trainloss = sum(t_losses)/len(t_losses)
acc=0
v_losses=[]
with torch.no_grad():
n_correct = 0
n_samples = 0
for v_images, v_labels in valid_dl:
v_images = v_images.reshape(-1, seq_len, input_size).to(device)
v_labels = v_labels.to(device)
v_outputs = stacked_lstm_model(v_images)
v_loss = criterion(v_outputs, v_labels)
v_losses.append(v_loss)
# max returns (value ,index)
_, v_predicted = torch.max(v_outputs.data, 1)
n_samples += v_labels.size(0)
n_correct += (v_predicted == v_labels).sum().item()
acc = 100.0 * n_correct / n_samples
avgd_validloss = sum(v_losses)/len(v_losses)
print (f'Epoch [{epoch+1}/{num_epochs}], Train loss: {avgd_trainloss.item():.4f}, Valid loss: {avgd_validloss.item():.4f}, Valid accu: {acc.item():.2f}')
# Test the model
# In test phase, we don't need to compute gradients (for memory efficiency)
with torch.no_grad():
n_correct = 0
n_samples = 0
for images, labels in test_dl:
images = images.reshape(-1, seq_len, input_size).to(device)
labels = labels.to(device)
outputs = stacked_lstm_model(images)
# max returns (value ,index)
_, predicted = torch.max(outputs.data, 1)
n_samples += labels.size(0)
n_correct += (predicted == labels).sum().item()
acc = 100.0 * n_correct / n_samples
print(f'Accuracy of the network on test images: {acc} %')
LSTM 需要两个隐藏状态,而不是一个。所以而不是
h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
使用
h0 = (torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device), torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device))
所以你需要一个元组中的两个隐藏状态。