convLSTM size mismatch
I am trying to implement just a single convolutional LSTM cell and pass a (1, 3, 128, 128) tensor through it. I am getting a size mismatch error.
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable


class ConvLSTMCell(nn.Module):
    def __init__(self, input_size, input_dim, hidden_dim, kernel_size, bias):
        """
        Parameters
        ----------
        input_size: (int, int)
            Height and width of input tensor as (height, width).
        input_dim: int
            Number of channels of input tensor.
        hidden_dim: int
            Number of channels of hidden state.
        kernel_size: (int, int)
            Size of the convolutional kernel.
        bias: bool
            Whether or not to add the bias.
        """
        super(ConvLSTMCell, self).__init__()

        self.height, self.width = input_size
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.kernel_size = kernel_size
        # self.padding = kernel_size[0] // 2, kernel_size[1] // 2
        self.bias = bias

        self.conv = nn.Conv2d(in_channels=self.input_dim + self.hidden_dim,
                              out_channels=4 * self.hidden_dim,
                              kernel_size=self.kernel_size,
                              # padding=self.padding,
                              bias=self.bias)

    def forward(self, input, prev_state):
        h_prev, c_prev = prev_state
        print('x: {}\nh_prev: {}\nc_prev: {}'.format(x.size(), h_prev.size(), c_prev.size()))

        combined = torch.cat((input, h_prev), dim=1)  # concatenate along channel axis
        print('combined: {}'.format(combined.size()))

        combined_conv = self.conv(combined)
        print('combined_conv: {}'.format(combined_conv.size()))

        cc_i, cc_f, cc_o, cc_g = torch.split(combined_conv, self.hidden_dim, dim=1)
        print('cc_i: {}\ncc_f: {}\ncc_o: {}\ncc_g: {}'.format(cc_i.size(), cc_f.size(), cc_o.size(), cc_g.size()))

        i = torch.sigmoid(cc_i)
        f = torch.sigmoid(cc_f)
        o = torch.sigmoid(cc_o)
        g = torch.tanh(cc_g)
        print('i: {}\nf: {}\no: {}\ng: {}'.format(i.size(), f.size(), o.size(), g.size()))

        c_cur = f * c_prev + i * g
        h_cur = o * F.tanh(c_cur)
        print('c_cur: {}\nh_cur: {}'.format(c_cur.size(), h_cur.size()))

        return h_cur, c_cur

    def init_hidden(self, batch_size):
        return (Variable(torch.zeros(batch_size, self.hidden_dim, self.height, self.width)),
                Variable(torch.zeros(batch_size, self.hidden_dim, self.height, self.width)))


x = torch.randn(1, 3, 128, 128)
model = ConvLSTMCell(input_size=(128, 128), input_dim=3, hidden_dim=3, kernel_size=(5, 5),
                     bias=True)
hc = model.init_hidden(batch_size=1)

if gpu:
    x.cuda()
    model.cuda()
    hc.cuda()

out = model(x, hc)
print(out.size())
I get the following output and error:
x: torch.Size([1, 3, 128, 128])
h_prev: torch.Size([1, 3, 128, 128])
c_prev: torch.Size([1, 3, 128, 128])
combined: torch.Size([1, 6, 128, 128])
combined_conv: torch.Size([1, 12, 124, 124])
cc_i: torch.Size([1, 3, 124, 124])
cc_f: torch.Size([1, 3, 124, 124])
cc_o: torch.Size([1, 3, 124, 124])
cc_g: torch.Size([1, 3, 124, 124])
i: torch.Size([1, 3, 124, 124])
f: torch.Size([1, 3, 124, 124])
o: torch.Size([1, 3, 124, 124])
g: torch.Size([1, 3, 124, 124])
Traceback (most recent call last):
  File "trial.py", line 87, in <module>
    out = model(x, hc)
  File "/Users/abcde/opt/anaconda3/envs/matrix/lib/python3.7/site-packages/torch/nn/modules/module.py", line 541, in __call__
    result = self.forward(*input, **kwargs)
  File "trial.py", line 66, in forward
    c_cur = f * c_prev + i * g
RuntimeError: The size of tensor a (124) must match the size of tensor b (128) at non-singleton dimension 3
I would like to use this cell to build a network of 17 cells, and I want to use the output of every cell to compute a loss against the ground truth. The ground truth consists of 18 images of shape (3, 128, 128).

How do I get my network to output hidden states of the same size as the input?
Your output is smaller because of boundary effects - the convolution operation only computes values at coordinates where the kernel fits entirely inside the input. The simplest solution is to apply padding to your convolutional layer (which you seem to have tried already - what was the problem with it?). With a kernel size of 5 you should pad by 2, and then the output of the convolution will have the same spatial shape as the input.
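A minimal sketch of that fix, assuming the commented-out padding lines from your question are simply re-enabled (the standalone conv below is just for illustration, not your full cell). For a 5x5 kernel, padding = 5 // 2 = 2, so H_out = H_in - 5 + 2*2 + 1 = H_in:

    import torch
    import torch.nn as nn

    input_dim, hidden_dim = 3, 3
    kernel_size = (5, 5)
    # same computation as the commented-out line in your __init__
    padding = kernel_size[0] // 2, kernel_size[1] // 2  # (2, 2) for a 5x5 kernel

    conv = nn.Conv2d(in_channels=input_dim + hidden_dim,
                     out_channels=4 * hidden_dim,
                     kernel_size=kernel_size,
                     padding=padding,  # keeps the spatial size unchanged
                     bias=True)

    combined = torch.randn(1, input_dim + hidden_dim, 128, 128)  # stand-in for cat(input, h_prev)
    print(conv(combined).shape)  # torch.Size([1, 12, 128, 128])

With the padding restored, the gates i, f, o, g all come out as (1, 3, 128, 128), so f * c_prev + i * g no longer raises the size mismatch.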