PyTorch attaches an extra connection when building a model
I have the following ResNet prototype in PyTorch:
Resnet_Classifier(
(activation): ReLU()
(model): Sequential(
(0): Res_Block(
(mod): Sequential(
(0): Conv1d(1, 200, kernel_size=(5,), stride=(1,), padding=same)
(1): ReLU()
(2): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(3): Conv1d(200, 200, kernel_size=(5,), stride=(1,), padding=same)
(4): ReLU()
(5): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(6): Conv1d(200, 200, kernel_size=(5,), stride=(1,), padding=same)
(7): ReLU()
(8): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(shortcut): Conv1d(1, 200, kernel_size=(1,), stride=(1,), padding=same)
)
(1): ReLU()
(2): Flatten(start_dim=1, end_dim=-1)
(3): Dropout(p=0.1, inplace=False)
(4): Linear(in_features=40000, out_features=2, bias=True)
(5): Softmax(dim=1)
)
)
The input sample shape is (1, 200).
This looks perfectly fine, but when I try to get the graph in TensorBoard, I get the following structure:
Somehow my residual block is connected to the Linear layer. Does this connection actually match my network structure?
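For context, the graph was exported roughly like this (a minimal sketch; R is the nn.Sequential assembled in the model definition below, and the log directory and batch size are arbitrary choices):

import torch
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter('runs/resnet_proto')  # log directory is an arbitrary choice
dummy = torch.randn(1, 1, 200)               # the stated (1, 200) sample shape plus a batch dim
writer.add_graph(R, dummy)                   # R is defined in the model code below
writer.close()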
Model definition:
from copy import deepcopy

import torch.nn as nn

class Res_Block(nn.Module):
    def __init__(self, in_ch, out_ch, ks, stride, activation):
        super(Res_Block, self).__init__()
        # main branch: three conv -> activation -> batchnorm stages
        self.mod = nn.Sequential(
            nn.Conv1d(in_ch, out_ch, ks, stride, padding='same'),
            deepcopy(activation),
            nn.BatchNorm1d(out_ch),
            nn.Conv1d(out_ch, out_ch, ks, stride, padding='same'),
            deepcopy(activation),
            nn.BatchNorm1d(out_ch),
            nn.Conv1d(out_ch, out_ch, ks, stride, padding='same'),
            deepcopy(activation),
            nn.BatchNorm1d(out_ch)
        )
        # 1x1 conv so the skip connection matches the main branch's channel count
        self.shortcut = nn.Conv1d(in_ch, out_ch, kernel_size=1, stride=1, padding='same')

    def forward(self, X):
        return self.mod(X) + self.shortcut(X)
layers = []
layers.append(Res_Block(1, 200, 5, 1, nn.ReLU()))
layers.append(nn.ReLU())
layers.append(nn.Flatten())     # 200 channels x length 200 -> 40000 features
layers.append(nn.Dropout(0.2))  # note: the printout above shows p=0.1; this snippet uses 0.2
layers.append(nn.Linear(200 * 200, 2))
layers.append(nn.Softmax(dim=1))
R = nn.Sequential(*layers)
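A quick shape check (a sketch; the batch size of 8 is arbitrary) confirms that padding='same' with stride 1 keeps the length at 200, so Flatten yields the 200 * 200 = 40000 features the Linear layer expects:

import torch

x = torch.randn(8, 1, 200)  # (batch, channels, length)
print(R(x).shape)           # torch.Size([8, 2])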
The model visualization does seem incorrect: since the main branch and the skip connection are encapsulated inside your Res_Block definition, they should not appear outside the red Res_Block[0] box; they should show up inside it.
I solved the problem by removing the nn.Sequential from Res_Block's __init__ and adding self.l1, self.l2, ... instead. (I also removed some layers and added a maxpool, but only after I had solved the problem.)
class Res_Block(nn.Module):
    def __init__(self, in_shape, out_ch, ks, stride, activation):
        super(Res_Block, self).__init__()
        # same idea as before, but the layers are registered one by one
        # instead of being wrapped in an nn.Sequential
        self.l1 = nn.Conv1d(in_shape, out_ch, ks, stride, padding='same')
        self.l2 = deepcopy(activation)
        self.l3 = nn.BatchNorm1d(out_ch)
        self.l4 = nn.Conv1d(out_ch, out_ch, ks, stride, padding='same')
        self.l5 = nn.BatchNorm1d(out_ch)
        self.shortcut = nn.Conv1d(in_shape, out_ch, kernel_size=1, stride=1, padding='same')

    def forward(self, X):
        return self.l5(self.l4(self.l3(self.l2(self.l1(X))))) + self.shortcut(X)
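As a sanity check (a sketch, reusing the dummy-input convention from above), the reworked block still maps a (batch, 1, 200) input to (batch, 200, 200):

block = Res_Block(1, 200, 5, 1, nn.ReLU())
out = block(torch.randn(8, 1, 200))
print(out.shape)  # torch.Size([8, 200, 200])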
The corresponding TensorBoard structure is:
The only remaining question is why this change solved the problem.