Creating a model whose weights are the sum of the weights of 2 different neural networks
I am running experiments on transfer learning.
I trained 2 CNNs with exactly the same structure, one on MNIST and one on SVHN.
I obtained the parameters (weights and biases) of both models.
Now I want to combine (sum, or some other operation) these weights. Something like this:
modelMNIST.parameters()
modelSVHN.parameters()
#now the new model
model3 = MyCNN(1)
model3.parameters = modelMNIST.parameters()+modelSVHN.parameters()
If I do this, I get this error:
SyntaxError: can't assign to function call
And with this:
model3.block_1[0].weight = modelMNIST.block_1[0].weight + modelSVHN.block_1[0].weight
I get this error:
TypeError: cannot assign 'torch.cuda.FloatTensor' as parameter 'weight' (torch.nn.Parameter or None expected)
Is there any way to combine the weights of different models?
You need to update the .data attribute of the parameters. A Parameter is not a FloatTensor, hence the error.
Since both networks are identical, you can update the weights with the following code:
for param1, param2 in zip(modelMNIST.parameters(), modelSVHN.parameters()):
    param1.data += param2.data
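Note that the loop above writes the sum into modelMNIST in place. If you would rather keep both source models untouched and write the summed weights into a fresh model3, a minimal sketch could look like this (it assumes model3 = MyCNN(1) from the question has exactly the same architecture as the two source models, so that parameters() line up one-to-one):

import torch

# modelMNIST, modelSVHN and MyCNN are assumed to exist as in the question
model3 = MyCNN(1)
with torch.no_grad():  # avoid tracking these in-place copies in autograd
    for p3, p1, p2 in zip(model3.parameters(),
                          modelMNIST.parameters(),
                          modelSVHN.parameters()):
        p3.copy_(p1 + p2)  # overwrite model3's weights with the element-wise sum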
My solution is this:
import torch
import torch.nn as nn


class VGG16SUM(nn.Module):
    def __init__(self, model1, model2, num_classes):
        super(VGG16SUM, self).__init__()

        # calculate same padding:
        # (w - k + 2*p)/s + 1 = o
        # => p = (s(o-1) - w + k)/2

        self.block_1 = nn.Sequential(
            nn.Conv2d(in_channels=1,
                      out_channels=64,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      # (1(32-1)- 32 + 3)/2 = 1
                      padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(in_channels=64,
                      out_channels=64,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2),
                         stride=(2, 2))
        )

        self.block_2 = nn.Sequential(
            nn.Conv2d(in_channels=64,
                      out_channels=128,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(in_channels=128,
                      out_channels=128,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2),
                         stride=(2, 2))
        )

        self.block_3 = nn.Sequential(
            nn.Conv2d(in_channels=128,
                      out_channels=256,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(in_channels=256,
                      out_channels=256,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(in_channels=256,
                      out_channels=256,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2),
                         stride=(2, 2))
        )

        self.block_4 = nn.Sequential(
            nn.Conv2d(in_channels=256,
                      out_channels=512,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.Conv2d(in_channels=512,
                      out_channels=512,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.Conv2d(in_channels=512,
                      out_channels=512,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2),
                         stride=(2, 2))
        )

        self.classifier = nn.Sequential(
            nn.Linear(2048, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.25),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.25),
            nn.Linear(4096, num_classes),
        )
        # initialize this model's weights as the element-wise sum of the two source models
        for p_out, p_in1, p_in2 in zip(self.parameters(), model1.parameters(), model2.parameters()):
            p_out.data = p_in1.data + p_in2.data
    def forward(self, x):
        x = self.block_1(x)
        x = self.block_2(x)
        x = self.block_3(x)
        x = self.block_4(x)
        # x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x
        # logits = self.classifier(x)
        # probas = F.softmax(logits, dim=1)
        # probas = nn.Softmax(logits)
        # return probas
        # return logits
It works!!!
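For completeness, a hypothetical instantiation of the class above could look like the following (num_classes=10 is an assumption for the MNIST/SVHN digit classes; both source models must have the exact same architecture as VGG16SUM so that parameters() line up one-to-one):

# hypothetical usage, assuming modelMNIST and modelSVHN are trained instances
# of the same architecture as VGG16SUM
model3 = VGG16SUM(modelMNIST, modelSVHN, num_classes=10)
model3.eval()  # switch to eval mode (affects Dropout/BatchNorm) before inference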