如何在 Pytorch 中实现收缩自动编码器?

How to implement contractive autoencoder in Pytorch?

我正在尝试在 PyTorch 中实现一个收缩自编码器(contractive autoencoder)。我找到了 this thread,并按照其中的做法进行了尝试。下面是我根据该帖子编写的代码片段:

import datetime
import numpy as np 
import torch
import torchvision
from torchvision import datasets, transforms
from torchvision.utils import save_image, make_grid
import torch.nn as nn 
import torch.nn.functional as F 
import torch.optim as optim
import matplotlib.pyplot as plt 
%matplotlib inline

# Loader settings shared by the training and test pipelines.
batch_size = 128
num_workers = 2

# MNIST splits rooted at 'MNIST' (download=True); every image goes through
# ToTensor() before it is handed to the model.
dataset_train = datasets.MNIST(
    root='MNIST', train=True, transform=transforms.ToTensor(), download=True
)
dataset_test = datasets.MNIST(
    root='MNIST', train=False, transform=transforms.ToTensor(), download=True
)

# Only the training loader shuffles; the test loader keeps the default order.
dataloader_train = torch.utils.data.DataLoader(
    dataset_train,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
    pin_memory=True,
)
dataloader_test = torch.utils.data.DataLoader(
    dataset_test,
    batch_size=batch_size,
    num_workers=num_workers,
    pin_memory=True,
)

def view_images(imgs, labels, rows = 4, cols =11):
    """Draw a batch of images on a ``rows`` x ``cols`` grid, titled by label.

    Args:
        imgs: 4-D image tensor; it is transposed with ``(0, 2, 3, 1)`` before
            plotting (presumably ``(N, C, H, W)`` -- confirm against caller).
        labels: Indexable collection of tensors; ``labels[i].item()`` is used
            as the title of image ``i``.
        rows: Number of grid rows.
        cols: Number of grid columns.

    Only the first ``rows * cols`` images are drawn. Asking ``add_subplot``
    for a position beyond the grid raises, so a batch larger than the grid
    used to abort the whole figure.
    """
    imgs = imgs.detach().cpu().numpy().transpose(0,2,3,1)
    fig = plt.figure(figsize=(8,4))
    # Never request more subplot slots than the grid provides.
    n_shown = min(imgs.shape[0], rows * cols)
    for i in range(n_shown):
        ax = fig.add_subplot(rows, cols, i+1, xticks=[], yticks=[])
        # squeeze() drops the singleton channel axis of greyscale images.
        ax.imshow(imgs[i].squeeze(), cmap='Greys_r')
        ax.set_title(labels[i].item())


# Pull one batch from the training loader and show it on a 13 x 10 grid.
batch_iter = iter(dataloader_train)
imgs, labels = next(batch_iter)
view_images(imgs, labels, rows=13, cols=10)

class Contractive_AutoEncoder(nn.Module):
    """Single-layer autoencoder: 784 -> 512 (ReLU) -> 784 (sigmoid).

    ``forward`` returns the hidden code together with the reconstruction so
    that a caller can add a penalty on the code to the reconstruction loss.
    """

    def __init__(self):
        super().__init__()
        self.encoder = nn.Linear(784, 512)
        self.decoder = nn.Linear(512, 784)

    def forward(self, input):
        """Encode and reconstruct a batch.

        Args:
            input: Batch whose per-sample elements total 784, e.g.
                ``(N, 1, 28, 28)``.

        Returns:
            Tuple ``(output_e, output)``: the code of shape ``(N, 512)`` and
            the reconstruction reshaped back to ``input.shape``.
        """
        shape = input.shape
        # reshape (rather than view) also accepts non-contiguous batches.
        flat = input.reshape(input.size(0), -1)
        output_e = F.relu(self.encoder(flat))
        # torch.sigmoid is the supported spelling; F.sigmoid is flagged as
        # deprecated by a number of PyTorch releases.
        output = torch.sigmoid(self.decoder(output_e))
        return output_e, output.view(*shape)

def loss_function(output_e, outputs, imgs, device):
    """First attempt at a contractive loss: MSE plus mean squared ``imgs.grad``.

    Args:
        output_e: Encoder output; ``backward`` is called on it with an
            all-ones gradient of the same size.
        outputs: Reconstruction; asserted to have the same shape as ``imgs``.
        imgs: Input batch whose ``.grad`` attribute is read for the penalty.
        device: Device the all-ones gradient tensor is moved to.

    Returns:
        ``loss1 + loss2`` (reconstruction MSE plus the gradient penalty).

    NOTE(review): this is the version the question reports as failing. The
    ``imgs.grad.requires_grad`` assignment below raises the AttributeError
    quoted in the text, i.e. ``imgs.grad`` is ``None`` at that point.
    """
    # Back-propagate from the code with an all-ones upstream gradient; the
    # graph is retained because the caller runs loss.backward() afterwards.
    output_e.backward(torch.ones(output_e.size()).to(device), retain_graph=True)
    criterion = nn.MSELoss()
    assert outputs.shape == imgs.shape ,f'outputs.shape : {outputs.shape} != imgs.shape : {imgs.shape}'
    
    # This is the line that raises in the reported run.
    imgs.grad.requires_grad = True 
    loss1 = criterion(outputs, imgs)
    print(imgs.grad)
    # Penalty term: mean of the squared input gradient.
    loss2 = torch.mean(pow(imgs.grad,2))
    loss = loss1 + loss2 
    return loss 

# Training script for the first attempt: one Adam optimizer over the whole
# autoencoder, with the loss computed by loss_function().
epochs = 50 
interval = 2000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Contractive_AutoEncoder().to(device)
optimizer = optim.Adam(model.parameters(), lr =0.001)

for e in range(epochs):
    for i, (imgs, labels) in enumerate(dataloader_train):
        imgs = imgs.to(device)
        labels = labels.to(device)

        # The model returns (code, reconstruction).
        outputs_e, outputs = model(imgs)
        loss = loss_function(outputs_e, outputs, imgs,device)
        # Gradients are cleared after loss_function() has already called
        # backward() on the code, and before the backward pass of the loss.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # NOTE(review): this condition is truthy for every i that is NOT a
        # multiple of `interval`, so the blank line prints on most steps.
        if i%interval: 
            print('')

    # Reports the loss of the last batch of the epoch.
    print(f'epoch/epoechs: {e}/{epochs} loss : {loss.item():.4f} ')

为了简洁起见,我只为编码器和解码器使用了一层。显然,无论其中任何一层的层数如何,它都应该起作用!

但这里的问题是:除了我不确定这样做(计算相对于输入的梯度)是否正确之外,我还遇到了一个错误,这说明之前的解决方案可能是错误的或不适用的。

即:

imgs.grad.requires_grad = True

产生错误:

AttributeError : 'NoneType' object has no attribute 'requires_grad'

我也尝试了该线程中建议的第二种方法,如下所示:

class Contractive_Encoder(nn.Module):
    """Encoder half of the autoencoder: one 784 -> 512 linear layer with ReLU."""

    def __init__(self):
        super().__init__()
        self.encoder = nn.Linear(784, 512)

    def forward(self, input):
        """Flatten each sample of ``input`` and return its ReLU-activated code."""
        batch = input.size(0)
        flat = input.view(batch, -1)
        return F.relu(self.encoder(flat))

class Contractive_Decoder(nn.Module):
    """Decoder half of the autoencoder: 512 -> 784 linear layer with sigmoid."""

    def __init__(self):
        super().__init__()
        self.decoder = nn.Linear(512, 784)

    def forward(self, input):
        """Map a batch of codes to images.

        Args:
            input: Code batch of shape ``(N, 512)``.

        Returns:
            Reconstruction reshaped to ``(N, 1, 28, 28)``.
        """
        # torch.sigmoid is the supported spelling; F.sigmoid is flagged as
        # deprecated by a number of PyTorch releases.
        output = torch.sigmoid(self.decoder(input))
        # Un-flatten the 784 outputs into single-channel 28x28 images.
        output = output.view(-1,1,28,28)
        return output


# Second attempt: separate encoder/decoder modules, one optimizer for the
# reconstruction step and a second, encoder-only optimizer for the
# gradient-penalty step.
epochs = 50 
interval = 2000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model_enc = Contractive_Encoder().to(device)
model_dec = Contractive_Decoder().to(device)

# Updates both encoder and decoder parameters (reconstruction step).
optimizer = optim.Adam([{"params":model_enc.parameters()},
                        {"params":model_dec.parameters()}], lr =0.001)

# Updates the encoder parameters only (penalty step).
optimizer_cond = optim.Adam(model_enc.parameters(), lr = 0.001)

criterion = nn.MSELoss()

for e in range(epochs):
    for i, (imgs, labels) in enumerate(dataloader_train):
        imgs = imgs.to(device)
        labels = labels.to(device)

        # Step 1: plain reconstruction update.
        outputs_e = model_enc(imgs)
        outputs = model_dec(outputs_e)
        loss_rec = criterion(outputs, imgs)
        optimizer.zero_grad()
        loss_rec.backward()
        optimizer.step()

        # Step 2: penalty on the gradient of the code w.r.t. the input.
        imgs.requires_grad_(True)
        y = model_enc(imgs)
        optimizer_cond.zero_grad()
        # NOTE(review): the upstream gradient is sized like the flattened
        # images (N, 28*28) while y is the encoder output; the text below
        # quotes the resulting RuntimeError (got [128, 784], expected a
        # shape compatible with [128, 512]).
        y.backward(torch.ones(imgs.view(-1,28*28).size()))

        imgs.grad.requires_grad = True
        # NOTE(review): torch.mean is handed a Python list here, not a tensor.
        loss = torch.mean([pow(imgs.grad,2)])
        optimizer_cond.zero_grad()
        loss.backward()
        optimizer_cond.step()
        
        # NOTE(review): truthy for every i that is NOT a multiple of
        # `interval`, so the blank line prints on most steps.
        if i%interval: 
            print('')

    # Reports the penalty loss of the last batch of the epoch.
    print(f'epoch/epoechs: {e}/{epochs} loss : {loss.item():.4f} ')

但我遇到错误:

RuntimeError: invalid gradient at index 0 - got [128, 784] but expected shape compatible with [128, 512]

我应该如何在 Pytorch 中处理这个问题?

总结

我写的contractive loss最终实现如下:

def loss_function(output_e, outputs, imgs, lamda = 1e-4, device=torch.device('cuda')):
    """Contractive-autoencoder loss: reconstruction MSE plus a gradient penalty.

    Args:
        output_e: Encoder output (the code) computed from ``imgs``.
        outputs: Reconstruction; must have the same shape as ``imgs``.
        imgs: Input batch. It must have ``requires_grad=True`` before the
            forward pass so the code can be differentiated with respect to it.
        lamda: Weight applied to the penalty term.
        device: Unused; kept so existing call sites remain valid.

    Returns:
        Scalar tensor ``mse(outputs, imgs) + lamda * penalty``.

    Raises:
        AssertionError: If ``outputs`` and ``imgs`` differ in shape.
    """
    criterion = nn.MSELoss()
    assert outputs.shape == imgs.shape ,f'outputs.shape : {outputs.shape} != imgs.shape : {imgs.shape}'
    loss1 = criterion(outputs, imgs)

    # Differentiate the code w.r.t. the input through the *parameter*
    # output_e (the previous body read a global named outputs_e).
    # create_graph=True keeps the result attached to the graph, so the
    # penalty back-propagates into the encoder weights instead of acting as a
    # constant; autograd.grad also leaves every .grad attribute untouched.
    (grad_in,) = torch.autograd.grad(
        output_e,
        imgs,
        grad_outputs=torch.ones_like(output_e),
        create_graph=True,
    )
    # Square root of the sum of squared entries (Frobenius-style norm).
    # NOTE(review): with all-ones grad_outputs this is the gradient of the
    # summed code units, not the full per-unit Jacobian -- confirm intended.
    loss2 = torch.sqrt(torch.sum(torch.pow(grad_in, 2)))
    return loss1 + (lamda * loss2)

在训练循环中你需要做:

# Training loop for the contractive autoencoder: the input batch is made
# differentiable for the forward pass so the loss can penalise the gradient
# of the code with respect to the input.
for e in range(epochs):
    for i, (imgs, labels) in enumerate(dataloader_train):
        imgs = imgs.to(device)
        labels = labels.to(device)

        # Enable gradient tracking first: retain_grad() raises on a tensor
        # whose requires_grad flag is still False.
        imgs.requires_grad_(True)
        imgs.retain_grad()

        outputs_e, outputs = model(imgs)
        # NOTE(review): `lam` (the penalty weight) is not defined in this
        # snippet; it has to exist before the loop runs.
        loss = loss_function(outputs_e, outputs, imgs, lam,device)

        # The input gradient is only needed inside loss_function().
        imgs.requires_grad_(False)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # Reports the loss of the last batch of the epoch.
    print(f'epoch/epochs: {e}/{epochs} loss: {loss.item():.4f}')

完整解释

事实证明,@akshayk07 在评论中正确地指出,在 Pytorch 论坛中发现的实现在多个地方都是错误的。值得注意的是,它没有实现 Contractive Auto-Encoders:Explicit Invariance During Feature Extraction 论文中引入的实际收缩损失!除此之外,由于显而易见的原因,该实现根本无法工作,稍后将对此进行解释。

变化很明显,所以我试着解释一下这里发生了什么。首先请注意 imgs 不是 叶节点 ,因此梯度不会保留在图像 .grad 属性中。

为了保留非叶节点的梯度,你应该使用 retain_grad();默认情况下 .grad 只会为叶张量填充。此外,imgs.retain_grad() 应该在执行 forward() 之前调用,因为它会指示 autograd 把梯度存储到非叶节点中。

更新

感谢@Michael 指出 Frobenius 范数的正确计算实际上是(来自 ScienceDirect):

the square root of the sum of the squares of all the matrix entries

不是

the square root of the sum of the absolute values of all the matrix entries as explained here

实现收缩自编码器的主要挑战是计算雅可比矩阵(Jacobian)的 Frobenius 范数,这里的雅可比矩阵是编码层(即瓶颈层,向量)相对于输入层(向量)的梯度。这是损失函数中的正则化项。幸运的是,你已经为我解决了这个问题。谢谢!你在第一项中使用了 MSE 损失;有时也会使用交叉熵损失,值得考虑。我认为你的 Frobenius 范数几乎已经正确了,只是需要取雅可比矩阵各元素平方和的平方根,而你这里计算的是各元素绝对值之和的平方根。以下是我定义损失函数的方式(抱歉,为了更直观,我稍微更改了符号):

def cae_loss_fcn(code, img_out, img_in, lamda=1e-4, device=torch.device('cuda')):
    """Contractive-autoencoder loss: reconstruction MSE plus a gradient penalty.

    Args:
        code: Bottleneck activations computed from ``img_in``.
        img_out: Reconstruction; must have the same shape as ``img_in``.
        img_in: Input batch. It must have ``requires_grad=True`` before the
            forward pass so ``code`` can be differentiated with respect to it.
        lamda: Weight applied to the contraction term.
        device: Unused; kept so existing call sites remain valid.

    Returns:
        Scalar tensor ``mse(img_out, img_in) + lamda * penalty``.

    Raises:
        AssertionError: If ``img_out`` and ``img_in`` differ in shape.
    """
    # First term in the loss function, for ensuring representational fidelity
    criterion = nn.MSELoss()
    assert img_out.shape == img_in.shape, f'img_out.shape : {img_out.shape} != img_in.shape : {img_in.shape}'
    loss1 = criterion(img_out, img_in)

    # Second term in the loss function, for enforcing contraction of the
    # representation. create_graph=True keeps the input gradient attached to
    # the graph; reading img_in.grad after a plain backward() yields a
    # constant, so the penalty would never influence the encoder weights.
    (grad_in,) = torch.autograd.grad(
        code,
        img_in,
        grad_outputs=torch.ones_like(code),
        create_graph=True,
    )
    # Frobenius-style norm: square root of the sum of squared entries.
    # NOTE(review): with all-ones grad_outputs this is the gradient of the
    # summed code units, not the full per-unit Jacobian -- confirm intended.
    loss2 = torch.sqrt(torch.sum(torch.pow(grad_in, 2)))

    # Total loss, the sum of the two loss terms, with weight applied to second term
    return loss1 + (lamda * loss2)