Accessing reduced dimensionality of trained autoencoder
Here is an autoencoder trained on MNIST using PyTorch:
import torch
import torchvision
import torch.nn as nn
from torch.autograd import Variable
cuda = torch.cuda.is_available() # True if cuda is available, False otherwise
FloatTensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
print('Training on %s' % ('GPU' if cuda else 'CPU'))
# Loading the MNIST data set
transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor(),
torchvision.transforms.Normalize((0.1307,), (0.3081,))])
mnist = torchvision.datasets.MNIST(root='../data/', train=True, transform=transform, download=True)
# Loader to feed the data batch by batch during training.
batch = 100
data_loader = torch.utils.data.DataLoader(mnist, batch_size=batch, shuffle=True)
autoencoder = nn.Sequential(
# Encoder
nn.Linear(28 * 28, 512),
nn.PReLU(512),
nn.BatchNorm1d(512),
# Low-dimensional representation
nn.Linear(512, 128),
nn.PReLU(128),
nn.BatchNorm1d(128),
# Decoder
nn.Linear(128, 512),
nn.PReLU(512),
nn.BatchNorm1d(512),
nn.Linear(512, 28 * 28))
autoencoder = autoencoder.type(FloatTensor)
optimizer = torch.optim.Adam(params=autoencoder.parameters(), lr=0.005)
epochs = 10
data_size = int(mnist.train_labels.size()[0])
for i in range(epochs):
    for j, (images, _) in enumerate(data_loader):
        images = images.view(images.size(0), -1)  # from (batch, 1, 28, 28) to (batch, 28 * 28)
        images = Variable(images).type(FloatTensor)
        autoencoder.zero_grad()
        reconstructions = autoencoder(images)
        loss = torch.dist(images, reconstructions)  # L2 (Euclidean) distance between inputs and reconstructions
        loss.backward()
        optimizer.step()
    print('Epoch %i/%i loss %.2f' % (i + 1, epochs, loss.item()))
print('Optimization finished.')
I am trying to compare the low-dimensional representations of each image.
Printing the dimensions of each layer:
for l in autoencoder.parameters():
    print(l.shape)
shows:
torch.Size([512, 784])
torch.Size([512])
torch.Size([512])
torch.Size([512])
torch.Size([512])
torch.Size([128, 512])
torch.Size([128])
torch.Size([128])
torch.Size([128])
torch.Size([128])
torch.Size([512, 128])
torch.Size([512])
torch.Size([512])
torch.Size([512])
torch.Size([512])
torch.Size([784, 512])
torch.Size([784])
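(Aside, added for clarity: these shapes are only the layer parameters. Matching them to layers with named_parameters(), a standard nn.Module method, makes that explicit; the annotations below are what this prints for the model above.)
for name, p in autoencoder.named_parameters():
    print(name, tuple(p.shape))
# 0.weight (512, 784), 0.bias (512,)  -> first Linear
# 1.weight (512,)                     -> PReLU slopes
# 2.weight (512,), 2.bias (512,)      -> BatchNorm1d affine parameters
# 3.weight (128, 512), 3.bias (128,)  -> bottleneck Linear, and so on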
So it seems the reduced representation is not stored among the learned parameters?
In other words, if I had 10,000 images of 100 pixels each, running an autoencoder like the one above to reduce the dimensionality to 10 pixels should give me access to the 10-pixel representation of all 10,000 images?
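To make that concrete, here is a toy sketch of what I mean by "accessing" the representation (toy_encoder, toy_images and reduced are made-up stand-ins for this hypothetical 100-pixel / 10-unit case, not the model above):
toy_encoder = nn.Linear(100, 10)       # stand-in for a trained encoder with a 10-unit bottleneck
toy_images = torch.randn(10000, 100)   # 10,000 images of 100 pixels each
reduced = toy_encoder(toy_images)
print(reduced.shape)                   # torch.Size([10000, 10]) -- one 10-dimensional code per image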
I'm not very familiar with PyTorch, but splitting the autoencoder into an encoder model and a decoder model seems to work (I changed the hidden layer size from 512 to 64 and the dimensionality of the encoded image from 128 to 4 to make the example run faster):
import torch
import torchvision
import torch.nn as nn
from torch.autograd import Variable
cuda = torch.cuda.is_available() # True if cuda is available, False otherwise
FloatTensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
print('Training on %s' % ('GPU' if cuda else 'CPU'))
# Loading the MNIST data set
transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor(),
torchvision.transforms.Normalize((0.1307,), (0.3081,))])
mnist = torchvision.datasets.MNIST(root='../data/', train=True, transform=transform, download=True)
# Loader to feed the data batch by batch during training.
batch = 100
data_loader = torch.utils.data.DataLoader(mnist, batch_size=batch, shuffle=True)
encoder = nn.Sequential(
# Encoder
nn.Linear(28 * 28, 64),
nn.PReLU(64),
nn.BatchNorm1d(64),
# Low-dimensional representation
nn.Linear(64, 4),
nn.PReLU(4),
nn.BatchNorm1d(4))
decoder = nn.Sequential(
# Decoder
nn.Linear(4, 64),
nn.PReLU(64),
nn.BatchNorm1d(64),
nn.Linear(64, 28 * 28))
autoencoder = nn.Sequential(encoder, decoder)
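# Note: 'autoencoder' shares its modules with 'encoder' and 'decoder', so
# autoencoder.parameters() below covers both, and training the autoencoder
# also trains the standalone encoder in place.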
encoder = encoder.type(FloatTensor)
decoder = decoder.type(FloatTensor)
autoencoder = autoencoder.type(FloatTensor)
optimizer = torch.optim.Adam(params=autoencoder.parameters(), lr=0.005)
epochs = 10
data_size = int(mnist.train_labels.size()[0])
for i in range(epochs):
    for j, (images, _) in enumerate(data_loader):
        images = images.view(images.size(0), -1)  # from (batch, 1, 28, 28) to (batch, 28 * 28)
        images = Variable(images).type(FloatTensor)
        autoencoder.zero_grad()
        reconstructions = autoencoder(images)
        loss = torch.dist(images, reconstructions)  # L2 (Euclidean) distance between inputs and reconstructions
        loss.backward()
        optimizer.step()
    print('Epoch %i/%i loss %.2f' % (i + 1, epochs, loss.item()))
print('Optimization finished.')
# Get the encoded images here
encoded_images = []
for j, (images, _) in enumerate(data_loader):
    images = images.view(images.size(0), -1)
    images = Variable(images).type(FloatTensor)
    encoded_images.append(encoder(images))
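If a single matrix with one row per image is wanted, the per-batch outputs can be stacked afterwards. A minimal sketch (ordered_loader and codes are names introduced here): data_loader shuffles, so a non-shuffling loader keeps the rows aligned with the dataset order, and encoder.eval() makes BatchNorm use its running statistics:
encoder.eval()
ordered_loader = torch.utils.data.DataLoader(mnist, batch_size=batch, shuffle=False)
codes = []
for images, _ in ordered_loader:
    images = Variable(images.view(images.size(0), -1)).type(FloatTensor)
    codes.append(encoder(images).data.cpu())
codes = torch.cat(codes, dim=0)
print(codes.shape)  # torch.Size([60000, 4]) -- one 4-dimensional code per MNIST training image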