Pytorch中如何简化DataLoader for Autoencoder
How to simplify DataLoader for Autoencoder in Pytorch
是否有更简单的方法来设置数据加载器,因为输入和目标数据在自动编码器的情况下是相同的,并且在训练期间加载数据? DataLoader 总是需要两个输入。
目前我这样定义我的数据加载器:
X_train = rnd.random((300,100))
X_val = rnd.random((75,100))
train = data_utils.TensorDataset(torch.from_numpy(X_train).float(), torch.from_numpy(X_train).float())
val = data_utils.TensorDataset(torch.from_numpy(X_val).float(), torch.from_numpy(X_val).float())
train_loader= data_utils.DataLoader(train, batch_size=1)
val_loader = data_utils.DataLoader(val, batch_size=1)
然后像这样训练:
for epoch in range(50):
for batch_idx, (data, target) in enumerate(train_loader):
data, target = Variable(data), Variable(target).detach()
optimizer.zero_grad()
output = model(data, x)
loss = criterion(output, target)
我相信这很简单。除此之外,我想您将不得不实现自己的数据集。下面是示例代码。
class ImageLoader(torch.utils.data.Dataset):
def __init__(self, root, tform=None, imgloader=PIL.Image.open):
super(ImageLoader, self).__init__()
self.root=root
self.filenames=sorted(glob(root))
self.tform=tform
self.imgloader=imgloader
def __len__(self):
return len(self.filenames)
def __getitem__(self, i):
out = self.imgloader(self.filenames[i]) # io.imread(self.filenames[i])
if self.tform:
out = self.tform(out)
return out
然后您可以按如下方式使用它。
source_dataset=ImageLoader(root='/dldata/denoise_ae/clean/*.png', tform=source_depth_transform)
target_dataset=ImageLoader(root='/dldata/denoise_ae/clean_cam_n9dmaps/*.png', tform=target_depth_transform)
source_dataloader=torch.utils.data.DataLoader(source_dataset, batch_size=32, shuffle=False, drop_last=True, num_workers=15)
target_dataloader=torch.utils.data.DataLoader(target_dataset, batch_size=32, shuffle=False, drop_last=True, num_workers=15)
要测试第 1 批,请按以下步骤进行。
dataiter = iter(source_dataloader)
images = dataiter.next()
print(images.size())
最后你可以在批量训练循环中枚举加载的数据,如下所示。
for i, (source, target) in enumerate(zip(source_dataloader, target_dataloader), 0):
source, target = Variable(source.float().cuda()), Variable(target.float().cuda())
玩得开心。
PS。我共享的代码示例不会加载验证数据。
为什么不对 TensorDataset 进行子类化以使其与未标记数据兼容?
class UnlabeledTensorDataset(TensorDataset):
"""Dataset wrapping unlabeled data tensors.
Each sample will be retrieved by indexing tensors along the first
dimension.
Arguments:
data_tensor (Tensor): contains sample data.
"""
def __init__(self, data_tensor):
self.data_tensor = data_tensor
def __getitem__(self, index):
return self.data_tensor[index]
以及用于训练您的自动编码器的这些内容
X_train = rnd.random((300,100))
train = UnlabeledTensorDataset(torch.from_numpy(X_train).float())
train_loader= data_utils.DataLoader(train, batch_size=1)
for epoch in range(50):
for batch in train_loader:
data = Variable(batch)
optimizer.zero_grad()
output = model(data)
loss = criterion(output, data)
是否有更简单的方法来设置数据加载器,因为输入和目标数据在自动编码器的情况下是相同的,并且在训练期间加载数据? DataLoader 总是需要两个输入。
目前我这样定义我的数据加载器:
X_train = rnd.random((300,100))
X_val = rnd.random((75,100))
train = data_utils.TensorDataset(torch.from_numpy(X_train).float(), torch.from_numpy(X_train).float())
val = data_utils.TensorDataset(torch.from_numpy(X_val).float(), torch.from_numpy(X_val).float())
train_loader= data_utils.DataLoader(train, batch_size=1)
val_loader = data_utils.DataLoader(val, batch_size=1)
然后像这样训练:
for epoch in range(50):
for batch_idx, (data, target) in enumerate(train_loader):
data, target = Variable(data), Variable(target).detach()
optimizer.zero_grad()
output = model(data, x)
loss = criterion(output, target)
我相信这很简单。除此之外,我想您将不得不实现自己的数据集。下面是示例代码。
class ImageLoader(torch.utils.data.Dataset):
def __init__(self, root, tform=None, imgloader=PIL.Image.open):
super(ImageLoader, self).__init__()
self.root=root
self.filenames=sorted(glob(root))
self.tform=tform
self.imgloader=imgloader
def __len__(self):
return len(self.filenames)
def __getitem__(self, i):
out = self.imgloader(self.filenames[i]) # io.imread(self.filenames[i])
if self.tform:
out = self.tform(out)
return out
然后您可以按如下方式使用它。
source_dataset=ImageLoader(root='/dldata/denoise_ae/clean/*.png', tform=source_depth_transform)
target_dataset=ImageLoader(root='/dldata/denoise_ae/clean_cam_n9dmaps/*.png', tform=target_depth_transform)
source_dataloader=torch.utils.data.DataLoader(source_dataset, batch_size=32, shuffle=False, drop_last=True, num_workers=15)
target_dataloader=torch.utils.data.DataLoader(target_dataset, batch_size=32, shuffle=False, drop_last=True, num_workers=15)
要测试第 1 批,请按以下步骤进行。
dataiter = iter(source_dataloader)
images = dataiter.next()
print(images.size())
最后你可以在批量训练循环中枚举加载的数据,如下所示。
for i, (source, target) in enumerate(zip(source_dataloader, target_dataloader), 0):
source, target = Variable(source.float().cuda()), Variable(target.float().cuda())
玩得开心。
PS。我共享的代码示例不会加载验证数据。
为什么不对 TensorDataset 进行子类化以使其与未标记数据兼容?
class UnlabeledTensorDataset(TensorDataset):
"""Dataset wrapping unlabeled data tensors.
Each sample will be retrieved by indexing tensors along the first
dimension.
Arguments:
data_tensor (Tensor): contains sample data.
"""
def __init__(self, data_tensor):
self.data_tensor = data_tensor
def __getitem__(self, index):
return self.data_tensor[index]
以及用于训练您的自动编码器的这些内容
X_train = rnd.random((300,100))
train = UnlabeledTensorDataset(torch.from_numpy(X_train).float())
train_loader= data_utils.DataLoader(train, batch_size=1)
for epoch in range(50):
for batch in train_loader:
data = Variable(batch)
optimizer.zero_grad()
output = model(data)
loss = criterion(output, data)