如何在dataloader pytorch中单独加载根目录中的数据
How to load data in root directory separately in dataloader pytorch
我正在尝试在 PyTorch 中加载这个数据集,但在加载时遇到了问题。
你可以看出我检查数据集时目录看起来像这样:
root
monet_jpg
monet_tfrec
photo_jpg
photo_tfrec
所以,我想把照片和莫奈图像分别加载到各自的数据加载器变量中,但这种方法似乎行不通。
编辑:我的意思是 monet_ds 和 photo_ds 返回的都只有莫奈图像(而 photo_ds 本应返回 photo_jpg 中的图像)。
我正在尝试通过此代码加载数据:
import torchvision.datasets as dset
import torchvision.utils as vutils
from torch.utils.data import Subset
def load_data(dataroot , image_size, batch_size, workers,ngpu,shuffle=True):
    """Asker's attempt: split one ImageFolder into Monet and photo loaders.

    Builds a single ``ImageFolder`` over ``dataroot`` (resize, center-crop,
    to-tensor, normalize with mean/std 0.5), takes two index-based
    ``Subset`` views of it and wraps each in a ``DataLoader``.

    Review notes (this is the code the question reports as not working):
      * ``range(300,)`` is the same as ``range(300)``, i.e. indices 0..299,
        so the "photo" subset covers the same leading indices as the
        "monet" subset (0..298) instead of the images after them.
      * ``ngpu`` and ``shuffle`` are accepted but never used in the body.
      * No ``return`` statement is visible in this snippet, so the tuple
        unpacking at the call site below would fail as written.
      * ``torch`` and ``transforms`` are not imported by the visible import
        lines -- presumably imported elsewhere; TODO confirm.
    """
    #DataLoading
    # Create the dataset
    dataset = dset.ImageFolder(root=dataroot,
                               transform=transforms.Compose([
                                   transforms.Resize(image_size),
                                   transforms.CenterCrop(image_size),
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                               ]))
    # Debug output: mapping from class name to class index.
    print(dataset.class_to_idx)
    #print(dataset.imgs)
    # Indices 0..298 of the combined dataset.
    monet_ds = Subset(dataset, range(0,299))
    # NOTE(review): range(300,) == range(300) -> indices 0..299, which
    # overlaps the subset above; presumably "everything from 300 on" was meant.
    photo_ds = Subset(dataset, range(300,))
    # Create the dataloader
    monet_ds = torch.utils.data.DataLoader(monet_ds, batch_size=batch_size,
                                           num_workers=workers)
    photo_ds = torch.utils.data.DataLoader(photo_ds, batch_size=batch_size,
                                           num_workers=workers)
    # Decide which device we want to run on
    device = torch.device("cuda:0" if (torch.cuda.is_available()) else "cpu")
    print("Data loaded...")
# Dataset root passed to load_data as ``dataroot``.
root = "../input/gan-getting-started"
# image_size, batch_size, workers and ngpu are not defined in this snippet;
# they are assumed to be set earlier by the caller.
monet_ds, photo_ds, device = load_data(root, image_size, batch_size, workers, ngpu)
如果有人能帮我在 PyTorch 中正确加载这些数据,我将不胜感激。
谢谢。
看起来它们是完全独立的,所以下面应该可以正常工作:
import os
from torchvision.datasets.folder import default_loader
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
class MonetPhotoDataset(Dataset):
    """Unlabelled image dataset backed by a single flat directory.

    Every matching image file directly inside ``root`` becomes one sample.
    There are no class labels: indexing yields only the (optionally
    transformed) image.

    Args:
        root: Directory that holds the image files.
        transform: Optional callable applied to each loaded image.
        extensions: File-name suffix, or iterable of suffixes, to accept.
            Suffixes are compared case-insensitively, so ``.JPG`` files
            are picked up as well.

    Raises:
        OSError: If ``root`` cannot be listed (propagated from ``os.listdir``).
    """

    def __init__(self, root, transform=None, extensions=('.jpg', '.jpeg')):
        super().__init__()
        self.transform = transform

        # A bare string would otherwise be iterated character by character.
        if isinstance(extensions, str):
            extensions = (extensions,)
        suffixes = tuple(ext.lower() for ext in extensions)

        candidates = (
            os.path.join(root, name)
            for name in os.listdir(root)
            if name.lower().endswith(suffixes)
        )
        # Skip directories that merely look like images, and sort so that
        # index -> file is deterministic (os.listdir order is arbitrary).
        self.img_paths = sorted(p for p in candidates if os.path.isfile(p))

    def __len__(self):
        """Return the number of image files found under ``root``."""
        return len(self.img_paths)

    def __getitem__(self, idx):
        """Load the image at position ``idx`` and apply the transform, if any."""
        img_path = self.img_paths[idx]
        sample = default_loader(img_path)
        if self.transform is not None:
            sample = self.transform(sample)
        return sample
def load_data(dataroot, image_size, batch_size, workers, ngpu, shuffle=True):
    """Build separate DataLoaders for the Monet and photo image folders.

    Images are read from ``<dataroot>/monet_jpg`` and
    ``<dataroot>/photo_jpg``. Each image is resized and center-cropped to
    ``image_size``, converted to a tensor and normalized per channel with
    mean 0.5 and std 0.5.

    Args:
        dataroot: Directory containing the ``monet_jpg`` and ``photo_jpg``
            sub-directories.
        image_size: Target size passed to ``Resize`` and ``CenterCrop``.
        batch_size: Number of samples per batch for both loaders.
        workers: Number of DataLoader worker processes.
        ngpu: Number of GPUs to use; ``0`` selects the CPU even when CUDA
            is available.
        shuffle: Whether both loaders reshuffle their data every epoch.

    Returns:
        Tuple ``(monet_dl, photo_dl, device)``.
    """
    # Imported locally because the snippet's import lines never bring
    # ``torch`` itself into scope, yet ``torch.device`` is needed below.
    import torch

    # set up transform (shared by both datasets)
    transform = transforms.Compose([
        transforms.Resize(image_size),
        transforms.CenterCrop(image_size),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    # create datasets: one per image folder, so they never mix
    monet_ds = MonetPhotoDataset(root=os.path.join(dataroot, 'monet_jpg'), transform=transform)
    photo_ds = MonetPhotoDataset(root=os.path.join(dataroot, 'photo_jpg'), transform=transform)

    # create dataloaders; ``shuffle`` was previously accepted but ignored
    monet_dl = DataLoader(monet_ds, batch_size=batch_size, shuffle=shuffle,
                          num_workers=workers)
    photo_dl = DataLoader(photo_ds, batch_size=batch_size, shuffle=shuffle,
                          num_workers=workers)

    # decide which device we want to run on; honour ``ngpu`` so that
    # ngpu == 0 forces CPU execution
    use_cuda = torch.cuda.is_available() and ngpu > 0
    device = torch.device("cuda:0" if use_cuda else "cpu")
    print("Data loaded...")
    return monet_dl, photo_dl, device
# Dataset root; load_data joins 'monet_jpg' and 'photo_jpg' onto it.
root = "../input/gan-getting-started"
# image_size, batch_size, workers and ngpu are not defined in this snippet;
# they are assumed to be set earlier by the caller.
monet_dl, photo_dl, device = load_data(root, image_size, batch_size, workers, ngpu)
P.S.:我保留了 load_data,是因为我假设您的代码依赖它的签名;否则我自己不会这样写。另外,上面的代码我没有测试过,可能会有一些笔误,但逻辑是正确的。
请注意,此数据集只返回图像(不返回标签)。
我正在尝试在 PyTorch 中加载这个数据集,但在加载时遇到了问题。
你可以看出我检查数据集时目录看起来像这样:
root
monet_jpg
monet_tfrec
photo_jpg
photo_tfrec
所以,我想把照片和莫奈图像分别加载到各自的数据加载器变量中,但这种方法似乎行不通。
编辑:我的意思是 monet_ds 和 photo_ds 返回的都只有莫奈图像(而 photo_ds 本应返回 photo_jpg 中的图像)。
我正在尝试通过此代码加载数据:
import torchvision.datasets as dset
import torchvision.utils as vutils
from torch.utils.data import Subset
def load_data(dataroot , image_size, batch_size, workers,ngpu,shuffle=True):
    """Asker's attempt: split one ImageFolder into Monet and photo loaders.

    Builds a single ``ImageFolder`` over ``dataroot`` (resize, center-crop,
    to-tensor, normalize with mean/std 0.5), takes two index-based
    ``Subset`` views of it and wraps each in a ``DataLoader``.

    Review notes (this is the code the question reports as not working):
      * ``range(300,)`` is the same as ``range(300)``, i.e. indices 0..299,
        so the "photo" subset covers the same leading indices as the
        "monet" subset (0..298) instead of the images after them.
      * ``ngpu`` and ``shuffle`` are accepted but never used in the body.
      * No ``return`` statement is visible in this snippet, so the tuple
        unpacking at the call site below would fail as written.
      * ``torch`` and ``transforms`` are not imported by the visible import
        lines -- presumably imported elsewhere; TODO confirm.
    """
    #DataLoading
    # Create the dataset
    dataset = dset.ImageFolder(root=dataroot,
                               transform=transforms.Compose([
                                   transforms.Resize(image_size),
                                   transforms.CenterCrop(image_size),
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                               ]))
    # Debug output: mapping from class name to class index.
    print(dataset.class_to_idx)
    #print(dataset.imgs)
    # Indices 0..298 of the combined dataset.
    monet_ds = Subset(dataset, range(0,299))
    # NOTE(review): range(300,) == range(300) -> indices 0..299, which
    # overlaps the subset above; presumably "everything from 300 on" was meant.
    photo_ds = Subset(dataset, range(300,))
    # Create the dataloader
    monet_ds = torch.utils.data.DataLoader(monet_ds, batch_size=batch_size,
                                           num_workers=workers)
    photo_ds = torch.utils.data.DataLoader(photo_ds, batch_size=batch_size,
                                           num_workers=workers)
    # Decide which device we want to run on
    device = torch.device("cuda:0" if (torch.cuda.is_available()) else "cpu")
    print("Data loaded...")
# Dataset root passed to load_data as ``dataroot``.
root = "../input/gan-getting-started"
# image_size, batch_size, workers and ngpu are not defined in this snippet;
# they are assumed to be set earlier by the caller.
monet_ds, photo_ds, device = load_data(root, image_size, batch_size, workers, ngpu)
如果有人能帮我在 PyTorch 中正确加载这些数据,我将不胜感激。谢谢。
看起来它们是完全独立的,所以下面应该可以正常工作:
import os
from torchvision.datasets.folder import default_loader
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
class MonetPhotoDataset(Dataset):
    """Unlabelled image dataset backed by a single flat directory.

    Every matching image file directly inside ``root`` becomes one sample.
    There are no class labels: indexing yields only the (optionally
    transformed) image.

    Args:
        root: Directory that holds the image files.
        transform: Optional callable applied to each loaded image.
        extensions: File-name suffix, or iterable of suffixes, to accept.
            Suffixes are compared case-insensitively, so ``.JPG`` files
            are picked up as well.

    Raises:
        OSError: If ``root`` cannot be listed (propagated from ``os.listdir``).
    """

    def __init__(self, root, transform=None, extensions=('.jpg', '.jpeg')):
        super().__init__()
        self.transform = transform

        # A bare string would otherwise be iterated character by character.
        if isinstance(extensions, str):
            extensions = (extensions,)
        suffixes = tuple(ext.lower() for ext in extensions)

        candidates = (
            os.path.join(root, name)
            for name in os.listdir(root)
            if name.lower().endswith(suffixes)
        )
        # Skip directories that merely look like images, and sort so that
        # index -> file is deterministic (os.listdir order is arbitrary).
        self.img_paths = sorted(p for p in candidates if os.path.isfile(p))

    def __len__(self):
        """Return the number of image files found under ``root``."""
        return len(self.img_paths)

    def __getitem__(self, idx):
        """Load the image at position ``idx`` and apply the transform, if any."""
        img_path = self.img_paths[idx]
        sample = default_loader(img_path)
        if self.transform is not None:
            sample = self.transform(sample)
        return sample
def load_data(dataroot, image_size, batch_size, workers, ngpu, shuffle=True):
    """Build separate DataLoaders for the Monet and photo image folders.

    Images are read from ``<dataroot>/monet_jpg`` and
    ``<dataroot>/photo_jpg``. Each image is resized and center-cropped to
    ``image_size``, converted to a tensor and normalized per channel with
    mean 0.5 and std 0.5.

    Args:
        dataroot: Directory containing the ``monet_jpg`` and ``photo_jpg``
            sub-directories.
        image_size: Target size passed to ``Resize`` and ``CenterCrop``.
        batch_size: Number of samples per batch for both loaders.
        workers: Number of DataLoader worker processes.
        ngpu: Number of GPUs to use; ``0`` selects the CPU even when CUDA
            is available.
        shuffle: Whether both loaders reshuffle their data every epoch.

    Returns:
        Tuple ``(monet_dl, photo_dl, device)``.
    """
    # Imported locally because the snippet's import lines never bring
    # ``torch`` itself into scope, yet ``torch.device`` is needed below.
    import torch

    # set up transform (shared by both datasets)
    transform = transforms.Compose([
        transforms.Resize(image_size),
        transforms.CenterCrop(image_size),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    # create datasets: one per image folder, so they never mix
    monet_ds = MonetPhotoDataset(root=os.path.join(dataroot, 'monet_jpg'), transform=transform)
    photo_ds = MonetPhotoDataset(root=os.path.join(dataroot, 'photo_jpg'), transform=transform)

    # create dataloaders; ``shuffle`` was previously accepted but ignored
    monet_dl = DataLoader(monet_ds, batch_size=batch_size, shuffle=shuffle,
                          num_workers=workers)
    photo_dl = DataLoader(photo_ds, batch_size=batch_size, shuffle=shuffle,
                          num_workers=workers)

    # decide which device we want to run on; honour ``ngpu`` so that
    # ngpu == 0 forces CPU execution
    use_cuda = torch.cuda.is_available() and ngpu > 0
    device = torch.device("cuda:0" if use_cuda else "cpu")
    print("Data loaded...")
    return monet_dl, photo_dl, device
# Dataset root; load_data joins 'monet_jpg' and 'photo_jpg' onto it.
root = "../input/gan-getting-started"
# image_size, batch_size, workers and ngpu are not defined in this snippet;
# they are assumed to be set earlier by the caller.
monet_dl, photo_dl, device = load_data(root, image_size, batch_size, workers, ngpu)
P.S.:我保留了 load_data,是因为我假设您的代码依赖它的签名;否则我自己不会这样写。另外,上面的代码我没有测试过,可能会有一些笔误,但逻辑是正确的。
请注意,此数据集只返回图像(不返回标签)。