How to use a NumPy dataset in PyTorch Lightning
I want to create a dataset using NumPy and then train and test a simple model such as a linear or logistic regression. I am trying to learn PyTorch Lightning. I found a tutorial showing that we can use a NumPy dataset with a uniform distribution. As a newcomer, I don't have the full picture of how to do this.
My code is given below:
import numpy as np
import pytorch_lightning as pl
from torch.utils.data import random_split, DataLoader, TensorDataset
import torch
from torch.autograd import Variable
from torchvision import transforms

np.random.seed(42)
device = 'cuda' if torch.cuda.is_available() else 'cpu'

class DataModuleClass(pl.LightningDataModule):
    def __init__(self):
        super().__init__()
        self.constant = 2
        self.batch_size = 10
        self.transform = transforms.Compose([
            transforms.ToTensor()
        ])

    def prepare_data(self):
        a = np.random.uniform(0, 500, 500)
        b = np.random.normal(0, self.constant, len(a))
        c = a + b
        X = np.transpose(np.array([a, b]))

        idx = np.arange(500)
        np.random.shuffle(idx)

        # Use the first 400 random indices for training
        train_idx = idx[:400]
        # Use the remaining indices for validation
        val_idx = idx[400:]

        # Generate the train and validation datasets
        x_train, y_train = X[train_idx], c[train_idx]
        x_val, y_val = X[val_idx], c[val_idx]

        # Convert the NumPy arrays to tensors
        self.x_train_tensor = torch.from_numpy(x_train).float().to(device)
        self.y_train_tensor = torch.from_numpy(y_train).float().to(device)
        self.x_val_tensor = torch.from_numpy(x_val).float().to(device)
        self.y_val_tensor = torch.from_numpy(y_val).float().to(device)

        training_dataset = TensorDataset(self.x_train_tensor, self.y_train_tensor)
        validation_dataset = TensorDataset(self.x_val_tensor, self.y_val_tensor)
        return training_dataset, validation_dataset

    def train_dataloader(self):
        training_dataloader = prepare_data()  # Most probably this is the wrong way!!!
        return DataLoader(self.training_dataloader)

    def val_dataloader(self):
        validation_dataloader = prepare_data()  # Most probably this is the wrong way!!!
        return DataLoader(self.validation_dataloader)

    # def test_dataloader(self):

obj = DataModuleClass()
print(obj.prepare_data())
This part was completed based on the answer given here. [I want to use a and b as the features and c as the label or target variable.]
Now, how do I pass the dataset to the training and validation methods?
This code returns X, containing the two features a and b of 500 random samples merged together, and y as the labels.
import torch
from torch.autograd import Variable

def prepare_data(self):
    a = np.random.uniform(0, 500, 500)          # random feature 1, 500 values
    b = np.random.normal(0, 2, len(a))          # random feature 2, 500 values
    X = np.transpose(np.array([a, b]))          # merge features 1 and 2 into a 500 x 2 array
    y = np.random.randint(0, 2, len(a))         # random labels, 0 or 1

    X = Variable(torch.from_numpy(X).float())   # convert NumPy array X to a torch tensor with autograd enabled
    y = Variable(torch.from_numpy(y).float())   # convert NumPy array y to a torch tensor with autograd enabled
    return X, y
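As a side note, torch.autograd.Variable has been deprecated since PyTorch 0.4; Variable(...) now just returns an ordinary tensor, so the wrapper can simply be dropped:

# Equivalent without the deprecated Variable wrapper (PyTorch >= 0.4)
X = torch.from_numpy(X).float()
y = torch.from_numpy(y).float()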
You just have to return torch tensors:
import numpy as np
import torch
import pytorch_lightning as pl
from torch.utils.data import random_split, DataLoader
from torchvision import transforms

class DataModuleClass(pl.LightningDataModule):
    def __init__(self):
        super().__init__()
        self.constant = 2
        self.batch_size = 20
        self.transform = transforms.Compose([
            transforms.ToTensor()
        ])

    def prepare_data(self):
        a = np.random.uniform(0, 500, 500)
        b = np.random.normal(0, self.constant, len(a))
        c = a + b
        return torch.from_numpy(a).float(), torch.from_numpy(b).float(), torch.from_numpy(c).float()
You can get the data from prepare_data() or setup() using the following code:
def prepare_data(self):
    a = np.random.uniform(0, 500, 500)
    b = np.random.normal(0, self.constant, len(a))
    c = a + b
    X = np.transpose(np.array([a, b]))

    # Convert the NumPy arrays to tensors
    self.x_train_tensor = torch.from_numpy(X).float().to(device)
    self.y_train_tensor = torch.from_numpy(c).float().to(device)

    training_dataset = TensorDataset(self.x_train_tensor, self.y_train_tensor)
    self.training_dataset = training_dataset

def setup(self, stage=None):
    data = self.training_dataset
    self.train_data, self.val_data = random_split(data, [400, 100])

def train_dataloader(self):
    return DataLoader(self.train_data)

def val_dataloader(self):
    return DataLoader(self.val_data)
You can use random_split() to split the dataset.
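To cover the "train and test a simple model such as linear or logistic" part of the question, here is a minimal sketch of how this DataModule could be plugged into a Trainer. The LinearRegressor module, its layer size, loss, optimizer and learning rate are illustrative assumptions, not something from the original post:

import torch
from torch import nn
import pytorch_lightning as pl

class LinearRegressor(pl.LightningModule):
    # Hypothetical model for illustration: 2 input features (a, b) -> 1 output (c)
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(2, 1)

    def forward(self, x):
        return self.linear(x)

    def training_step(self, batch, batch_idx):
        x, y = batch                         # TensorDataset yields (features, target)
        y_hat = self(x).squeeze(-1)          # match the target shape (batch,)
        loss = nn.functional.mse_loss(y_hat, y)
        self.log("train_loss", loss)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x).squeeze(-1)
        self.log("val_loss", nn.functional.mse_loss(y_hat, y))

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=1e-2)

datamodule = DataModuleClass()
model = LinearRegressor()
trainer = pl.Trainer(max_epochs=5)
trainer.fit(model, datamodule=datamodule)

trainer.fit() calls prepare_data(), setup(), train_dataloader() and val_dataloader() on the DataModule itself, so nothing has to be passed around manually. Note also that Lightning moves each batch to the right device during training, so the .to(device) calls inside prepare_data() are not strictly needed.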