Normalize MNIST in PyTorch
I am trying to normalize the MNIST dataset to the range [0, 1] in PyTorch 1.9 and Python 3.8 with the following code (batch_size = 32).
import torch
import torchvision
from torchvision import transforms

batch_size = 32

# Specify path to MNIST dataset-
path_to_data = "path_to_dataset"

# Define transformation(s) to be applied to dataset-
transforms_MNIST = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean = (0.1307,), std = (0.3081,))
    ]
)
# Load MNIST dataset-
train_dataset = torchvision.datasets.MNIST(
    # root = './data', train = True,
    root = path_to_data + "data", train = True,
    transform = transforms_MNIST, download = True
)

test_dataset = torchvision.datasets.MNIST(
    # root = './data', train = False,
    root = path_to_data + "data", train = False,
    transform = transforms_MNIST
)
# Create training and testing dataloaders-
train_loader = torch.utils.data.DataLoader(
    dataset = train_dataset, batch_size = batch_size,
    shuffle = True
)

test_loader = torch.utils.data.DataLoader(
    dataset = test_dataset, batch_size = batch_size,
    shuffle = False
)
print(f"Sizes of train_dataset: {len(train_dataset)} and test_dataet: {len(test_dataset)}")
print(f"Sizes of train_loader: {len(train_loader)} and test_loader: {len(test_loader)}")
# Sizes of train_dataset: 60000 and test_dataet: 10000
# Sizes of train_loader: 1875 and test_loader: 313
# Sanity check on the raw '.data' tensors (transforms are not applied here)-
print(f"train_dataset: min pixel value = {train_dataset.data.min().numpy():.3f} &"
      f" max pixel value = {train_dataset.data.max().numpy():.3f}")
# train_dataset: min pixel value = 0.000 & max pixel value = 255.000
print(f"test_dataset: min pixel value = {test_dataset.data.min().numpy():.3f} &"
      f" max pixel value = {test_dataset.data.max().numpy():.3f}")
# test_dataset: min pixel value = 0.000 & max pixel value = 255.000
print(f"len(train_loader) = {len(train_loader)} & len(test_loader) = {len(test_loader)}")
# len(train_loader) = 1875 & len(test_loader) = 313
# Sanity check: the test set does not divide evenly by the batch size; the
# DataLoader keeps the final smaller batch (drop_last = False by default),
# which is why len(test_loader) is ceil(312.5) = 313-
len(train_dataset) / batch_size, len(test_dataset) / batch_size
# (1875.0, 312.5)
# Get some random batch of training images & labels-
images, labels = next(iter(train_loader))
# You get 'batch_size' (= 32) images per batch-
print(f"images.shape: {images.shape} & labels.shape: {labels.shape}")
# images.shape: torch.Size([32, 1, 28, 28]) & labels.shape: torch.Size([32])
# Get min and max values for normalized pixels in mini-batch-
images.min(), images.max()
# (tensor(-0.4242), tensor(2.8215))
The min and max values of 'images' should be between 0 and 1, but instead they are -0.4242 and 2.8215. What is going wrong?
This happens because Normalize applies what is actually (also) called standardization: output = (input - mean) / std.
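You can reproduce the exact values you observed by plugging the endpoints of the [0, 1] range produced by ToTensor into that formula. A quick check in plain Python, reusing the mean/std from your transform:

# Standardizing the [0, 1] endpoints with MNIST's mean = 0.1307 and std = 0.3081-
mean, std = 0.1307, 0.3081
print((0.0 - mean) / std)    # -0.4242... -> matches images.min()
print((1.0 - mean) / std)    #  2.8214... -> matches images.max()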
The [0, 1] normalization you want is already performed by ToTensor when the images are loaded, so you can simply comment out the Normalize transform.
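For example, a minimal sketch reusing the names from your code (path_to_data, batch_size), with only ToTensor in the pipeline:

# Keep only ToTensor, which scales the uint8 pixels [0, 255] to floats in [0, 1]-
transforms_MNIST = transforms.Compose(
    [
        transforms.ToTensor()
        # transforms.Normalize(mean = (0.1307,), std = (0.3081,))
    ]
)

train_dataset = torchvision.datasets.MNIST(
    root = path_to_data + "data", train = True,
    transform = transforms_MNIST, download = True
)
train_loader = torch.utils.data.DataLoader(
    dataset = train_dataset, batch_size = batch_size, shuffle = True
)

images, labels = next(iter(train_loader))
print(images.min(), images.max())
# -> tensor(0.) tensor(1.) for a typical batch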