How to implement Flatten layer with batch size > 1 in Pytorch (Pytorch_Geometric)
I'm new to Pytorch and, because of memory issues, I'm trying to move my previous code from Tensorflow to Pytorch. However, some problems keep coming up when I try to reproduce the Flatten layer.
In my DataLoader object, batch_size is merged into the first dimension of the input (in my GNN, the input unpacked from the DataLoader object has size [batch_size*node_num, attribute_num], e.g. [4*896, 32] after a GCNConv layer). Basically, if I apply torch.flatten() after the GCNConv, the samples get mixed together (giving [4*896*32]) and the network only has 1 output, while I expect batch_size outputs. If I use nn.Flatten() instead, nothing seems to happen (still [4*896, 32]). Should I set batch_size as the first dim of the input from the start, or just use the view() function directly? I tried view() directly and it (seems to) work, although I'm not sure whether it is the same as Flatten. Please refer to my code below. I'm currently using global_max_pool because it works (it separates the batch_size dimension directly).
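For reference, here is a minimal toy sketch (not my real pipeline, just the example sizes from above) of why global_max_pool separates the batch: it reduces over the batch vector that PyG's DataLoader builds, so the result has one row per graph.
import torch
from torch_geometric.nn import global_max_pool

# 4 graphs with 896 nodes each, 32 features per node, stacked the way PyG batches them
x = torch.randn(4 * 896, 32)
batch = torch.arange(4).repeat_interleave(896)   # graph id of every node: [0,...,0,1,...,1,...]

out = global_max_pool(x, batch)                  # max over the nodes of each graph
print(out.size())                                # torch.Size([4, 32]) -- one row per graph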
By the way, I'm not sure why training is so slow in Pytorch... When node_num is raised to 13000, I need one hour to finish one epoch, and I have 100 epochs per test fold and 10 test folds. In Tensorflow the whole training process only took a few hours. The network architecture and raw input data are the same, as shown in my other post, which also describes the memory issue I ran into when using TF.
I've been frustrated for a while. I checked related posts, but their problems seem somewhat different from mine. Any help is greatly appreciated!
Code:
import gc
import numpy as np
import torch
from torch_geometric.data import InMemoryDataset, Data, DataLoader

# Generate dataset
class STDataset(InMemoryDataset):
    def __init__(self, root, transform=None, pre_transform=None):
        super(STDataset, self).__init__(root, transform, pre_transform)
        self.data, self.slices = torch.load(self.processed_paths[0])

    @property
    def raw_file_names(self):
        return []

    @property
    def processed_file_names(self):
        return ['pygdata.pt']

    def download(self):
        pass

    def process(self):
        data_list = []
        for i in range(sample_size):
            data = Data(x=torch.tensor(X_all[i], dtype=torch.float),
                        edge_index=edge_index,
                        y=torch.FloatTensor(y_all[i]))
            data_list.append(data)
        data, slices = self.collate(data_list)
        torch.save((data, slices), self.processed_paths[0])

dataset = STDataset(root=save_dir)
train_dataset = dataset[:len(X_train)]
val_dataset = dataset[len(X_train):(len(X_train)+len(X_val))]
test_dataset = dataset[(len(X_train)+len(X_val)):]
# Build network
from torch_geometric.nn import GCNConv, GATConv, TopKPooling, global_max_pool, global_mean_pool
from torch.nn import Flatten, Linear, ELU
import torch.nn.functional as F

class GCN(torch.nn.Module):
    def __init__(self):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(in_channels=feature_num, out_channels=32)
        self.conv2 = GCNConv(in_channels=32, out_channels=32)
        self.fc1 = Flatten()
        # self.ln1 = Linear(in_features=batch_size*N*32, out_features=512)
        self.ln1 = Linear(in_features=32, out_features=32)
        self.ln2 = Linear(in_features=32, out_features=1)

    def forward(self, x, edge_index, batch):
        # x, edge_index, batch = data.x, data.edge_index, data.batch
        # print(np.shape(x), np.shape(edge_index), np.shape(batch))
        x = F.elu(self.conv1(x, edge_index))
        # x = x.squeeze(1)
        x = F.elu(self.conv2(x, edge_index))
        print(np.shape(x))
        x = self.fc1(x)
        # x = torch.flatten(x, 0)
        # x = torch.cat([global_max_pool(x, batch), global_mean_pool(x, batch)], dim=1)
        print(np.shape(x))
        x = self.ln1(x)
        x = F.relu(x)
        ## Dropout?
        print("o")
        x = torch.sigmoid(self.ln2(x))
        return x
# training
def train():
    model.train()
    loss_all = 0
    correct = 0
    for i, data in enumerate(train_loader, 0):
        data = data.to(device)
        optimizer.zero_grad()
        output = model(data.x, data.edge_index, data.batch)
        label = data.y.to(device)
        loss = loss_func(output, label)
        loss.backward()
        loss_all += loss.item()
        output = output.detach().cpu().numpy().squeeze()
        label = label.detach().cpu().numpy().squeeze()
        correct += (abs(output - label) < 0.5).sum()
        optimizer.step()
    return loss_all / len(train_dataset), correct / len(train_dataset)

device = torch.device('cuda')
model = GCN().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
loss_func = torch.nn.BCELoss()  # binary cross-entropy
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)

for epoch in range(num_epochs):
    gc.collect()
    train_loss, train_acc = train()
Error message when using torch.nn.Flatten(start_dim=1) (with the code above):
ValueError Traceback (most recent call last)
<ipython-input-42-c96e8b058742> in <module>
65 for epoch in range(num_epochs):
66 gc.collect()
---> 67 train_loss, train_acc = train()
<ipython-input-42-c96e8b058742> in train()
10 output = model(data.x, data.edge_index,data.batch)
11 label = data.y.to(device)
---> 12 loss = loss_func(output, label)
13 loss.backward()
14 loss_all += loss.item()
~/miniconda3/envs/ST-Torch/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
530 result = self._slow_forward(*input, **kwargs)
531 else:
--> 532 result = self.forward(*input, **kwargs)
533 for hook in self._forward_hooks.values():
534 hook_result = hook(self, input, result)
~/miniconda3/envs/ST-Torch/lib/python3.7/site-packages/torch/nn/modules/loss.py in forward(self, input, target)
496
497 def forward(self, input, target):
--> 498 return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)
499
500
~/miniconda3/envs/ST-Torch/lib/python3.7/site-packages/torch/nn/functional.py in binary_cross_entropy(input, target, weight, size_average, reduce, reduction)
2068 if input.numel() != target.numel():
2069 raise ValueError("Target and input must have the same number of elements. target nelement ({}) "
-> 2070 "!= input nelement ({})".format(target.numel(), input.numel()))
2071
2072 if weight is not None:
ValueError: Target and input must have the same number of elements. target nelement (4) != input nelement (3584)
The shape you want, [batch_size*node_num, attribute_num], is a bit strange.
Usually it would be [batch_size, node_num*attribute_num], because you need to match the input to the output. Flatten in Pytorch does exactly that.
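As an illustration, here is a minimal sketch of that per-graph flatten (not the poster's original code; it assumes every graph has the same node_num = 896 and that ln1 is changed to Linear(896 * 32, 32)):
def forward(self, x, edge_index, batch):
    x = F.elu(self.conv1(x, edge_index))
    x = F.elu(self.conv2(x, edge_index))        # [batch_size * 896, 32]
    batch_size = int(batch.max().item()) + 1    # number of graphs in this mini-batch
    x = x.view(batch_size, 896 * 32)            # [batch_size, 896 * 32], one row per graph
    x = F.relu(self.ln1(x))                     # requires ln1 = Linear(896 * 32, 32)
    x = torch.sigmoid(self.ln2(x))              # [batch_size, 1]
    return x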
If what you really want is [batch_size*node_num, attribute_num], then you are left with reshaping the tensor using view or reshape. In fact, Flatten itself just calls .reshape.
tensor.view: reshapes the existing tensor to a new shape; if you edit the new tensor, the old one changes too, since they share the same storage.
tensor.reshape: creates a new tensor using the data from the old tensor, but with the new shape.
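A small demo of that difference (a sketch, not from the original answer): view always shares storage with the original tensor, while reshape falls back to a copy when the memory layout does not allow a view.
import torch

a = torch.arange(6)              # tensor([0, 1, 2, 3, 4, 5])
b = a.view(2, 3)                 # same underlying storage as `a`
b[0, 0] = 100
print(a[0])                      # tensor(100) -- editing the view changed `a` too

c = b.t().reshape(6)             # b.t() is non-contiguous, so reshape has to copy here
c[0] = -1
print(b[0, 0])                   # tensor(100) -- `b` is unchanged, `c` is a separate tensor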
def forward(self, x, edge_index, batch):
    x = F.elu(self.conv1(x, edge_index))
    x = F.elu(self.conv2(x, edge_index))
    # print(np.shape(x))  # don't use this
    print(x.size())  # use this

    # x = self.fc1(x)  # this is the old one
    ## choose one of these
    x = x.view(4*896, 32)
    x = x.reshape(4*896, 32)

    # print(np.shape(x))  # don't use this
    print(x.size())  # use this
    x = self.ln1(x)
    x = F.relu(x)
    ## Dropout?
    print("o")
    x = torch.sigmoid(self.ln2(x))
    return x
Edit 2: Reshape
Suppose we have an array [[[1, 1, 1], [2, 2, 2]]] with shape (1, 2, 3), which in Tensorflow stands for (batch, length, channel).
If you want to use this data correctly in Pytorch, you need to make it (batch, channel, length), i.e. (1, 3, 2).
This is the difference between permute and reshape:
>>> x = torch.tensor([[[1, 1, 1], [2, 2, 2]]])
>>> x.size()
torch.Size([1, 2, 3])
>>> x[0, 0, :]
tensor([1, 1, 1])
>>> y = x.reshape((1, 3, 2))
>>> y
tensor([[[1, 1],
[1, 2],
[2, 2]]])
>>> y[0, :, 0]
tensor([1, 1, 2])
>>> z = x.permute(0, 2, 1)
>>> z
tensor([[[1, 2],
[1, 2],
[1, 2]]])
>>> z[0, :, 0]
tensor([1, 1, 1])
As you can see, the first channel of both x and z is [1, 1, 1], which is what we want, while for y it is [1, 1, 2].
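In practice this matters when the data goes into a layer that interprets those dimensions, e.g. a Conv1d, which in Pytorch expects (batch, channel, length). A minimal usage sketch (the Conv1d sizes here are made up for illustration):
import torch
import torch.nn as nn

x_tf = torch.randn(1, 2, 3)            # data laid out TF-style: (batch, length, channel)
x_pt = x_tf.permute(0, 2, 1)           # (batch, channel, length) = (1, 3, 2); channels stay intact

conv = nn.Conv1d(in_channels=3, out_channels=8, kernel_size=1)
out = conv(x_pt)
print(out.size())                      # torch.Size([1, 8, 2])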