How to implement Flatten layer with batch size > 1 in Pytorch (Pytorch_Geometric)

I am new to Pytorch, and I am trying to move my previous code from Tensorflow to Pytorch because of memory issues. However, some problems keep coming up when I try to reproduce the Flatten layer.

In my DataLoader object, batch_size is mixed into the first dimension of the input (in my GNN, the input unpacked from the DataLoader object has size [batch_size*node_num, attribute_num], e.g. [4*896, 32] after a GCNConv layer). Basically, if I apply torch.flatten() after GCNConv, the samples get mixed together (giving [4*896*32]) and the network produces only 1 output, while I expect batch_size outputs. If I use nn.Flatten() instead, nothing seems to happen (the shape stays [4*896, 32]). Should I set batch_size as the first input dimension from the start, or should I just use the view() function directly? I tried using view() directly and it (seemingly) worked, although I am not sure whether it is the same as Flatten. Please refer to my code below. I am currently using global_max_pool because it works (it separates batch_size directly).
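(For reference, a minimal sketch of what the batching looks like; the sizes and the dummy edge_index below are made up for illustration:)

import torch
from torch_geometric.data import Data, DataLoader
from torch_geometric.nn import global_max_pool

node_num, attribute_num, batch_size = 896, 32, 4
graphs = [Data(x=torch.randn(node_num, attribute_num),
               edge_index=torch.tensor([[0], [1]]))   # dummy single edge
          for _ in range(batch_size)]
loader = DataLoader(graphs, batch_size=batch_size)

batch = next(iter(loader))
print(batch.x.size())      # torch.Size([3584, 32]) -> graphs stacked along dim 0
print(batch.batch.size())  # torch.Size([3584])     -> graph index of every node
pooled = global_max_pool(batch.x, batch.batch)
print(pooled.size())       # torch.Size([4, 32])    -> one row per graph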

By the way, I am not sure why training is so slow in Pytorch... When node_num is raised to 13000, it takes me an hour to finish one epoch, and I run 100 epochs per test fold with 10 test folds. In Tensorflow the whole training takes only a few hours, with the same network architecture and the same raw input data, as shown in my other post, which also describes the memory issue I ran into with TF.

I have been frustrated about this for a while. I checked this and this post, but their problems seem somewhat different from mine. Any help is greatly appreciated!

Code:

import gc
import numpy as np
import torch
from torch_geometric.data import InMemoryDataset, Data, DataLoader

# Generate dataset
class STDataset(InMemoryDataset):
    def __init__(self, root, transform=None, pre_transform=None):
        super(STDataset, self).__init__(root, transform, pre_transform)
        self.data, self.slices = torch.load(self.processed_paths[0])
    
    @property
    def raw_file_names(self):
        return []

    @property
    def processed_file_names(self):
        return ['pygdata.pt']

    def download(self):
        pass

    def process(self):
        data_list= []
        for i in range(sample_size):
            data = Data(x=torch.tensor(X_all[i],dtype=torch.float),edge_index=edge_index,y=torch.FloatTensor(y_all[i]))
            data_list.append(data)

        data, slices = self.collate(data_list)
        torch.save((data, slices), self.processed_paths[0])
        
dataset = STDataset(root=save_dir)
train_dataset = dataset[:len(X_train)]
val_dataset = dataset[len(X_train):(len(X_train)+len(X_val))]
test_dataset = dataset[(len(X_train)+len(X_val)):]


# Build network

from torch_geometric.nn import GCNConv, GATConv, TopKPooling, global_max_pool, global_mean_pool
from torch.nn import Flatten, Linear, ELU
import torch.nn.functional as F

class GCN(torch.nn.Module):
    def __init__(self):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(in_channels = feature_num, out_channels = 32)
        self.conv2 = GCNConv(in_channels = 32, out_channels = 32)
        self.fc1 = Flatten()
#         self.ln1 = Linear(in_features = batch_size*N*32, out_features = 512) 
        self.ln1 = Linear(in_features = 32, out_features = 32)
        self.ln2 = Linear(in_features = 32, out_features = 1) 

    
    def forward(self,x,edge_index,batch):   
#         x, edge_index, batch = data.x, data.edge_index, data.batch
#         print(np.shape(x),np.shape(edge_index),np.shape(batch))
        x = F.elu(self.conv1(x,edge_index))
#         x = x.squeeze(1) 
        x = F.elu(self.conv2(x,edge_index))
        print(np.shape(x))
        x = self.fc1(x)
#         x = torch.flatten(x,0)
#         x = torch.cat([global_max_pool(x,batch),global_mean_pool(x,batch)],dim=1)
        print(np.shape(x))
        x = self.ln1(x)
        x = F.relu(x)
        ## Dropout?
        print("o")
        x = torch.sigmoid(self.ln2(x))
        return x
        
# training
def train():
    model.train()
    loss_all=0
    correct = 0
    for i, data in enumerate(train_loader, 0):
        data = data.to(device)
        optimizer.zero_grad() 
        output = model(data.x, data.edge_index,data.batch)
        label = data.y.to(device)
        loss = loss_func(output, label)
        loss.backward()
        loss_all += loss.item()
        
        output = output.detach().cpu().numpy().squeeze()
        label = label.detach().cpu().numpy().squeeze()        
        correct += (abs(output-label)<0.5).sum()
        
        optimizer.step()
  
    return loss_all / len(train_dataset), correct / len(train_dataset)

device = torch.device('cuda')
model = GCN().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
loss_func = torch.nn.BCELoss()  # binary cross-entropy
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle = True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle = True)
for epoch in range(num_epochs):
    gc.collect()
    train_loss, train_acc = train()    


Error message when using torch.nn.Flatten(start_dim = 1) (code above):

ValueError                                Traceback (most recent call last)
<ipython-input-42-c96e8b058742> in <module>
     65 for epoch in range(num_epochs):
     66     gc.collect()
---> 67     train_loss, train_acc = train()

<ipython-input-42-c96e8b058742> in train()
     10         output = model(data.x, data.edge_index,data.batch)
     11         label = data.y.to(device)
---> 12         loss = loss_func(output, label)
     13         loss.backward()
     14         loss_all += loss.item()

~/miniconda3/envs/ST-Torch/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    530             result = self._slow_forward(*input, **kwargs)
    531         else:
--> 532             result = self.forward(*input, **kwargs)
    533         for hook in self._forward_hooks.values():
    534             hook_result = hook(self, input, result)

~/miniconda3/envs/ST-Torch/lib/python3.7/site-packages/torch/nn/modules/loss.py in forward(self, input, target)
    496 
    497     def forward(self, input, target):
--> 498         return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)
    499 
    500 

~/miniconda3/envs/ST-Torch/lib/python3.7/site-packages/torch/nn/functional.py in binary_cross_entropy(input, target, weight, size_average, reduce, reduction)
   2068     if input.numel() != target.numel():
   2069         raise ValueError("Target and input must have the same number of elements. target nelement ({}) "
-> 2070                          "!= input nelement ({})".format(target.numel(), input.numel()))
   2071 
   2072     if weight is not None:

ValueError: Target and input must have the same number of elements. target nelement (4) != input nelement (3584)

The shape you want, batch_size*node_num, attribute_num, is a bit odd.

Usually it would be batch_size, node_num*attribute_num, because you need to match the input with the output. Flatten in Pytorch does exactly that.
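Concretely, that would look roughly like the following (a sketch only, with illustrative sizes, assuming every graph has exactly node_num nodes):

import torch

batch_size, node_num, attribute_num = 4, 896, 32        # illustrative sizes
x = torch.randn(batch_size * node_num, attribute_num)   # output of the conv layers

# one row per graph: [batch_size, node_num*attribute_num]
x = x.view(batch_size, node_num * attribute_num)
print(x.size())        # torch.Size([4, 28672])

# the next Linear layer then has to expect node_num*attribute_num input features
ln1 = torch.nn.Linear(in_features=node_num * attribute_num, out_features=32)
print(ln1(x).size())   # torch.Size([4, 32])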

If what you really want is batch_size*node_num, attribute_num, then you are left with reshaping the tensor using view or reshape. In fact, Flatten itself just calls .reshape.

tensor.view: This reshapes the existing tensor to a new shape without copying its data; if you edit the new tensor, the old one changes too.

tensor.reshape: This returns a tensor with the new shape built from the old tensor's data; it returns a view when possible and copies the data otherwise.
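A tiny example of the aliasing behaviour of view (arbitrary numbers):

>>> import torch
>>> a = torch.zeros(2, 3)
>>> b = a.view(6)   # same storage, new shape
>>> b[0] = 1
>>> a               # editing the view changed the original
tensor([[1., 0., 0.],
        [0., 0., 0.]])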

    def forward(self,x,edge_index,batch):
        x = F.elu(self.conv1(x,edge_index))
        x = F.elu(self.conv2(x,edge_index))

        # print(np.shape(x)) # don't use this
        print(x.size())  # use this

        # x = self.fc1(x)  # this is the old one
        ## choose one of these (view shown here; reshape works the same way)
        x = x.view(4*896, 32)
        # x = x.reshape(4*896, 32)

        # print(np.shape(x)) # don't use this
        print(x.size())  # use this

        x = self.ln1(x)
        x = F.relu(x)
        ## Dropout?
        print("o")
        x = torch.sigmoid(self.ln2(x))
        return x
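If your graphs do not all have the same number of nodes (or if the last batch is smaller), the pooling readout you already tried is the more robust choice, since it always produces one row per graph. A sketch reusing the global_max_pool/global_mean_pool imports from your code (note that ln1 would then need 64 input features):

    def forward(self, x, edge_index, batch):
        x = F.elu(self.conv1(x, edge_index))
        x = F.elu(self.conv2(x, edge_index))

        # readout: [batch_size*node_num, 32] -> [batch_size, 64]
        x = torch.cat([global_max_pool(x, batch), global_mean_pool(x, batch)], dim=1)

        x = F.relu(self.ln1(x))            # ln1 = Linear(64, 32) in this variant
        x = torch.sigmoid(self.ln2(x))
        return x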

Edit 2: Reshaping

Suppose we have an array [[[1, 1, 1], [2, 2, 2]]] with shape (1, 2, 3), which in Tensorflow means (batch, length, channel).

If you want to use this data correctly in Pytorch, you need to make it (batch, channel, length), i.e. (1, 3, 2).

This is the difference between permute and reshape:
>>> x = torch.tensor([[[1, 1, 1], [2, 2, 2]]])
>>> x.size()
torch.Size([1, 2, 3])
>>> x[0, 0, :]
tensor([1, 1, 1])
>>> y = x.reshape((1, 3, 2))
>>> y
tensor([[[1, 1],
         [1, 2],
         [2, 2]]])
>>> y[0, :, 0]
tensor([1, 1, 2])
>>> z = x.permute(0, 2, 1)
>>> z
tensor([[[1, 2],
         [1, 2],
         [1, 2]]])
>>> z[0, :, 0]
tensor([1, 1, 1])

As you can see, the first channel of both x and z is [1, 1, 1], which is what we want, while y's is [1, 1, 2].