Using BatchNorm1d layer with Embedding and Linear layers for NLP text-classification problem throws RuntimeError
I am trying to create a neural network and train my own embeddings. The network has the following structure (PyTorch):
import torch.nn as nn

class MultiClassClassifer(nn.Module):
    #define all the layers used in model
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim):
        #Constructor
        super(MultiClassClassifer, self).__init__()
        #embedding layer
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        #dense layer
        self.hiddenLayer = nn.Linear(embedding_dim, hidden_dim)
        #Batch normalization layer
        self.batchnorm = nn.BatchNorm1d(hidden_dim)
        #output layer
        self.output = nn.Linear(hidden_dim, output_dim)
        #activation layer
        self.act = nn.Softmax(dim=1) #2d-tensor
        #initialize weights of embedding layer
        self.init_weights()

    def init_weights(self):
        initrange = 1.0
        self.embedding.weight.data.uniform_(-initrange, initrange)

    def forward(self, text):
        embedded = self.embedding(text)
        hidden_1 = self.batchnorm(self.hiddenLayer(embedded))
        return self.act(self.output(hidden_1))
My training_iterator object looks like:
batch = next(iter(train_iterator))
batch.text_normalized_tweet[0]
tensor([[ 240, 538, 305, 73, 9, 780, 2038, 13, 48, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1],
[ 853, 57, 2, 70, 1875, 176, 466, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1],
...])
with shape torch.Size([32, 25]), where 32 = the batch_size I used when creating the training iterator with data.BucketIterator, and 25 = the sequence length of this batch.
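For context, here is a minimal sketch of how such an iterator is typically built with torchtext's legacy API. The field and dataset names are assumptions; only batch_size=32 and the fact that the text field also returns lengths are taken from the question:

from torchtext.legacy import data

# Assumed setup (not shown in the question): TEXT is the field behind
# text_normalized_tweet; include_lengths=True is what makes a batch yield
# the (text, text_lengths) pair, and batch_first=True gives [batch, seq_len].
TEXT = data.Field(include_lengths=True, batch_first=True)
LABEL = data.LabelField()

def make_iterator(train_data):  # train_data: a torchtext data.Dataset (assumed to exist)
    return data.BucketIterator(
        train_data,
        batch_size=32,
        sort_key=lambda ex: len(ex.text_normalized_tweet),
        sort_within_batch=True,
    )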
When I create a model instance:
INPUT_DIM = len(TEXT.vocab) #~5,000 tokens
EMBEDDING_DIM = 100
HIDDEN_DIM = 64
OUTPUT_DIM = 3 #target has 3 classes
model = MultiClassClassifer(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM)
and run
model(batch.text_normalized_tweet[0]).squeeze(1)
I get the following RuntimeError:
RuntimeError: running_mean should contain 15 elements not 64
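The numbers in the error come from how BatchNorm1d interprets a 3D input: dim 1 is treated as the channel dimension, so with an input of [batch, seq_len, hidden_dim] it compares its 64-element running_mean against seq_len (apparently 15 for the batch that triggered the error). A minimal reproduction, with shapes assumed from the question:

import torch
import torch.nn as nn

bn = nn.BatchNorm1d(64)      # expects dim 1 of a 3D input to be 64
x = torch.randn(32, 15, 64)  # [batch, seq_len, hidden_dim], as produced by Linear(embedding_dim, hidden_dim)
bn(x)                        # RuntimeError: running_mean should contain 15 elements not 64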
You can also find my Colab notebook here.
The output of the embedding layer is [batch, seqlen, F], and as you can see in the BatchNorm1d documentation, it needs an input of shape [batch, F, seqlen]. You should add a transpose to get the desired shape:
import torch.nn as nn

class MultiClassClassifer(nn.Module):
    #define all the layers used in model
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim):
        #Constructor
        super(MultiClassClassifer, self).__init__()
        #embedding layer
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        #dense layer
        self.hiddenLayer = nn.Linear(embedding_dim, hidden_dim)
        #Batch normalization layer
        self.batchnorm = nn.BatchNorm1d(hidden_dim)
        #output layer
        self.output = nn.Linear(hidden_dim, output_dim)
        #activation layer
        self.act = nn.Softmax(dim=1) #2d-tensor
        #initialize weights of embedding layer
        self.init_weights()

    def init_weights(self):
        initrange = 1.0
        self.embedding.weight.data.uniform_(-initrange, initrange)

    def forward(self, text):
        embedded = self.embedding(text)                      # [batch, seq_len, embedding_dim]
        hidden = self.hiddenLayer(embedded)                  # [batch, seq_len, hidden_dim]
        hidden_T = hidden.transpose(1, 2)                    # [batch, hidden_dim, seq_len], as BatchNorm1d expects
        hidden_1 = self.batchnorm(hidden_T).transpose(1, 2)  # back to [batch, seq_len, hidden_dim] for the output layer
        return self.act(self.output(hidden_1))
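A quick sanity check with the dimensions from the question (vocab of ~5,000 tokens, a batch of 32 sequences of length 25) confirms that the shapes now line up; the batch of token ids below is purely illustrative:

import torch

m = MultiClassClassifer(5000, 100, 64, 3)
x = torch.randint(0, 5000, (32, 25))  # fake batch of token ids, [batch, seq_len]
print(m(x).shape)                     # torch.Size([32, 25, 3])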
I found a workaround based on the example given by @jhso above.
INPUT_DIM = len(TEXT.vocab) #~5,000 tokens
EMBEDDING_DIM = 100
HIDDEN_DIM = 64
e = nn.Embedding(INPUT_DIM, EMBEDDING_DIM)
l = nn.Linear(EMBEDDING_DIM, HIDDEN_DIM)
b = nn.BatchNorm1d(HIDDEN_DIM)
soft = nn.Softmax(dim=1)
out = nn.Linear(HIDDEN_DIM, 3)
text, text_lengths = batch.text_normalized_tweet
y = e(text)
packed = nn.utils.rnn.pack_padded_sequence(y, text_lengths, batch_first=True) #added rnn.pack_padded_sequence
tensor, batch_size = packed[0], packed[1]  # packed data: [sum(text_lengths), EMBEDDING_DIM], and the per-step batch sizes
y = b(l(tensor))
I added the pack_padded_sequence() method from the utils.rnn package, which takes the embeddings as input. I also had to unpack text and text_lengths, because of the way I created the training_iterator: it returns 2 outputs (text, text_lengths).
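Why this works: pack_padded_sequence() concatenates only the real (non-padded) timesteps into one 2D tensor, so the Linear + BatchNorm1d pair sees the plain [N, features] layout it expects. A small sketch with the question's dimensions (the lengths here are made up):

import torch
import torch.nn as nn

# A padded batch of 32 sequences of length 25, each token embedded into 100 dims.
y = torch.randn(32, 25, 100)
lengths = torch.randint(5, 26, (32,)).sort(descending=True).values  # fake lengths, sorted as pack_padded_sequence requires

packed = nn.utils.rnn.pack_padded_sequence(y, lengths, batch_first=True)
print(packed.data.shape)  # [sum(lengths), 100] -> 2D, so BatchNorm1d(64) after Linear(100, 64)
                          # normalizes over real (non-pad) tokens only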