How to incorporate ELMo into the simple classifier from the AllenNLP Guide
I am a beginner and English is not my native language, so I may ask poor questions. Sorry!
I recently finished the official AllenNLP tutorial (https://guide.allennlp.org/training-and-prediction) and would like to change the simple classifier's word embeddings to ELMo.
I would also like to make the classifier's architecture more complex to improve its accuracy.
I think I have finished implementing the model.
simple_classifier.py
from typing import Dict

import torch
from allennlp.data import TextFieldTensors, Vocabulary
from allennlp.models import Model
from allennlp.modules import Seq2VecEncoder, TextFieldEmbedder
from allennlp.nn import util
from allennlp.training.metrics import CategoricalAccuracy


@Model.register("simple_classifier")
class SimpleClassifier(Model):
    def __init__(
        self, vocab: Vocabulary, embedder: TextFieldEmbedder, encoder: Seq2VecEncoder
    ):
        super().__init__(vocab)
        self.embedder = embedder
        self.encoder = encoder
        num_labels = vocab.get_vocab_size("labels")
        self.dropout = torch.nn.Dropout(p=0.2)  # defined but not yet applied in forward()
        self.relu = torch.nn.ReLU()
        self.layer1 = torch.nn.Linear(encoder.get_output_dim(), 512)
        self.layer2 = torch.nn.Linear(512, 128)
        self.layer3 = torch.nn.Linear(128, 50)
        self.layer4 = torch.nn.Linear(50, 10)
        self.classifier = torch.nn.Linear(10, num_labels)
        self.accuracy = CategoricalAccuracy()

    def forward(
        self, text: TextFieldTensors, label: torch.Tensor = None
    ) -> Dict[str, torch.Tensor]:
        # Shape: (batch_size, num_tokens, embedding_dim)
        embedded_text = self.embedder(text)
        # Shape: (batch_size, num_tokens)
        mask = util.get_text_field_mask(text)
        # Shape: (batch_size, encoding_dim)
        encoded_text = self.encoder(embedded_text, mask)
        x = self.relu(self.layer1(encoded_text))
        x = self.relu(self.layer2(x))
        x = self.relu(self.layer3(x))
        x = self.relu(self.layer4(x))
        # Shape: (batch_size, num_labels)
        logits = self.classifier(x)
        # Shape: (batch_size, num_labels)
        probs = torch.nn.functional.softmax(logits, dim=-1)
        output = {"probs": probs}
        if label is not None:
            self.accuracy(logits, label)
            # Shape: (1,)
            output["loss"] = torch.nn.functional.cross_entropy(logits, label)
        return output

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        return {"accuracy": self.accuracy.get_metric(reset)}
However, I do not know how to change the configuration file.
How should I modify the following config file from the official tutorial so that it uses ELMo?
my_text_classifier.jsonnet
{
    "dataset_reader": {
        "type": "classification-tsv",
        "token_indexers": {
            "tokens": {
                "type": "single_id"
            }
        }
    },
    "train_data_path": "data/movie_review/train.tsv",
    "validation_data_path": "data/movie_review/dev.tsv",
    "model": {
        "type": "simple_classifier",
        "embedder": {
            "token_embedders": {
                "tokens": {
                    "type": "embedding",
                    "embedding_dim": 10
                }
            }
        },
        "encoder": {
            "type": "bag_of_embeddings",
            "embedding_dim": 10
        }
    },
    "data_loader": {
        "batch_size": 8,
        "shuffle": true
    },
    "trainer": {
        "optimizer": "adam",
        "num_epochs": 5
    }
}
I would be very glad if someone could help me.
Check out how the BiDAF model uses ELMo: https://raw.githubusercontent.com/allenai/allennlp-models/main/training_config/rc/bidaf_elmo.jsonnet
You can steal some components of that config. You will need a token embedder under the name "elmo", and I believe token indexers under both "tokens" and "elmo".
This should run without writing any code.
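For concreteness, here is a minimal sketch of what the adapted config could look like, assuming only the ELMo embedder is used (no additional "tokens" embedding). The "elmo_characters" indexer and "elmo_token_embedder" embedder are AllenNLP's registered names for ELMo; the options_file / weight_file URLs and the 1024-dimensional output correspond to the commonly used pre-trained ELMo release, so double-check them against the bidaf_elmo.jsonnet linked above before training.
my_text_classifier_elmo.jsonnet
{
    "dataset_reader": {
        "type": "classification-tsv",
        "token_indexers": {
            // ELMo builds embeddings from characters, so it needs its own indexer.
            "elmo": {
                "type": "elmo_characters"
            }
        }
    },
    "train_data_path": "data/movie_review/train.tsv",
    "validation_data_path": "data/movie_review/dev.tsv",
    "model": {
        "type": "simple_classifier",
        "embedder": {
            "token_embedders": {
                // This key must match the indexer name above.
                "elmo": {
                    "type": "elmo_token_embedder",
                    // Pre-trained ELMo files (assumed URLs; confirm against bidaf_elmo.jsonnet).
                    "options_file": "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json",
                    "weight_file": "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5",
                    "do_layer_norm": false,
                    "dropout": 0.0
                }
            }
        },
        "encoder": {
            // The full pre-trained ELMo representation is 1024-dimensional.
            "type": "bag_of_embeddings",
            "embedding_dim": 1024
        }
    },
    "data_loader": {
        "batch_size": 8,
        "shuffle": true
    },
    "trainer": {
        "optimizer": "adam",
        "num_epochs": 5
    }
}
Because SimpleClassifier sizes its first linear layer from encoder.get_output_dim(), no Python changes should be needed; only the config differs. If you also want to keep a regular "tokens" embedding alongside ELMo (as the BiDAF config does), add both indexers and both token embedders and set the encoder's embedding_dim to the sum of the two embedding sizes, since the text field embedder concatenates their outputs.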