RuntimeError: CUDA out of memory | Elastic Search
I am new to machine learning. I have already successfully resolved the errors related to parameters and model setup.
I am using this Notebook, with the Apply DocumentClassifier section changed as shown below.
JupyterLab, kernel: conda_mxnet_latest_p37.
The error seems to be more about my laptop's hardware than about my code being broken.
Update: I changed to batch_size=4 and it ran much longer before crashing.
What should be the standard approach to resolving this error?
My code:
# imports assumed from earlier cells of the notebook (Haystack 1.x)
from haystack.nodes import TransformersDocumentClassifier, PreProcessor
from haystack.schema import Document
from haystack.utils import convert_files_to_dicts

# read the candidate labels, one per line
with open('filt_gri.txt', 'r') as filehandle:
    tags = [current_place.rstrip() for current_place in filehandle.readlines()]

doc_classifier = TransformersDocumentClassifier(model_name_or_path="cross-encoder/nli-distilroberta-base",
                                                task="zero-shot-classification",
                                                labels=tags,
                                                batch_size=4)

# convert to Document using a fieldmap for custom content fields the classification should run on
docs_to_classify = [Document.from_dict(d) for d in docs_sliding_window]

# classify using gpu, batch_size makes sure we do not run out of memory
classified_docs = doc_classifier.predict(docs_to_classify)

# let's see how it looks: there should be a classification result in the meta entry containing labels and scores.
print(classified_docs[0].to_dict())

# preprocessing configuration (from another cell; docs_sliding_window is built from it)
all_docs = convert_files_to_dicts(dir_path=doc_dir)
preprocessor_sliding_window = PreProcessor(split_overlap=3,
                                           split_length=10,
                                           split_respect_sentence_boundary=False,
                                           split_by='passage')
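For reference, a minimal sketch of how GPU memory usage can be checked around the predict() call (my own illustration, assuming plain PyTorch in the same kernel; report_gpu_memory is just a hypothetical helper name):

import torch

def report_gpu_memory(tag, device=0):
    # one-line summary of current GPU memory usage, in GiB
    allocated = torch.cuda.memory_allocated(device) / 1024 ** 3
    reserved = torch.cuda.memory_reserved(device) / 1024 ** 3
    total = torch.cuda.get_device_properties(device).total_memory / 1024 ** 3
    print(f"[{tag}] allocated={allocated:.2f} GiB, reserved={reserved:.2f} GiB, total={total:.2f} GiB")

report_gpu_memory("before predict")
classified_docs = doc_classifier.predict(docs_to_classify)
report_gpu_memory("after predict")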
Error:
INFO - haystack.modeling.utils - Using devices: CUDA
INFO - haystack.modeling.utils - Number of GPUs: 1
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-25-27dfca549a7d> in <module>
14
15 # classify using gpu, batch_size makes sure we do not run out of memory
---> 16 classified_docs = doc_classifier.predict(docs_to_classify)
17
18 # let's see how it looks: there should be a classification result in the meta entry containing labels and scores.
~/anaconda3/envs/mxnet_latest_p37/lib/python3.7/site-packages/haystack/nodes/document_classifier/transformers.py in predict(self, documents)
137 batches = self.get_batches(texts, batch_size=self.batch_size)
138 if self.task == 'zero-shot-classification':
--> 139 batched_predictions = [self.model(batch, candidate_labels=self.labels, truncation=True) for batch in batches]
140 elif self.task == 'text-classification':
141 batched_predictions = [self.model(batch, return_all_scores=self.return_all_scores, truncation=True) for batch in batches]
~/anaconda3/envs/mxnet_latest_p37/lib/python3.7/site-packages/haystack/nodes/document_classifier/transformers.py in <listcomp>(.0)
137 batches = self.get_batches(texts, batch_size=self.batch_size)
138 if self.task == 'zero-shot-classification':
--> 139 batched_predictions = [self.model(batch, candidate_labels=self.labels, truncation=True) for batch in batches]
140 elif self.task == 'text-classification':
141 batched_predictions = [self.model(batch, return_all_scores=self.return_all_scores, truncation=True) for batch in batches]
~/anaconda3/envs/mxnet_latest_p37/lib/python3.7/site-packages/transformers/pipelines/zero_shot_classification.py in __call__(self, sequences, candidate_labels, hypothesis_template, multi_label, **kwargs)
151 sequences = [sequences]
152
--> 153 outputs = super().__call__(sequences, candidate_labels, hypothesis_template)
154 num_sequences = len(sequences)
155 candidate_labels = self._args_parser._parse_labels(candidate_labels)
~/anaconda3/envs/mxnet_latest_p37/lib/python3.7/site-packages/transformers/pipelines/base.py in __call__(self, *args, **kwargs)
759 def __call__(self, *args, **kwargs):
760 inputs = self._parse_and_tokenize(*args, **kwargs)
--> 761 return self._forward(inputs)
762
763 def _forward(self, inputs, return_tensors=False):
~/anaconda3/envs/mxnet_latest_p37/lib/python3.7/site-packages/transformers/pipelines/base.py in _forward(self, inputs, return_tensors)
780 with torch.no_grad():
781 inputs = self.ensure_tensor_on_device(**inputs)
--> 782 predictions = self.model(**inputs)[0].cpu()
783
784 if return_tensors:
~/anaconda3/envs/mxnet_latest_p37/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1100 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1101 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102 return forward_call(*input, **kwargs)
1103 # Do not call functions when jit is used
1104 full_backward_hooks, non_full_backward_hooks = [], []
~/anaconda3/envs/mxnet_latest_p37/lib/python3.7/site-packages/transformers/models/roberta/modeling_roberta.py in forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict)
1162 output_attentions=output_attentions,
1163 output_hidden_states=output_hidden_states,
-> 1164 return_dict=return_dict,
1165 )
1166 sequence_output = outputs[0]
~/anaconda3/envs/mxnet_latest_p37/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1100 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1101 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102 return forward_call(*input, **kwargs)
1103 # Do not call functions when jit is used
1104 full_backward_hooks, non_full_backward_hooks = [], []
~/anaconda3/envs/mxnet_latest_p37/lib/python3.7/site-packages/transformers/models/roberta/modeling_roberta.py in forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict)
823 output_attentions=output_attentions,
824 output_hidden_states=output_hidden_states,
--> 825 return_dict=return_dict,
826 )
827 sequence_output = encoder_outputs[0]
~/anaconda3/envs/mxnet_latest_p37/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1100 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1101 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102 return forward_call(*input, **kwargs)
1103 # Do not call functions when jit is used
1104 full_backward_hooks, non_full_backward_hooks = [], []
~/anaconda3/envs/mxnet_latest_p37/lib/python3.7/site-packages/transformers/models/roberta/modeling_roberta.py in forward(self, hidden_states, attention_mask, head_mask, encoder_hidden_states, encoder_attention_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict)
513 encoder_attention_mask,
514 past_key_value,
--> 515 output_attentions,
516 )
517
~/anaconda3/envs/mxnet_latest_p37/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1100 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1101 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102 return forward_call(*input, **kwargs)
1103 # Do not call functions when jit is used
1104 full_backward_hooks, non_full_backward_hooks = [], []
~/anaconda3/envs/mxnet_latest_p37/lib/python3.7/site-packages/transformers/models/roberta/modeling_roberta.py in forward(self, hidden_states, attention_mask, head_mask, encoder_hidden_states, encoder_attention_mask, past_key_value, output_attentions)
398 head_mask,
399 output_attentions=output_attentions,
--> 400 past_key_value=self_attn_past_key_value,
401 )
402 attention_output = self_attention_outputs[0]
~/anaconda3/envs/mxnet_latest_p37/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1100 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1101 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102 return forward_call(*input, **kwargs)
1103 # Do not call functions when jit is used
1104 full_backward_hooks, non_full_backward_hooks = [], []
~/anaconda3/envs/mxnet_latest_p37/lib/python3.7/site-packages/transformers/models/roberta/modeling_roberta.py in forward(self, hidden_states, attention_mask, head_mask, encoder_hidden_states, encoder_attention_mask, past_key_value, output_attentions)
328 encoder_attention_mask,
329 past_key_value,
--> 330 output_attentions,
331 )
332 attention_output = self.output(self_outputs[0], hidden_states)
~/anaconda3/envs/mxnet_latest_p37/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1100 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1101 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102 return forward_call(*input, **kwargs)
1103 # Do not call functions when jit is used
1104 full_backward_hooks, non_full_backward_hooks = [], []
~/anaconda3/envs/mxnet_latest_p37/lib/python3.7/site-packages/transformers/models/roberta/modeling_roberta.py in forward(self, hidden_states, attention_mask, head_mask, encoder_hidden_states, encoder_attention_mask, past_key_value, output_attentions)
241 attention_scores = attention_scores + relative_position_scores_query + relative_position_scores_key
242
--> 243 attention_scores = attention_scores / math.sqrt(self.attention_head_size)
244 if attention_mask is not None:
245 # Apply the attention mask is (precomputed for all layers in RobertaModel forward() function)
RuntimeError: CUDA out of memory. Tried to allocate 3.60 GiB (GPU 0; 14.76 GiB total capacity; 7.33 GiB already allocated; 1.37 GiB free; 12.29 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
Original code (from the notebook):
doc_classifier = TransformersDocumentClassifier(model_name_or_path="cross-encoder/nli-distilroberta-base",
                                                task="zero-shot-classification",
                                                labels=["music", "natural language processing", "history"],
                                                batch_size=16)
# ----------
# convert to Document using a fieldmap for custom content fields the classification should run on
docs_to_classify = [Document.from_dict(d) for d in docs_sliding_window]
# ----------
# classify using gpu, batch_size makes sure we do not run out of memory
classified_docs = doc_classifier.predict(docs_to_classify)
# ----------
# let's see how it looks: there should be a classification result in the meta entry containing labels and scores.
print(classified_docs[0].to_dict())
Please let me know if there is anything else I should add to the post or clarify.
Reducing the batch_size helped me:
batch_size=2
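For completeness, a minimal sketch of the configuration that finally worked, plus the fragmentation workaround that the error message itself suggests (PYTORCH_CUDA_ALLOC_CONF / max_split_size_mb). The value 128 below is only an illustrative choice, and the environment variable has to be set before CUDA is first used:

import os
import torch
from haystack.nodes import TransformersDocumentClassifier

# Optional allocator tweak suggested by the OOM message; must run before the first CUDA allocation.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"

doc_classifier = TransformersDocumentClassifier(model_name_or_path="cross-encoder/nli-distilroberta-base",
                                                task="zero-shot-classification",
                                                labels=tags,        # labels read from filt_gri.txt
                                                batch_size=2)       # the value that stopped the OOM here

torch.cuda.empty_cache()  # drop cached blocks left over from earlier failed runs
classified_docs = doc_classifier.predict(docs_to_classify)

As far as I can tell, the zero-shot pipeline scores every (document, candidate label) pair in a single forward pass, so a long label list in filt_gri.txt multiplies the effective batch fed to the model; that is why the batch_size has to be much smaller than in the original notebook, which used only three labels.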