ValueError: You must include at least one label and at least one sequence
ValueError: You must include at least one label and at least one sequence
我正在使用这个 Notebook,其中 Apply DocumentClassifier 部分更改如下。
Jupyter Labs,内核:conda_mxnet_latest_p37。
这个错误看起来像是机器学习库的标准校验提示。但是,我传递/创建的参数和变量名称与原始代码相同,所以问题应该出在我的代码中这些变量的值上。
我的代码:
with open('filt_gri.txt', 'r') as filehandle:
tags = [current_place.rstrip() for current_place in filehandle.readlines()]
doc_classifier = TransformersDocumentClassifier(model_name_or_path="cross-encoder/nli-distilroberta-base",
task="zero-shot-classification",
labels=tags,
batch_size=16)
# convert to Document using a fieldmap for custom content fields the classification should run on
docs_to_classify = [Document.from_dict(d) for d in docs_sliding_window]
# classify using gpu, batch_size makes sure we do not run out of memory
classified_docs = doc_classifier.predict(docs_to_classify)
# let's see how it looks: there should be a classification result in the meta entry containing labels and scores.
print(classified_docs[0].to_dict())
all_docs = convert_files_to_dicts(dir_path=doc_dir)
preprocessor_sliding_window = PreProcessor(split_overlap=3,
split_length=10,
split_respect_sentence_boundary=False,
split_by='passage')
输出:
INFO - haystack.modeling.utils - Using devices: CUDA
INFO - haystack.modeling.utils - Number of GPUs: 1
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-11-77eb98038283> in <module>
14
15 # classify using gpu, batch_size makes sure we do not run out of memory
---> 16 classified_docs = doc_classifier.predict(docs_to_classify)
17
18 # let's see how it looks: there should be a classification result in the meta entry containing labels and scores.
~/anaconda3/envs/mxnet_latest_p37/lib/python3.7/site-packages/haystack/nodes/document_classifier/transformers.py in predict(self, documents)
137 batches = self.get_batches(texts, batch_size=self.batch_size)
138 if self.task == 'zero-shot-classification':
--> 139 batched_predictions = [self.model(batch, candidate_labels=self.labels, truncation=True) for batch in batches]
140 elif self.task == 'text-classification':
141 batched_predictions = [self.model(batch, return_all_scores=self.return_all_scores, truncation=True) for batch in batches]
~/anaconda3/envs/mxnet_latest_p37/lib/python3.7/site-packages/haystack/nodes/document_classifier/transformers.py in <listcomp>(.0)
137 batches = self.get_batches(texts, batch_size=self.batch_size)
138 if self.task == 'zero-shot-classification':
--> 139 batched_predictions = [self.model(batch, candidate_labels=self.labels, truncation=True) for batch in batches]
140 elif self.task == 'text-classification':
141 batched_predictions = [self.model(batch, return_all_scores=self.return_all_scores, truncation=True) for batch in batches]
~/anaconda3/envs/mxnet_latest_p37/lib/python3.7/site-packages/transformers/pipelines/zero_shot_classification.py in __call__(self, sequences, candidate_labels, hypothesis_template, multi_label, **kwargs)
151 sequences = [sequences]
152
--> 153 outputs = super().__call__(sequences, candidate_labels, hypothesis_template)
154 num_sequences = len(sequences)
155 candidate_labels = self._args_parser._parse_labels(candidate_labels)
~/anaconda3/envs/mxnet_latest_p37/lib/python3.7/site-packages/transformers/pipelines/base.py in __call__(self, *args, **kwargs)
758
759 def __call__(self, *args, **kwargs):
--> 760 inputs = self._parse_and_tokenize(*args, **kwargs)
761 return self._forward(inputs)
762
~/anaconda3/envs/mxnet_latest_p37/lib/python3.7/site-packages/transformers/pipelines/zero_shot_classification.py in _parse_and_tokenize(self, sequences, candidate_labels, hypothesis_template, padding, add_special_tokens, truncation, **kwargs)
92 Parse arguments and tokenize only_first so that hypothesis (label) is not truncated
93 """
---> 94 sequence_pairs = self._args_parser(sequences, candidate_labels, hypothesis_template)
95 inputs = self.tokenizer(
96 sequence_pairs,
~/anaconda3/envs/mxnet_latest_p37/lib/python3.7/site-packages/transformers/pipelines/zero_shot_classification.py in __call__(self, sequences, labels, hypothesis_template)
25 def __call__(self, sequences, labels, hypothesis_template):
26 if len(labels) == 0 or len(sequences) == 0:
---> 27 raise ValueError("You must include at least one label and at least one sequence.")
28 if hypothesis_template.format(labels[0]) == hypothesis_template:
29 raise ValueError(
ValueError: You must include at least one label and at least one sequence.
原始代码:
doc_classifier = TransformersDocumentClassifier(model_name_or_path="cross-encoder/nli-distilroberta-base",
task="zero-shot-classification",
labels=["music", "natural language processing", "history"],
batch_size=16
)
# ----------
# convert to Document using a fieldmap for custom content fields the classification should run on
docs_to_classify = [Document.from_dict(d) for d in docs_sliding_window]
# ----------
# classify using gpu, batch_size makes sure we do not run out of memory
classified_docs = doc_classifier.predict(docs_to_classify)
# ----------
# let's see how it looks: there should be a classification result in the meta entry containing labels and scores.
print(classified_docs[0].to_dict())
如果还有任何需要我在帖子中补充或澄清的内容,请告诉我。
阅读官方文档并分析后可知,错误是在调用 .predict(docs_to_classify) 时产生的。建议你先做一些基本的测试,比如使用参数 labels = ["negative", "positive"];如果问题确实是由外部文件中的字符串值(string values)引起的,请加以更正。此外,你还可以查看文档中说明如何使用 pipelines 的部分:
pipeline = Pipeline()
pipeline.add_node(component=retriever, name="Retriever", inputs=["Query"])
pipeline.add_node(component=doc_classifier, name='DocClassifier', inputs=['Retriever'])
我正在使用这个 Notebook,其中 Apply DocumentClassifier 部分更改如下。
Jupyter Labs,内核:conda_mxnet_latest_p37。
这个错误看起来像是机器学习库的标准校验提示。但是,我传递/创建的参数和变量名称与原始代码相同,所以问题应该出在我的代码中这些变量的值上。
我的代码:
with open('filt_gri.txt', 'r') as filehandle:
tags = [current_place.rstrip() for current_place in filehandle.readlines()]
doc_classifier = TransformersDocumentClassifier(model_name_or_path="cross-encoder/nli-distilroberta-base",
task="zero-shot-classification",
labels=tags,
batch_size=16)
# convert to Document using a fieldmap for custom content fields the classification should run on
docs_to_classify = [Document.from_dict(d) for d in docs_sliding_window]
# classify using gpu, batch_size makes sure we do not run out of memory
classified_docs = doc_classifier.predict(docs_to_classify)
# let's see how it looks: there should be a classification result in the meta entry containing labels and scores.
print(classified_docs[0].to_dict())
all_docs = convert_files_to_dicts(dir_path=doc_dir)
preprocessor_sliding_window = PreProcessor(split_overlap=3,
split_length=10,
split_respect_sentence_boundary=False,
split_by='passage')
输出:
INFO - haystack.modeling.utils - Using devices: CUDA
INFO - haystack.modeling.utils - Number of GPUs: 1
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-11-77eb98038283> in <module>
14
15 # classify using gpu, batch_size makes sure we do not run out of memory
---> 16 classified_docs = doc_classifier.predict(docs_to_classify)
17
18 # let's see how it looks: there should be a classification result in the meta entry containing labels and scores.
~/anaconda3/envs/mxnet_latest_p37/lib/python3.7/site-packages/haystack/nodes/document_classifier/transformers.py in predict(self, documents)
137 batches = self.get_batches(texts, batch_size=self.batch_size)
138 if self.task == 'zero-shot-classification':
--> 139 batched_predictions = [self.model(batch, candidate_labels=self.labels, truncation=True) for batch in batches]
140 elif self.task == 'text-classification':
141 batched_predictions = [self.model(batch, return_all_scores=self.return_all_scores, truncation=True) for batch in batches]
~/anaconda3/envs/mxnet_latest_p37/lib/python3.7/site-packages/haystack/nodes/document_classifier/transformers.py in <listcomp>(.0)
137 batches = self.get_batches(texts, batch_size=self.batch_size)
138 if self.task == 'zero-shot-classification':
--> 139 batched_predictions = [self.model(batch, candidate_labels=self.labels, truncation=True) for batch in batches]
140 elif self.task == 'text-classification':
141 batched_predictions = [self.model(batch, return_all_scores=self.return_all_scores, truncation=True) for batch in batches]
~/anaconda3/envs/mxnet_latest_p37/lib/python3.7/site-packages/transformers/pipelines/zero_shot_classification.py in __call__(self, sequences, candidate_labels, hypothesis_template, multi_label, **kwargs)
151 sequences = [sequences]
152
--> 153 outputs = super().__call__(sequences, candidate_labels, hypothesis_template)
154 num_sequences = len(sequences)
155 candidate_labels = self._args_parser._parse_labels(candidate_labels)
~/anaconda3/envs/mxnet_latest_p37/lib/python3.7/site-packages/transformers/pipelines/base.py in __call__(self, *args, **kwargs)
758
759 def __call__(self, *args, **kwargs):
--> 760 inputs = self._parse_and_tokenize(*args, **kwargs)
761 return self._forward(inputs)
762
~/anaconda3/envs/mxnet_latest_p37/lib/python3.7/site-packages/transformers/pipelines/zero_shot_classification.py in _parse_and_tokenize(self, sequences, candidate_labels, hypothesis_template, padding, add_special_tokens, truncation, **kwargs)
92 Parse arguments and tokenize only_first so that hypothesis (label) is not truncated
93 """
---> 94 sequence_pairs = self._args_parser(sequences, candidate_labels, hypothesis_template)
95 inputs = self.tokenizer(
96 sequence_pairs,
~/anaconda3/envs/mxnet_latest_p37/lib/python3.7/site-packages/transformers/pipelines/zero_shot_classification.py in __call__(self, sequences, labels, hypothesis_template)
25 def __call__(self, sequences, labels, hypothesis_template):
26 if len(labels) == 0 or len(sequences) == 0:
---> 27 raise ValueError("You must include at least one label and at least one sequence.")
28 if hypothesis_template.format(labels[0]) == hypothesis_template:
29 raise ValueError(
ValueError: You must include at least one label and at least one sequence.
原始代码:
doc_classifier = TransformersDocumentClassifier(model_name_or_path="cross-encoder/nli-distilroberta-base",
task="zero-shot-classification",
labels=["music", "natural language processing", "history"],
batch_size=16
)
# ----------
# convert to Document using a fieldmap for custom content fields the classification should run on
docs_to_classify = [Document.from_dict(d) for d in docs_sliding_window]
# ----------
# classify using gpu, batch_size makes sure we do not run out of memory
classified_docs = doc_classifier.predict(docs_to_classify)
# ----------
# let's see how it looks: there should be a classification result in the meta entry containing labels and scores.
print(classified_docs[0].to_dict())
如果还有任何需要我在帖子中补充或澄清的内容,请告诉我。
阅读官方文档并分析后可知,错误是在调用 .predict(docs_to_classify) 时产生的。建议你先做一些基本的测试,比如使用参数 labels = ["negative", "positive"];如果问题确实是由外部文件中的字符串值(string values)引起的,请加以更正。此外,你还可以查看文档中说明如何使用 pipelines 的部分:
pipeline = Pipeline()
pipeline.add_node(component=retriever, name="Retriever", inputs=["Query"])
pipeline.add_node(component=doc_classifier, name='DocClassifier', inputs=['Retriever'])