使用 Spacy 的电子邮件分类器,在尝试实施 BOW 时由于版本问题抛出以下错误
Email Classifier using Spacy , throwing the below error due to version issue when tried to implement BOW
我正在尝试使用独有的 类 和“弓形”架构创建 TextCategorizer,但由于版本问题而抛出以下错误,我的 python 版本是 3.8,也是我的 spacy 版本是 3.2.3 ,请有人帮我解决这个问题
######## Main method ########
def main():
# Load dataset
data = pd.read_csv(data_path, sep='\t')
observations = len(data.index)
# print("Dataset Size: {}".format(observations))
# Create an empty spacy model
nlp = spacy.blank("en")
# Create the TextCategorizer with exclusive classes and "bow" architecture
text_cat = nlp.create_pipe(
"textcat",
config={
"exclusive_classes": True,
"architecture": "bow"})
# Adding the TextCategorizer to the created empty model
nlp.add_pipe(text_cat)
# Add labels to text classifier
text_cat.add_label("ham")
text_cat.add_label("spam")
# Split data into train and test datasets
x_train, x_test, y_train, y_test = train_test_split(
data['text'], data['label'], test_size=0.33, random_state=7)
# Create the train and test data for the spacy model
train_lables = [{'cats': {'ham': label == 'ham',
'spam': label == 'spam'}} for label in y_train]
test_lables = [{'cats': {'ham': label == 'ham',
'spam': label == 'spam'}} for label in y_test]
# Spacy model data
train_data = list(zip(x_train, train_lables))
test_data = list(zip(x_test, test_lables))
# Model configurations
optimizer = nlp.begin_training()
batch_size = 5
epochs = 10
# Training the model
train_model(nlp, train_data, optimizer, batch_size, epochs)
# Sample predictions
# print(train_data[0])
# sample_test = nlp(train_data[0][0])
# print(sample_test.cats)
# Train and test accuracy
train_predictions = get_predictions(nlp, x_train)
test_predictions = get_predictions(nlp, x_test)
train_accuracy = accuracy_score(y_train, train_predictions)
test_accuracy = accuracy_score(y_test, test_predictions)
print("Train accuracy: {}".format(train_accuracy))
print("Test accuracy: {}".format(test_accuracy))
# Creating the confusion matrix graphs
cf_train_matrix = confusion_matrix(y_train, train_predictions)
plt.figure(figsize=(10,8))
sns.heatmap(cf_train_matrix, annot=True, fmt='d')
cf_test_matrix = confusion_matrix(y_test, test_predictions)
plt.figure(figsize=(10,8))
sns.heatmap(cf_test_matrix, annot=True, fmt='d')
if __name__ == "__main__":
main()
错误如下
---------------------------------------------------------------------------
ConfigValidationError Traceback (most recent call last)
<ipython-input-6-a77bb5692b25> in <module>
72
73 if __name__ == "__main__":
---> 74 main()
<ipython-input-6-a77bb5692b25> in main()
12
13 # Create the TextCategorizer with exclusive classes and "bow" architecture
---> 14 text_cat = nlp.add_pipe(
15 "textcat",
16 config={
~\anaconda3\lib\site-packages\spacy\language.py in add_pipe(self, factory_name, name, before, after, first, last, source, config, raw_config, validate)
790 lang_code=self.lang,
791 )
--> 792 pipe_component = self.create_pipe(
793 factory_name,
794 name=name,
~\anaconda3\lib\site-packages\spacy\language.py in create_pipe(self, factory_name, name, config, raw_config, validate)
672 # We're calling the internal _fill here to avoid constructing the
673 # registered functions twice
--> 674 resolved = registry.resolve(cfg, validate=validate)
675 filled = registry.fill({"cfg": cfg[factory_name]}, validate=validate)["cfg"]
676 filled = Config(filled)
~\anaconda3\lib\site-packages\thinc\config.py in resolve(cls, config, schema, overrides, validate)
727 validate: bool = True,
728 ) -> Dict[str, Any]:
--> 729 resolved, _ = cls._make(
730 config, schema=schema, overrides=overrides, validate=validate, resolve=True
731 )
~\anaconda3\lib\site-packages\thinc\config.py in _make(cls, config, schema, overrides, resolve, validate)
776 if not is_interpolated:
777 config = Config(orig_config).interpolate()
--> 778 filled, _, resolved = cls._fill(
779 config, schema, validate=validate, overrides=overrides, resolve=resolve
780 )
~\anaconda3\lib\site-packages\thinc\config.py in _fill(cls, config, schema, validate, resolve, parent, overrides)
831 schema.__fields__[key] = copy_model_field(field, Any)
832 promise_schema = cls.make_promise_schema(value, resolve=resolve)
--> 833 filled[key], validation[v_key], final[key] = cls._fill(
834 value,
835 promise_schema,
~\anaconda3\lib\site-packages\thinc\config.py in _fill(cls, config, schema, validate, resolve, parent, overrides)
897 result = schema.parse_obj(validation)
898 except ValidationError as e:
--> 899 raise ConfigValidationError(
900 config=config, errors=e.errors(), parent=parent
901 ) from None
ConfigValidationError:
Config validation error
textcat -> architecture extra fields not permitted
textcat -> exclusive_classes extra fields not permitted
{'nlp': <spacy.lang.en.English object at 0x000001B90CD4BF70>, 'name': 'textcat', 'architecture': 'bow', 'exclusive_classes': True, 'model': {'@architectures': 'spacy.TextCatEnsemble.v2', 'linear_model': {'@architectures': 'spacy.TextCatBOW.v2', 'exclusive_classes': True, 'ngram_size': 1, 'no_output_layer': False}, 'tok2vec': {'@architectures': 'spacy.Tok2Vec.v2', 'embed': {'@architectures': 'spacy.MultiHashEmbed.v2', 'width': 64, 'rows': [2000, 2000, 1000, 1000, 1000, 1000], 'attrs': ['ORTH', 'LOWER', 'PREFIX', 'SUFFIX', 'SHAPE', 'ID'], 'include_static_vectors': False}, 'encode': {'@architectures': 'spacy.MaxoutWindowEncoder.v2', 'width': 64, 'window_size': 1, 'maxout_pieces': 3, 'depth': 2}}}, 'scorer': {'@scorers': 'spacy.textcat_scorer.v1'}, 'threshold': 0.5, '@factories': 'textcat'}
我的 Spacy 版本
print(spacy.__version__)
3.2.3
我的Python版本
import sys
print(sys.version)
3.8.8 (default, Apr 13 2021, 15:08:03) [MSC v.1916 64 bit (AMD64)]
尝试降级 Spacy 版本
!conda install -c conda-forge spacy = 2.1.8
Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... failed with initial frozen solve. Retrying with flexible solve.
Collecting package metadata (repodata.json): ...working... done
Solving environment: ...working... failed with initial frozen solve. Retrying with flexible solve.
Solving environment: ...working...
Building graph of deps: 0%| | 0/5 [00:00<?, ?it/s]
Examining spacy=2.1.8: 0%| | 0/5 [00:00<?, ?it/s]
Examining python=3.8: 20%|## | 1/5 [00:00<00:00, 4.80it/s]
Examining python=3.8: 40%|#### | 2/5 [00:00<00:00, 9.60it/s]
Examining @/win-64::__cuda==11.6=0: 40%|#### | 2/5 [00:01<00:00, 9.60it/s]
Examining @/win-64::__cuda==11.6=0: 60%|###### | 3/5 [00:01<00:01, 1.97it/s]
Examining @/win-64::__win==0=0: 60%|###### | 3/5 [00:01<00:01, 1.97it/s]
Examining @/win-64::__archspec==1=x86_64: 80%|######## | 4/5 [00:01<00:00, 1.97it/s]
Determining conflicts: 0%| | 0/5 [00:00<?, ?it/s]
Examining conflict for spacy python: 0%| | 0/5 [00:00<?, ?it/s]
UnsatisfiableError: The following specifications were found
to be incompatible with the existing python installation in your environment:
Specifications:
- spacy=2.1.8 -> python[version='>=3.6,<3.7.0a0|>=3.7,<3.8.0a0']
Your python: python=3.8
Found conflicts! Looking for incompatible packages.
This can take several minutes. Press CTRL-C to abort.
failed
If python is on the left-most side of the chain, that's the version you've asked for.
When python appears to the right, that indicates that the thing on the left is somehow
not available for the python version you are constrained to. Note that conda will not
change your python version to a different minor version unless you explicitly specify
that.
请随时发表评论或提问。
谢谢
从我理解错误消息的方式来看,它告诉您您要安装的 spacy 版本 (2.1.8) 与您拥有的 python 版本 (3.8.8) 不兼容。它需要 Python 3.6 或 3.7。
因此,要么创建一个 Python 3.6 或 3.7 的环境(在 conda 中创建新环境时很容易指定 Python 版本),要么使用更高版本的 spacy。如果您只使用最新版本的 spacy,您是否已经尝试过代码是否有效?
您使用这个 spacy 版本有什么具体原因吗?如果您使用的某些方法不再受支持,则将您的代码更新为较新的 spacy 方法可能更有意义。特别是如果您这样做是为了了解 spacy,那么学习不再受支持的方法会适得其反。可悲的是,很多教程都没有更新他们的代码,或者至少没有指定他们正在使用的版本,然后将他们的代码留在网上多年。
我正在尝试使用独有的 类 和“弓形”架构创建 TextCategorizer,但由于版本问题而抛出以下错误,我的 python 版本是 3.8,也是我的 spacy 版本是 3.2.3 ,请有人帮我解决这个问题
######## Main method ########
def main():
# Load dataset
data = pd.read_csv(data_path, sep='\t')
observations = len(data.index)
# print("Dataset Size: {}".format(observations))
# Create an empty spacy model
nlp = spacy.blank("en")
# Create the TextCategorizer with exclusive classes and "bow" architecture
text_cat = nlp.create_pipe(
"textcat",
config={
"exclusive_classes": True,
"architecture": "bow"})
# Adding the TextCategorizer to the created empty model
nlp.add_pipe(text_cat)
# Add labels to text classifier
text_cat.add_label("ham")
text_cat.add_label("spam")
# Split data into train and test datasets
x_train, x_test, y_train, y_test = train_test_split(
data['text'], data['label'], test_size=0.33, random_state=7)
# Create the train and test data for the spacy model
train_lables = [{'cats': {'ham': label == 'ham',
'spam': label == 'spam'}} for label in y_train]
test_lables = [{'cats': {'ham': label == 'ham',
'spam': label == 'spam'}} for label in y_test]
# Spacy model data
train_data = list(zip(x_train, train_lables))
test_data = list(zip(x_test, test_lables))
# Model configurations
optimizer = nlp.begin_training()
batch_size = 5
epochs = 10
# Training the model
train_model(nlp, train_data, optimizer, batch_size, epochs)
# Sample predictions
# print(train_data[0])
# sample_test = nlp(train_data[0][0])
# print(sample_test.cats)
# Train and test accuracy
train_predictions = get_predictions(nlp, x_train)
test_predictions = get_predictions(nlp, x_test)
train_accuracy = accuracy_score(y_train, train_predictions)
test_accuracy = accuracy_score(y_test, test_predictions)
print("Train accuracy: {}".format(train_accuracy))
print("Test accuracy: {}".format(test_accuracy))
# Creating the confusion matrix graphs
cf_train_matrix = confusion_matrix(y_train, train_predictions)
plt.figure(figsize=(10,8))
sns.heatmap(cf_train_matrix, annot=True, fmt='d')
cf_test_matrix = confusion_matrix(y_test, test_predictions)
plt.figure(figsize=(10,8))
sns.heatmap(cf_test_matrix, annot=True, fmt='d')
if __name__ == "__main__":
main()
错误如下
---------------------------------------------------------------------------
ConfigValidationError Traceback (most recent call last)
<ipython-input-6-a77bb5692b25> in <module>
72
73 if __name__ == "__main__":
---> 74 main()
<ipython-input-6-a77bb5692b25> in main()
12
13 # Create the TextCategorizer with exclusive classes and "bow" architecture
---> 14 text_cat = nlp.add_pipe(
15 "textcat",
16 config={
~\anaconda3\lib\site-packages\spacy\language.py in add_pipe(self, factory_name, name, before, after, first, last, source, config, raw_config, validate)
790 lang_code=self.lang,
791 )
--> 792 pipe_component = self.create_pipe(
793 factory_name,
794 name=name,
~\anaconda3\lib\site-packages\spacy\language.py in create_pipe(self, factory_name, name, config, raw_config, validate)
672 # We're calling the internal _fill here to avoid constructing the
673 # registered functions twice
--> 674 resolved = registry.resolve(cfg, validate=validate)
675 filled = registry.fill({"cfg": cfg[factory_name]}, validate=validate)["cfg"]
676 filled = Config(filled)
~\anaconda3\lib\site-packages\thinc\config.py in resolve(cls, config, schema, overrides, validate)
727 validate: bool = True,
728 ) -> Dict[str, Any]:
--> 729 resolved, _ = cls._make(
730 config, schema=schema, overrides=overrides, validate=validate, resolve=True
731 )
~\anaconda3\lib\site-packages\thinc\config.py in _make(cls, config, schema, overrides, resolve, validate)
776 if not is_interpolated:
777 config = Config(orig_config).interpolate()
--> 778 filled, _, resolved = cls._fill(
779 config, schema, validate=validate, overrides=overrides, resolve=resolve
780 )
~\anaconda3\lib\site-packages\thinc\config.py in _fill(cls, config, schema, validate, resolve, parent, overrides)
831 schema.__fields__[key] = copy_model_field(field, Any)
832 promise_schema = cls.make_promise_schema(value, resolve=resolve)
--> 833 filled[key], validation[v_key], final[key] = cls._fill(
834 value,
835 promise_schema,
~\anaconda3\lib\site-packages\thinc\config.py in _fill(cls, config, schema, validate, resolve, parent, overrides)
897 result = schema.parse_obj(validation)
898 except ValidationError as e:
--> 899 raise ConfigValidationError(
900 config=config, errors=e.errors(), parent=parent
901 ) from None
ConfigValidationError:
Config validation error
textcat -> architecture extra fields not permitted
textcat -> exclusive_classes extra fields not permitted
{'nlp': <spacy.lang.en.English object at 0x000001B90CD4BF70>, 'name': 'textcat', 'architecture': 'bow', 'exclusive_classes': True, 'model': {'@architectures': 'spacy.TextCatEnsemble.v2', 'linear_model': {'@architectures': 'spacy.TextCatBOW.v2', 'exclusive_classes': True, 'ngram_size': 1, 'no_output_layer': False}, 'tok2vec': {'@architectures': 'spacy.Tok2Vec.v2', 'embed': {'@architectures': 'spacy.MultiHashEmbed.v2', 'width': 64, 'rows': [2000, 2000, 1000, 1000, 1000, 1000], 'attrs': ['ORTH', 'LOWER', 'PREFIX', 'SUFFIX', 'SHAPE', 'ID'], 'include_static_vectors': False}, 'encode': {'@architectures': 'spacy.MaxoutWindowEncoder.v2', 'width': 64, 'window_size': 1, 'maxout_pieces': 3, 'depth': 2}}}, 'scorer': {'@scorers': 'spacy.textcat_scorer.v1'}, 'threshold': 0.5, '@factories': 'textcat'}
我的 Spacy 版本
print(spacy.__version__)
3.2.3
我的Python版本
import sys
print(sys.version)
3.8.8 (default, Apr 13 2021, 15:08:03) [MSC v.1916 64 bit (AMD64)]
尝试降级 Spacy 版本
!conda install -c conda-forge spacy = 2.1.8
Collecting package metadata (current_repodata.json): ...working... done Solving environment: ...working... failed with initial frozen solve. Retrying with flexible solve. Collecting package metadata (repodata.json): ...working... done Solving environment: ...working... failed with initial frozen solve. Retrying with flexible solve. Solving environment: ...working... Building graph of deps: 0%| | 0/5 [00:00<?, ?it/s] Examining spacy=2.1.8: 0%| | 0/5 [00:00<?, ?it/s] Examining python=3.8: 20%|## | 1/5 [00:00<00:00, 4.80it/s] Examining python=3.8: 40%|#### | 2/5 [00:00<00:00, 9.60it/s] Examining @/win-64::__cuda==11.6=0: 40%|#### | 2/5 [00:01<00:00, 9.60it/s] Examining @/win-64::__cuda==11.6=0: 60%|###### | 3/5 [00:01<00:01, 1.97it/s] Examining @/win-64::__win==0=0: 60%|###### | 3/5 [00:01<00:01, 1.97it/s] Examining @/win-64::__archspec==1=x86_64: 80%|######## | 4/5 [00:01<00:00, 1.97it/s] Determining conflicts: 0%| | 0/5 [00:00<?, ?it/s] Examining conflict for spacy python: 0%| | 0/5 [00:00<?, ?it/s] UnsatisfiableError: The following specifications were found to be incompatible with the existing python installation in your environment: Specifications: - spacy=2.1.8 -> python[version='>=3.6,<3.7.0a0|>=3.7,<3.8.0a0'] Your python: python=3.8 Found conflicts! Looking for incompatible packages. This can take several minutes. Press CTRL-C to abort. failed If python is on the left-most side of the chain, that's the version you've asked for. When python appears to the right, that indicates that the thing on the left is somehow not available for the python version you are constrained to. Note that conda will not change your python version to a different minor version unless you explicitly specify that.
请随时发表评论或提问。 谢谢
从我理解错误消息的方式来看,它告诉您您要安装的 spacy 版本 (2.1.8) 与您拥有的 python 版本 (3.8.8) 不兼容。它需要 Python 3.6 或 3.7。
因此,要么创建一个 Python 3.6 或 3.7 的环境(在 conda 中创建新环境时很容易指定 Python 版本),要么使用更高版本的 spacy。如果您只使用最新版本的 spacy,您是否已经尝试过代码是否有效?
您使用这个 spacy 版本有什么具体原因吗?如果您使用的某些方法不再受支持,则将您的代码更新为较新的 spacy 方法可能更有意义。特别是如果您这样做是为了了解 spacy,那么学习不再受支持的方法会适得其反。可悲的是,很多教程都没有更新他们的代码,或者至少没有指定他们正在使用的版本,然后将他们的代码留在网上多年。