在 spaCy 3.0 中加载管道时出错
Error when loading pipelines in spaCy 3.0
更新到 spaCy 3.0.6 后,我无法加载任何经过训练的管道,尽管它们似乎都已正确安装:
================= Installed pipeline packages (spaCy v3.0.6) =================
ℹ spaCy installation:
/Users/baconbaker/anaconda3/envs/ml/lib/python3.8/site-packages/spacy
NAME SPACY VERSION
en_core_web_sm >=3.0.0,<3.1.0 3.0.0 ✔
en_core_web_trf >=3.0.0,<3.1.0 3.0.0 ✔
无论是使用 spacy.load() 加载,还是将管道作为模块导入,都会出现这个问题(以下每一行都会引发相同的错误):
nlp = spacy.load("en_core_web_trf")
nlp = spacy.load("en_core_web_sm")
import en_core_web_sm
nlp = en_core_web_sm.load()
import en_core_web_trf
nlp = en_core_web_trf.load()
我得到的错误如下:
---------------------------------------------------------------------------
ImportError Traceback (most recent call last)
<ipython-input-9-b38eb3aae320> in <module>
1 import en_core_web_trf
----> 2 nlp = en_core_web_trf.load()
~/anaconda3/envs/ml/lib/python3.8/site-packages/en_core_web_trf/__init__.py in load(**overrides)
8
9 def load(**overrides):
---> 10 return load_model_from_init_py(__file__, **overrides)
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy/util.py in load_model_from_init_py(init_file, vocab, disable, exclude, config)
514 if not model_path.exists():
515 raise IOError(Errors.E052.format(path=data_path))
--> 516 return load_model_from_path(
517 data_path,
518 vocab=vocab,
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy/util.py in load_model_from_path(model_path, meta, vocab, disable, exclude, config)
389 config_path = model_path / "config.cfg"
390 config = load_config(config_path, overrides=dict_to_dot(config))
--> 391 nlp = load_model_from_config(config, vocab=vocab, disable=disable, exclude=exclude)
392 return nlp.from_disk(model_path, exclude=exclude)
393
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy/util.py in load_model_from_config(config, vocab, disable, exclude, auto_fill, validate)
426 # registry, including custom subclasses provided via entry points
427 lang_cls = get_lang_class(nlp_config["lang"])
--> 428 nlp = lang_cls.from_config(
429 config,
430 vocab=vocab,
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy/language.py in from_config(cls, config, vocab, disable, exclude, meta, auto_fill, validate)
1637 # then we would load them twice at runtime: once when we make from config,
1638 # and then again when we load from disk.
-> 1639 nlp = lang_cls(vocab=vocab, create_tokenizer=create_tokenizer, meta=meta)
1640 if after_creation is not None:
1641 nlp = after_creation(nlp)
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy/language.py in __init__(self, vocab, max_length, meta, create_tokenizer, batch_size, **kwargs)
148 # points. The factory decorator applied to these functions takes care
149 # of the rest.
--> 150 util.registry._entry_point_factories.get_all()
151
152 self._config = DEFAULT_CONFIG.merge(self.default_config)
~/anaconda3/envs/ml/lib/python3.8/site-packages/catalogue/__init__.py in get_all(self)
106 result = {}
107 if self.entry_points:
--> 108 result.update(self.get_entry_points())
109 for keys, value in REGISTRY.items():
110 if len(self.namespace) == len(keys) - 1 and all(
~/anaconda3/envs/ml/lib/python3.8/site-packages/catalogue/__init__.py in get_entry_points(self)
121 result = {}
122 for entry_point in AVAILABLE_ENTRY_POINTS.get(self.entry_point_namespace, []):
--> 123 result[entry_point.name] = entry_point.load()
124 return result
125
~/anaconda3/envs/ml/lib/python3.8/importlib/metadata.py in load(self)
75 """
76 match = self.pattern.match(self.value)
---> 77 module = import_module(match.group('module'))
78 attrs = filter(None, (match.group('attr') or '').split('.'))
79 return functools.reduce(getattr, attrs, module)
~/anaconda3/envs/ml/lib/python3.8/importlib/__init__.py in import_module(name, package)
125 break
126 level += 1
--> 127 return _bootstrap._gcd_import(name[level:], package, level)
128
129
~/anaconda3/envs/ml/lib/python3.8/importlib/_bootstrap.py in _gcd_import(name, package, level)
~/anaconda3/envs/ml/lib/python3.8/importlib/_bootstrap.py in _find_and_load(name, import_)
~/anaconda3/envs/ml/lib/python3.8/importlib/_bootstrap.py in _find_and_load_unlocked(name, import_)
~/anaconda3/envs/ml/lib/python3.8/importlib/_bootstrap.py in _call_with_frames_removed(f, *args, **kwds)
~/anaconda3/envs/ml/lib/python3.8/importlib/_bootstrap.py in _gcd_import(name, package, level)
~/anaconda3/envs/ml/lib/python3.8/importlib/_bootstrap.py in _find_and_load(name, import_)
~/anaconda3/envs/ml/lib/python3.8/importlib/_bootstrap.py in _find_and_load_unlocked(name, import_)
~/anaconda3/envs/ml/lib/python3.8/importlib/_bootstrap.py in _load_unlocked(spec)
~/anaconda3/envs/ml/lib/python3.8/importlib/_bootstrap_external.py in exec_module(self, module)
~/anaconda3/envs/ml/lib/python3.8/importlib/_bootstrap.py in _call_with_frames_removed(f, *args, **kwds)
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy_transformers/__init__.py in <module>
----> 1 from . import architectures
2 from . import annotation_setters
3 from . import span_getters
4 from .layers import TransformerModel
5 from .pipeline_component import Transformer, install_extensions
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy_transformers/architectures.py in <module>
3 from thinc.types import Ragged, Floats2d
4 from spacy.tokens import Doc
----> 5 from .layers import TransformerModel, TransformerListener
6 from .layers import trfs2arrays, split_trf_batch
7 from .util import registry
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy_transformers/layers/__init__.py in <module>
----> 1 from .listener import TransformerListener
2 from .transformer_model import TransformerModel
3 from .split_trf import split_trf_batch
4 from .trfs2arrays import trfs2arrays
5
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy_transformers/layers/listener.py in <module>
2 from thinc.api import Model
3 from spacy.tokens import Doc
----> 4 from ..data_classes import TransformerData
5
6
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy_transformers/data_classes.py in <module>
9 import srsly
10
---> 11 from .util import transpose_list
12 from .align import get_token_positions
13
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy_transformers/util.py in <module>
2 from pathlib import Path
3 import random
----> 4 from transformers import AutoModel, AutoTokenizer
5 from transformers.tokenization_utils import BatchEncoding
6 from transformers.tokenization_utils_fast import PreTrainedTokenizerFast
~/anaconda3/envs/ml/lib/python3.8/site-packages/transformers/__init__.py in <module>
624
625 # Trainer
--> 626 from .trainer import Trainer
627 from .trainer_pt_utils import torch_distributed_zero_first
628 else:
~/anaconda3/envs/ml/lib/python3.8/site-packages/transformers/trainer.py in <module>
67 TrainerState,
68 )
---> 69 from .trainer_pt_utils import (
70 DistributedTensorGatherer,
71 SequentialDistributedSampler,
~/anaconda3/envs/ml/lib/python3.8/site-packages/transformers/trainer_pt_utils.py in <module>
38 SAVE_STATE_WARNING = ""
39 else:
---> 40 from torch.optim.lr_scheduler import SAVE_STATE_WARNING
41
42 logger = logging.get_logger(__name__)
ImportError: cannot import name 'SAVE_STATE_WARNING' from 'torch.optim.lr_scheduler' (/Users/baconbaker/anaconda3/envs/ml/lib/python3.8/site-packages/torch/optim/lr_scheduler.py)
将 torch 从当前稳定版本 1.8.1 降级到 1.4.0 可以解决这个问题,但我不想这样做。
是否有替代解决方案?
这个问题似乎已在较新版本的 transformers 中修复(参见 https://github.com/huggingface/transformers/pull/8979)。请尝试同时升级 transformers 和 spacy-transformers。
更新到 spaCy 3.0.6 后,我无法加载任何经过训练的管道,尽管它们似乎都已正确安装:
================= Installed pipeline packages (spaCy v3.0.6) =================
ℹ spaCy installation:
/Users/baconbaker/anaconda3/envs/ml/lib/python3.8/site-packages/spacy
NAME SPACY VERSION
en_core_web_sm >=3.0.0,<3.1.0 3.0.0 ✔
en_core_web_trf >=3.0.0,<3.1.0 3.0.0 ✔
无论是使用 spacy.load() 加载,还是将管道作为模块导入,都会出现这个问题(以下每一行都会引发相同的错误):
nlp = spacy.load("en_core_web_trf")
nlp = spacy.load("en_core_web_sm")
import en_core_web_sm
nlp = en_core_web_sm.load()
import en_core_web_trf
nlp = en_core_web_trf.load()
我得到的错误如下:
---------------------------------------------------------------------------
ImportError Traceback (most recent call last)
<ipython-input-9-b38eb3aae320> in <module>
1 import en_core_web_trf
----> 2 nlp = en_core_web_trf.load()
~/anaconda3/envs/ml/lib/python3.8/site-packages/en_core_web_trf/__init__.py in load(**overrides)
8
9 def load(**overrides):
---> 10 return load_model_from_init_py(__file__, **overrides)
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy/util.py in load_model_from_init_py(init_file, vocab, disable, exclude, config)
514 if not model_path.exists():
515 raise IOError(Errors.E052.format(path=data_path))
--> 516 return load_model_from_path(
517 data_path,
518 vocab=vocab,
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy/util.py in load_model_from_path(model_path, meta, vocab, disable, exclude, config)
389 config_path = model_path / "config.cfg"
390 config = load_config(config_path, overrides=dict_to_dot(config))
--> 391 nlp = load_model_from_config(config, vocab=vocab, disable=disable, exclude=exclude)
392 return nlp.from_disk(model_path, exclude=exclude)
393
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy/util.py in load_model_from_config(config, vocab, disable, exclude, auto_fill, validate)
426 # registry, including custom subclasses provided via entry points
427 lang_cls = get_lang_class(nlp_config["lang"])
--> 428 nlp = lang_cls.from_config(
429 config,
430 vocab=vocab,
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy/language.py in from_config(cls, config, vocab, disable, exclude, meta, auto_fill, validate)
1637 # then we would load them twice at runtime: once when we make from config,
1638 # and then again when we load from disk.
-> 1639 nlp = lang_cls(vocab=vocab, create_tokenizer=create_tokenizer, meta=meta)
1640 if after_creation is not None:
1641 nlp = after_creation(nlp)
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy/language.py in __init__(self, vocab, max_length, meta, create_tokenizer, batch_size, **kwargs)
148 # points. The factory decorator applied to these functions takes care
149 # of the rest.
--> 150 util.registry._entry_point_factories.get_all()
151
152 self._config = DEFAULT_CONFIG.merge(self.default_config)
~/anaconda3/envs/ml/lib/python3.8/site-packages/catalogue/__init__.py in get_all(self)
106 result = {}
107 if self.entry_points:
--> 108 result.update(self.get_entry_points())
109 for keys, value in REGISTRY.items():
110 if len(self.namespace) == len(keys) - 1 and all(
~/anaconda3/envs/ml/lib/python3.8/site-packages/catalogue/__init__.py in get_entry_points(self)
121 result = {}
122 for entry_point in AVAILABLE_ENTRY_POINTS.get(self.entry_point_namespace, []):
--> 123 result[entry_point.name] = entry_point.load()
124 return result
125
~/anaconda3/envs/ml/lib/python3.8/importlib/metadata.py in load(self)
75 """
76 match = self.pattern.match(self.value)
---> 77 module = import_module(match.group('module'))
78 attrs = filter(None, (match.group('attr') or '').split('.'))
79 return functools.reduce(getattr, attrs, module)
~/anaconda3/envs/ml/lib/python3.8/importlib/__init__.py in import_module(name, package)
125 break
126 level += 1
--> 127 return _bootstrap._gcd_import(name[level:], package, level)
128
129
~/anaconda3/envs/ml/lib/python3.8/importlib/_bootstrap.py in _gcd_import(name, package, level)
~/anaconda3/envs/ml/lib/python3.8/importlib/_bootstrap.py in _find_and_load(name, import_)
~/anaconda3/envs/ml/lib/python3.8/importlib/_bootstrap.py in _find_and_load_unlocked(name, import_)
~/anaconda3/envs/ml/lib/python3.8/importlib/_bootstrap.py in _call_with_frames_removed(f, *args, **kwds)
~/anaconda3/envs/ml/lib/python3.8/importlib/_bootstrap.py in _gcd_import(name, package, level)
~/anaconda3/envs/ml/lib/python3.8/importlib/_bootstrap.py in _find_and_load(name, import_)
~/anaconda3/envs/ml/lib/python3.8/importlib/_bootstrap.py in _find_and_load_unlocked(name, import_)
~/anaconda3/envs/ml/lib/python3.8/importlib/_bootstrap.py in _load_unlocked(spec)
~/anaconda3/envs/ml/lib/python3.8/importlib/_bootstrap_external.py in exec_module(self, module)
~/anaconda3/envs/ml/lib/python3.8/importlib/_bootstrap.py in _call_with_frames_removed(f, *args, **kwds)
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy_transformers/__init__.py in <module>
----> 1 from . import architectures
2 from . import annotation_setters
3 from . import span_getters
4 from .layers import TransformerModel
5 from .pipeline_component import Transformer, install_extensions
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy_transformers/architectures.py in <module>
3 from thinc.types import Ragged, Floats2d
4 from spacy.tokens import Doc
----> 5 from .layers import TransformerModel, TransformerListener
6 from .layers import trfs2arrays, split_trf_batch
7 from .util import registry
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy_transformers/layers/__init__.py in <module>
----> 1 from .listener import TransformerListener
2 from .transformer_model import TransformerModel
3 from .split_trf import split_trf_batch
4 from .trfs2arrays import trfs2arrays
5
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy_transformers/layers/listener.py in <module>
2 from thinc.api import Model
3 from spacy.tokens import Doc
----> 4 from ..data_classes import TransformerData
5
6
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy_transformers/data_classes.py in <module>
9 import srsly
10
---> 11 from .util import transpose_list
12 from .align import get_token_positions
13
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy_transformers/util.py in <module>
2 from pathlib import Path
3 import random
----> 4 from transformers import AutoModel, AutoTokenizer
5 from transformers.tokenization_utils import BatchEncoding
6 from transformers.tokenization_utils_fast import PreTrainedTokenizerFast
~/anaconda3/envs/ml/lib/python3.8/site-packages/transformers/__init__.py in <module>
624
625 # Trainer
--> 626 from .trainer import Trainer
627 from .trainer_pt_utils import torch_distributed_zero_first
628 else:
~/anaconda3/envs/ml/lib/python3.8/site-packages/transformers/trainer.py in <module>
67 TrainerState,
68 )
---> 69 from .trainer_pt_utils import (
70 DistributedTensorGatherer,
71 SequentialDistributedSampler,
~/anaconda3/envs/ml/lib/python3.8/site-packages/transformers/trainer_pt_utils.py in <module>
38 SAVE_STATE_WARNING = ""
39 else:
---> 40 from torch.optim.lr_scheduler import SAVE_STATE_WARNING
41
42 logger = logging.get_logger(__name__)
ImportError: cannot import name 'SAVE_STATE_WARNING' from 'torch.optim.lr_scheduler' (/Users/baconbaker/anaconda3/envs/ml/lib/python3.8/site-packages/torch/optim/lr_scheduler.py)
将 torch 从当前稳定版本 1.8.1 降级到 1.4.0 可以解决这个问题,但我不想这样做。
是否有替代解决方案?
这个问题似乎已在较新版本的 transformers 中修复(参见 https://github.com/huggingface/transformers/pull/8979)。请尝试同时升级 transformers 和 spacy-transformers。