python spacy 的多线程训练
Multi-threading training for spacy in python
我试图找到一种在 spacy 上使用多线程来训练 NER 模型的方法。看起来我的工作计算机默认使用多线程 (Ubuntu 16.04 Python3.5) 但我的服务器上没有。
知道为什么吗?
服务器上关于 spaCy 和环境的信息
Platform Linux-3.14.32-xxxx-grs-ipv6-64-x86_64-with-Debian-8
Python version 3.4.2
Location /home/nlp/.env/lib/python3.4/site-packages/spacy
Models fr, fr_core_news_md
spaCy version 2.0.5
尝试过程:
安装
# Create a virtual environment in the .env directory.
python3 -m venv .env
# Activate the virtual environment for the current shell.
source .env/bin/activate
# Install spaCy, upgrading it if it is already present.
pip install -U spacy
# Upgrade pip itself.
pip3 install pip --upgrade
# Download the French model via the "fr" shortcut.
python -m spacy download fr
# Check the installed models against the installed spaCy version.
python -m spacy validate
脚本python3
import spacy
import random
# Number of passes over TRAIN_DATA performed by main().
ITERATION_NBR = 100
# Dropout rate handed to nlp.update() on every training step.
DROP_RATE = 0.5

# Training examples as (text, annotations) pairs. Each entry under
# 'entities' is a (start_char, end_char, label) triple into the text.
TRAIN_DATA = [
    (
        'Who is Shaka Khan?',
        {'entities': [(7, 17, 'PERSON')]},
    ),
    (
        'I like London and Berlin.',
        {'entities': [(7, 13, 'LOC'), (18, 24, 'LOC')]},
    ),
]
def main():
    """Train the 'ner' pipe of a French spaCy model on TRAIN_DATA.

    Loads the "fr" model (falling back to "fr_core_news_sm" when it cannot
    be loaded), makes sure an 'ner' pipe exists, registers every entity
    label found in TRAIN_DATA, and then runs ITERATION_NBR passes of
    ``nlp.update`` over the examples with all other pipes disabled.

    Note: TRAIN_DATA is shuffled in place at the start of every pass.
    """
    try:
        nlp = spacy.load("fr")
    except OSError:
        # Only a model that cannot be found/loaded should trigger the
        # fallback; a bare ``except`` would also swallow KeyboardInterrupt
        # and unrelated programming errors.
        nlp = spacy.load("fr_core_news_sm")

    # Reuse the existing NER component if the model ships one, otherwise
    # create it and append it to the pipeline.
    if 'ner' not in nlp.pipe_names:
        ner = nlp.create_pipe('ner')
        nlp.add_pipe(ner, last=True)
    else:
        ner = nlp.get_pipe('ner')

    # Register every label used in the training annotations. An example
    # without an 'entities' key simply contributes no labels.
    for _, annotations in TRAIN_DATA:
        for ent in annotations.get('entities', []):
            ner.add_label(ent[2])

    # Train only the NER component: every other pipe is disabled for the
    # duration of the loop.
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']
    with nlp.disable_pipes(*other_pipes):
        optimizer = nlp.begin_training()
        for _ in range(ITERATION_NBR):
            random.shuffle(TRAIN_DATA)
            losses = {}
            for text, annotations in TRAIN_DATA:
                nlp.update(
                    [text],
                    [annotations],
                    drop=DROP_RATE,
                    sgd=optimizer,
                    losses=losses)
执行
python3 <scriptName>.py
需要 Python >= 3.5,训练 spaCy 时多线程才会默认启用。
我试图找到一种在 spacy 上使用多线程来训练 NER 模型的方法。看起来我的工作计算机默认使用多线程 (Ubuntu 16.04 Python3.5) 但我的服务器上没有。
知道为什么吗?
服务器上关于 spaCy 和环境的信息
Platform Linux-3.14.32-xxxx-grs-ipv6-64-x86_64-with-Debian-8
Python version 3.4.2
Location /home/nlp/.env/lib/python3.4/site-packages/spacy
Models fr, fr_core_news_md
spaCy version 2.0.5
尝试过程:
安装
# Create a virtual environment in the .env directory.
python3 -m venv .env
# Activate the virtual environment for the current shell.
source .env/bin/activate
# Install spaCy, upgrading it if it is already present.
pip install -U spacy
# Upgrade pip itself.
pip3 install pip --upgrade
# Download the French model via the "fr" shortcut.
python -m spacy download fr
# Check the installed models against the installed spaCy version.
python -m spacy validate
脚本python3
import spacy
import random
# Number of passes over TRAIN_DATA performed by main().
ITERATION_NBR = 100
# Dropout rate handed to nlp.update() on every training step.
DROP_RATE = 0.5

# Training examples as (text, annotations) pairs. Each entry under
# 'entities' is a (start_char, end_char, label) triple into the text.
TRAIN_DATA = [
    (
        'Who is Shaka Khan?',
        {'entities': [(7, 17, 'PERSON')]},
    ),
    (
        'I like London and Berlin.',
        {'entities': [(7, 13, 'LOC'), (18, 24, 'LOC')]},
    ),
]
def main():
    """Train the 'ner' pipe of a French spaCy model on TRAIN_DATA.

    Loads the "fr" model (falling back to "fr_core_news_sm" when it cannot
    be loaded), makes sure an 'ner' pipe exists, registers every entity
    label found in TRAIN_DATA, and then runs ITERATION_NBR passes of
    ``nlp.update`` over the examples with all other pipes disabled.

    Note: TRAIN_DATA is shuffled in place at the start of every pass.
    """
    try:
        nlp = spacy.load("fr")
    except OSError:
        # Only a model that cannot be found/loaded should trigger the
        # fallback; a bare ``except`` would also swallow KeyboardInterrupt
        # and unrelated programming errors.
        nlp = spacy.load("fr_core_news_sm")

    # Reuse the existing NER component if the model ships one, otherwise
    # create it and append it to the pipeline.
    if 'ner' not in nlp.pipe_names:
        ner = nlp.create_pipe('ner')
        nlp.add_pipe(ner, last=True)
    else:
        ner = nlp.get_pipe('ner')

    # Register every label used in the training annotations. An example
    # without an 'entities' key simply contributes no labels.
    for _, annotations in TRAIN_DATA:
        for ent in annotations.get('entities', []):
            ner.add_label(ent[2])

    # Train only the NER component: every other pipe is disabled for the
    # duration of the loop.
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']
    with nlp.disable_pipes(*other_pipes):
        optimizer = nlp.begin_training()
        for _ in range(ITERATION_NBR):
            random.shuffle(TRAIN_DATA)
            losses = {}
            for text, annotations in TRAIN_DATA:
                nlp.update(
                    [text],
                    [annotations],
                    drop=DROP_RATE,
                    sgd=optimizer,
                    losses=losses)
执行
python3 <scriptName>.py
需要 Python >= 3.5,训练 spaCy 时多线程才会默认启用。