在动词标签计数中使用 defaultdict 函数
Using defaultdict function in verb tags counting
你能解释一下为什么defaultdict函数在下面的代码中没有定义verbs吗?谷歌搜索告诉我语法没问题。请帮忙!提前致谢:)
我的代码
import spacy
from collections import defaultdict
nlp = spacy.load("en_core_web_sm")
!pip install pandas==0.24.2 --user
import pandas as pd
def calculate_the_word_types(df):
verbs = defaultdict(calculate_the_word_types)
for i, row in df.iterrows():
doc = nlp(row["text"])
for v in map(lambda x: x.lemma_, filter(lambda x: (x.pos_ == 'AUX') | (x.pos_ == 'VERB'), doc)):
verbs[v] += 1
df.at(i, "nr_verb", len(list(map(lambda x: x.text,
filter(lambda x: (x.pos_ == 'AUX') | (x.pos_ == 'VERB'), doc)))))
return df
verbs
错误信息
NameError Traceback (most recent call last)
<ipython-input-32-7e7c626bb331> in <module>()
13
14 for v in map(lambda x: x.lemma_, filter(lambda x: (x.pos_ == 'AUX') | (x.pos_ == 'VERB'), doc)):
---> 15 verbs[v] += 1
16 df.at(i, "nr_verb", len(list(map(lambda x: x.text,
17 filter(lambda x: (x.pos_ == 'AUX') | (x.pos_ == 'VERB'), doc)))))
NameError: name 'verbs' is not defined
`set_value()` 函数已弃用。
作为替代,您可以使用索引器“.at”。注意它要用方括号并通过赋值写入,而不是像函数那样用圆括号调用:df.at["YOURINDEX", "YOURCOLUMN"] = "YOURVALUE"。
此外,这一行有问题:verbs = defaultdict(calculate_the_word_types)。
defaultdict 的参数应当是一个返回默认值的工厂函数;这里要让默认值为 0(例如 defaultdict(lambda: 0)),因为它将充当计数器。
最后请修正缩进:for 循环必须缩进在函数体内。否则循环会在模块层级运行,而 verbs 只在函数内部定义,于是出现 NameError。
可运行的代码:
import spacy
from collections import defaultdict
nlp = spacy.load("en_core_web_sm")
!pip install pandas==0.24.2 --user
import pandas as pd
def calculate_the_word_types(df):
    """Add an ``nr_verb`` column with the number of verbs in each row's text.

    Each value of ``df["text"]`` is parsed with the module-level spaCy
    pipeline ``nlp``. Tokens whose ``pos_`` is ``"VERB"`` or ``"AUX"`` are
    counted as verbs.

    Args:
        df: DataFrame with a ``"text"`` column. It is modified in place.

    Returns:
        The same ``df`` object, with ``nr_verb`` filled in for every row
        that was iterated.
    """
    # Lemma -> number of occurrences across the whole frame. int() returns 0,
    # so a lemma seen for the first time starts counting from zero.
    # NOTE(review): this tally is local and is not returned to the caller.
    verbs = defaultdict(int)
    for i, row in df.iterrows():
        doc = nlp(row["text"])
        # Collect the verb tokens once; they feed both the lemma tally and
        # the per-row count.
        verb_tokens = [token for token in doc if token.pos_ in ("AUX", "VERB")]
        for token in verb_tokens:
            verbs[token.lemma_] += 1
        # .at is an indexer: write through square brackets, do not call it.
        df.at[i, "nr_verb"] = len(verb_tokens)
    return df
# Sample dataframe: one short sentence per row in the "text" column
df = pd.DataFrame({'text':['hello there', 'I love Tatooine', 'I hate sands']})
# Print the dataframe returned by calculate_the_word_types, which carries the nr_verb column
print(calculate_the_word_types(df))
输出-
text nr_verb
0 hello there 0.0
1 I love Tatooine 1.0
2 I hate sands 1.0
你能解释一下为什么defaultdict函数在下面的代码中没有定义verbs吗?谷歌搜索告诉我语法没问题。请帮忙!提前致谢:)
我的代码
import spacy
from collections import defaultdict
nlp = spacy.load("en_core_web_sm")
!pip install pandas==0.24.2 --user
import pandas as pd
def calculate_the_word_types(df):
verbs = defaultdict(calculate_the_word_types)
for i, row in df.iterrows():
doc = nlp(row["text"])
for v in map(lambda x: x.lemma_, filter(lambda x: (x.pos_ == 'AUX') | (x.pos_ == 'VERB'), doc)):
verbs[v] += 1
df.at(i, "nr_verb", len(list(map(lambda x: x.text,
filter(lambda x: (x.pos_ == 'AUX') | (x.pos_ == 'VERB'), doc)))))
return df
verbs
错误信息
NameError Traceback (most recent call last)
<ipython-input-32-7e7c626bb331> in <module>()
13
14 for v in map(lambda x: x.lemma_, filter(lambda x: (x.pos_ == 'AUX') | (x.pos_ == 'VERB'), doc)):
---> 15 verbs[v] += 1
16 df.at(i, "nr_verb", len(list(map(lambda x: x.text,
17 filter(lambda x: (x.pos_ == 'AUX') | (x.pos_ == 'VERB'), doc)))))
NameError: name 'verbs' is not defined
`set_value()` 函数已弃用。
作为替代,您可以使用索引器“.at”。注意它要用方括号并通过赋值写入,而不是像函数那样用圆括号调用:df.at["YOURINDEX", "YOURCOLUMN"] = "YOURVALUE"。
此外,这一行有问题:verbs = defaultdict(calculate_the_word_types)。
defaultdict 的参数应当是一个返回默认值的工厂函数;这里要让默认值为 0(例如 defaultdict(lambda: 0)),因为它将充当计数器。
最后请修正缩进:for 循环必须缩进在函数体内。否则循环会在模块层级运行,而 verbs 只在函数内部定义,于是出现 NameError。
可运行的代码:
import spacy
from collections import defaultdict
nlp = spacy.load("en_core_web_sm")
!pip install pandas==0.24.2 --user
import pandas as pd
def calculate_the_word_types(df):
    """Add an ``nr_verb`` column with the number of verbs in each row's text.

    Each value of ``df["text"]`` is parsed with the module-level spaCy
    pipeline ``nlp``. Tokens whose ``pos_`` is ``"VERB"`` or ``"AUX"`` are
    counted as verbs.

    Args:
        df: DataFrame with a ``"text"`` column. It is modified in place.

    Returns:
        The same ``df`` object, with ``nr_verb`` filled in for every row
        that was iterated.
    """
    # Lemma -> number of occurrences across the whole frame. int() returns 0,
    # so a lemma seen for the first time starts counting from zero.
    # NOTE(review): this tally is local and is not returned to the caller.
    verbs = defaultdict(int)
    for i, row in df.iterrows():
        doc = nlp(row["text"])
        # Collect the verb tokens once; they feed both the lemma tally and
        # the per-row count.
        verb_tokens = [token for token in doc if token.pos_ in ("AUX", "VERB")]
        for token in verb_tokens:
            verbs[token.lemma_] += 1
        # .at is an indexer: write through square brackets, do not call it.
        df.at[i, "nr_verb"] = len(verb_tokens)
    return df
# Sample dataframe: one short sentence per row in the "text" column
df = pd.DataFrame({'text':['hello there', 'I love Tatooine', 'I hate sands']})
# Print the dataframe returned by calculate_the_word_types, which carries the nr_verb column
print(calculate_the_word_types(df))
输出-
text nr_verb
0 hello there 0.0
1 I love Tatooine 1.0
2 I hate sands 1.0