在动词标签计数中使用 defaultdict 函数

Using defaultdict to count verb tags



import spacy
from collections import defaultdict
nlp = spacy.load("en_core_web_sm")
!pip install pandas==0.24.2 --user
import pandas as pd

# NOTE(review): as pasted, only the `verbs` assignment is indented under the
# `def`; the loops and the `return` below sit at module level.
def calculate_the_word_types(df):
  # NOTE(review): the default factory given here is this function itself.
  verbs = defaultdict(calculate_the_word_types)

# Run the spaCy pipeline `nlp` on the "text" value of every row.
for i, row in df.iterrows():
  doc = nlp(row["text"])

# Walk the lemmas of the tokens tagged AUX or VERB in `doc`.
for v in map(lambda x: x.lemma_, filter(lambda x: (x.pos_ == 'AUX') | (x.pos_ == 'VERB'), doc)):
  # Line reported by the traceback further down (NameError on `verbs`).
  verbs[v] += 1
  # NOTE(review): `.at` is invoked with call parentheses here, passing the
  # number of AUX/VERB tokens in `doc` as the third argument.
  df.at(i, "nr_verb", len(list(map(lambda x: x.text,
                                                 filter(lambda x: (x.pos_ == 'AUX') | (x.pos_ == 'VERB'), doc)))))

return df


NameError                                 Traceback (most recent call last)
<ipython-input-32-7e7c626bb331> in <module>()
     14 for v in map(lambda x: x.lemma_, filter(lambda x: (x.pos_ == 'AUX') | (x.pos_ == 'VERB'), doc)):
---> 15   verbs[v] += 1
     16   df.at(i, "nr_verb", len(list(map(lambda x: x.text,
     17                                                  filter(lambda x: (x.pos_ == 'AUX') | (x.pos_ == 'VERB'), doc)))))

NameError: name 'verbs' is not defined

set_value() 函数已弃用。

作为替代,您可以这样使用 `.at`:`df.at["YOURINDEX", "YOURCOLUMN"] = "YOURVALUE"`。此外,`verbs = defaultdict(calculate_the_word_types)` 这一行也有问题:由于 `verbs` 用作计数器,其默认值应初始化为 0,例如 `defaultdict(lambda: 0)`。



import spacy
from collections import defaultdict
nlp = spacy.load("en_core_web_sm")
!pip install pandas==0.24.2 --user
import pandas as pd

def calculate_the_word_types(df):
    """Annotate each row of ``df`` with the number of verbs in its text.

    Every value in the ``"text"`` column is run through the module-level
    spaCy pipeline ``nlp``. Tokens whose ``pos_`` tag is ``AUX`` or ``VERB``
    are treated as verbs, and their count is written to the ``"nr_verb"``
    column of the same row.

    A lemma -> occurrence tally is also accumulated across all rows while
    processing, but it stays local to this function and is not returned.

    Args:
        df: pandas DataFrame with a ``"text"`` column.

    Returns:
        The same DataFrame object, modified in place.
    """
    verb_tags = ("AUX", "VERB")
    lemma_counts = defaultdict(int)

    for idx, record in df.iterrows():
        parsed = nlp(record["text"])

        # One pass over the document: keep only the verb-like tokens.
        verb_tokens = [tok for tok in parsed if tok.pos_ in verb_tags]

        # Tally each verb by its lemma.
        for tok in verb_tokens:
            lemma_counts[tok.lemma_] += 1

        # Store how many verb-like tokens this row's text contains.
        df.at[idx, "nr_verb"] = len(verb_tokens)

    return df

# Sample dataframe: one short sentence per row in the "text" column
df = pd.DataFrame({'text':['hello there', 'I love Tatooine', 'I hate sands']})

# Add the per-row verb count and print the resulting dataframe
print(calculate_the_word_types(df))


        text      nr_verb
0   hello there     0.0
1   I love Tatooine 1.0
2   I hate sands    1.0