如何使用 wordnet python 从数据框中提取反义词并将其放入另一个数据框中
How to extract antonyms from a dataframe and put it in another using wordnet python
我正在尝试为数据框中的每一列获取一组同义词和反义词。我能够获取同义词,但无法获取反义词。以下是我的尝试,谢谢。
import pandas as pd
import nltk.corpus
from nltk.corpus import stopwords, wordnet
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer, WordNetLemmatizer
# Load the data set; the 'question' column is the text processed below.
df = pd.read_csv('healthtapQAs++.csv')

# Strip punctuation (anything that is neither a word character nor whitespace).
# regex=True must be explicit: since pandas 2.0, Series.str.replace treats the
# pattern as a literal string by default, which would leave the text untouched.
df['unpunctuated'] = df['question'].str.replace(r'[^\w\s]+', '', regex=True)

# Split every question into word tokens.
df['tokenized'] = df['unpunctuated'].apply(word_tokenize)

# Drop English stop words. `stop` stays a list (as before); the frozenset is
# only used for constant-time membership tests inside the per-row filter.
stop = stopwords.words('english')
stop_lookup = frozenset(stop)
df['stopped'] = df['tokenized'].apply(
    lambda tokens: [token for token in tokens if token not in stop_lookup]
)

# For every remaining token, fetch all of its WordNet synsets
# (one list of synsets per token, one list of those lists per row).
df['syno'] = df['stopped'].apply(
    lambda tokens: [wordnet.synsets(token) for token in tokens]
)

# Pick one synonym per token: the name of the first lemma of the first synset,
# or "" when WordNet returned no synsets for that token.
df['synonyms_question'] = df['syno'].apply(
    lambda synset_lists: [
        synsets[0].lemmas()[0].name() if synsets else ""
        for synsets in synset_lists
    ]
)
# Failed attempt at extracting antonyms (this is the code that produces the
# error quoted below it).
# df['syno'] is a pandas Series whose entries are lists of synset lists built
# with wordnet.synsets, so the Series itself has no lemmas() method; the call
# in the loop header is what raises
# "AttributeError: 'Series' object has no attribute 'lemmas'".
# NOTE(review): the bodies of the `for` and `if` below have lost their
# indentation in this listing.
anto = df['syno']
for j in anto.lemmas():
if j.antonyms():
# NOTE(review): `antonyms` is indexed before it is called here
# (j.antonyms[0]()); the index and the call parentheses look swapped.
# Assigning one value to df['antonyms'] would also fill the whole column with
# that single value instead of producing a per-row result.
df['antonyms'] = j.antonyms[0]().name()
# Abandoned one-line variant, left commented out; its text wraps onto the
# following line. It is not valid Python as written ("(for j in ... if if ...").
#df['antoyms_question'] = df['syno'].apply( lambda x:[(for j in
y[0].lemmas() if if j.antonyms()) if len(y) >0 else "" for y in x])
这是我遇到的错误。我在编程方面完全是新手……谢谢。
return object.__getattribute__(self, name)
AttributeError: 'Series' object has no attribute 'lemmas'
所以,我解决了这个问题:添加了一个我在网上找到的函数,然后将它应用到我的数据框上。
def get_antonyms(input_lemma):
    """Return the WordNet antonyms of a word.

    Every synset that ``wordnet.synsets`` returns for ``input_lemma`` is
    visited, and the antonyms of each of its lemmas are collected.

    Args:
        input_lemma: The word to look up in WordNet.

    Returns:
        A list of antonym names in first-seen order, without duplicates.
        The list is empty when none of the word's lemmas has an antonym.
    """
    # A dict is used as an insertion-ordered set: the same antonym is often
    # reachable through several synsets and should be reported only once.
    antonyms = {}
    for syn in wordnet.synsets(input_lemma):
        for lemma in syn.lemmas():
            # antonyms() is queried once per lemma and every antonym is kept,
            # not just the first one.
            for antonym in lemma.antonyms():
                antonyms.setdefault(antonym.name(), None)
    return list(antonyms)
之后我按如下方式使用它:
# For every row, build one antonym list per token of the 'stopped' column.
df['antonyms_question'] = df['stopped'].apply(
    lambda tokens: [get_antonyms(token) for token in tokens]
)
我正在尝试为数据框中的每一列获取一组同义词和反义词。我能够获取同义词,但无法获取反义词。以下是我的尝试,谢谢。
import pandas as pd
import nltk.corpus
from nltk.corpus import stopwords, wordnet
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer, WordNetLemmatizer
# Load the data set; the 'question' column is the text processed below.
df = pd.read_csv('healthtapQAs++.csv')

# Strip punctuation (anything that is neither a word character nor whitespace).
# regex=True must be explicit: since pandas 2.0, Series.str.replace treats the
# pattern as a literal string by default, which would leave the text untouched.
df['unpunctuated'] = df['question'].str.replace(r'[^\w\s]+', '', regex=True)

# Split every question into word tokens.
df['tokenized'] = df['unpunctuated'].apply(word_tokenize)

# Drop English stop words. `stop` stays a list (as before); the frozenset is
# only used for constant-time membership tests inside the per-row filter.
stop = stopwords.words('english')
stop_lookup = frozenset(stop)
df['stopped'] = df['tokenized'].apply(
    lambda tokens: [token for token in tokens if token not in stop_lookup]
)

# For every remaining token, fetch all of its WordNet synsets
# (one list of synsets per token, one list of those lists per row).
df['syno'] = df['stopped'].apply(
    lambda tokens: [wordnet.synsets(token) for token in tokens]
)

# Pick one synonym per token: the name of the first lemma of the first synset,
# or "" when WordNet returned no synsets for that token.
df['synonyms_question'] = df['syno'].apply(
    lambda synset_lists: [
        synsets[0].lemmas()[0].name() if synsets else ""
        for synsets in synset_lists
    ]
)
# Failed attempt at extracting antonyms (this is the code that produces the
# error quoted below it).
# df['syno'] is a pandas Series whose entries are lists of synset lists built
# with wordnet.synsets, so the Series itself has no lemmas() method; the call
# in the loop header is what raises
# "AttributeError: 'Series' object has no attribute 'lemmas'".
# NOTE(review): the bodies of the `for` and `if` below have lost their
# indentation in this listing.
anto = df['syno']
for j in anto.lemmas():
if j.antonyms():
# NOTE(review): `antonyms` is indexed before it is called here
# (j.antonyms[0]()); the index and the call parentheses look swapped.
# Assigning one value to df['antonyms'] would also fill the whole column with
# that single value instead of producing a per-row result.
df['antonyms'] = j.antonyms[0]().name()
# Abandoned one-line variant, left commented out; its text wraps onto the
# following line. It is not valid Python as written ("(for j in ... if if ...").
#df['antoyms_question'] = df['syno'].apply( lambda x:[(for j in
y[0].lemmas() if if j.antonyms()) if len(y) >0 else "" for y in x])
这是我遇到的错误。我在编程方面完全是新手……谢谢。
return object.__getattribute__(self, name)
AttributeError: 'Series' object has no attribute 'lemmas'
所以,我解决了这个问题:添加了一个我在网上找到的函数,然后将它应用到我的数据框上。
def get_antonyms(input_lemma):
    """Return the WordNet antonyms of a word.

    Every synset that ``wordnet.synsets`` returns for ``input_lemma`` is
    visited, and the antonyms of each of its lemmas are collected.

    Args:
        input_lemma: The word to look up in WordNet.

    Returns:
        A list of antonym names in first-seen order, without duplicates.
        The list is empty when none of the word's lemmas has an antonym.
    """
    # A dict is used as an insertion-ordered set: the same antonym is often
    # reachable through several synsets and should be reported only once.
    antonyms = {}
    for syn in wordnet.synsets(input_lemma):
        for lemma in syn.lemmas():
            # antonyms() is queried once per lemma and every antonym is kept,
            # not just the first one.
            for antonym in lemma.antonyms():
                antonyms.setdefault(antonym.name(), None)
    return list(antonyms)
之后我按如下方式使用它:
# For every row, build one antonym list per token of the 'stopped' column.
df['antonyms_question'] = df['stopped'].apply(
    lambda tokens: [get_antonyms(token) for token in tokens]
)