计算 Sentiwordnet 的极性
Calculate the Polarity of Sentiwordnet
我正在尝试进行一项计算,以便将评论的评分(1 到 5)与从评论文本的情感分析中提取的分数进行比较。我使用的是 sentiwordnet,已经成功获得了正面、负面和客观(objective)分数,但我无法遍历 42 条评论中每个句子里每个单词的分数来完成计算。
这是我的代码:
def preprocess_token(text):
    """Lower-case *text* and split it into NLTK word tokens."""
    return nltk.tokenize.word_tokenize(text.lower())
def penn_to_wn(tag):
    """Translate a Penn Treebank POS tag into the matching WordNet POS.

    Returns None for tag families WordNet does not model.
    """
    mapping = {'J': wn.ADJ, 'N': wn.NOUN, 'R': wn.ADV, 'V': wn.VERB}
    return mapping.get(tag[:1])
lemmatizer = WordNetLemmatizer()

def get_sentiment(word, tag):
    """Return [pos, neg, obj] SentiWordNet scores for *word* tagged *tag*.

    Uses the first (most common) synset of the lemmatized word.  Returns an
    empty list when the tag has no WordNet equivalent, the lemma is empty,
    or no synset is found.
    """
    wn_tag = penn_to_wn(tag)
    # BUG FIX: penn_to_wn maps verb tags to wn.VERB, but the original guard
    # excluded wn.VERB, so every verb silently scored nothing.
    if wn_tag not in (wn.NOUN, wn.ADJ, wn.ADV, wn.VERB):
        return []
    lemma = lemmatizer.lemmatize(word, pos=wn_tag)
    if not lemma:
        return []
    # BUG FIX: look up the lemma, not the raw word -- the lemmatization
    # above was computed but never used in the original code.
    synsets = wn.synsets(lemma, pos=wn_tag)
    if not synsets:
        return []
    # First synset = most frequent sense; a reasonable default heuristic.
    swn_synset = swn.senti_synset(synsets[0].name())
    return [swn_synset.pos_score(), swn_synset.neg_score(), swn_synset.obj_score()]
# Strip punctuation before tokenizing.  regex=True makes the regex intent
# explicit (implicit regex in str.replace is deprecated in modern pandas).
data["text_n"] = data['Text'].str.replace(r'[^\w\s]', '', regex=True)
data['tokens'] = data['text_n'].apply(preprocess_token)

# Keep only purely alphabetic tokens for each review (replaces the original
# two-pass loop that pre-filled empty lists and then appended word[:] copies).
word_list = [[word for word in tokens if word.isalpha()]
             for tokens in data['tokens']]

# POS-tag each review's token list.
tagged_tokens = [nltk.pos_tag(tokens) for tokens in word_list]

# Per review: a list of [pos, neg, obj] triples, one per scorable token.
senti_val = []
for tagged in tagged_tokens:
    scores = [get_sentiment(word, tag) for word, tag in tagged]
    senti_val.append([s for s in scores if s])
data['value'] = senti_val

# BUG FIX: the original did senti_val[i][i], reusing the review index as the
# token index, which raises IndexError as soon as i exceeds a review's token
# count.  Iterate each review's token scores instead; polarity = pos - neg.
calc = []
for sentence_scores in senti_val:
    calc.append([s[0] - s[1] for s in sentence_scores])
#This last chunk of code gives me error:
Traceback (most recent call last):
File "C:\Users\----\OneDrive\python\reviews.py", line 158, in <module>
r=senti_val[i][i][0] - senti_val[i][i][1]
IndexError: list index out of range
#I want to get the result of subtracting the negative score from the positive score for each word of each sentence and get it in a list.
任何关于问题所在或我下一步应该做什么的建议都将不胜感激。
提前致谢
calc = []
# One entry per sentence: the polarity (pos - neg) of every scored token.
for sentence_scores in senti_val:
    polarity = [s[0] - s[1] for s in sentence_scores]
    # polarity = sum(polarity)  # collapse to a single per-sentence score
    print(polarity)
    calc.append(polarity)
我正在尝试进行一项计算,以便将评论的评分(1 到 5)与从评论文本的情感分析中提取的分数进行比较。我使用的是 sentiwordnet,已经成功获得了正面、负面和客观(objective)分数,但我无法遍历 42 条评论中每个句子里每个单词的分数来完成计算。
这是我的代码:
def preprocess_token(text):
    """Lower-case *text* and split it into NLTK word tokens."""
    return nltk.tokenize.word_tokenize(text.lower())
def penn_to_wn(tag):
    """Translate a Penn Treebank POS tag into the matching WordNet POS.

    Returns None for tag families WordNet does not model.
    """
    mapping = {'J': wn.ADJ, 'N': wn.NOUN, 'R': wn.ADV, 'V': wn.VERB}
    return mapping.get(tag[:1])
lemmatizer = WordNetLemmatizer()

def get_sentiment(word, tag):
    """Return [pos, neg, obj] SentiWordNet scores for *word* tagged *tag*.

    Uses the first (most common) synset of the lemmatized word.  Returns an
    empty list when the tag has no WordNet equivalent, the lemma is empty,
    or no synset is found.
    """
    wn_tag = penn_to_wn(tag)
    # BUG FIX: penn_to_wn maps verb tags to wn.VERB, but the original guard
    # excluded wn.VERB, so every verb silently scored nothing.
    if wn_tag not in (wn.NOUN, wn.ADJ, wn.ADV, wn.VERB):
        return []
    lemma = lemmatizer.lemmatize(word, pos=wn_tag)
    if not lemma:
        return []
    # BUG FIX: look up the lemma, not the raw word -- the lemmatization
    # above was computed but never used in the original code.
    synsets = wn.synsets(lemma, pos=wn_tag)
    if not synsets:
        return []
    # First synset = most frequent sense; a reasonable default heuristic.
    swn_synset = swn.senti_synset(synsets[0].name())
    return [swn_synset.pos_score(), swn_synset.neg_score(), swn_synset.obj_score()]
# Strip punctuation before tokenizing.  regex=True makes the regex intent
# explicit (implicit regex in str.replace is deprecated in modern pandas).
data["text_n"] = data['Text'].str.replace(r'[^\w\s]', '', regex=True)
data['tokens'] = data['text_n'].apply(preprocess_token)

# Keep only purely alphabetic tokens for each review (replaces the original
# two-pass loop that pre-filled empty lists and then appended word[:] copies).
word_list = [[word for word in tokens if word.isalpha()]
             for tokens in data['tokens']]

# POS-tag each review's token list.
tagged_tokens = [nltk.pos_tag(tokens) for tokens in word_list]

# Per review: a list of [pos, neg, obj] triples, one per scorable token.
senti_val = []
for tagged in tagged_tokens:
    scores = [get_sentiment(word, tag) for word, tag in tagged]
    senti_val.append([s for s in scores if s])
data['value'] = senti_val

# BUG FIX: the original did senti_val[i][i], reusing the review index as the
# token index, which raises IndexError as soon as i exceeds a review's token
# count.  Iterate each review's token scores instead; polarity = pos - neg.
calc = []
for sentence_scores in senti_val:
    calc.append([s[0] - s[1] for s in sentence_scores])
#This last chunk of code gives me error:
Traceback (most recent call last):
File "C:\Users\----\OneDrive\python\reviews.py", line 158, in <module>
r=senti_val[i][i][0] - senti_val[i][i][1]
IndexError: list index out of range
#I want to get the result of subtracting the negative score from the positive score for each word of each sentence and get it in a list.
任何关于问题所在或我下一步应该做什么的建议都将不胜感激。
提前致谢
calc = []
# One entry per sentence: the polarity (pos - neg) of every scored token.
for sentence_scores in senti_val:
    polarity = [s[0] - s[1] for s in sentence_scores]
    # polarity = sum(polarity)  # collapse to a single per-sentence score
    print(polarity)
    calc.append(polarity)