如何在 Python 中根据条件制作词云?
How can I make a word cloud based on a condition in python?
我想为“Review Gast”列制作词云,条件是“sentiment”列的值必须为“negative”。然而,词云显示的是所有评论,而不仅仅是负面评论。我做错了什么?
# Question's script: builds n-gram counts from the reviews and renders them as a word cloud.
# Iterates over the 'Review Gast' values (cast to str) of the rows whose
# 'sentiment' equals 'negative'. The loop variable `i` is not referenced in any
# visible line, and the body's indentation is not visible in this paste.
for i in reviews_english[reviews_english['sentiment']=='negative']['Review Gast'].astype(str).values:
# NOTE(review): `STOPWORDS and [...]` is a boolean `and`; it evaluates to the
# list whenever STOPWORDS is truthy, so STOPWORDS itself is not passed on.
vectorizerneg = CountVectorizer(ngram_range=(3, 3), stop_words=STOPWORDS and ['width', 'px', 'jpg', 'cdn', 'src', 'https'])
# NOTE(review): fits on the full 'Review Gast' column -- no sentiment filter is
# applied here, which matches the symptom described in the question.
bag_of_words = vectorizerneg.fit_transform(reviews_english['Review Gast'])
# NOTE(review): refers to `vectorizer`, while the object fitted above is
# `vectorizerneg`; `vectorizer` is not defined in the visible code. The value
# of this bare expression is discarded.
vectorizer.vocabulary_
# Column totals of the matrix returned by fit_transform.
sum_words = bag_of_words.sum(axis=0)
# Pair every vocabulary term with the total found at that term's column index.
words_freq = [(word, sum_words[0, idx]) for word, idx in vectorizer.vocabulary_.items()]
# Highest totals first.
words_freq =sorted(words_freq, key = lambda x: x[1], reverse=True)
print (words_freq[:100])
# Generating wordcloud and saving as jpg image
words_dict = dict(words_freq)
# NOTE(review): these three constants are not referenced in the visible lines;
# the WordCloud call below uses literal sizes and max_words=50 instead.
WC_height = 1000
WC_width = 1500
WC_max_words = 200
wordCloud = WordCloud(width = 800, height = 800,background_color="white",min_font_size = 10, max_words=50)
wordCloud.generate_from_frequencies(words_dict)
# NOTE(review): the title says "bigrams" but ngram_range above is (3, 3).
plt.title('Most frequently occurring bigrams connected by same colour and font size')
plt.imshow(wordCloud, interpolation='bilinear')
plt.axis("off")
plt.show()
# Writes the rendered cloud to 'wordcloud_bigram.jpg'.
wordCloud.to_file('wordcloud_bigram.jpg')
你是在整列 reviews_english['Review Gast'] 上运行 fit_transform 的,而不是只在负面评论上。请尝试:
# Merge the word-cloud stop words with the HTML/URL leftovers into one list.
# (`STOPWORDS and [...]` is a boolean `and`: it evaluates to just the list
# whenever STOPWORDS is non-empty, silently dropping STOPWORDS.)
custom_stop_words = list(STOPWORDS) + ['width', 'px', 'jpg', 'cdn', 'src', 'https']
vectorizerneg = CountVectorizer(ngram_range=(3, 3), stop_words=custom_stop_words)
# Keep only the reviews whose sentiment is 'negative'; drop missing values and
# cast to str so the vectorizer receives text only.
negative_reviews = reviews_english[reviews_english['sentiment'] == 'negative']['Review Gast']
bag_of_words = vectorizerneg.fit_transform(negative_reviews.dropna().astype(str).to_list())
您可以删除 for 循环,因为它只是反复重新创建 CountVectorizer。
我想为“Review Gast”列制作词云,条件是“sentiment”列的值必须为“negative”。然而,词云显示的是所有评论,而不仅仅是负面评论。我做错了什么?
# Question's script: builds n-gram counts from the reviews and renders them as a word cloud.
# Iterates over the 'Review Gast' values (cast to str) of the rows whose
# 'sentiment' equals 'negative'. The loop variable `i` is not referenced in any
# visible line, and the body's indentation is not visible in this paste.
for i in reviews_english[reviews_english['sentiment']=='negative']['Review Gast'].astype(str).values:
# NOTE(review): `STOPWORDS and [...]` is a boolean `and`; it evaluates to the
# list whenever STOPWORDS is truthy, so STOPWORDS itself is not passed on.
vectorizerneg = CountVectorizer(ngram_range=(3, 3), stop_words=STOPWORDS and ['width', 'px', 'jpg', 'cdn', 'src', 'https'])
# NOTE(review): fits on the full 'Review Gast' column -- no sentiment filter is
# applied here, which matches the symptom described in the question.
bag_of_words = vectorizerneg.fit_transform(reviews_english['Review Gast'])
# NOTE(review): refers to `vectorizer`, while the object fitted above is
# `vectorizerneg`; `vectorizer` is not defined in the visible code. The value
# of this bare expression is discarded.
vectorizer.vocabulary_
# Column totals of the matrix returned by fit_transform.
sum_words = bag_of_words.sum(axis=0)
# Pair every vocabulary term with the total found at that term's column index.
words_freq = [(word, sum_words[0, idx]) for word, idx in vectorizer.vocabulary_.items()]
# Highest totals first.
words_freq =sorted(words_freq, key = lambda x: x[1], reverse=True)
print (words_freq[:100])
# Generating wordcloud and saving as jpg image
words_dict = dict(words_freq)
# NOTE(review): these three constants are not referenced in the visible lines;
# the WordCloud call below uses literal sizes and max_words=50 instead.
WC_height = 1000
WC_width = 1500
WC_max_words = 200
wordCloud = WordCloud(width = 800, height = 800,background_color="white",min_font_size = 10, max_words=50)
wordCloud.generate_from_frequencies(words_dict)
# NOTE(review): the title says "bigrams" but ngram_range above is (3, 3).
plt.title('Most frequently occurring bigrams connected by same colour and font size')
plt.imshow(wordCloud, interpolation='bilinear')
plt.axis("off")
plt.show()
# Writes the rendered cloud to 'wordcloud_bigram.jpg'.
wordCloud.to_file('wordcloud_bigram.jpg')
你是在整列 reviews_english['Review Gast'] 上运行 fit_transform 的,而不是只在负面评论上。请尝试:
# Merge the word-cloud stop words with the HTML/URL leftovers into one list.
# (`STOPWORDS and [...]` is a boolean `and`: it evaluates to just the list
# whenever STOPWORDS is non-empty, silently dropping STOPWORDS.)
custom_stop_words = list(STOPWORDS) + ['width', 'px', 'jpg', 'cdn', 'src', 'https']
vectorizerneg = CountVectorizer(ngram_range=(3, 3), stop_words=custom_stop_words)
# Keep only the reviews whose sentiment is 'negative'; drop missing values and
# cast to str so the vectorizer receives text only.
negative_reviews = reviews_english[reviews_english['sentiment'] == 'negative']['Review Gast']
bag_of_words = vectorizerneg.fit_transform(negative_reviews.dropna().astype(str).to_list())
您可以删除 for 循环,因为它只是反复重新创建 CountVectorizer。