在 wordcloud 中向停用词添加单词不起作用
adding words to stopwords in wordcloud does not work
我想向 wordcloud 添加一些停用词,但生成的图像中仍然显示了我添加到停用词中的那些词。我做错了什么?应该如何把单词添加到停用词中?
# Question snippet: build a Persian word cloud with one extra stopword.
# `text` and `twitter_mask` are not defined in this snippet; presumably they
# are created earlier in the asker's script.
from wordcloud import STOPWORDS as EN_STOPWORDS
from wordcloud import ImageColorGenerator
from stopword_persian import stopword_persian as STOPWORDS
from wordcloud_fa import WordCloudFa
# Add another stopword
# NOTE(review): this literal appears to be typed with the Arabic yeh (U+064A).
# With persian_normalize=True below, the input text is presumably normalized
# to the Persian yeh (U+06CC), so this entry may never match -- confirm.
STOPWORDS.add('ميساخته')
# Combine the Persian stopwords with wordcloud's English stopwords.
stopwords = STOPWORDS.union(EN_STOPWORDS)
# Generate a word cloud image
wordcloud = WordCloudFa(
persian_normalize=True,
include_numbers=True,
max_words=300,
stopwords=stopwords,
margin=0,
width=3000,
height=3000,
min_font_size=1,
max_font_size=500,
# NOTE(review): a bool is passed as the seed here; an int was presumably
# intended -- confirm.
random_state=True,
background_color="black",
mask=twitter_mask
).generate(text)
你可以这样做。
# Answer snippet: strip NLTK's English stopwords plus a few custom ones from
# the text, then render what is left as a word cloud.
import matplotlib.pyplot as plt
import nltk  # Natural Language ToolKit
nltk.download('stopwords')  # make sure the stopword corpus is available locally
from nltk.corpus import stopwords  # to get rid of StopWords
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator  # to create a Word Cloud
from PIL import Image  # Pillow with WordCloud to image manipulation

text = 'New stop words are bad for this text.'

# Adding the stopwords
# NLTK names its stopword lists by language ('english'), not by ISO code ('en').
stop_words = stopwords.words('english')
new_stopwords = ['new', 'stop', 'words']
stop_words.extend(new_stopwords)
stop_words = set(stop_words)

# Getting rid of the stopwords
# The stopword entries are lowercase, so compare case-insensitively; otherwise
# a capitalised token such as 'New' would slip through the filter.
clean_text = [word for word in text.split() if word.lower() not in stop_words]

# Converting the list to string
text = ' '.join(clean_text)

# Generating a wordcloud
wordcloud = WordCloud(background_color="black").generate(text)

# Display the generated image:
plt.figure(figsize=(15, 10))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()
如果你想进一步了解插值(interpolation)的相关信息,这里有一个有用的链接。
我想向 wordcloud 添加一些停用词,但生成的图像中仍然显示了我添加到停用词中的那些词。我做错了什么?应该如何把单词添加到停用词中?
# Question snippet: build a Persian word cloud with one extra stopword.
# `text` and `twitter_mask` are not defined in this snippet; presumably they
# are created earlier in the asker's script.
from wordcloud import STOPWORDS as EN_STOPWORDS
from wordcloud import ImageColorGenerator
from stopword_persian import stopword_persian as STOPWORDS
from wordcloud_fa import WordCloudFa
# Add another stopword
# NOTE(review): this literal appears to be typed with the Arabic yeh (U+064A).
# With persian_normalize=True below, the input text is presumably normalized
# to the Persian yeh (U+06CC), so this entry may never match -- confirm.
STOPWORDS.add('ميساخته')
# Combine the Persian stopwords with wordcloud's English stopwords.
stopwords = STOPWORDS.union(EN_STOPWORDS)
# Generate a word cloud image
wordcloud = WordCloudFa(
persian_normalize=True,
include_numbers=True,
max_words=300,
stopwords=stopwords,
margin=0,
width=3000,
height=3000,
min_font_size=1,
max_font_size=500,
# NOTE(review): a bool is passed as the seed here; an int was presumably
# intended -- confirm.
random_state=True,
background_color="black",
mask=twitter_mask
).generate(text)
你可以这样做。
# Answer snippet: strip NLTK's English stopwords plus a few custom ones from
# the text, then render what is left as a word cloud.
import matplotlib.pyplot as plt
import nltk  # Natural Language ToolKit
nltk.download('stopwords')  # make sure the stopword corpus is available locally
from nltk.corpus import stopwords  # to get rid of StopWords
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator  # to create a Word Cloud
from PIL import Image  # Pillow with WordCloud to image manipulation

text = 'New stop words are bad for this text.'

# Adding the stopwords
# NLTK names its stopword lists by language ('english'), not by ISO code ('en').
stop_words = stopwords.words('english')
new_stopwords = ['new', 'stop', 'words']
stop_words.extend(new_stopwords)
stop_words = set(stop_words)

# Getting rid of the stopwords
# The stopword entries are lowercase, so compare case-insensitively; otherwise
# a capitalised token such as 'New' would slip through the filter.
clean_text = [word for word in text.split() if word.lower() not in stop_words]

# Converting the list to string
text = ' '.join(clean_text)

# Generating a wordcloud
wordcloud = WordCloud(background_color="black").generate(text)

# Display the generated image:
plt.figure(figsize=(15, 10))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()
如果你想进一步了解插值(interpolation)的相关信息,这里有一个有用的链接。