Python WordCloud - 如何根据数据列制作文字颜色
Python WordCloud - How to make the word colour based on a data column
我有一个我想要在词云上列出的电影片名列表,但电影的颜色取决于电影类别(例如剧情片、喜剧等),而不是完全随机的。
数据采用 CSV 格式,一列 'title' 另一列 'category'。到目前为止,我有以下代码。我觉得我需要使用参数“color_func”,但不确定如何使用。
#Importing Libraries
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from wordcloud import WordCloud
from collections import Counter
#Importing Dataset
df = pd.read_csv("films.csv")
#Creating the text variable
word_ls = df.title
#Creating a count (I want the words to be the same size)
word_could_dict = Counter(word_ls)
# Creating word_cloud with text as argument in .generate() method
wordcloud = WordCloud().generate_from_frequencies(word_could_dict)
# Display the generated Word Cloud
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()
可选的附加问题:
- 我也可以根据电影类别随机选择字体吗?
- 如何添加图例?
非常感谢任何建议。谢谢
好的,我已经调整了您的代码以包含示例 color mapping 代码。如前所述,您的代码不会计算字数,但会计算完整标题的数量(显然 wordcloud
如果标题的频率相同以使文字适合图像,则会稍微随机化标题的大小;在下面的示例《妈妈咪呀!》和《侏儒与朱丽叶》出现过两次,其他电影出现过一次):
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from wordcloud import (WordCloud, get_single_color_func)
from collections import Counter
import random
#get sample dataset
df = pd.read_csv('https://gist.github.com/tiangechen/b68782efa49a16edaf07dc2cdaa855ea/raw/0c794a9717f18b094eabab2cd6a6b9a226903577/movies.csv')
#random color generation function
def random_color():
return "#"+''.join([random.choice('ABCDEF0123456789') for i in range(6)])
#generate same random colors for each category
df['color'] = df.groupby('Genre')['Genre'].transform(lambda x: random_color())
class SimpleGroupedColorFunc(object):
"""Create a color function object which assigns EXACT colors
to certain words based on the color to words mapping
Parameters
----------
color_to_words : dict(str -> list(str))
A dictionary that maps a color to the list of words.
default_color : str
Color that will be assigned to a word that's not a member
of any value from color_to_words.
"""
def __init__(self, color_to_words, default_color):
self.word_to_color = {word: color
for (color, words) in color_to_words.items()
for word in words}
self.default_color = default_color
def __call__(self, word, **kwargs):
return self.word_to_color.get(word, self.default_color)
#create a dict of colors and matching movies
color_to_words = df.groupby('color')['Film'].agg(list).to_dict()
#Creating the text variable
word_ls = df.Film
#Creating a count (I want the words to be the same size)
word_could_dict = Counter(word_ls)
# Creating word_cloud with text as argument in .generate() method
wordcloud = WordCloud().generate_from_frequencies(word_could_dict)
# Words that are not in any of the color_to_words values
# will be colored with a grey single color function
default_color = 'grey'
# Create a color function with single tone
grouped_color_func = SimpleGroupedColorFunc(color_to_words, default_color)
# Apply our color function
wordcloud.recolor(color_func=grouped_color_func)
# Display the generated Word Cloud
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
categories = df.groupby('color')['Genre'].agg('first').to_dict()
patches = [mpatches.Patch(color=k, label=v) for k,v in categories.items()]
plt.legend(handles=patches)
plt.axis("off")
plt.imshow(wordcloud, interpolation='bilinear')
我有一个我想要在词云上列出的电影片名列表,但电影的颜色取决于电影类别(例如剧情片、喜剧等),而不是完全随机的。
数据采用 CSV 格式,一列 'title' 另一列 'category'。到目前为止,我有以下代码。我觉得我需要使用参数“color_func”,但不确定如何使用。
#Importing Libraries
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from wordcloud import WordCloud
from collections import Counter
#Importing Dataset
df = pd.read_csv("films.csv")
#Creating the text variable
word_ls = df.title
#Creating a count (I want the words to be the same size)
word_could_dict = Counter(word_ls)
# Creating word_cloud with text as argument in .generate() method
wordcloud = WordCloud().generate_from_frequencies(word_could_dict)
# Display the generated Word Cloud
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()
可选的附加问题:
- 我也可以根据电影类别随机选择字体吗?
- 如何添加图例?
非常感谢任何建议。谢谢
好的,我已经调整了您的代码以包含示例 color mapping 代码。如前所述,您的代码不会计算字数,但会计算完整标题的数量(显然 wordcloud
如果标题的频率相同以使文字适合图像,则会稍微随机化标题的大小;在下面的示例《妈妈咪呀!》和《侏儒与朱丽叶》出现过两次,其他电影出现过一次):
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from wordcloud import (WordCloud, get_single_color_func)
from collections import Counter
import random
#get sample dataset
df = pd.read_csv('https://gist.github.com/tiangechen/b68782efa49a16edaf07dc2cdaa855ea/raw/0c794a9717f18b094eabab2cd6a6b9a226903577/movies.csv')
#random color generation function
def random_color():
return "#"+''.join([random.choice('ABCDEF0123456789') for i in range(6)])
#generate same random colors for each category
df['color'] = df.groupby('Genre')['Genre'].transform(lambda x: random_color())
class SimpleGroupedColorFunc(object):
"""Create a color function object which assigns EXACT colors
to certain words based on the color to words mapping
Parameters
----------
color_to_words : dict(str -> list(str))
A dictionary that maps a color to the list of words.
default_color : str
Color that will be assigned to a word that's not a member
of any value from color_to_words.
"""
def __init__(self, color_to_words, default_color):
self.word_to_color = {word: color
for (color, words) in color_to_words.items()
for word in words}
self.default_color = default_color
def __call__(self, word, **kwargs):
return self.word_to_color.get(word, self.default_color)
#create a dict of colors and matching movies
color_to_words = df.groupby('color')['Film'].agg(list).to_dict()
#Creating the text variable
word_ls = df.Film
#Creating a count (I want the words to be the same size)
word_could_dict = Counter(word_ls)
# Creating word_cloud with text as argument in .generate() method
wordcloud = WordCloud().generate_from_frequencies(word_could_dict)
# Words that are not in any of the color_to_words values
# will be colored with a grey single color function
default_color = 'grey'
# Create a color function with single tone
grouped_color_func = SimpleGroupedColorFunc(color_to_words, default_color)
# Apply our color function
wordcloud.recolor(color_func=grouped_color_func)
# Display the generated Word Cloud
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
categories = df.groupby('color')['Genre'].agg('first').to_dict()
patches = [mpatches.Patch(color=k, label=v) for k,v in categories.items()]
plt.legend(handles=patches)
plt.axis("off")
plt.imshow(wordcloud, interpolation='bilinear')