Python WordCloud - 如何根据数据列制作文字颜色

Python WordCloud - How to make the word colour based on a data column

我有一个我想要在词云上列出的电影片名列表,但电影的颜色取决于电影类别(例如剧情片、喜剧等),而不是完全随机的。

数据采用 CSV 格式,一列 'title' 另一列 'category'。到目前为止,我有以下代码。我觉得我需要使用参数“color_func”,但不确定如何使用。

#Importing Libraries
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from wordcloud import WordCloud
from collections import Counter

#Importing Dataset
df = pd.read_csv("films.csv")

#Creating the text variable
word_ls = df.title

#Creating a count (I want the words to be the same size)
word_could_dict = Counter(word_ls)

# Creating word_cloud with text as argument in .generate() method
wordcloud = WordCloud().generate_from_frequencies(word_could_dict)

# Display the generated Word Cloud
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()

可选的附加问题:

  1. 我也可以根据电影类别随机选择字体吗?
  2. 如何添加图例?

非常感谢任何建议。谢谢

好的,我已经调整了您的代码以包含示例 color mapping 代码。如前所述,您的代码不会计算字数,但会计算完整标题的数量(显然 wordcloud 如果标题的频率相同以使文字适合图像,则会稍微随机化标题的大小;在下面的示例《妈妈咪呀!》和《侏儒与朱丽叶》出现过两次,其他电影出现过一次):

import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from wordcloud import (WordCloud, get_single_color_func)
from collections import Counter
import random

#get sample dataset
df = pd.read_csv('https://gist.github.com/tiangechen/b68782efa49a16edaf07dc2cdaa855ea/raw/0c794a9717f18b094eabab2cd6a6b9a226903577/movies.csv')

#random color generation function
def random_color():
    return "#"+''.join([random.choice('ABCDEF0123456789') for i in range(6)])

#generate same random colors for each category
df['color'] = df.groupby('Genre')['Genre'].transform(lambda x: random_color())


class SimpleGroupedColorFunc(object):
    """Create a color function object which assigns EXACT colors
       to certain words based on the color to words mapping
       Parameters
       ----------
       color_to_words : dict(str -> list(str))
         A dictionary that maps a color to the list of words.
       default_color : str
         Color that will be assigned to a word that's not a member
         of any value from color_to_words.
    """

    def __init__(self, color_to_words, default_color):
        self.word_to_color = {word: color
                              for (color, words) in color_to_words.items()
                              for word in words}

        self.default_color = default_color

    def __call__(self, word, **kwargs):
        return self.word_to_color.get(word, self.default_color)

#create a dict of colors and matching movies
color_to_words = df.groupby('color')['Film'].agg(list).to_dict()

#Creating the text variable
word_ls = df.Film

#Creating a count (I want the words to be the same size)
word_could_dict = Counter(word_ls)

# Creating word_cloud with text as argument in .generate() method
wordcloud = WordCloud().generate_from_frequencies(word_could_dict)

# Words that are not in any of the color_to_words values
# will be colored with a grey single color function
default_color = 'grey'

# Create a color function with single tone
grouped_color_func = SimpleGroupedColorFunc(color_to_words, default_color)

# Apply our color function
wordcloud.recolor(color_func=grouped_color_func)

# Display the generated Word Cloud
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()

add a legend

import matplotlib.patches as mpatches
import matplotlib.pyplot as plt

categories = df.groupby('color')['Genre'].agg('first').to_dict()
patches = [mpatches.Patch(color=k, label=v) for k,v in categories.items()]

plt.legend(handles=patches)
plt.axis("off")
plt.imshow(wordcloud, interpolation='bilinear')