使用 Python 在循环中保存词云(wordcloud)

save wordcloud in loop using python

我正在尝试为数据中的不同类别生成词云。我能够循环生成wordcloud。请参考以下脚本。

import pandas as pd
from wordcloud import WordCloud,STOPWORDS
import matplotlib.pyplot as plt       

# Read the input CSV into a DataFrame, decoding the file as cp1252.
df = pd.read_csv("Data.csv",encoding='cp1252')

def wordcloud_draw(data, color='black'):
    """Build a word cloud from an iterable of strings and save it as a PNG.

    The strings in ``data`` are joined with spaces and split on whitespace.
    Tokens containing 'http', tokens starting with '@' or '#', and the
    token 'RT' are dropped before the cloud is generated.

    Args:
        data: Iterable of strings supplying the words.
        color: Background colour passed to ``WordCloud``.

    Note:
        The figure is always written to the literal file name '(key).png',
        so each call overwrites the image produced by the previous call.
    """
    def _is_wanted(token):
        # Reject tokens containing 'http', tokens prefixed with '@' or '#',
        # and the exact token 'RT'.
        if 'http' in token:
            return False
        if token.startswith('@') or token.startswith('#'):
            return False
        return token != 'RT'

    tokens = ' '.join(data).split()
    cleaned_text = " ".join(filter(_is_wanted, tokens))

    generator = WordCloud(
        stopwords=STOPWORDS,
        background_color=color,
        width=2500,
        height=2000,
    )
    cloud = generator.generate(cleaned_text)

    # Figure number 1 is reused on every call and closed after saving.
    plt.figure(1, figsize=(13, 13))
    plt.imshow(cloud)
    plt.axis('off')
    plt.title('Wordcloud of key')
    plt.savefig('(key).png')
    plt.close()

# Map each distinct 'Categorization' value to the sub-frame of its rows.
dict_of_Categorization = dict(iter(df.groupby('Categorization')))

# Draw a word cloud only for categories with at least 50 rows.
for key, value in dict_of_Categorization.items():
    num_Descriptions = len(value)
    if num_Descriptions < 50:
        continue
    wordcloud_draw(value["Description"], 'white')

现在我想按各自的键(即类别名称)分别保存每个词云。我尝试在创建词云的函数中保存图像,但没有成功:它没有使用当前迭代的键,而是始终保存为“(key).png”,导致后一个词云覆盖前一个。谁能帮帮我?

您需要将 key 传递给您的 wordcloud_draw() 函数,然后使用它创建必要的字符串,如下所示:

import pandas as pd
from wordcloud import WordCloud,STOPWORDS
import matplotlib.pyplot as plt       

# Read the input CSV into a DataFrame, decoding the file as cp1252.
df = pd.read_csv("Data.csv", encoding='cp1252')

def wordcloud_draw(data, key, color='black'):
    """Render a word cloud for one key and save it as '<key>.png'.

    The strings in ``data`` are joined with spaces and split on whitespace;
    tokens containing 'http', tokens starting with '@' or '#', and the
    token 'RT' are removed before the cloud is generated.

    Args:
        data: Iterable of strings supplying the words.
        key: Value inserted into both the plot title and the output
            file name via ``str.format``.
        color: Background colour passed to ``WordCloud``.
    """
    kept_words = (
        word
        for word in ' '.join(data).split()
        if 'http' not in word
        and not word.startswith(('@', '#'))
        and word != 'RT'
    )
    cleaned_text = " ".join(kept_words)

    cloud = WordCloud(
        stopwords=STOPWORDS,
        background_color=color,
        width=2500,
        height=2000,
    ).generate(cleaned_text)

    # Figure number 1 is reused on every call and closed after saving.
    plt.figure(1, figsize=(13, 13))
    plt.imshow(cloud)
    plt.axis('off')
    plt.title('Wordcloud of key "{}"'.format(key))
    plt.savefig('{}.png'.format(key))
    plt.close()

# Map each distinct 'Categorization' value to the sub-frame of its rows.
dict_of_Categorization = {
    category: frame for category, frame in df.groupby('Categorization')
}

# Categories with fewer than 50 rows are skipped; each remaining category
# is drawn with its key so the output file is named after it.
for key, value in dict_of_Categorization.items():
    num_Descriptions = len(value)
    if num_Descriptions < 50:
        continue
    wordcloud_draw(value["Description"], key, 'white')

您只需将键(key)作为参数传递给 wordcloud_draw 函数。例如:

import pandas as pd
from wordcloud import WordCloud,STOPWORDS
import matplotlib.pyplot as plt       

# Read the input CSV into a DataFrame, decoding the file as cp1252.
df = pd.read_csv("Data.csv",encoding='cp1252')

def wordcloud_draw(data, save_name, color='black'):
    """Render a word cloud and save it as '<save_name>.png'.

    The strings in ``data`` are joined with spaces and split on whitespace;
    tokens containing 'http', tokens starting with '@' or '#', and the
    token 'RT' are removed before the cloud is generated.

    Args:
        data: Iterable of strings supplying the words.
        save_name: Base name of the output image; '.png' is appended.
            Any value accepted by ``str.format`` works, not only strings.
        color: Background colour passed to ``WordCloud``.
    """
    words = ' '.join(data)
    cleaned_word = " ".join(
        word for word in words.split()
        if 'http' not in word
        and not word.startswith(('@', '#'))
        and word != 'RT'
    )
    wordcloud = WordCloud(
        stopwords=STOPWORDS,
        background_color=color,
        width=2500,
        height=2000,
    ).generate(cleaned_word)

    # Figure number 1 is reused on every call and closed after saving.
    plt.figure(1, figsize=(13, 13))
    plt.imshow(wordcloud)
    plt.axis('off')
    plt.title('Wordcloud of key')
    # Use the actual parameter name (the original referenced the undefined
    # name `savename`, raising NameError). Formatting instead of `+` also
    # avoids a TypeError when the group key is not a string.
    plt.savefig('{}.png'.format(save_name))
    plt.close()

# Map each distinct 'Categorization' value to the sub-frame of its rows.
dict_of_Categorization = dict(iter(df.groupby('Categorization')))

# Draw a word cloud for every category that has at least 50 rows, passing
# the category key as the name used for the saved image.
for key, value in dict_of_Categorization.items():
    num_Descriptions = len(value)
    if not num_Descriptions >= 50:
        continue
    wordcloud_draw(value["Description"], key, 'white')