如何更改分组条的默认分组颜色

How to change the default group color for grouped bars

我发现这段代码很有帮助,但在添加颜色信息、想要替换分组子条的自动配色时遇到了问题:如果在 plt.bar(...., color) 中添加指定的“颜色”,即 color=['black', 'red', 'green', 'blue', 'cyan','brown','grey','goldenrod','lime','violet','indigo','coral','olive'],得到的颜色会重复。

应该在哪里添加颜色设置,才能让各分组子条的颜色及其图例按需要互不重复?

def grouped_barplot(df, cat, subcat, val, err):
    """Draw a grouped bar chart with error bars from a long-format DataFrame.

    One group of bars is drawn per unique value of ``df[cat]``; inside each
    group there is one bar per unique value of ``df[subcat]``.  No ``color``
    is passed to ``plt.bar``, so the bar colors are chosen by matplotlib.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format data.  Assumes every subcategory has one row per
        category, in the same category order -- TODO confirm for other
        inputs.
    cat : str
        Column whose unique values become the x-axis tick labels.
    subcat : str
        Column whose unique values split each group into bars.
    val : str
        Column holding the bar heights.
    err : str
        Column holding the error-bar sizes.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    u = df[cat].unique()
    x = np.arange(len(u))
    subx = df[subcat].unique()

    # Center the bars of a group on its tick, spaced 1 / (n + 1) apart.
    offsets = (np.arange(len(subx)) - np.arange(len(subx)).mean()) / (len(subx) + 1.)
    # The bar width equals the spacing between consecutive offsets.  It is
    # computed directly instead of via np.diff(offsets).mean(), which is NaN
    # when there is only a single subcategory.
    width = 1. / (len(subx) + 1.)

    for i, gr in enumerate(subx):
        dfg = df[df[subcat] == gr]
        plt.bar(x + offsets[i], dfg[val].values, width=width,
                label="{} {}".format(subcat, gr), yerr=dfg[err].values, capsize=5)

    plt.xlabel("Test", fontsize=14)
    plt.ylabel("value ", fontsize=14)
    plt.xticks(x, u, fontsize=14)
    plt.yticks((0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100), fontsize=14)

    plt.legend(title="ML", loc="upper center", bbox_to_anchor=(.5, 1.25), ncol=6, fontsize=12)

    plt.show()
    
# NOTE(review): this call is at module level, outside grouped_barplot, so it
# is not part of the function body that ends with plt.show(); presumably it
# was meant to run before plt.show() inside the function -- confirm intent.
plt.title('comparision')
  • 问题中引用的那段代码用了 20 行来完成只需 3 行代码就能完成的工作(详见原文所附的说明链接)。
  • 测试环境:python 3.8.12、pandas 1.3.4、matplotlib 3.4.3

导入和 DataFrame

import pandas as pd
import matplotlib as mpl
from matplotlib.patches import Patch
import matplotlib.pyplot as plt
import numpy as np

# Long-format sample data: one row per (Candidate, Sample_Set) pair.
data = {
    'Candidate': ['X', 'Y', 'Z', 'X', 'Y', 'Z', 'X', 'Y', 'Z'],
    'Sample_Set': [1, 1, 1, 2, 2, 2, 3, 3, 3],
    'Values': [20, 10, 10, 200, 101, 99, 1999, 998, 1003],
    'Error': [5, 2, 3, 30, 30, 30, 10, 10, 10],
}

# Palette of named colors used by the plotting examples.
colors = [
    'black', 'red', 'green', 'blue', 'cyan', 'brown', 'grey',
    'goldenrod', 'lime', 'violet', 'indigo', 'coral', 'olive',
]

# Build the frame and order its rows by candidate, then by sample set.
df = pd.DataFrame(data).sort_values(by=['Candidate', 'Sample_Set'])

# display(df)
  Candidate  Sample_Set  Values  Error
0         X           1      20      5
3         X           2     200     30
6         X           3    1999     10
1         Y           1      10      2
4         Y           2     101     30
7         Y           3     998     10
2         Z           1      10      3
5         Z           2      99     30
8         Z           3    1003     10

# Reshape the long dataframe into a wide format for Values:
# one row per 'Candidate', one column per 'Sample_Set'.
vals = df.pivot(index='Candidate', columns='Sample_Set', values='Values')

# display(vals)
Sample_Set   1    2     3
Candidate                
X           20  200  1999
Y           10  101   998
Z           10   99  1003

# Reshape the long dataframe into a wide format for Errors, using the same
# index ('Candidate') and columns ('Sample_Set') as the Values pivot.
yerr = df.pivot(index='Candidate', columns='Sample_Set', values='Error')

# display(yerr)
Sample_Set  1   2   3
Candidate            
X           5  30  10
Y           2  30  10
Z           3  30  10

更新答案

  • 根据 OP 的评论,目标似乎是替换默认颜色,但各组的颜色应该相同。
    • 使用新答案中的代码,只需将 colors 列表传递给 color 参数即可轻松完成此操作。
# Grouped bar chart of `vals` with `yerr` error bars on a log-scaled y-axis;
# the bar colors come from the `colors` list.
ax = vals.plot.bar(
    yerr=yerr,
    logy=True,
    rot=0,
    figsize=(6, 5),
    ylabel='Value',
    title='Comparison',
    color=colors,
)
# Anchor the legend's upper-left corner at axes coordinates (1, 1.02).
ax.legend(title='Sample Set', loc='upper left', bbox_to_anchor=(1, 1.02))
plt.show()

使用 OP 引用的答案中的代码
  • for i, (gr, color) in enumerate(zip(subx, colors)): 允许将列表中的颜色分配给每个组。
def grouped_barplot(df, cat, subcat, val, err):
    """Draw a log-scaled grouped bar chart with one fixed color per subcategory.

    One group of bars is drawn per unique value of ``df[cat]``; inside each
    group there is one bar per unique value of ``df[subcat]``.  Each
    subcategory takes the next color from a fixed palette; the palette is
    repeated when there are more subcategories than palette entries, so no
    subcategory is left out of the plot.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format data.  Assumes every subcategory has one row per
        category, in the same category order -- TODO confirm for other
        inputs.
    cat : str
        Column whose unique values become the x-axis tick labels.
    subcat : str
        Column whose unique values split each group into bars; also used
        as the legend title.
    val : str
        Column holding the bar heights.
    err : str
        Column holding the error-bar sizes.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    u = df[cat].unique()
    x = np.arange(len(u))
    subx = df[subcat].unique()
    # Center the bars of a group on its tick, spaced 1 / (n + 1) apart.
    offsets = (np.arange(len(subx)) - np.arange(len(subx)).mean()) / (len(subx) + 1.)
    # The bar width equals the spacing between consecutive offsets.  It is
    # computed directly instead of via np.diff(offsets).mean(), which is NaN
    # when there is only a single subcategory.
    width = 1. / (len(subx) + 1.)

    colors = ['black', 'red', 'green', 'blue', 'cyan','brown','grey','goldenrod','lime','violet','indigo','coral','olive']
    # zip() stops at its shorter input, so repeat the palette until it covers
    # every subcategory; otherwise the surplus subcategories would be dropped.
    repeats = -(-len(subx) // len(colors))  # ceiling division
    colors = colors * max(repeats, 1)

    # add the colors to the loop
    for i, (gr, color) in enumerate(zip(subx, colors)):
        dfg = df[df[subcat].eq(gr)]
        plt.bar(x + offsets[i], dfg[val], width=width, yerr=dfg[err], color=color, label=gr)

    # Title the legend with the subcategory column name instead of a fixed string.
    plt.legend(title=subcat, bbox_to_anchor=(1, 1.02), loc='upper left')
    plt.yscale('log')
    plt.xlabel(cat)
    plt.ylabel(val)
    plt.xticks(x, u)
    plt.show()


# Names of the `df` columns handed to grouped_barplot.
cat, subcat, val, err = "Candidate", "Sample_Set", "Values", "Error"

grouped_barplot(df, cat, subcat, val, err)

原答案

  • 这个需求可以实现,但不建议这样做:
    • 这不是良好的可视化实践。同一组中的条形应该使用相同的颜色。可视化的目的是传达信息,而这样做只会让图表更难阅读,违背了初衷。
  • 因为条形是按组绘制的:先是 'Sample_Set 1' 的 'X'、'Y'、'Z',然后是 'Sample_Set 2' 的 'X'、'Y'、'Z',最后是 'Sample_Set 3' 的 'X'、'Y'、'Z',所以图例中只会创建 3 个标签;因此需要用合适的句柄(handles)和标签(labels)自定义一个由 Patch 组成的图例。

绘图和自定义

  • rects 的顺序与 df 的顺序不同,因此需要对 df 重新排序,才能通过 zip 把正确的颜色配到对应的 rect 上。
# give every dataframe row its own color from the palette
df['color'] = colors[:len(df)]

# draw vals with yerr; the legend is suppressed here and built by hand below
ax = vals.plot(
    kind='bar',
    yerr=yerr,
    logy=True,
    rot=0,
    figsize=(6, 5),
    legend=False,
    ylabel='Value',
    title='Comparison',
)

# collect the Rectangle artists among the children of the axes
rects = [child for child in ax.get_children() if isinstance(child, mpl.patches.Rectangle)]

# the rectangles do not follow df's row order, so re-sort the colors by
# Sample_Set and then Candidate before pairing them with the rectangles
bar_colors = df.sort_values(['Sample_Set', 'Candidate'])['color']
for bar, face in zip(rects, bar_colors):
    bar.set_facecolor(face)

# one legend patch per dataframe row, labelled "<Candidate> <Sample_Set>"
handles = [
    Patch(color=row.color, label=f'{row.Candidate} {row.Sample_Set}')
    for _, row in df.iterrows()
]

# attach the hand-built legend to the axes
ax.legend(title='Candidate Sample_Set', handles=handles, loc='upper left', bbox_to_anchor=(1, 1.02))

plt.show()

使用 OP 引用的答案中的代码

  • 使用上面的df
def grouped_barplot(df, cat, subcat, val, err):
    """Draw a log-scaled grouped bar chart where every bar has its own color.

    One group of bars is drawn per unique value of ``df[cat]``; inside each
    group there is one bar per unique value of ``df[subcat]``.  Each bar is
    filled with the value of the ``'color'`` column of its own row, and the
    legend lists one patch per row of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format data with a ``'color'`` column.  Assumes every
        subcategory has one row per category, in the same category order
        -- TODO confirm for other inputs.
    cat : str
        Column whose unique values become the x-axis tick labels.
    subcat : str
        Column whose unique values split each group into bars.
    val : str
        Column holding the bar heights.
    err : str
        Column holding the error-bar sizes.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    u = df[cat].unique()
    x = np.arange(len(u))
    subx = df[subcat].unique()
    # Center the bars of a group on its tick, spaced 1 / (n + 1) apart.
    offsets = (np.arange(len(subx)) - np.arange(len(subx)).mean()) / (len(subx) + 1.)
    # The bar width equals the spacing between consecutive offsets.  It is
    # computed directly instead of via np.diff(offsets).mean(), which is NaN
    # when there is only a single subcategory.
    width = 1. / (len(subx) + 1.)

    for i, gr in enumerate(subx):
        dfg = df[df[subcat].eq(gr)]
        # Read the colors from the rows being plotted rather than slicing
        # df.color with a fixed stride of 3, which only matches when there
        # are exactly three subcategories and the rows are sorted accordingly.
        plt.bar(x + offsets[i], dfg[val], width=width, yerr=dfg[err], color=dfg['color'].tolist())

    # One legend patch per row, labelled with that row's cat and subcat values.
    handles = [
        Patch(color=v['color'], label=f'{v[cat]} {v[subcat]}')
        for _, v in df.iterrows()
    ]

    plt.legend(title=f'{cat} {subcat}', handles=handles, bbox_to_anchor=(1, 1.02), loc='upper left')
    plt.yscale('log')
    plt.xlabel(cat)
    plt.ylabel(val)
    plt.xticks(x, u)
    plt.show()


# give every dataframe row its own color from the palette
df['color'] = colors[:len(df)]

# names of the `df` columns handed to grouped_barplot
cat, subcat, val, err = "Candidate", "Sample_Set", "Values", "Error"

grouped_barplot(df, cat, subcat, val, err)